1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2021 MediaTek Inc. |
4 | * Author: George Sun <george.sun@mediatek.com> |
5 | */ |
6 | |
7 | #include <linux/module.h> |
8 | #include <linux/slab.h> |
9 | #include <media/videobuf2-dma-contig.h> |
10 | #include <media/v4l2-vp9.h> |
11 | |
12 | #include "../mtk_vcodec_dec.h" |
13 | #include "../../common/mtk_vcodec_intr.h" |
14 | #include "../vdec_drv_base.h" |
15 | #include "../vdec_drv_if.h" |
16 | #include "../vdec_vpu_if.h" |
17 | |
18 | /* reset_frame_context defined in VP9 spec */ |
19 | #define VP9_RESET_FRAME_CONTEXT_NONE0 0 |
20 | #define VP9_RESET_FRAME_CONTEXT_NONE1 1 |
21 | #define VP9_RESET_FRAME_CONTEXT_SPEC 2 |
22 | #define VP9_RESET_FRAME_CONTEXT_ALL 3 |
23 | |
24 | #define VP9_TILE_BUF_SIZE 4096 |
25 | #define VP9_PROB_BUF_SIZE 2560 |
26 | #define VP9_COUNTS_BUF_SIZE 16384 |
27 | |
28 | #define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x)) |
29 | #define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x)) |
30 | #define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x)) |
31 | #define VP9_BAND_6(band) ((band) == 0 ? 3 : 6) |
32 | |
/*
 * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint
 *
 * This mirrors the probability-table block consumed by the decoder
 * firmware. The layout (field order, sizes and explicit padding) is part
 * of the firmware ABI and must not be changed.
 */
struct vdec_vp9_slice_frame_ctx {
	/* coefficient probabilities, indexed [tx_size][plane][inter][band] */
	struct {
		u8 probs[6][3];
		u8 padding[2];
	} coef_probs[4][2][2][6];

	u8 y_mode_prob[4][16];
	u8 switch_interp_prob[4][16];
	u8 seg[32]; /* ignore */
	u8 comp_inter_prob[16];
	u8 comp_ref_prob[16];
	u8 single_ref_prob[5][2];
	u8 single_ref_prob_padding[6];

	/* motion-vector probabilities */
	u8 joint[3];
	u8 joint_padding[13];
	struct {
		u8 sign;
		u8 classes[10];
		u8 padding[5];
	} sign_classes[2];
	struct {
		u8 class0[1];
		u8 bits[10];
		u8 padding[5];
	} class0_bits[2];
	struct {
		u8 class0_fp[2][3];
		u8 fp[3];
		u8 class0_hp;
		u8 hp;
		u8 padding[5];
	} class0_fp_hp[2];

	u8 uv_mode_prob[10][16];
	u8 uv_mode_prob_padding[2][16];

	u8 partition_prob[16][4];

	u8 inter_mode_probs[7][4];
	u8 skip_probs[4];

	/* tx-size probabilities for 8x8/16x16/32x32 */
	u8 tx_p8x8[2][4];
	u8 tx_p16x16[2][4];
	u8 tx_p32x32[2][4];
	u8 intra_inter_prob[8];
};
83 | |
/*
 * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint
 *
 * Symbol counts accumulated by the hardware/firmware during decode and
 * later folded back into the probability tables. Layout is part of the
 * firmware ABI; do not reorder or resize fields.
 */
struct vdec_vp9_slice_frame_counts {
	union {
		/* structured view of the end-of-block branch counts */
		struct {
			u32 band_0[3];
			u32 padding0[1];
			u32 band_1_5[5][6];
			u32 padding1[2];
		} eob_branch[4][2][2];
		/* flat view covering the same storage */
		u32 eob_branch_space[256 * 4];
	};

	struct {
		u32 band_0[3][4];
		u32 band_1_5[5][6][4];
	} coef_probs[4][2][2];

	u32 intra_inter[4][2];
	u32 comp_inter[5][2];
	u32 comp_inter_padding[2];
	u32 comp_ref[5][2];
	u32 comp_ref_padding[2];
	u32 single_ref[5][2][2];
	u32 inter_mode[7][4];
	u32 y_mode[4][12];
	u32 uv_mode[10][10];
	u32 partition[16][4];
	u32 switchable_interp[4][4];

	u32 tx_p8x8[2][2];
	u32 tx_p16x16[2][4];
	u32 tx_p32x32[2][4];

	u32 skip[3][4];

	/* motion-vector counts */
	u32 joint[4];

	struct {
		u32 sign[2];
		u32 class0[2];
		u32 classes[12];
		u32 bits[10][2];
		u32 padding[4];
		u32 class0_fp[2][4];
		u32 fp[4];
		u32 class0_hp[2];
		u32 hp[2];
	} mvcomp[2];

	u32 reserved[126][4];
};
137 | |
/**
 * struct vdec_vp9_slice_counts_map - vp9 counts tables to map
 *                                    v4l2_vp9_frame_symbol_counts
 * @skip: skip counts.
 * @y_mode: Y prediction mode counts.
 * @filter: interpolation filter counts.
 * @sign: motion vector sign counts.
 * @classes: motion vector class counts.
 * @class0: motion vector class0 bit counts.
 * @bits: motion vector bits counts.
 * @class0_fp: motion vector class0 fractional bit counts.
 * @fp: motion vector fractional bit counts.
 * @class0_hp: motion vector class0 high precision fractional bit counts.
 * @hp: motion vector high precision fractional bit counts.
 *
 * Staging area whose array shapes match what the v4l2-vp9 helper library
 * expects, used when converting the firmware counts layout above.
 */
struct vdec_vp9_slice_counts_map {
	u32 skip[3][2];
	u32 y_mode[4][10];
	u32 filter[4][3];
	u32 sign[2][2];
	u32 classes[2][11];
	u32 class0[2][2];
	u32 bits[2][10][2];
	u32 class0_fp[2][2][4];
	u32 fp[2][4];
	u32 class0_hp[2][2];
	u32 hp[2][2];
};
166 | |
167 | /* |
168 | * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax |
169 | * used for decoding |
170 | */ |
171 | struct { |
172 | u8 ; |
173 | u8 ; |
174 | u8 ; |
175 | |
176 | u8 ; |
177 | u8 ; |
178 | u8 ; |
179 | |
180 | u8 ; |
181 | u8 [1]; |
182 | u16 ; |
183 | u16 ; |
184 | u16 ; |
185 | u16 ; |
186 | |
187 | u8 ; |
188 | u8 ; |
189 | u8 [4]; |
190 | u8 ; |
191 | u8 ; |
192 | |
193 | u8 ; |
194 | u8 ; |
195 | u8 ; |
196 | |
197 | /* loop_filter_params */ |
198 | u8 ; |
199 | u8 ; |
200 | u8 ; |
201 | s8 [4]; |
202 | s8 [2]; |
203 | |
204 | /* quantization_params */ |
205 | u8 ; |
206 | s8 ; |
207 | s8 ; |
208 | s8 ; |
209 | |
210 | /* segmentation_params */ |
211 | u8 ; |
212 | u8 ; |
213 | u8 [7]; |
214 | u8 [1]; |
215 | u8 ; |
216 | u8 [3]; |
217 | u8 ; |
218 | u8 ; |
219 | u8 [8]; |
220 | s16 [8][4]; |
221 | |
222 | /* tile_info */ |
223 | u8 ; |
224 | u8 ; |
225 | u8 [2]; |
226 | |
227 | u16 ; |
228 | u16 ; |
229 | |
230 | /* LAT OUT, CORE IN */ |
231 | u32 [8][4]; |
232 | }; |
233 | |
234 | /* |
235 | * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax |
236 | * used for decoding. |
237 | */ |
238 | struct { |
239 | u8 ; |
240 | u8 ; |
241 | u8 ; |
242 | u8 [2]; |
243 | u8 [3]; |
244 | }; |
245 | |
/*
 * struct vdec_vp9_slice_tiles - vp9 tile syntax
 *
 * Per-tile geometry handed to the firmware: at most 4 tile rows and
 * 64 tile columns (see vdec_vp9_slice_setup_tile()).
 */
struct vdec_vp9_slice_tiles {
	u32 size[4][64];	/* tile payload sizes in bytes */
	u32 mi_rows[4];		/* superblock rows per tile row */
	u32 mi_cols[64];	/* superblock cols per tile col */
	u8 actual_rows;		/* number of non-empty tile rows */
	u8 padding[7];
};
256 | |
/*
 * struct vdec_vp9_slice_reference - vp9 reference frame information
 *
 * Geometry/format of one of the three (LAST/GOLDEN/ALTREF) reference
 * frames, as needed by the firmware for motion compensation scaling.
 */
struct vdec_vp9_slice_reference {
	u16 frame_width;
	u16 frame_height;
	u8 bit_depth;
	u8 subsampling_x;
	u8 subsampling_y;
	u8 padding;
};
268 | |
/*
 * struct vdec_vp9_slice_frame - vp9 syntax used for decoding
 *
 * Aggregates all per-frame syntax shared with the firmware: uncompressed
 * header, compressed header, tile layout and the 3 reference descriptions.
 */
struct vdec_vp9_slice_frame {
	struct vdec_vp9_slice_uncompressed_header uh;
	struct vdec_vp9_slice_compressed_header ch;
	struct vdec_vp9_slice_tiles tiles;
	struct vdec_vp9_slice_reference ref[3];
};
278 | |
/*
 * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance
 */
struct vdec_vp9_slice_init_vsi {
	unsigned int architecture;	/* firmware architecture id */
	unsigned int reserved;
	u64 core_vsi;			/* remote address of the Core-stage VSI */
	/* default frame context's position in MicroP */
	u64 default_frame_ctx;
};
289 | |
/*
 * struct vdec_vp9_slice_mem - memory address and size
 *
 * Generic buffer descriptor exchanged with the firmware; the unions keep
 * the footprint fixed at two 64-bit words regardless of how a given
 * buffer is addressed.
 */
struct vdec_vp9_slice_mem {
	union {
		u64 buf;
		dma_addr_t dma_addr;
	};
	union {
		size_t size;
		dma_addr_t dma_addr_end;	/* some users store an end address here */
		u64 padding;
	};
};
304 | |
/*
 * struct vdec_vp9_slice_bs - input buffer for decoding
 */
struct vdec_vp9_slice_bs {
	struct vdec_vp9_slice_mem buf;		/* whole bitstream buffer */
	struct vdec_vp9_slice_mem frame;	/* frame data within the buffer */
};
312 | |
/*
 * struct vdec_vp9_slice_fb - frame buffer for decoding
 */
struct vdec_vp9_slice_fb {
	struct vdec_vp9_slice_mem y;	/* luma plane */
	struct vdec_vp9_slice_mem c;	/* chroma plane */
};
320 | |
/*
 * struct vdec_vp9_slice_state - decoding state
 *
 * Written back by the firmware after each stage; inspected by the driver
 * to detect errors/timeouts and (optionally) collect CRC/perf data.
 */
struct vdec_vp9_slice_state {
	int err;		/* firmware-reported decode error */
	unsigned int full;	/* output (ube) buffer full indication */
	unsigned int timeout;	/* hardware timeout indication */
	unsigned int perf;	/* performance counter data */

	unsigned int crc[12];
};
332 | |
/**
 * struct vdec_vp9_slice_vsi - exchange decoding information
 *                             between Main CPU and MicroP
 *
 * @bs:	input buffer
 * @fb:	output buffer
 * @ref:	3 reference buffers
 * @mv:	mv working buffer
 * @seg:	segmentation working buffer
 * @tile:	tile buffer
 * @prob:	prob table buffer, used to set/update prob table
 * @counts:	counts table buffer, used to update prob table
 * @ube:	general buffer
 * @trans:	trans buffer position in general buffer
 * @err_map:	error buffer
 * @row_info:	row info buffer
 * @frame:	decoding syntax
 * @state:	decoding state
 *
 * The same structure is copied wholesale to/from the remote (MicroP)
 * side, so its layout is firmware ABI.
 */
struct vdec_vp9_slice_vsi {
	/* used in LAT stage */
	struct vdec_vp9_slice_bs bs;
	/* used in Core stage */
	struct vdec_vp9_slice_fb fb;
	struct vdec_vp9_slice_fb ref[3];

	struct vdec_vp9_slice_mem mv[2];
	struct vdec_vp9_slice_mem seg[2];
	struct vdec_vp9_slice_mem tile;
	struct vdec_vp9_slice_mem prob;
	struct vdec_vp9_slice_mem counts;

	/* LAT stage's output, Core stage's input */
	struct vdec_vp9_slice_mem ube;
	struct vdec_vp9_slice_mem trans;
	struct vdec_vp9_slice_mem err_map;
	struct vdec_vp9_slice_mem row_info;

	/* decoding parameters */
	struct vdec_vp9_slice_frame frame;

	struct vdec_vp9_slice_state state;
};
376 | |
/**
 * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi.
 *                             pass it from lat to core
 *
 * @vsi:	local vsi. copy to/from remote vsi before/after decoding
 * @ref_idx:	reference buffer index (timestamps of LAST/GOLDEN/ALTREF)
 * @seq:	picture sequence
 * @state:	decoding state, one entry per stage (LAT and Core)
 */
struct vdec_vp9_slice_pfc {
	struct vdec_vp9_slice_vsi vsi;

	u64 ref_idx[3];

	int seq;

	/* LAT/Core CRC */
	struct vdec_vp9_slice_state state[2];
};
396 | |
/*
 * enum vdec_vp9_slice_resolution_level - working-buffer sizing class
 *
 * Working buffers are allocated for the maximum size of the current
 * level and only reallocated when a stream crosses a level boundary.
 */
enum vdec_vp9_slice_resolution_level {
	VP9_RES_NONE,	/* no buffers allocated yet */
	VP9_RES_FHD,
	VP9_RES_4K,
	VP9_RES_8K,
};
406 | |
/*
 * struct vdec_vp9_slice_ref - picture's width & height should kept
 *                             for later decoding as reference picture
 */
struct vdec_vp9_slice_ref {
	unsigned int width;
	unsigned int height;
};
415 | |
/**
 * struct vdec_vp9_slice_instance - represent one vp9 instance
 *
 * @ctx:	pointer to codec's context
 * @vpu:	VPU instance
 * @seq:	global picture sequence
 * @level:	level of current resolution
 * @width:	width of last picture
 * @height:	height of last picture
 * @frame_type:	frame_type of last picture
 * @irq:	irq to Main CPU or MicroP
 * @show_frame:	show_frame of last picture
 * @dpb:	picture information (width/height) for reference
 * @mv:	mv working buffer
 * @seg:	segmentation working buffer
 * @tile:	tile buffer
 * @prob:	prob table buffer, used to set/update prob table
 * @counts:	counts table buffer, used to update prob table
 * @frame_ctx:	4 frame context according to VP9 Spec
 * @frame_ctx_helper:	frame context in the layout used by the v4l2-vp9
 *			helpers, reused for each of the 4 contexts
 * @dirty:	state of each frame context
 * @init_vsi:	vsi used for initialized VP9 instance
 * @vsi:	vsi used for decoding/flush ...
 * @core_vsi:	vsi used for Core stage
 *
 * @sc_pfc:	per frame context single core
 * @counts_map:	used map to counts_helper
 * @counts_helper:	counts table according to newest kernel spec
 */
struct vdec_vp9_slice_instance {
	struct mtk_vcodec_dec_ctx *ctx;
	struct vdec_vpu_inst vpu;

	int seq;

	enum vdec_vp9_slice_resolution_level level;

	/* for resolution change and get_pic_info */
	unsigned int width;
	unsigned int height;

	/* for last_frame_type */
	unsigned int frame_type;
	unsigned int irq;

	unsigned int show_frame;

	/* maintain vp9 reference frame state */
	struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME];

	/*
	 * normal working buffers
	 * mv[0]/seg[0]/tile/prob/counts is used for LAT
	 * mv[1]/seg[1] is used for CORE
	 */
	struct mtk_vcodec_mem mv[2];
	struct mtk_vcodec_mem seg[2];
	struct mtk_vcodec_mem tile;
	struct mtk_vcodec_mem prob;
	struct mtk_vcodec_mem counts;

	/* 4 prob tables */
	struct vdec_vp9_slice_frame_ctx frame_ctx[4];
	/* helper table in the v4l2-vp9 layout */
	struct v4l2_vp9_frame_context frame_ctx_helper;
	unsigned char dirty[4];

	/* MicroP vsi */
	union {
		struct vdec_vp9_slice_init_vsi *init_vsi;
		struct vdec_vp9_slice_vsi *vsi;
	};
	struct vdec_vp9_slice_vsi *core_vsi;

	struct vdec_vp9_slice_pfc sc_pfc;
	struct vdec_vp9_slice_counts_map counts_map;
	struct v4l2_vp9_frame_symbol_counts counts_helper;
};
494 | |
495 | /* |
496 | * all VP9 instances could share this default frame context. |
497 | */ |
498 | static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx; |
499 | static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock); |
500 | |
501 | static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf); |
502 | |
503 | static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance) |
504 | { |
505 | struct vdec_vp9_slice_frame_ctx *remote_frame_ctx; |
506 | struct vdec_vp9_slice_frame_ctx *frame_ctx; |
507 | struct mtk_vcodec_dec_ctx *ctx; |
508 | struct vdec_vp9_slice_init_vsi *vsi; |
509 | int ret = 0; |
510 | |
511 | ctx = instance->ctx; |
512 | vsi = instance->vpu.vsi; |
513 | if (!ctx || !vsi) |
514 | return -EINVAL; |
515 | |
516 | remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(fw: ctx->dev->fw_handler, |
517 | mem_addr: (u32)vsi->default_frame_ctx); |
518 | if (!remote_frame_ctx) { |
519 | mtk_vdec_err(ctx, "failed to map default frame ctx\n" ); |
520 | return -EINVAL; |
521 | } |
522 | |
523 | mutex_lock(&vdec_vp9_slice_frame_ctx_lock); |
524 | if (vdec_vp9_slice_default_frame_ctx) |
525 | goto out; |
526 | |
527 | frame_ctx = kmemdup(p: remote_frame_ctx, size: sizeof(*frame_ctx), GFP_KERNEL); |
528 | if (!frame_ctx) { |
529 | ret = -ENOMEM; |
530 | goto out; |
531 | } |
532 | |
533 | vdec_vp9_slice_default_frame_ctx = frame_ctx; |
534 | |
535 | out: |
536 | mutex_unlock(lock: &vdec_vp9_slice_frame_ctx_lock); |
537 | |
538 | return ret; |
539 | } |
540 | |
541 | static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance, |
542 | struct vdec_vp9_slice_vsi *vsi) |
543 | { |
544 | struct mtk_vcodec_dec_ctx *ctx = instance->ctx; |
545 | enum vdec_vp9_slice_resolution_level level; |
546 | /* super blocks */ |
547 | unsigned int max_sb_w; |
548 | unsigned int max_sb_h; |
549 | unsigned int max_w; |
550 | unsigned int max_h; |
551 | unsigned int w; |
552 | unsigned int h; |
553 | size_t size; |
554 | int ret; |
555 | int i; |
556 | |
557 | w = vsi->frame.uh.frame_width; |
558 | h = vsi->frame.uh.frame_height; |
559 | |
560 | if (w > VCODEC_DEC_4K_CODED_WIDTH || |
561 | h > VCODEC_DEC_4K_CODED_HEIGHT) { |
562 | return -EINVAL; |
563 | } else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) { |
564 | /* 4K */ |
565 | level = VP9_RES_4K; |
566 | max_w = VCODEC_DEC_4K_CODED_WIDTH; |
567 | max_h = VCODEC_DEC_4K_CODED_HEIGHT; |
568 | } else { |
569 | /* FHD */ |
570 | level = VP9_RES_FHD; |
571 | max_w = MTK_VDEC_MAX_W; |
572 | max_h = MTK_VDEC_MAX_H; |
573 | } |
574 | |
575 | if (level == instance->level) |
576 | return 0; |
577 | |
578 | mtk_vdec_debug(ctx, "resolution level changed, from %u to %u, %ux%u" , |
579 | instance->level, level, w, h); |
580 | |
581 | max_sb_w = DIV_ROUND_UP(max_w, 64); |
582 | max_sb_h = DIV_ROUND_UP(max_h, 64); |
583 | ret = -ENOMEM; |
584 | |
585 | /* |
586 | * Lat-flush must wait core idle, otherwise core will |
587 | * use released buffers |
588 | */ |
589 | |
590 | size = (max_sb_w * max_sb_h + 2) * 576; |
591 | for (i = 0; i < 2; i++) { |
592 | if (instance->mv[i].va) |
593 | mtk_vcodec_mem_free(priv: ctx, mem: &instance->mv[i]); |
594 | instance->mv[i].size = size; |
595 | if (mtk_vcodec_mem_alloc(priv: ctx, mem: &instance->mv[i])) |
596 | goto err; |
597 | } |
598 | |
599 | size = (max_sb_w * max_sb_h * 32) + 256; |
600 | for (i = 0; i < 2; i++) { |
601 | if (instance->seg[i].va) |
602 | mtk_vcodec_mem_free(priv: ctx, mem: &instance->seg[i]); |
603 | instance->seg[i].size = size; |
604 | if (mtk_vcodec_mem_alloc(priv: ctx, mem: &instance->seg[i])) |
605 | goto err; |
606 | } |
607 | |
608 | if (!instance->tile.va) { |
609 | instance->tile.size = VP9_TILE_BUF_SIZE; |
610 | if (mtk_vcodec_mem_alloc(priv: ctx, mem: &instance->tile)) |
611 | goto err; |
612 | } |
613 | |
614 | if (!instance->prob.va) { |
615 | instance->prob.size = VP9_PROB_BUF_SIZE; |
616 | if (mtk_vcodec_mem_alloc(priv: ctx, mem: &instance->prob)) |
617 | goto err; |
618 | } |
619 | |
620 | if (!instance->counts.va) { |
621 | instance->counts.size = VP9_COUNTS_BUF_SIZE; |
622 | if (mtk_vcodec_mem_alloc(priv: ctx, mem: &instance->counts)) |
623 | goto err; |
624 | } |
625 | |
626 | instance->level = level; |
627 | return 0; |
628 | |
629 | err: |
630 | instance->level = VP9_RES_NONE; |
631 | return ret; |
632 | } |
633 | |
634 | static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance) |
635 | { |
636 | struct mtk_vcodec_dec_ctx *ctx = instance->ctx; |
637 | int i; |
638 | |
639 | for (i = 0; i < ARRAY_SIZE(instance->mv); i++) { |
640 | if (instance->mv[i].va) |
641 | mtk_vcodec_mem_free(priv: ctx, mem: &instance->mv[i]); |
642 | } |
643 | for (i = 0; i < ARRAY_SIZE(instance->seg); i++) { |
644 | if (instance->seg[i].va) |
645 | mtk_vcodec_mem_free(priv: ctx, mem: &instance->seg[i]); |
646 | } |
647 | if (instance->tile.va) |
648 | mtk_vcodec_mem_free(priv: ctx, mem: &instance->tile); |
649 | if (instance->prob.va) |
650 | mtk_vcodec_mem_free(priv: ctx, mem: &instance->prob); |
651 | if (instance->counts.va) |
652 | mtk_vcodec_mem_free(priv: ctx, mem: &instance->counts); |
653 | |
654 | instance->level = VP9_RES_NONE; |
655 | } |
656 | |
657 | static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi, |
658 | struct vdec_vp9_slice_vsi *remote_vsi, |
659 | int skip) |
660 | { |
661 | struct vdec_vp9_slice_frame *rf; |
662 | struct vdec_vp9_slice_frame *f; |
663 | |
664 | /* |
665 | * compressed header |
666 | * dequant |
667 | * buffer position |
668 | * decode state |
669 | */ |
670 | if (!skip) { |
671 | rf = &remote_vsi->frame; |
672 | f = &vsi->frame; |
673 | memcpy(&f->ch, &rf->ch, sizeof(f->ch)); |
674 | memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant)); |
675 | memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans)); |
676 | } |
677 | |
678 | memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state)); |
679 | } |
680 | |
/*
 * Push the whole local VSI to the remote (MicroP) side before kicking a
 * decode. memcpy (not struct assignment) is deliberate: the destination
 * is firmware-shared memory, so every byte including padding is copied.
 */
static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi,
					 struct vdec_vp9_slice_vsi *remote_vsi)
{
	memcpy(remote_vsi, vsi, sizeof(*vsi));
}
686 | |
/*
 * Compute the mi (mode-info, 8-pixel unit) offset where tile @idx starts,
 * following the tile boundary formula of the VP9 spec: superblock-align
 * the split, then clamp to the picture's mi extent.
 */
static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2)
{
	int sb_num = (mi_num + 7) >> 3;	/* superblock count (64-pixel units) */
	int mi_offset = ((idx * sb_num) >> tile_log2) << 3;

	return mi_offset < mi_num ? mi_offset : mi_num;
}
694 | |
695 | static |
696 | int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance) |
697 | { |
698 | struct vb2_v4l2_buffer *src; |
699 | struct vb2_v4l2_buffer *dst; |
700 | |
701 | src = v4l2_m2m_next_src_buf(m2m_ctx: instance->ctx->m2m_ctx); |
702 | if (!src) |
703 | return -EINVAL; |
704 | |
705 | dst = v4l2_m2m_next_dst_buf(m2m_ctx: instance->ctx->m2m_ctx); |
706 | if (!dst) |
707 | return -EINVAL; |
708 | |
709 | v4l2_m2m_buf_copy_metadata(out_vb: src, cap_vb: dst, copy_frame_flags: true); |
710 | |
711 | return 0; |
712 | } |
713 | |
714 | static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance, |
715 | struct vdec_lat_buf *lat_buf) |
716 | { |
717 | struct vb2_v4l2_buffer *src; |
718 | struct vb2_v4l2_buffer *dst; |
719 | |
720 | src = v4l2_m2m_next_src_buf(m2m_ctx: instance->ctx->m2m_ctx); |
721 | if (!src) |
722 | return -EINVAL; |
723 | |
724 | lat_buf->src_buf_req = src->vb2_buf.req_obj.req; |
725 | |
726 | dst = &lat_buf->ts_info; |
727 | v4l2_m2m_buf_copy_metadata(out_vb: src, cap_vb: dst, copy_frame_flags: true); |
728 | return 0; |
729 | } |
730 | |
731 | static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance, |
732 | struct vdec_vp9_slice_uncompressed_header *uh, |
733 | struct v4l2_ctrl_vp9_frame *hdr) |
734 | { |
735 | int i; |
736 | |
737 | uh->profile = hdr->profile; |
738 | uh->last_frame_type = instance->frame_type; |
739 | uh->frame_type = !HDR_FLAG(KEY_FRAME); |
740 | uh->last_show_frame = instance->show_frame; |
741 | uh->show_frame = HDR_FLAG(SHOW_FRAME); |
742 | uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); |
743 | uh->bit_depth = hdr->bit_depth; |
744 | uh->last_frame_width = instance->width; |
745 | uh->last_frame_height = instance->height; |
746 | uh->frame_width = hdr->frame_width_minus_1 + 1; |
747 | uh->frame_height = hdr->frame_height_minus_1 + 1; |
748 | uh->intra_only = HDR_FLAG(INTRA_ONLY); |
749 | /* map v4l2 enum to values defined in VP9 spec for firmware */ |
750 | switch (hdr->reset_frame_context) { |
751 | case V4L2_VP9_RESET_FRAME_CTX_NONE: |
752 | uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; |
753 | break; |
754 | case V4L2_VP9_RESET_FRAME_CTX_SPEC: |
755 | uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC; |
756 | break; |
757 | case V4L2_VP9_RESET_FRAME_CTX_ALL: |
758 | uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL; |
759 | break; |
760 | default: |
761 | uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; |
762 | break; |
763 | } |
764 | /* |
765 | * ref_frame_sign_bias specifies the intended direction |
766 | * of the motion vector in time for each reference frame. |
767 | * - INTRA_FRAME = 0, |
768 | * - LAST_FRAME = 1, |
769 | * - GOLDEN_FRAME = 2, |
770 | * - ALTREF_FRAME = 3, |
771 | * ref_frame_sign_bias[INTRA_FRAME] is always 0 |
772 | * and VDA only passes another 3 directions |
773 | */ |
774 | uh->ref_frame_sign_bias[0] = 0; |
775 | for (i = 0; i < 3; i++) |
776 | uh->ref_frame_sign_bias[i + 1] = |
777 | !!(hdr->ref_frame_sign_bias & (1 << i)); |
778 | uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV); |
779 | uh->interpolation_filter = hdr->interpolation_filter; |
780 | uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX); |
781 | uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE); |
782 | uh->frame_context_idx = hdr->frame_context_idx; |
783 | |
784 | /* tile info */ |
785 | uh->tile_cols_log2 = hdr->tile_cols_log2; |
786 | uh->tile_rows_log2 = hdr->tile_rows_log2; |
787 | |
788 | uh->uncompressed_header_size = hdr->uncompressed_header_size; |
789 | uh->header_size_in_bytes = hdr->compressed_header_size; |
790 | } |
791 | |
792 | static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance, |
793 | struct vdec_vp9_slice_uncompressed_header *uh, |
794 | struct v4l2_ctrl_vp9_frame *hdr) |
795 | { |
796 | int error_resilient_mode; |
797 | int reset_frame_context; |
798 | int key_frame; |
799 | int intra_only; |
800 | int i; |
801 | |
802 | key_frame = HDR_FLAG(KEY_FRAME); |
803 | intra_only = HDR_FLAG(INTRA_ONLY); |
804 | error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); |
805 | reset_frame_context = uh->reset_frame_context; |
806 | |
807 | /* |
808 | * according to "6.2 Uncompressed header syntax" in |
809 | * "VP9 Bitstream & Decoding Process Specification", |
810 | * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode) |
811 | */ |
812 | if (key_frame || intra_only || error_resilient_mode) { |
813 | /* |
814 | * @reset_frame_context specifies |
815 | * whether the frame context should be |
816 | * reset to default values: |
817 | * 0 or 1 means do not reset any frame context |
818 | * 2 resets just the context specified in the frame header |
819 | * 3 resets all contexts |
820 | */ |
821 | if (key_frame || error_resilient_mode || |
822 | reset_frame_context == 3) { |
823 | /* use default table */ |
824 | for (i = 0; i < 4; i++) |
825 | instance->dirty[i] = 0; |
826 | } else if (reset_frame_context == 2) { |
827 | instance->dirty[uh->frame_context_idx] = 0; |
828 | } |
829 | uh->frame_context_idx = 0; |
830 | } |
831 | } |
832 | |
833 | static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh, |
834 | struct v4l2_vp9_loop_filter *lf) |
835 | { |
836 | int i; |
837 | |
838 | uh->loop_filter_level = lf->level; |
839 | uh->loop_filter_sharpness = lf->sharpness; |
840 | uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED); |
841 | for (i = 0; i < 4; i++) |
842 | uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i]; |
843 | for (i = 0; i < 2; i++) |
844 | uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i]; |
845 | } |
846 | |
/*
 * Copy the V4L2 quantization parameters (base index plus the three
 * signed deltas) into the firmware header fields.
 */
static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh,
					      struct v4l2_vp9_quantization *quant)
{
	uh->base_q_idx = quant->base_q_idx;
	uh->delta_q_y_dc = quant->delta_q_y_dc;
	uh->delta_q_uv_dc = quant->delta_q_uv_dc;
	uh->delta_q_uv_ac = quant->delta_q_uv_ac;
}
855 | |
856 | static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh, |
857 | struct v4l2_vp9_segmentation *seg) |
858 | { |
859 | int i; |
860 | int j; |
861 | |
862 | uh->segmentation_enabled = SEG_FLAG(ENABLED); |
863 | uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP); |
864 | for (i = 0; i < 7; i++) |
865 | uh->segmentation_tree_probs[i] = seg->tree_probs[i]; |
866 | uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE); |
867 | for (i = 0; i < 3; i++) |
868 | uh->segmentation_pred_prob[i] = seg->pred_probs[i]; |
869 | uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA); |
870 | uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE); |
871 | for (i = 0; i < 8; i++) { |
872 | uh->feature_enabled[i] = seg->feature_enabled[i]; |
873 | for (j = 0; j < 4; j++) |
874 | uh->feature_value[i][j] = seg->feature_data[i][j]; |
875 | } |
876 | } |
877 | |
878 | static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi, |
879 | struct v4l2_ctrl_vp9_frame *hdr) |
880 | { |
881 | unsigned int rows_log2; |
882 | unsigned int cols_log2; |
883 | unsigned int rows; |
884 | unsigned int cols; |
885 | unsigned int mi_rows; |
886 | unsigned int mi_cols; |
887 | struct vdec_vp9_slice_tiles *tiles; |
888 | int offset; |
889 | int start; |
890 | int end; |
891 | int i; |
892 | |
893 | rows_log2 = hdr->tile_rows_log2; |
894 | cols_log2 = hdr->tile_cols_log2; |
895 | rows = 1 << rows_log2; |
896 | cols = 1 << cols_log2; |
897 | tiles = &vsi->frame.tiles; |
898 | tiles->actual_rows = 0; |
899 | |
900 | if (rows > 4 || cols > 64) |
901 | return -EINVAL; |
902 | |
903 | /* setup mi rows/cols information */ |
904 | mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3; |
905 | mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3; |
906 | |
907 | for (i = 0; i < rows; i++) { |
908 | start = vdec_vp9_slice_tile_offset(idx: i, mi_num: mi_rows, tile_log2: rows_log2); |
909 | end = vdec_vp9_slice_tile_offset(idx: i + 1, mi_num: mi_rows, tile_log2: rows_log2); |
910 | offset = end - start; |
911 | tiles->mi_rows[i] = (offset + 7) >> 3; |
912 | if (tiles->mi_rows[i]) |
913 | tiles->actual_rows++; |
914 | } |
915 | |
916 | for (i = 0; i < cols; i++) { |
917 | start = vdec_vp9_slice_tile_offset(idx: i, mi_num: mi_cols, tile_log2: cols_log2); |
918 | end = vdec_vp9_slice_tile_offset(idx: i + 1, mi_num: mi_cols, tile_log2: cols_log2); |
919 | offset = end - start; |
920 | tiles->mi_cols[i] = (offset + 7) >> 3; |
921 | } |
922 | |
923 | return 0; |
924 | } |
925 | |
/* Clear the firmware-written decode state before starting a new frame. */
static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi)
{
	memset(&vsi->state, 0, sizeof(vsi->state));
}
930 | |
931 | static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc, |
932 | struct v4l2_ctrl_vp9_frame *hdr) |
933 | { |
934 | pfc->ref_idx[0] = hdr->last_frame_ts; |
935 | pfc->ref_idx[1] = hdr->golden_frame_ts; |
936 | pfc->ref_idx[2] = hdr->alt_frame_ts; |
937 | } |
938 | |
939 | static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance, |
940 | struct vdec_vp9_slice_pfc *pfc) |
941 | { |
942 | struct v4l2_ctrl_vp9_frame *hdr; |
943 | struct vdec_vp9_slice_uncompressed_header *uh; |
944 | struct v4l2_ctrl *hdr_ctrl; |
945 | struct vdec_vp9_slice_vsi *vsi; |
946 | int ret; |
947 | |
948 | /* frame header */ |
949 | hdr_ctrl = v4l2_ctrl_find(hdl: &instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME); |
950 | if (!hdr_ctrl || !hdr_ctrl->p_cur.p) |
951 | return -EINVAL; |
952 | |
953 | hdr = hdr_ctrl->p_cur.p; |
954 | vsi = &pfc->vsi; |
955 | uh = &vsi->frame.uh; |
956 | |
957 | /* setup vsi information */ |
958 | vdec_vp9_slice_setup_hdr(instance, uh, hdr); |
959 | vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr); |
960 | vdec_vp9_slice_setup_loop_filter(uh, lf: &hdr->lf); |
961 | vdec_vp9_slice_setup_quantization(uh, quant: &hdr->quant); |
962 | vdec_vp9_slice_setup_segmentation(uh, seg: &hdr->seg); |
963 | ret = vdec_vp9_slice_setup_tile(vsi, hdr); |
964 | if (ret) |
965 | return ret; |
966 | vdec_vp9_slice_setup_state(vsi); |
967 | |
968 | /* core stage needs buffer index to get ref y/c ... */ |
969 | vdec_vp9_slice_setup_ref_idx(pfc, hdr); |
970 | |
971 | pfc->seq = instance->seq; |
972 | instance->seq++; |
973 | |
974 | return 0; |
975 | } |
976 | |
977 | static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance, |
978 | struct vdec_vp9_slice_vsi *vsi, |
979 | struct mtk_vcodec_mem *bs, |
980 | struct vdec_lat_buf *lat_buf) |
981 | { |
982 | int i; |
983 | |
984 | vsi->bs.buf.dma_addr = bs->dma_addr; |
985 | vsi->bs.buf.size = bs->size; |
986 | vsi->bs.frame.dma_addr = bs->dma_addr; |
987 | vsi->bs.frame.size = bs->size; |
988 | |
989 | for (i = 0; i < 2; i++) { |
990 | vsi->mv[i].dma_addr = instance->mv[i].dma_addr; |
991 | vsi->mv[i].size = instance->mv[i].size; |
992 | } |
993 | for (i = 0; i < 2; i++) { |
994 | vsi->seg[i].dma_addr = instance->seg[i].dma_addr; |
995 | vsi->seg[i].size = instance->seg[i].size; |
996 | } |
997 | vsi->tile.dma_addr = instance->tile.dma_addr; |
998 | vsi->tile.size = instance->tile.size; |
999 | vsi->prob.dma_addr = instance->prob.dma_addr; |
1000 | vsi->prob.size = instance->prob.size; |
1001 | vsi->counts.dma_addr = instance->counts.dma_addr; |
1002 | vsi->counts.size = instance->counts.size; |
1003 | |
1004 | vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; |
1005 | vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; |
1006 | vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr; |
1007 | /* used to store trans end */ |
1008 | vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr; |
1009 | vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; |
1010 | vsi->err_map.size = lat_buf->wdma_err_addr.size; |
1011 | |
1012 | vsi->row_info.buf = 0; |
1013 | vsi->row_info.size = 0; |
1014 | |
1015 | return 0; |
1016 | } |
1017 | |
1018 | static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance, |
1019 | struct vdec_vp9_slice_vsi *vsi) |
1020 | { |
1021 | struct vdec_vp9_slice_frame_ctx *frame_ctx; |
1022 | struct vdec_vp9_slice_uncompressed_header *uh; |
1023 | |
1024 | uh = &vsi->frame.uh; |
1025 | |
1026 | mtk_vdec_debug(instance->ctx, "ctx dirty %u idx %d\n" , |
1027 | instance->dirty[uh->frame_context_idx], |
1028 | uh->frame_context_idx); |
1029 | |
1030 | if (instance->dirty[uh->frame_context_idx]) |
1031 | frame_ctx = &instance->frame_ctx[uh->frame_context_idx]; |
1032 | else |
1033 | frame_ctx = vdec_vp9_slice_default_frame_ctx; |
1034 | memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx)); |
1035 | |
1036 | return 0; |
1037 | } |
1038 | |
1039 | static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance, |
1040 | struct vdec_vp9_slice_vsi *vsi, |
1041 | struct mtk_vcodec_mem *buf) |
1042 | { |
1043 | struct vdec_vp9_slice_uncompressed_header *uh; |
1044 | |
1045 | /* reset segment buffer */ |
1046 | uh = &vsi->frame.uh; |
1047 | if (uh->frame_type == 0 || |
1048 | uh->intra_only || |
1049 | uh->error_resilient_mode || |
1050 | uh->frame_width != instance->width || |
1051 | uh->frame_height != instance->height) { |
1052 | mtk_vdec_debug(instance->ctx, "reset seg\n" ); |
1053 | memset(buf->va, 0, buf->size); |
1054 | } |
1055 | } |
1056 | |
1057 | /* |
1058 | * parse tiles according to `6.4 Decode tiles syntax` |
1059 | * in "vp9-bitstream-specification" |
1060 | * |
1061 | * frame contains uncompress header, compressed header and several tiles. |
1062 | * this function parses tiles' position and size, stores them to tile buffer |
1063 | * for decoding. |
1064 | */ |
static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance,
					    struct vdec_vp9_slice_vsi *vsi,
					    struct mtk_vcodec_mem *bs)
{
	struct vdec_vp9_slice_uncompressed_header *uh;
	unsigned int rows_log2;
	unsigned int cols_log2;
	unsigned int rows;
	unsigned int cols;
	unsigned int mi_row;
	unsigned int mi_col;
	unsigned int offset;
	dma_addr_t pa;
	unsigned int size;
	struct vdec_vp9_slice_tiles *tiles;
	unsigned char *pos;
	unsigned char *end;
	unsigned char *va;
	unsigned int *tb;
	int i;
	int j;

	uh = &vsi->frame.uh;
	rows_log2 = uh->tile_rows_log2;
	cols_log2 = uh->tile_cols_log2;
	rows = 1 << rows_log2;
	cols = 1 << cols_log2;

	/* reject tile layouts beyond what the hardware descriptor supports */
	if (rows > 4 || cols > 64) {
		mtk_vdec_err(instance->ctx, "tile_rows %u tile_cols %u\n" , rows, cols);
		return -EINVAL;
	}

	/* tile data begins right after the uncompressed + compressed headers */
	offset = uh->uncompressed_header_size +
		uh->header_size_in_bytes;
	if (bs->size <= offset) {
		mtk_vdec_err(instance->ctx, "bs size %zu tile offset %u\n" , bs->size, offset);
		return -EINVAL;
	}

	tiles = &vsi->frame.tiles;
	/* setup tile buffer */

	va = (unsigned char *)bs->va;
	pos = va + offset;
	end = va + bs->size;
	/* NOTE(review): `pa` is the DMA-side cursor mirroring `pos`; only its
	 * low bits are written to the descriptor below — confirm intent of the
	 * original "truncated" remark against the hardware programming guide.
	 */
	pa = bs->dma_addr + offset;
	tb = instance->tile.va;
	for (i = 0; i < rows; i++) {
		for (j = 0; j < cols; j++) {
			/* the last tile carries no size prefix: it extends to
			 * the end of the bitstream
			 */
			if (i == rows - 1 &&
			    j == cols - 1) {
				size = (unsigned int)(end - pos);
			} else {
				/* every other tile starts with a 32-bit
				 * big-endian size field; validate both the
				 * field and the payload against the buffer end
				 */
				if (end - pos < 4)
					return -EINVAL;

				size = (pos[0] << 24) | (pos[1] << 16) |
					(pos[2] << 8) | pos[3];
				pos += 4;
				pa += 4;
				offset += 4;
				if (end - pos < size)
					return -EINVAL;
			}
			tiles->size[i][j] = size;
			/* emit a 4-word hardware tile descriptor:
			 * bit-length + bit offset within the first 16-byte
			 * unit, 16-byte-aligned DMA address, bit offset of the
			 * address, and the tile's last mi row/column
			 */
			if (tiles->mi_rows[i]) {
				*tb++ = (size << 3) + ((offset << 3) & 0x7f);
				*tb++ = pa & ~0xf;
				*tb++ = (pa << 3) & 0x7f;
				mi_row = (tiles->mi_rows[i] - 1) & 0x1ff;
				mi_col = (tiles->mi_cols[j] - 1) & 0x3f;
				*tb++ = (mi_row << 6) + mi_col;
			}
			pos += size;
			pa += size;
			offset += size;
		}
	}

	return 0;
}
1148 | |
1149 | static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance, |
1150 | struct mtk_vcodec_mem *bs, |
1151 | struct vdec_lat_buf *lat_buf, |
1152 | struct vdec_vp9_slice_pfc *pfc) |
1153 | { |
1154 | struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; |
1155 | int ret; |
1156 | |
1157 | ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf); |
1158 | if (ret) |
1159 | goto err; |
1160 | |
1161 | ret = vdec_vp9_slice_setup_pfc(instance, pfc); |
1162 | if (ret) |
1163 | goto err; |
1164 | |
1165 | ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); |
1166 | if (ret) |
1167 | goto err; |
1168 | |
1169 | ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf); |
1170 | if (ret) |
1171 | goto err; |
1172 | |
1173 | vdec_vp9_slice_setup_seg_buffer(instance, vsi, buf: &instance->seg[0]); |
1174 | |
1175 | /* setup prob/tile buffers for LAT */ |
1176 | |
1177 | ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); |
1178 | if (ret) |
1179 | goto err; |
1180 | |
1181 | ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); |
1182 | if (ret) |
1183 | goto err; |
1184 | |
1185 | return 0; |
1186 | |
1187 | err: |
1188 | return ret; |
1189 | } |
1190 | |
/*
 * Point the v4l2-vp9 helper's coefficient/EOB count pointers for plane
 * (i, j, k) at the corresponding entries of the hardware counts layout.
 * Band 0 has 3 contexts (band_0); bands 1-5 have 6 contexts each (band_1_5).
 */
static
void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k,
					struct vdec_vp9_slice_frame_counts *counts,
					struct v4l2_vp9_frame_symbol_counts *counts_helper)
{
	u32 l = 0, m;

	/*
	 * helper eo -> mtk eo
	 * helper e1 -> mtk c3
	 * helper c0 -> c0
	 * helper c1 -> c1
	 * helper c2 -> c2
	 */
	for (m = 0; m < 3; m++) {
		/* cast: expose the first 3 entries of each 4-element row as
		 * the helper's u32[3] coefficient counts
		 */
		counts_helper->coeff[i][j][k][l][m] =
			(u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m];
		counts_helper->eob[i][j][k][l][m][0] =
			&counts->eob_branch[i][j][k].band_0[m];
		/* helper's second EOB count lives in the 4th row element */
		counts_helper->eob[i][j][k][l][m][1] =
			&counts->coef_probs[i][j][k].band_0[m][3];
	}

	for (l = 1; l < 6; l++) {
		for (m = 0; m < 6; m++) {
			counts_helper->coeff[i][j][k][l][m] =
				(u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m];
			counts_helper->eob[i][j][k][l][m][0] =
				&counts->eob_branch[i][j][k].band_1_5[l - 1][m];
			counts_helper->eob[i][j][k][l][m][1] =
				&counts->coef_probs[i][j][k].band_1_5[l - 1][m][3];
		}
	}
}
1225 | |
/*
 * Wire up the v4l2-vp9 helper's symbol-count view of the hardware counts.
 * Members whose layout matches are pointed at `counts` directly; members
 * with incompatible padding/stride are first copied into the tightly
 * packed arrays of `counts_map` and the helper points there instead.
 */
static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map,
					     struct vdec_vp9_slice_frame_counts *counts,
					     struct v4l2_vp9_frame_symbol_counts *counts_helper)
{
	int i, j, k;

	/* layout-compatible members: alias the hardware buffer directly */
	counts_helper->partition = &counts->partition;
	counts_helper->intra_inter = &counts->intra_inter;
	counts_helper->tx32p = &counts->tx_p32x32;
	counts_helper->tx16p = &counts->tx_p16x16;
	counts_helper->tx8p = &counts->tx_p8x8;
	counts_helper->uv_mode = &counts->uv_mode;

	counts_helper->comp = &counts->comp_inter;
	counts_helper->comp_ref = &counts->comp_ref;
	counts_helper->single_ref = &counts->single_ref;
	counts_helper->mv_mode = &counts->inter_mode;
	counts_helper->mv_joint = &counts->joint;

	/* remaining members: repack row by row into counts_map */
	for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++)
		memcpy(counts_map->skip[i], counts->skip[i],
		       sizeof(counts_map->skip[0]));
	counts_helper->skip = &counts_map->skip;

	for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++)
		memcpy(counts_map->y_mode[i], counts->y_mode[i],
		       sizeof(counts_map->y_mode[0]));
	counts_helper->y_mode = &counts_map->y_mode;

	for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++)
		memcpy(counts_map->filter[i], counts->switchable_interp[i],
		       sizeof(counts_map->filter[0]));
	counts_helper->filter = &counts_map->filter;

	/* motion-vector component counts (per component 0/1) */
	for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++)
		memcpy(counts_map->sign[i], counts->mvcomp[i].sign,
		       sizeof(counts_map->sign[0]));
	counts_helper->sign = &counts_map->sign;

	for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++)
		memcpy(counts_map->classes[i], counts->mvcomp[i].classes,
		       sizeof(counts_map->classes[0]));
	counts_helper->classes = &counts_map->classes;

	for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++)
		memcpy(counts_map->class0[i], counts->mvcomp[i].class0,
		       sizeof(counts_map->class0[0]));
	counts_helper->class0 = &counts_map->class0;

	for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++)
		for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++)
			memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j],
			       sizeof(counts_map->bits[0][0]));
	counts_helper->bits = &counts_map->bits;

	for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++)
		for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++)
			memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j],
			       sizeof(counts_map->class0_fp[0][0]));
	counts_helper->class0_fp = &counts_map->class0_fp;

	for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++)
		memcpy(counts_map->fp[i], counts->mvcomp[i].fp,
		       sizeof(counts_map->fp[0]));
	counts_helper->fp = &counts_map->fp;

	for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++)
		memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp,
		       sizeof(counts_map->class0_hp[0]));
	counts_helper->class0_hp = &counts_map->class0_hp;

	for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++)
		memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0]));

	counts_helper->hp = &counts_map->hp;

	/* coefficient/EOB counts need per-(tx, plane, inter) pointer fixups */
	for (i = 0; i < 4; i++)
		for (j = 0; j < 2; j++)
			for (k = 0; k < 2; k++)
				vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper);
}
1307 | |
1308 | static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k, |
1309 | struct vdec_vp9_slice_frame_ctx *frame_ctx, |
1310 | struct v4l2_vp9_frame_context *frame_ctx_helper) |
1311 | { |
1312 | u32 l, m; |
1313 | |
1314 | for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { |
1315 | for (m = 0; m < VP9_BAND_6(l); m++) { |
1316 | memcpy(frame_ctx_helper->coef[i][j][k][l][m], |
1317 | frame_ctx->coef_probs[i][j][k][l].probs[m], |
1318 | sizeof(frame_ctx_helper->coef[i][j][k][l][0])); |
1319 | } |
1320 | } |
1321 | } |
1322 | |
1323 | static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k, |
1324 | struct vdec_vp9_slice_frame_ctx *frame_ctx, |
1325 | struct v4l2_vp9_frame_context *frame_ctx_helper) |
1326 | { |
1327 | u32 l, m; |
1328 | |
1329 | for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { |
1330 | for (m = 0; m < VP9_BAND_6(l); m++) { |
1331 | memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m], |
1332 | frame_ctx_helper->coef[i][j][k][l][m], |
1333 | sizeof(frame_ctx_helper->coef[i][j][k][l][0])); |
1334 | } |
1335 | } |
1336 | } |
1337 | |
1338 | static |
1339 | void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra, |
1340 | struct vdec_vp9_slice_frame_ctx *pre_frame_ctx, |
1341 | struct vdec_vp9_slice_frame_ctx *frame_ctx, |
1342 | struct v4l2_vp9_frame_context *frame_ctx_helper) |
1343 | { |
1344 | struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv; |
1345 | u32 i, j, k; |
1346 | |
1347 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++) |
1348 | for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++) |
1349 | for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++) |
1350 | vdec_vp9_slice_map_to_coef(i, j, k, frame_ctx: pre_frame_ctx, |
1351 | frame_ctx_helper); |
1352 | |
1353 | /* |
1354 | * use previous prob when frame is not intra or |
1355 | * we should use the prob updated by the compressed header parse |
1356 | */ |
1357 | if (!frame_is_intra) |
1358 | frame_ctx = pre_frame_ctx; |
1359 | |
1360 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++) |
1361 | memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i], |
1362 | sizeof(frame_ctx_helper->tx8[0])); |
1363 | |
1364 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++) |
1365 | memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i], |
1366 | sizeof(frame_ctx_helper->tx16[0])); |
1367 | |
1368 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++) |
1369 | memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i], |
1370 | sizeof(frame_ctx_helper->tx32[0])); |
1371 | |
1372 | memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip)); |
1373 | |
1374 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++) |
1375 | memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i], |
1376 | sizeof(frame_ctx_helper->inter_mode[0])); |
1377 | |
1378 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++) |
1379 | memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i], |
1380 | sizeof(frame_ctx_helper->interp_filter[0])); |
1381 | |
1382 | memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob, |
1383 | sizeof(frame_ctx_helper->is_inter)); |
1384 | |
1385 | memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob, |
1386 | sizeof(frame_ctx_helper->comp_mode)); |
1387 | |
1388 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++) |
1389 | memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i], |
1390 | sizeof(frame_ctx_helper->single_ref[0])); |
1391 | |
1392 | memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob, |
1393 | sizeof(frame_ctx_helper->comp_ref)); |
1394 | |
1395 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++) |
1396 | memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i], |
1397 | sizeof(frame_ctx_helper->y_mode[0])); |
1398 | |
1399 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++) |
1400 | memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i], |
1401 | sizeof(frame_ctx_helper->uv_mode[0])); |
1402 | |
1403 | for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++) |
1404 | memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i], |
1405 | sizeof(frame_ctx_helper->partition[0])); |
1406 | |
1407 | memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint)); |
1408 | |
1409 | for (i = 0; i < ARRAY_SIZE(mv->sign); i++) |
1410 | mv->sign[i] = frame_ctx->sign_classes[i].sign; |
1411 | |
1412 | for (i = 0; i < ARRAY_SIZE(mv->classes); i++) |
1413 | memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes, |
1414 | sizeof(mv->classes[i])); |
1415 | |
1416 | for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++) |
1417 | mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0]; |
1418 | |
1419 | for (i = 0; i < ARRAY_SIZE(mv->bits); i++) |
1420 | memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0])); |
1421 | |
1422 | for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++) |
1423 | for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++) |
1424 | memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j], |
1425 | sizeof(mv->class0_fr[0][0])); |
1426 | |
1427 | for (i = 0; i < ARRAY_SIZE(mv->fr); i++) |
1428 | memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0])); |
1429 | |
1430 | for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++) |
1431 | mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp; |
1432 | |
1433 | for (i = 0; i < ARRAY_SIZE(mv->hp); i++) |
1434 | mv->hp[i] = frame_ctx->class0_fp_hp[i].hp; |
1435 | } |
1436 | |
/*
 * Inverse of vdec_vp9_slice_framectx_map_helper(): write the (adapted)
 * v4l2-vp9 helper frame context back into the hardware frame context
 * layout, field by field.
 */
static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper,
					       struct vdec_vp9_slice_frame_ctx *frame_ctx)
{
	struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
	u32 i, j, k;

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
		memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i],
		       sizeof(frame_ctx_helper->tx8[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
		memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i],
		       sizeof(frame_ctx_helper->tx16[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
		memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i],
		       sizeof(frame_ctx_helper->tx32[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
		for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
			for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
				vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx,
							     frame_ctx_helper);

	memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
		memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i],
		       sizeof(frame_ctx_helper->inter_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
		memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i],
		       sizeof(frame_ctx_helper->interp_filter[0]));

	memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter,
	       sizeof(frame_ctx_helper->is_inter));

	memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode,
	       sizeof(frame_ctx_helper->comp_mode));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
		memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i],
		       sizeof(frame_ctx_helper->single_ref[0]));

	memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref,
	       sizeof(frame_ctx_helper->comp_ref));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
		memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i],
		       sizeof(frame_ctx_helper->y_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
		memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i],
		       sizeof(frame_ctx_helper->uv_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
		memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i],
		       sizeof(frame_ctx_helper->partition[0]));

	/* motion-vector probabilities back into the padded structs */
	memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint));

	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
		frame_ctx->sign_classes[i].sign = mv->sign[i];

	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
		memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i],
		       sizeof(mv->classes[i]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
		frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i];

	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
		memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
			memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j],
			       sizeof(mv->class0_fr[0][0]));

	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
		memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
		frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i];

	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
		frame_ctx->class0_fp_hp[i].hp = mv->hp[i];
}
1525 | |
1526 | static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance, |
1527 | struct vdec_vp9_slice_vsi *vsi) |
1528 | { |
1529 | struct vdec_vp9_slice_frame_ctx *pre_frame_ctx; |
1530 | struct v4l2_vp9_frame_context *pre_frame_ctx_helper; |
1531 | struct vdec_vp9_slice_frame_ctx *frame_ctx; |
1532 | struct vdec_vp9_slice_frame_counts *counts; |
1533 | struct v4l2_vp9_frame_symbol_counts *counts_helper; |
1534 | struct vdec_vp9_slice_uncompressed_header *uh; |
1535 | bool frame_is_intra; |
1536 | bool use_128; |
1537 | |
1538 | uh = &vsi->frame.uh; |
1539 | pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx]; |
1540 | pre_frame_ctx_helper = &instance->frame_ctx_helper; |
1541 | frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va; |
1542 | counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va; |
1543 | counts_helper = &instance->counts_helper; |
1544 | |
1545 | if (!uh->refresh_frame_context) |
1546 | return 0; |
1547 | |
1548 | if (!uh->frame_parallel_decoding_mode) { |
1549 | vdec_vp9_slice_counts_map_helper(counts_map: &instance->counts_map, counts, counts_helper); |
1550 | |
1551 | frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only; |
1552 | /* check default prob */ |
1553 | if (!instance->dirty[uh->frame_context_idx]) |
1554 | vdec_vp9_slice_framectx_map_helper(frame_is_intra, |
1555 | pre_frame_ctx: vdec_vp9_slice_default_frame_ctx, |
1556 | frame_ctx, |
1557 | frame_ctx_helper: pre_frame_ctx_helper); |
1558 | else |
1559 | vdec_vp9_slice_framectx_map_helper(frame_is_intra, |
1560 | pre_frame_ctx, |
1561 | frame_ctx, |
1562 | frame_ctx_helper: pre_frame_ctx_helper); |
1563 | |
1564 | use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type; |
1565 | v4l2_vp9_adapt_coef_probs(probs: pre_frame_ctx_helper, |
1566 | counts: counts_helper, |
1567 | use_128, |
1568 | frame_is_intra); |
1569 | if (!frame_is_intra) |
1570 | v4l2_vp9_adapt_noncoef_probs(probs: pre_frame_ctx_helper, |
1571 | counts: counts_helper, |
1572 | V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE, |
1573 | interpolation_filter: vsi->frame.uh.interpolation_filter, |
1574 | tx_mode: vsi->frame.ch.tx_mode, |
1575 | flags: vsi->frame.uh.allow_high_precision_mv ? |
1576 | V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0); |
1577 | vdec_vp9_slice_helper_map_framectx(frame_ctx_helper: pre_frame_ctx_helper, frame_ctx: pre_frame_ctx); |
1578 | } else { |
1579 | memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx)); |
1580 | } |
1581 | |
1582 | instance->dirty[uh->frame_context_idx] = 1; |
1583 | |
1584 | return 0; |
1585 | } |
1586 | |
1587 | static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance, |
1588 | struct vdec_vp9_slice_pfc *pfc) |
1589 | { |
1590 | struct vdec_vp9_slice_vsi *vsi; |
1591 | |
1592 | vsi = &pfc->vsi; |
1593 | memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); |
1594 | |
1595 | mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n" , |
1596 | pfc->seq, vsi->state.crc[0], vsi->state.crc[1], |
1597 | vsi->state.crc[2], vsi->state.crc[3]); |
1598 | mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n" , |
1599 | pfc->seq, vsi->state.crc[4], vsi->state.crc[5], |
1600 | vsi->state.crc[6], vsi->state.crc[7]); |
1601 | |
1602 | vdec_vp9_slice_update_prob(instance, vsi); |
1603 | |
1604 | instance->width = vsi->frame.uh.frame_width; |
1605 | instance->height = vsi->frame.uh.frame_height; |
1606 | instance->frame_type = vsi->frame.uh.frame_type; |
1607 | instance->show_frame = vsi->frame.uh.show_frame; |
1608 | |
1609 | return 0; |
1610 | } |
1611 | |
1612 | static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance, |
1613 | struct vdec_lat_buf *lat_buf, |
1614 | struct vdec_vp9_slice_pfc *pfc) |
1615 | { |
1616 | struct vdec_vp9_slice_vsi *vsi; |
1617 | |
1618 | vsi = &pfc->vsi; |
1619 | memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); |
1620 | |
1621 | mtk_vdec_debug(instance->ctx, "Frame %u LAT CRC 0x%08x %lx %lx\n" , |
1622 | pfc->seq, vsi->state.crc[0], |
1623 | (unsigned long)vsi->trans.dma_addr, |
1624 | (unsigned long)vsi->trans.dma_addr_end); |
1625 | |
1626 | /* buffer full, need to re-decode */ |
1627 | if (vsi->state.full) { |
1628 | /* buffer not enough */ |
1629 | if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == |
1630 | vsi->ube.size) |
1631 | return -ENOMEM; |
1632 | return -EAGAIN; |
1633 | } |
1634 | |
1635 | vdec_vp9_slice_update_prob(instance, vsi); |
1636 | |
1637 | instance->width = vsi->frame.uh.frame_width; |
1638 | instance->height = vsi->frame.uh.frame_height; |
1639 | instance->frame_type = vsi->frame.uh.frame_type; |
1640 | instance->show_frame = vsi->frame.uh.show_frame; |
1641 | |
1642 | return 0; |
1643 | } |
1644 | |
1645 | static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance, |
1646 | struct vdec_lat_buf *lat_buf) |
1647 | { |
1648 | struct vb2_v4l2_buffer *dst; |
1649 | |
1650 | dst = v4l2_m2m_next_dst_buf(m2m_ctx: instance->ctx->m2m_ctx); |
1651 | if (!dst) |
1652 | return -EINVAL; |
1653 | |
1654 | v4l2_m2m_buf_copy_metadata(out_vb: &lat_buf->ts_info, cap_vb: dst, copy_frame_flags: true); |
1655 | return 0; |
1656 | } |
1657 | |
1658 | static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance, |
1659 | struct vdec_vp9_slice_pfc *pfc, |
1660 | struct vdec_vp9_slice_vsi *vsi, |
1661 | struct vdec_fb *fb, |
1662 | struct vdec_lat_buf *lat_buf) |
1663 | { |
1664 | struct vb2_buffer *vb; |
1665 | struct vb2_queue *vq; |
1666 | struct vdec_vp9_slice_reference *ref; |
1667 | int plane; |
1668 | int size; |
1669 | int w; |
1670 | int h; |
1671 | int i; |
1672 | |
1673 | plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; |
1674 | w = vsi->frame.uh.frame_width; |
1675 | h = vsi->frame.uh.frame_height; |
1676 | size = ALIGN(w, 64) * ALIGN(h, 64); |
1677 | |
1678 | /* frame buffer */ |
1679 | vsi->fb.y.dma_addr = fb->base_y.dma_addr; |
1680 | if (plane == 1) |
1681 | vsi->fb.c.dma_addr = fb->base_y.dma_addr + size; |
1682 | else |
1683 | vsi->fb.c.dma_addr = fb->base_c.dma_addr; |
1684 | |
1685 | /* reference buffers */ |
1686 | vq = v4l2_m2m_get_vq(m2m_ctx: instance->ctx->m2m_ctx, |
1687 | type: V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); |
1688 | if (!vq) |
1689 | return -EINVAL; |
1690 | |
1691 | /* get current output buffer */ |
1692 | vb = &v4l2_m2m_next_dst_buf(m2m_ctx: instance->ctx->m2m_ctx)->vb2_buf; |
1693 | if (!vb) |
1694 | return -EINVAL; |
1695 | |
1696 | /* update internal buffer's width/height */ |
1697 | instance->dpb[vb->index].width = w; |
1698 | instance->dpb[vb->index].height = h; |
1699 | |
1700 | /* |
1701 | * get buffer's width/height from instance |
1702 | * get buffer address from vb2buf |
1703 | */ |
1704 | for (i = 0; i < 3; i++) { |
1705 | ref = &vsi->frame.ref[i]; |
1706 | vb = vb2_find_buffer(q: vq, timestamp: pfc->ref_idx[i]); |
1707 | if (!vb) { |
1708 | ref->frame_width = w; |
1709 | ref->frame_height = h; |
1710 | memset(&vsi->ref[i], 0, sizeof(vsi->ref[i])); |
1711 | } else { |
1712 | int idx = vb->index; |
1713 | |
1714 | ref->frame_width = instance->dpb[idx].width; |
1715 | ref->frame_height = instance->dpb[idx].height; |
1716 | vsi->ref[i].y.dma_addr = |
1717 | vb2_dma_contig_plane_dma_addr(vb, plane_no: 0); |
1718 | if (plane == 1) |
1719 | vsi->ref[i].c.dma_addr = |
1720 | vsi->ref[i].y.dma_addr + size; |
1721 | else |
1722 | vsi->ref[i].c.dma_addr = |
1723 | vb2_dma_contig_plane_dma_addr(vb, plane_no: 1); |
1724 | } |
1725 | } |
1726 | |
1727 | return 0; |
1728 | } |
1729 | |
1730 | static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance, |
1731 | struct vdec_vp9_slice_pfc *pfc, |
1732 | struct vdec_vp9_slice_vsi *vsi, |
1733 | struct mtk_vcodec_mem *bs, |
1734 | struct vdec_fb *fb) |
1735 | { |
1736 | int i; |
1737 | |
1738 | vsi->bs.buf.dma_addr = bs->dma_addr; |
1739 | vsi->bs.buf.size = bs->size; |
1740 | vsi->bs.frame.dma_addr = bs->dma_addr; |
1741 | vsi->bs.frame.size = bs->size; |
1742 | |
1743 | for (i = 0; i < 2; i++) { |
1744 | vsi->mv[i].dma_addr = instance->mv[i].dma_addr; |
1745 | vsi->mv[i].size = instance->mv[i].size; |
1746 | } |
1747 | for (i = 0; i < 2; i++) { |
1748 | vsi->seg[i].dma_addr = instance->seg[i].dma_addr; |
1749 | vsi->seg[i].size = instance->seg[i].size; |
1750 | } |
1751 | vsi->tile.dma_addr = instance->tile.dma_addr; |
1752 | vsi->tile.size = instance->tile.size; |
1753 | vsi->prob.dma_addr = instance->prob.dma_addr; |
1754 | vsi->prob.size = instance->prob.size; |
1755 | vsi->counts.dma_addr = instance->counts.dma_addr; |
1756 | vsi->counts.size = instance->counts.size; |
1757 | |
1758 | vsi->row_info.buf = 0; |
1759 | vsi->row_info.size = 0; |
1760 | |
1761 | vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL); |
1762 | } |
1763 | |
1764 | static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance, |
1765 | struct vdec_fb *fb, |
1766 | struct vdec_lat_buf *lat_buf, |
1767 | struct vdec_vp9_slice_pfc *pfc) |
1768 | { |
1769 | struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; |
1770 | int ret; |
1771 | |
1772 | vdec_vp9_slice_setup_state(vsi); |
1773 | |
1774 | ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf); |
1775 | if (ret) |
1776 | goto err; |
1777 | |
1778 | ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf); |
1779 | if (ret) |
1780 | goto err; |
1781 | |
1782 | vdec_vp9_slice_setup_seg_buffer(instance, vsi, buf: &instance->seg[1]); |
1783 | |
1784 | return 0; |
1785 | |
1786 | err: |
1787 | return ret; |
1788 | } |
1789 | |
1790 | static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance, |
1791 | struct mtk_vcodec_mem *bs, |
1792 | struct vdec_fb *fb, |
1793 | struct vdec_vp9_slice_pfc *pfc) |
1794 | { |
1795 | struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; |
1796 | int ret; |
1797 | |
1798 | ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance); |
1799 | if (ret) |
1800 | goto err; |
1801 | |
1802 | ret = vdec_vp9_slice_setup_pfc(instance, pfc); |
1803 | if (ret) |
1804 | goto err; |
1805 | |
1806 | ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); |
1807 | if (ret) |
1808 | goto err; |
1809 | |
1810 | vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb); |
1811 | vdec_vp9_slice_setup_seg_buffer(instance, vsi, buf: &instance->seg[0]); |
1812 | |
1813 | ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); |
1814 | if (ret) |
1815 | goto err; |
1816 | |
1817 | ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); |
1818 | if (ret) |
1819 | goto err; |
1820 | |
1821 | return 0; |
1822 | |
1823 | err: |
1824 | return ret; |
1825 | } |
1826 | |
1827 | static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance, |
1828 | struct vdec_lat_buf *lat_buf, |
1829 | struct vdec_vp9_slice_pfc *pfc) |
1830 | { |
1831 | struct vdec_vp9_slice_vsi *vsi; |
1832 | |
1833 | vsi = &pfc->vsi; |
1834 | memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state)); |
1835 | |
1836 | mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n" , |
1837 | pfc->seq, vsi->state.crc[0], vsi->state.crc[1], |
1838 | vsi->state.crc[2], vsi->state.crc[3]); |
1839 | mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n" , |
1840 | pfc->seq, vsi->state.crc[4], vsi->state.crc[5], |
1841 | vsi->state.crc[6], vsi->state.crc[7]); |
1842 | |
1843 | return 0; |
1844 | } |
1845 | |
1846 | static int vdec_vp9_slice_init(struct mtk_vcodec_dec_ctx *ctx) |
1847 | { |
1848 | struct vdec_vp9_slice_instance *instance; |
1849 | struct vdec_vp9_slice_init_vsi *vsi; |
1850 | int ret; |
1851 | |
1852 | instance = kzalloc(size: sizeof(*instance), GFP_KERNEL); |
1853 | if (!instance) |
1854 | return -ENOMEM; |
1855 | |
1856 | instance->ctx = ctx; |
1857 | instance->vpu.id = SCP_IPI_VDEC_LAT; |
1858 | instance->vpu.core_id = SCP_IPI_VDEC_CORE; |
1859 | instance->vpu.ctx = ctx; |
1860 | instance->vpu.codec_type = ctx->current_codec; |
1861 | |
1862 | ret = vpu_dec_init(vpu: &instance->vpu); |
1863 | if (ret) { |
1864 | mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n" , ret); |
1865 | goto error_vpu_init; |
1866 | } |
1867 | |
1868 | /* init vsi and global flags */ |
1869 | |
1870 | vsi = instance->vpu.vsi; |
1871 | if (!vsi) { |
1872 | mtk_vdec_err(ctx, "failed to get VP9 vsi\n" ); |
1873 | ret = -EINVAL; |
1874 | goto error_vsi; |
1875 | } |
1876 | instance->init_vsi = vsi; |
1877 | instance->core_vsi = mtk_vcodec_fw_map_dm_addr(fw: ctx->dev->fw_handler, |
1878 | mem_addr: (u32)vsi->core_vsi); |
1879 | if (!instance->core_vsi) { |
1880 | mtk_vdec_err(ctx, "failed to get VP9 core vsi\n" ); |
1881 | ret = -EINVAL; |
1882 | goto error_vsi; |
1883 | } |
1884 | |
1885 | instance->irq = 1; |
1886 | |
1887 | ret = vdec_vp9_slice_init_default_frame_ctx(instance); |
1888 | if (ret) |
1889 | goto error_default_frame_ctx; |
1890 | |
1891 | ctx->drv_handle = instance; |
1892 | |
1893 | return 0; |
1894 | |
1895 | error_default_frame_ctx: |
1896 | error_vsi: |
1897 | vpu_dec_deinit(vpu: &instance->vpu); |
1898 | error_vpu_init: |
1899 | kfree(objp: instance); |
1900 | return ret; |
1901 | } |
1902 | |
1903 | static void vdec_vp9_slice_deinit(void *h_vdec) |
1904 | { |
1905 | struct vdec_vp9_slice_instance *instance = h_vdec; |
1906 | |
1907 | if (!instance) |
1908 | return; |
1909 | |
1910 | vpu_dec_deinit(vpu: &instance->vpu); |
1911 | vdec_vp9_slice_free_working_buffer(instance); |
1912 | vdec_msg_queue_deinit(msg_queue: &instance->ctx->msg_queue, ctx: instance->ctx); |
1913 | kfree(objp: instance); |
1914 | } |
1915 | |
1916 | static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs, |
1917 | struct vdec_fb *fb, bool *res_chg) |
1918 | { |
1919 | struct vdec_vp9_slice_instance *instance = h_vdec; |
1920 | |
1921 | mtk_vdec_debug(instance->ctx, "flush ...\n" ); |
1922 | if (instance->ctx->dev->vdec_pdata->hw_arch != MTK_VDEC_PURE_SINGLE_CORE) |
1923 | vdec_msg_queue_wait_lat_buf_full(msg_queue: &instance->ctx->msg_queue); |
1924 | return vpu_dec_reset(vpu: &instance->vpu); |
1925 | } |
1926 | |
1927 | static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance) |
1928 | { |
1929 | struct mtk_vcodec_dec_ctx *ctx = instance->ctx; |
1930 | unsigned int data[3]; |
1931 | |
1932 | mtk_vdec_debug(instance->ctx, "w %u h %u\n" , ctx->picinfo.pic_w, ctx->picinfo.pic_h); |
1933 | |
1934 | data[0] = ctx->picinfo.pic_w; |
1935 | data[1] = ctx->picinfo.pic_h; |
1936 | data[2] = ctx->capture_fourcc; |
1937 | vpu_dec_get_param(vpu: &instance->vpu, data, len: 3, param_type: GET_PARAM_PIC_INFO); |
1938 | |
1939 | ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64); |
1940 | ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64); |
1941 | ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0]; |
1942 | ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1]; |
1943 | } |
1944 | |
/*
 * vdec_vp9_slice_get_dpb_size - report required decoded-picture-buffer depth
 * @instance: decoder instance (unused)
 * @dpb_sz: output, number of capture buffers needed
 */
static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance,
					unsigned int *dpb_sz)
{
	/* refer VP9 specification */
	*dpb_sz = 9;
}
1951 | |
1952 | static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) |
1953 | { |
1954 | struct vdec_vp9_slice_instance *instance = h_vdec; |
1955 | |
1956 | switch (type) { |
1957 | case GET_PARAM_PIC_INFO: |
1958 | vdec_vp9_slice_get_pic_info(instance); |
1959 | break; |
1960 | case GET_PARAM_DPB_SIZE: |
1961 | vdec_vp9_slice_get_dpb_size(instance, dpb_sz: out); |
1962 | break; |
1963 | case GET_PARAM_CROP_INFO: |
1964 | mtk_vdec_debug(instance->ctx, "No need to get vp9 crop information." ); |
1965 | break; |
1966 | default: |
1967 | mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n" , type); |
1968 | return -EINVAL; |
1969 | } |
1970 | |
1971 | return 0; |
1972 | } |
1973 | |
1974 | static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs, |
1975 | struct vdec_fb *fb, bool *res_chg) |
1976 | { |
1977 | struct vdec_vp9_slice_instance *instance = h_vdec; |
1978 | struct vdec_vp9_slice_pfc *pfc = &instance->sc_pfc; |
1979 | struct vdec_vp9_slice_vsi *vsi; |
1980 | struct mtk_vcodec_dec_ctx *ctx; |
1981 | int ret; |
1982 | |
1983 | if (!instance || !instance->ctx) |
1984 | return -EINVAL; |
1985 | ctx = instance->ctx; |
1986 | |
1987 | /* bs NULL means flush decoder */ |
1988 | if (!bs) |
1989 | return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); |
1990 | |
1991 | fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); |
1992 | if (!fb) |
1993 | return -EBUSY; |
1994 | |
1995 | vsi = &pfc->vsi; |
1996 | |
1997 | ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc); |
1998 | if (ret) { |
1999 | mtk_vdec_err(ctx, "Failed to setup VP9 single ret %d\n" , ret); |
2000 | return ret; |
2001 | } |
2002 | vdec_vp9_slice_vsi_to_remote(vsi, remote_vsi: instance->vsi); |
2003 | |
2004 | ret = vpu_dec_start(vpu: &instance->vpu, NULL, len: 0); |
2005 | if (ret) { |
2006 | mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n" , ret); |
2007 | return ret; |
2008 | } |
2009 | |
2010 | ret = mtk_vcodec_wait_for_done_ctx(priv: ctx, MTK_INST_IRQ_RECEIVED, |
2011 | WAIT_INTR_TIMEOUT_MS, hw_id: MTK_VDEC_CORE); |
2012 | /* update remote vsi if decode timeout */ |
2013 | if (ret) { |
2014 | mtk_vdec_err(ctx, "VP9 decode timeout %d\n" , ret); |
2015 | WRITE_ONCE(instance->vsi->state.timeout, 1); |
2016 | } |
2017 | |
2018 | vpu_dec_end(vpu: &instance->vpu); |
2019 | |
2020 | vdec_vp9_slice_vsi_from_remote(vsi, remote_vsi: instance->vsi, skip: 0); |
2021 | ret = vdec_vp9_slice_update_single(instance, pfc); |
2022 | if (ret) { |
2023 | mtk_vdec_err(ctx, "VP9 decode error: %d\n" , ret); |
2024 | return ret; |
2025 | } |
2026 | |
2027 | instance->ctx->decoded_frame_cnt++; |
2028 | return 0; |
2029 | } |
2030 | |
2031 | static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, |
2032 | struct vdec_fb *fb, bool *res_chg) |
2033 | { |
2034 | struct vdec_vp9_slice_instance *instance = h_vdec; |
2035 | struct vdec_lat_buf *lat_buf; |
2036 | struct vdec_vp9_slice_pfc *pfc; |
2037 | struct vdec_vp9_slice_vsi *vsi; |
2038 | struct mtk_vcodec_dec_ctx *ctx; |
2039 | int ret; |
2040 | |
2041 | if (!instance || !instance->ctx) |
2042 | return -EINVAL; |
2043 | ctx = instance->ctx; |
2044 | |
2045 | /* init msgQ for the first time */ |
2046 | if (vdec_msg_queue_init(msg_queue: &ctx->msg_queue, ctx, |
2047 | core_decode: vdec_vp9_slice_core_decode, |
2048 | private_size: sizeof(*pfc))) |
2049 | return -ENOMEM; |
2050 | |
2051 | /* bs NULL means flush decoder */ |
2052 | if (!bs) |
2053 | return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); |
2054 | |
2055 | lat_buf = vdec_msg_queue_dqbuf(ctx: &instance->ctx->msg_queue.lat_ctx); |
2056 | if (!lat_buf) { |
2057 | mtk_vdec_debug(ctx, "Failed to get VP9 lat buf\n" ); |
2058 | return -EAGAIN; |
2059 | } |
2060 | pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data; |
2061 | if (!pfc) { |
2062 | ret = -EINVAL; |
2063 | goto err_free_fb_out; |
2064 | } |
2065 | vsi = &pfc->vsi; |
2066 | |
2067 | ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc); |
2068 | if (ret) { |
2069 | mtk_vdec_err(ctx, "Failed to setup VP9 lat ret %d\n" , ret); |
2070 | goto err_free_fb_out; |
2071 | } |
2072 | vdec_vp9_slice_vsi_to_remote(vsi, remote_vsi: instance->vsi); |
2073 | |
2074 | ret = vpu_dec_start(vpu: &instance->vpu, NULL, len: 0); |
2075 | if (ret) { |
2076 | mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n" , ret); |
2077 | goto err_free_fb_out; |
2078 | } |
2079 | |
2080 | if (instance->irq) { |
2081 | ret = mtk_vcodec_wait_for_done_ctx(priv: ctx, MTK_INST_IRQ_RECEIVED, |
2082 | WAIT_INTR_TIMEOUT_MS, hw_id: MTK_VDEC_LAT0); |
2083 | /* update remote vsi if decode timeout */ |
2084 | if (ret) { |
2085 | mtk_vdec_err(ctx, "VP9 decode timeout %d pic %d\n" , ret, pfc->seq); |
2086 | WRITE_ONCE(instance->vsi->state.timeout, 1); |
2087 | } |
2088 | vpu_dec_end(vpu: &instance->vpu); |
2089 | } |
2090 | |
2091 | vdec_vp9_slice_vsi_from_remote(vsi, remote_vsi: instance->vsi, skip: 0); |
2092 | ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc); |
2093 | |
2094 | /* LAT trans full, no more UBE or decode timeout */ |
2095 | if (ret) { |
2096 | mtk_vdec_err(ctx, "VP9 decode error: %d\n" , ret); |
2097 | goto err_free_fb_out; |
2098 | } |
2099 | |
2100 | mtk_vdec_debug(ctx, "lat dma addr: 0x%lx 0x%lx\n" , |
2101 | (unsigned long)pfc->vsi.trans.dma_addr, |
2102 | (unsigned long)pfc->vsi.trans.dma_addr_end); |
2103 | |
2104 | vdec_msg_queue_update_ube_wptr(msg_queue: &ctx->msg_queue, |
2105 | ube_wptr: vsi->trans.dma_addr_end + |
2106 | ctx->msg_queue.wdma_addr.dma_addr); |
2107 | vdec_msg_queue_qbuf(ctx: &ctx->msg_queue.core_ctx, buf: lat_buf); |
2108 | |
2109 | return 0; |
2110 | err_free_fb_out: |
2111 | vdec_msg_queue_qbuf(ctx: &ctx->msg_queue.lat_ctx, buf: lat_buf); |
2112 | return ret; |
2113 | } |
2114 | |
2115 | static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, |
2116 | struct vdec_fb *fb, bool *res_chg) |
2117 | { |
2118 | struct vdec_vp9_slice_instance *instance = h_vdec; |
2119 | int ret; |
2120 | |
2121 | if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE) |
2122 | ret = vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg); |
2123 | else |
2124 | ret = vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg); |
2125 | |
2126 | return ret; |
2127 | } |
2128 | |
2129 | static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf) |
2130 | { |
2131 | struct vdec_vp9_slice_instance *instance; |
2132 | struct vdec_vp9_slice_pfc *pfc; |
2133 | struct mtk_vcodec_dec_ctx *ctx = NULL; |
2134 | struct vdec_fb *fb = NULL; |
2135 | int ret = -EINVAL; |
2136 | |
2137 | if (!lat_buf) |
2138 | goto err; |
2139 | |
2140 | pfc = lat_buf->private_data; |
2141 | ctx = lat_buf->ctx; |
2142 | if (!pfc || !ctx) |
2143 | goto err; |
2144 | |
2145 | instance = ctx->drv_handle; |
2146 | if (!instance) |
2147 | goto err; |
2148 | |
2149 | fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); |
2150 | if (!fb) { |
2151 | ret = -EBUSY; |
2152 | goto err; |
2153 | } |
2154 | |
2155 | ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc); |
2156 | if (ret) { |
2157 | mtk_vdec_err(ctx, "vdec_vp9_slice_setup_core\n" ); |
2158 | goto err; |
2159 | } |
2160 | vdec_vp9_slice_vsi_to_remote(vsi: &pfc->vsi, remote_vsi: instance->core_vsi); |
2161 | |
2162 | ret = vpu_dec_core(vpu: &instance->vpu); |
2163 | if (ret) { |
2164 | mtk_vdec_err(ctx, "vpu_dec_core\n" ); |
2165 | goto err; |
2166 | } |
2167 | |
2168 | if (instance->irq) { |
2169 | ret = mtk_vcodec_wait_for_done_ctx(priv: ctx, MTK_INST_IRQ_RECEIVED, |
2170 | WAIT_INTR_TIMEOUT_MS, hw_id: MTK_VDEC_CORE); |
2171 | /* update remote vsi if decode timeout */ |
2172 | if (ret) { |
2173 | mtk_vdec_err(ctx, "VP9 core timeout pic %d\n" , pfc->seq); |
2174 | WRITE_ONCE(instance->core_vsi->state.timeout, 1); |
2175 | } |
2176 | vpu_dec_core_end(vpu: &instance->vpu); |
2177 | } |
2178 | |
2179 | vdec_vp9_slice_vsi_from_remote(vsi: &pfc->vsi, remote_vsi: instance->core_vsi, skip: 1); |
2180 | ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc); |
2181 | if (ret) { |
2182 | mtk_vdec_err(ctx, "vdec_vp9_slice_update_core\n" ); |
2183 | goto err; |
2184 | } |
2185 | |
2186 | pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; |
2187 | mtk_vdec_debug(ctx, "core dma_addr_end 0x%lx\n" , |
2188 | (unsigned long)pfc->vsi.trans.dma_addr_end); |
2189 | vdec_msg_queue_update_ube_rptr(msg_queue: &ctx->msg_queue, ube_rptr: pfc->vsi.trans.dma_addr_end); |
2190 | ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req); |
2191 | |
2192 | return 0; |
2193 | |
2194 | err: |
2195 | if (ctx && pfc) { |
2196 | /* always update read pointer */ |
2197 | vdec_msg_queue_update_ube_rptr(msg_queue: &ctx->msg_queue, ube_rptr: pfc->vsi.trans.dma_addr_end); |
2198 | |
2199 | if (fb) |
2200 | ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req); |
2201 | } |
2202 | return ret; |
2203 | } |
2204 | |
/*
 * Ops table for the VP9 stateless slice decoder (LAT interface); the
 * decode op internally dispatches between single-core and dual-core
 * paths based on the platform's hw_arch.
 */
const struct vdec_common_if vdec_vp9_slice_lat_if = {
	.init = vdec_vp9_slice_init,
	.decode = vdec_vp9_slice_decode,
	.get_param = vdec_vp9_slice_get_param,
	.deinit = vdec_vp9_slice_deinit,
};
2211 | |