1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Hantro VPU codec driver |
4 | * |
5 | * Copyright (c) 2014 Rockchip Electronics Co., Ltd. |
6 | * Hertz Wong <hertz.wong@rock-chips.com> |
7 | * Herman Chen <herman.chen@rock-chips.com> |
8 | * |
9 | * Copyright (C) 2014 Google, Inc. |
10 | * Tomasz Figa <tfiga@chromium.org> |
11 | */ |
12 | |
13 | #include <linux/types.h> |
14 | #include <linux/sort.h> |
15 | |
16 | #include <media/v4l2-mem2mem.h> |
17 | |
18 | #include "hantro_hw.h" |
19 | #include "hantro_v4l2.h" |
20 | |
#define VDPU_SWREG(nr)			((nr) * 4)

#define VDPU_REG_DEC_OUT_BASE		VDPU_SWREG(63)
#define VDPU_REG_RLC_VLC_BASE		VDPU_SWREG(64)
#define VDPU_REG_QTABLE_BASE		VDPU_SWREG(61)
#define VDPU_REG_DIR_MV_BASE		VDPU_SWREG(62)
#define VDPU_REG_REFER_BASE(i)		(VDPU_SWREG(84 + (i)))
#define VDPU_REG_DEC_E(v)		((v) ? BIT(0) : 0)

#define VDPU_REG_DEC_ADV_PRE_DIS(v)	((v) ? BIT(11) : 0)
#define VDPU_REG_DEC_SCMD_DIS(v)	((v) ? BIT(10) : 0)
#define VDPU_REG_FILTERING_DIS(v)	((v) ? BIT(8) : 0)
#define VDPU_REG_PIC_FIXED_QUANT(v)	((v) ? BIT(7) : 0)
#define VDPU_REG_DEC_LATENCY(v)		(((v) << 1) & GENMASK(6, 1))

#define VDPU_REG_INIT_QP(v)		(((v) << 25) & GENMASK(30, 25))
#define VDPU_REG_STREAM_LEN(v)		(((v) << 0) & GENMASK(23, 0))

#define VDPU_REG_APF_THRESHOLD(v)	(((v) << 17) & GENMASK(30, 17))
#define VDPU_REG_STARTMB_X(v)		(((v) << 8) & GENMASK(16, 8))
#define VDPU_REG_STARTMB_Y(v)		(((v) << 0) & GENMASK(7, 0))

#define VDPU_REG_DEC_MODE(v)		(((v) << 0) & GENMASK(3, 0))

#define VDPU_REG_DEC_STRENDIAN_E(v)	((v) ? BIT(5) : 0)
#define VDPU_REG_DEC_STRSWAP32_E(v)	((v) ? BIT(4) : 0)
#define VDPU_REG_DEC_OUTSWAP32_E(v)	((v) ? BIT(3) : 0)
#define VDPU_REG_DEC_INSWAP32_E(v)	((v) ? BIT(2) : 0)
#define VDPU_REG_DEC_OUT_ENDIAN(v)	((v) ? BIT(1) : 0)
#define VDPU_REG_DEC_IN_ENDIAN(v)	((v) ? BIT(0) : 0)

#define VDPU_REG_DEC_DATA_DISC_E(v)	((v) ? BIT(22) : 0)
#define VDPU_REG_DEC_MAX_BURST(v)	(((v) << 16) & GENMASK(20, 16))
#define VDPU_REG_DEC_AXI_WR_ID(v)	(((v) << 8) & GENMASK(15, 8))
#define VDPU_REG_DEC_AXI_RD_ID(v)	(((v) << 0) & GENMASK(7, 0))

#define VDPU_REG_START_CODE_E(v)	((v) ? BIT(22) : 0)
#define VDPU_REG_CH_8PIX_ILEAV_E(v)	((v) ? BIT(21) : 0)
#define VDPU_REG_RLC_MODE_E(v)		((v) ? BIT(20) : 0)
#define VDPU_REG_PIC_INTERLACE_E(v)	((v) ? BIT(17) : 0)
#define VDPU_REG_PIC_FIELDMODE_E(v)	((v) ? BIT(16) : 0)
#define VDPU_REG_PIC_TOPFIELD_E(v)	((v) ? BIT(13) : 0)
#define VDPU_REG_WRITE_MVS_E(v)		((v) ? BIT(10) : 0)
#define VDPU_REG_SEQ_MBAFF_E(v)		((v) ? BIT(7) : 0)
#define VDPU_REG_PICORD_COUNT_E(v)	((v) ? BIT(6) : 0)
#define VDPU_REG_DEC_TIMEOUT_E(v)	((v) ? BIT(5) : 0)
#define VDPU_REG_DEC_CLK_GATE_E(v)	((v) ? BIT(4) : 0)

#define VDPU_REG_PRED_BC_TAP_0_0(v)	(((v) << 22) & GENMASK(31, 22))
#define VDPU_REG_PRED_BC_TAP_0_1(v)	(((v) << 12) & GENMASK(21, 12))
#define VDPU_REG_PRED_BC_TAP_0_2(v)	(((v) << 2) & GENMASK(11, 2))

#define VDPU_REG_REFBU_E(v)		((v) ? BIT(31) : 0)

#define VDPU_REG_PINIT_RLIST_F9(v)	(((v) << 25) & GENMASK(29, 25))
#define VDPU_REG_PINIT_RLIST_F8(v)	(((v) << 20) & GENMASK(24, 20))
#define VDPU_REG_PINIT_RLIST_F7(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_PINIT_RLIST_F6(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_PINIT_RLIST_F5(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_PINIT_RLIST_F4(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_PINIT_RLIST_F15(v)	(((v) << 25) & GENMASK(29, 25))
#define VDPU_REG_PINIT_RLIST_F14(v)	(((v) << 20) & GENMASK(24, 20))
#define VDPU_REG_PINIT_RLIST_F13(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_PINIT_RLIST_F12(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_PINIT_RLIST_F11(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_PINIT_RLIST_F10(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_REFER1_NBR(v)		(((v) << 16) & GENMASK(31, 16))
#define VDPU_REG_REFER0_NBR(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_REFER3_NBR(v)		(((v) << 16) & GENMASK(31, 16))
#define VDPU_REG_REFER2_NBR(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_REFER5_NBR(v)		(((v) << 16) & GENMASK(31, 16))
#define VDPU_REG_REFER4_NBR(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_REFER7_NBR(v)		(((v) << 16) & GENMASK(31, 16))
#define VDPU_REG_REFER6_NBR(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_REFER9_NBR(v)		(((v) << 16) & GENMASK(31, 16))
#define VDPU_REG_REFER8_NBR(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_REFER11_NBR(v)		(((v) << 16) & GENMASK(31, 16))
#define VDPU_REG_REFER10_NBR(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_REFER13_NBR(v)		(((v) << 16) & GENMASK(31, 16))
#define VDPU_REG_REFER12_NBR(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_REFER15_NBR(v)		(((v) << 16) & GENMASK(31, 16))
#define VDPU_REG_REFER14_NBR(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_BINIT_RLIST_F5(v)	(((v) << 25) & GENMASK(29, 25))
#define VDPU_REG_BINIT_RLIST_F4(v)	(((v) << 20) & GENMASK(24, 20))
#define VDPU_REG_BINIT_RLIST_F3(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_BINIT_RLIST_F2(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_BINIT_RLIST_F1(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_BINIT_RLIST_F0(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_BINIT_RLIST_F11(v)	(((v) << 25) & GENMASK(29, 25))
#define VDPU_REG_BINIT_RLIST_F10(v)	(((v) << 20) & GENMASK(24, 20))
#define VDPU_REG_BINIT_RLIST_F9(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_BINIT_RLIST_F8(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_BINIT_RLIST_F7(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_BINIT_RLIST_F6(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_BINIT_RLIST_F15(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_BINIT_RLIST_F14(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_BINIT_RLIST_F13(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_BINIT_RLIST_F12(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_BINIT_RLIST_B5(v)	(((v) << 25) & GENMASK(29, 25))
#define VDPU_REG_BINIT_RLIST_B4(v)	(((v) << 20) & GENMASK(24, 20))
#define VDPU_REG_BINIT_RLIST_B3(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_BINIT_RLIST_B2(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_BINIT_RLIST_B1(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_BINIT_RLIST_B0(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_BINIT_RLIST_B11(v)	(((v) << 25) & GENMASK(29, 25))
#define VDPU_REG_BINIT_RLIST_B10(v)	(((v) << 20) & GENMASK(24, 20))
#define VDPU_REG_BINIT_RLIST_B9(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_BINIT_RLIST_B8(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_BINIT_RLIST_B7(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_BINIT_RLIST_B6(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_BINIT_RLIST_B15(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_BINIT_RLIST_B14(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_BINIT_RLIST_B13(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_BINIT_RLIST_B12(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_PINIT_RLIST_F3(v)	(((v) << 15) & GENMASK(19, 15))
#define VDPU_REG_PINIT_RLIST_F2(v)	(((v) << 10) & GENMASK(14, 10))
#define VDPU_REG_PINIT_RLIST_F1(v)	(((v) << 5) & GENMASK(9, 5))
#define VDPU_REG_PINIT_RLIST_F0(v)	(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_REFER_LTERM_E(v)	(((v) << 0) & GENMASK(31, 0))

#define VDPU_REG_REFER_VALID_E(v)	(((v) << 0) & GENMASK(31, 0))

#define VDPU_REG_STRM_START_BIT(v)	(((v) << 0) & GENMASK(5, 0))

#define VDPU_REG_CH_QP_OFFSET2(v)	(((v) << 22) & GENMASK(26, 22))
#define VDPU_REG_CH_QP_OFFSET(v)	(((v) << 17) & GENMASK(21, 17))
#define VDPU_REG_PIC_MB_HEIGHT_P(v)	(((v) << 9) & GENMASK(16, 9))
#define VDPU_REG_PIC_MB_WIDTH(v)	(((v) << 0) & GENMASK(8, 0))

#define VDPU_REG_WEIGHT_BIPR_IDC(v)	(((v) << 16) & GENMASK(17, 16))
#define VDPU_REG_REF_FRAMES(v)		(((v) << 0) & GENMASK(4, 0))

#define VDPU_REG_FILT_CTRL_PRES(v)	((v) ? BIT(31) : 0)
#define VDPU_REG_RDPIC_CNT_PRES(v)	((v) ? BIT(30) : 0)
/*
 * Macro names restored: these two fields are written into VDPU_SWREG(112)
 * by set_params() as VDPU_REG_FRAMENUM_LEN()/VDPU_REG_FRAMENUM(); the
 * names had been dropped from the #define lines.
 */
#define VDPU_REG_FRAMENUM_LEN(v)	(((v) << 16) & GENMASK(20, 16))
#define VDPU_REG_FRAMENUM(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_REFPIC_MK_LEN(v)	(((v) << 16) & GENMASK(26, 16))
#define VDPU_REG_IDR_PIC_ID(v)		(((v) << 0) & GENMASK(15, 0))

#define VDPU_REG_PPS_ID(v)		(((v) << 24) & GENMASK(31, 24))
#define VDPU_REG_REFIDX1_ACTIVE(v)	(((v) << 19) & GENMASK(23, 19))
#define VDPU_REG_REFIDX0_ACTIVE(v)	(((v) << 14) & GENMASK(18, 14))
#define VDPU_REG_POC_LENGTH(v)		(((v) << 0) & GENMASK(7, 0))

#define VDPU_REG_IDR_PIC_E(v)		((v) ? BIT(8) : 0)
#define VDPU_REG_DIR_8X8_INFER_E(v)	((v) ? BIT(7) : 0)
#define VDPU_REG_BLACKWHITE_E(v)	((v) ? BIT(6) : 0)
#define VDPU_REG_CABAC_E(v)		((v) ? BIT(5) : 0)
#define VDPU_REG_WEIGHT_PRED_E(v)	((v) ? BIT(4) : 0)
#define VDPU_REG_CONST_INTRA_E(v)	((v) ? BIT(3) : 0)
#define VDPU_REG_8X8TRANS_FLAG_E(v)	((v) ? BIT(2) : 0)
#define VDPU_REG_TYPE1_QUANT_E(v)	((v) ? BIT(1) : 0)
#define VDPU_REG_FIELDPIC_FLAG_E(v)	((v) ? BIT(0) : 0)
192 | |
193 | static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) |
194 | { |
195 | const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; |
196 | const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; |
197 | const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; |
198 | const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; |
199 | struct hantro_dev *vpu = ctx->dev; |
200 | u32 reg; |
201 | |
202 | reg = VDPU_REG_DEC_ADV_PRE_DIS(0) | |
203 | VDPU_REG_DEC_SCMD_DIS(0) | |
204 | VDPU_REG_FILTERING_DIS(0) | |
205 | VDPU_REG_PIC_FIXED_QUANT(0) | |
206 | VDPU_REG_DEC_LATENCY(0); |
207 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(50)); |
208 | |
209 | reg = VDPU_REG_INIT_QP(pps->pic_init_qp_minus26 + 26) | |
210 | VDPU_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); |
211 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(51)); |
212 | |
213 | reg = VDPU_REG_APF_THRESHOLD(8) | |
214 | VDPU_REG_STARTMB_X(0) | |
215 | VDPU_REG_STARTMB_Y(0); |
216 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(52)); |
217 | |
218 | reg = VDPU_REG_DEC_MODE(0); |
219 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(53)); |
220 | |
221 | reg = VDPU_REG_DEC_STRENDIAN_E(1) | |
222 | VDPU_REG_DEC_STRSWAP32_E(1) | |
223 | VDPU_REG_DEC_OUTSWAP32_E(1) | |
224 | VDPU_REG_DEC_INSWAP32_E(1) | |
225 | VDPU_REG_DEC_OUT_ENDIAN(1) | |
226 | VDPU_REG_DEC_IN_ENDIAN(0); |
227 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(54)); |
228 | |
229 | reg = VDPU_REG_DEC_DATA_DISC_E(0) | |
230 | VDPU_REG_DEC_MAX_BURST(16) | |
231 | VDPU_REG_DEC_AXI_WR_ID(0) | |
232 | VDPU_REG_DEC_AXI_RD_ID(0xff); |
233 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(56)); |
234 | |
235 | reg = VDPU_REG_START_CODE_E(1) | |
236 | VDPU_REG_CH_8PIX_ILEAV_E(0) | |
237 | VDPU_REG_RLC_MODE_E(0) | |
238 | VDPU_REG_PIC_INTERLACE_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) && |
239 | (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD || |
240 | dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)) | |
241 | VDPU_REG_PIC_FIELDMODE_E(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) | |
242 | VDPU_REG_PIC_TOPFIELD_E(!(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)) | |
243 | VDPU_REG_WRITE_MVS_E((sps->profile_idc > 66) && dec_param->nal_ref_idc) | |
244 | VDPU_REG_SEQ_MBAFF_E(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) | |
245 | VDPU_REG_PICORD_COUNT_E(sps->profile_idc > 66) | |
246 | VDPU_REG_DEC_TIMEOUT_E(1) | |
247 | VDPU_REG_DEC_CLK_GATE_E(1); |
248 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(57)); |
249 | |
250 | reg = VDPU_REG_PRED_BC_TAP_0_0(1) | |
251 | VDPU_REG_PRED_BC_TAP_0_1((u32)-5) | |
252 | VDPU_REG_PRED_BC_TAP_0_2(20); |
253 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(59)); |
254 | |
255 | reg = VDPU_REG_REFBU_E(0); |
256 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(65)); |
257 | |
258 | reg = VDPU_REG_STRM_START_BIT(0); |
259 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(109)); |
260 | |
261 | reg = VDPU_REG_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset) | |
262 | VDPU_REG_CH_QP_OFFSET(pps->chroma_qp_index_offset) | |
263 | VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | |
264 | VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)); |
265 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(110)); |
266 | |
267 | reg = VDPU_REG_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc) | |
268 | VDPU_REG_REF_FRAMES(sps->max_num_ref_frames); |
269 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(111)); |
270 | |
271 | reg = VDPU_REG_FILT_CTRL_PRES(pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) | |
272 | VDPU_REG_RDPIC_CNT_PRES(pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) | |
273 | VDPU_REG_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) | |
274 | VDPU_REG_FRAMENUM(dec_param->frame_num); |
275 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(112)); |
276 | |
277 | reg = VDPU_REG_REFPIC_MK_LEN(dec_param->dec_ref_pic_marking_bit_size) | |
278 | VDPU_REG_IDR_PIC_ID(dec_param->idr_pic_id); |
279 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(113)); |
280 | |
281 | reg = VDPU_REG_PPS_ID(pps->pic_parameter_set_id) | |
282 | VDPU_REG_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) | |
283 | VDPU_REG_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) | |
284 | VDPU_REG_POC_LENGTH(dec_param->pic_order_cnt_bit_size); |
285 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(114)); |
286 | |
287 | reg = VDPU_REG_IDR_PIC_E(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) | |
288 | VDPU_REG_DIR_8X8_INFER_E(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) | |
289 | VDPU_REG_BLACKWHITE_E(sps->profile_idc >= 100 && sps->chroma_format_idc == 0) | |
290 | VDPU_REG_CABAC_E(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) | |
291 | VDPU_REG_WEIGHT_PRED_E(pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) | |
292 | VDPU_REG_CONST_INTRA_E(pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) | |
293 | VDPU_REG_8X8TRANS_FLAG_E(pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) | |
294 | VDPU_REG_TYPE1_QUANT_E(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT) | |
295 | VDPU_REG_FIELDPIC_FLAG_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)); |
296 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(115)); |
297 | } |
298 | |
299 | static void set_ref(struct hantro_ctx *ctx) |
300 | { |
301 | const struct v4l2_h264_reference *b0_reflist, *b1_reflist, *p_reflist; |
302 | struct hantro_dev *vpu = ctx->dev; |
303 | u32 reg; |
304 | int i; |
305 | |
306 | b0_reflist = ctx->h264_dec.reflists.b0; |
307 | b1_reflist = ctx->h264_dec.reflists.b1; |
308 | p_reflist = ctx->h264_dec.reflists.p; |
309 | |
310 | reg = VDPU_REG_PINIT_RLIST_F9(p_reflist[9].index) | |
311 | VDPU_REG_PINIT_RLIST_F8(p_reflist[8].index) | |
312 | VDPU_REG_PINIT_RLIST_F7(p_reflist[7].index) | |
313 | VDPU_REG_PINIT_RLIST_F6(p_reflist[6].index) | |
314 | VDPU_REG_PINIT_RLIST_F5(p_reflist[5].index) | |
315 | VDPU_REG_PINIT_RLIST_F4(p_reflist[4].index); |
316 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(74)); |
317 | |
318 | reg = VDPU_REG_PINIT_RLIST_F15(p_reflist[15].index) | |
319 | VDPU_REG_PINIT_RLIST_F14(p_reflist[14].index) | |
320 | VDPU_REG_PINIT_RLIST_F13(p_reflist[13].index) | |
321 | VDPU_REG_PINIT_RLIST_F12(p_reflist[12].index) | |
322 | VDPU_REG_PINIT_RLIST_F11(p_reflist[11].index) | |
323 | VDPU_REG_PINIT_RLIST_F10(p_reflist[10].index); |
324 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(75)); |
325 | |
326 | reg = VDPU_REG_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, 1)) | |
327 | VDPU_REG_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, 0)); |
328 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(76)); |
329 | |
330 | reg = VDPU_REG_REFER3_NBR(hantro_h264_get_ref_nbr(ctx, 3)) | |
331 | VDPU_REG_REFER2_NBR(hantro_h264_get_ref_nbr(ctx, 2)); |
332 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(77)); |
333 | |
334 | reg = VDPU_REG_REFER5_NBR(hantro_h264_get_ref_nbr(ctx, 5)) | |
335 | VDPU_REG_REFER4_NBR(hantro_h264_get_ref_nbr(ctx, 4)); |
336 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(78)); |
337 | |
338 | reg = VDPU_REG_REFER7_NBR(hantro_h264_get_ref_nbr(ctx, 7)) | |
339 | VDPU_REG_REFER6_NBR(hantro_h264_get_ref_nbr(ctx, 6)); |
340 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(79)); |
341 | |
342 | reg = VDPU_REG_REFER9_NBR(hantro_h264_get_ref_nbr(ctx, 9)) | |
343 | VDPU_REG_REFER8_NBR(hantro_h264_get_ref_nbr(ctx, 8)); |
344 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(80)); |
345 | |
346 | reg = VDPU_REG_REFER11_NBR(hantro_h264_get_ref_nbr(ctx, 11)) | |
347 | VDPU_REG_REFER10_NBR(hantro_h264_get_ref_nbr(ctx, 10)); |
348 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(81)); |
349 | |
350 | reg = VDPU_REG_REFER13_NBR(hantro_h264_get_ref_nbr(ctx, 13)) | |
351 | VDPU_REG_REFER12_NBR(hantro_h264_get_ref_nbr(ctx, 12)); |
352 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(82)); |
353 | |
354 | reg = VDPU_REG_REFER15_NBR(hantro_h264_get_ref_nbr(ctx, 15)) | |
355 | VDPU_REG_REFER14_NBR(hantro_h264_get_ref_nbr(ctx, 14)); |
356 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(83)); |
357 | |
358 | reg = VDPU_REG_BINIT_RLIST_F5(b0_reflist[5].index) | |
359 | VDPU_REG_BINIT_RLIST_F4(b0_reflist[4].index) | |
360 | VDPU_REG_BINIT_RLIST_F3(b0_reflist[3].index) | |
361 | VDPU_REG_BINIT_RLIST_F2(b0_reflist[2].index) | |
362 | VDPU_REG_BINIT_RLIST_F1(b0_reflist[1].index) | |
363 | VDPU_REG_BINIT_RLIST_F0(b0_reflist[0].index); |
364 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(100)); |
365 | |
366 | reg = VDPU_REG_BINIT_RLIST_F11(b0_reflist[11].index) | |
367 | VDPU_REG_BINIT_RLIST_F10(b0_reflist[10].index) | |
368 | VDPU_REG_BINIT_RLIST_F9(b0_reflist[9].index) | |
369 | VDPU_REG_BINIT_RLIST_F8(b0_reflist[8].index) | |
370 | VDPU_REG_BINIT_RLIST_F7(b0_reflist[7].index) | |
371 | VDPU_REG_BINIT_RLIST_F6(b0_reflist[6].index); |
372 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(101)); |
373 | |
374 | reg = VDPU_REG_BINIT_RLIST_F15(b0_reflist[15].index) | |
375 | VDPU_REG_BINIT_RLIST_F14(b0_reflist[14].index) | |
376 | VDPU_REG_BINIT_RLIST_F13(b0_reflist[13].index) | |
377 | VDPU_REG_BINIT_RLIST_F12(b0_reflist[12].index); |
378 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(102)); |
379 | |
380 | reg = VDPU_REG_BINIT_RLIST_B5(b1_reflist[5].index) | |
381 | VDPU_REG_BINIT_RLIST_B4(b1_reflist[4].index) | |
382 | VDPU_REG_BINIT_RLIST_B3(b1_reflist[3].index) | |
383 | VDPU_REG_BINIT_RLIST_B2(b1_reflist[2].index) | |
384 | VDPU_REG_BINIT_RLIST_B1(b1_reflist[1].index) | |
385 | VDPU_REG_BINIT_RLIST_B0(b1_reflist[0].index); |
386 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(103)); |
387 | |
388 | reg = VDPU_REG_BINIT_RLIST_B11(b1_reflist[11].index) | |
389 | VDPU_REG_BINIT_RLIST_B10(b1_reflist[10].index) | |
390 | VDPU_REG_BINIT_RLIST_B9(b1_reflist[9].index) | |
391 | VDPU_REG_BINIT_RLIST_B8(b1_reflist[8].index) | |
392 | VDPU_REG_BINIT_RLIST_B7(b1_reflist[7].index) | |
393 | VDPU_REG_BINIT_RLIST_B6(b1_reflist[6].index); |
394 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(104)); |
395 | |
396 | reg = VDPU_REG_BINIT_RLIST_B15(b1_reflist[15].index) | |
397 | VDPU_REG_BINIT_RLIST_B14(b1_reflist[14].index) | |
398 | VDPU_REG_BINIT_RLIST_B13(b1_reflist[13].index) | |
399 | VDPU_REG_BINIT_RLIST_B12(b1_reflist[12].index); |
400 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(105)); |
401 | |
402 | reg = VDPU_REG_PINIT_RLIST_F3(p_reflist[3].index) | |
403 | VDPU_REG_PINIT_RLIST_F2(p_reflist[2].index) | |
404 | VDPU_REG_PINIT_RLIST_F1(p_reflist[1].index) | |
405 | VDPU_REG_PINIT_RLIST_F0(p_reflist[0].index); |
406 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(106)); |
407 | |
408 | reg = VDPU_REG_REFER_LTERM_E(ctx->h264_dec.dpb_longterm); |
409 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(107)); |
410 | |
411 | reg = VDPU_REG_REFER_VALID_E(ctx->h264_dec.dpb_valid); |
412 | vdpu_write_relaxed(vpu, val: reg, VDPU_SWREG(108)); |
413 | |
414 | /* Set up addresses of DPB buffers. */ |
415 | for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) { |
416 | dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, dpb_idx: i); |
417 | |
418 | vdpu_write_relaxed(vpu, val: dma_addr, VDPU_REG_REFER_BASE(i)); |
419 | } |
420 | } |
421 | |
422 | static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) |
423 | { |
424 | const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; |
425 | struct vb2_v4l2_buffer *dst_buf; |
426 | struct hantro_dev *vpu = ctx->dev; |
427 | dma_addr_t src_dma, dst_dma; |
428 | size_t offset = 0; |
429 | |
430 | /* Source (stream) buffer. */ |
431 | src_dma = vb2_dma_contig_plane_dma_addr(vb: &src_buf->vb2_buf, plane_no: 0); |
432 | vdpu_write_relaxed(vpu, val: src_dma, VDPU_REG_RLC_VLC_BASE); |
433 | |
434 | /* Destination (decoded frame) buffer. */ |
435 | dst_buf = hantro_get_dst_buf(ctx); |
436 | dst_dma = hantro_get_dec_buf_addr(ctx, vb: &dst_buf->vb2_buf); |
437 | /* Adjust dma addr to start at second line for bottom field */ |
438 | if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) |
439 | offset = ALIGN(ctx->src_fmt.width, MB_DIM); |
440 | vdpu_write_relaxed(vpu, val: dst_dma + offset, VDPU_REG_DEC_OUT_BASE); |
441 | |
442 | /* Higher profiles require DMV buffer appended to reference frames. */ |
443 | if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) { |
444 | unsigned int bytes_per_mb = 384; |
445 | |
446 | /* DMV buffer for monochrome start directly after Y-plane */ |
447 | if (ctrls->sps->profile_idc >= 100 && |
448 | ctrls->sps->chroma_format_idc == 0) |
449 | bytes_per_mb = 256; |
450 | offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) * |
451 | MB_HEIGHT(ctx->src_fmt.height); |
452 | |
453 | /* |
454 | * DMV buffer is split in two for field encoded frames, |
455 | * adjust offset for bottom field |
456 | */ |
457 | if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) |
458 | offset += 32 * MB_WIDTH(ctx->src_fmt.width) * |
459 | MB_HEIGHT(ctx->src_fmt.height); |
460 | vdpu_write_relaxed(vpu, val: dst_dma + offset, VDPU_REG_DIR_MV_BASE); |
461 | } |
462 | |
463 | /* Auxiliary buffer prepared in hantro_h264_dec_init(). */ |
464 | vdpu_write_relaxed(vpu, val: ctx->h264_dec.priv.dma, VDPU_REG_QTABLE_BASE); |
465 | } |
466 | |
467 | int rockchip_vpu2_h264_dec_run(struct hantro_ctx *ctx) |
468 | { |
469 | struct hantro_dev *vpu = ctx->dev; |
470 | struct vb2_v4l2_buffer *src_buf; |
471 | u32 reg; |
472 | int ret; |
473 | |
474 | /* Prepare the H264 decoder context. */ |
475 | ret = hantro_h264_dec_prepare_run(ctx); |
476 | if (ret) |
477 | return ret; |
478 | |
479 | src_buf = hantro_get_src_buf(ctx); |
480 | set_params(ctx, src_buf); |
481 | set_ref(ctx); |
482 | set_buffers(ctx, src_buf); |
483 | |
484 | hantro_end_prepare_run(ctx); |
485 | |
486 | /* Start decoding! */ |
487 | reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1); |
488 | vdpu_write(vpu, val: reg, VDPU_SWREG(57)); |
489 | |
490 | return 0; |
491 | } |
492 | |