1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Hantro VPU HEVC codec driver |
4 | * |
5 | * Copyright (C) 2020 Safran Passenger Innovations LLC |
6 | */ |
7 | |
8 | #include <linux/types.h> |
9 | #include <media/v4l2-mem2mem.h> |
10 | |
11 | #include "hantro.h" |
12 | #include "hantro_hw.h" |
13 | |
/*
 * Per-tile-border scratch buffer sizing, consumed by
 * tile_buffer_reallocate().
 */

/* bytes per pixel row */
#define VERT_FILTER_RAM_SIZE	8

/*
 * BSD control data of current picture at tile border
 * 128 bits per 4x4 tile = 128/(8*4) bytes per row
 *
 * bytes per pixel row
 */
#define BSD_CTRL_RAM_SIZE	4

/*
 * tile border coefficients of filter
 *
 * bytes per pixel
 */
#define VERT_SAO_RAM_SIZE	48

/* Size in bytes of the scaling list buffer allocated at init time */
#define SCALING_LIST_SIZE	(16 * 64)

/* Tile grid limits used to size the tile_sizes buffer at init time */
#define MAX_TILE_COLS		20
#define MAX_TILE_ROWS		22
27 | |
28 | void hantro_hevc_ref_init(struct hantro_ctx *ctx) |
29 | { |
30 | struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; |
31 | |
32 | hevc_dec->ref_bufs_used = 0; |
33 | } |
34 | |
35 | dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, |
36 | s32 poc) |
37 | { |
38 | struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; |
39 | int i; |
40 | |
41 | /* Find the reference buffer in already known ones */ |
42 | for (i = 0; i < NUM_REF_PICTURES; i++) { |
43 | if (hevc_dec->ref_bufs_poc[i] == poc) { |
44 | hevc_dec->ref_bufs_used |= 1 << i; |
45 | return hevc_dec->ref_bufs[i].dma; |
46 | } |
47 | } |
48 | |
49 | return 0; |
50 | } |
51 | |
52 | int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr) |
53 | { |
54 | struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; |
55 | int i; |
56 | |
57 | /* Add a new reference buffer */ |
58 | for (i = 0; i < NUM_REF_PICTURES; i++) { |
59 | if (!(hevc_dec->ref_bufs_used & 1 << i)) { |
60 | hevc_dec->ref_bufs_used |= 1 << i; |
61 | hevc_dec->ref_bufs_poc[i] = poc; |
62 | hevc_dec->ref_bufs[i].dma = addr; |
63 | return 0; |
64 | } |
65 | } |
66 | |
67 | return -EINVAL; |
68 | } |
69 | |
70 | static int tile_buffer_reallocate(struct hantro_ctx *ctx) |
71 | { |
72 | struct hantro_dev *vpu = ctx->dev; |
73 | struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; |
74 | const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; |
75 | const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps; |
76 | const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; |
77 | unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1; |
78 | unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63; |
79 | unsigned int size; |
80 | |
81 | if (num_tile_cols <= 1 || |
82 | num_tile_cols <= hevc_dec->num_tile_cols_allocated) |
83 | return 0; |
84 | |
85 | /* Need to reallocate due to tiles passed via PPS */ |
86 | if (hevc_dec->tile_filter.cpu) { |
87 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_filter.size, |
88 | cpu_addr: hevc_dec->tile_filter.cpu, |
89 | dma_handle: hevc_dec->tile_filter.dma); |
90 | hevc_dec->tile_filter.cpu = NULL; |
91 | } |
92 | |
93 | if (hevc_dec->tile_sao.cpu) { |
94 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_sao.size, |
95 | cpu_addr: hevc_dec->tile_sao.cpu, |
96 | dma_handle: hevc_dec->tile_sao.dma); |
97 | hevc_dec->tile_sao.cpu = NULL; |
98 | } |
99 | |
100 | if (hevc_dec->tile_bsd.cpu) { |
101 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_bsd.size, |
102 | cpu_addr: hevc_dec->tile_bsd.cpu, |
103 | dma_handle: hevc_dec->tile_bsd.dma); |
104 | hevc_dec->tile_bsd.cpu = NULL; |
105 | } |
106 | |
107 | size = (VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8; |
108 | hevc_dec->tile_filter.cpu = dma_alloc_coherent(dev: vpu->dev, size, |
109 | dma_handle: &hevc_dec->tile_filter.dma, |
110 | GFP_KERNEL); |
111 | if (!hevc_dec->tile_filter.cpu) |
112 | return -ENOMEM; |
113 | hevc_dec->tile_filter.size = size; |
114 | |
115 | size = (VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8; |
116 | hevc_dec->tile_sao.cpu = dma_alloc_coherent(dev: vpu->dev, size, |
117 | dma_handle: &hevc_dec->tile_sao.dma, |
118 | GFP_KERNEL); |
119 | if (!hevc_dec->tile_sao.cpu) |
120 | goto err_free_tile_buffers; |
121 | hevc_dec->tile_sao.size = size; |
122 | |
123 | size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1); |
124 | hevc_dec->tile_bsd.cpu = dma_alloc_coherent(dev: vpu->dev, size, |
125 | dma_handle: &hevc_dec->tile_bsd.dma, |
126 | GFP_KERNEL); |
127 | if (!hevc_dec->tile_bsd.cpu) |
128 | goto err_free_sao_buffers; |
129 | hevc_dec->tile_bsd.size = size; |
130 | |
131 | hevc_dec->num_tile_cols_allocated = num_tile_cols; |
132 | |
133 | return 0; |
134 | |
135 | err_free_sao_buffers: |
136 | if (hevc_dec->tile_sao.cpu) |
137 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_sao.size, |
138 | cpu_addr: hevc_dec->tile_sao.cpu, |
139 | dma_handle: hevc_dec->tile_sao.dma); |
140 | hevc_dec->tile_sao.cpu = NULL; |
141 | |
142 | err_free_tile_buffers: |
143 | if (hevc_dec->tile_filter.cpu) |
144 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_filter.size, |
145 | cpu_addr: hevc_dec->tile_filter.cpu, |
146 | dma_handle: hevc_dec->tile_filter.dma); |
147 | hevc_dec->tile_filter.cpu = NULL; |
148 | |
149 | return -ENOMEM; |
150 | } |
151 | |
152 | static int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps) |
153 | { |
154 | /* |
155 | * for tile pixel format check if the width and height match |
156 | * hardware constraints |
157 | */ |
158 | if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) { |
159 | if (ctx->dst_fmt.width != |
160 | ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width)) |
161 | return -EINVAL; |
162 | |
163 | if (ctx->dst_fmt.height != |
164 | ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height)) |
165 | return -EINVAL; |
166 | } |
167 | |
168 | return 0; |
169 | } |
170 | |
171 | int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx) |
172 | { |
173 | struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec; |
174 | struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls; |
175 | int ret; |
176 | |
177 | hantro_start_prepare_run(ctx); |
178 | |
179 | ctrls->decode_params = |
180 | hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS); |
181 | if (WARN_ON(!ctrls->decode_params)) |
182 | return -EINVAL; |
183 | |
184 | ctrls->scaling = |
185 | hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX); |
186 | if (WARN_ON(!ctrls->scaling)) |
187 | return -EINVAL; |
188 | |
189 | ctrls->sps = |
190 | hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SPS); |
191 | if (WARN_ON(!ctrls->sps)) |
192 | return -EINVAL; |
193 | |
194 | ret = hantro_hevc_validate_sps(ctx, sps: ctrls->sps); |
195 | if (ret) |
196 | return ret; |
197 | |
198 | ctrls->pps = |
199 | hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS); |
200 | if (WARN_ON(!ctrls->pps)) |
201 | return -EINVAL; |
202 | |
203 | ret = tile_buffer_reallocate(ctx); |
204 | if (ret) |
205 | return ret; |
206 | |
207 | return 0; |
208 | } |
209 | |
210 | void hantro_hevc_dec_exit(struct hantro_ctx *ctx) |
211 | { |
212 | struct hantro_dev *vpu = ctx->dev; |
213 | struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; |
214 | |
215 | if (hevc_dec->tile_sizes.cpu) |
216 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_sizes.size, |
217 | cpu_addr: hevc_dec->tile_sizes.cpu, |
218 | dma_handle: hevc_dec->tile_sizes.dma); |
219 | hevc_dec->tile_sizes.cpu = NULL; |
220 | |
221 | if (hevc_dec->scaling_lists.cpu) |
222 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->scaling_lists.size, |
223 | cpu_addr: hevc_dec->scaling_lists.cpu, |
224 | dma_handle: hevc_dec->scaling_lists.dma); |
225 | hevc_dec->scaling_lists.cpu = NULL; |
226 | |
227 | if (hevc_dec->tile_filter.cpu) |
228 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_filter.size, |
229 | cpu_addr: hevc_dec->tile_filter.cpu, |
230 | dma_handle: hevc_dec->tile_filter.dma); |
231 | hevc_dec->tile_filter.cpu = NULL; |
232 | |
233 | if (hevc_dec->tile_sao.cpu) |
234 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_sao.size, |
235 | cpu_addr: hevc_dec->tile_sao.cpu, |
236 | dma_handle: hevc_dec->tile_sao.dma); |
237 | hevc_dec->tile_sao.cpu = NULL; |
238 | |
239 | if (hevc_dec->tile_bsd.cpu) |
240 | dma_free_coherent(dev: vpu->dev, size: hevc_dec->tile_bsd.size, |
241 | cpu_addr: hevc_dec->tile_bsd.cpu, |
242 | dma_handle: hevc_dec->tile_bsd.dma); |
243 | hevc_dec->tile_bsd.cpu = NULL; |
244 | } |
245 | |
246 | int hantro_hevc_dec_init(struct hantro_ctx *ctx) |
247 | { |
248 | struct hantro_dev *vpu = ctx->dev; |
249 | struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; |
250 | unsigned int size; |
251 | |
252 | memset(hevc_dec, 0, sizeof(*hevc_dec)); |
253 | |
254 | /* |
255 | * Maximum number of tiles times width and height (2 bytes each), |
256 | * rounding up to next 16 bytes boundary + one extra 16 byte |
257 | * chunk (HW guys wanted to have this). |
258 | */ |
259 | size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16); |
260 | hevc_dec->tile_sizes.cpu = dma_alloc_coherent(dev: vpu->dev, size, |
261 | dma_handle: &hevc_dec->tile_sizes.dma, |
262 | GFP_KERNEL); |
263 | if (!hevc_dec->tile_sizes.cpu) |
264 | return -ENOMEM; |
265 | |
266 | hevc_dec->tile_sizes.size = size; |
267 | |
268 | hevc_dec->scaling_lists.cpu = dma_alloc_coherent(dev: vpu->dev, SCALING_LIST_SIZE, |
269 | dma_handle: &hevc_dec->scaling_lists.dma, |
270 | GFP_KERNEL); |
271 | if (!hevc_dec->scaling_lists.cpu) |
272 | return -ENOMEM; |
273 | |
274 | hevc_dec->scaling_lists.size = SCALING_LIST_SIZE; |
275 | |
276 | hantro_hevc_ref_init(ctx); |
277 | |
278 | return 0; |
279 | } |
280 | |