1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Coda multi-standard codec IP - BIT processor functions |
4 | * |
5 | * Copyright (C) 2012 Vista Silicon S.L. |
6 | * Javier Martin, <javier.martin@vista-silicon.com> |
7 | * Xavier Duret |
8 | * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix |
9 | */ |
10 | |
11 | #include <linux/clk.h> |
12 | #include <linux/irqreturn.h> |
13 | #include <linux/kernel.h> |
14 | #include <linux/log2.h> |
15 | #include <linux/platform_device.h> |
16 | #include <linux/ratelimit.h> |
17 | #include <linux/reset.h> |
18 | #include <linux/slab.h> |
19 | #include <linux/videodev2.h> |
20 | |
21 | #include <media/v4l2-common.h> |
22 | #include <media/v4l2-ctrls.h> |
23 | #include <media/v4l2-fh.h> |
24 | #include <media/v4l2-mem2mem.h> |
25 | #include <media/videobuf2-v4l2.h> |
26 | #include <media/videobuf2-dma-contig.h> |
27 | #include <media/videobuf2-vmalloc.h> |
28 | |
29 | #include "coda.h" |
30 | #include "imx-vdoa.h" |
31 | #define CREATE_TRACE_POINTS |
32 | #include "trace.h" |
33 | |
/* Size of the per-context parameter buffer ("parabuf") allocation. */
#define CODA_PARA_BUF_SIZE (10 * 1024)
/* Size of the per-context "psbuf" allocated on CODA_HX4 and CODA_7541. */
#define CODA7_PS_BUF_SIZE 0x28000
/* Extra work buffer space added on CODA_960 when the format is H.264. */
#define CODA9_PS_SAVE_SIZE (512 * 1024)

/* Default gamma values. NOTE(review): their users are outside this view. */
#define CODA_DEFAULT_GAMMA 4096
#define CODA9_DEFAULT_GAMMA 24576 /* 0.75 * 32768 */

/* Forward declaration; the definition is not part of the code shown here. */
static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
42 | |
43 | static inline int coda_is_initialized(struct coda_dev *dev) |
44 | { |
45 | return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0; |
46 | } |
47 | |
48 | static inline unsigned long coda_isbusy(struct coda_dev *dev) |
49 | { |
50 | return coda_read(dev, CODA_REG_BIT_BUSY); |
51 | } |
52 | |
53 | static int coda_wait_timeout(struct coda_dev *dev) |
54 | { |
55 | unsigned long timeout = jiffies + msecs_to_jiffies(m: 1000); |
56 | |
57 | while (coda_isbusy(dev)) { |
58 | if (time_after(jiffies, timeout)) |
59 | return -ETIMEDOUT; |
60 | } |
61 | return 0; |
62 | } |
63 | |
64 | static void coda_command_async(struct coda_ctx *ctx, int cmd) |
65 | { |
66 | struct coda_dev *dev = ctx->dev; |
67 | |
68 | if (dev->devtype->product == CODA_HX4 || |
69 | dev->devtype->product == CODA_7541 || |
70 | dev->devtype->product == CODA_960) { |
71 | /* Restore context related registers to CODA */ |
72 | coda_write(dev, data: ctx->bit_stream_param, |
73 | CODA_REG_BIT_BIT_STREAM_PARAM); |
74 | coda_write(dev, data: ctx->frm_dis_flg, |
75 | CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); |
76 | coda_write(dev, data: ctx->frame_mem_ctrl, |
77 | CODA_REG_BIT_FRAME_MEM_CTRL); |
78 | coda_write(dev, data: ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR); |
79 | } |
80 | |
81 | if (dev->devtype->product == CODA_960) { |
82 | coda_write(dev, data: 1, CODA9_GDI_WPROT_ERR_CLR); |
83 | coda_write(dev, data: 0, CODA9_GDI_WPROT_RGN_EN); |
84 | } |
85 | |
86 | coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); |
87 | |
88 | coda_write(dev, data: ctx->idx, CODA_REG_BIT_RUN_INDEX); |
89 | coda_write(dev, data: ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD); |
90 | coda_write(dev, data: ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD); |
91 | |
92 | trace_coda_bit_run(ctx, cmd); |
93 | |
94 | coda_write(dev, data: cmd, CODA_REG_BIT_RUN_COMMAND); |
95 | } |
96 | |
97 | static int coda_command_sync(struct coda_ctx *ctx, int cmd) |
98 | { |
99 | struct coda_dev *dev = ctx->dev; |
100 | int ret; |
101 | |
102 | lockdep_assert_held(&dev->coda_mutex); |
103 | |
104 | coda_command_async(ctx, cmd); |
105 | ret = coda_wait_timeout(dev); |
106 | trace_coda_bit_done(ctx); |
107 | |
108 | return ret; |
109 | } |
110 | |
111 | int coda_hw_reset(struct coda_ctx *ctx) |
112 | { |
113 | struct coda_dev *dev = ctx->dev; |
114 | unsigned long timeout; |
115 | unsigned int idx; |
116 | int ret; |
117 | |
118 | lockdep_assert_held(&dev->coda_mutex); |
119 | |
120 | if (!dev->rstc) |
121 | return -ENOENT; |
122 | |
123 | idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX); |
124 | |
125 | if (dev->devtype->product == CODA_960) { |
126 | timeout = jiffies + msecs_to_jiffies(m: 100); |
127 | coda_write(dev, data: 0x11, CODA9_GDI_BUS_CTRL); |
128 | while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) { |
129 | if (time_after(jiffies, timeout)) |
130 | return -ETIME; |
131 | cpu_relax(); |
132 | } |
133 | } |
134 | |
135 | ret = reset_control_reset(rstc: dev->rstc); |
136 | if (ret < 0) |
137 | return ret; |
138 | |
139 | if (dev->devtype->product == CODA_960) |
140 | coda_write(dev, data: 0x00, CODA9_GDI_BUS_CTRL); |
141 | coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); |
142 | coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN); |
143 | ret = coda_wait_timeout(dev); |
144 | coda_write(dev, data: idx, CODA_REG_BIT_RUN_INDEX); |
145 | |
146 | return ret; |
147 | } |
148 | |
149 | static void coda_kfifo_sync_from_device(struct coda_ctx *ctx) |
150 | { |
151 | struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; |
152 | struct coda_dev *dev = ctx->dev; |
153 | u32 rd_ptr; |
154 | |
155 | rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); |
156 | kfifo->out = (kfifo->in & ~kfifo->mask) | |
157 | (rd_ptr - ctx->bitstream.paddr); |
158 | if (kfifo->out > kfifo->in) |
159 | kfifo->out -= kfifo->mask + 1; |
160 | } |
161 | |
162 | static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx) |
163 | { |
164 | struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; |
165 | struct coda_dev *dev = ctx->dev; |
166 | u32 rd_ptr, wr_ptr; |
167 | |
168 | rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask); |
169 | coda_write(dev, data: rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); |
170 | wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); |
171 | coda_write(dev, data: wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); |
172 | } |
173 | |
174 | static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx) |
175 | { |
176 | struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; |
177 | struct coda_dev *dev = ctx->dev; |
178 | u32 wr_ptr; |
179 | |
180 | wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); |
181 | coda_write(dev, data: wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); |
182 | } |
183 | |
184 | static int coda_h264_bitstream_pad(struct coda_ctx *ctx, u32 size) |
185 | { |
186 | unsigned char *buf; |
187 | u32 n; |
188 | |
189 | if (size < 6) |
190 | size = 6; |
191 | |
192 | buf = kmalloc(size, GFP_KERNEL); |
193 | if (!buf) |
194 | return -ENOMEM; |
195 | |
196 | coda_h264_filler_nal(size, p: buf); |
197 | n = kfifo_in(&ctx->bitstream_fifo, buf, size); |
198 | kfree(objp: buf); |
199 | |
200 | return (n < size) ? -ENOSPC : 0; |
201 | } |
202 | |
203 | int coda_bitstream_flush(struct coda_ctx *ctx) |
204 | { |
205 | int ret; |
206 | |
207 | if (ctx->inst_type != CODA_INST_DECODER || !ctx->use_bit) |
208 | return 0; |
209 | |
210 | ret = coda_command_sync(ctx, CODA_COMMAND_DEC_BUF_FLUSH); |
211 | if (ret < 0) { |
212 | v4l2_err(&ctx->dev->v4l2_dev, "failed to flush bitstream\n" ); |
213 | return ret; |
214 | } |
215 | |
216 | kfifo_init(&ctx->bitstream_fifo, ctx->bitstream.vaddr, |
217 | ctx->bitstream.size); |
218 | coda_kfifo_sync_to_device_full(ctx); |
219 | |
220 | return 0; |
221 | } |
222 | |
223 | static int coda_bitstream_queue(struct coda_ctx *ctx, const u8 *buf, u32 size) |
224 | { |
225 | u32 n = kfifo_in(&ctx->bitstream_fifo, buf, size); |
226 | |
227 | return (n < size) ? -ENOSPC : 0; |
228 | } |
229 | |
230 | static u32 (struct coda_ctx *ctx, |
231 | struct vb2_v4l2_buffer *src_buf, |
232 | u32 payload) |
233 | { |
234 | u8 *vaddr = vb2_plane_vaddr(vb: &src_buf->vb2_buf, plane_no: 0); |
235 | u32 size = 0; |
236 | |
237 | switch (ctx->codec->src_fourcc) { |
238 | case V4L2_PIX_FMT_MPEG2: |
239 | size = coda_mpeg2_parse_headers(ctx, buf: vaddr, size: payload); |
240 | break; |
241 | case V4L2_PIX_FMT_MPEG4: |
242 | size = coda_mpeg4_parse_headers(ctx, buf: vaddr, size: payload); |
243 | break; |
244 | default: |
245 | break; |
246 | } |
247 | |
248 | return size; |
249 | } |
250 | |
251 | static bool coda_bitstream_try_queue(struct coda_ctx *ctx, |
252 | struct vb2_v4l2_buffer *src_buf) |
253 | { |
254 | unsigned long payload = vb2_get_plane_payload(vb: &src_buf->vb2_buf, plane_no: 0); |
255 | u8 *vaddr = vb2_plane_vaddr(vb: &src_buf->vb2_buf, plane_no: 0); |
256 | int ret; |
257 | int i; |
258 | |
259 | if (coda_get_bitstream_payload(ctx) + payload + 512 >= |
260 | ctx->bitstream.size) |
261 | return false; |
262 | |
263 | if (!vaddr) { |
264 | v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n" ); |
265 | return true; |
266 | } |
267 | |
268 | if (ctx->qsequence == 0 && payload < 512) { |
269 | /* |
270 | * Add padding after the first buffer, if it is too small to be |
271 | * fetched by the CODA, by repeating the headers. Without |
272 | * repeated headers, or the first frame already queued, decoder |
273 | * sequence initialization fails with error code 0x2000 on i.MX6 |
274 | * or error code 0x1 on i.MX51. |
275 | */ |
276 | u32 = coda_buffer_parse_headers(ctx, src_buf, |
277 | payload); |
278 | |
279 | if (header_size) { |
280 | coda_dbg(1, ctx, "pad with %u-byte header\n" , |
281 | header_size); |
282 | for (i = payload; i < 512; i += header_size) { |
283 | ret = coda_bitstream_queue(ctx, buf: vaddr, |
284 | size: header_size); |
285 | if (ret < 0) { |
286 | v4l2_err(&ctx->dev->v4l2_dev, |
287 | "bitstream buffer overflow\n" ); |
288 | return false; |
289 | } |
290 | if (ctx->dev->devtype->product == CODA_960) |
291 | break; |
292 | } |
293 | } else { |
294 | coda_dbg(1, ctx, |
295 | "could not parse header, sequence initialization might fail\n" ); |
296 | } |
297 | |
298 | /* Add padding before the first buffer, if it is too small */ |
299 | if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) |
300 | coda_h264_bitstream_pad(ctx, size: 512 - payload); |
301 | } |
302 | |
303 | ret = coda_bitstream_queue(ctx, buf: vaddr, size: payload); |
304 | if (ret < 0) { |
305 | v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n" ); |
306 | return false; |
307 | } |
308 | |
309 | src_buf->sequence = ctx->qsequence++; |
310 | |
311 | /* Sync read pointer to device */ |
312 | if (ctx == v4l2_m2m_get_curr_priv(m2m_dev: ctx->dev->m2m_dev)) |
313 | coda_kfifo_sync_to_device_write(ctx); |
314 | |
315 | /* Set the stream-end flag after the last buffer is queued */ |
316 | if (src_buf->flags & V4L2_BUF_FLAG_LAST) |
317 | coda_bit_stream_end_flag(ctx); |
318 | ctx->hold = false; |
319 | |
320 | return true; |
321 | } |
322 | |
323 | void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list) |
324 | { |
325 | struct vb2_v4l2_buffer *src_buf; |
326 | struct coda_buffer_meta *meta; |
327 | u32 start; |
328 | |
329 | lockdep_assert_held(&ctx->bitstream_mutex); |
330 | |
331 | if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) |
332 | return; |
333 | |
334 | while (v4l2_m2m_num_src_bufs_ready(m2m_ctx: ctx->fh.m2m_ctx) > 0) { |
335 | /* |
336 | * Only queue two JPEGs into the bitstream buffer to keep |
337 | * latency low. We need at least one complete buffer and the |
338 | * header of another buffer (for prescan) in the bitstream. |
339 | */ |
340 | if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG && |
341 | ctx->num_metas > 1) |
342 | break; |
343 | |
344 | if (ctx->num_internal_frames && |
345 | ctx->num_metas >= ctx->num_internal_frames) { |
346 | meta = list_first_entry(&ctx->buffer_meta_list, |
347 | struct coda_buffer_meta, list); |
348 | |
349 | /* |
350 | * If we managed to fill in at least a full reorder |
351 | * window of buffers (num_internal_frames is a |
352 | * conservative estimate for this) and the bitstream |
353 | * prefetcher has at least 2 256 bytes periods beyond |
354 | * the first buffer to fetch, we can safely stop queuing |
355 | * in order to limit the decoder drain latency. |
356 | */ |
357 | if (coda_bitstream_can_fetch_past(ctx, pos: meta->end)) |
358 | break; |
359 | } |
360 | |
361 | src_buf = v4l2_m2m_next_src_buf(m2m_ctx: ctx->fh.m2m_ctx); |
362 | |
363 | /* Drop frames that do not start/end with a SOI/EOI markers */ |
364 | if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG && |
365 | !coda_jpeg_check_buffer(ctx, vb: &src_buf->vb2_buf)) { |
366 | v4l2_err(&ctx->dev->v4l2_dev, |
367 | "dropping invalid JPEG frame %d\n" , |
368 | ctx->qsequence); |
369 | src_buf = v4l2_m2m_src_buf_remove(m2m_ctx: ctx->fh.m2m_ctx); |
370 | if (buffer_list) { |
371 | struct v4l2_m2m_buffer *m2m_buf; |
372 | |
373 | m2m_buf = container_of(src_buf, |
374 | struct v4l2_m2m_buffer, |
375 | vb); |
376 | list_add_tail(new: &m2m_buf->list, head: buffer_list); |
377 | } else { |
378 | v4l2_m2m_buf_done(buf: src_buf, state: VB2_BUF_STATE_ERROR); |
379 | } |
380 | continue; |
381 | } |
382 | |
383 | /* Dump empty buffers */ |
384 | if (!vb2_get_plane_payload(vb: &src_buf->vb2_buf, plane_no: 0)) { |
385 | src_buf = v4l2_m2m_src_buf_remove(m2m_ctx: ctx->fh.m2m_ctx); |
386 | v4l2_m2m_buf_done(buf: src_buf, state: VB2_BUF_STATE_DONE); |
387 | continue; |
388 | } |
389 | |
390 | /* Buffer start position */ |
391 | start = ctx->bitstream_fifo.kfifo.in; |
392 | |
393 | if (coda_bitstream_try_queue(ctx, src_buf)) { |
394 | /* |
395 | * Source buffer is queued in the bitstream ringbuffer; |
396 | * queue the timestamp and mark source buffer as done |
397 | */ |
398 | src_buf = v4l2_m2m_src_buf_remove(m2m_ctx: ctx->fh.m2m_ctx); |
399 | |
400 | meta = kmalloc(size: sizeof(*meta), GFP_KERNEL); |
401 | if (meta) { |
402 | meta->sequence = src_buf->sequence; |
403 | meta->timecode = src_buf->timecode; |
404 | meta->timestamp = src_buf->vb2_buf.timestamp; |
405 | meta->start = start; |
406 | meta->end = ctx->bitstream_fifo.kfifo.in; |
407 | meta->last = src_buf->flags & V4L2_BUF_FLAG_LAST; |
408 | if (meta->last) |
409 | coda_dbg(1, ctx, "marking last meta" ); |
410 | spin_lock(lock: &ctx->buffer_meta_lock); |
411 | list_add_tail(new: &meta->list, |
412 | head: &ctx->buffer_meta_list); |
413 | ctx->num_metas++; |
414 | spin_unlock(lock: &ctx->buffer_meta_lock); |
415 | |
416 | trace_coda_bit_queue(ctx, buf: src_buf, meta); |
417 | } |
418 | |
419 | if (buffer_list) { |
420 | struct v4l2_m2m_buffer *m2m_buf; |
421 | |
422 | m2m_buf = container_of(src_buf, |
423 | struct v4l2_m2m_buffer, |
424 | vb); |
425 | list_add_tail(new: &m2m_buf->list, head: buffer_list); |
426 | } else { |
427 | v4l2_m2m_buf_done(buf: src_buf, state: VB2_BUF_STATE_DONE); |
428 | } |
429 | } else { |
430 | break; |
431 | } |
432 | } |
433 | } |
434 | |
435 | void coda_bit_stream_end_flag(struct coda_ctx *ctx) |
436 | { |
437 | struct coda_dev *dev = ctx->dev; |
438 | |
439 | ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; |
440 | |
441 | /* If this context is currently running, update the hardware flag */ |
442 | if ((dev->devtype->product == CODA_960) && |
443 | coda_isbusy(dev) && |
444 | (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) { |
445 | coda_write(dev, data: ctx->bit_stream_param, |
446 | CODA_REG_BIT_BIT_STREAM_PARAM); |
447 | } |
448 | } |
449 | |
450 | static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value) |
451 | { |
452 | struct coda_dev *dev = ctx->dev; |
453 | u32 *p = ctx->parabuf.vaddr; |
454 | |
455 | if (dev->devtype->product == CODA_DX6) |
456 | p[index] = value; |
457 | else |
458 | p[index ^ 1] = value; |
459 | } |
460 | |
461 | static inline int coda_alloc_context_buf(struct coda_ctx *ctx, |
462 | struct coda_aux_buf *buf, size_t size, |
463 | const char *name) |
464 | { |
465 | return coda_alloc_aux_buf(dev: ctx->dev, buf, size, name, parent: ctx->debugfs_entry); |
466 | } |
467 | |
468 | |
469 | static void coda_free_framebuffers(struct coda_ctx *ctx) |
470 | { |
471 | int i; |
472 | |
473 | for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++) |
474 | coda_free_aux_buf(dev: ctx->dev, buf: &ctx->internal_frames[i].buf); |
475 | } |
476 | |
477 | static int coda_alloc_framebuffers(struct coda_ctx *ctx, |
478 | struct coda_q_data *q_data, u32 fourcc) |
479 | { |
480 | struct coda_dev *dev = ctx->dev; |
481 | unsigned int ysize, ycbcr_size; |
482 | int ret; |
483 | int i; |
484 | |
485 | if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 || |
486 | ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 || |
487 | ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 || |
488 | ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4) |
489 | ysize = round_up(q_data->rect.width, 16) * |
490 | round_up(q_data->rect.height, 16); |
491 | else |
492 | ysize = round_up(q_data->rect.width, 8) * q_data->rect.height; |
493 | |
494 | if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) |
495 | ycbcr_size = round_up(ysize, 4096) + ysize / 2; |
496 | else |
497 | ycbcr_size = ysize + ysize / 2; |
498 | |
499 | /* Allocate frame buffers */ |
500 | for (i = 0; i < ctx->num_internal_frames; i++) { |
501 | size_t size = ycbcr_size; |
502 | char *name; |
503 | |
504 | /* Add space for mvcol buffers */ |
505 | if (dev->devtype->product != CODA_DX6 && |
506 | (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 || |
507 | (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0))) |
508 | size += ysize / 4; |
509 | name = kasprintf(GFP_KERNEL, fmt: "fb%d" , i); |
510 | if (!name) { |
511 | coda_free_framebuffers(ctx); |
512 | return -ENOMEM; |
513 | } |
514 | ret = coda_alloc_context_buf(ctx, buf: &ctx->internal_frames[i].buf, |
515 | size, name); |
516 | kfree(objp: name); |
517 | if (ret < 0) { |
518 | coda_free_framebuffers(ctx); |
519 | return ret; |
520 | } |
521 | } |
522 | |
523 | /* Register frame buffers in the parameter buffer */ |
524 | for (i = 0; i < ctx->num_internal_frames; i++) { |
525 | u32 y, cb, cr, mvcol; |
526 | |
527 | /* Start addresses of Y, Cb, Cr planes */ |
528 | y = ctx->internal_frames[i].buf.paddr; |
529 | cb = y + ysize; |
530 | cr = y + ysize + ysize/4; |
531 | mvcol = y + ysize + ysize/4 + ysize/4; |
532 | if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) { |
533 | cb = round_up(cb, 4096); |
534 | mvcol = cb + ysize/2; |
535 | cr = 0; |
536 | /* Packed 20-bit MSB of base addresses */ |
537 | /* YYYYYCCC, CCyyyyyc, cccc.... */ |
538 | y = (y & 0xfffff000) | cb >> 20; |
539 | cb = (cb & 0x000ff000) << 12; |
540 | } |
541 | coda_parabuf_write(ctx, index: i * 3 + 0, value: y); |
542 | coda_parabuf_write(ctx, index: i * 3 + 1, value: cb); |
543 | coda_parabuf_write(ctx, index: i * 3 + 2, value: cr); |
544 | |
545 | if (dev->devtype->product == CODA_DX6) |
546 | continue; |
547 | |
548 | /* mvcol buffer for h.264 and mpeg4 */ |
549 | if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) |
550 | coda_parabuf_write(ctx, index: 96 + i, value: mvcol); |
551 | if (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0) |
552 | coda_parabuf_write(ctx, index: 97, value: mvcol); |
553 | } |
554 | |
555 | return 0; |
556 | } |
557 | |
558 | static void coda_free_context_buffers(struct coda_ctx *ctx) |
559 | { |
560 | struct coda_dev *dev = ctx->dev; |
561 | |
562 | coda_free_aux_buf(dev, buf: &ctx->slicebuf); |
563 | coda_free_aux_buf(dev, buf: &ctx->psbuf); |
564 | if (dev->devtype->product != CODA_DX6) |
565 | coda_free_aux_buf(dev, buf: &ctx->workbuf); |
566 | coda_free_aux_buf(dev, buf: &ctx->parabuf); |
567 | } |
568 | |
569 | static int coda_alloc_context_buffers(struct coda_ctx *ctx, |
570 | struct coda_q_data *q_data) |
571 | { |
572 | struct coda_dev *dev = ctx->dev; |
573 | size_t size; |
574 | int ret; |
575 | |
576 | if (!ctx->parabuf.vaddr) { |
577 | ret = coda_alloc_context_buf(ctx, buf: &ctx->parabuf, |
578 | CODA_PARA_BUF_SIZE, name: "parabuf" ); |
579 | if (ret < 0) |
580 | return ret; |
581 | } |
582 | |
583 | if (dev->devtype->product == CODA_DX6) |
584 | return 0; |
585 | |
586 | if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) { |
587 | /* worst case slice size */ |
588 | size = (DIV_ROUND_UP(q_data->rect.width, 16) * |
589 | DIV_ROUND_UP(q_data->rect.height, 16)) * 3200 / 8 + 512; |
590 | ret = coda_alloc_context_buf(ctx, buf: &ctx->slicebuf, size, |
591 | name: "slicebuf" ); |
592 | if (ret < 0) |
593 | goto err; |
594 | } |
595 | |
596 | if (!ctx->psbuf.vaddr && (dev->devtype->product == CODA_HX4 || |
597 | dev->devtype->product == CODA_7541)) { |
598 | ret = coda_alloc_context_buf(ctx, buf: &ctx->psbuf, |
599 | CODA7_PS_BUF_SIZE, name: "psbuf" ); |
600 | if (ret < 0) |
601 | goto err; |
602 | } |
603 | |
604 | if (!ctx->workbuf.vaddr) { |
605 | size = dev->devtype->workbuf_size; |
606 | if (dev->devtype->product == CODA_960 && |
607 | q_data->fourcc == V4L2_PIX_FMT_H264) |
608 | size += CODA9_PS_SAVE_SIZE; |
609 | ret = coda_alloc_context_buf(ctx, buf: &ctx->workbuf, size, |
610 | name: "workbuf" ); |
611 | if (ret < 0) |
612 | goto err; |
613 | } |
614 | |
615 | return 0; |
616 | |
617 | err: |
618 | coda_free_context_buffers(ctx); |
619 | return ret; |
620 | } |
621 | |
622 | static int (struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf, |
623 | int , u8 *, int *size) |
624 | { |
625 | struct vb2_buffer *vb = &buf->vb2_buf; |
626 | struct coda_dev *dev = ctx->dev; |
627 | struct coda_q_data *q_data_src; |
628 | struct v4l2_rect *r; |
629 | size_t bufsize; |
630 | int ret; |
631 | int i; |
632 | |
633 | if (dev->devtype->product == CODA_960) |
634 | memset(vb2_plane_vaddr(vb, 0), 0, 64); |
635 | |
636 | coda_write(dev, data: vb2_dma_contig_plane_dma_addr(vb, plane_no: 0), |
637 | CODA_CMD_ENC_HEADER_BB_START); |
638 | bufsize = vb2_plane_size(vb, plane_no: 0); |
639 | if (dev->devtype->product == CODA_960) |
640 | bufsize /= 1024; |
641 | coda_write(dev, data: bufsize, CODA_CMD_ENC_HEADER_BB_SIZE); |
642 | if (dev->devtype->product == CODA_960 && |
643 | ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 && |
644 | header_code == CODA_HEADER_H264_SPS) { |
645 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
646 | r = &q_data_src->rect; |
647 | |
648 | if (r->width % 16 || r->height % 16) { |
649 | u32 crop_right = round_up(r->width, 16) - r->width; |
650 | u32 crop_bottom = round_up(r->height, 16) - r->height; |
651 | |
652 | coda_write(dev, data: crop_right, |
653 | CODA9_CMD_ENC_HEADER_FRAME_CROP_H); |
654 | coda_write(dev, data: crop_bottom, |
655 | CODA9_CMD_ENC_HEADER_FRAME_CROP_V); |
656 | header_code |= CODA9_HEADER_FRAME_CROP; |
657 | } |
658 | } |
659 | coda_write(dev, data: header_code, CODA_CMD_ENC_HEADER_CODE); |
660 | ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER); |
661 | if (ret < 0) { |
662 | v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n" ); |
663 | return ret; |
664 | } |
665 | |
666 | if (dev->devtype->product == CODA_960) { |
667 | for (i = 63; i > 0; i--) |
668 | if (((char *)vb2_plane_vaddr(vb, plane_no: 0))[i] != 0) |
669 | break; |
670 | *size = i + 1; |
671 | } else { |
672 | *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) - |
673 | coda_read(dev, CODA_CMD_ENC_HEADER_BB_START); |
674 | } |
675 | memcpy(header, vb2_plane_vaddr(vb, 0), *size); |
676 | |
677 | return 0; |
678 | } |
679 | |
680 | static u32 coda_slice_mode(struct coda_ctx *ctx) |
681 | { |
682 | int size, unit; |
683 | |
684 | switch (ctx->params.slice_mode) { |
685 | case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE: |
686 | default: |
687 | return 0; |
688 | case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_MB: |
689 | size = ctx->params.slice_max_mb; |
690 | unit = 1; |
691 | break; |
692 | case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_BYTES: |
693 | size = ctx->params.slice_max_bits; |
694 | unit = 0; |
695 | break; |
696 | } |
697 | |
698 | return ((size & CODA_SLICING_SIZE_MASK) << CODA_SLICING_SIZE_OFFSET) | |
699 | ((unit & CODA_SLICING_UNIT_MASK) << CODA_SLICING_UNIT_OFFSET) | |
700 | ((1 & CODA_SLICING_MODE_MASK) << CODA_SLICING_MODE_OFFSET); |
701 | } |
702 | |
703 | static int coda_enc_param_change(struct coda_ctx *ctx) |
704 | { |
705 | struct coda_dev *dev = ctx->dev; |
706 | u32 change_enable = 0; |
707 | u32 success; |
708 | int ret; |
709 | |
710 | if (ctx->params.gop_size_changed) { |
711 | change_enable |= CODA_PARAM_CHANGE_RC_GOP; |
712 | coda_write(dev, data: ctx->params.gop_size, |
713 | CODA_CMD_ENC_PARAM_RC_GOP); |
714 | ctx->gopcounter = ctx->params.gop_size - 1; |
715 | ctx->params.gop_size_changed = false; |
716 | } |
717 | if (ctx->params.h264_intra_qp_changed) { |
718 | coda_dbg(1, ctx, "parameter change: intra Qp %u\n" , |
719 | ctx->params.h264_intra_qp); |
720 | |
721 | if (ctx->params.bitrate) { |
722 | change_enable |= CODA_PARAM_CHANGE_RC_INTRA_QP; |
723 | coda_write(dev, data: ctx->params.h264_intra_qp, |
724 | CODA_CMD_ENC_PARAM_RC_INTRA_QP); |
725 | } |
726 | ctx->params.h264_intra_qp_changed = false; |
727 | } |
728 | if (ctx->params.bitrate_changed) { |
729 | coda_dbg(1, ctx, "parameter change: bitrate %u kbit/s\n" , |
730 | ctx->params.bitrate); |
731 | change_enable |= CODA_PARAM_CHANGE_RC_BITRATE; |
732 | coda_write(dev, data: ctx->params.bitrate, |
733 | CODA_CMD_ENC_PARAM_RC_BITRATE); |
734 | ctx->params.bitrate_changed = false; |
735 | } |
736 | if (ctx->params.framerate_changed) { |
737 | coda_dbg(1, ctx, "parameter change: frame rate %u/%u Hz\n" , |
738 | ctx->params.framerate & 0xffff, |
739 | (ctx->params.framerate >> 16) + 1); |
740 | change_enable |= CODA_PARAM_CHANGE_RC_FRAME_RATE; |
741 | coda_write(dev, data: ctx->params.framerate, |
742 | CODA_CMD_ENC_PARAM_RC_FRAME_RATE); |
743 | ctx->params.framerate_changed = false; |
744 | } |
745 | if (ctx->params.intra_refresh_changed) { |
746 | coda_dbg(1, ctx, "parameter change: intra refresh MBs %u\n" , |
747 | ctx->params.intra_refresh); |
748 | change_enable |= CODA_PARAM_CHANGE_INTRA_MB_NUM; |
749 | coda_write(dev, data: ctx->params.intra_refresh, |
750 | CODA_CMD_ENC_PARAM_INTRA_MB_NUM); |
751 | ctx->params.intra_refresh_changed = false; |
752 | } |
753 | if (ctx->params.slice_mode_changed) { |
754 | change_enable |= CODA_PARAM_CHANGE_SLICE_MODE; |
755 | coda_write(dev, data: coda_slice_mode(ctx), |
756 | CODA_CMD_ENC_PARAM_SLICE_MODE); |
757 | ctx->params.slice_mode_changed = false; |
758 | } |
759 | |
760 | if (!change_enable) |
761 | return 0; |
762 | |
763 | coda_write(dev, data: change_enable, CODA_CMD_ENC_PARAM_CHANGE_ENABLE); |
764 | |
765 | ret = coda_command_sync(ctx, CODA_COMMAND_RC_CHANGE_PARAMETER); |
766 | if (ret < 0) |
767 | return ret; |
768 | |
769 | success = coda_read(dev, CODA_RET_ENC_PARAM_CHANGE_SUCCESS); |
770 | if (success != 1) |
771 | coda_dbg(1, ctx, "parameter change failed: %u\n" , success); |
772 | |
773 | return 0; |
774 | } |
775 | |
776 | static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size) |
777 | { |
778 | phys_addr_t ret; |
779 | |
780 | size = round_up(size, 1024); |
781 | if (size > iram->remaining) |
782 | return 0; |
783 | iram->remaining -= size; |
784 | |
785 | ret = iram->next_paddr; |
786 | iram->next_paddr += size; |
787 | |
788 | return ret; |
789 | } |
790 | |
791 | static void coda_setup_iram(struct coda_ctx *ctx) |
792 | { |
793 | struct coda_iram_info *iram_info = &ctx->iram_info; |
794 | struct coda_dev *dev = ctx->dev; |
795 | int w64, w128; |
796 | int mb_width; |
797 | int dbk_bits; |
798 | int bit_bits; |
799 | int ip_bits; |
800 | int me_bits; |
801 | |
802 | memset(iram_info, 0, sizeof(*iram_info)); |
803 | iram_info->next_paddr = dev->iram.paddr; |
804 | iram_info->remaining = dev->iram.size; |
805 | |
806 | if (!dev->iram.vaddr) |
807 | return; |
808 | |
809 | switch (dev->devtype->product) { |
810 | case CODA_HX4: |
811 | dbk_bits = CODA7_USE_HOST_DBK_ENABLE; |
812 | bit_bits = CODA7_USE_HOST_BIT_ENABLE; |
813 | ip_bits = CODA7_USE_HOST_IP_ENABLE; |
814 | me_bits = CODA7_USE_HOST_ME_ENABLE; |
815 | break; |
816 | case CODA_7541: |
817 | dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE; |
818 | bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE; |
819 | ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE; |
820 | me_bits = CODA7_USE_HOST_ME_ENABLE | CODA7_USE_ME_ENABLE; |
821 | break; |
822 | case CODA_960: |
823 | dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE; |
824 | bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE; |
825 | ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE; |
826 | me_bits = 0; |
827 | break; |
828 | default: /* CODA_DX6 */ |
829 | return; |
830 | } |
831 | |
832 | if (ctx->inst_type == CODA_INST_ENCODER) { |
833 | struct coda_q_data *q_data_src; |
834 | |
835 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
836 | mb_width = DIV_ROUND_UP(q_data_src->rect.width, 16); |
837 | w128 = mb_width * 128; |
838 | w64 = mb_width * 64; |
839 | |
840 | /* Prioritize in case IRAM is too small for everything */ |
841 | if (dev->devtype->product == CODA_HX4 || |
842 | dev->devtype->product == CODA_7541) { |
843 | iram_info->search_ram_size = round_up(mb_width * 16 * |
844 | 36 + 2048, 1024); |
845 | iram_info->search_ram_paddr = coda_iram_alloc(iram: iram_info, |
846 | size: iram_info->search_ram_size); |
847 | if (!iram_info->search_ram_paddr) { |
848 | pr_err("IRAM is smaller than the search ram size\n" ); |
849 | goto out; |
850 | } |
851 | iram_info->axi_sram_use |= me_bits; |
852 | } |
853 | |
854 | /* Only H.264BP and H.263P3 are considered */ |
855 | iram_info->buf_dbk_y_use = coda_iram_alloc(iram: iram_info, size: w64); |
856 | iram_info->buf_dbk_c_use = coda_iram_alloc(iram: iram_info, size: w64); |
857 | if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use) |
858 | goto out; |
859 | iram_info->axi_sram_use |= dbk_bits; |
860 | |
861 | iram_info->buf_bit_use = coda_iram_alloc(iram: iram_info, size: w128); |
862 | if (!iram_info->buf_bit_use) |
863 | goto out; |
864 | iram_info->axi_sram_use |= bit_bits; |
865 | |
866 | iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram: iram_info, size: w128); |
867 | if (!iram_info->buf_ip_ac_dc_use) |
868 | goto out; |
869 | iram_info->axi_sram_use |= ip_bits; |
870 | |
871 | /* OVL and BTP disabled for encoder */ |
872 | } else if (ctx->inst_type == CODA_INST_DECODER) { |
873 | struct coda_q_data *q_data_dst; |
874 | |
875 | q_data_dst = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_CAPTURE); |
876 | mb_width = DIV_ROUND_UP(q_data_dst->width, 16); |
877 | w128 = mb_width * 128; |
878 | |
879 | iram_info->buf_dbk_y_use = coda_iram_alloc(iram: iram_info, size: w128); |
880 | iram_info->buf_dbk_c_use = coda_iram_alloc(iram: iram_info, size: w128); |
881 | if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use) |
882 | goto out; |
883 | iram_info->axi_sram_use |= dbk_bits; |
884 | |
885 | iram_info->buf_bit_use = coda_iram_alloc(iram: iram_info, size: w128); |
886 | if (!iram_info->buf_bit_use) |
887 | goto out; |
888 | iram_info->axi_sram_use |= bit_bits; |
889 | |
890 | iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram: iram_info, size: w128); |
891 | if (!iram_info->buf_ip_ac_dc_use) |
892 | goto out; |
893 | iram_info->axi_sram_use |= ip_bits; |
894 | |
895 | /* OVL and BTP unused as there is no VC1 support yet */ |
896 | } |
897 | |
898 | out: |
899 | if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE)) |
900 | coda_dbg(1, ctx, "IRAM smaller than needed\n" ); |
901 | |
902 | if (dev->devtype->product == CODA_HX4 || |
903 | dev->devtype->product == CODA_7541) { |
904 | /* TODO - Enabling these causes picture errors on CODA7541 */ |
905 | if (ctx->inst_type == CODA_INST_DECODER) { |
906 | /* fw 1.4.50 */ |
907 | iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | |
908 | CODA7_USE_IP_ENABLE); |
909 | } else { |
910 | /* fw 13.4.29 */ |
911 | iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | |
912 | CODA7_USE_HOST_DBK_ENABLE | |
913 | CODA7_USE_IP_ENABLE | |
914 | CODA7_USE_DBK_ENABLE); |
915 | } |
916 | } |
917 | } |
918 | |
919 | static u32 coda_supported_firmwares[] = { |
920 | CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5), |
921 | CODA_FIRMWARE_VERNUM(CODA_HX4, 1, 4, 50), |
922 | CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50), |
923 | CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5), |
924 | CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 9), |
925 | CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10), |
926 | CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1), |
927 | }; |
928 | |
929 | static bool coda_firmware_supported(u32 vernum) |
930 | { |
931 | int i; |
932 | |
933 | for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++) |
934 | if (vernum == coda_supported_firmwares[i]) |
935 | return true; |
936 | return false; |
937 | } |
938 | |
939 | int coda_check_firmware(struct coda_dev *dev) |
940 | { |
941 | u16 product, major, minor, release; |
942 | u32 data; |
943 | int ret; |
944 | |
945 | ret = clk_prepare_enable(clk: dev->clk_per); |
946 | if (ret) |
947 | goto err_clk_per; |
948 | |
949 | ret = clk_prepare_enable(clk: dev->clk_ahb); |
950 | if (ret) |
951 | goto err_clk_ahb; |
952 | |
953 | coda_write(dev, data: 0, CODA_CMD_FIRMWARE_VERNUM); |
954 | coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); |
955 | coda_write(dev, data: 0, CODA_REG_BIT_RUN_INDEX); |
956 | coda_write(dev, data: 0, CODA_REG_BIT_RUN_COD_STD); |
957 | coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND); |
958 | if (coda_wait_timeout(dev)) { |
959 | v4l2_err(&dev->v4l2_dev, "firmware get command error\n" ); |
960 | ret = -EIO; |
961 | goto err_run_cmd; |
962 | } |
963 | |
964 | if (dev->devtype->product == CODA_960) { |
965 | data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV); |
966 | v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n" , |
967 | data); |
968 | } |
969 | |
970 | /* Check we are compatible with the loaded firmware */ |
971 | data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM); |
972 | product = CODA_FIRMWARE_PRODUCT(data); |
973 | major = CODA_FIRMWARE_MAJOR(data); |
974 | minor = CODA_FIRMWARE_MINOR(data); |
975 | release = CODA_FIRMWARE_RELEASE(data); |
976 | |
977 | clk_disable_unprepare(clk: dev->clk_per); |
978 | clk_disable_unprepare(clk: dev->clk_ahb); |
979 | |
980 | if (product != dev->devtype->product) { |
981 | v4l2_err(&dev->v4l2_dev, |
982 | "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n" , |
983 | coda_product_name(dev->devtype->product), |
984 | coda_product_name(product), major, minor, release); |
985 | return -EINVAL; |
986 | } |
987 | |
988 | v4l2_info(&dev->v4l2_dev, "Initialized %s.\n" , |
989 | coda_product_name(product)); |
990 | |
991 | if (coda_firmware_supported(vernum: data)) { |
992 | v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n" , |
993 | major, minor, release); |
994 | } else { |
995 | v4l2_warn(&dev->v4l2_dev, |
996 | "Unsupported firmware version: %u.%u.%u\n" , |
997 | major, minor, release); |
998 | } |
999 | |
1000 | return 0; |
1001 | |
1002 | err_run_cmd: |
1003 | clk_disable_unprepare(clk: dev->clk_ahb); |
1004 | err_clk_ahb: |
1005 | clk_disable_unprepare(clk: dev->clk_per); |
1006 | err_clk_per: |
1007 | return ret; |
1008 | } |
1009 | |
1010 | static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc) |
1011 | { |
1012 | u32 cache_size, cache_config; |
1013 | |
1014 | if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) { |
1015 | /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */ |
1016 | cache_size = 0x20262024; |
1017 | cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET; |
1018 | } else { |
1019 | /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */ |
1020 | cache_size = 0x02440243; |
1021 | cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET; |
1022 | } |
1023 | coda_write(dev: ctx->dev, data: cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE); |
1024 | if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) { |
1025 | cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET | |
1026 | 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET | |
1027 | 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET; |
1028 | } else { |
1029 | cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET | |
1030 | 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET | |
1031 | 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET; |
1032 | } |
1033 | coda_write(dev: ctx->dev, data: cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG); |
1034 | } |
1035 | |
1036 | /* |
1037 | * Encoder context operations |
1038 | */ |
1039 | |
1040 | static int coda_encoder_reqbufs(struct coda_ctx *ctx, |
1041 | struct v4l2_requestbuffers *rb) |
1042 | { |
1043 | struct coda_q_data *q_data_src; |
1044 | int ret; |
1045 | |
1046 | if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) |
1047 | return 0; |
1048 | |
1049 | if (rb->count) { |
1050 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
1051 | ret = coda_alloc_context_buffers(ctx, q_data: q_data_src); |
1052 | if (ret < 0) |
1053 | return ret; |
1054 | } else { |
1055 | coda_free_context_buffers(ctx); |
1056 | } |
1057 | |
1058 | return 0; |
1059 | } |
1060 | |
1061 | static int coda_start_encoding(struct coda_ctx *ctx) |
1062 | { |
1063 | struct coda_dev *dev = ctx->dev; |
1064 | struct v4l2_device *v4l2_dev = &dev->v4l2_dev; |
1065 | struct coda_q_data *q_data_src, *q_data_dst; |
1066 | u32 bitstream_buf, bitstream_size; |
1067 | struct vb2_v4l2_buffer *buf; |
1068 | int gamma, ret, value; |
1069 | u32 dst_fourcc; |
1070 | int num_fb; |
1071 | u32 stride; |
1072 | |
1073 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
1074 | q_data_dst = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_CAPTURE); |
1075 | dst_fourcc = q_data_dst->fourcc; |
1076 | |
1077 | buf = v4l2_m2m_next_dst_buf(m2m_ctx: ctx->fh.m2m_ctx); |
1078 | bitstream_buf = vb2_dma_contig_plane_dma_addr(vb: &buf->vb2_buf, plane_no: 0); |
1079 | bitstream_size = q_data_dst->sizeimage; |
1080 | |
1081 | if (!coda_is_initialized(dev)) { |
1082 | v4l2_err(v4l2_dev, "coda is not initialized.\n" ); |
1083 | return -EFAULT; |
1084 | } |
1085 | |
1086 | if (dst_fourcc == V4L2_PIX_FMT_JPEG) { |
1087 | if (!ctx->params.jpeg_qmat_tab[0]) { |
1088 | ctx->params.jpeg_qmat_tab[0] = kmalloc(size: 64, GFP_KERNEL); |
1089 | if (!ctx->params.jpeg_qmat_tab[0]) |
1090 | return -ENOMEM; |
1091 | } |
1092 | if (!ctx->params.jpeg_qmat_tab[1]) { |
1093 | ctx->params.jpeg_qmat_tab[1] = kmalloc(size: 64, GFP_KERNEL); |
1094 | if (!ctx->params.jpeg_qmat_tab[1]) |
1095 | return -ENOMEM; |
1096 | } |
1097 | coda_set_jpeg_compression_quality(ctx, quality: ctx->params.jpeg_quality); |
1098 | } |
1099 | |
1100 | mutex_lock(&dev->coda_mutex); |
1101 | |
1102 | coda_write(dev, data: ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); |
1103 | coda_write(dev, data: bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); |
1104 | coda_write(dev, data: bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); |
1105 | switch (dev->devtype->product) { |
1106 | case CODA_DX6: |
1107 | coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN | |
1108 | CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL); |
1109 | break; |
1110 | case CODA_960: |
1111 | coda_write(dev, data: 0, CODA9_GDI_WPROT_RGN_EN); |
1112 | fallthrough; |
1113 | case CODA_HX4: |
1114 | case CODA_7541: |
1115 | coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN | |
1116 | CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL); |
1117 | break; |
1118 | } |
1119 | |
1120 | ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | |
1121 | CODA9_FRAME_TILED2LINEAR); |
1122 | if (q_data_src->fourcc == V4L2_PIX_FMT_NV12) |
1123 | ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; |
1124 | if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) |
1125 | ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR; |
1126 | coda_write(dev, data: ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL); |
1127 | |
1128 | if (dev->devtype->product == CODA_DX6) { |
1129 | /* Configure the coda */ |
1130 | coda_write(dev, data: dev->iram.paddr, |
1131 | CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR); |
1132 | } |
1133 | |
1134 | /* Could set rotation here if needed */ |
1135 | value = 0; |
1136 | switch (dev->devtype->product) { |
1137 | case CODA_DX6: |
1138 | value = (q_data_src->rect.width & CODADX6_PICWIDTH_MASK) |
1139 | << CODADX6_PICWIDTH_OFFSET; |
1140 | value |= (q_data_src->rect.height & CODADX6_PICHEIGHT_MASK) |
1141 | << CODA_PICHEIGHT_OFFSET; |
1142 | break; |
1143 | case CODA_HX4: |
1144 | case CODA_7541: |
1145 | if (dst_fourcc == V4L2_PIX_FMT_H264) { |
1146 | value = (round_up(q_data_src->rect.width, 16) & |
1147 | CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET; |
1148 | value |= (round_up(q_data_src->rect.height, 16) & |
1149 | CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET; |
1150 | break; |
1151 | } |
1152 | fallthrough; |
1153 | case CODA_960: |
1154 | value = (q_data_src->rect.width & CODA7_PICWIDTH_MASK) |
1155 | << CODA7_PICWIDTH_OFFSET; |
1156 | value |= (q_data_src->rect.height & CODA7_PICHEIGHT_MASK) |
1157 | << CODA_PICHEIGHT_OFFSET; |
1158 | } |
1159 | coda_write(dev, data: value, CODA_CMD_ENC_SEQ_SRC_SIZE); |
1160 | if (dst_fourcc == V4L2_PIX_FMT_JPEG) |
1161 | ctx->params.framerate = 0; |
1162 | coda_write(dev, data: ctx->params.framerate, |
1163 | CODA_CMD_ENC_SEQ_SRC_F_RATE); |
1164 | |
1165 | ctx->params.codec_mode = ctx->codec->mode; |
1166 | switch (dst_fourcc) { |
1167 | case V4L2_PIX_FMT_MPEG4: |
1168 | if (dev->devtype->product == CODA_960) |
1169 | coda_write(dev, CODA9_STD_MPEG4, |
1170 | CODA_CMD_ENC_SEQ_COD_STD); |
1171 | else |
1172 | coda_write(dev, CODA_STD_MPEG4, |
1173 | CODA_CMD_ENC_SEQ_COD_STD); |
1174 | coda_write(dev, data: 0, CODA_CMD_ENC_SEQ_MP4_PARA); |
1175 | break; |
1176 | case V4L2_PIX_FMT_H264: |
1177 | if (dev->devtype->product == CODA_960) |
1178 | coda_write(dev, CODA9_STD_H264, |
1179 | CODA_CMD_ENC_SEQ_COD_STD); |
1180 | else |
1181 | coda_write(dev, CODA_STD_H264, |
1182 | CODA_CMD_ENC_SEQ_COD_STD); |
1183 | value = ((ctx->params.h264_disable_deblocking_filter_idc & |
1184 | CODA_264PARAM_DISABLEDEBLK_MASK) << |
1185 | CODA_264PARAM_DISABLEDEBLK_OFFSET) | |
1186 | ((ctx->params.h264_slice_alpha_c0_offset_div2 & |
1187 | CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) << |
1188 | CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) | |
1189 | ((ctx->params.h264_slice_beta_offset_div2 & |
1190 | CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) << |
1191 | CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET) | |
1192 | (ctx->params.h264_constrained_intra_pred_flag << |
1193 | CODA_264PARAM_CONSTRAINEDINTRAPREDFLAG_OFFSET) | |
1194 | (ctx->params.h264_chroma_qp_index_offset & |
1195 | CODA_264PARAM_CHROMAQPOFFSET_MASK); |
1196 | coda_write(dev, data: value, CODA_CMD_ENC_SEQ_264_PARA); |
1197 | break; |
1198 | case V4L2_PIX_FMT_JPEG: |
1199 | coda_write(dev, data: 0, CODA_CMD_ENC_SEQ_JPG_PARA); |
1200 | coda_write(dev, data: ctx->params.jpeg_restart_interval, |
1201 | CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL); |
1202 | coda_write(dev, data: 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN); |
1203 | coda_write(dev, data: 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE); |
1204 | coda_write(dev, data: 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET); |
1205 | |
1206 | coda_jpeg_write_tables(ctx); |
1207 | break; |
1208 | default: |
1209 | v4l2_err(v4l2_dev, |
1210 | "dst format (0x%08x) invalid.\n" , dst_fourcc); |
1211 | ret = -EINVAL; |
1212 | goto out; |
1213 | } |
1214 | |
1215 | /* |
1216 | * slice mode and GOP size registers are used for thumb size/offset |
1217 | * in JPEG mode |
1218 | */ |
1219 | if (dst_fourcc != V4L2_PIX_FMT_JPEG) { |
1220 | value = coda_slice_mode(ctx); |
1221 | coda_write(dev, data: value, CODA_CMD_ENC_SEQ_SLICE_MODE); |
1222 | value = ctx->params.gop_size; |
1223 | coda_write(dev, data: value, CODA_CMD_ENC_SEQ_GOP_SIZE); |
1224 | } |
1225 | |
1226 | if (ctx->params.bitrate && (ctx->params.frame_rc_enable || |
1227 | ctx->params.mb_rc_enable)) { |
1228 | ctx->params.bitrate_changed = false; |
1229 | ctx->params.h264_intra_qp_changed = false; |
1230 | |
1231 | /* Rate control enabled */ |
1232 | value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK) |
1233 | << CODA_RATECONTROL_BITRATE_OFFSET; |
1234 | value |= 1 & CODA_RATECONTROL_ENABLE_MASK; |
1235 | value |= (ctx->params.vbv_delay & |
1236 | CODA_RATECONTROL_INITIALDELAY_MASK) |
1237 | << CODA_RATECONTROL_INITIALDELAY_OFFSET; |
1238 | if (dev->devtype->product == CODA_960) |
1239 | value |= BIT(31); /* disable autoskip */ |
1240 | } else { |
1241 | value = 0; |
1242 | } |
1243 | coda_write(dev, data: value, CODA_CMD_ENC_SEQ_RC_PARA); |
1244 | |
1245 | coda_write(dev, data: ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE); |
1246 | coda_write(dev, data: ctx->params.intra_refresh, |
1247 | CODA_CMD_ENC_SEQ_INTRA_REFRESH); |
1248 | |
1249 | coda_write(dev, data: bitstream_buf, CODA_CMD_ENC_SEQ_BB_START); |
1250 | coda_write(dev, data: bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE); |
1251 | |
1252 | |
1253 | value = 0; |
1254 | if (dev->devtype->product == CODA_960) |
1255 | gamma = CODA9_DEFAULT_GAMMA; |
1256 | else |
1257 | gamma = CODA_DEFAULT_GAMMA; |
1258 | if (gamma > 0) { |
1259 | coda_write(dev, data: (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET, |
1260 | CODA_CMD_ENC_SEQ_RC_GAMMA); |
1261 | } |
1262 | |
1263 | if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) { |
1264 | coda_write(dev, |
1265 | data: ctx->params.h264_min_qp << CODA_QPMIN_OFFSET | |
1266 | ctx->params.h264_max_qp << CODA_QPMAX_OFFSET, |
1267 | CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX); |
1268 | } |
1269 | if (dev->devtype->product == CODA_960) { |
1270 | if (ctx->params.h264_max_qp) |
1271 | value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET; |
1272 | if (CODA_DEFAULT_GAMMA > 0) |
1273 | value |= 1 << CODA9_OPTION_GAMMA_OFFSET; |
1274 | } else { |
1275 | if (CODA_DEFAULT_GAMMA > 0) { |
1276 | if (dev->devtype->product == CODA_DX6) |
1277 | value |= 1 << CODADX6_OPTION_GAMMA_OFFSET; |
1278 | else |
1279 | value |= 1 << CODA7_OPTION_GAMMA_OFFSET; |
1280 | } |
1281 | if (ctx->params.h264_min_qp) |
1282 | value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET; |
1283 | if (ctx->params.h264_max_qp) |
1284 | value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET; |
1285 | } |
1286 | coda_write(dev, data: value, CODA_CMD_ENC_SEQ_OPTION); |
1287 | |
1288 | if (ctx->params.frame_rc_enable && !ctx->params.mb_rc_enable) |
1289 | value = 1; |
1290 | else |
1291 | value = 0; |
1292 | coda_write(dev, data: value, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE); |
1293 | |
1294 | coda_setup_iram(ctx); |
1295 | |
1296 | if (dst_fourcc == V4L2_PIX_FMT_H264) { |
1297 | switch (dev->devtype->product) { |
1298 | case CODA_DX6: |
1299 | value = FMO_SLICE_SAVE_BUF_SIZE << 7; |
1300 | coda_write(dev, data: value, CODADX6_CMD_ENC_SEQ_FMO); |
1301 | break; |
1302 | case CODA_HX4: |
1303 | case CODA_7541: |
1304 | coda_write(dev, data: ctx->iram_info.search_ram_paddr, |
1305 | CODA7_CMD_ENC_SEQ_SEARCH_BASE); |
1306 | coda_write(dev, data: ctx->iram_info.search_ram_size, |
1307 | CODA7_CMD_ENC_SEQ_SEARCH_SIZE); |
1308 | break; |
1309 | case CODA_960: |
1310 | coda_write(dev, data: 0, CODA9_CMD_ENC_SEQ_ME_OPTION); |
1311 | coda_write(dev, data: 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT); |
1312 | } |
1313 | } |
1314 | |
1315 | ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT); |
1316 | if (ret < 0) { |
1317 | v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n" ); |
1318 | goto out; |
1319 | } |
1320 | |
1321 | if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) { |
1322 | v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n" ); |
1323 | ret = -EFAULT; |
1324 | goto out; |
1325 | } |
1326 | ctx->initialized = 1; |
1327 | |
1328 | if (dst_fourcc != V4L2_PIX_FMT_JPEG) { |
1329 | if (dev->devtype->product == CODA_960) |
1330 | ctx->num_internal_frames = 4; |
1331 | else |
1332 | ctx->num_internal_frames = 2; |
1333 | ret = coda_alloc_framebuffers(ctx, q_data: q_data_src, fourcc: dst_fourcc); |
1334 | if (ret < 0) { |
1335 | v4l2_err(v4l2_dev, "failed to allocate framebuffers\n" ); |
1336 | goto out; |
1337 | } |
1338 | num_fb = 2; |
1339 | stride = q_data_src->bytesperline; |
1340 | } else { |
1341 | ctx->num_internal_frames = 0; |
1342 | num_fb = 0; |
1343 | stride = 0; |
1344 | } |
1345 | coda_write(dev, data: num_fb, CODA_CMD_SET_FRAME_BUF_NUM); |
1346 | coda_write(dev, data: stride, CODA_CMD_SET_FRAME_BUF_STRIDE); |
1347 | |
1348 | if (dev->devtype->product == CODA_HX4 || |
1349 | dev->devtype->product == CODA_7541) { |
1350 | coda_write(dev, data: q_data_src->bytesperline, |
1351 | CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE); |
1352 | } |
1353 | if (dev->devtype->product != CODA_DX6) { |
1354 | coda_write(dev, data: ctx->iram_info.buf_bit_use, |
1355 | CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); |
1356 | coda_write(dev, data: ctx->iram_info.buf_ip_ac_dc_use, |
1357 | CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); |
1358 | coda_write(dev, data: ctx->iram_info.buf_dbk_y_use, |
1359 | CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); |
1360 | coda_write(dev, data: ctx->iram_info.buf_dbk_c_use, |
1361 | CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); |
1362 | coda_write(dev, data: ctx->iram_info.buf_ovl_use, |
1363 | CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); |
1364 | if (dev->devtype->product == CODA_960) { |
1365 | coda_write(dev, data: ctx->iram_info.buf_btp_use, |
1366 | CODA9_CMD_SET_FRAME_AXI_BTP_ADDR); |
1367 | |
1368 | coda9_set_frame_cache(ctx, fourcc: q_data_src->fourcc); |
1369 | |
1370 | /* FIXME */ |
1371 | coda_write(dev, data: ctx->internal_frames[2].buf.paddr, |
1372 | CODA9_CMD_SET_FRAME_SUBSAMP_A); |
1373 | coda_write(dev, data: ctx->internal_frames[3].buf.paddr, |
1374 | CODA9_CMD_SET_FRAME_SUBSAMP_B); |
1375 | } |
1376 | } |
1377 | |
1378 | ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF); |
1379 | if (ret < 0) { |
1380 | v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n" ); |
1381 | goto out; |
1382 | } |
1383 | |
1384 | coda_dbg(1, ctx, "start encoding %dx%d %4.4s->%4.4s @ %d/%d Hz\n" , |
1385 | q_data_src->rect.width, q_data_src->rect.height, |
1386 | (char *)&ctx->codec->src_fourcc, (char *)&dst_fourcc, |
1387 | ctx->params.framerate & 0xffff, |
1388 | (ctx->params.framerate >> 16) + 1); |
1389 | |
1390 | /* Save stream headers */ |
1391 | buf = v4l2_m2m_next_dst_buf(m2m_ctx: ctx->fh.m2m_ctx); |
1392 | switch (dst_fourcc) { |
1393 | case V4L2_PIX_FMT_H264: |
1394 | /* |
1395 | * Get SPS in the first frame and copy it to an |
1396 | * intermediate buffer. |
1397 | */ |
1398 | ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS, |
1399 | header: &ctx->vpu_header[0][0], |
1400 | size: &ctx->vpu_header_size[0]); |
1401 | if (ret < 0) |
1402 | goto out; |
1403 | |
1404 | /* |
1405 | * If visible width or height are not aligned to macroblock |
1406 | * size, the crop_right and crop_bottom SPS fields must be set |
1407 | * to the difference between visible and coded size. This is |
1408 | * only supported by CODA960 firmware. All others do not allow |
1409 | * writing frame cropping parameters, so we have to manually |
1410 | * fix up the SPS RBSP (Sequence Parameter Set Raw Byte |
1411 | * Sequence Payload) ourselves. |
1412 | */ |
1413 | if (ctx->dev->devtype->product != CODA_960 && |
1414 | ((q_data_src->rect.width % 16) || |
1415 | (q_data_src->rect.height % 16))) { |
1416 | ret = coda_h264_sps_fixup(ctx, width: q_data_src->rect.width, |
1417 | height: q_data_src->rect.height, |
1418 | buf: &ctx->vpu_header[0][0], |
1419 | size: &ctx->vpu_header_size[0], |
1420 | max_size: sizeof(ctx->vpu_header[0])); |
1421 | if (ret < 0) |
1422 | goto out; |
1423 | } |
1424 | |
1425 | /* |
1426 | * Get PPS in the first frame and copy it to an |
1427 | * intermediate buffer. |
1428 | */ |
1429 | ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS, |
1430 | header: &ctx->vpu_header[1][0], |
1431 | size: &ctx->vpu_header_size[1]); |
1432 | if (ret < 0) |
1433 | goto out; |
1434 | |
1435 | /* |
1436 | * Length of H.264 headers is variable and thus it might not be |
1437 | * aligned for the coda to append the encoded frame. In that is |
1438 | * the case a filler NAL must be added to header 2. |
1439 | */ |
1440 | ctx->vpu_header_size[2] = coda_h264_padding( |
1441 | size: (ctx->vpu_header_size[0] + |
1442 | ctx->vpu_header_size[1]), |
1443 | p: ctx->vpu_header[2]); |
1444 | break; |
1445 | case V4L2_PIX_FMT_MPEG4: |
1446 | /* |
1447 | * Get VOS in the first frame and copy it to an |
1448 | * intermediate buffer |
1449 | */ |
1450 | ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS, |
1451 | header: &ctx->vpu_header[0][0], |
1452 | size: &ctx->vpu_header_size[0]); |
1453 | if (ret < 0) |
1454 | goto out; |
1455 | |
1456 | ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS, |
1457 | header: &ctx->vpu_header[1][0], |
1458 | size: &ctx->vpu_header_size[1]); |
1459 | if (ret < 0) |
1460 | goto out; |
1461 | |
1462 | ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL, |
1463 | header: &ctx->vpu_header[2][0], |
1464 | size: &ctx->vpu_header_size[2]); |
1465 | if (ret < 0) |
1466 | goto out; |
1467 | break; |
1468 | default: |
1469 | /* No more formats need to save headers at the moment */ |
1470 | break; |
1471 | } |
1472 | |
1473 | out: |
1474 | mutex_unlock(lock: &dev->coda_mutex); |
1475 | return ret; |
1476 | } |
1477 | |
1478 | static int coda_prepare_encode(struct coda_ctx *ctx) |
1479 | { |
1480 | struct coda_q_data *q_data_src, *q_data_dst; |
1481 | struct vb2_v4l2_buffer *src_buf, *dst_buf; |
1482 | struct coda_dev *dev = ctx->dev; |
1483 | int force_ipicture; |
1484 | int quant_param = 0; |
1485 | u32 pic_stream_buffer_addr, pic_stream_buffer_size; |
1486 | u32 rot_mode = 0; |
1487 | u32 dst_fourcc; |
1488 | u32 reg; |
1489 | int ret; |
1490 | |
1491 | ret = coda_enc_param_change(ctx); |
1492 | if (ret < 0) { |
1493 | v4l2_warn(&ctx->dev->v4l2_dev, "parameter change failed: %d\n" , |
1494 | ret); |
1495 | } |
1496 | |
1497 | src_buf = v4l2_m2m_next_src_buf(m2m_ctx: ctx->fh.m2m_ctx); |
1498 | dst_buf = v4l2_m2m_next_dst_buf(m2m_ctx: ctx->fh.m2m_ctx); |
1499 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
1500 | q_data_dst = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_CAPTURE); |
1501 | dst_fourcc = q_data_dst->fourcc; |
1502 | |
1503 | src_buf->sequence = ctx->osequence; |
1504 | dst_buf->sequence = ctx->osequence; |
1505 | ctx->osequence++; |
1506 | |
1507 | force_ipicture = ctx->params.force_ipicture; |
1508 | if (force_ipicture) |
1509 | ctx->params.force_ipicture = false; |
1510 | else if (ctx->params.gop_size != 0 && |
1511 | (src_buf->sequence % ctx->params.gop_size) == 0) |
1512 | force_ipicture = 1; |
1513 | |
1514 | /* |
1515 | * Workaround coda firmware BUG that only marks the first |
1516 | * frame as IDR. This is a problem for some decoders that can't |
1517 | * recover when a frame is lost. |
1518 | */ |
1519 | if (!force_ipicture) { |
1520 | src_buf->flags |= V4L2_BUF_FLAG_PFRAME; |
1521 | src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME; |
1522 | } else { |
1523 | src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME; |
1524 | src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME; |
1525 | } |
1526 | |
1527 | if (dev->devtype->product == CODA_960) |
1528 | coda_set_gdi_regs(ctx); |
1529 | |
1530 | /* |
1531 | * Copy headers in front of the first frame and forced I frames for |
1532 | * H.264 only. In MPEG4 they are already copied by the CODA. |
1533 | */ |
1534 | if (src_buf->sequence == 0 || force_ipicture) { |
1535 | pic_stream_buffer_addr = |
1536 | vb2_dma_contig_plane_dma_addr(vb: &dst_buf->vb2_buf, plane_no: 0) + |
1537 | ctx->vpu_header_size[0] + |
1538 | ctx->vpu_header_size[1] + |
1539 | ctx->vpu_header_size[2]; |
1540 | pic_stream_buffer_size = q_data_dst->sizeimage - |
1541 | ctx->vpu_header_size[0] - |
1542 | ctx->vpu_header_size[1] - |
1543 | ctx->vpu_header_size[2]; |
1544 | memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0), |
1545 | &ctx->vpu_header[0][0], ctx->vpu_header_size[0]); |
1546 | memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) |
1547 | + ctx->vpu_header_size[0], &ctx->vpu_header[1][0], |
1548 | ctx->vpu_header_size[1]); |
1549 | memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) |
1550 | + ctx->vpu_header_size[0] + ctx->vpu_header_size[1], |
1551 | &ctx->vpu_header[2][0], ctx->vpu_header_size[2]); |
1552 | } else { |
1553 | pic_stream_buffer_addr = |
1554 | vb2_dma_contig_plane_dma_addr(vb: &dst_buf->vb2_buf, plane_no: 0); |
1555 | pic_stream_buffer_size = q_data_dst->sizeimage; |
1556 | } |
1557 | |
1558 | if (force_ipicture) { |
1559 | switch (dst_fourcc) { |
1560 | case V4L2_PIX_FMT_H264: |
1561 | quant_param = ctx->params.h264_intra_qp; |
1562 | break; |
1563 | case V4L2_PIX_FMT_MPEG4: |
1564 | quant_param = ctx->params.mpeg4_intra_qp; |
1565 | break; |
1566 | case V4L2_PIX_FMT_JPEG: |
1567 | quant_param = 30; |
1568 | break; |
1569 | default: |
1570 | v4l2_warn(&ctx->dev->v4l2_dev, |
1571 | "cannot set intra qp, fmt not supported\n" ); |
1572 | break; |
1573 | } |
1574 | } else { |
1575 | switch (dst_fourcc) { |
1576 | case V4L2_PIX_FMT_H264: |
1577 | quant_param = ctx->params.h264_inter_qp; |
1578 | break; |
1579 | case V4L2_PIX_FMT_MPEG4: |
1580 | quant_param = ctx->params.mpeg4_inter_qp; |
1581 | break; |
1582 | default: |
1583 | v4l2_warn(&ctx->dev->v4l2_dev, |
1584 | "cannot set inter qp, fmt not supported\n" ); |
1585 | break; |
1586 | } |
1587 | } |
1588 | |
1589 | /* submit */ |
1590 | if (ctx->params.rot_mode) |
1591 | rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode; |
1592 | coda_write(dev, data: rot_mode, CODA_CMD_ENC_PIC_ROT_MODE); |
1593 | coda_write(dev, data: quant_param, CODA_CMD_ENC_PIC_QS); |
1594 | |
1595 | if (dev->devtype->product == CODA_960) { |
1596 | coda_write(dev, data: 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX); |
1597 | coda_write(dev, data: q_data_src->bytesperline, |
1598 | CODA9_CMD_ENC_PIC_SRC_STRIDE); |
1599 | coda_write(dev, data: 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC); |
1600 | |
1601 | reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y; |
1602 | } else { |
1603 | reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y; |
1604 | } |
1605 | coda_write_base(ctx, q_data: q_data_src, buf: src_buf, reg_y: reg); |
1606 | |
1607 | coda_write(dev, data: force_ipicture << 1 & 0x2, |
1608 | CODA_CMD_ENC_PIC_OPTION); |
1609 | |
1610 | coda_write(dev, data: pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START); |
1611 | coda_write(dev, data: pic_stream_buffer_size / 1024, |
1612 | CODA_CMD_ENC_PIC_BB_SIZE); |
1613 | |
1614 | if (!ctx->streamon_out) { |
1615 | /* After streamoff on the output side, set stream end flag */ |
1616 | ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; |
1617 | coda_write(dev, data: ctx->bit_stream_param, |
1618 | CODA_REG_BIT_BIT_STREAM_PARAM); |
1619 | } |
1620 | |
1621 | if (dev->devtype->product != CODA_DX6) |
1622 | coda_write(dev, data: ctx->iram_info.axi_sram_use, |
1623 | CODA7_REG_BIT_AXI_SRAM_USE); |
1624 | |
1625 | trace_coda_enc_pic_run(ctx, buf: src_buf); |
1626 | |
1627 | coda_command_async(ctx, CODA_COMMAND_PIC_RUN); |
1628 | |
1629 | return 0; |
1630 | } |
1631 | |
1632 | static char coda_frame_type_char(u32 flags) |
1633 | { |
1634 | return (flags & V4L2_BUF_FLAG_KEYFRAME) ? 'I' : |
1635 | (flags & V4L2_BUF_FLAG_PFRAME) ? 'P' : |
1636 | (flags & V4L2_BUF_FLAG_BFRAME) ? 'B' : '?'; |
1637 | } |
1638 | |
1639 | static void coda_finish_encode(struct coda_ctx *ctx) |
1640 | { |
1641 | struct vb2_v4l2_buffer *src_buf, *dst_buf; |
1642 | struct coda_dev *dev = ctx->dev; |
1643 | u32 wr_ptr, start_ptr; |
1644 | |
1645 | if (ctx->aborting) |
1646 | return; |
1647 | |
1648 | /* |
1649 | * Lock to make sure that an encoder stop command running in parallel |
1650 | * will either already have marked src_buf as last, or it will wake up |
1651 | * the capture queue after the buffers are returned. |
1652 | */ |
1653 | mutex_lock(&ctx->wakeup_mutex); |
1654 | src_buf = v4l2_m2m_src_buf_remove(m2m_ctx: ctx->fh.m2m_ctx); |
1655 | dst_buf = v4l2_m2m_next_dst_buf(m2m_ctx: ctx->fh.m2m_ctx); |
1656 | |
1657 | trace_coda_enc_pic_done(ctx, buf: dst_buf); |
1658 | |
1659 | /* Get results from the coda */ |
1660 | start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START); |
1661 | wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); |
1662 | |
1663 | /* Calculate bytesused field */ |
1664 | if (dst_buf->sequence == 0 || |
1665 | src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) { |
1666 | vb2_set_plane_payload(vb: &dst_buf->vb2_buf, plane_no: 0, size: wr_ptr - start_ptr + |
1667 | ctx->vpu_header_size[0] + |
1668 | ctx->vpu_header_size[1] + |
1669 | ctx->vpu_header_size[2]); |
1670 | } else { |
1671 | vb2_set_plane_payload(vb: &dst_buf->vb2_buf, plane_no: 0, size: wr_ptr - start_ptr); |
1672 | } |
1673 | |
1674 | coda_dbg(1, ctx, "frame size = %u\n" , wr_ptr - start_ptr); |
1675 | |
1676 | coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM); |
1677 | coda_read(dev, CODA_RET_ENC_PIC_FLAG); |
1678 | |
1679 | dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME | |
1680 | V4L2_BUF_FLAG_PFRAME | |
1681 | V4L2_BUF_FLAG_LAST); |
1682 | if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) |
1683 | dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME; |
1684 | else |
1685 | dst_buf->flags |= V4L2_BUF_FLAG_PFRAME; |
1686 | dst_buf->flags |= src_buf->flags & V4L2_BUF_FLAG_LAST; |
1687 | |
1688 | v4l2_m2m_buf_copy_metadata(out_vb: src_buf, cap_vb: dst_buf, copy_frame_flags: false); |
1689 | |
1690 | v4l2_m2m_buf_done(buf: src_buf, state: VB2_BUF_STATE_DONE); |
1691 | |
1692 | dst_buf = v4l2_m2m_dst_buf_remove(m2m_ctx: ctx->fh.m2m_ctx); |
1693 | coda_m2m_buf_done(ctx, buf: dst_buf, state: VB2_BUF_STATE_DONE); |
1694 | mutex_unlock(lock: &ctx->wakeup_mutex); |
1695 | |
1696 | ctx->gopcounter--; |
1697 | if (ctx->gopcounter < 0) |
1698 | ctx->gopcounter = ctx->params.gop_size - 1; |
1699 | |
1700 | coda_dbg(1, ctx, "job finished: encoded %c frame (%d)%s\n" , |
1701 | coda_frame_type_char(dst_buf->flags), dst_buf->sequence, |
1702 | (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? " (last)" : "" ); |
1703 | } |
1704 | |
1705 | static void coda_seq_end_work(struct work_struct *work) |
1706 | { |
1707 | struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work); |
1708 | struct coda_dev *dev = ctx->dev; |
1709 | |
1710 | mutex_lock(&ctx->buffer_mutex); |
1711 | mutex_lock(&dev->coda_mutex); |
1712 | |
1713 | if (ctx->initialized == 0) |
1714 | goto out; |
1715 | |
1716 | coda_dbg(1, ctx, "%s: sent command 'SEQ_END' to coda\n" , __func__); |
1717 | if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) { |
1718 | v4l2_err(&dev->v4l2_dev, |
1719 | "CODA_COMMAND_SEQ_END failed\n" ); |
1720 | } |
1721 | |
1722 | /* |
1723 | * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing |
1724 | * from the output stream after the h.264 decoder has run. Resetting the |
1725 | * hardware after the decoder has finished seems to help. |
1726 | */ |
1727 | if (dev->devtype->product == CODA_960) |
1728 | coda_hw_reset(ctx); |
1729 | |
1730 | kfifo_init(&ctx->bitstream_fifo, |
1731 | ctx->bitstream.vaddr, ctx->bitstream.size); |
1732 | |
1733 | coda_free_framebuffers(ctx); |
1734 | |
1735 | ctx->initialized = 0; |
1736 | |
1737 | out: |
1738 | mutex_unlock(lock: &dev->coda_mutex); |
1739 | mutex_unlock(lock: &ctx->buffer_mutex); |
1740 | } |
1741 | |
1742 | static void coda_bit_release(struct coda_ctx *ctx) |
1743 | { |
1744 | mutex_lock(&ctx->buffer_mutex); |
1745 | coda_free_framebuffers(ctx); |
1746 | coda_free_context_buffers(ctx); |
1747 | coda_free_bitstream_buffer(ctx); |
1748 | mutex_unlock(lock: &ctx->buffer_mutex); |
1749 | } |
1750 | |
/*
 * Context operations for encoder instances driven by the BIT processor.
 * Not static: the table is visible outside this file.
 */
const struct coda_context_ops coda_bit_encode_ops = {
	.queue_init = coda_encoder_queue_init,
	/* allocates or frees the context buffers with the OUTPUT queue */
	.reqbufs = coda_encoder_reqbufs,
	/* runs SEQ_INIT and SET_FRAME_BUF and saves the stream headers */
	.start_streaming = coda_start_encoding,
	/* programs and starts PIC_RUN for the next buffer pair */
	.prepare_run = coda_prepare_encode,
	/* collects the PIC_RUN result and returns the buffers */
	.finish_run = coda_finish_encode,
	/* issues SEQ_END and frees the internal frame buffers */
	.seq_end_work = coda_seq_end_work,
	/* frees the frame, context and bitstream buffers */
	.release = coda_bit_release,
};
1760 | |
1761 | /* |
1762 | * Decoder context operations |
1763 | */ |
1764 | |
1765 | static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx, |
1766 | struct coda_q_data *q_data) |
1767 | { |
1768 | if (ctx->bitstream.vaddr) |
1769 | return 0; |
1770 | |
1771 | ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2); |
1772 | ctx->bitstream.vaddr = dma_alloc_wc(dev: ctx->dev->dev, size: ctx->bitstream.size, |
1773 | dma_addr: &ctx->bitstream.paddr, GFP_KERNEL); |
1774 | if (!ctx->bitstream.vaddr) { |
1775 | v4l2_err(&ctx->dev->v4l2_dev, |
1776 | "failed to allocate bitstream ringbuffer" ); |
1777 | return -ENOMEM; |
1778 | } |
1779 | kfifo_init(&ctx->bitstream_fifo, |
1780 | ctx->bitstream.vaddr, ctx->bitstream.size); |
1781 | |
1782 | return 0; |
1783 | } |
1784 | |
1785 | static void coda_free_bitstream_buffer(struct coda_ctx *ctx) |
1786 | { |
1787 | if (ctx->bitstream.vaddr == NULL) |
1788 | return; |
1789 | |
1790 | dma_free_wc(dev: ctx->dev->dev, size: ctx->bitstream.size, cpu_addr: ctx->bitstream.vaddr, |
1791 | dma_addr: ctx->bitstream.paddr); |
1792 | ctx->bitstream.vaddr = NULL; |
1793 | kfifo_init(&ctx->bitstream_fifo, NULL, 0); |
1794 | } |
1795 | |
1796 | static int coda_decoder_reqbufs(struct coda_ctx *ctx, |
1797 | struct v4l2_requestbuffers *rb) |
1798 | { |
1799 | struct coda_q_data *q_data_src; |
1800 | int ret; |
1801 | |
1802 | if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) |
1803 | return 0; |
1804 | |
1805 | if (rb->count) { |
1806 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
1807 | ret = coda_alloc_context_buffers(ctx, q_data: q_data_src); |
1808 | if (ret < 0) |
1809 | return ret; |
1810 | ret = coda_alloc_bitstream_buffer(ctx, q_data: q_data_src); |
1811 | if (ret < 0) { |
1812 | coda_free_context_buffers(ctx); |
1813 | return ret; |
1814 | } |
1815 | } else { |
1816 | coda_free_bitstream_buffer(ctx); |
1817 | coda_free_context_buffers(ctx); |
1818 | } |
1819 | |
1820 | return 0; |
1821 | } |
1822 | |
1823 | static bool coda_reorder_enable(struct coda_ctx *ctx) |
1824 | { |
1825 | struct coda_dev *dev = ctx->dev; |
1826 | int profile; |
1827 | |
1828 | if (dev->devtype->product != CODA_HX4 && |
1829 | dev->devtype->product != CODA_7541 && |
1830 | dev->devtype->product != CODA_960) |
1831 | return false; |
1832 | |
1833 | if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) |
1834 | return false; |
1835 | |
1836 | if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264) |
1837 | return true; |
1838 | |
1839 | profile = coda_h264_profile(profile_idc: ctx->params.h264_profile_idc); |
1840 | if (profile < 0) |
1841 | v4l2_warn(&dev->v4l2_dev, "Unknown H264 Profile: %u\n" , |
1842 | ctx->params.h264_profile_idc); |
1843 | |
1844 | /* Baseline profile does not support reordering */ |
1845 | return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; |
1846 | } |
1847 | |
1848 | static void coda_decoder_drop_used_metas(struct coda_ctx *ctx) |
1849 | { |
1850 | struct coda_buffer_meta *meta, *tmp; |
1851 | |
1852 | /* |
1853 | * All metas that end at or before the RD pointer (fifo out), |
1854 | * are now consumed by the VPU and should be released. |
1855 | */ |
1856 | spin_lock(lock: &ctx->buffer_meta_lock); |
1857 | list_for_each_entry_safe(meta, tmp, &ctx->buffer_meta_list, list) { |
1858 | if (ctx->bitstream_fifo.kfifo.out >= meta->end) { |
1859 | coda_dbg(2, ctx, "releasing meta: seq=%d start=%d end=%d\n" , |
1860 | meta->sequence, meta->start, meta->end); |
1861 | |
1862 | list_del(entry: &meta->list); |
1863 | ctx->num_metas--; |
1864 | ctx->first_frame_sequence++; |
1865 | kfree(objp: meta); |
1866 | } |
1867 | } |
1868 | spin_unlock(lock: &ctx->buffer_meta_lock); |
1869 | } |
1870 | |
1871 | static int __coda_decoder_seq_init(struct coda_ctx *ctx) |
1872 | { |
1873 | struct coda_q_data *q_data_src, *q_data_dst; |
1874 | u32 bitstream_buf, bitstream_size; |
1875 | struct coda_dev *dev = ctx->dev; |
1876 | int width, height; |
1877 | u32 src_fourcc, dst_fourcc; |
1878 | u32 val; |
1879 | int ret; |
1880 | |
1881 | lockdep_assert_held(&dev->coda_mutex); |
1882 | |
1883 | coda_dbg(1, ctx, "Video Data Order Adapter: %s\n" , |
1884 | ctx->use_vdoa ? "Enabled" : "Disabled" ); |
1885 | |
1886 | /* Start decoding */ |
1887 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
1888 | q_data_dst = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_CAPTURE); |
1889 | bitstream_buf = ctx->bitstream.paddr; |
1890 | bitstream_size = ctx->bitstream.size; |
1891 | src_fourcc = q_data_src->fourcc; |
1892 | dst_fourcc = q_data_dst->fourcc; |
1893 | |
1894 | /* Update coda bitstream read and write pointers from kfifo */ |
1895 | coda_kfifo_sync_to_device_full(ctx); |
1896 | |
1897 | ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | |
1898 | CODA9_FRAME_TILED2LINEAR); |
1899 | if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV) |
1900 | ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; |
1901 | if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) |
1902 | ctx->frame_mem_ctrl |= (0x3 << 9) | |
1903 | ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR); |
1904 | coda_write(dev, data: ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL); |
1905 | |
1906 | ctx->display_idx = -1; |
1907 | ctx->frm_dis_flg = 0; |
1908 | coda_write(dev, data: 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); |
1909 | |
1910 | coda_write(dev, data: bitstream_buf, CODA_CMD_DEC_SEQ_BB_START); |
1911 | coda_write(dev, data: bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE); |
1912 | val = 0; |
1913 | if (coda_reorder_enable(ctx)) |
1914 | val |= CODA_REORDER_ENABLE; |
1915 | if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) |
1916 | val |= CODA_NO_INT_ENABLE; |
1917 | coda_write(dev, data: val, CODA_CMD_DEC_SEQ_OPTION); |
1918 | |
1919 | ctx->params.codec_mode = ctx->codec->mode; |
1920 | if (dev->devtype->product == CODA_960 && |
1921 | src_fourcc == V4L2_PIX_FMT_MPEG4) |
1922 | ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4; |
1923 | else |
1924 | ctx->params.codec_mode_aux = 0; |
1925 | if (src_fourcc == V4L2_PIX_FMT_MPEG4) { |
1926 | coda_write(dev, CODA_MP4_CLASS_MPEG4, |
1927 | CODA_CMD_DEC_SEQ_MP4_ASP_CLASS); |
1928 | } |
1929 | if (src_fourcc == V4L2_PIX_FMT_H264) { |
1930 | if (dev->devtype->product == CODA_HX4 || |
1931 | dev->devtype->product == CODA_7541) { |
1932 | coda_write(dev, data: ctx->psbuf.paddr, |
1933 | CODA_CMD_DEC_SEQ_PS_BB_START); |
1934 | coda_write(dev, data: (CODA7_PS_BUF_SIZE / 1024), |
1935 | CODA_CMD_DEC_SEQ_PS_BB_SIZE); |
1936 | } |
1937 | if (dev->devtype->product == CODA_960) { |
1938 | coda_write(dev, data: 0, CODA_CMD_DEC_SEQ_X264_MV_EN); |
1939 | coda_write(dev, data: 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE); |
1940 | } |
1941 | } |
1942 | if (src_fourcc == V4L2_PIX_FMT_JPEG) |
1943 | coda_write(dev, data: 0, CODA_CMD_DEC_SEQ_JPG_THUMB_EN); |
1944 | if (dev->devtype->product != CODA_960) |
1945 | coda_write(dev, data: 0, CODA_CMD_DEC_SEQ_SRC_SIZE); |
1946 | |
1947 | ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE; |
1948 | ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT); |
1949 | ctx->bit_stream_param = 0; |
1950 | if (ret) { |
1951 | v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n" ); |
1952 | return ret; |
1953 | } |
1954 | ctx->sequence_offset = ~0U; |
1955 | ctx->initialized = 1; |
1956 | ctx->first_frame_sequence = 0; |
1957 | |
1958 | /* Update kfifo out pointer from coda bitstream read pointer */ |
1959 | coda_kfifo_sync_from_device(ctx); |
1960 | |
1961 | /* |
1962 | * After updating the read pointer, we need to check if |
1963 | * any metas are consumed and should be released. |
1964 | */ |
1965 | coda_decoder_drop_used_metas(ctx); |
1966 | |
1967 | if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) { |
1968 | v4l2_err(&dev->v4l2_dev, |
1969 | "CODA_COMMAND_SEQ_INIT failed, error code = 0x%x\n" , |
1970 | coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON)); |
1971 | return -EAGAIN; |
1972 | } |
1973 | |
1974 | val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE); |
1975 | if (dev->devtype->product == CODA_DX6) { |
1976 | width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK; |
1977 | height = val & CODADX6_PICHEIGHT_MASK; |
1978 | } else { |
1979 | width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK; |
1980 | height = val & CODA7_PICHEIGHT_MASK; |
1981 | } |
1982 | |
1983 | if (width > q_data_dst->bytesperline || height > q_data_dst->height) { |
1984 | v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n" , |
1985 | width, height, q_data_dst->bytesperline, |
1986 | q_data_dst->height); |
1987 | return -EINVAL; |
1988 | } |
1989 | |
1990 | width = round_up(width, 16); |
1991 | height = round_up(height, 16); |
1992 | |
1993 | coda_dbg(1, ctx, "start decoding: %dx%d\n" , width, height); |
1994 | |
1995 | ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED); |
1996 | /* |
1997 | * If the VDOA is used, the decoder needs one additional frame, |
1998 | * because the frames are freed when the next frame is decoded. |
1999 | * Otherwise there are visible errors in the decoded frames (green |
2000 | * regions in displayed frames) and a broken order of frames (earlier |
2001 | * frames are sporadically displayed after later frames). |
2002 | */ |
2003 | if (ctx->use_vdoa) |
2004 | ctx->num_internal_frames += 1; |
2005 | if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) { |
2006 | v4l2_err(&dev->v4l2_dev, |
2007 | "not enough framebuffers to decode (%d < %d)\n" , |
2008 | CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames); |
2009 | return -EINVAL; |
2010 | } |
2011 | |
2012 | if (src_fourcc == V4L2_PIX_FMT_H264) { |
2013 | u32 left_right; |
2014 | u32 top_bottom; |
2015 | |
2016 | left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT); |
2017 | top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM); |
2018 | |
2019 | q_data_dst->rect.left = (left_right >> 10) & 0x3ff; |
2020 | q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff; |
2021 | q_data_dst->rect.width = width - q_data_dst->rect.left - |
2022 | (left_right & 0x3ff); |
2023 | q_data_dst->rect.height = height - q_data_dst->rect.top - |
2024 | (top_bottom & 0x3ff); |
2025 | } |
2026 | |
2027 | if (dev->devtype->product != CODA_DX6) { |
2028 | u8 profile, level; |
2029 | |
2030 | val = coda_read(dev, CODA7_RET_DEC_SEQ_HEADER_REPORT); |
2031 | profile = val & 0xff; |
2032 | level = (val >> 8) & 0x7f; |
2033 | |
2034 | if (profile || level) |
2035 | coda_update_profile_level_ctrls(ctx, profile_idc: profile, level_idc: level); |
2036 | } |
2037 | |
2038 | return 0; |
2039 | } |
2040 | |
2041 | static void coda_dec_seq_init_work(struct work_struct *work) |
2042 | { |
2043 | struct coda_ctx *ctx = container_of(work, |
2044 | struct coda_ctx, seq_init_work); |
2045 | struct coda_dev *dev = ctx->dev; |
2046 | |
2047 | mutex_lock(&ctx->buffer_mutex); |
2048 | mutex_lock(&dev->coda_mutex); |
2049 | |
2050 | if (!ctx->initialized) |
2051 | __coda_decoder_seq_init(ctx); |
2052 | |
2053 | mutex_unlock(lock: &dev->coda_mutex); |
2054 | mutex_unlock(lock: &ctx->buffer_mutex); |
2055 | } |
2056 | |
2057 | static int __coda_start_decoding(struct coda_ctx *ctx) |
2058 | { |
2059 | struct coda_q_data *q_data_src, *q_data_dst; |
2060 | struct coda_dev *dev = ctx->dev; |
2061 | u32 src_fourcc, dst_fourcc; |
2062 | int ret; |
2063 | |
2064 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
2065 | q_data_dst = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_CAPTURE); |
2066 | src_fourcc = q_data_src->fourcc; |
2067 | dst_fourcc = q_data_dst->fourcc; |
2068 | |
2069 | if (!ctx->initialized) { |
2070 | ret = __coda_decoder_seq_init(ctx); |
2071 | if (ret < 0) |
2072 | return ret; |
2073 | } else { |
2074 | ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | |
2075 | CODA9_FRAME_TILED2LINEAR); |
2076 | if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV) |
2077 | ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; |
2078 | if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) |
2079 | ctx->frame_mem_ctrl |= (0x3 << 9) | |
2080 | ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR); |
2081 | } |
2082 | |
2083 | coda_write(dev, data: ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); |
2084 | |
2085 | ret = coda_alloc_framebuffers(ctx, q_data: q_data_dst, fourcc: src_fourcc); |
2086 | if (ret < 0) { |
2087 | v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n" ); |
2088 | return ret; |
2089 | } |
2090 | |
2091 | /* Tell the decoder how many frame buffers we allocated. */ |
2092 | coda_write(dev, data: ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM); |
2093 | coda_write(dev, round_up(q_data_dst->rect.width, 16), |
2094 | CODA_CMD_SET_FRAME_BUF_STRIDE); |
2095 | |
2096 | if (dev->devtype->product != CODA_DX6) { |
2097 | /* Set secondary AXI IRAM */ |
2098 | coda_setup_iram(ctx); |
2099 | |
2100 | coda_write(dev, data: ctx->iram_info.buf_bit_use, |
2101 | CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); |
2102 | coda_write(dev, data: ctx->iram_info.buf_ip_ac_dc_use, |
2103 | CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); |
2104 | coda_write(dev, data: ctx->iram_info.buf_dbk_y_use, |
2105 | CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); |
2106 | coda_write(dev, data: ctx->iram_info.buf_dbk_c_use, |
2107 | CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); |
2108 | coda_write(dev, data: ctx->iram_info.buf_ovl_use, |
2109 | CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); |
2110 | if (dev->devtype->product == CODA_960) { |
2111 | coda_write(dev, data: ctx->iram_info.buf_btp_use, |
2112 | CODA9_CMD_SET_FRAME_AXI_BTP_ADDR); |
2113 | |
2114 | coda_write(dev, data: -1, CODA9_CMD_SET_FRAME_DELAY); |
2115 | coda9_set_frame_cache(ctx, fourcc: dst_fourcc); |
2116 | } |
2117 | } |
2118 | |
2119 | if (src_fourcc == V4L2_PIX_FMT_H264) { |
2120 | coda_write(dev, data: ctx->slicebuf.paddr, |
2121 | CODA_CMD_SET_FRAME_SLICE_BB_START); |
2122 | coda_write(dev, data: ctx->slicebuf.size / 1024, |
2123 | CODA_CMD_SET_FRAME_SLICE_BB_SIZE); |
2124 | } |
2125 | |
2126 | if (dev->devtype->product == CODA_HX4 || |
2127 | dev->devtype->product == CODA_7541) { |
2128 | int max_mb_x = 1920 / 16; |
2129 | int max_mb_y = 1088 / 16; |
2130 | int max_mb_num = max_mb_x * max_mb_y; |
2131 | |
2132 | coda_write(dev, data: max_mb_num << 16 | max_mb_x << 8 | max_mb_y, |
2133 | CODA7_CMD_SET_FRAME_MAX_DEC_SIZE); |
2134 | } else if (dev->devtype->product == CODA_960) { |
2135 | int max_mb_x = 1920 / 16; |
2136 | int max_mb_y = 1088 / 16; |
2137 | int max_mb_num = max_mb_x * max_mb_y; |
2138 | |
2139 | coda_write(dev, data: max_mb_num << 16 | max_mb_x << 8 | max_mb_y, |
2140 | CODA9_CMD_SET_FRAME_MAX_DEC_SIZE); |
2141 | } |
2142 | |
2143 | if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) { |
2144 | v4l2_err(&ctx->dev->v4l2_dev, |
2145 | "CODA_COMMAND_SET_FRAME_BUF timeout\n" ); |
2146 | return -ETIMEDOUT; |
2147 | } |
2148 | |
2149 | return 0; |
2150 | } |
2151 | |
2152 | static int coda_start_decoding(struct coda_ctx *ctx) |
2153 | { |
2154 | struct coda_dev *dev = ctx->dev; |
2155 | int ret; |
2156 | |
2157 | mutex_lock(&dev->coda_mutex); |
2158 | ret = __coda_start_decoding(ctx); |
2159 | mutex_unlock(lock: &dev->coda_mutex); |
2160 | |
2161 | return ret; |
2162 | } |
2163 | |
2164 | static int coda_prepare_decode(struct coda_ctx *ctx) |
2165 | { |
2166 | struct vb2_v4l2_buffer *dst_buf; |
2167 | struct coda_dev *dev = ctx->dev; |
2168 | struct coda_q_data *q_data_dst; |
2169 | struct coda_buffer_meta *meta; |
2170 | u32 rot_mode = 0; |
2171 | u32 reg_addr, reg_stride; |
2172 | |
2173 | dst_buf = v4l2_m2m_next_dst_buf(m2m_ctx: ctx->fh.m2m_ctx); |
2174 | q_data_dst = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_CAPTURE); |
2175 | |
2176 | /* Try to copy source buffer contents into the bitstream ringbuffer */ |
2177 | mutex_lock(&ctx->bitstream_mutex); |
2178 | coda_fill_bitstream(ctx, NULL); |
2179 | mutex_unlock(lock: &ctx->bitstream_mutex); |
2180 | |
2181 | if (coda_get_bitstream_payload(ctx) < 512 && |
2182 | (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) { |
2183 | coda_dbg(1, ctx, "bitstream payload: %d, skipping\n" , |
2184 | coda_get_bitstream_payload(ctx)); |
2185 | return -EAGAIN; |
2186 | } |
2187 | |
2188 | /* Run coda_start_decoding (again) if not yet initialized */ |
2189 | if (!ctx->initialized) { |
2190 | int ret = __coda_start_decoding(ctx); |
2191 | |
2192 | if (ret < 0) { |
2193 | v4l2_err(&dev->v4l2_dev, "failed to start decoding\n" ); |
2194 | return -EAGAIN; |
2195 | } else { |
2196 | ctx->initialized = 1; |
2197 | } |
2198 | } |
2199 | |
2200 | if (dev->devtype->product == CODA_960) |
2201 | coda_set_gdi_regs(ctx); |
2202 | |
2203 | if (ctx->use_vdoa && |
2204 | ctx->display_idx >= 0 && |
2205 | ctx->display_idx < ctx->num_internal_frames) { |
2206 | vdoa_device_run(ctx: ctx->vdoa, |
2207 | dst: vb2_dma_contig_plane_dma_addr(vb: &dst_buf->vb2_buf, plane_no: 0), |
2208 | src: ctx->internal_frames[ctx->display_idx].buf.paddr); |
2209 | } else { |
2210 | if (dev->devtype->product == CODA_960) { |
2211 | /* |
2212 | * It was previously assumed that the CODA960 has an |
2213 | * internal list of 64 buffer entries that contains |
2214 | * both the registered internal frame buffers as well |
2215 | * as the rotator buffer output, and that the ROT_INDEX |
2216 | * register must be set to a value between the last |
2217 | * internal frame buffers' index and 64. |
2218 | * At least on firmware version 3.1.1 it turns out that |
2219 | * setting ROT_INDEX to any value >= 32 causes CODA |
2220 | * hangups that it can not recover from with the SRC VPU |
2221 | * reset. |
2222 | * It does appear to work however, to just set it to a |
2223 | * fixed value in the [ctx->num_internal_frames, 31] |
2224 | * range, for example CODA_MAX_FRAMEBUFFERS. |
2225 | */ |
2226 | coda_write(dev, CODA_MAX_FRAMEBUFFERS, |
2227 | CODA9_CMD_DEC_PIC_ROT_INDEX); |
2228 | |
2229 | reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y; |
2230 | reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE; |
2231 | } else { |
2232 | reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y; |
2233 | reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE; |
2234 | } |
2235 | coda_write_base(ctx, q_data: q_data_dst, buf: dst_buf, reg_y: reg_addr); |
2236 | coda_write(dev, data: q_data_dst->bytesperline, reg: reg_stride); |
2237 | |
2238 | rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode; |
2239 | } |
2240 | |
2241 | coda_write(dev, data: rot_mode, CODA_CMD_DEC_PIC_ROT_MODE); |
2242 | |
2243 | switch (dev->devtype->product) { |
2244 | case CODA_DX6: |
2245 | /* TBD */ |
2246 | case CODA_HX4: |
2247 | case CODA_7541: |
2248 | coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION); |
2249 | break; |
2250 | case CODA_960: |
2251 | /* 'hardcode to use interrupt disable mode'? */ |
2252 | coda_write(dev, data: (1 << 10), CODA_CMD_DEC_PIC_OPTION); |
2253 | break; |
2254 | } |
2255 | |
2256 | coda_write(dev, data: 0, CODA_CMD_DEC_PIC_SKIP_NUM); |
2257 | |
2258 | coda_write(dev, data: 0, CODA_CMD_DEC_PIC_BB_START); |
2259 | coda_write(dev, data: 0, CODA_CMD_DEC_PIC_START_BYTE); |
2260 | |
2261 | if (dev->devtype->product != CODA_DX6) |
2262 | coda_write(dev, data: ctx->iram_info.axi_sram_use, |
2263 | CODA7_REG_BIT_AXI_SRAM_USE); |
2264 | |
2265 | spin_lock(lock: &ctx->buffer_meta_lock); |
2266 | meta = list_first_entry_or_null(&ctx->buffer_meta_list, |
2267 | struct coda_buffer_meta, list); |
2268 | |
2269 | if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) { |
2270 | |
2271 | /* If this is the last buffer in the bitstream, add padding */ |
2272 | if (meta->end == ctx->bitstream_fifo.kfifo.in) { |
2273 | static unsigned char buf[512]; |
2274 | unsigned int pad; |
2275 | |
2276 | /* Pad to multiple of 256 and then add 256 more */ |
2277 | pad = ((0 - meta->end) & 0xff) + 256; |
2278 | |
2279 | memset(buf, 0xff, sizeof(buf)); |
2280 | |
2281 | kfifo_in(&ctx->bitstream_fifo, buf, pad); |
2282 | } |
2283 | } |
2284 | spin_unlock(lock: &ctx->buffer_meta_lock); |
2285 | |
2286 | coda_kfifo_sync_to_device_full(ctx); |
2287 | |
2288 | /* Clear decode success flag */ |
2289 | coda_write(dev, data: 0, CODA_RET_DEC_PIC_SUCCESS); |
2290 | |
2291 | /* Clear error return value */ |
2292 | coda_write(dev, data: 0, CODA_RET_DEC_PIC_ERR_MB); |
2293 | |
2294 | trace_coda_dec_pic_run(ctx, meta); |
2295 | |
2296 | coda_command_async(ctx, CODA_COMMAND_PIC_RUN); |
2297 | |
2298 | return 0; |
2299 | } |
2300 | |
2301 | static void coda_finish_decode(struct coda_ctx *ctx) |
2302 | { |
2303 | struct coda_dev *dev = ctx->dev; |
2304 | struct coda_q_data *q_data_src; |
2305 | struct coda_q_data *q_data_dst; |
2306 | struct vb2_v4l2_buffer *dst_buf; |
2307 | struct coda_buffer_meta *meta; |
2308 | int width, height; |
2309 | int decoded_idx; |
2310 | int display_idx; |
2311 | struct coda_internal_frame *decoded_frame = NULL; |
2312 | u32 src_fourcc; |
2313 | int success; |
2314 | u32 err_mb; |
2315 | int err_vdoa = 0; |
2316 | u32 val; |
2317 | |
2318 | if (ctx->aborting) |
2319 | return; |
2320 | |
2321 | /* Update kfifo out pointer from coda bitstream read pointer */ |
2322 | coda_kfifo_sync_from_device(ctx); |
2323 | |
2324 | /* |
2325 | * in stream-end mode, the read pointer can overshoot the write pointer |
2326 | * by up to 512 bytes |
2327 | */ |
2328 | if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) { |
2329 | if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512) |
2330 | kfifo_init(&ctx->bitstream_fifo, |
2331 | ctx->bitstream.vaddr, ctx->bitstream.size); |
2332 | } |
2333 | |
2334 | q_data_src = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_OUTPUT); |
2335 | src_fourcc = q_data_src->fourcc; |
2336 | |
2337 | val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS); |
2338 | if (val != 1) |
2339 | pr_err("DEC_PIC_SUCCESS = %d\n" , val); |
2340 | |
2341 | success = val & 0x1; |
2342 | if (!success) |
2343 | v4l2_err(&dev->v4l2_dev, "decode failed\n" ); |
2344 | |
2345 | if (src_fourcc == V4L2_PIX_FMT_H264) { |
2346 | if (val & (1 << 3)) |
2347 | v4l2_err(&dev->v4l2_dev, |
2348 | "insufficient PS buffer space (%d bytes)\n" , |
2349 | ctx->psbuf.size); |
2350 | if (val & (1 << 2)) |
2351 | v4l2_err(&dev->v4l2_dev, |
2352 | "insufficient slice buffer space (%d bytes)\n" , |
2353 | ctx->slicebuf.size); |
2354 | } |
2355 | |
2356 | val = coda_read(dev, CODA_RET_DEC_PIC_SIZE); |
2357 | width = (val >> 16) & 0xffff; |
2358 | height = val & 0xffff; |
2359 | |
2360 | q_data_dst = get_q_data(ctx, type: V4L2_BUF_TYPE_VIDEO_CAPTURE); |
2361 | |
2362 | /* frame crop information */ |
2363 | if (src_fourcc == V4L2_PIX_FMT_H264) { |
2364 | u32 left_right; |
2365 | u32 top_bottom; |
2366 | |
2367 | left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT); |
2368 | top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM); |
2369 | |
2370 | if (left_right == 0xffffffff && top_bottom == 0xffffffff) { |
2371 | /* Keep current crop information */ |
2372 | } else { |
2373 | struct v4l2_rect *rect = &q_data_dst->rect; |
2374 | |
2375 | rect->left = left_right >> 16 & 0xffff; |
2376 | rect->top = top_bottom >> 16 & 0xffff; |
2377 | rect->width = width - rect->left - |
2378 | (left_right & 0xffff); |
2379 | rect->height = height - rect->top - |
2380 | (top_bottom & 0xffff); |
2381 | } |
2382 | } else { |
2383 | /* no cropping */ |
2384 | } |
2385 | |
2386 | err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB); |
2387 | if (err_mb > 0) { |
2388 | if (__ratelimit(&dev->mb_err_rs)) |
2389 | coda_dbg(1, ctx, "errors in %d macroblocks\n" , err_mb); |
2390 | v4l2_ctrl_s_ctrl(ctrl: ctx->mb_err_cnt_ctrl, |
2391 | val: v4l2_ctrl_g_ctrl(ctrl: ctx->mb_err_cnt_ctrl) + err_mb); |
2392 | } |
2393 | |
2394 | if (dev->devtype->product == CODA_HX4 || |
2395 | dev->devtype->product == CODA_7541) { |
2396 | val = coda_read(dev, CODA_RET_DEC_PIC_OPTION); |
2397 | if (val == 0) { |
2398 | /* not enough bitstream data */ |
2399 | coda_dbg(1, ctx, "prescan failed: %d\n" , val); |
2400 | ctx->hold = true; |
2401 | return; |
2402 | } |
2403 | } |
2404 | |
2405 | /* Wait until the VDOA finished writing the previous display frame */ |
2406 | if (ctx->use_vdoa && |
2407 | ctx->display_idx >= 0 && |
2408 | ctx->display_idx < ctx->num_internal_frames) { |
2409 | err_vdoa = vdoa_wait_for_completion(ctx: ctx->vdoa); |
2410 | } |
2411 | |
2412 | ctx->frm_dis_flg = coda_read(dev, |
2413 | CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); |
2414 | |
2415 | /* The previous display frame was copied out and can be overwritten */ |
2416 | if (ctx->display_idx >= 0 && |
2417 | ctx->display_idx < ctx->num_internal_frames) { |
2418 | ctx->frm_dis_flg &= ~(1 << ctx->display_idx); |
2419 | coda_write(dev, data: ctx->frm_dis_flg, |
2420 | CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); |
2421 | } |
2422 | |
2423 | /* |
2424 | * The index of the last decoded frame, not necessarily in |
2425 | * display order, and the index of the next display frame. |
2426 | * The latter could have been decoded in a previous run. |
2427 | */ |
2428 | decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX); |
2429 | display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX); |
2430 | |
2431 | if (decoded_idx == -1) { |
2432 | /* no frame was decoded, but we might have a display frame */ |
2433 | if (display_idx >= 0 && display_idx < ctx->num_internal_frames) |
2434 | ctx->sequence_offset++; |
2435 | else if (ctx->display_idx < 0) |
2436 | ctx->hold = true; |
2437 | } else if (decoded_idx == -2) { |
2438 | if (ctx->display_idx >= 0 && |
2439 | ctx->display_idx < ctx->num_internal_frames) |
2440 | ctx->sequence_offset++; |
2441 | /* no frame was decoded, we still return remaining buffers */ |
2442 | } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) { |
2443 | v4l2_err(&dev->v4l2_dev, |
2444 | "decoded frame index out of range: %d\n" , decoded_idx); |
2445 | } else { |
2446 | int sequence; |
2447 | |
2448 | decoded_frame = &ctx->internal_frames[decoded_idx]; |
2449 | |
2450 | val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM); |
2451 | if (ctx->sequence_offset == -1) |
2452 | ctx->sequence_offset = val; |
2453 | |
2454 | sequence = val + ctx->first_frame_sequence |
2455 | - ctx->sequence_offset; |
2456 | spin_lock(lock: &ctx->buffer_meta_lock); |
2457 | if (!list_empty(head: &ctx->buffer_meta_list)) { |
2458 | meta = list_first_entry(&ctx->buffer_meta_list, |
2459 | struct coda_buffer_meta, list); |
2460 | list_del(entry: &meta->list); |
2461 | ctx->num_metas--; |
2462 | spin_unlock(lock: &ctx->buffer_meta_lock); |
2463 | /* |
2464 | * Clamp counters to 16 bits for comparison, as the HW |
2465 | * counter rolls over at this point for h.264. This |
2466 | * may be different for other formats, but using 16 bits |
2467 | * should be enough to detect most errors and saves us |
2468 | * from doing different things based on the format. |
2469 | */ |
2470 | if ((sequence & 0xffff) != (meta->sequence & 0xffff)) { |
2471 | v4l2_err(&dev->v4l2_dev, |
2472 | "sequence number mismatch (%d(%d) != %d)\n" , |
2473 | sequence, ctx->sequence_offset, |
2474 | meta->sequence); |
2475 | } |
2476 | decoded_frame->meta = *meta; |
2477 | kfree(objp: meta); |
2478 | } else { |
2479 | spin_unlock(lock: &ctx->buffer_meta_lock); |
2480 | v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n" ); |
2481 | memset(&decoded_frame->meta, 0, |
2482 | sizeof(struct coda_buffer_meta)); |
2483 | decoded_frame->meta.sequence = sequence; |
2484 | decoded_frame->meta.last = false; |
2485 | ctx->sequence_offset++; |
2486 | } |
2487 | |
2488 | trace_coda_dec_pic_done(ctx, meta: &decoded_frame->meta); |
2489 | |
2490 | val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7; |
2491 | decoded_frame->type = (val == 0) ? V4L2_BUF_FLAG_KEYFRAME : |
2492 | (val == 1) ? V4L2_BUF_FLAG_PFRAME : |
2493 | V4L2_BUF_FLAG_BFRAME; |
2494 | |
2495 | decoded_frame->error = err_mb; |
2496 | } |
2497 | |
2498 | if (display_idx == -1) { |
2499 | /* |
2500 | * no more frames to be decoded, but there could still |
2501 | * be rotator output to dequeue |
2502 | */ |
2503 | ctx->hold = true; |
2504 | } else if (display_idx == -3) { |
2505 | /* possibly prescan failure */ |
2506 | } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) { |
2507 | v4l2_err(&dev->v4l2_dev, |
2508 | "presentation frame index out of range: %d\n" , |
2509 | display_idx); |
2510 | } |
2511 | |
2512 | /* If a frame was copied out, return it */ |
2513 | if (ctx->display_idx >= 0 && |
2514 | ctx->display_idx < ctx->num_internal_frames) { |
2515 | struct coda_internal_frame *ready_frame; |
2516 | |
2517 | ready_frame = &ctx->internal_frames[ctx->display_idx]; |
2518 | |
2519 | dst_buf = v4l2_m2m_dst_buf_remove(m2m_ctx: ctx->fh.m2m_ctx); |
2520 | dst_buf->sequence = ctx->osequence++; |
2521 | |
2522 | dst_buf->field = V4L2_FIELD_NONE; |
2523 | dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME | |
2524 | V4L2_BUF_FLAG_PFRAME | |
2525 | V4L2_BUF_FLAG_BFRAME); |
2526 | dst_buf->flags |= ready_frame->type; |
2527 | meta = &ready_frame->meta; |
2528 | if (meta->last && !coda_reorder_enable(ctx)) { |
2529 | /* |
2530 | * If this was the last decoded frame, and reordering |
2531 | * is disabled, this will be the last display frame. |
2532 | */ |
2533 | coda_dbg(1, ctx, "last meta, marking as last frame\n" ); |
2534 | dst_buf->flags |= V4L2_BUF_FLAG_LAST; |
2535 | } else if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG && |
2536 | display_idx == -1) { |
2537 | /* |
2538 | * If there is no designated presentation frame anymore, |
2539 | * this frame has to be the last one. |
2540 | */ |
2541 | coda_dbg(1, ctx, |
2542 | "no more frames to return, marking as last frame\n" ); |
2543 | dst_buf->flags |= V4L2_BUF_FLAG_LAST; |
2544 | } |
2545 | dst_buf->timecode = meta->timecode; |
2546 | dst_buf->vb2_buf.timestamp = meta->timestamp; |
2547 | |
2548 | trace_coda_dec_rot_done(ctx, buf: dst_buf, meta); |
2549 | |
2550 | vb2_set_plane_payload(vb: &dst_buf->vb2_buf, plane_no: 0, |
2551 | size: q_data_dst->sizeimage); |
2552 | |
2553 | if (ready_frame->error || err_vdoa) |
2554 | coda_m2m_buf_done(ctx, buf: dst_buf, state: VB2_BUF_STATE_ERROR); |
2555 | else |
2556 | coda_m2m_buf_done(ctx, buf: dst_buf, state: VB2_BUF_STATE_DONE); |
2557 | |
2558 | if (decoded_frame) { |
2559 | coda_dbg(1, ctx, "job finished: decoded %c frame %u, returned %c frame %u (%u/%u)%s\n" , |
2560 | coda_frame_type_char(decoded_frame->type), |
2561 | decoded_frame->meta.sequence, |
2562 | coda_frame_type_char(dst_buf->flags), |
2563 | ready_frame->meta.sequence, |
2564 | dst_buf->sequence, ctx->qsequence, |
2565 | (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? |
2566 | " (last)" : "" ); |
2567 | } else { |
2568 | coda_dbg(1, ctx, "job finished: no frame decoded (%d), returned %c frame %u (%u/%u)%s\n" , |
2569 | decoded_idx, |
2570 | coda_frame_type_char(dst_buf->flags), |
2571 | ready_frame->meta.sequence, |
2572 | dst_buf->sequence, ctx->qsequence, |
2573 | (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? |
2574 | " (last)" : "" ); |
2575 | } |
2576 | } else { |
2577 | if (decoded_frame) { |
2578 | coda_dbg(1, ctx, "job finished: decoded %c frame %u, no frame returned (%d)\n" , |
2579 | coda_frame_type_char(decoded_frame->type), |
2580 | decoded_frame->meta.sequence, |
2581 | ctx->display_idx); |
2582 | } else { |
2583 | coda_dbg(1, ctx, "job finished: no frame decoded (%d) or returned (%d)\n" , |
2584 | decoded_idx, ctx->display_idx); |
2585 | } |
2586 | } |
2587 | |
2588 | /* The rotator will copy the current display frame next time */ |
2589 | ctx->display_idx = display_idx; |
2590 | |
2591 | /* |
2592 | * The current decode run might have brought the bitstream fill level |
2593 | * below the size where we can start the next decode run. As userspace |
2594 | * might have filled the output queue completely and might thus be |
2595 | * blocked, we can't rely on the next qbuf to trigger the bitstream |
2596 | * refill. Check if we have data to refill the bitstream now. |
2597 | */ |
2598 | mutex_lock(&ctx->bitstream_mutex); |
2599 | coda_fill_bitstream(ctx, NULL); |
2600 | mutex_unlock(lock: &ctx->bitstream_mutex); |
2601 | } |
2602 | |
2603 | static void coda_decode_timeout(struct coda_ctx *ctx) |
2604 | { |
2605 | struct vb2_v4l2_buffer *dst_buf; |
2606 | |
2607 | /* |
2608 | * For now this only handles the case where we would deadlock with |
2609 | * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS, |
2610 | * but after a failed decode run we would hold the context and wait for |
2611 | * userspace to queue more buffers. |
2612 | */ |
2613 | if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)) |
2614 | return; |
2615 | |
2616 | dst_buf = v4l2_m2m_dst_buf_remove(m2m_ctx: ctx->fh.m2m_ctx); |
2617 | dst_buf->sequence = ctx->qsequence - 1; |
2618 | |
2619 | coda_m2m_buf_done(ctx, buf: dst_buf, state: VB2_BUF_STATE_ERROR); |
2620 | } |
2621 | |
2622 | const struct coda_context_ops coda_bit_decode_ops = { |
2623 | .queue_init = coda_decoder_queue_init, |
2624 | .reqbufs = coda_decoder_reqbufs, |
2625 | .start_streaming = coda_start_decoding, |
2626 | .prepare_run = coda_prepare_decode, |
2627 | .finish_run = coda_finish_decode, |
2628 | .run_timeout = coda_decode_timeout, |
2629 | .seq_init_work = coda_dec_seq_init_work, |
2630 | .seq_end_work = coda_seq_end_work, |
2631 | .release = coda_bit_release, |
2632 | }; |
2633 | |
2634 | irqreturn_t coda_irq_handler(int irq, void *data) |
2635 | { |
2636 | struct coda_dev *dev = data; |
2637 | struct coda_ctx *ctx; |
2638 | |
2639 | /* read status register to attend the IRQ */ |
2640 | coda_read(dev, CODA_REG_BIT_INT_STATUS); |
2641 | coda_write(dev, data: 0, CODA_REG_BIT_INT_REASON); |
2642 | coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET, |
2643 | CODA_REG_BIT_INT_CLEAR); |
2644 | |
2645 | ctx = v4l2_m2m_get_curr_priv(m2m_dev: dev->m2m_dev); |
2646 | if (ctx == NULL) { |
2647 | v4l2_err(&dev->v4l2_dev, |
2648 | "Instance released before the end of transaction\n" ); |
2649 | return IRQ_HANDLED; |
2650 | } |
2651 | |
2652 | trace_coda_bit_done(ctx); |
2653 | |
2654 | if (ctx->aborting) { |
2655 | coda_dbg(1, ctx, "task has been aborted\n" ); |
2656 | } |
2657 | |
2658 | if (coda_isbusy(dev: ctx->dev)) { |
2659 | coda_dbg(1, ctx, "coda is still busy!!!!\n" ); |
2660 | return IRQ_NONE; |
2661 | } |
2662 | |
2663 | complete(&ctx->completion); |
2664 | |
2665 | return IRQ_HANDLED; |
2666 | } |
2667 | |