1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Hantro VPU codec driver |
4 | * |
5 | * Copyright (C) 2018 Rockchip Electronics Co., Ltd. |
6 | * |
7 | * JPEG encoder |
8 | * ------------ |
9 | * The VPU JPEG encoder produces JPEG baseline sequential format. |
10 | * The quantization coefficients are 8-bit values, complying with |
11 | * the baseline specification. Therefore, it requires |
12 | * luma and chroma quantization tables. The hardware does entropy |
13 | * encoding using internal Huffman tables, as specified in the JPEG |
14 | * specification. |
15 | * |
16 | * In other words, only the luma and chroma quantization tables are |
17 | * required for the encoding operation. |
18 | * |
19 | * Quantization luma table values are written to registers |
20 | * VEPU_swreg_0-VEPU_swreg_15, and chroma table values to |
21 | * VEPU_swreg_16-VEPU_swreg_31. A special order is needed, neither |
22 | * zigzag, nor linear. |
23 | */ |
24 | |
25 | #include <asm/unaligned.h> |
26 | #include <media/v4l2-mem2mem.h> |
27 | #include "hantro_jpeg.h" |
28 | #include "hantro.h" |
29 | #include "hantro_v4l2.h" |
30 | #include "hantro_hw.h" |
31 | #include "rockchip_vpu2_regs.h" |
32 | |
/*
 * Number of 32-bit register writes per quantization table: the
 * table-programming loops in this file read one big-endian 32-bit word
 * from the table for each register, i.e. 16 * 4 = 64 table bytes.
 */
33 | #define VEPU_JPEG_QUANT_TABLE_COUNT 16 |
34 | |
35 | static void rockchip_vpu2_set_src_img_ctrl(struct hantro_dev *vpu, |
36 | struct hantro_ctx *ctx) |
37 | { |
38 | u32 overfill_r, overfill_b; |
39 | u32 reg; |
40 | |
41 | /* |
42 | * The format width and height are already macroblock aligned |
43 | * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination |
44 | * format width and height can be further modified by |
45 | * .vidioc_s_selection(), and the width is 4-aligned. |
46 | */ |
47 | overfill_r = ctx->src_fmt.width - ctx->dst_fmt.width; |
48 | overfill_b = ctx->src_fmt.height - ctx->dst_fmt.height; |
49 | |
50 | reg = VEPU_REG_IN_IMG_CTRL_ROW_LEN(ctx->src_fmt.width); |
51 | vepu_write_relaxed(vpu, val: reg, VEPU_REG_INPUT_LUMA_INFO); |
52 | |
53 | reg = VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r / 4) | |
54 | VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b); |
55 | /* |
56 | * This register controls the input crop, as the offset |
57 | * from the right/bottom within the last macroblock. The offset from the |
58 | * right must be divided by 4 and so the crop must be aligned to 4 pixels |
59 | * horizontally. |
60 | */ |
61 | vepu_write_relaxed(vpu, val: reg, VEPU_REG_ENC_OVER_FILL_STRM_OFFSET); |
62 | |
63 | reg = VEPU_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt); |
64 | vepu_write_relaxed(vpu, val: reg, VEPU_REG_ENC_CTRL1); |
65 | } |
66 | |
67 | static void rockchip_vpu2_jpeg_enc_set_buffers(struct hantro_dev *vpu, |
68 | struct hantro_ctx *ctx, |
69 | struct vb2_buffer *src_buf, |
70 | struct vb2_buffer *dst_buf) |
71 | { |
72 | struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt; |
73 | dma_addr_t src[3]; |
74 | u32 size_left; |
75 | |
76 | size_left = vb2_plane_size(vb: dst_buf, plane_no: 0) - ctx->vpu_dst_fmt->header_size; |
77 | if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size)) |
78 | size_left = 0; |
79 | |
80 | WARN_ON(pix_fmt->num_planes > 3); |
81 | |
82 | vepu_write_relaxed(vpu, val: vb2_dma_contig_plane_dma_addr(vb: dst_buf, plane_no: 0) + |
83 | ctx->vpu_dst_fmt->header_size, |
84 | VEPU_REG_ADDR_OUTPUT_STREAM); |
85 | vepu_write_relaxed(vpu, val: size_left, VEPU_REG_STR_BUF_LIMIT); |
86 | |
87 | if (pix_fmt->num_planes == 1) { |
88 | src[0] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 0); |
89 | vepu_write_relaxed(vpu, val: src[0], VEPU_REG_ADDR_IN_PLANE_0); |
90 | } else if (pix_fmt->num_planes == 2) { |
91 | src[0] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 0); |
92 | src[1] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 1); |
93 | vepu_write_relaxed(vpu, val: src[0], VEPU_REG_ADDR_IN_PLANE_0); |
94 | vepu_write_relaxed(vpu, val: src[1], VEPU_REG_ADDR_IN_PLANE_1); |
95 | } else { |
96 | src[0] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 0); |
97 | src[1] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 1); |
98 | src[2] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 2); |
99 | vepu_write_relaxed(vpu, val: src[0], VEPU_REG_ADDR_IN_PLANE_0); |
100 | vepu_write_relaxed(vpu, val: src[1], VEPU_REG_ADDR_IN_PLANE_1); |
101 | vepu_write_relaxed(vpu, val: src[2], VEPU_REG_ADDR_IN_PLANE_2); |
102 | } |
103 | } |
104 | |
105 | static void |
106 | rockchip_vpu2_jpeg_enc_set_qtable(struct hantro_dev *vpu, |
107 | unsigned char *luma_qtable, |
108 | unsigned char *chroma_qtable) |
109 | { |
110 | u32 reg, i; |
111 | __be32 *luma_qtable_p; |
112 | __be32 *chroma_qtable_p; |
113 | |
114 | luma_qtable_p = (__be32 *)luma_qtable; |
115 | chroma_qtable_p = (__be32 *)chroma_qtable; |
116 | |
117 | /* |
118 | * Quantization table registers must be written in contiguous blocks. |
119 | * DO NOT collapse the below two "for" loops into one. |
120 | */ |
121 | for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) { |
122 | reg = get_unaligned_be32(p: &luma_qtable_p[i]); |
123 | vepu_write_relaxed(vpu, val: reg, VEPU_REG_JPEG_LUMA_QUAT(i)); |
124 | } |
125 | |
126 | for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) { |
127 | reg = get_unaligned_be32(p: &chroma_qtable_p[i]); |
128 | vepu_write_relaxed(vpu, val: reg, VEPU_REG_JPEG_CHROMA_QUAT(i)); |
129 | } |
130 | } |
131 | |
132 | int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx) |
133 | { |
134 | struct hantro_dev *vpu = ctx->dev; |
135 | struct vb2_v4l2_buffer *src_buf, *dst_buf; |
136 | struct hantro_jpeg_ctx jpeg_ctx; |
137 | u32 reg; |
138 | |
139 | src_buf = hantro_get_src_buf(ctx); |
140 | dst_buf = hantro_get_dst_buf(ctx); |
141 | |
142 | hantro_start_prepare_run(ctx); |
143 | |
144 | memset(&jpeg_ctx, 0, sizeof(jpeg_ctx)); |
145 | jpeg_ctx.buffer = vb2_plane_vaddr(vb: &dst_buf->vb2_buf, plane_no: 0); |
146 | if (!jpeg_ctx.buffer) |
147 | return -ENOMEM; |
148 | |
149 | jpeg_ctx.width = ctx->dst_fmt.width; |
150 | jpeg_ctx.height = ctx->dst_fmt.height; |
151 | jpeg_ctx.quality = ctx->jpeg_quality; |
152 | hantro_jpeg_header_assemble(ctx: &jpeg_ctx); |
153 | |
154 | /* Switch to JPEG encoder mode before writing registers */ |
155 | vepu_write_relaxed(vpu, VEPU_REG_ENCODE_FORMAT_JPEG, |
156 | VEPU_REG_ENCODE_START); |
157 | |
158 | rockchip_vpu2_set_src_img_ctrl(vpu, ctx); |
159 | rockchip_vpu2_jpeg_enc_set_buffers(vpu, ctx, src_buf: &src_buf->vb2_buf, |
160 | dst_buf: &dst_buf->vb2_buf); |
161 | rockchip_vpu2_jpeg_enc_set_qtable(vpu, luma_qtable: jpeg_ctx.hw_luma_qtable, |
162 | chroma_qtable: jpeg_ctx.hw_chroma_qtable); |
163 | |
164 | reg = VEPU_REG_OUTPUT_SWAP32 |
165 | | VEPU_REG_OUTPUT_SWAP16 |
166 | | VEPU_REG_OUTPUT_SWAP8 |
167 | | VEPU_REG_INPUT_SWAP8 |
168 | | VEPU_REG_INPUT_SWAP16 |
169 | | VEPU_REG_INPUT_SWAP32; |
170 | /* Make sure that all registers are written at this point. */ |
171 | vepu_write(vpu, val: reg, VEPU_REG_DATA_ENDIAN); |
172 | |
173 | reg = VEPU_REG_AXI_CTRL_BURST_LEN(16); |
174 | vepu_write_relaxed(vpu, val: reg, VEPU_REG_AXI_CTRL); |
175 | |
176 | reg = VEPU_REG_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) |
177 | | VEPU_REG_MB_HEIGHT(MB_HEIGHT(ctx->src_fmt.height)) |
178 | | VEPU_REG_FRAME_TYPE_INTRA |
179 | | VEPU_REG_ENCODE_FORMAT_JPEG |
180 | | VEPU_REG_ENCODE_ENABLE; |
181 | |
182 | /* Kick the watchdog and start encoding */ |
183 | hantro_end_prepare_run(ctx); |
184 | vepu_write(vpu, val: reg, VEPU_REG_ENCODE_START); |
185 | |
186 | return 0; |
187 | } |
188 | |
189 | void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx) |
190 | { |
191 | struct hantro_dev *vpu = ctx->dev; |
192 | u32 bytesused = vepu_read(vpu, VEPU_REG_STR_BUF_LIMIT) / 8; |
193 | struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx); |
194 | |
195 | vb2_set_plane_payload(vb: &dst_buf->vb2_buf, plane_no: 0, |
196 | size: ctx->vpu_dst_fmt->header_size + bytesused); |
197 | } |
198 | |