// Copyright 2022 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Sharp RGB to YUV conversion.
//
// Author: Skal (pascal.massimino@gmail.com)
13 | |
14 | #include "sharpyuv/sharpyuv.h" |
15 | |
16 | #include <assert.h> |
17 | #include <limits.h> |
18 | #include <stddef.h> |
19 | #include <stdlib.h> |
20 | #include <string.h> |
21 | |
22 | #include "src/webp/types.h" |
23 | #include "sharpyuv/sharpyuv_cpu.h" |
24 | #include "sharpyuv/sharpyuv_dsp.h" |
25 | #include "sharpyuv/sharpyuv_gamma.h" |
26 | |
27 | //------------------------------------------------------------------------------ |
28 | |
29 | int SharpYuvGetVersion(void) { |
30 | return SHARPYUV_VERSION; |
31 | } |
32 | |
33 | //------------------------------------------------------------------------------ |
34 | // Sharp RGB->YUV conversion |
35 | |
// Maximum number of refinement passes run by the conversion loop.
static const int kNumIterations = 4;

#define YUV_FIX 16  // fixed-point precision for RGB->YUV
// Rounding term: half a unit at YUV_FIX bits of fractional precision.
static const int kYuvHalf = 1 << (YUV_FIX - 1);

// Max bit depth so that intermediate calculations fit in 16 bits.
static const int kMaxBitDepth = 14;

// Returns the precision shift to use based on the input rgb_bit_depth.
// The result is 2 when two extra bits still fit within kMaxBitDepth;
// otherwise it is (kMaxBitDepth - rgb_bit_depth), which is negative for
// inputs deeper than kMaxBitDepth (e.g. -2 for 16-bit input).
static int GetPrecisionShift(int rgb_bit_depth) {
  // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
  // bits if needed.
  return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
                                               : (kMaxBitDepth - rgb_bit_depth);
}
51 | |
typedef int16_t fixed_t;      // signed type with extra precision for UV
typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W

//------------------------------------------------------------------------------

// Clamps 'v' to the [0, 255] range.
static uint8_t clip_8b(fixed_t v) {
  // Fast path: no bit set outside the low 8 bits means v is already in range.
  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
}

// Clamps 'v' to the [0, max] range.
static uint16_t clip(fixed_t v, int max) {
  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
}

// Clamps 'y' to the range representable with 'bit_depth' unsigned bits,
// i.e. [0, (1 << bit_depth) - 1].
static fixed_y_t clip_bit_depth(int y, int bit_depth) {
  const int max = (1 << bit_depth) - 1;
  // Fast path: no bit set above 'max' (including the sign bit) means in range.
  return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
}
69 | |
70 | //------------------------------------------------------------------------------ |
71 | |
72 | static int RGBToGray(int64_t r, int64_t g, int64_t b) { |
73 | const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf; |
74 | return (int)(luma >> YUV_FIX); |
75 | } |
76 | |
77 | static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
78 | int rgb_bit_depth, |
79 | SharpYuvTransferFunctionType transfer_type) { |
80 | const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); |
81 | const uint32_t A = SharpYuvGammaToLinear(v: a, bit_depth, transfer_type); |
82 | const uint32_t B = SharpYuvGammaToLinear(v: b, bit_depth, transfer_type); |
83 | const uint32_t C = SharpYuvGammaToLinear(v: c, bit_depth, transfer_type); |
84 | const uint32_t D = SharpYuvGammaToLinear(v: d, bit_depth, transfer_type); |
85 | return SharpYuvLinearToGamma(value: (A + B + C + D + 2) >> 2, bit_depth, |
86 | transfer_type); |
87 | } |
88 | |
89 | static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w, |
90 | int rgb_bit_depth, |
91 | SharpYuvTransferFunctionType transfer_type) { |
92 | const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); |
93 | int i = 0; |
94 | do { |
95 | const uint32_t R = |
96 | SharpYuvGammaToLinear(v: src[0 * w + i], bit_depth, transfer_type); |
97 | const uint32_t G = |
98 | SharpYuvGammaToLinear(v: src[1 * w + i], bit_depth, transfer_type); |
99 | const uint32_t B = |
100 | SharpYuvGammaToLinear(v: src[2 * w + i], bit_depth, transfer_type); |
101 | const uint32_t Y = RGBToGray(r: R, g: G, b: B); |
102 | dst[i] = (fixed_y_t)SharpYuvLinearToGamma(value: Y, bit_depth, transfer_type); |
103 | } while (++i < w); |
104 | } |
105 | |
106 | static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, |
107 | fixed_t* dst, int uv_w, int rgb_bit_depth, |
108 | SharpYuvTransferFunctionType transfer_type) { |
109 | int i = 0; |
110 | do { |
111 | const int r = |
112 | ScaleDown(a: src1[0 * uv_w + 0], b: src1[0 * uv_w + 1], c: src2[0 * uv_w + 0], |
113 | d: src2[0 * uv_w + 1], rgb_bit_depth, transfer_type); |
114 | const int g = |
115 | ScaleDown(a: src1[2 * uv_w + 0], b: src1[2 * uv_w + 1], c: src2[2 * uv_w + 0], |
116 | d: src2[2 * uv_w + 1], rgb_bit_depth, transfer_type); |
117 | const int b = |
118 | ScaleDown(a: src1[4 * uv_w + 0], b: src1[4 * uv_w + 1], c: src2[4 * uv_w + 0], |
119 | d: src2[4 * uv_w + 1], rgb_bit_depth, transfer_type); |
120 | const int W = RGBToGray(r, g, b); |
121 | dst[0 * uv_w] = (fixed_t)(r - W); |
122 | dst[1 * uv_w] = (fixed_t)(g - W); |
123 | dst[2 * uv_w] = (fixed_t)(b - W); |
124 | dst += 1; |
125 | src1 += 2; |
126 | src2 += 2; |
127 | } while (++i < uv_w); |
128 | } |
129 | |
130 | static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { |
131 | int i = 0; |
132 | assert(w > 0); |
133 | do { |
134 | y[i] = RGBToGray(r: rgb[0 * w + i], g: rgb[1 * w + i], b: rgb[2 * w + i]); |
135 | } while (++i < w); |
136 | } |
137 | |
138 | //------------------------------------------------------------------------------ |
139 | |
140 | static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) { |
141 | const int v0 = (A * 3 + B + 2) >> 2; |
142 | return clip_bit_depth(y: v0 + W0, bit_depth); |
143 | } |
144 | |
145 | //------------------------------------------------------------------------------ |
146 | |
147 | static WEBP_INLINE int Shift(int v, int shift) { |
148 | return (shift >= 0) ? (v << shift) : (v >> -shift); |
149 | } |
150 | |
151 | static void ImportOneRow(const uint8_t* const r_ptr, |
152 | const uint8_t* const g_ptr, |
153 | const uint8_t* const b_ptr, |
154 | int rgb_step, |
155 | int rgb_bit_depth, |
156 | int pic_width, |
157 | fixed_y_t* const dst) { |
158 | // Convert the rgb_step from a number of bytes to a number of uint8_t or |
159 | // uint16_t values depending the bit depth. |
160 | const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step; |
161 | int i = 0; |
162 | const int w = (pic_width + 1) & ~1; |
163 | do { |
164 | const int off = i * step; |
165 | const int shift = GetPrecisionShift(rgb_bit_depth); |
166 | if (rgb_bit_depth == 8) { |
167 | dst[i + 0 * w] = Shift(v: r_ptr[off], shift); |
168 | dst[i + 1 * w] = Shift(v: g_ptr[off], shift); |
169 | dst[i + 2 * w] = Shift(v: b_ptr[off], shift); |
170 | } else { |
171 | dst[i + 0 * w] = Shift(v: ((uint16_t*)r_ptr)[off], shift); |
172 | dst[i + 1 * w] = Shift(v: ((uint16_t*)g_ptr)[off], shift); |
173 | dst[i + 2 * w] = Shift(v: ((uint16_t*)b_ptr)[off], shift); |
174 | } |
175 | } while (++i < pic_width); |
176 | if (pic_width & 1) { // replicate rightmost pixel |
177 | dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; |
178 | dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; |
179 | dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; |
180 | } |
181 | } |
182 | |
183 | static void InterpolateTwoRows(const fixed_y_t* const best_y, |
184 | const fixed_t* prev_uv, |
185 | const fixed_t* cur_uv, |
186 | const fixed_t* next_uv, |
187 | int w, |
188 | fixed_y_t* out1, |
189 | fixed_y_t* out2, |
190 | int rgb_bit_depth) { |
191 | const int uv_w = w >> 1; |
192 | const int len = (w - 1) >> 1; // length to filter |
193 | int k = 3; |
194 | const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); |
195 | while (k-- > 0) { // process each R/G/B segments in turn |
196 | // special boundary case for i==0 |
197 | out1[0] = Filter2(A: cur_uv[0], B: prev_uv[0], W0: best_y[0], bit_depth); |
198 | out2[0] = Filter2(A: cur_uv[0], B: next_uv[0], W0: best_y[w], bit_depth); |
199 | |
200 | SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1, |
201 | bit_depth); |
202 | SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1, |
203 | bit_depth); |
204 | |
205 | // special boundary case for i == w - 1 when w is even |
206 | if (!(w & 1)) { |
207 | out1[w - 1] = Filter2(A: cur_uv[uv_w - 1], B: prev_uv[uv_w - 1], |
208 | W0: best_y[w - 1 + 0], bit_depth); |
209 | out2[w - 1] = Filter2(A: cur_uv[uv_w - 1], B: next_uv[uv_w - 1], |
210 | W0: best_y[w - 1 + w], bit_depth); |
211 | } |
212 | out1 += w; |
213 | out2 += w; |
214 | prev_uv += uv_w; |
215 | cur_uv += uv_w; |
216 | next_uv += uv_w; |
217 | } |
218 | } |
219 | |
220 | static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b, |
221 | const int coeffs[4], int sfix) { |
222 | const int srounder = 1 << (YUV_FIX + sfix - 1); |
223 | const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + |
224 | coeffs[3] + srounder; |
225 | return (luma >> (YUV_FIX + sfix)); |
226 | } |
227 | |
228 | static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, |
229 | uint8_t* y_ptr, int y_stride, uint8_t* u_ptr, |
230 | int u_stride, uint8_t* v_ptr, int v_stride, |
231 | int rgb_bit_depth, |
232 | int yuv_bit_depth, int width, int height, |
233 | const SharpYuvConversionMatrix* yuv_matrix) { |
234 | int i, j; |
235 | const fixed_t* const best_uv_base = best_uv; |
236 | const int w = (width + 1) & ~1; |
237 | const int h = (height + 1) & ~1; |
238 | const int uv_w = w >> 1; |
239 | const int uv_h = h >> 1; |
240 | const int sfix = GetPrecisionShift(rgb_bit_depth); |
241 | const int yuv_max = (1 << yuv_bit_depth) - 1; |
242 | |
243 | best_uv = best_uv_base; |
244 | j = 0; |
245 | do { |
246 | i = 0; |
247 | do { |
248 | const int off = (i >> 1); |
249 | const int W = best_y[i]; |
250 | const int r = best_uv[off + 0 * uv_w] + W; |
251 | const int g = best_uv[off + 1 * uv_w] + W; |
252 | const int b = best_uv[off + 2 * uv_w] + W; |
253 | const int y = RGBToYUVComponent(r, g, b, coeffs: yuv_matrix->rgb_to_y, sfix); |
254 | if (yuv_bit_depth <= 8) { |
255 | y_ptr[i] = clip_8b(v: y); |
256 | } else { |
257 | ((uint16_t*)y_ptr)[i] = clip(v: y, max: yuv_max); |
258 | } |
259 | } while (++i < width); |
260 | best_y += w; |
261 | best_uv += (j & 1) * 3 * uv_w; |
262 | y_ptr += y_stride; |
263 | } while (++j < height); |
264 | |
265 | best_uv = best_uv_base; |
266 | j = 0; |
267 | do { |
268 | i = 0; |
269 | do { |
270 | // Note r, g and b values here are off by W, but a constant offset on all |
271 | // 3 components doesn't change the value of u and v with a YCbCr matrix. |
272 | const int r = best_uv[i + 0 * uv_w]; |
273 | const int g = best_uv[i + 1 * uv_w]; |
274 | const int b = best_uv[i + 2 * uv_w]; |
275 | const int u = RGBToYUVComponent(r, g, b, coeffs: yuv_matrix->rgb_to_u, sfix); |
276 | const int v = RGBToYUVComponent(r, g, b, coeffs: yuv_matrix->rgb_to_v, sfix); |
277 | if (yuv_bit_depth <= 8) { |
278 | u_ptr[i] = clip_8b(v: u); |
279 | v_ptr[i] = clip_8b(v); |
280 | } else { |
281 | ((uint16_t*)u_ptr)[i] = clip(v: u, max: yuv_max); |
282 | ((uint16_t*)v_ptr)[i] = clip(v, max: yuv_max); |
283 | } |
284 | } while (++i < uv_w); |
285 | best_uv += 3 * uv_w; |
286 | u_ptr += u_stride; |
287 | v_ptr += v_stride; |
288 | } while (++j < uv_h); |
289 | return 1; |
290 | } |
291 | |
292 | //------------------------------------------------------------------------------ |
293 | // Main function |
294 | |
// Allocates nmemb * size bytes with malloc().
// Returns NULL when the byte count cannot be represented: either the 64-bit
// product nmemb * size would wrap around, or the product does not fit in
// size_t. Otherwise returns whatever malloc() returns.
static void* SafeMalloc(uint64_t nmemb, size_t size) {
  const uint64_t max_u64 = ~(uint64_t)0;
  uint64_t total_size;
  // Reject products that would wrap in 64 bits: a wrapped value could pass
  // the size_t check below and yield a buffer far smaller than requested.
  if (size != 0 && nmemb > max_u64 / size) return NULL;
  total_size = nmemb * (uint64_t)size;
  if (total_size != (size_t)total_size) return NULL;
  return malloc((size_t)total_size);
}

// Allocates a W x H array of elements of type T. The element count is computed
// in 64 bits. Note that W and H are each evaluated in their own type first.
#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((uint64_t)(W) * (H), sizeof(T)))
302 | |
303 | static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, |
304 | const uint8_t* b_ptr, int rgb_step, int rgb_stride, |
305 | int rgb_bit_depth, uint8_t* y_ptr, int y_stride, |
306 | uint8_t* u_ptr, int u_stride, uint8_t* v_ptr, |
307 | int v_stride, int yuv_bit_depth, int width, |
308 | int height, |
309 | const SharpYuvConversionMatrix* yuv_matrix, |
310 | SharpYuvTransferFunctionType transfer_type) { |
311 | // we expand the right/bottom border if needed |
312 | const int w = (width + 1) & ~1; |
313 | const int h = (height + 1) & ~1; |
314 | const int uv_w = w >> 1; |
315 | const int uv_h = h >> 1; |
316 | const int y_bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); |
317 | uint64_t prev_diff_y_sum = ~0; |
318 | int j, iter; |
319 | |
320 | // TODO(skal): allocate one big memory chunk. But for now, it's easier |
321 | // for valgrind debugging to have several chunks. |
322 | fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch |
323 | fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); |
324 | fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); |
325 | fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); |
326 | fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); |
327 | fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); |
328 | fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); |
329 | fixed_y_t* best_y = best_y_base; |
330 | fixed_y_t* target_y = target_y_base; |
331 | fixed_t* best_uv = best_uv_base; |
332 | fixed_t* target_uv = target_uv_base; |
333 | const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); |
334 | int ok; |
335 | assert(w > 0); |
336 | assert(h > 0); |
337 | |
338 | if (best_y_base == NULL || best_uv_base == NULL || |
339 | target_y_base == NULL || target_uv_base == NULL || |
340 | best_rgb_y == NULL || best_rgb_uv == NULL || |
341 | tmp_buffer == NULL) { |
342 | ok = 0; |
343 | goto End; |
344 | } |
345 | |
346 | // Import RGB samples to W/RGB representation. |
347 | for (j = 0; j < height; j += 2) { |
348 | const int is_last_row = (j == height - 1); |
349 | fixed_y_t* const src1 = tmp_buffer + 0 * w; |
350 | fixed_y_t* const src2 = tmp_buffer + 3 * w; |
351 | |
352 | // prepare two rows of input |
353 | ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, pic_width: width, |
354 | dst: src1); |
355 | if (!is_last_row) { |
356 | ImportOneRow(r_ptr: r_ptr + rgb_stride, g_ptr: g_ptr + rgb_stride, b_ptr: b_ptr + rgb_stride, |
357 | rgb_step, rgb_bit_depth, pic_width: width, dst: src2); |
358 | } else { |
359 | memcpy(dest: src2, src: src1, n: 3 * w * sizeof(*src2)); |
360 | } |
361 | StoreGray(rgb: src1, y: best_y + 0, w); |
362 | StoreGray(rgb: src2, y: best_y + w, w); |
363 | |
364 | UpdateW(src: src1, dst: target_y, w, rgb_bit_depth, transfer_type); |
365 | UpdateW(src: src2, dst: target_y + w, w, rgb_bit_depth, transfer_type); |
366 | UpdateChroma(src1, src2, dst: target_uv, uv_w, rgb_bit_depth, transfer_type); |
367 | memcpy(dest: best_uv, src: target_uv, n: 3 * uv_w * sizeof(*best_uv)); |
368 | best_y += 2 * w; |
369 | best_uv += 3 * uv_w; |
370 | target_y += 2 * w; |
371 | target_uv += 3 * uv_w; |
372 | r_ptr += 2 * rgb_stride; |
373 | g_ptr += 2 * rgb_stride; |
374 | b_ptr += 2 * rgb_stride; |
375 | } |
376 | |
377 | // Iterate and resolve clipping conflicts. |
378 | for (iter = 0; iter < kNumIterations; ++iter) { |
379 | const fixed_t* cur_uv = best_uv_base; |
380 | const fixed_t* prev_uv = best_uv_base; |
381 | uint64_t diff_y_sum = 0; |
382 | |
383 | best_y = best_y_base; |
384 | best_uv = best_uv_base; |
385 | target_y = target_y_base; |
386 | target_uv = target_uv_base; |
387 | j = 0; |
388 | do { |
389 | fixed_y_t* const src1 = tmp_buffer + 0 * w; |
390 | fixed_y_t* const src2 = tmp_buffer + 3 * w; |
391 | { |
392 | const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); |
393 | InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, |
394 | out1: src1, out2: src2, rgb_bit_depth); |
395 | prev_uv = cur_uv; |
396 | cur_uv = next_uv; |
397 | } |
398 | |
399 | UpdateW(src: src1, dst: best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type); |
400 | UpdateW(src: src2, dst: best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type); |
401 | UpdateChroma(src1, src2, dst: best_rgb_uv, uv_w, rgb_bit_depth, transfer_type); |
402 | |
403 | // update two rows of Y and one row of RGB |
404 | diff_y_sum += |
405 | SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w, y_bit_depth); |
406 | SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); |
407 | |
408 | best_y += 2 * w; |
409 | best_uv += 3 * uv_w; |
410 | target_y += 2 * w; |
411 | target_uv += 3 * uv_w; |
412 | j += 2; |
413 | } while (j < h); |
414 | // test exit condition |
415 | if (iter > 0) { |
416 | if (diff_y_sum < diff_y_threshold) break; |
417 | if (diff_y_sum > prev_diff_y_sum) break; |
418 | } |
419 | prev_diff_y_sum = diff_y_sum; |
420 | } |
421 | |
422 | // final reconstruction |
423 | ok = ConvertWRGBToYUV(best_y: best_y_base, best_uv: best_uv_base, y_ptr, y_stride, u_ptr, |
424 | u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth, |
425 | width, height, yuv_matrix); |
426 | |
427 | End: |
428 | free(ptr: best_y_base); |
429 | free(ptr: best_uv_base); |
430 | free(ptr: target_y_base); |
431 | free(ptr: target_uv_base); |
432 | free(ptr: best_rgb_y); |
433 | free(ptr: best_rgb_uv); |
434 | free(ptr: tmp_buffer); |
435 | return ok; |
436 | } |
437 | |
438 | #undef SAFE_ALLOC |
439 | |
// Helper macros serializing the body of SharpYuvInit().
// With pthread support, LOCK_ACCESS declares a function-local static mutex and
// locks it (returning from the enclosing void function if locking fails), and
// UNLOCK_ACCESS_AND_RETURN unlocks that mutex and returns. Both must therefore
// be used inside the same function. Without pthread support they reduce to a
// no-op and a plain 'return'.
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>  // NOLINT

#define LOCK_ACCESS \
    static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
    if (pthread_mutex_lock(&sharpyuv_lock)) return
#define UNLOCK_ACCESS_AND_RETURN                  \
    do {                                          \
      (void)pthread_mutex_unlock(&sharpyuv_lock); \
      return;                                     \
    } while (0)
#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
#define LOCK_ACCESS do {} while (0)
#define UNLOCK_ACCESS_AND_RETURN return
#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
455 | |
456 | // Hidden exported init function. |
457 | // By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed, |
458 | // users can declare it as extern and call it with an alternate VP8CPUInfo |
459 | // function. |
460 | extern VP8CPUInfo SharpYuvGetCPUInfo; |
461 | SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func); |
462 | void SharpYuvInit(VP8CPUInfo cpu_info_func) { |
463 | static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used = |
464 | (VP8CPUInfo)&sharpyuv_last_cpuinfo_used; |
465 | LOCK_ACCESS; |
466 | // Only update SharpYuvGetCPUInfo when called from external code to avoid a |
467 | // race on reading the value in SharpYuvConvert(). |
468 | if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) { |
469 | SharpYuvGetCPUInfo = cpu_info_func; |
470 | } |
471 | if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) { |
472 | UNLOCK_ACCESS_AND_RETURN; |
473 | } |
474 | |
475 | SharpYuvInitDsp(); |
476 | SharpYuvInitGammaTables(); |
477 | |
478 | sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo; |
479 | UNLOCK_ACCESS_AND_RETURN; |
480 | } |
481 | |
482 | int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr, |
483 | int rgb_step, int rgb_stride, int rgb_bit_depth, |
484 | void* y_ptr, int y_stride, void* u_ptr, int u_stride, |
485 | void* v_ptr, int v_stride, int yuv_bit_depth, int width, |
486 | int height, const SharpYuvConversionMatrix* yuv_matrix) { |
487 | SharpYuvOptions options; |
488 | options.yuv_matrix = yuv_matrix; |
489 | options.transfer_type = kSharpYuvTransferFunctionSrgb; |
490 | return SharpYuvConvertWithOptions( |
491 | r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride, |
492 | u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, options: &options); |
493 | } |
494 | |
495 | int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix, |
496 | SharpYuvOptions* options, int version) { |
497 | const int major = (version >> 24); |
498 | const int minor = (version >> 16) & 0xff; |
499 | if (options == NULL || yuv_matrix == NULL || |
500 | (major == SHARPYUV_VERSION_MAJOR && major == 0 && |
501 | minor != SHARPYUV_VERSION_MINOR) || |
502 | (major != SHARPYUV_VERSION_MAJOR)) { |
503 | return 0; |
504 | } |
505 | options->yuv_matrix = yuv_matrix; |
506 | options->transfer_type = kSharpYuvTransferFunctionSrgb; |
507 | return 1; |
508 | } |
509 | |
510 | int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr, |
511 | const void* b_ptr, int rgb_step, int rgb_stride, |
512 | int rgb_bit_depth, void* y_ptr, int y_stride, |
513 | void* u_ptr, int u_stride, void* v_ptr, |
514 | int v_stride, int yuv_bit_depth, int width, |
515 | int height, const SharpYuvOptions* options) { |
516 | const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix; |
517 | SharpYuvTransferFunctionType transfer_type = options->transfer_type; |
518 | SharpYuvConversionMatrix scaled_matrix; |
519 | const int rgb_max = (1 << rgb_bit_depth) - 1; |
520 | const int rgb_round = 1 << (rgb_bit_depth - 1); |
521 | const int yuv_max = (1 << yuv_bit_depth) - 1; |
522 | const int sfix = GetPrecisionShift(rgb_bit_depth); |
523 | |
524 | if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX || |
525 | r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL || |
526 | u_ptr == NULL || v_ptr == NULL) { |
527 | return 0; |
528 | } |
529 | if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 && |
530 | rgb_bit_depth != 16) { |
531 | return 0; |
532 | } |
533 | if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) { |
534 | return 0; |
535 | } |
536 | if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride % 2 != 0)) { |
537 | // Step/stride should be even for uint16_t buffers. |
538 | return 0; |
539 | } |
540 | if (yuv_bit_depth > 8 && |
541 | (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) { |
542 | // Stride should be even for uint16_t buffers. |
543 | return 0; |
544 | } |
545 | // The address of the function pointer is used to avoid a read race. |
546 | SharpYuvInit(cpu_info_func: (VP8CPUInfo)&SharpYuvGetCPUInfo); |
547 | |
548 | // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the |
549 | // rgb->yuv conversion matrix. |
550 | if (rgb_bit_depth == yuv_bit_depth) { |
551 | memcpy(dest: &scaled_matrix, src: yuv_matrix, n: sizeof(scaled_matrix)); |
552 | } else { |
553 | int i; |
554 | for (i = 0; i < 3; ++i) { |
555 | scaled_matrix.rgb_to_y[i] = |
556 | (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max; |
557 | scaled_matrix.rgb_to_u[i] = |
558 | (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max; |
559 | scaled_matrix.rgb_to_v[i] = |
560 | (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max; |
561 | } |
562 | } |
563 | // Also incorporate precision change scaling. |
564 | scaled_matrix.rgb_to_y[3] = Shift(v: yuv_matrix->rgb_to_y[3], shift: sfix); |
565 | scaled_matrix.rgb_to_u[3] = Shift(v: yuv_matrix->rgb_to_u[3], shift: sfix); |
566 | scaled_matrix.rgb_to_v[3] = Shift(v: yuv_matrix->rgb_to_v[3], shift: sfix); |
567 | |
568 | return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, |
569 | rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride, |
570 | v_ptr, v_stride, yuv_bit_depth, width, height, |
571 | yuv_matrix: &scaled_matrix, transfer_type); |
572 | } |
573 | |
574 | //------------------------------------------------------------------------------ |
575 | |