| 1 | // Copyright 2022 Google Inc. All Rights Reserved. | 
| 2 | // | 
| 3 | // Use of this source code is governed by a BSD-style license | 
| 4 | // that can be found in the COPYING file in the root of the source | 
| 5 | // tree. An additional intellectual property rights grant can be found | 
| 6 | // in the file PATENTS. All contributing project authors may | 
| 7 | // be found in the AUTHORS file in the root of the source tree. | 
| 8 | // ----------------------------------------------------------------------------- | 
| 9 | // | 
| 10 | // Sharp RGB to YUV conversion. | 
| 11 | // | 
| 12 | // Author: Skal (pascal.massimino@gmail.com) | 
| 13 |  | 
| 14 | #include "sharpyuv/sharpyuv.h" | 
| 15 |  | 
| 16 | #include <assert.h> | 
| 17 | #include <limits.h> | 
| 18 | #include <stddef.h> | 
| 19 | #include <stdlib.h> | 
| 20 | #include <string.h> | 
| 21 |  | 
| 22 | #include "src/webp/types.h" | 
| 23 | #include "sharpyuv/sharpyuv_cpu.h" | 
| 24 | #include "sharpyuv/sharpyuv_dsp.h" | 
| 25 | #include "sharpyuv/sharpyuv_gamma.h" | 
| 26 |  | 
| 27 | //------------------------------------------------------------------------------ | 
| 28 |  | 
| 29 | int SharpYuvGetVersion(void) { | 
| 30 |   return SHARPYUV_VERSION; | 
| 31 | } | 
| 32 |  | 
| 33 | //------------------------------------------------------------------------------ | 
| 34 | // Sharp RGB->YUV conversion | 
| 35 |  | 
// Upper bound on the number of refinement passes run by the converter.
static const int kNumIterations = 4;

// Number of fractional bits of the fixed-point RGB->YUV arithmetic.
#define YUV_FIX 16
// Rounding term: one half expressed with YUV_FIX fractional bits.
static const int kYuvHalf = 1 << (YUV_FIX - 1);

// Largest working bit depth, so that intermediate calculations fit in 16 bits.
static const int kMaxBitDepth = 14;
| 43 |  | 
| 44 | // Returns the precision shift to use based on the input rgb_bit_depth. | 
| 45 | static int GetPrecisionShift(int rgb_bit_depth) { | 
| 46 |   // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove | 
| 47 |   // bits if needed. | 
| 48 |   return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2 | 
| 49 |                                                : (kMaxBitDepth - rgb_bit_depth); | 
| 50 | } | 
| 51 |  | 
// Signed 16-bit type with extra precision, used for the UV-like planes.
typedef int16_t fixed_t;
// Unsigned 16-bit type with extra precision, used for the W (luma-like) plane.
typedef uint16_t fixed_y_t;
| 54 |  | 
| 55 | //------------------------------------------------------------------------------ | 
| 56 |  | 
| 57 | static uint8_t clip_8b(fixed_t v) { | 
| 58 |   return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; | 
| 59 | } | 
| 60 |  | 
| 61 | static uint16_t clip(fixed_t v, int max) { | 
| 62 |   return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; | 
| 63 | } | 
| 64 |  | 
| 65 | static fixed_y_t clip_bit_depth(int y, int bit_depth) { | 
| 66 |   const int max = (1 << bit_depth) - 1; | 
| 67 |   return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max; | 
| 68 | } | 
| 69 |  | 
| 70 | //------------------------------------------------------------------------------ | 
| 71 |  | 
| 72 | static int RGBToGray(int64_t r, int64_t g, int64_t b) { | 
| 73 |   const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf; | 
| 74 |   return (int)(luma >> YUV_FIX); | 
| 75 | } | 
| 76 |  | 
| 77 | static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d, | 
| 78 |                           int rgb_bit_depth, | 
| 79 |                           SharpYuvTransferFunctionType transfer_type) { | 
| 80 |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); | 
| 81 |   const uint32_t A = SharpYuvGammaToLinear(v: a, bit_depth, transfer_type); | 
| 82 |   const uint32_t B = SharpYuvGammaToLinear(v: b, bit_depth, transfer_type); | 
| 83 |   const uint32_t C = SharpYuvGammaToLinear(v: c, bit_depth, transfer_type); | 
| 84 |   const uint32_t D = SharpYuvGammaToLinear(v: d, bit_depth, transfer_type); | 
| 85 |   return SharpYuvLinearToGamma(value: (A + B + C + D + 2) >> 2, bit_depth, | 
| 86 |                                transfer_type); | 
| 87 | } | 
| 88 |  | 
| 89 | static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w, | 
| 90 |                                 int rgb_bit_depth, | 
| 91 |                                 SharpYuvTransferFunctionType transfer_type) { | 
| 92 |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); | 
| 93 |   int i = 0; | 
| 94 |   do { | 
| 95 |     const uint32_t R = | 
| 96 |         SharpYuvGammaToLinear(v: src[0 * w + i], bit_depth, transfer_type); | 
| 97 |     const uint32_t G = | 
| 98 |         SharpYuvGammaToLinear(v: src[1 * w + i], bit_depth, transfer_type); | 
| 99 |     const uint32_t B = | 
| 100 |         SharpYuvGammaToLinear(v: src[2 * w + i], bit_depth, transfer_type); | 
| 101 |     const uint32_t Y = RGBToGray(r: R, g: G, b: B); | 
| 102 |     dst[i] = (fixed_y_t)SharpYuvLinearToGamma(value: Y, bit_depth, transfer_type); | 
| 103 |   } while (++i < w); | 
| 104 | } | 
| 105 |  | 
| 106 | static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, | 
| 107 |                          fixed_t* dst, int uv_w, int rgb_bit_depth, | 
| 108 |                          SharpYuvTransferFunctionType transfer_type) { | 
| 109 |   int i = 0; | 
| 110 |   do { | 
| 111 |     const int r = | 
| 112 |         ScaleDown(a: src1[0 * uv_w + 0], b: src1[0 * uv_w + 1], c: src2[0 * uv_w + 0], | 
| 113 |                   d: src2[0 * uv_w + 1], rgb_bit_depth, transfer_type); | 
| 114 |     const int g = | 
| 115 |         ScaleDown(a: src1[2 * uv_w + 0], b: src1[2 * uv_w + 1], c: src2[2 * uv_w + 0], | 
| 116 |                   d: src2[2 * uv_w + 1], rgb_bit_depth, transfer_type); | 
| 117 |     const int b = | 
| 118 |         ScaleDown(a: src1[4 * uv_w + 0], b: src1[4 * uv_w + 1], c: src2[4 * uv_w + 0], | 
| 119 |                   d: src2[4 * uv_w + 1], rgb_bit_depth, transfer_type); | 
| 120 |     const int W = RGBToGray(r, g, b); | 
| 121 |     dst[0 * uv_w] = (fixed_t)(r - W); | 
| 122 |     dst[1 * uv_w] = (fixed_t)(g - W); | 
| 123 |     dst[2 * uv_w] = (fixed_t)(b - W); | 
| 124 |     dst  += 1; | 
| 125 |     src1 += 2; | 
| 126 |     src2 += 2; | 
| 127 |   } while (++i < uv_w); | 
| 128 | } | 
| 129 |  | 
| 130 | static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { | 
| 131 |   int i = 0; | 
| 132 |   assert(w > 0); | 
| 133 |   do { | 
| 134 |     y[i] = RGBToGray(r: rgb[0 * w + i], g: rgb[1 * w + i], b: rgb[2 * w + i]); | 
| 135 |   } while (++i < w); | 
| 136 | } | 
| 137 |  | 
| 138 | //------------------------------------------------------------------------------ | 
| 139 |  | 
| 140 | static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) { | 
| 141 |   const int v0 = (A * 3 + B + 2) >> 2; | 
| 142 |   return clip_bit_depth(y: v0 + W0, bit_depth); | 
| 143 | } | 
| 144 |  | 
| 145 | //------------------------------------------------------------------------------ | 
| 146 |  | 
| 147 | static WEBP_INLINE int Shift(int v, int shift) { | 
| 148 |   return (shift >= 0) ? (v << shift) : (v >> -shift); | 
| 149 | } | 
| 150 |  | 
| 151 | static void ImportOneRow(const uint8_t* const r_ptr, | 
| 152 |                          const uint8_t* const g_ptr, | 
| 153 |                          const uint8_t* const b_ptr, | 
| 154 |                          int rgb_step, | 
| 155 |                          int rgb_bit_depth, | 
| 156 |                          int pic_width, | 
| 157 |                          fixed_y_t* const dst) { | 
| 158 |   // Convert the rgb_step from a number of bytes to a number of uint8_t or | 
| 159 |   // uint16_t values depending the bit depth. | 
| 160 |   const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step; | 
| 161 |   int i = 0; | 
| 162 |   const int w = (pic_width + 1) & ~1; | 
| 163 |   do { | 
| 164 |     const int off = i * step; | 
| 165 |     const int shift = GetPrecisionShift(rgb_bit_depth); | 
| 166 |     if (rgb_bit_depth == 8) { | 
| 167 |       dst[i + 0 * w] = Shift(v: r_ptr[off], shift); | 
| 168 |       dst[i + 1 * w] = Shift(v: g_ptr[off], shift); | 
| 169 |       dst[i + 2 * w] = Shift(v: b_ptr[off], shift); | 
| 170 |     } else { | 
| 171 |       dst[i + 0 * w] = Shift(v: ((uint16_t*)r_ptr)[off], shift); | 
| 172 |       dst[i + 1 * w] = Shift(v: ((uint16_t*)g_ptr)[off], shift); | 
| 173 |       dst[i + 2 * w] = Shift(v: ((uint16_t*)b_ptr)[off], shift); | 
| 174 |     } | 
| 175 |   } while (++i < pic_width); | 
| 176 |   if (pic_width & 1) {  // replicate rightmost pixel | 
| 177 |     dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; | 
| 178 |     dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; | 
| 179 |     dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; | 
| 180 |   } | 
| 181 | } | 
| 182 |  | 
| 183 | static void InterpolateTwoRows(const fixed_y_t* const best_y, | 
| 184 |                                const fixed_t* prev_uv, | 
| 185 |                                const fixed_t* cur_uv, | 
| 186 |                                const fixed_t* next_uv, | 
| 187 |                                int w, | 
| 188 |                                fixed_y_t* out1, | 
| 189 |                                fixed_y_t* out2, | 
| 190 |                                int rgb_bit_depth) { | 
| 191 |   const int uv_w = w >> 1; | 
| 192 |   const int len = (w - 1) >> 1;   // length to filter | 
| 193 |   int k = 3; | 
| 194 |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); | 
| 195 |   while (k-- > 0) {   // process each R/G/B segments in turn | 
| 196 |     // special boundary case for i==0 | 
| 197 |     out1[0] = Filter2(A: cur_uv[0], B: prev_uv[0], W0: best_y[0], bit_depth); | 
| 198 |     out2[0] = Filter2(A: cur_uv[0], B: next_uv[0], W0: best_y[w], bit_depth); | 
| 199 |  | 
| 200 |     SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1, | 
| 201 |                       bit_depth); | 
| 202 |     SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1, | 
| 203 |                       bit_depth); | 
| 204 |  | 
| 205 |     // special boundary case for i == w - 1 when w is even | 
| 206 |     if (!(w & 1)) { | 
| 207 |       out1[w - 1] = Filter2(A: cur_uv[uv_w - 1], B: prev_uv[uv_w - 1], | 
| 208 |                             W0: best_y[w - 1 + 0], bit_depth); | 
| 209 |       out2[w - 1] = Filter2(A: cur_uv[uv_w - 1], B: next_uv[uv_w - 1], | 
| 210 |                             W0: best_y[w - 1 + w], bit_depth); | 
| 211 |     } | 
| 212 |     out1 += w; | 
| 213 |     out2 += w; | 
| 214 |     prev_uv += uv_w; | 
| 215 |     cur_uv  += uv_w; | 
| 216 |     next_uv += uv_w; | 
| 217 |   } | 
| 218 | } | 
| 219 |  | 
| 220 | static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b, | 
| 221 |                                          const int coeffs[4], int sfix) { | 
| 222 |   const int srounder = 1 << (YUV_FIX + sfix - 1); | 
| 223 |   const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + | 
| 224 |                    coeffs[3] + srounder; | 
| 225 |   return (luma >> (YUV_FIX + sfix)); | 
| 226 | } | 
| 227 |  | 
| 228 | static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, | 
| 229 |                             uint8_t* y_ptr, int y_stride, uint8_t* u_ptr, | 
| 230 |                             int u_stride, uint8_t* v_ptr, int v_stride, | 
| 231 |                             int rgb_bit_depth, | 
| 232 |                             int yuv_bit_depth, int width, int height, | 
| 233 |                             const SharpYuvConversionMatrix* yuv_matrix) { | 
| 234 |   int i, j; | 
| 235 |   const fixed_t* const best_uv_base = best_uv; | 
| 236 |   const int w = (width + 1) & ~1; | 
| 237 |   const int h = (height + 1) & ~1; | 
| 238 |   const int uv_w = w >> 1; | 
| 239 |   const int uv_h = h >> 1; | 
| 240 |   const int sfix = GetPrecisionShift(rgb_bit_depth); | 
| 241 |   const int yuv_max = (1 << yuv_bit_depth) - 1; | 
| 242 |  | 
| 243 |   best_uv = best_uv_base; | 
| 244 |   j = 0; | 
| 245 |   do { | 
| 246 |     i = 0; | 
| 247 |     do { | 
| 248 |       const int off = (i >> 1); | 
| 249 |       const int W = best_y[i]; | 
| 250 |       const int r = best_uv[off + 0 * uv_w] + W; | 
| 251 |       const int g = best_uv[off + 1 * uv_w] + W; | 
| 252 |       const int b = best_uv[off + 2 * uv_w] + W; | 
| 253 |       const int y = RGBToYUVComponent(r, g, b, coeffs: yuv_matrix->rgb_to_y, sfix); | 
| 254 |       if (yuv_bit_depth <= 8) { | 
| 255 |         y_ptr[i] = clip_8b(v: y); | 
| 256 |       } else { | 
| 257 |         ((uint16_t*)y_ptr)[i] = clip(v: y, max: yuv_max); | 
| 258 |       } | 
| 259 |     } while (++i < width); | 
| 260 |     best_y += w; | 
| 261 |     best_uv += (j & 1) * 3 * uv_w; | 
| 262 |     y_ptr += y_stride; | 
| 263 |   } while (++j < height); | 
| 264 |  | 
| 265 |   best_uv = best_uv_base; | 
| 266 |   j = 0; | 
| 267 |   do { | 
| 268 |     i = 0; | 
| 269 |     do { | 
| 270 |       // Note r, g and b values here are off by W, but a constant offset on all | 
| 271 |       // 3 components doesn't change the value of u and v with a YCbCr matrix. | 
| 272 |       const int r = best_uv[i + 0 * uv_w]; | 
| 273 |       const int g = best_uv[i + 1 * uv_w]; | 
| 274 |       const int b = best_uv[i + 2 * uv_w]; | 
| 275 |       const int u = RGBToYUVComponent(r, g, b, coeffs: yuv_matrix->rgb_to_u, sfix); | 
| 276 |       const int v = RGBToYUVComponent(r, g, b, coeffs: yuv_matrix->rgb_to_v, sfix); | 
| 277 |       if (yuv_bit_depth <= 8) { | 
| 278 |         u_ptr[i] = clip_8b(v: u); | 
| 279 |         v_ptr[i] = clip_8b(v); | 
| 280 |       } else { | 
| 281 |         ((uint16_t*)u_ptr)[i] = clip(v: u, max: yuv_max); | 
| 282 |         ((uint16_t*)v_ptr)[i] = clip(v, max: yuv_max); | 
| 283 |       } | 
| 284 |     } while (++i < uv_w); | 
| 285 |     best_uv += 3 * uv_w; | 
| 286 |     u_ptr += u_stride; | 
| 287 |     v_ptr += v_stride; | 
| 288 |   } while (++j < uv_h); | 
| 289 |   return 1; | 
| 290 | } | 
| 291 |  | 
| 292 | //------------------------------------------------------------------------------ | 
| 293 | // Main function | 
| 294 |  | 
// Allocates nmemb * size bytes with malloc().
// Returns NULL if the product does not fit in 64 bits, if it does not fit in
// a size_t, or if malloc() itself fails. The caller owns the returned memory
// and must release it with free().
static void* SafeMalloc(uint64_t nmemb, size_t size) {
  uint64_t total_size;
  // Reject products that would wrap around in 64 bits: a wrapped value could
  // otherwise pass the size_t check below and yield an undersized buffer.
  if (size != 0 && nmemb > ~(uint64_t)0 / size) return NULL;
  total_size = nmemb * (uint64_t)size;
  if (total_size != (size_t)total_size) return NULL;
  return malloc((size_t)total_size);
}

// Allocates a W x H array of elements of type T (the W * H product is
// evaluated in 64 bits). Yields NULL on failure.
#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((uint64_t)(W) * (H), sizeof(T)))
| 302 |  | 
| 303 | static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, | 
| 304 |                             const uint8_t* b_ptr, int rgb_step, int rgb_stride, | 
| 305 |                             int rgb_bit_depth, uint8_t* y_ptr, int y_stride, | 
| 306 |                             uint8_t* u_ptr, int u_stride, uint8_t* v_ptr, | 
| 307 |                             int v_stride, int yuv_bit_depth, int width, | 
| 308 |                             int height, | 
| 309 |                             const SharpYuvConversionMatrix* yuv_matrix, | 
| 310 |                             SharpYuvTransferFunctionType transfer_type) { | 
| 311 |   // we expand the right/bottom border if needed | 
| 312 |   const int w = (width + 1) & ~1; | 
| 313 |   const int h = (height + 1) & ~1; | 
| 314 |   const int uv_w = w >> 1; | 
| 315 |   const int uv_h = h >> 1; | 
| 316 |   const int y_bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); | 
| 317 |   uint64_t prev_diff_y_sum = ~0; | 
| 318 |   int j, iter; | 
| 319 |  | 
| 320 |   // TODO(skal): allocate one big memory chunk. But for now, it's easier | 
| 321 |   // for valgrind debugging to have several chunks. | 
| 322 |   fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch | 
| 323 |   fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); | 
| 324 |   fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); | 
| 325 |   fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); | 
| 326 |   fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); | 
| 327 |   fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); | 
| 328 |   fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); | 
| 329 |   fixed_y_t* best_y = best_y_base; | 
| 330 |   fixed_y_t* target_y = target_y_base; | 
| 331 |   fixed_t* best_uv = best_uv_base; | 
| 332 |   fixed_t* target_uv = target_uv_base; | 
| 333 |   const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); | 
| 334 |   int ok; | 
| 335 |   assert(w > 0); | 
| 336 |   assert(h > 0); | 
| 337 |  | 
| 338 |   if (best_y_base == NULL || best_uv_base == NULL || | 
| 339 |       target_y_base == NULL || target_uv_base == NULL || | 
| 340 |       best_rgb_y == NULL || best_rgb_uv == NULL || | 
| 341 |       tmp_buffer == NULL) { | 
| 342 |     ok = 0; | 
| 343 |     goto End; | 
| 344 |   } | 
| 345 |  | 
| 346 |   // Import RGB samples to W/RGB representation. | 
| 347 |   for (j = 0; j < height; j += 2) { | 
| 348 |     const int is_last_row = (j == height - 1); | 
| 349 |     fixed_y_t* const src1 = tmp_buffer + 0 * w; | 
| 350 |     fixed_y_t* const src2 = tmp_buffer + 3 * w; | 
| 351 |  | 
| 352 |     // prepare two rows of input | 
| 353 |     ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, pic_width: width, | 
| 354 |                  dst: src1); | 
| 355 |     if (!is_last_row) { | 
| 356 |       ImportOneRow(r_ptr: r_ptr + rgb_stride, g_ptr: g_ptr + rgb_stride, b_ptr: b_ptr + rgb_stride, | 
| 357 |                    rgb_step, rgb_bit_depth, pic_width: width, dst: src2); | 
| 358 |     } else { | 
| 359 |       memcpy(dest: src2, src: src1, n: 3 * w * sizeof(*src2)); | 
| 360 |     } | 
| 361 |     StoreGray(rgb: src1, y: best_y + 0, w); | 
| 362 |     StoreGray(rgb: src2, y: best_y + w, w); | 
| 363 |  | 
| 364 |     UpdateW(src: src1, dst: target_y, w, rgb_bit_depth, transfer_type); | 
| 365 |     UpdateW(src: src2, dst: target_y + w, w, rgb_bit_depth, transfer_type); | 
| 366 |     UpdateChroma(src1, src2, dst: target_uv, uv_w, rgb_bit_depth, transfer_type); | 
| 367 |     memcpy(dest: best_uv, src: target_uv, n: 3 * uv_w * sizeof(*best_uv)); | 
| 368 |     best_y += 2 * w; | 
| 369 |     best_uv += 3 * uv_w; | 
| 370 |     target_y += 2 * w; | 
| 371 |     target_uv += 3 * uv_w; | 
| 372 |     r_ptr += 2 * rgb_stride; | 
| 373 |     g_ptr += 2 * rgb_stride; | 
| 374 |     b_ptr += 2 * rgb_stride; | 
| 375 |   } | 
| 376 |  | 
| 377 |   // Iterate and resolve clipping conflicts. | 
| 378 |   for (iter = 0; iter < kNumIterations; ++iter) { | 
| 379 |     const fixed_t* cur_uv = best_uv_base; | 
| 380 |     const fixed_t* prev_uv = best_uv_base; | 
| 381 |     uint64_t diff_y_sum = 0; | 
| 382 |  | 
| 383 |     best_y = best_y_base; | 
| 384 |     best_uv = best_uv_base; | 
| 385 |     target_y = target_y_base; | 
| 386 |     target_uv = target_uv_base; | 
| 387 |     j = 0; | 
| 388 |     do { | 
| 389 |       fixed_y_t* const src1 = tmp_buffer + 0 * w; | 
| 390 |       fixed_y_t* const src2 = tmp_buffer + 3 * w; | 
| 391 |       { | 
| 392 |         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); | 
| 393 |         InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, | 
| 394 |                            out1: src1, out2: src2, rgb_bit_depth); | 
| 395 |         prev_uv = cur_uv; | 
| 396 |         cur_uv = next_uv; | 
| 397 |       } | 
| 398 |  | 
| 399 |       UpdateW(src: src1, dst: best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type); | 
| 400 |       UpdateW(src: src2, dst: best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type); | 
| 401 |       UpdateChroma(src1, src2, dst: best_rgb_uv, uv_w, rgb_bit_depth, transfer_type); | 
| 402 |  | 
| 403 |       // update two rows of Y and one row of RGB | 
| 404 |       diff_y_sum += | 
| 405 |           SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w, y_bit_depth); | 
| 406 |       SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); | 
| 407 |  | 
| 408 |       best_y += 2 * w; | 
| 409 |       best_uv += 3 * uv_w; | 
| 410 |       target_y += 2 * w; | 
| 411 |       target_uv += 3 * uv_w; | 
| 412 |       j += 2; | 
| 413 |     } while (j < h); | 
| 414 |     // test exit condition | 
| 415 |     if (iter > 0) { | 
| 416 |       if (diff_y_sum < diff_y_threshold) break; | 
| 417 |       if (diff_y_sum > prev_diff_y_sum) break; | 
| 418 |     } | 
| 419 |     prev_diff_y_sum = diff_y_sum; | 
| 420 |   } | 
| 421 |  | 
| 422 |   // final reconstruction | 
| 423 |   ok = ConvertWRGBToYUV(best_y: best_y_base, best_uv: best_uv_base, y_ptr, y_stride, u_ptr, | 
| 424 |                         u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth, | 
| 425 |                         width, height, yuv_matrix); | 
| 426 |  | 
| 427 |  End: | 
| 428 |   free(ptr: best_y_base); | 
| 429 |   free(ptr: best_uv_base); | 
| 430 |   free(ptr: target_y_base); | 
| 431 |   free(ptr: target_uv_base); | 
| 432 |   free(ptr: best_rgb_y); | 
| 433 |   free(ptr: best_rgb_uv); | 
| 434 |   free(ptr: tmp_buffer); | 
| 435 |   return ok; | 
| 436 | } | 
| 437 |  | 
| 438 | #undef SAFE_ALLOC | 
| 439 |  | 
| 440 | #if defined(WEBP_USE_THREAD) && !defined(_WIN32) | 
| 441 | #include <pthread.h>  // NOLINT | 
| 442 |  | 
| 443 | #define LOCK_ACCESS \ | 
| 444 |     static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \ | 
| 445 |     if (pthread_mutex_lock(&sharpyuv_lock)) return | 
| 446 | #define UNLOCK_ACCESS_AND_RETURN                  \ | 
| 447 |     do {                                          \ | 
| 448 |       (void)pthread_mutex_unlock(&sharpyuv_lock); \ | 
| 449 |       return;                                     \ | 
| 450 |     } while (0) | 
| 451 | #else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) | 
| 452 | #define LOCK_ACCESS do {} while (0) | 
| 453 | #define UNLOCK_ACCESS_AND_RETURN return | 
| 454 | #endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32) | 
| 455 |  | 
| 456 | // Hidden exported init function. | 
| 457 | // By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed, | 
| 458 | // users can declare it as extern and call it with an alternate VP8CPUInfo | 
| 459 | // function. | 
| 460 | extern VP8CPUInfo SharpYuvGetCPUInfo; | 
| 461 | SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func); | 
| 462 | void SharpYuvInit(VP8CPUInfo cpu_info_func) { | 
| 463 |   static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used = | 
| 464 |       (VP8CPUInfo)&sharpyuv_last_cpuinfo_used; | 
| 465 |   LOCK_ACCESS; | 
| 466 |   // Only update SharpYuvGetCPUInfo when called from external code to avoid a | 
| 467 |   // race on reading the value in SharpYuvConvert(). | 
| 468 |   if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) { | 
| 469 |     SharpYuvGetCPUInfo = cpu_info_func; | 
| 470 |   } | 
| 471 |   if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) { | 
| 472 |     UNLOCK_ACCESS_AND_RETURN; | 
| 473 |   } | 
| 474 |  | 
| 475 |   SharpYuvInitDsp(); | 
| 476 |   SharpYuvInitGammaTables(); | 
| 477 |  | 
| 478 |   sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo; | 
| 479 |   UNLOCK_ACCESS_AND_RETURN; | 
| 480 | } | 
| 481 |  | 
| 482 | int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr, | 
| 483 |                     int rgb_step, int rgb_stride, int rgb_bit_depth, | 
| 484 |                     void* y_ptr, int y_stride, void* u_ptr, int u_stride, | 
| 485 |                     void* v_ptr, int v_stride, int yuv_bit_depth, int width, | 
| 486 |                     int height, const SharpYuvConversionMatrix* yuv_matrix) { | 
| 487 |   SharpYuvOptions options; | 
| 488 |   options.yuv_matrix = yuv_matrix; | 
| 489 |   options.transfer_type = kSharpYuvTransferFunctionSrgb; | 
| 490 |   return SharpYuvConvertWithOptions( | 
| 491 |       r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride, | 
| 492 |       u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, options: &options); | 
| 493 | } | 
| 494 |  | 
| 495 | int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix, | 
| 496 |                                 SharpYuvOptions* options, int version) { | 
| 497 |   const int major = (version >> 24); | 
| 498 |   const int minor = (version >> 16) & 0xff; | 
| 499 |   if (options == NULL || yuv_matrix == NULL || | 
| 500 |       (major == SHARPYUV_VERSION_MAJOR && major == 0 && | 
| 501 |        minor != SHARPYUV_VERSION_MINOR) || | 
| 502 |       (major != SHARPYUV_VERSION_MAJOR)) { | 
| 503 |     return 0; | 
| 504 |   } | 
| 505 |   options->yuv_matrix = yuv_matrix; | 
| 506 |   options->transfer_type = kSharpYuvTransferFunctionSrgb; | 
| 507 |   return 1; | 
| 508 | } | 
| 509 |  | 
| 510 | int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr, | 
| 511 |                                const void* b_ptr, int rgb_step, int rgb_stride, | 
| 512 |                                int rgb_bit_depth, void* y_ptr, int y_stride, | 
| 513 |                                void* u_ptr, int u_stride, void* v_ptr, | 
| 514 |                                int v_stride, int yuv_bit_depth, int width, | 
| 515 |                                int height, const SharpYuvOptions* options) { | 
| 516 |   const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix; | 
| 517 |   SharpYuvTransferFunctionType transfer_type = options->transfer_type; | 
| 518 |   SharpYuvConversionMatrix scaled_matrix; | 
| 519 |   const int rgb_max = (1 << rgb_bit_depth) - 1; | 
| 520 |   const int rgb_round = 1 << (rgb_bit_depth - 1); | 
| 521 |   const int yuv_max = (1 << yuv_bit_depth) - 1; | 
| 522 |   const int sfix = GetPrecisionShift(rgb_bit_depth); | 
| 523 |  | 
| 524 |   if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX || | 
| 525 |       r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL || | 
| 526 |       u_ptr == NULL || v_ptr == NULL) { | 
| 527 |     return 0; | 
| 528 |   } | 
| 529 |   if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 && | 
| 530 |       rgb_bit_depth != 16) { | 
| 531 |     return 0; | 
| 532 |   } | 
| 533 |   if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) { | 
| 534 |     return 0; | 
| 535 |   } | 
| 536 |   if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride % 2 != 0)) { | 
| 537 |     // Step/stride should be even for uint16_t buffers. | 
| 538 |     return 0; | 
| 539 |   } | 
| 540 |   if (yuv_bit_depth > 8 && | 
| 541 |       (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) { | 
| 542 |     // Stride should be even for uint16_t buffers. | 
| 543 |     return 0; | 
| 544 |   } | 
| 545 |   // The address of the function pointer is used to avoid a read race. | 
| 546 |   SharpYuvInit(cpu_info_func: (VP8CPUInfo)&SharpYuvGetCPUInfo); | 
| 547 |  | 
| 548 |   // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the | 
| 549 |   // rgb->yuv conversion matrix. | 
| 550 |   if (rgb_bit_depth == yuv_bit_depth) { | 
| 551 |     memcpy(dest: &scaled_matrix, src: yuv_matrix, n: sizeof(scaled_matrix)); | 
| 552 |   } else { | 
| 553 |     int i; | 
| 554 |     for (i = 0; i < 3; ++i) { | 
| 555 |       scaled_matrix.rgb_to_y[i] = | 
| 556 |           (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max; | 
| 557 |       scaled_matrix.rgb_to_u[i] = | 
| 558 |           (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max; | 
| 559 |       scaled_matrix.rgb_to_v[i] = | 
| 560 |           (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max; | 
| 561 |     } | 
| 562 |   } | 
| 563 |   // Also incorporate precision change scaling. | 
| 564 |   scaled_matrix.rgb_to_y[3] = Shift(v: yuv_matrix->rgb_to_y[3], shift: sfix); | 
| 565 |   scaled_matrix.rgb_to_u[3] = Shift(v: yuv_matrix->rgb_to_u[3], shift: sfix); | 
| 566 |   scaled_matrix.rgb_to_v[3] = Shift(v: yuv_matrix->rgb_to_v[3], shift: sfix); | 
| 567 |  | 
| 568 |   return DoSharpArgbToYuv( | 
| 569 |       r_ptr: (const uint8_t*)r_ptr, g_ptr: (const uint8_t*)g_ptr, b_ptr: (const uint8_t*)b_ptr, | 
| 570 |       rgb_step, rgb_stride, rgb_bit_depth, y_ptr: (uint8_t*)y_ptr, y_stride, | 
| 571 |       u_ptr: (uint8_t*)u_ptr, u_stride, v_ptr: (uint8_t*)v_ptr, v_stride, yuv_bit_depth, | 
| 572 |       width, height, yuv_matrix: &scaled_matrix, transfer_type); | 
| 573 | } | 
| 574 |  | 
| 575 | //------------------------------------------------------------------------------ | 
| 576 |  |