1 | // Copyright (C) 2022 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qcolortransform.h" |
5 | #include "qcolortransform_p.h" |
6 | |
7 | #include "qcolormatrix_p.h" |
8 | #include "qcolorspace_p.h" |
9 | #include "qcolortrc_p.h" |
10 | #include "qcolortrclut_p.h" |
11 | |
12 | #include <QtCore/qatomic.h> |
13 | #include <QtCore/qmath.h> |
14 | #include <QtGui/qcolor.h> |
15 | #include <QtGui/qimage.h> |
16 | #include <QtGui/qtransform.h> |
17 | #include <QtCore/private/qsimd_p.h> |
18 | |
19 | #include <qdebug.h> |
20 | |
21 | QT_BEGIN_NAMESPACE |
22 | |
23 | std::shared_ptr<QColorTrcLut> lutFromTrc(const QColorTrc &trc) |
24 | { |
25 | if (trc.m_type == QColorTrc::Type::Table) |
26 | return QColorTrcLut::fromTransferTable(transTable: trc.m_table); |
27 | if (trc.m_type == QColorTrc::Type::Function) |
28 | return QColorTrcLut::fromTransferFunction(transfn: trc.m_fun); |
29 | qWarning() << "TRC uninitialized" ; |
30 | return nullptr; |
31 | } |
32 | |
33 | void QColorTransformPrivate::updateLutsIn() const |
34 | { |
35 | if (colorSpaceIn->lut.generated.loadAcquire()) |
36 | return; |
37 | QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock); |
38 | if (colorSpaceIn->lut.generated.loadRelaxed()) |
39 | return; |
40 | |
41 | for (int i = 0; i < 3; ++i) { |
42 | if (!colorSpaceIn->trc[i].isValid()) |
43 | return; |
44 | } |
45 | |
46 | if (colorSpaceIn->trc[0] == colorSpaceIn->trc[1] && colorSpaceIn->trc[0] == colorSpaceIn->trc[2]) { |
47 | colorSpaceIn->lut[0] = lutFromTrc(trc: colorSpaceIn->trc[0]); |
48 | colorSpaceIn->lut[1] = colorSpaceIn->lut[0]; |
49 | colorSpaceIn->lut[2] = colorSpaceIn->lut[0]; |
50 | } else { |
51 | for (int i = 0; i < 3; ++i) |
52 | colorSpaceIn->lut[i] = lutFromTrc(trc: colorSpaceIn->trc[i]); |
53 | } |
54 | |
55 | colorSpaceIn->lut.generated.storeRelease(newValue: 1); |
56 | } |
57 | |
58 | void QColorTransformPrivate::updateLutsOut() const |
59 | { |
60 | if (colorSpaceOut->lut.generated.loadAcquire()) |
61 | return; |
62 | QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock); |
63 | if (colorSpaceOut->lut.generated.loadRelaxed()) |
64 | return; |
65 | for (int i = 0; i < 3; ++i) { |
66 | if (!colorSpaceOut->trc[i].isValid()) |
67 | return; |
68 | } |
69 | |
70 | if (colorSpaceOut->trc[0] == colorSpaceOut->trc[1] && colorSpaceOut->trc[0] == colorSpaceOut->trc[2]) { |
71 | colorSpaceOut->lut[0] = lutFromTrc(trc: colorSpaceOut->trc[0]); |
72 | colorSpaceOut->lut[1] = colorSpaceOut->lut[0]; |
73 | colorSpaceOut->lut[2] = colorSpaceOut->lut[0]; |
74 | } else { |
75 | for (int i = 0; i < 3; ++i) |
76 | colorSpaceOut->lut[i] = lutFromTrc(trc: colorSpaceOut->trc[i]); |
77 | } |
78 | |
79 | colorSpaceOut->lut.generated.storeRelease(newValue: 1); |
80 | } |
81 | |
82 | /*! |
83 | \class QColorTransform |
84 | \brief The QColorTransform class is a transformation between color spaces. |
85 | \since 5.14 |
86 | |
87 | \ingroup painting |
88 | \ingroup appearance |
89 | \inmodule QtGui |
90 | |
    QColorTransform is an instantiation of a transformation between color spaces.
    It can be applied to colors and pixels to convert them from one color space to
    another.
94 | |
95 | Setting up a QColorTransform takes some preprocessing, so keeping around |
96 | QColorTransforms that you need often is recommended, instead of generating |
97 | them on the fly. |
98 | */ |
99 | |
100 | |
// Copy constructor; compiler-generated (member-wise copy).
QColorTransform::QColorTransform(const QColorTransform &colorTransform) noexcept = default;

// Destructor; compiler-generated.
QColorTransform::~QColorTransform() = default;

// NOTE(review): presumably emits the out-of-line destructor for the
// QExplicitlySharedDataPointer<QColorTransformPrivate> specialization -
// confirm against the macro's definition.
QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QColorTransformPrivate)
106 | |
107 | /*! |
108 | \since 6.4 |
109 | Returns true if the color transform is the identity transform. |
110 | */ |
111 | bool QColorTransform::isIdentity() const noexcept |
112 | { |
113 | return !d || d->isIdentity(); |
114 | } |
115 | |
116 | /*! |
117 | \fn bool QColorTransform::operator==(const QColorTransform &ct1, const QColorTransform &ct2) |
118 | \since 6.4 |
119 | Returns true if \a ct1 defines the same color transformation as \a ct2. |
120 | */ |
121 | |
122 | /*! |
123 | \fn bool QColorTransform::operator!=(const QColorTransform &ct1, const QColorTransform &ct2) |
124 | \since 6.4 |
125 | Returns true if \a ct1 does not define the same transformation as \a ct2. |
126 | */ |
127 | |
128 | /*! \internal |
129 | */ |
130 | bool QColorTransform::compare(const QColorTransform &other) const |
131 | { |
132 | if (d == other.d) |
133 | return true; |
134 | if (bool(d) != bool(other.d)) |
135 | return d ? d->isIdentity() : other.d->isIdentity(); |
136 | if (d->colorMatrix != other.d->colorMatrix) |
137 | return false; |
138 | if (bool(d->colorSpaceIn) != bool(other.d->colorSpaceIn)) |
139 | return false; |
140 | if (bool(d->colorSpaceOut) != bool(other.d->colorSpaceOut)) |
141 | return false; |
142 | for (int i = 0; i < 3; ++i) { |
143 | if (d->colorSpaceIn && d->colorSpaceIn->trc[i] != other.d->colorSpaceIn->trc[i]) |
144 | return false; |
145 | if (d->colorSpaceOut && d->colorSpaceOut->trc[i] != other.d->colorSpaceOut->trc[i]) |
146 | return false; |
147 | } |
148 | return true; |
149 | } |
150 | |
151 | /*! |
152 | Applies the color transformation on the QRgb value \a argb. |
153 | |
154 | The input should be opaque or unpremultiplied. |
155 | */ |
156 | QRgb QColorTransform::map(QRgb argb) const |
157 | { |
158 | if (!d) |
159 | return argb; |
160 | constexpr float f = 1.0f / 255.0f; |
161 | QColorVector c = { qRed(rgb: argb) * f, qGreen(rgb: argb) * f, qBlue(rgb: argb) * f }; |
162 | if (d->colorSpaceIn->lut.generated.loadAcquire()) { |
163 | c.x = d->colorSpaceIn->lut[0]->toLinear(f: c.x); |
164 | c.y = d->colorSpaceIn->lut[1]->toLinear(f: c.y); |
165 | c.z = d->colorSpaceIn->lut[2]->toLinear(f: c.z); |
166 | } else { |
167 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
168 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
169 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
170 | } |
171 | c = d->colorMatrix.map(c); |
172 | c.x = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.x)); |
173 | c.y = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.y)); |
174 | c.z = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.z)); |
175 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
176 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
177 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
178 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
179 | } else { |
180 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
181 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
182 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
183 | } |
184 | |
185 | return qRgba(r: c.x * 255 + 0.5f, g: c.y * 255 + 0.5f, b: c.z * 255 + 0.5f, a: qAlpha(rgb: argb)); |
186 | } |
187 | |
188 | /*! |
189 | Applies the color transformation on the QRgba64 value \a rgba64. |
190 | |
191 | The input should be opaque or unpremultiplied. |
192 | */ |
193 | QRgba64 QColorTransform::map(QRgba64 rgba64) const |
194 | { |
195 | if (!d) |
196 | return rgba64; |
197 | constexpr float f = 1.0f / 65535.0f; |
198 | QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f }; |
199 | if (d->colorSpaceIn->lut.generated.loadAcquire()) { |
200 | c.x = d->colorSpaceIn->lut[0]->toLinear(f: c.x); |
201 | c.y = d->colorSpaceIn->lut[1]->toLinear(f: c.y); |
202 | c.z = d->colorSpaceIn->lut[2]->toLinear(f: c.z); |
203 | } else { |
204 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
205 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
206 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
207 | } |
208 | c = d->colorMatrix.map(c); |
209 | c.x = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.x)); |
210 | c.y = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.y)); |
211 | c.z = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.z)); |
212 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
213 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
214 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
215 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
216 | } else { |
217 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
218 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
219 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
220 | } |
221 | |
222 | return QRgba64::fromRgba64(red: c.x * 65535.f + 0.5f, green: c.y * 65535.f + 0.5f, blue: c.z * 65535.f + 0.5f, alpha: rgba64.alpha()); |
223 | } |
224 | |
225 | /*! |
226 | Applies the color transformation on the QRgbaFloat16 value \a rgbafp16. |
227 | |
228 | The input should be opaque or unpremultiplied. |
229 | \since 6.4 |
230 | */ |
231 | QRgbaFloat16 QColorTransform::map(QRgbaFloat16 rgbafp16) const |
232 | { |
233 | if (!d) |
234 | return rgbafp16; |
235 | QColorVector c; |
236 | c.x = d->colorSpaceIn->trc[0].applyExtended(x: rgbafp16.r); |
237 | c.y = d->colorSpaceIn->trc[1].applyExtended(x: rgbafp16.g); |
238 | c.z = d->colorSpaceIn->trc[2].applyExtended(x: rgbafp16.b); |
239 | c = d->colorMatrix.map(c); |
240 | rgbafp16.r = qfloat16(d->colorSpaceOut->trc[0].applyInverseExtended(x: c.x)); |
241 | rgbafp16.g = qfloat16(d->colorSpaceOut->trc[1].applyInverseExtended(x: c.y)); |
242 | rgbafp16.b = qfloat16(d->colorSpaceOut->trc[2].applyInverseExtended(x: c.z)); |
243 | return rgbafp16; |
244 | } |
245 | |
246 | /*! |
247 | Applies the color transformation on the QRgbaFloat32 value \a rgbafp32. |
248 | |
249 | The input should be opaque or unpremultiplied. |
250 | \since 6.4 |
251 | */ |
252 | QRgbaFloat32 QColorTransform::map(QRgbaFloat32 rgbafp32) const |
253 | { |
254 | if (!d) |
255 | return rgbafp32; |
256 | QColorVector c; |
257 | c.x = d->colorSpaceIn->trc[0].applyExtended(x: rgbafp32.r); |
258 | c.y = d->colorSpaceIn->trc[1].applyExtended(x: rgbafp32.g); |
259 | c.z = d->colorSpaceIn->trc[2].applyExtended(x: rgbafp32.b); |
260 | c = d->colorMatrix.map(c); |
261 | rgbafp32.r = d->colorSpaceOut->trc[0].applyInverseExtended(x: c.x); |
262 | rgbafp32.g = d->colorSpaceOut->trc[1].applyInverseExtended(x: c.y); |
263 | rgbafp32.b = d->colorSpaceOut->trc[2].applyInverseExtended(x: c.z); |
264 | return rgbafp32; |
265 | } |
266 | |
267 | /*! |
268 | Applies the color transformation on the QColor value \a color. |
269 | |
270 | */ |
271 | QColor QColorTransform::map(const QColor &color) const |
272 | { |
273 | if (!d) |
274 | return color; |
275 | QColor clr = color; |
276 | if (color.spec() != QColor::ExtendedRgb || color.spec() != QColor::Rgb) |
277 | clr = clr.toRgb(); |
278 | |
279 | QColorVector c = { (float)clr.redF(), (float)clr.greenF(), (float)clr.blueF() }; |
280 | if (clr.spec() == QColor::ExtendedRgb) { |
281 | c.x = d->colorSpaceIn->trc[0].applyExtended(x: c.x); |
282 | c.y = d->colorSpaceIn->trc[1].applyExtended(x: c.y); |
283 | c.z = d->colorSpaceIn->trc[2].applyExtended(x: c.z); |
284 | } else { |
285 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
286 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
287 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
288 | } |
289 | c = d->colorMatrix.map(c); |
290 | bool inGamut = c.x >= 0.0f && c.x <= 1.0f && c.y >= 0.0f && c.y <= 1.0f && c.z >= 0.0f && c.z <= 1.0f; |
291 | if (inGamut) { |
292 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
293 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
294 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
295 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
296 | } else { |
297 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
298 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
299 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
300 | } |
301 | } else { |
302 | c.x = d->colorSpaceOut->trc[0].applyInverseExtended(x: c.x); |
303 | c.y = d->colorSpaceOut->trc[1].applyInverseExtended(x: c.y); |
304 | c.z = d->colorSpaceOut->trc[2].applyInverseExtended(x: c.z); |
305 | } |
306 | QColor out; |
307 | out.setRgbF(r: c.x, g: c.y, b: c.z, a: color.alphaF()); |
308 | return out; |
309 | } |
310 | |
311 | // Optimized sub-routines for fast block based conversion: |
312 | |
313 | template<bool DoClamp = true> |
314 | static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix) |
315 | { |
316 | #if defined(__SSE2__) |
317 | const __m128 minV = _mm_set1_ps(w: 0.0f); |
318 | const __m128 maxV = _mm_set1_ps(w: 1.0f); |
319 | const __m128 xMat = _mm_loadu_ps(p: &colorMatrix.r.x); |
320 | const __m128 yMat = _mm_loadu_ps(p: &colorMatrix.g.x); |
321 | const __m128 zMat = _mm_loadu_ps(p: &colorMatrix.b.x); |
322 | for (qsizetype j = 0; j < len; ++j) { |
323 | __m128 c = _mm_loadu_ps(p: &buffer[j].x); |
324 | __m128 cx = _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)); |
325 | __m128 cy = _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)); |
326 | __m128 cz = _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)); |
327 | cx = _mm_mul_ps(a: cx, b: xMat); |
328 | cy = _mm_mul_ps(a: cy, b: yMat); |
329 | cz = _mm_mul_ps(a: cz, b: zMat); |
330 | cx = _mm_add_ps(a: cx, b: cy); |
331 | cx = _mm_add_ps(a: cx, b: cz); |
332 | // Clamp: |
333 | if (DoClamp) { |
334 | cx = _mm_min_ps(a: cx, b: maxV); |
335 | cx = _mm_max_ps(a: cx, b: minV); |
336 | } |
337 | _mm_storeu_ps(p: &buffer[j].x, a: cx); |
338 | } |
339 | #elif defined(__ARM_NEON__) |
340 | const float32x4_t minV = vdupq_n_f32(0.0f); |
341 | const float32x4_t maxV = vdupq_n_f32(1.0f); |
342 | const float32x4_t xMat = vld1q_f32(&colorMatrix.r.x); |
343 | const float32x4_t yMat = vld1q_f32(&colorMatrix.g.x); |
344 | const float32x4_t zMat = vld1q_f32(&colorMatrix.b.x); |
345 | for (qsizetype j = 0; j < len; ++j) { |
346 | float32x4_t c = vld1q_f32(&buffer[j].x); |
347 | float32x4_t cx = vmulq_n_f32(xMat, vgetq_lane_f32(c, 0)); |
348 | float32x4_t cy = vmulq_n_f32(yMat, vgetq_lane_f32(c, 1)); |
349 | float32x4_t cz = vmulq_n_f32(zMat, vgetq_lane_f32(c, 2)); |
350 | cx = vaddq_f32(cx, cy); |
351 | cx = vaddq_f32(cx, cz); |
352 | // Clamp: |
353 | if (DoClamp) { |
354 | cx = vminq_f32(cx, maxV); |
355 | cx = vmaxq_f32(cx, minV); |
356 | } |
357 | vst1q_f32(&buffer[j].x, cx); |
358 | } |
359 | #else |
360 | for (int j = 0; j < len; ++j) { |
361 | const QColorVector cv = colorMatrix.map(buffer[j]); |
362 | if (DoClamp) { |
363 | buffer[j].x = std::max(0.0f, std::min(1.0f, cv.x)); |
364 | buffer[j].y = std::max(0.0f, std::min(1.0f, cv.y)); |
365 | buffer[j].z = std::max(0.0f, std::min(1.0f, cv.z)); |
366 | } else { |
367 | buffer[j] = cv; |
368 | } |
369 | } |
370 | #endif |
371 | } |
372 | |
373 | #if defined(__SSE2__) || defined(__ARM_NEON__) |
374 | template<typename T> |
375 | static constexpr inline bool isArgb(); |
376 | template<> |
377 | constexpr inline bool isArgb<QRgb>() { return true; } |
378 | template<> |
379 | constexpr inline bool isArgb<QRgba64>() { return false; } |
380 | |
381 | template<typename T> |
382 | static inline int getAlpha(const T &p); |
383 | template<> |
384 | inline int getAlpha<QRgb>(const QRgb &p) |
385 | { return qAlpha(rgb: p); } |
386 | template<> |
387 | inline int getAlpha<QRgba64>(const QRgba64 &p) |
388 | { return p.alpha(); } |
389 | #endif |
390 | |
// Forward declarations of the block loaders. Each reads len pixels from src,
// applies the input color space's transfer curve and writes the result to
// buffer; the definitions are selected per SIMD target below.

// Loader for premultiplied pixels (color is divided by alpha before lookup).
template<typename T>
static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr);
// Loader for unpremultiplied pixels.
template<typename T>
static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr);
395 | |
396 | #if defined(__SSE2__) |
397 | // Load to [0-alpha] in 4x32 SIMD |
398 | template<typename T> |
399 | static inline void loadP(const T &p, __m128i &v); |
400 | |
401 | template<> |
402 | inline void loadP<QRgb>(const QRgb &p, __m128i &v) |
403 | { |
404 | v = _mm_cvtsi32_si128(a: p); |
405 | #if defined(__SSE4_1__) |
406 | v = _mm_cvtepu8_epi32(v); |
407 | #else |
408 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
409 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
410 | #endif |
411 | } |
412 | |
413 | template<> |
414 | inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v) |
415 | { |
416 | v = _mm_loadl_epi64(p: (const __m128i *)&p); |
417 | #if defined(__SSE4_1__) |
418 | v = _mm_cvtepu16_epi32(v); |
419 | #else |
420 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
421 | #endif |
422 | } |
423 | |
424 | template<typename T> |
425 | static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
426 | { |
427 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
428 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
429 | constexpr bool isARGB = isArgb<T>(); |
430 | for (qsizetype i = 0; i < len; ++i) { |
431 | __m128i v; |
432 | loadP<T>(src[i], v); |
433 | __m128 vf = _mm_cvtepi32_ps(a: v); |
434 | // Approximate 1/a: |
435 | __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3)); |
436 | __m128 via = _mm_rcp_ps(a: va); |
437 | via = _mm_sub_ps(a: _mm_add_ps(a: via, b: via), b: _mm_mul_ps(a: via, b: _mm_mul_ps(a: via, b: va))); |
438 | // v * (1/a) |
439 | vf = _mm_mul_ps(a: vf, b: via); |
440 | |
441 | // Handle zero alpha |
442 | __m128 vAlphaMask = _mm_cmpeq_ps(a: va, b: _mm_set1_ps(w: 0.0f)); |
443 | vf = _mm_andnot_ps(a: vAlphaMask, b: vf); |
444 | |
445 | // LUT |
446 | v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
447 | const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0); |
448 | const int gidx = _mm_extract_epi16(v, 2); |
449 | const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4); |
450 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
451 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
452 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
453 | vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00); |
454 | |
455 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
456 | } |
457 | } |
458 | |
459 | template<> |
460 | void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
461 | { |
462 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
463 | const __m128 viFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
464 | const __m128 vZero = _mm_set1_ps(w: 0.0f); |
465 | const __m128 vOne = _mm_set1_ps(w: 1.0f); |
466 | for (qsizetype i = 0; i < len; ++i) { |
467 | __m128 vf = _mm_loadu_ps(p: &src[i].r); |
468 | // Approximate 1/a: |
469 | __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3)); |
470 | __m128 via = _mm_rcp_ps(a: va); |
471 | via = _mm_sub_ps(a: _mm_add_ps(a: via, b: via), b: _mm_mul_ps(a: via, b: _mm_mul_ps(a: via, b: va))); |
472 | // v * (1/a) |
473 | vf = _mm_mul_ps(a: vf, b: via); |
474 | |
475 | // Handle zero alpha |
476 | __m128 vAlphaMask = _mm_cmpeq_ps(a: va, b: vZero); |
477 | vf = _mm_andnot_ps(a: vAlphaMask, b: vf); |
478 | |
479 | // LUT |
480 | const __m128 under = _mm_cmplt_ps(a: vf, b: vZero); |
481 | const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne); |
482 | if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == 0) { |
483 | // Within gamut |
484 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
485 | const int ridx = _mm_extract_epi16(v, 0); |
486 | const int gidx = _mm_extract_epi16(v, 2); |
487 | const int bidx = _mm_extract_epi16(v, 4); |
488 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
489 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
490 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
491 | vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: viFF00); |
492 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
493 | } else { |
494 | // Outside 0.0->1.0 gamut |
495 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
496 | buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(x: buffer[i].x); |
497 | buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(x: buffer[i].y); |
498 | buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(x: buffer[i].z); |
499 | } |
500 | } |
501 | } |
502 | |
503 | // Load to [0-4080] in 4x32 SIMD |
504 | template<typename T> |
505 | static inline void loadPU(const T &p, __m128i &v); |
506 | |
507 | template<> |
508 | inline void loadPU<QRgb>(const QRgb &p, __m128i &v) |
509 | { |
510 | v = _mm_cvtsi32_si128(a: p); |
511 | #if defined(__SSE4_1__) |
512 | v = _mm_cvtepu8_epi32(v); |
513 | #else |
514 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
515 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
516 | #endif |
517 | v = _mm_slli_epi32(a: v, count: 4); |
518 | } |
519 | |
520 | template<> |
521 | inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v) |
522 | { |
523 | v = _mm_loadl_epi64(p: (const __m128i *)&p); |
524 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
525 | #if defined(__SSE4_1__) |
526 | v = _mm_cvtepu16_epi32(v); |
527 | #else |
528 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
529 | #endif |
530 | v = _mm_srli_epi32(a: v, count: 4); |
531 | } |
532 | |
533 | template<typename T> |
534 | void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
535 | { |
536 | constexpr bool isARGB = isArgb<T>(); |
537 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
538 | for (qsizetype i = 0; i < len; ++i) { |
539 | __m128i v; |
540 | loadPU<T>(src[i], v); |
541 | const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0); |
542 | const int gidx = _mm_extract_epi16(v, 2); |
543 | const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4); |
544 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
545 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
546 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
547 | __m128 vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00); |
548 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
549 | } |
550 | } |
551 | |
552 | template<> |
553 | void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
554 | { |
555 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
556 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
557 | const __m128 vZero = _mm_set1_ps(w: 0.0f); |
558 | const __m128 vOne = _mm_set1_ps(w: 1.0f); |
559 | for (qsizetype i = 0; i < len; ++i) { |
560 | __m128 vf = _mm_loadu_ps(p: &src[i].r); |
561 | const __m128 under = _mm_cmplt_ps(a: vf, b: vZero); |
562 | const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne); |
563 | if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == 0) { |
564 | // Within gamut |
565 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
566 | const int ridx = _mm_extract_epi16(v, 0); |
567 | const int gidx = _mm_extract_epi16(v, 2); |
568 | const int bidx = _mm_extract_epi16(v, 4); |
569 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
570 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
571 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
572 | vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00); |
573 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
574 | } else { |
575 | // Outside 0.0->1.0 gamut |
576 | buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(x: src[i].r); |
577 | buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(x: src[i].g); |
578 | buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(x: src[i].b); |
579 | } |
580 | } |
581 | } |
582 | |
583 | #elif defined(__ARM_NEON__) |
584 | // Load to [0-alpha] in 4x32 SIMD |
585 | template<typename T> |
586 | static inline void loadP(const T &p, uint32x4_t &v); |
587 | |
588 | template<> |
589 | inline void loadP<QRgb>(const QRgb &p, uint32x4_t &v) |
590 | { |
591 | v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p))))); |
592 | } |
593 | |
594 | template<> |
595 | inline void loadP<QRgba64>(const QRgba64 &p, uint32x4_t &v) |
596 | { |
597 | v = vmovl_u16(vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p)))); |
598 | } |
599 | |
600 | template<typename T> |
601 | static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
602 | { |
603 | constexpr bool isARGB = isArgb<T>(); |
604 | const float iFF00 = 1.0f / (255 * 256); |
605 | for (qsizetype i = 0; i < len; ++i) { |
606 | uint32x4_t v; |
607 | loadP<T>(src[i], v); |
608 | float32x4_t vf = vcvtq_f32_u32(v); |
609 | // Approximate 1/a: |
610 | float32x4_t va = vdupq_n_f32(vgetq_lane_f32(vf, 3)); |
611 | float32x4_t via = vrecpeq_f32(va); // estimate 1/a |
612 | via = vmulq_f32(vrecpsq_f32(va, via), via); |
613 | |
614 | // v * (1/a) |
615 | vf = vmulq_f32(vf, via); |
616 | |
617 | // Handle zero alpha |
618 | #if defined(Q_PROCESSOR_ARM_64) |
619 | uint32x4_t vAlphaMask = vceqzq_f32(va); |
620 | #else |
621 | uint32x4_t vAlphaMask = vceqq_f32(va, vdupq_n_f32(0.0)); |
622 | #endif |
623 | vf = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf), vAlphaMask)); |
624 | |
625 | // LUT |
626 | v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f))); |
627 | const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0); |
628 | const int gidx = vgetq_lane_u32(v, 1); |
629 | const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2); |
630 | v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); |
631 | v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); |
632 | v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); |
633 | vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); |
634 | |
635 | vst1q_f32(&buffer[i].x, vf); |
636 | } |
637 | } |
638 | |
639 | // Load to [0-4080] in 4x32 SIMD |
640 | template<typename T> |
641 | static inline void loadPU(const T &p, uint32x4_t &v); |
642 | |
643 | template<> |
644 | inline void loadPU<QRgb>(const QRgb &p, uint32x4_t &v) |
645 | { |
646 | v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p))))); |
647 | v = vshlq_n_u32(v, 4); |
648 | } |
649 | |
650 | template<> |
651 | inline void loadPU<QRgba64>(const QRgba64 &p, uint32x4_t &v) |
652 | { |
653 | uint16x4_t v16 = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p))); |
654 | v16 = vsub_u16(v16, vshr_n_u16(v16, 8)); |
655 | v = vmovl_u16(v16); |
656 | v = vshrq_n_u32(v, 4); |
657 | } |
658 | |
659 | template<typename T> |
660 | void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
661 | { |
662 | constexpr bool isARGB = isArgb<T>(); |
663 | const float iFF00 = 1.0f / (255 * 256); |
664 | for (qsizetype i = 0; i < len; ++i) { |
665 | uint32x4_t v; |
666 | loadPU<T>(src[i], v); |
667 | const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0); |
668 | const int gidx = vgetq_lane_u32(v, 1); |
669 | const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2); |
670 | v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); |
671 | v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); |
672 | v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); |
673 | float32x4_t vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); |
674 | vst1q_f32(&buffer[i].x, vf); |
675 | } |
676 | } |
677 | #else |
678 | template<> |
679 | void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
680 | { |
681 | for (qsizetype i = 0; i < len; ++i) { |
682 | const uint p = src[i]; |
683 | const int a = qAlpha(p); |
684 | if (a) { |
685 | const float ia = 4080.0f / a; |
686 | const int ridx = int(qRed(p) * ia + 0.5f); |
687 | const int gidx = int(qGreen(p) * ia + 0.5f); |
688 | const int bidx = int(qBlue(p) * ia + 0.5f); |
689 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256)); |
690 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256)); |
691 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256)); |
692 | } else { |
693 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
694 | } |
695 | } |
696 | } |
697 | |
698 | template<> |
699 | void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
700 | { |
701 | for (qsizetype i = 0; i < len; ++i) { |
702 | const QRgba64 &p = src[i]; |
703 | const int a = p.alpha(); |
704 | if (a) { |
705 | const float ia = 4080.0f / a; |
706 | const int ridx = int(p.red() * ia + 0.5f); |
707 | const int gidx = int(p.green() * ia + 0.5f); |
708 | const int bidx = int(p.blue() * ia + 0.5f); |
709 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256)); |
710 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256)); |
711 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256)); |
712 | } else { |
713 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
714 | } |
715 | } |
716 | } |
717 | |
718 | template<> |
719 | void loadUnpremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
720 | { |
721 | for (qsizetype i = 0; i < len; ++i) { |
722 | const uint p = src[i]; |
723 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(qRed(p)); |
724 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u8ToLinearF32(qGreen(p)); |
725 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u8ToLinearF32(qBlue(p)); |
726 | } |
727 | } |
728 | |
729 | template<> |
730 | void loadUnpremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
731 | { |
732 | for (qsizetype i = 0; i < len; ++i) { |
733 | const QRgba64 &p = src[i]; |
734 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(p.red()); |
735 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u16ToLinearF32(p.green()); |
736 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u16ToLinearF32(p.blue()); |
737 | } |
738 | } |
739 | #endif |
740 | #if !defined(__SSE2__) |
741 | template<> |
742 | void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
743 | { |
744 | for (qsizetype i = 0; i < len; ++i) { |
745 | const QRgbaFloat32 &p = src[i]; |
746 | const float a = p.a; |
747 | if (a) { |
748 | const float ia = 1.0f / a; |
749 | buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(p.r * ia); |
750 | buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(p.g * ia); |
751 | buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(p.b * ia); |
752 | } else { |
753 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
754 | } |
755 | } |
756 | } |
757 | |
758 | template<> |
759 | void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
760 | { |
761 | for (qsizetype i = 0; i < len; ++i) { |
762 | const QRgbaFloat32 &p = src[i]; |
763 | buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(p.r); |
764 | buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(p.g); |
765 | buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(p.b); |
766 | } |
767 | } |
768 | #endif |
769 | |
770 | #if defined(__SSE2__) |
771 | template<typename T> |
772 | static inline void storeP(T &p, __m128i &v, int a); |
773 | template<> |
774 | inline void storeP<QRgb>(QRgb &p, __m128i &v, int a) |
775 | { |
776 | v = _mm_packs_epi32(a: v, b: v); |
777 | v = _mm_insert_epi16(v, a, 3); |
778 | p = _mm_cvtsi128_si32(a: _mm_packus_epi16(a: v, b: v)); |
779 | } |
780 | template<> |
781 | inline void storeP<QRgba64>(QRgba64 &p, __m128i &v, int a) |
782 | { |
783 | #if defined(__SSE4_1__) |
784 | v = _mm_packus_epi32(v, v); |
785 | v = _mm_insert_epi16(v, a, 3); |
786 | _mm_storel_epi64((__m128i *)&p, v); |
787 | #else |
788 | const int r = _mm_extract_epi16(v, 0); |
789 | const int g = _mm_extract_epi16(v, 2); |
790 | const int b = _mm_extract_epi16(v, 4); |
791 | p = qRgba64(r, g, b, a); |
792 | #endif |
793 | } |
794 | |
795 | template<typename T> |
796 | static void storePremultiplied(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, |
797 | const QColorTransformPrivate *d_ptr) |
798 | { |
799 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
800 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
801 | constexpr bool isARGB = isArgb<T>(); |
802 | for (qsizetype i = 0; i < len; ++i) { |
803 | const int a = getAlpha<T>(src[i]); |
804 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
805 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
806 | __m128 va = _mm_mul_ps(a: _mm_set1_ps(w: a), b: iFF00); |
807 | const int ridx = _mm_extract_epi16(v, 0); |
808 | const int gidx = _mm_extract_epi16(v, 2); |
809 | const int bidx = _mm_extract_epi16(v, 4); |
810 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 4 : 0); |
811 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2); |
812 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 4); |
813 | vf = _mm_cvtepi32_ps(a: v); |
814 | vf = _mm_mul_ps(a: vf, b: va); |
815 | v = _mm_cvtps_epi32(a: vf); |
816 | storeP<T>(dst[i], v, a); |
817 | } |
818 | } |
819 | |
820 | template<> |
821 | void storePremultiplied<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, |
822 | const QColorVector *buffer, const qsizetype len, |
823 | const QColorTransformPrivate *d_ptr) |
824 | { |
825 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
826 | const __m128 vZero = _mm_set1_ps(w: 0.0f); |
827 | const __m128 vOne = _mm_set1_ps(w: 1.0f); |
828 | const __m128 viFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
829 | for (qsizetype i = 0; i < len; ++i) { |
830 | const float a = src[i].a; |
831 | __m128 va = _mm_set1_ps(w: a); |
832 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
833 | const __m128 under = _mm_cmplt_ps(a: vf, b: vZero); |
834 | const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne); |
835 | if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == 0) { |
836 | // Within gamut |
837 | va = _mm_mul_ps(a: va, b: viFF00); |
838 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
839 | const int ridx = _mm_extract_epi16(v, 0); |
840 | const int gidx = _mm_extract_epi16(v, 2); |
841 | const int bidx = _mm_extract_epi16(v, 4); |
842 | v = _mm_setzero_si128(); |
843 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0); |
844 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2); |
845 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4); |
846 | vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: va); |
847 | _mm_store_ps(p: &dst[i].r, a: vf); |
848 | } else { |
849 | dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(x: buffer[i].x); |
850 | dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(x: buffer[i].y); |
851 | dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(x: buffer[i].z); |
852 | vf = _mm_mul_ps(a: _mm_load_ps(p: &dst[i].r), b: va); |
853 | _mm_store_ps(p: &dst[i].r, a: vf); |
854 | } |
855 | dst[i].a = a; |
856 | } |
857 | } |
858 | |
859 | template<typename T> |
860 | static inline void storePU(T &p, __m128i &v, int a); |
861 | template<> |
862 | inline void storePU<QRgb>(QRgb &p, __m128i &v, int a) |
863 | { |
864 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
865 | v = _mm_srli_epi16(a: v, count: 8); |
866 | v = _mm_insert_epi16(v, a, 3); |
867 | p = _mm_cvtsi128_si32(a: _mm_packus_epi16(a: v, b: v)); |
868 | } |
869 | template<> |
870 | inline void storePU<QRgba64>(QRgba64 &p, __m128i &v, int a) |
871 | { |
872 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
873 | v = _mm_insert_epi16(v, a, 3); |
874 | _mm_storel_epi64(p: (__m128i *)&p, a: v); |
875 | } |
876 | |
877 | template<typename T> |
878 | static void storeUnpremultiplied(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, |
879 | const QColorTransformPrivate *d_ptr) |
880 | { |
881 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
882 | constexpr bool isARGB = isArgb<T>(); |
883 | for (qsizetype i = 0; i < len; ++i) { |
884 | const int a = getAlpha<T>(src[i]); |
885 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
886 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
887 | const int ridx = _mm_extract_epi16(v, 0); |
888 | const int gidx = _mm_extract_epi16(v, 2); |
889 | const int bidx = _mm_extract_epi16(v, 4); |
890 | v = _mm_setzero_si128(); |
891 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 2 : 0); |
892 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); |
893 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 2); |
894 | storePU<T>(dst[i], v, a); |
895 | } |
896 | } |
897 | |
898 | template<> |
899 | void storeUnpremultiplied<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, |
900 | const QColorVector *buffer, const qsizetype len, |
901 | const QColorTransformPrivate *d_ptr) |
902 | { |
903 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
904 | const __m128 vZero = _mm_set1_ps(w: 0.0f); |
905 | const __m128 vOne = _mm_set1_ps(w: 1.0f); |
906 | const __m128 viFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
907 | for (qsizetype i = 0; i < len; ++i) { |
908 | const float a = src[i].a; |
909 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
910 | const __m128 under = _mm_cmplt_ps(a: vf, b: vZero); |
911 | const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne); |
912 | if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == 0) { |
913 | // Within gamut |
914 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
915 | const int ridx = _mm_extract_epi16(v, 0); |
916 | const int gidx = _mm_extract_epi16(v, 2); |
917 | const int bidx = _mm_extract_epi16(v, 4); |
918 | v = _mm_setzero_si128(); |
919 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0); |
920 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2); |
921 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4); |
922 | vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: viFF00); |
923 | _mm_storeu_ps(p: &dst[i].r, a: vf); |
924 | } else { |
925 | dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(x: buffer[i].x); |
926 | dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(x: buffer[i].y); |
927 | dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(x: buffer[i].z); |
928 | } |
929 | dst[i].a = a; |
930 | } |
931 | } |
932 | |
933 | template<typename T> |
934 | static void storeOpaque(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, |
935 | const QColorTransformPrivate *d_ptr) |
936 | { |
937 | Q_UNUSED(src); |
938 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
939 | constexpr bool isARGB = isArgb<T>(); |
940 | for (qsizetype i = 0; i < len; ++i) { |
941 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
942 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
943 | const int ridx = _mm_extract_epi16(v, 0); |
944 | const int gidx = _mm_extract_epi16(v, 2); |
945 | const int bidx = _mm_extract_epi16(v, 4); |
946 | v = _mm_setzero_si128(); |
947 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 2 : 0); |
948 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); |
949 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 2); |
950 | storePU<T>(dst[i], v, isARGB ? 255 : 0xffff); |
951 | } |
952 | } |
953 | |
954 | template<> |
955 | void storeOpaque<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, |
956 | const QColorVector *buffer, const qsizetype len, |
957 | const QColorTransformPrivate *d_ptr) |
958 | { |
959 | Q_UNUSED(src); |
960 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
961 | const __m128 vZero = _mm_set1_ps(w: 0.0f); |
962 | const __m128 vOne = _mm_set1_ps(w: 1.0f); |
963 | const __m128 viFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
964 | for (qsizetype i = 0; i < len; ++i) { |
965 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
966 | const __m128 under = _mm_cmplt_ps(a: vf, b: vZero); |
967 | const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne); |
968 | if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == 0) { |
969 | // Within gamut |
970 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
971 | const int ridx = _mm_extract_epi16(v, 0); |
972 | const int gidx = _mm_extract_epi16(v, 2); |
973 | const int bidx = _mm_extract_epi16(v, 4); |
974 | v = _mm_setzero_si128(); |
975 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0); |
976 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2); |
977 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4); |
978 | vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: viFF00); |
979 | _mm_store_ps(p: &dst[i].r, a: vf); |
980 | } else { |
981 | dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(x: buffer[i].x); |
982 | dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(x: buffer[i].y); |
983 | dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(x: buffer[i].z); |
984 | } |
985 | dst[i].a = 1.0f; |
986 | } |
987 | } |
988 | |
989 | #elif defined(__ARM_NEON__) |
990 | template<typename T> |
991 | static inline void storeP(T &p, const uint16x4_t &v); |
992 | template<> |
993 | inline void storeP<QRgb>(QRgb &p, const uint16x4_t &v) |
994 | { |
995 | p = vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(v, v))), 0); |
996 | } |
997 | template<> |
998 | inline void storeP<QRgba64>(QRgba64 &p, const uint16x4_t &v) |
999 | { |
1000 | vst1_u16((uint16_t *)&p, v); |
1001 | } |
1002 | |
1003 | template<typename T> |
1004 | static void storePremultiplied(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, |
1005 | const QColorTransformPrivate *d_ptr) |
1006 | { |
1007 | const float iFF00 = 1.0f / (255 * 256); |
1008 | constexpr bool isARGB = isArgb<T>(); |
1009 | for (qsizetype i = 0; i < len; ++i) { |
1010 | const int a = getAlpha<T>(src[i]); |
1011 | float32x4_t vf = vld1q_f32(&buffer[i].x); |
1012 | uint32x4_t v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f))); |
1013 | const int ridx = vgetq_lane_u32(v, 0); |
1014 | const int gidx = vgetq_lane_u32(v, 1); |
1015 | const int bidx = vgetq_lane_u32(v, 2); |
1016 | v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0); |
1017 | v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1); |
1018 | v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2); |
1019 | vf = vcvtq_f32_u32(v); |
1020 | vf = vmulq_n_f32(vf, a * iFF00); |
1021 | vf = vaddq_f32(vf, vdupq_n_f32(0.5f)); |
1022 | v = vcvtq_u32_f32(vf); |
1023 | uint16x4_t v16 = vmovn_u32(v); |
1024 | v16 = vset_lane_u16(a, v16, 3); |
1025 | storeP<T>(dst[i], v16); |
1026 | } |
1027 | } |
1028 | |
1029 | template<typename T> |
1030 | static inline void storePU(T &p, uint16x4_t &v, int a); |
1031 | template<> |
1032 | inline void storePU<QRgb>(QRgb &p, uint16x4_t &v, int a) |
1033 | { |
1034 | v = vadd_u16(v, vdup_n_u16(0x80)); |
1035 | v = vshr_n_u16(v, 8); |
1036 | v = vset_lane_u16(a, v, 3); |
1037 | p = vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(v, v))), 0); |
1038 | } |
1039 | template<> |
1040 | inline void storePU<QRgba64>(QRgba64 &p, uint16x4_t &v, int a) |
1041 | { |
1042 | v = vadd_u16(v, vshr_n_u16(v, 8)); |
1043 | v = vset_lane_u16(a, v, 3); |
1044 | vst1_u16((uint16_t *)&p, v); |
1045 | } |
1046 | |
1047 | template<typename T> |
1048 | static void storeUnpremultiplied(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, |
1049 | const QColorTransformPrivate *d_ptr) |
1050 | { |
1051 | constexpr bool isARGB = isArgb<T>(); |
1052 | for (qsizetype i = 0; i < len; ++i) { |
1053 | const int a = getAlpha<T>(src[i]); |
1054 | float32x4_t vf = vld1q_f32(&buffer[i].x); |
1055 | uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f)))); |
1056 | const int ridx = vget_lane_u16(v, 0); |
1057 | const int gidx = vget_lane_u16(v, 1); |
1058 | const int bidx = vget_lane_u16(v, 2); |
1059 | v = vset_lane_u16(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0); |
1060 | v = vset_lane_u16(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1); |
1061 | v = vset_lane_u16(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2); |
1062 | storePU<T>(dst[i], v, a); |
1063 | } |
1064 | } |
1065 | |
1066 | template<typename T> |
1067 | static void storeOpaque(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, |
1068 | const QColorTransformPrivate *d_ptr) |
1069 | { |
1070 | Q_UNUSED(src); |
1071 | constexpr bool isARGB = isArgb<T>(); |
1072 | for (qsizetype i = 0; i < len; ++i) { |
1073 | float32x4_t vf = vld1q_f32(&buffer[i].x); |
1074 | uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f)))); |
1075 | const int ridx = vget_lane_u16(v, 0); |
1076 | const int gidx = vget_lane_u16(v, 1); |
1077 | const int bidx = vget_lane_u16(v, 2); |
1078 | v = vset_lane_u16(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0); |
1079 | v = vset_lane_u16(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1); |
1080 | v = vset_lane_u16(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2); |
1081 | storePU<T>(dst[i], v, isARGB ? 255 : 0xffff); |
1082 | } |
1083 | } |
1084 | #else |
1085 | static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
1086 | const QColorTransformPrivate *d_ptr) |
1087 | { |
1088 | for (qsizetype i = 0; i < len; ++i) { |
1089 | const int a = qAlpha(src[i]); |
1090 | const float fa = a / (255.0f * 256.0f); |
1091 | const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; |
1092 | const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; |
1093 | const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; |
1094 | dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a); |
1095 | } |
1096 | } |
1097 | |
1098 | static void storeUnpremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
1099 | const QColorTransformPrivate *d_ptr) |
1100 | { |
1101 | for (qsizetype i = 0; i < len; ++i) { |
1102 | const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); |
1103 | const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); |
1104 | const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z); |
1105 | dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0); |
1106 | } |
1107 | } |
1108 | |
1109 | static void storeOpaque(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
1110 | const QColorTransformPrivate *d_ptr) |
1111 | { |
1112 | Q_UNUSED(src); |
1113 | for (qsizetype i = 0; i < len; ++i) { |
1114 | const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); |
1115 | const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); |
1116 | const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z); |
1117 | dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0); |
1118 | } |
1119 | } |
1120 | |
1121 | static void storePremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
1122 | const QColorTransformPrivate *d_ptr) |
1123 | { |
1124 | for (qsizetype i = 0; i < len; ++i) { |
1125 | const int a = src[i].alpha(); |
1126 | const float fa = a / (255.0f * 256.0f); |
1127 | const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; |
1128 | const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; |
1129 | const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; |
1130 | dst[i] = qRgba64(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a); |
1131 | } |
1132 | } |
1133 | |
1134 | static void storeUnpremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
1135 | const QColorTransformPrivate *d_ptr) |
1136 | { |
1137 | for (qsizetype i = 0; i < len; ++i) { |
1138 | const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x); |
1139 | const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y); |
1140 | const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z); |
1141 | dst[i] = qRgba64(r, g, b, src[i].alpha()); |
1142 | } |
1143 | } |
1144 | |
1145 | static void storeOpaque(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
1146 | const QColorTransformPrivate *d_ptr) |
1147 | { |
1148 | Q_UNUSED(src); |
1149 | for (qsizetype i = 0; i < len; ++i) { |
1150 | const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x); |
1151 | const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y); |
1152 | const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z); |
1153 | dst[i] = qRgba64(r, g, b, 0xFFFF); |
1154 | } |
1155 | } |
1156 | #endif |
1157 | #if !defined(__SSE2__) |
1158 | static void storePremultiplied(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const QColorVector *buffer, |
1159 | const qsizetype len, const QColorTransformPrivate *d_ptr) |
1160 | { |
1161 | for (qsizetype i = 0; i < len; ++i) { |
1162 | const float a = src[i].a; |
1163 | dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x) * a; |
1164 | dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y) * a; |
1165 | dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z) * a; |
1166 | dst[i].a = a; |
1167 | } |
1168 | } |
1169 | |
1170 | static void storeUnpremultiplied(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const QColorVector *buffer, |
1171 | const qsizetype len, const QColorTransformPrivate *d_ptr) |
1172 | { |
1173 | for (qsizetype i = 0; i < len; ++i) { |
1174 | const float a = src[i].a; |
1175 | dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x); |
1176 | dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y); |
1177 | dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z); |
1178 | dst[i].a = a; |
1179 | } |
1180 | } |
1181 | |
1182 | static void storeOpaque(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const QColorVector *buffer, const qsizetype len, |
1183 | const QColorTransformPrivate *d_ptr) |
1184 | { |
1185 | Q_UNUSED(src); |
1186 | for (qsizetype i = 0; i < len; ++i) { |
1187 | dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x); |
1188 | dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y); |
1189 | dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z); |
1190 | dst[i].a = 1.0f; |
1191 | } |
1192 | } |
1193 | #endif |
1194 | static void storeGray(quint8 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
1195 | const QColorTransformPrivate *d_ptr) |
1196 | { |
1197 | Q_UNUSED(src); |
1198 | for (qsizetype i = 0; i < len; ++i) |
1199 | dst[i] = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(f: buffer[i].y); |
1200 | } |
1201 | |
1202 | static void storeGray(quint16 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
1203 | const QColorTransformPrivate *d_ptr) |
1204 | { |
1205 | Q_UNUSED(src); |
1206 | for (qsizetype i = 0; i < len; ++i) |
1207 | dst[i] = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(f: buffer[i].y); |
1208 | } |
1209 | |
// Number of pixels converted per pass of the apply loops below; it also sizes
// the on-stack QColorVector work buffer those loops use.
static constexpr qsizetype WorkBlockSize = 256;
1211 | |
// Raw storage sized and aligned for \a Count objects of type \a T. No T is
// constructed or destroyed by this class; it only converts to a T* that
// points at the start of the storage.
template <typename T, int Count = 1>
class QUninitialized
{
    alignas(T) char storage[sizeof(T) * Count];

public:
    operator T*() { return reinterpret_cast<T *>(storage); }
};
1220 | |
1221 | template<typename T> |
1222 | void QColorTransformPrivate::apply(T *dst, const T *src, qsizetype count, TransformFlags flags) const |
1223 | { |
1224 | if (!colorMatrix.isValid()) |
1225 | return; |
1226 | |
1227 | updateLutsIn(); |
1228 | updateLutsOut(); |
1229 | |
1230 | bool doApplyMatrix = !colorMatrix.isIdentity(); |
1231 | constexpr bool DoClip = !std::is_same_v<T, QRgbaFloat16> && !std::is_same_v<T, QRgbaFloat32>; |
1232 | |
1233 | QUninitialized<QColorVector, WorkBlockSize> buffer; |
1234 | |
1235 | qsizetype i = 0; |
1236 | while (i < count) { |
1237 | const qsizetype len = qMin(a: count - i, b: WorkBlockSize); |
1238 | if (flags & InputPremultiplied) |
1239 | loadPremultiplied(buffer, src + i, len, this); |
1240 | else |
1241 | loadUnpremultiplied(buffer, src + i, len, this); |
1242 | |
1243 | if (doApplyMatrix) |
1244 | applyMatrix<DoClip>(buffer, len, colorMatrix); |
1245 | |
1246 | if (flags & InputOpaque) |
1247 | storeOpaque(dst + i, src + i, buffer, len, this); |
1248 | else if (flags & OutputPremultiplied) |
1249 | storePremultiplied(dst + i, src + i, buffer, len, this); |
1250 | else |
1251 | storeUnpremultiplied(dst + i, src + i, buffer, len, this); |
1252 | |
1253 | i += len; |
1254 | } |
1255 | } |
1256 | |
1257 | template<typename D, typename S> |
1258 | void QColorTransformPrivate::applyReturnGray(D *dst, const S *src, qsizetype count, TransformFlags flags) const |
1259 | { |
1260 | if (!colorMatrix.isValid()) |
1261 | return; |
1262 | |
1263 | updateLutsIn(); |
1264 | updateLutsOut(); |
1265 | |
1266 | QUninitialized<QColorVector, WorkBlockSize> buffer; |
1267 | |
1268 | qsizetype i = 0; |
1269 | while (i < count) { |
1270 | const qsizetype len = qMin(a: count - i, b: WorkBlockSize); |
1271 | if (flags & InputPremultiplied) |
1272 | loadPremultiplied(buffer, src + i, len, this); |
1273 | else |
1274 | loadUnpremultiplied(buffer, src + i, len, this); |
1275 | |
1276 | applyMatrix(buffer, len, colorMatrix); |
1277 | |
1278 | storeGray(dst + i, src + i, buffer, len, this); |
1279 | |
1280 | i += len; |
1281 | } |
1282 | } |
1283 | |
1284 | /*! |
1285 | \internal |
1286 | \enum QColorTransformPrivate::TransformFlag |
1287 | |
1288 | Defines how the transform is to be applied. |
1289 | |
1290 | \value Unpremultiplied The input and output should both be unpremultiplied. |
1291 | \value InputOpaque The input is guaranteed to be opaque. |
1292 | \value InputPremultiplied The input is premultiplied. |
1293 | \value OutputPremultiplied The output should be premultiplied. |
    \value Premultiplied Both input and output should be premultiplied.
1295 | */ |
1296 | |
1297 | /*! |
1298 | \internal |
1299 | Prepares a color transformation for fast application. You do not need to |
1300 | call this explicitly as it will be called implicitly on the first transforms, but |
1301 | if you want predictable performance on the first transforms, you can perform it |
1302 | in advance. |
1303 | |
1304 | \sa QColorTransform::map(), apply() |
1305 | */ |
1306 | void QColorTransformPrivate::prepare() |
1307 | { |
1308 | updateLutsIn(); |
1309 | updateLutsOut(); |
1310 | } |
1311 | |
1312 | /*! |
1313 | \internal |
1314 | Applies the color transformation on \a count QRgb pixels starting from |
1315 | \a src and stores the result in \a dst. |
1316 | |
1317 | Thread-safe if prepare() has been called first. |
1318 | |
1319 | Assumes unpremultiplied data by default. Set \a flags to change defaults. |
1320 | |
1321 | \sa prepare() |
1322 | */ |
1323 | void QColorTransformPrivate::apply(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const |
1324 | { |
1325 | apply<QRgb>(dst, src, count, flags); |
1326 | } |
1327 | |
1328 | /*! |
1329 | \internal |
1330 | Applies the color transformation on \a count QRgba64 pixels starting from |
1331 | \a src and stores the result in \a dst. |
1332 | |
1333 | Thread-safe if prepare() has been called first. |
1334 | |
1335 | Assumes unpremultiplied data by default. Set \a flags to change defaults. |
1336 | |
1337 | \sa prepare() |
1338 | */ |
1339 | void QColorTransformPrivate::apply(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const |
1340 | { |
1341 | apply<QRgba64>(dst, src, count, flags); |
1342 | } |
1343 | |
1344 | /*! |
1345 | \internal |
1346 | Applies the color transformation on \a count QRgbaFloat32 pixels starting from |
1347 | \a src and stores the result in \a dst. |
1348 | |
1349 | Thread-safe if prepare() has been called first. |
1350 | |
1351 | Assumes unpremultiplied data by default. Set \a flags to change defaults. |
1352 | |
1353 | \sa prepare() |
1354 | */ |
1355 | void QColorTransformPrivate::apply(QRgbaFloat32 *dst, const QRgbaFloat32 *src, qsizetype count, |
1356 | TransformFlags flags) const |
1357 | { |
1358 | apply<QRgbaFloat32>(dst, src, count, flags); |
1359 | } |
1360 | |
1361 | /*! |
1362 | \internal |
1363 | Is to be called on a color-transform to XYZ, returns only luminance values. |
1364 | |
1365 | */ |
1366 | void QColorTransformPrivate::apply(quint8 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const |
1367 | { |
1368 | applyReturnGray<quint8, QRgb>(dst, src, count, flags); |
1369 | } |
1370 | |
1371 | /*! |
1372 | \internal |
1373 | Is to be called on a color-transform to XYZ, returns only luminance values. |
1374 | |
1375 | */ |
1376 | void QColorTransformPrivate::apply(quint16 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const |
1377 | { |
1378 | applyReturnGray<quint16, QRgba64>(dst, src, count, flags); |
1379 | } |
1380 | |
1381 | |
1382 | /*! |
1383 | \internal |
1384 | */ |
1385 | bool QColorTransformPrivate::isIdentity() const |
1386 | { |
1387 | if (!colorMatrix.isIdentity()) |
1388 | return false; |
1389 | if (colorSpaceIn && colorSpaceOut) { |
1390 | if (colorSpaceIn->transferFunction != colorSpaceOut->transferFunction) |
1391 | return false; |
1392 | if (colorSpaceIn->transferFunction == QColorSpace::TransferFunction::Custom) { |
1393 | return colorSpaceIn->trc[0] == colorSpaceOut->trc[0] |
1394 | && colorSpaceIn->trc[1] == colorSpaceOut->trc[1] |
1395 | && colorSpaceIn->trc[2] == colorSpaceOut->trc[2]; |
1396 | } |
1397 | } else { |
1398 | if (colorSpaceIn && colorSpaceIn->transferFunction != QColorSpace::TransferFunction::Linear) |
1399 | return false; |
1400 | if (colorSpaceOut && colorSpaceOut->transferFunction != QColorSpace::TransferFunction::Linear) |
1401 | return false; |
1402 | } |
1403 | return true; |
1404 | } |
1405 | |
1406 | QT_END_NAMESPACE |
1407 | |