1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2018 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtGui module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | |
41 | #include "qcolortransform.h" |
42 | #include "qcolortransform_p.h" |
43 | |
44 | #include "qcolormatrix_p.h" |
45 | #include "qcolorspace_p.h" |
46 | #include "qcolortrc_p.h" |
47 | #include "qcolortrclut_p.h" |
48 | |
49 | #include <QtCore/qatomic.h> |
50 | #include <QtCore/qmath.h> |
51 | #include <QtGui/qcolor.h> |
52 | #include <QtGui/qtransform.h> |
53 | #include <QtCore/private/qsimd_p.h> |
54 | |
55 | #include <qdebug.h> |
56 | |
57 | QT_BEGIN_NAMESPACE |
58 | |
59 | QColorTrcLut *lutFromTrc(const QColorTrc &trc) |
60 | { |
61 | if (trc.m_type == QColorTrc::Type::Table) |
62 | return QColorTrcLut::fromTransferTable(transTable: trc.m_table); |
63 | if (trc.m_type == QColorTrc::Type::Function) |
64 | return QColorTrcLut::fromTransferFunction(transfn: trc.m_fun); |
65 | qWarning() << "TRC uninitialized" ; |
66 | return nullptr; |
67 | } |
68 | |
69 | void QColorTransformPrivate::updateLutsIn() const |
70 | { |
71 | if (colorSpaceIn->lut.generated.loadAcquire()) |
72 | return; |
73 | QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock); |
74 | if (colorSpaceIn->lut.generated.loadRelaxed()) |
75 | return; |
76 | |
77 | for (int i = 0; i < 3; ++i) { |
78 | if (!colorSpaceIn->trc[i].isValid()) |
79 | return; |
80 | } |
81 | |
82 | if (colorSpaceIn->trc[0] == colorSpaceIn->trc[1] && colorSpaceIn->trc[0] == colorSpaceIn->trc[2]) { |
83 | colorSpaceIn->lut[0].reset(t: lutFromTrc(trc: colorSpaceIn->trc[0])); |
84 | colorSpaceIn->lut[1] = colorSpaceIn->lut[0]; |
85 | colorSpaceIn->lut[2] = colorSpaceIn->lut[0]; |
86 | } else { |
87 | for (int i = 0; i < 3; ++i) |
88 | colorSpaceIn->lut[i].reset(t: lutFromTrc(trc: colorSpaceIn->trc[i])); |
89 | } |
90 | |
91 | colorSpaceIn->lut.generated.storeRelease(newValue: 1); |
92 | } |
93 | |
94 | void QColorTransformPrivate::updateLutsOut() const |
95 | { |
96 | if (colorSpaceOut->lut.generated.loadAcquire()) |
97 | return; |
98 | QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock); |
99 | if (colorSpaceOut->lut.generated.loadRelaxed()) |
100 | return; |
101 | for (int i = 0; i < 3; ++i) { |
102 | if (!colorSpaceOut->trc[i].isValid()) |
103 | return; |
104 | } |
105 | |
106 | if (colorSpaceOut->trc[0] == colorSpaceOut->trc[1] && colorSpaceOut->trc[0] == colorSpaceOut->trc[2]) { |
107 | colorSpaceOut->lut[0].reset(t: lutFromTrc(trc: colorSpaceOut->trc[0])); |
108 | colorSpaceOut->lut[1] = colorSpaceOut->lut[0]; |
109 | colorSpaceOut->lut[2] = colorSpaceOut->lut[0]; |
110 | } else { |
111 | for (int i = 0; i < 3; ++i) |
112 | colorSpaceOut->lut[i].reset(t: lutFromTrc(trc: colorSpaceOut->trc[i])); |
113 | } |
114 | |
115 | colorSpaceOut->lut.generated.storeRelease(newValue: 1); |
116 | } |
117 | |
118 | /*! |
119 | \class QColorTransform |
120 | \brief The QColorTransform class is a transformation between color spaces. |
121 | \since 5.14 |
122 | |
123 | \ingroup painting |
124 | \ingroup appearance |
125 | \inmodule QtGui |
126 | |
127 | QColorTransform is an instantiation of a transformation between color spaces. |
128 | It can be applied on color and pixels to convert them from one color space to |
129 | another. |
130 | |
131 | Setting up a QColorTransform takes some preprocessing, so keeping around |
132 | QColorTransforms that you need often is recommended, instead of generating |
133 | them on the fly. |
134 | */ |
135 | |
136 | |
137 | QColorTransform::QColorTransform(const QColorTransform &colorTransform) noexcept |
138 | : d(colorTransform.d) |
139 | { |
140 | if (d) |
141 | d->ref.ref(); |
142 | } |
143 | |
144 | |
145 | QColorTransform::~QColorTransform() |
146 | { |
147 | if (d && !d->ref.deref()) |
148 | delete d; |
149 | } |
150 | |
151 | /*! |
152 | Applies the color transformation on the QRgb value \a argb. |
153 | |
154 | The input should be opaque or unpremultiplied. |
155 | */ |
156 | QRgb QColorTransform::map(QRgb argb) const |
157 | { |
158 | if (!d) |
159 | return argb; |
160 | constexpr float f = 1.0f / 255.0f; |
161 | QColorVector c = { qRed(rgb: argb) * f, qGreen(rgb: argb) * f, qBlue(rgb: argb) * f }; |
162 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
163 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
164 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
165 | c = d->colorMatrix.map(c); |
166 | c.x = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.x)); |
167 | c.y = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.y)); |
168 | c.z = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.z)); |
169 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
170 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
171 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
172 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
173 | } else { |
174 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
175 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
176 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
177 | } |
178 | |
179 | return qRgba(r: c.x * 255 + 0.5f, g: c.y * 255 + 0.5f, b: c.z * 255 + 0.5f, a: qAlpha(rgb: argb)); |
180 | } |
181 | |
182 | /*! |
183 | Applies the color transformation on the QRgba64 value \a rgba64. |
184 | |
185 | The input should be opaque or unpremultiplied. |
186 | */ |
187 | QRgba64 QColorTransform::map(QRgba64 rgba64) const |
188 | { |
189 | if (!d) |
190 | return rgba64; |
191 | constexpr float f = 1.0f / 65535.0f; |
192 | QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f }; |
193 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
194 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
195 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
196 | c = d->colorMatrix.map(c); |
197 | c.x = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.x)); |
198 | c.y = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.y)); |
199 | c.z = std::max(a: 0.0f, b: std::min(a: 1.0f, b: c.z)); |
200 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
201 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
202 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
203 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
204 | } else { |
205 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
206 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
207 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
208 | } |
209 | |
210 | return QRgba64::fromRgba64(red: c.x * 65535, green: c.y * 65535, blue: c.z * 65535, alpha: rgba64.alpha()); |
211 | } |
212 | |
213 | /*! |
214 | Applies the color transformation on the QColor value \a color. |
215 | |
216 | */ |
217 | QColor QColorTransform::map(const QColor &color) const |
218 | { |
219 | if (!d) |
220 | return color; |
221 | QColor clr = color; |
222 | if (color.spec() != QColor::ExtendedRgb || color.spec() != QColor::Rgb) |
223 | clr = clr.toRgb(); |
224 | |
225 | QColorVector c = { (float)clr.redF(), (float)clr.greenF(), (float)clr.blueF() }; |
226 | if (clr.spec() == QColor::ExtendedRgb) { |
227 | c.x = d->colorSpaceIn->trc[0].applyExtended(x: c.x); |
228 | c.y = d->colorSpaceIn->trc[1].applyExtended(x: c.y); |
229 | c.z = d->colorSpaceIn->trc[2].applyExtended(x: c.z); |
230 | } else { |
231 | c.x = d->colorSpaceIn->trc[0].apply(x: c.x); |
232 | c.y = d->colorSpaceIn->trc[1].apply(x: c.y); |
233 | c.z = d->colorSpaceIn->trc[2].apply(x: c.z); |
234 | } |
235 | c = d->colorMatrix.map(c); |
236 | bool inGamut = c.x >= 0.0f && c.x <= 1.0f && c.y >= 0.0f && c.y <= 1.0f && c.z >= 0.0f && c.z <= 1.0f; |
237 | if (inGamut) { |
238 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
239 | c.x = d->colorSpaceOut->lut[0]->fromLinear(f: c.x); |
240 | c.y = d->colorSpaceOut->lut[1]->fromLinear(f: c.y); |
241 | c.z = d->colorSpaceOut->lut[2]->fromLinear(f: c.z); |
242 | } else { |
243 | c.x = d->colorSpaceOut->trc[0].applyInverse(x: c.x); |
244 | c.y = d->colorSpaceOut->trc[1].applyInverse(x: c.y); |
245 | c.z = d->colorSpaceOut->trc[2].applyInverse(x: c.z); |
246 | } |
247 | } else { |
248 | c.x = d->colorSpaceOut->trc[0].applyInverseExtended(x: c.x); |
249 | c.y = d->colorSpaceOut->trc[1].applyInverseExtended(x: c.y); |
250 | c.z = d->colorSpaceOut->trc[2].applyInverseExtended(x: c.z); |
251 | } |
252 | QColor out; |
253 | out.setRgbF(r: c.x, g: c.y, b: c.z, a: color.alphaF()); |
254 | return out; |
255 | } |
256 | |
257 | // Optimized sub-routines for fast block based conversion: |
258 | |
259 | static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix) |
260 | { |
261 | #if defined(__SSE2__) |
262 | const __m128 minV = _mm_set1_ps(w: 0.0f); |
263 | const __m128 maxV = _mm_set1_ps(w: 1.0f); |
264 | const __m128 xMat = _mm_loadu_ps(p: &colorMatrix.r.x); |
265 | const __m128 yMat = _mm_loadu_ps(p: &colorMatrix.g.x); |
266 | const __m128 zMat = _mm_loadu_ps(p: &colorMatrix.b.x); |
267 | for (qsizetype j = 0; j < len; ++j) { |
268 | __m128 c = _mm_loadu_ps(p: &buffer[j].x); |
269 | __m128 cx = _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)); |
270 | __m128 cy = _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)); |
271 | __m128 cz = _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)); |
272 | cx = _mm_mul_ps(a: cx, b: xMat); |
273 | cy = _mm_mul_ps(a: cy, b: yMat); |
274 | cz = _mm_mul_ps(a: cz, b: zMat); |
275 | cx = _mm_add_ps(a: cx, b: cy); |
276 | cx = _mm_add_ps(a: cx, b: cz); |
277 | // Clamp: |
278 | cx = _mm_min_ps(a: cx, b: maxV); |
279 | cx = _mm_max_ps(a: cx, b: minV); |
280 | _mm_storeu_ps(p: &buffer[j].x, a: cx); |
281 | } |
282 | #else |
283 | for (int j = 0; j < len; ++j) { |
284 | const QColorVector cv = colorMatrix.map(buffer[j]); |
285 | buffer[j].x = std::max(0.0f, std::min(1.0f, cv.x)); |
286 | buffer[j].y = std::max(0.0f, std::min(1.0f, cv.y)); |
287 | buffer[j].z = std::max(0.0f, std::min(1.0f, cv.z)); |
288 | } |
289 | #endif |
290 | } |
291 | |
292 | template<typename T> |
293 | static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr); |
294 | template<typename T> |
295 | static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr); |
296 | |
297 | #if defined(__SSE2__) |
298 | // Load to [0-alpha] in 4x32 SIMD |
299 | template<typename T> |
300 | static inline void loadP(const T &p, __m128i &v); |
301 | |
302 | template<> |
303 | inline void loadP<QRgb>(const QRgb &p, __m128i &v) |
304 | { |
305 | v = _mm_cvtsi32_si128(a: p); |
306 | #if defined(__SSE4_1__) |
307 | v = _mm_cvtepu8_epi32(v); |
308 | #else |
309 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
310 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
311 | #endif |
312 | } |
313 | |
314 | template<> |
315 | inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v) |
316 | { |
317 | v = _mm_loadl_epi64(p: (const __m128i *)&p); |
318 | #if defined(__SSE4_1__) |
319 | v = _mm_cvtepu16_epi32(v); |
320 | #else |
321 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
322 | #endif |
323 | // Shuffle to ARGB as the template below expects it |
324 | v = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 0, 1, 2)); |
325 | } |
326 | |
327 | template<typename T> |
328 | static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
329 | { |
330 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
331 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
332 | for (qsizetype i = 0; i < len; ++i) { |
333 | __m128i v; |
334 | loadP<T>(src[i], v); |
335 | __m128 vf = _mm_cvtepi32_ps(a: v); |
336 | // Approximate 1/a: |
337 | __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3)); |
338 | __m128 via = _mm_rcp_ps(a: va); |
339 | via = _mm_sub_ps(a: _mm_add_ps(a: via, b: via), b: _mm_mul_ps(a: via, b: _mm_mul_ps(a: via, b: va))); |
340 | // v * (1/a) |
341 | vf = _mm_mul_ps(a: vf, b: via); |
342 | |
343 | // Handle zero alpha |
344 | __m128 vAlphaMask = _mm_cmpeq_ps(a: va, b: _mm_set1_ps(w: 0.0f)); |
345 | vf = _mm_andnot_ps(a: vAlphaMask, b: vf); |
346 | |
347 | // LUT |
348 | v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
349 | const int ridx = _mm_extract_epi16(v, 4); |
350 | const int gidx = _mm_extract_epi16(v, 2); |
351 | const int bidx = _mm_extract_epi16(v, 0); |
352 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
353 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
354 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
355 | vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00); |
356 | |
357 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
358 | } |
359 | } |
360 | |
361 | // Load to [0-4080] in 4x32 SIMD |
362 | template<typename T> |
363 | static inline void loadPU(const T &p, __m128i &v); |
364 | |
365 | template<> |
366 | inline void loadPU<QRgb>(const QRgb &p, __m128i &v) |
367 | { |
368 | v = _mm_cvtsi32_si128(a: p); |
369 | #if defined(__SSE4_1__) |
370 | v = _mm_cvtepu8_epi32(v); |
371 | #else |
372 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
373 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
374 | #endif |
375 | v = _mm_slli_epi32(a: v, count: 4); |
376 | } |
377 | |
378 | template<> |
379 | inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v) |
380 | { |
381 | v = _mm_loadl_epi64(p: (const __m128i *)&p); |
382 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
383 | #if defined(__SSE4_1__) |
384 | v = _mm_cvtepu16_epi32(v); |
385 | #else |
386 | v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128()); |
387 | #endif |
388 | v = _mm_srli_epi32(a: v, count: 4); |
389 | // Shuffle to ARGB as the template below expects it |
390 | v = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 0, 1, 2)); |
391 | } |
392 | |
393 | template<typename T> |
394 | void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
395 | { |
396 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
397 | for (qsizetype i = 0; i < len; ++i) { |
398 | __m128i v; |
399 | loadPU<T>(src[i], v); |
400 | const int ridx = _mm_extract_epi16(v, 4); |
401 | const int gidx = _mm_extract_epi16(v, 2); |
402 | const int bidx = _mm_extract_epi16(v, 0); |
403 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
404 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
405 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
406 | __m128 vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00); |
407 | _mm_storeu_ps(p: &buffer[i].x, a: vf); |
408 | } |
409 | } |
410 | |
411 | #else |
412 | template<> |
413 | void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
414 | { |
415 | for (qsizetype i = 0; i < len; ++i) { |
416 | const uint p = src[i]; |
417 | const int a = qAlpha(p); |
418 | if (a) { |
419 | const float ia = 4080.0f / a; |
420 | const int ridx = int(qRed(p) * ia + 0.5f); |
421 | const int gidx = int(qGreen(p) * ia + 0.5f); |
422 | const int bidx = int(qBlue(p) * ia + 0.5f); |
423 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256)); |
424 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256)); |
425 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256)); |
426 | } else { |
427 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
428 | } |
429 | } |
430 | } |
431 | |
432 | template<> |
433 | void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
434 | { |
435 | for (qsizetype i = 0; i < len; ++i) { |
436 | const QRgba64 &p = src[i]; |
437 | const int a = p.alpha(); |
438 | if (a) { |
439 | const float ia = 4080.0f / a; |
440 | const int ridx = int(p.red() * ia + 0.5f); |
441 | const int gidx = int(p.green() * ia + 0.5f); |
442 | const int bidx = int(p.blue() * ia + 0.5f); |
443 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256)); |
444 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256)); |
445 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256)); |
446 | } else { |
447 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
448 | } |
449 | } |
450 | } |
451 | |
452 | template<> |
453 | void loadUnpremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
454 | { |
455 | for (qsizetype i = 0; i < len; ++i) { |
456 | const uint p = src[i]; |
457 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(qRed(p)); |
458 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u8ToLinearF32(qGreen(p)); |
459 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u8ToLinearF32(qBlue(p)); |
460 | } |
461 | } |
462 | |
463 | template<> |
464 | void loadUnpremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
465 | { |
466 | for (qsizetype i = 0; i < len; ++i) { |
467 | const QRgba64 &p = src[i]; |
468 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(p.red()); |
469 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u16ToLinearF32(p.green()); |
470 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u16ToLinearF32(p.blue()); |
471 | } |
472 | } |
473 | #endif |
474 | |
475 | static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
476 | const QColorTransformPrivate *d_ptr) |
477 | { |
478 | #if defined(__SSE2__) |
479 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
480 | const __m128 iFF00 = _mm_set1_ps(w: 1.0f / (255 * 256)); |
481 | for (qsizetype i = 0; i < len; ++i) { |
482 | const int a = qAlpha(rgb: src[i]); |
483 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
484 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
485 | __m128 va = _mm_set1_ps(w: a); |
486 | va = _mm_mul_ps(a: va, b: iFF00); |
487 | const int ridx = _mm_extract_epi16(v, 0); |
488 | const int gidx = _mm_extract_epi16(v, 2); |
489 | const int bidx = _mm_extract_epi16(v, 4); |
490 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 4); |
491 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2); |
492 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
493 | vf = _mm_cvtepi32_ps(a: v); |
494 | vf = _mm_mul_ps(a: vf, b: va); |
495 | v = _mm_cvtps_epi32(a: vf); |
496 | v = _mm_packs_epi32(a: v, b: v); |
497 | v = _mm_insert_epi16(v, a, 3); |
498 | v = _mm_packus_epi16(a: v, b: v); |
499 | dst[i] = _mm_cvtsi128_si32(a: v); |
500 | } |
501 | #else |
502 | for (qsizetype i = 0; i < len; ++i) { |
503 | const int a = qAlpha(src[i]); |
504 | const float fa = a / (255.0f * 256.0f); |
505 | const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; |
506 | const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; |
507 | const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; |
508 | dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a); |
509 | } |
510 | #endif |
511 | } |
512 | |
513 | static void storeUnpremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
514 | const QColorTransformPrivate *d_ptr) |
515 | { |
516 | #if defined(__SSE2__) |
517 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
518 | for (qsizetype i = 0; i < len; ++i) { |
519 | const int a = qAlpha(rgb: src[i]); |
520 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
521 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
522 | const int ridx = _mm_extract_epi16(v, 0); |
523 | const int gidx = _mm_extract_epi16(v, 2); |
524 | const int bidx = _mm_extract_epi16(v, 4); |
525 | v = _mm_setzero_si128(); |
526 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 2); |
527 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); |
528 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
529 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
530 | v = _mm_srli_epi16(a: v, count: 8); |
531 | v = _mm_insert_epi16(v, a, 3); |
532 | v = _mm_packus_epi16(a: v, b: v); |
533 | dst[i] = _mm_cvtsi128_si32(a: v); |
534 | } |
535 | #else |
536 | for (qsizetype i = 0; i < len; ++i) { |
537 | const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); |
538 | const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); |
539 | const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z); |
540 | dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0); |
541 | } |
542 | #endif |
543 | } |
544 | |
545 | static void storeOpaque(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
546 | const QColorTransformPrivate *d_ptr) |
547 | { |
548 | Q_UNUSED(src); |
549 | #if defined(__SSE2__) |
550 | const __m128 v4080 = _mm_set1_ps(w: 4080.f); |
551 | for (qsizetype i = 0; i < len; ++i) { |
552 | __m128 vf = _mm_loadu_ps(p: &buffer[i].x); |
553 | __m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080)); |
554 | const int ridx = _mm_extract_epi16(v, 0); |
555 | const int gidx = _mm_extract_epi16(v, 2); |
556 | const int bidx = _mm_extract_epi16(v, 4); |
557 | v = _mm_setzero_si128(); |
558 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 2); |
559 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); |
560 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
561 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
562 | v = _mm_srli_epi16(a: v, count: 8); |
563 | v = _mm_insert_epi16(v, 255, 3); |
564 | v = _mm_packus_epi16(a: v, b: v); |
565 | dst[i] = _mm_cvtsi128_si32(a: v); |
566 | } |
567 | #else |
568 | for (qsizetype i = 0; i < len; ++i) { |
569 | const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); |
570 | const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); |
571 | const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z); |
572 | dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0); |
573 | } |
574 | #endif |
575 | } |
576 | |
577 | static void storePremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
578 | const QColorTransformPrivate *d_ptr) |
579 | { |
580 | for (qsizetype i = 0; i < len; ++i) { |
581 | const int a = src[i].alpha(); |
582 | const float fa = a / (255.0f * 256.0f); |
583 | const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; |
584 | const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; |
585 | const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; |
586 | dst[i] = qRgba64(r: r * fa + 0.5f, g: g * fa + 0.5f, b: b * fa + 0.5f, a); |
587 | } |
588 | } |
589 | |
590 | static void storeUnpremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
591 | const QColorTransformPrivate *d_ptr) |
592 | { |
593 | for (qsizetype i = 0; i < len; ++i) { |
594 | const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(f: buffer[i].x); |
595 | const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(f: buffer[i].y); |
596 | const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(f: buffer[i].z); |
597 | dst[i] = qRgba64(r, g, b, a: src[i].alpha()); |
598 | } |
599 | } |
600 | |
601 | static void storeOpaque(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
602 | const QColorTransformPrivate *d_ptr) |
603 | { |
604 | Q_UNUSED(src); |
605 | for (qsizetype i = 0; i < len; ++i) { |
606 | const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(f: buffer[i].x); |
607 | const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(f: buffer[i].y); |
608 | const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(f: buffer[i].z); |
609 | dst[i] = qRgba64(r, g, b, a: 0xFFFF); |
610 | } |
611 | } |
612 | |
613 | static constexpr qsizetype WorkBlockSize = 256; |
614 | |
// Raw storage with the size and alignment of Count objects of type T. The
// storage is handed out as a T* and no T is ever constructed or destroyed by
// this class.
template <typename T, int Count = 1>
class QUninitialized
{
public:
    // The storage is the only member, so it starts at the object's address.
    operator T*() { return reinterpret_cast<T *>(m_storage); }

private:
    alignas(T) char m_storage[sizeof(T) * Count];
};
623 | |
624 | template<typename T> |
625 | void QColorTransformPrivate::apply(T *dst, const T *src, qsizetype count, TransformFlags flags) const |
626 | { |
627 | if (!colorMatrix.isValid()) |
628 | return; |
629 | |
630 | updateLutsIn(); |
631 | updateLutsOut(); |
632 | |
633 | bool doApplyMatrix = (colorMatrix != QColorMatrix::identity()); |
634 | |
635 | QUninitialized<QColorVector, WorkBlockSize> buffer; |
636 | |
637 | qsizetype i = 0; |
638 | while (i < count) { |
639 | const qsizetype len = qMin(a: count - i, b: WorkBlockSize); |
640 | if (flags & InputPremultiplied) |
641 | loadPremultiplied(buffer, src + i, len, this); |
642 | else |
643 | loadUnpremultiplied(buffer, src + i, len, this); |
644 | |
645 | if (doApplyMatrix) |
646 | applyMatrix(buffer, len, colorMatrix); |
647 | |
648 | if (flags & InputOpaque) |
649 | storeOpaque(dst + i, src + i, buffer, len, this); |
650 | else if (flags & OutputPremultiplied) |
651 | storePremultiplied(dst + i, src + i, buffer, len, this); |
652 | else |
653 | storeUnpremultiplied(dst + i, src + i, buffer, len, this); |
654 | |
655 | i += len; |
656 | } |
657 | } |
658 | |
/*!
    \internal
    \enum QColorTransformPrivate::TransformFlag

    Defines how the transform is to be applied.

    \value Unpremultiplied The input and output should both be unpremultiplied.
    \value InputOpaque The input is guaranteed to be opaque.
    \value InputPremultiplied The input is premultiplied.
    \value OutputPremultiplied The output should be premultiplied.
    \value Premultiplied Both input and output should be premultiplied.
*/
671 | |
672 | /*! |
673 | \internal |
674 | Prepares a color transformation for fast application. You do not need to |
675 | call this explicitly as it will be called implicitly on the first transforms, but |
676 | if you want predictable performance on the first transforms, you can perform it |
677 | in advance. |
678 | |
679 | \sa QColorTransform::map(), apply() |
680 | */ |
681 | void QColorTransformPrivate::prepare() |
682 | { |
683 | updateLutsIn(); |
684 | updateLutsOut(); |
685 | } |
686 | |
687 | /*! |
688 | \internal |
689 | Applies the color transformation on \a count QRgb pixels starting from |
690 | \a src and stores the result in \a dst. |
691 | |
692 | Thread-safe if prepare() has been called first. |
693 | |
694 | Assumes unpremultiplied data by default. Set \a flags to change defaults. |
695 | |
696 | \sa prepare() |
697 | */ |
698 | void QColorTransformPrivate::apply(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const |
699 | { |
700 | apply<QRgb>(dst, src, count, flags); |
701 | } |
702 | |
703 | /*! |
704 | \internal |
705 | Applies the color transformation on \a count QRgba64 pixels starting from |
706 | \a src and stores the result in \a dst. |
707 | |
708 | Thread-safe if prepare() has been called first. |
709 | |
710 | Assumes unpremultiplied data by default. Set \a flags to change defaults. |
711 | |
712 | \sa prepare() |
713 | */ |
714 | void QColorTransformPrivate::apply(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const |
715 | { |
716 | apply<QRgba64>(dst, src, count, flags); |
717 | } |
718 | |
719 | |
720 | QT_END_NAMESPACE |
721 | |