qcolortransform.cpp source code [qtbase/src/gui/painting/qcolortransform.cpp]

1	// Copyright (C) 2022 The Qt Company Ltd.
2	// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4	#include "qcolortransform.h"
5	#include "qcolortransform_p.h"
6
7	#include "qcolormatrix_p.h"
8	#include "qcolorspace_p.h"
9	#include "qcolortrc_p.h"
10	#include "qcolortrclut_p.h"
11
12	#include <QtCore/qatomic.h>
13	#include <QtCore/qmath.h>
14	#include <QtGui/qcolor.h>
15	#include <QtGui/qimage.h>
16	#include <QtGui/qtransform.h>
17	#include <QtCore/private/qsimd_p.h>
18
19	#include <qdebug.h>
20
21	QT_BEGIN_NAMESPACE
22
23	std::shared_ptr<QColorTrcLut> lutFromTrc(const QColorTrc &trc)
24	{
25	if (trc.m_type == QColorTrc::Type::Table)
26	return QColorTrcLut::fromTransferTable(transTable: trc.m_table);
27	if (trc.m_type == QColorTrc::Type::Function)
28	return QColorTrcLut::fromTransferFunction(transfn: trc.m_fun);
29	qWarning() << "TRC uninitialized";
30	return nullptr;
31	}
32
33	void QColorTransformPrivate::updateLutsIn() const
34	{
35	if (colorSpaceIn ->lut.generated.loadAcquire())
36	return;
37	QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock);
38	if (colorSpaceIn ->lut.generated.loadRelaxed())
39	return;
40
41	for (int i = `0`; i < `3`; ++i) {
42	if (!colorSpaceIn ->trc[i].isValid())
43	return;
44	}
45
46	if (colorSpaceIn ->trc[`0`] == colorSpaceIn ->trc[`1`] && colorSpaceIn ->trc[`0`] == colorSpaceIn ->trc[`2`]) {
47	colorSpaceIn ->lut [`0`] = lutFromTrc(trc: colorSpaceIn ->trc[`0`]);
48	colorSpaceIn ->lut [`1`] = colorSpaceIn ->lut [`0`];
49	colorSpaceIn ->lut [`2`] = colorSpaceIn ->lut [`0`];
50	} else {
51	for (int i = `0`; i < `3`; ++i)
52	colorSpaceIn ->lut [i] = lutFromTrc(trc: colorSpaceIn ->trc[i]);
53	}
54
55	colorSpaceIn ->lut.generated.storeRelease(newValue: `1`);
56	}
57
58	void QColorTransformPrivate::updateLutsOut() const
59	{
60	if (colorSpaceOut ->lut.generated.loadAcquire())
61	return;
62	QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock);
63	if (colorSpaceOut ->lut.generated.loadRelaxed())
64	return;
65	for (int i = `0`; i < `3`; ++i) {
66	if (!colorSpaceOut ->trc[i].isValid())
67	return;
68	}
69
70	if (colorSpaceOut ->trc[`0`] == colorSpaceOut ->trc[`1`] && colorSpaceOut ->trc[`0`] == colorSpaceOut ->trc[`2`]) {
71	colorSpaceOut ->lut [`0`] = lutFromTrc(trc: colorSpaceOut ->trc[`0`]);
72	colorSpaceOut ->lut [`1`] = colorSpaceOut ->lut [`0`];
73	colorSpaceOut ->lut [`2`] = colorSpaceOut ->lut [`0`];
74	} else {
75	for (int i = `0`; i < `3`; ++i)
76	colorSpaceOut ->lut [i] = lutFromTrc(trc: colorSpaceOut ->trc[i]);
77	}
78
79	colorSpaceOut ->lut.generated.storeRelease(newValue: `1`);
80	}
81
/*!
    \class QColorTransform
    \brief The QColorTransform class is a transformation between color spaces.
    \since 5.14

    \ingroup painting
    \ingroup appearance
    \inmodule QtGui

    QColorTransform is an instantiation of a transformation between color spaces.
    It can be applied to colors and pixels to convert them from one color space to
    another.

    Setting up a QColorTransform takes some preprocessing, so keeping around
    QColorTransforms that you need often is recommended, instead of generating
    them on the fly.
*/
99
100
101	QColorTransform::QColorTransform(const QColorTransform &colorTransform) noexcept = default;
102
103	QColorTransform::~QColorTransform() = default;
104
105	QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QColorTransformPrivate)
106
107	/!*
108	\since 6.4
109	Returns true if the color transform is the identity transform.
110	*/
111	bool QColorTransform::isIdentity() const noexcept
112	{
113	return !d \|\| d ->isIdentity();
114	}
115
/*!
    \fn bool QColorTransform::operator==(const QColorTransform &ct1, const QColorTransform &ct2)
    \since 6.4
    Returns true if \a ct1 defines the same color transformation as \a ct2.
*/
121
/*!
    \fn bool QColorTransform::operator!=(const QColorTransform &ct1, const QColorTransform &ct2)
    \since 6.4
    Returns true if \a ct1 does not define the same transformation as \a ct2.
*/
127
128	/! \internal*
129	*/
130	bool QColorTransform::compare(const QColorTransform &other) const
131	{
132	if (d == other.d)
133	return true;
134	if (bool(d) != bool(other.d))
135	return d ? d ->isIdentity() : other.d ->isIdentity();
136	if (d ->colorMatrix != other.d ->colorMatrix)
137	return false;
138	if (bool(d ->colorSpaceIn) != bool(other.d ->colorSpaceIn))
139	return false;
140	if (bool(d ->colorSpaceOut) != bool(other.d ->colorSpaceOut))
141	return false;
142	for (int i = `0`; i < `3`; ++i) {
143	if (d ->colorSpaceIn && d ->colorSpaceIn ->trc[i] != other.d ->colorSpaceIn ->trc[i])
144	return false;
145	if (d ->colorSpaceOut && d ->colorSpaceOut ->trc[i] != other.d ->colorSpaceOut ->trc[i])
146	return false;
147	}
148	return true;
149	}
150
151	/!*
152	Applies the color transformation on the QRgb value \a argb.
153
154	The input should be opaque or unpremultiplied.
155	*/
156	QRgb QColorTransform::map(QRgb argb) const
157	{
158	if (!d)
159	return argb;
160	constexpr float f = `1.0f` / `255.0f`;
161	QColorVector c = { qRed(rgb: argb) * f, qGreen(rgb: argb) * f, qBlue(rgb: argb) * f };
162	if (d ->colorSpaceIn ->lut.generated.loadAcquire()) {
163	c.x = d ->colorSpaceIn ->lut [`0`]->toLinear(f: c.x);
164	c.y = d ->colorSpaceIn ->lut [`1`]->toLinear(f: c.y);
165	c.z = d ->colorSpaceIn ->lut [`2`]->toLinear(f: c.z);
166	} else {
167	c.x = d ->colorSpaceIn ->trc[`0`].apply(x: c.x);
168	c.y = d ->colorSpaceIn ->trc[`1`].apply(x: c.y);
169	c.z = d ->colorSpaceIn ->trc[`2`].apply(x: c.z);
170	}
171	c = d ->colorMatrix.map(c);
172	c.x = std::max(a: `0.0f`, b: std::min(a: `1.0f`, b: c.x));
173	c.y = std::max(a: `0.0f`, b: std::min(a: `1.0f`, b: c.y));
174	c.z = std::max(a: `0.0f`, b: std::min(a: `1.0f`, b: c.z));
175	if (d ->colorSpaceOut ->lut.generated.loadAcquire()) {
176	c.x = d ->colorSpaceOut ->lut [`0`]->fromLinear(f: c.x);
177	c.y = d ->colorSpaceOut ->lut [`1`]->fromLinear(f: c.y);
178	c.z = d ->colorSpaceOut ->lut [`2`]->fromLinear(f: c.z);
179	} else {
180	c.x = d ->colorSpaceOut ->trc[`0`].applyInverse(x: c.x);
181	c.y = d ->colorSpaceOut ->trc[`1`].applyInverse(x: c.y);
182	c.z = d ->colorSpaceOut ->trc[`2`].applyInverse(x: c.z);
183	}
184
185	return qRgba(r: c.x * `255` + `0.5f`, g: c.y * `255` + `0.5f`, b: c.z * `255` + `0.5f`, a: qAlpha(rgb: argb));
186	}
187
188	/!*
189	Applies the color transformation on the QRgba64 value \a rgba64.
190
191	The input should be opaque or unpremultiplied.
192	*/
193	QRgba64 QColorTransform::map(QRgba64 rgba64) const
194	{
195	if (!d)
196	return rgba64;
197	constexpr float f = `1.0f` / `65535.0f`;
198	QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f };
199	if (d ->colorSpaceIn ->lut.generated.loadAcquire()) {
200	c.x = d ->colorSpaceIn ->lut [`0`]->toLinear(f: c.x);
201	c.y = d ->colorSpaceIn ->lut [`1`]->toLinear(f: c.y);
202	c.z = d ->colorSpaceIn ->lut [`2`]->toLinear(f: c.z);
203	} else {
204	c.x = d ->colorSpaceIn ->trc[`0`].apply(x: c.x);
205	c.y = d ->colorSpaceIn ->trc[`1`].apply(x: c.y);
206	c.z = d ->colorSpaceIn ->trc[`2`].apply(x: c.z);
207	}
208	c = d ->colorMatrix.map(c);
209	c.x = std::max(a: `0.0f`, b: std::min(a: `1.0f`, b: c.x));
210	c.y = std::max(a: `0.0f`, b: std::min(a: `1.0f`, b: c.y));
211	c.z = std::max(a: `0.0f`, b: std::min(a: `1.0f`, b: c.z));
212	if (d ->colorSpaceOut ->lut.generated.loadAcquire()) {
213	c.x = d ->colorSpaceOut ->lut [`0`]->fromLinear(f: c.x);
214	c.y = d ->colorSpaceOut ->lut [`1`]->fromLinear(f: c.y);
215	c.z = d ->colorSpaceOut ->lut [`2`]->fromLinear(f: c.z);
216	} else {
217	c.x = d ->colorSpaceOut ->trc[`0`].applyInverse(x: c.x);
218	c.y = d ->colorSpaceOut ->trc[`1`].applyInverse(x: c.y);
219	c.z = d ->colorSpaceOut ->trc[`2`].applyInverse(x: c.z);
220	}
221
222	return QRgba64::fromRgba64(red: c.x * `65535.f` + `0.5f`, green: c.y * `65535.f` + `0.5f`, blue: c.z * `65535.f` + `0.5f`, alpha: rgba64.alpha());
223	}
224
225	/!*
226	Applies the color transformation on the QRgbaFloat16 value \a rgbafp16.
227
228	The input should be opaque or unpremultiplied.
229	\since 6.4
230	*/
231	QRgbaFloat16 QColorTransform::map(QRgbaFloat16 rgbafp16) const
232	{
233	if (!d)
234	return rgbafp16;
235	QColorVector c;
236	c.x = d ->colorSpaceIn ->trc[`0`].applyExtended(x: rgbafp16.r);
237	c.y = d ->colorSpaceIn ->trc[`1`].applyExtended(x: rgbafp16.g);
238	c.z = d ->colorSpaceIn ->trc[`2`].applyExtended(x: rgbafp16.b);
239	c = d ->colorMatrix.map(c);
240	rgbafp16.r = qfloat16 (d ->colorSpaceOut ->trc[`0`].applyInverseExtended(x: c.x));
241	rgbafp16.g = qfloat16 (d ->colorSpaceOut ->trc[`1`].applyInverseExtended(x: c.y));
242	rgbafp16.b = qfloat16 (d ->colorSpaceOut ->trc[`2`].applyInverseExtended(x: c.z));
243	return rgbafp16;
244	}
245
246	/!*
247	Applies the color transformation on the QRgbaFloat32 value \a rgbafp32.
248
249	The input should be opaque or unpremultiplied.
250	\since 6.4
251	*/
252	QRgbaFloat32 QColorTransform::map(QRgbaFloat32 rgbafp32) const
253	{
254	if (!d)
255	return rgbafp32;
256	QColorVector c;
257	c.x = d ->colorSpaceIn ->trc[`0`].applyExtended(x: rgbafp32.r);
258	c.y = d ->colorSpaceIn ->trc[`1`].applyExtended(x: rgbafp32.g);
259	c.z = d ->colorSpaceIn ->trc[`2`].applyExtended(x: rgbafp32.b);
260	c = d ->colorMatrix.map(c);
261	rgbafp32.r = d ->colorSpaceOut ->trc[`0`].applyInverseExtended(x: c.x);
262	rgbafp32.g = d ->colorSpaceOut ->trc[`1`].applyInverseExtended(x: c.y);
263	rgbafp32.b = d ->colorSpaceOut ->trc[`2`].applyInverseExtended(x: c.z);
264	return rgbafp32;
265	}
266
267	/!*
268	Applies the color transformation on the QColor value \a color.
269
270	*/
271	QColor QColorTransform::map(const QColor &color) const
272	{
273	if (!d)
274	return color;
275	QColor clr = color;
276	if (color.spec() != QColor::ExtendedRgb \|\| color.spec() != QColor::Rgb)
277	clr = clr.toRgb();
278
279	QColorVector c = { (float)clr.redF(), (float)clr.greenF(), (float)clr.blueF() };
280	if (clr.spec() == QColor::ExtendedRgb) {
281	c.x = d ->colorSpaceIn ->trc[`0`].applyExtended(x: c.x);
282	c.y = d ->colorSpaceIn ->trc[`1`].applyExtended(x: c.y);
283	c.z = d ->colorSpaceIn ->trc[`2`].applyExtended(x: c.z);
284	} else {
285	c.x = d ->colorSpaceIn ->trc[`0`].apply(x: c.x);
286	c.y = d ->colorSpaceIn ->trc[`1`].apply(x: c.y);
287	c.z = d ->colorSpaceIn ->trc[`2`].apply(x: c.z);
288	}
289	c = d ->colorMatrix.map(c);
290	bool inGamut = c.x >= `0.0f` && c.x <= `1.0f` && c.y >= `0.0f` && c.y <= `1.0f` && c.z >= `0.0f` && c.z <= `1.0f`;
291	if (inGamut) {
292	if (d ->colorSpaceOut ->lut.generated.loadAcquire()) {
293	c.x = d ->colorSpaceOut ->lut [`0`]->fromLinear(f: c.x);
294	c.y = d ->colorSpaceOut ->lut [`1`]->fromLinear(f: c.y);
295	c.z = d ->colorSpaceOut ->lut [`2`]->fromLinear(f: c.z);
296	} else {
297	c.x = d ->colorSpaceOut ->trc[`0`].applyInverse(x: c.x);
298	c.y = d ->colorSpaceOut ->trc[`1`].applyInverse(x: c.y);
299	c.z = d ->colorSpaceOut ->trc[`2`].applyInverse(x: c.z);
300	}
301	} else {
302	c.x = d ->colorSpaceOut ->trc[`0`].applyInverseExtended(x: c.x);
303	c.y = d ->colorSpaceOut ->trc[`1`].applyInverseExtended(x: c.y);
304	c.z = d ->colorSpaceOut ->trc[`2`].applyInverseExtended(x: c.z);
305	}
306	QColor out;
307	out.setRgbF(r: c.x, g: c.y, b: c.z, a: color.alphaF());
308	return out;
309	}
310
311	// Optimized sub-routines for fast block based conversion:
312
313	template<bool DoClamp = true>
314	static void applyMatrix(QColorVector buffer, const* qsizetype len, const QColorMatrix &colorMatrix)
315	{
316	#if defined(__SSE2__)
317	const __m128 minV = _mm_set1_ps(w: `0.0f`);
318	const __m128 maxV = _mm_set1_ps(w: `1.0f`);
319	const __m128 xMat = _mm_loadu_ps(p: &colorMatrix.r.x);
320	const __m128 yMat = _mm_loadu_ps(p: &colorMatrix.g.x);
321	const __m128 zMat = _mm_loadu_ps(p: &colorMatrix.b.x);
322	for (qsizetype j = `0`; j < len; ++j) {
323	__m128 c = _mm_loadu_ps(p: &buffer[j].x);
324	__m128 cx = _mm_shuffle_ps(c, c, _MM_SHUFFLE(`0`, `0`, `0`, `0`));
325	__m128 cy = _mm_shuffle_ps(c, c, _MM_SHUFFLE(`1`, `1`, `1`, `1`));
326	__m128 cz = _mm_shuffle_ps(c, c, _MM_SHUFFLE(`2`, `2`, `2`, `2`));
327	cx = _mm_mul_ps(a: cx, b: xMat);
328	cy = _mm_mul_ps(a: cy, b: yMat);
329	cz = _mm_mul_ps(a: cz, b: zMat);
330	cx = _mm_add_ps(a: cx, b: cy);
331	cx = _mm_add_ps(a: cx, b: cz);
332	// Clamp:
333	if (DoClamp) {
334	cx = _mm_min_ps(a: cx, b: maxV);
335	cx = _mm_max_ps(a: cx, b: minV);
336	}
337	_mm_storeu_ps(p: &buffer[j].x, a: cx);
338	}
339	#elif defined(__ARM_NEON__)
340	const float32x4_t minV = vdupq_n_f32(`0.0f`);
341	const float32x4_t maxV = vdupq_n_f32(`1.0f`);
342	const float32x4_t xMat = vld1q_f32(&colorMatrix.r.x);
343	const float32x4_t yMat = vld1q_f32(&colorMatrix.g.x);
344	const float32x4_t zMat = vld1q_f32(&colorMatrix.b.x);
345	for (qsizetype j = `0`; j < len; ++j) {
346	float32x4_t c = vld1q_f32(&buffer[j].x);
347	float32x4_t cx = vmulq_n_f32(xMat, vgetq_lane_f32(c, `0`));
348	float32x4_t cy = vmulq_n_f32(yMat, vgetq_lane_f32(c, `1`));
349	float32x4_t cz = vmulq_n_f32(zMat, vgetq_lane_f32(c, `2`));
350	cx = vaddq_f32(cx, cy);
351	cx = vaddq_f32(cx, cz);
352	// Clamp:
353	if (DoClamp) {
354	cx = vminq_f32(cx, maxV);
355	cx = vmaxq_f32(cx, minV);
356	}
357	vst1q_f32(&buffer[j].x, cx);
358	}
359	#else
360	for (int j = `0`; j < len; ++j) {
361	const QColorVector cv = colorMatrix.map(buffer[j]);
362	if (DoClamp) {
363	buffer[j].x = std::max(`0.0f`, std::min(`1.0f`, cv.x));
364	buffer[j].y = std::max(`0.0f`, std::min(`1.0f`, cv.y));
365	buffer[j].z = std::max(`0.0f`, std::min(`1.0f`, cv.z));
366	} else {
367	buffer[j] = cv;
368	}
369	}
370	#endif
371	}
372
373	#if defined(__SSE2__) \|\| defined(__ARM_NEON__)
374	template<typename T>
375	static constexpr inline bool isArgb();
376	template<>
377	constexpr inline bool isArgb<QRgb>() { return true; }
378	template<>
379	constexpr inline bool isArgb<QRgba64>() { return false; }
380
381	template<typename T>
382	static inline int getAlpha(const T &p);
383	template<>
384	inline int getAlpha<QRgb>(const QRgb &p)
385	{ return qAlpha(rgb: p); }
386	template<>
387	inline int getAlpha<QRgba64>(const QRgba64 &p)
388	{ return p.alpha(); }
389	#endif
390
391	template<typename T>
392	static void loadPremultiplied(QColorVector buffer, const* T src, const* qsizetype len, const QColorTransformPrivate *d_ptr);
393	template<typename T>
394	static void loadUnpremultiplied(QColorVector buffer, const* T src, const* qsizetype len, const QColorTransformPrivate *d_ptr);
395
396	#if defined(__SSE2__)
397	// Load to [0-alpha] in 4x32 SIMD
398	template<typename T>
399	static inline void loadP(const T &p, __m128i &v);
400
401	template<>
402	inline void loadP<QRgb>(const QRgb &p, __m128i &v)
403	{
404	v = _mm_cvtsi32_si128(a: p);
405	#if defined(__SSE4_1__)
406	v = _mm_cvtepu8_epi32(v);
407	#else
408	v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128());
409	v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128());
410	#endif
411	}
412
413	template<>
414	inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v)
415	{
416	v = _mm_loadl_epi64(p: (const __m128i *)&p);
417	#if defined(__SSE4_1__)
418	v = _mm_cvtepu16_epi32(v);
419	#else
420	v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128());
421	#endif
422	}
423
424	template<typename T>
425	static void loadPremultiplied(QColorVector buffer, const* T src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
426	{
427	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
428	const __m128 iFF00 = _mm_set1_ps(w: `1.0f` / (`255` * `256`));
429	constexpr bool isARGB = isArgb<T>();
430	for (qsizetype i = `0`; i < len; ++i) {
431	__m128i v;
432	loadP<T>(src[i], v);
433	__m128 vf = _mm_cvtepi32_ps(a: v);
434	// Approximate 1/a:
435	__m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(`3`, `3`, `3`, `3`));
436	__m128 via = _mm_rcp_ps(a: va);
437	via = _mm_sub_ps(a: _mm_add_ps(a: via, b: via), b: _mm_mul_ps(a: via, b: _mm_mul_ps(a: via, b: va)));
438	// v (1/a)*
439	vf = _mm_mul_ps(a: vf, b: via);
440
441	// Handle zero alpha
442	__m128 vAlphaMask = _mm_cmpeq_ps(a: va, b: _mm_set1_ps(w: `0.0f`));
443	vf = _mm_andnot_ps(a: vAlphaMask, b: vf);
444
445	// LUT
446	v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
447	const int ridx = isARGB ? _mm_extract_epi16(v, `4`) : _mm_extract_epi16(v, `0`);
448	const int gidx = _mm_extract_epi16(v, `2`);
449	const int bidx = isARGB ? _mm_extract_epi16(v, `0`) : _mm_extract_epi16(v, `4`);
450	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`0`]->m_toLinear[ridx], `0`);
451	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`1`]->m_toLinear[gidx], `2`);
452	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`2`]->m_toLinear[bidx], `4`);
453	vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00);
454
455	_mm_storeu_ps(p: &buffer[i].x, a: vf);
456	}
457	}
458
459	template<>
460	void loadPremultiplied<QRgbaFloat32>(QColorVector buffer, const* QRgbaFloat32 src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
461	{
462	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
463	const __m128 viFF00 = _mm_set1_ps(w: `1.0f` / (`255` * `256`));
464	const __m128 vZero = _mm_set1_ps(w: `0.0f`);
465	const __m128 vOne = _mm_set1_ps(w: `1.0f`);
466	for (qsizetype i = `0`; i < len; ++i) {
467	__m128 vf = _mm_loadu_ps(p: &src[i].r);
468	// Approximate 1/a:
469	__m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(`3`, `3`, `3`, `3`));
470	__m128 via = _mm_rcp_ps(a: va);
471	via = _mm_sub_ps(a: _mm_add_ps(a: via, b: via), b: _mm_mul_ps(a: via, b: _mm_mul_ps(a: via, b: va)));
472	// v (1/a)*
473	vf = _mm_mul_ps(a: vf, b: via);
474
475	// Handle zero alpha
476	__m128 vAlphaMask = _mm_cmpeq_ps(a: va, b: vZero);
477	vf = _mm_andnot_ps(a: vAlphaMask, b: vf);
478
479	// LUT
480	const __m128 under = _mm_cmplt_ps(a: vf, b: vZero);
481	const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne);
482	if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == `0`) {
483	// Within gamut
484	__m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
485	const int ridx = _mm_extract_epi16(v, `0`);
486	const int gidx = _mm_extract_epi16(v, `2`);
487	const int bidx = _mm_extract_epi16(v, `4`);
488	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`0`]->m_toLinear[ridx], `0`);
489	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`1`]->m_toLinear[gidx], `2`);
490	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`2`]->m_toLinear[bidx], `4`);
491	vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: viFF00);
492	_mm_storeu_ps(p: &buffer[i].x, a: vf);
493	} else {
494	// Outside 0.0->1.0 gamut
495	_mm_storeu_ps(p: &buffer[i].x, a: vf);
496	buffer[i].x = d_ptr->colorSpaceIn ->trc[`0`].applyExtended(x: buffer[i].x);
497	buffer[i].y = d_ptr->colorSpaceIn ->trc[`1`].applyExtended(x: buffer[i].y);
498	buffer[i].z = d_ptr->colorSpaceIn ->trc[`2`].applyExtended(x: buffer[i].z);
499	}
500	}
501	}
502
503	// Load to [0-4080] in 4x32 SIMD
504	template<typename T>
505	static inline void loadPU(const T &p, __m128i &v);
506
507	template<>
508	inline void loadPU<QRgb>(const QRgb &p, __m128i &v)
509	{
510	v = _mm_cvtsi32_si128(a: p);
511	#if defined(__SSE4_1__)
512	v = _mm_cvtepu8_epi32(v);
513	#else
514	v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128());
515	v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128());
516	#endif
517	v = _mm_slli_epi32(a: v, count: `4`);
518	}
519
520	template<>
521	inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v)
522	{
523	v = _mm_loadl_epi64(p: (const __m128i *)&p);
524	v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: `8`));
525	#if defined(__SSE4_1__)
526	v = _mm_cvtepu16_epi32(v);
527	#else
528	v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128());
529	#endif
530	v = _mm_srli_epi32(a: v, count: `4`);
531	}
532
533	template<typename T>
534	void loadUnpremultiplied(QColorVector buffer, const* T src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
535	{
536	constexpr bool isARGB = isArgb<T>();
537	const __m128 iFF00 = _mm_set1_ps(w: `1.0f` / (`255` * `256`));
538	for (qsizetype i = `0`; i < len; ++i) {
539	__m128i v;
540	loadPU<T>(src[i], v);
541	const int ridx = isARGB ? _mm_extract_epi16(v, `4`) : _mm_extract_epi16(v, `0`);
542	const int gidx = _mm_extract_epi16(v, `2`);
543	const int bidx = isARGB ? _mm_extract_epi16(v, `0`) : _mm_extract_epi16(v, `4`);
544	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`0`]->m_toLinear[ridx], `0`);
545	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`1`]->m_toLinear[gidx], `2`);
546	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`2`]->m_toLinear[bidx], `4`);
547	__m128 vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00);
548	_mm_storeu_ps(p: &buffer[i].x, a: vf);
549	}
550	}
551
552	template<>
553	void loadUnpremultiplied<QRgbaFloat32>(QColorVector buffer, const* QRgbaFloat32 src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
554	{
555	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
556	const __m128 iFF00 = _mm_set1_ps(w: `1.0f` / (`255` * `256`));
557	const __m128 vZero = _mm_set1_ps(w: `0.0f`);
558	const __m128 vOne = _mm_set1_ps(w: `1.0f`);
559	for (qsizetype i = `0`; i < len; ++i) {
560	__m128 vf = _mm_loadu_ps(p: &src[i].r);
561	const __m128 under = _mm_cmplt_ps(a: vf, b: vZero);
562	const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne);
563	if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == `0`) {
564	// Within gamut
565	__m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
566	const int ridx = _mm_extract_epi16(v, `0`);
567	const int gidx = _mm_extract_epi16(v, `2`);
568	const int bidx = _mm_extract_epi16(v, `4`);
569	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`0`]->m_toLinear[ridx], `0`);
570	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`1`]->m_toLinear[gidx], `2`);
571	v = _mm_insert_epi16(v, d_ptr->colorSpaceIn ->lut[`2`]->m_toLinear[bidx], `4`);
572	vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: iFF00);
573	_mm_storeu_ps(p: &buffer[i].x, a: vf);
574	} else {
575	// Outside 0.0->1.0 gamut
576	buffer[i].x = d_ptr->colorSpaceIn ->trc[`0`].applyExtended(x: src[i].r);
577	buffer[i].y = d_ptr->colorSpaceIn ->trc[`1`].applyExtended(x: src[i].g);
578	buffer[i].z = d_ptr->colorSpaceIn ->trc[`2`].applyExtended(x: src[i].b);
579	}
580	}
581	}
582
583	#elif defined(__ARM_NEON__)
584	// Load to [0-alpha] in 4x32 SIMD
585	template<typename T>
586	static inline void loadP(const T &p, uint32x4_t &v);
587
588	template<>
589	inline void loadP<QRgb>(const QRgb &p, uint32x4_t &v)
590	{
591	v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p)))));
592	}
593
594	template<>
595	inline void loadP<QRgba64>(const QRgba64 &p, uint32x4_t &v)
596	{
597	v = vmovl_u16(vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p))));
598	}
599
600	template<typename T>
601	static void loadPremultiplied(QColorVector buffer, const* T src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
602	{
603	constexpr bool isARGB = isArgb<T>();
604	const float iFF00 = `1.0f` / (`255` * `256`);
605	for (qsizetype i = `0`; i < len; ++i) {
606	uint32x4_t v;
607	loadP<T>(src[i], v);
608	float32x4_t vf = vcvtq_f32_u32(v);
609	// Approximate 1/a:
610	float32x4_t va = vdupq_n_f32(vgetq_lane_f32(vf, `3`));
611	float32x4_t via = vrecpeq_f32(va); // estimate 1/a
612	via = vmulq_f32(vrecpsq_f32(va, via), via);
613
614	// v (1/a)*
615	vf = vmulq_f32(vf, via);
616
617	// Handle zero alpha
618	#if defined(Q_PROCESSOR_ARM_64)
619	uint32x4_t vAlphaMask = vceqzq_f32(va);
620	#else
621	uint32x4_t vAlphaMask = vceqq_f32(va, vdupq_n_f32(`0.0`));
622	#endif
623	vf = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf), vAlphaMask));
624
625	// LUT
626	v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, `4080.f`), vdupq_n_f32(`0.5f`)));
627	const int ridx = isARGB ? vgetq_lane_u32(v, `2`) : vgetq_lane_u32(v, `0`);
628	const int gidx = vgetq_lane_u32(v, `1`);
629	const int bidx = isARGB ? vgetq_lane_u32(v, `0`) : vgetq_lane_u32(v, `2`);
630	v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[`0`]->m_toLinear[ridx], v, `0`);
631	v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[`1`]->m_toLinear[gidx], v, `1`);
632	v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[`2`]->m_toLinear[bidx], v, `2`);
633	vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00);
634
635	vst1q_f32(&buffer[i].x, vf);
636	}
637	}
638
639	// Load to [0-4080] in 4x32 SIMD
640	template<typename T>
641	static inline void loadPU(const T &p, uint32x4_t &v);
642
643	template<>
644	inline void loadPU<QRgb>(const QRgb &p, uint32x4_t &v)
645	{
646	v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p)))));
647	v = vshlq_n_u32(v, `4`);
648	}
649
650	template<>
651	inline void loadPU<QRgba64>(const QRgba64 &p, uint32x4_t &v)
652	{
653	uint16x4_t v16 = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p)));
654	v16 = vsub_u16(v16, vshr_n_u16(v16, `8`));
655	v = vmovl_u16(v16);
656	v = vshrq_n_u32(v, `4`);
657	}
658
659	template<typename T>
660	void loadUnpremultiplied(QColorVector buffer, const* T src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
661	{
662	constexpr bool isARGB = isArgb<T>();
663	const float iFF00 = `1.0f` / (`255` * `256`);
664	for (qsizetype i = `0`; i < len; ++i) {
665	uint32x4_t v;
666	loadPU<T>(src[i], v);
667	const int ridx = isARGB ? vgetq_lane_u32(v, `2`) : vgetq_lane_u32(v, `0`);
668	const int gidx = vgetq_lane_u32(v, `1`);
669	const int bidx = isARGB ? vgetq_lane_u32(v, `0`) : vgetq_lane_u32(v, `2`);
670	v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[`0`]->m_toLinear[ridx], v, `0`);
671	v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[`1`]->m_toLinear[gidx], v, `1`);
672	v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[`2`]->m_toLinear[bidx], v, `2`);
673	float32x4_t vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00);
674	vst1q_f32(&buffer[i].x, vf);
675	}
676	}
677	#else
678	template<>
679	void loadPremultiplied<QRgb>(QColorVector buffer, const* QRgb src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
680	{
681	for (qsizetype i = `0`; i < len; ++i) {
682	const uint p = src[i];
683	const int a = qAlpha(p);
684	if (a) {
685	const float ia = `4080.0f` / a;
686	const int ridx = int(qRed(p) * ia + `0.5f`);
687	const int gidx = int(qGreen(p) * ia + `0.5f`);
688	const int bidx = int(qBlue(p) * ia + `0.5f`);
689	buffer[i].x = d_ptr->colorSpaceIn->lut[`0`]->m_toLinear[ridx] * (`1.0f` / (`255` * `256`));
690	buffer[i].y = d_ptr->colorSpaceIn->lut[`1`]->m_toLinear[gidx] * (`1.0f` / (`255` * `256`));
691	buffer[i].z = d_ptr->colorSpaceIn->lut[`2`]->m_toLinear[bidx] * (`1.0f` / (`255` * `256`));
692	} else {
693	buffer[i].x = buffer[i].y = buffer[i].z = `0.0f`;
694	}
695	}
696	}
697
698	template<>
699	void loadPremultiplied<QRgba64>(QColorVector buffer, const* QRgba64 src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
700	{
701	for (qsizetype i = `0`; i < len; ++i) {
702	const QRgba64 &p = src[i];
703	const int a = p.alpha();
704	if (a) {
705	const float ia = `4080.0f` / a;
706	const int ridx = int(p.red() * ia + `0.5f`);
707	const int gidx = int(p.green() * ia + `0.5f`);
708	const int bidx = int(p.blue() * ia + `0.5f`);
709	buffer[i].x = d_ptr->colorSpaceIn->lut[`0`]->m_toLinear[ridx] * (`1.0f` / (`255` * `256`));
710	buffer[i].y = d_ptr->colorSpaceIn->lut[`1`]->m_toLinear[gidx] * (`1.0f` / (`255` * `256`));
711	buffer[i].z = d_ptr->colorSpaceIn->lut[`2`]->m_toLinear[bidx] * (`1.0f` / (`255` * `256`));
712	} else {
713	buffer[i].x = buffer[i].y = buffer[i].z = `0.0f`;
714	}
715	}
716	}
717
718	template<>
719	void loadUnpremultiplied<QRgb>(QColorVector buffer, const* QRgb src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
720	{
721	for (qsizetype i = `0`; i < len; ++i) {
722	const uint p = src[i];
723	buffer[i].x = d_ptr->colorSpaceIn->lut[`0`]->u8ToLinearF32(qRed(p));
724	buffer[i].y = d_ptr->colorSpaceIn->lut[`1`]->u8ToLinearF32(qGreen(p));
725	buffer[i].z = d_ptr->colorSpaceIn->lut[`2`]->u8ToLinearF32(qBlue(p));
726	}
727	}
728
729	template<>
730	void loadUnpremultiplied<QRgba64>(QColorVector buffer, const* QRgba64 src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
731	{
732	for (qsizetype i = `0`; i < len; ++i) {
733	const QRgba64 &p = src[i];
734	buffer[i].x = d_ptr->colorSpaceIn->lut[`0`]->u16ToLinearF32(p.red());
735	buffer[i].y = d_ptr->colorSpaceIn->lut[`1`]->u16ToLinearF32(p.green());
736	buffer[i].z = d_ptr->colorSpaceIn->lut[`2`]->u16ToLinearF32(p.blue());
737	}
738	}
739	#endif
740	#if !defined(__SSE2__)
741	template<>
742	void loadPremultiplied<QRgbaFloat32>(QColorVector buffer, const* QRgbaFloat32 src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
743	{
744	for (qsizetype i = `0`; i < len; ++i) {
745	const QRgbaFloat32 &p = src[i];
746	const float a = p.a;
747	if (a) {
748	const float ia = `1.0f` / a;
749	buffer[i].x = d_ptr->colorSpaceIn->trc[`0`].applyExtended(p.r * ia);
750	buffer[i].y = d_ptr->colorSpaceIn->trc[`1`].applyExtended(p.g * ia);
751	buffer[i].z = d_ptr->colorSpaceIn->trc[`2`].applyExtended(p.b * ia);
752	} else {
753	buffer[i].x = buffer[i].y = buffer[i].z = `0.0f`;
754	}
755	}
756	}
757
758	template<>
759	void loadUnpremultiplied<QRgbaFloat32>(QColorVector buffer, const* QRgbaFloat32 src, const* qsizetype len, const QColorTransformPrivate *d_ptr)
760	{
761	for (qsizetype i = `0`; i < len; ++i) {
762	const QRgbaFloat32 &p = src[i];
763	buffer[i].x = d_ptr->colorSpaceIn->trc[`0`].applyExtended(p.r);
764	buffer[i].y = d_ptr->colorSpaceIn->trc[`1`].applyExtended(p.g);
765	buffer[i].z = d_ptr->colorSpaceIn->trc[`2`].applyExtended(p.b);
766	}
767	}
768	#endif
769
770	#if defined(__SSE2__)
771	template<typename T>
772	static inline void storeP(T &p, __m128i &v, int a);
773	template<>
774	inline void storeP<QRgb>(QRgb &p, __m128i &v, int a)
775	{
776	v = _mm_packs_epi32(a: v, b: v);
777	v = _mm_insert_epi16(v, a, `3`);
778	p = _mm_cvtsi128_si32(a: _mm_packus_epi16(a: v, b: v));
779	}
780	template<>
781	inline void storeP<QRgba64>(QRgba64 &p, __m128i &v, int a)
782	{
783	#if defined(__SSE4_1__)
784	v = _mm_packus_epi32(v, v);
785	v = _mm_insert_epi16(v, a, `3`);
786	_mm_storel_epi64((__m128i *)&p, v);
787	#else
788	const int r = _mm_extract_epi16(v, `0`);
789	const int g = _mm_extract_epi16(v, `2`);
790	const int b = _mm_extract_epi16(v, `4`);
791	p = qRgba64(r, g, b, a);
792	#endif
793	}
794
795	template<typename T>
796	static void storePremultiplied(T dst, const* T src, const* QColorVector buffer, const* qsizetype len,
797	const QColorTransformPrivate *d_ptr)
798	{
799	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
800	const __m128 iFF00 = _mm_set1_ps(w: `1.0f` / (`255` * `256`));
801	constexpr bool isARGB = isArgb<T>();
802	for (qsizetype i = `0`; i < len; ++i) {
803	const int a = getAlpha<T>(src[i]);
804	__m128 vf = _mm_loadu_ps(p: &buffer[i].x);
805	__m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
806	__m128 va = _mm_mul_ps(a: _mm_set1_ps(w: a), b: iFF00);
807	const int ridx = _mm_extract_epi16(v, `0`);
808	const int gidx = _mm_extract_epi16(v, `2`);
809	const int bidx = _mm_extract_epi16(v, `4`);
810	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`0`]->m_fromLinear[ridx], isARGB ? `4` : `0`);
811	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`1`]->m_fromLinear[gidx], `2`);
812	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`2`]->m_fromLinear[bidx], isARGB ? `0` : `4`);
813	vf = _mm_cvtepi32_ps(a: v);
814	vf = _mm_mul_ps(a: vf, b: va);
815	v = _mm_cvtps_epi32(a: vf);
816	storeP<T>(dst[i], v, a);
817	}
818	}
819
820	template<>
821	void storePremultiplied<QRgbaFloat32>(QRgbaFloat32 dst, const* QRgbaFloat32 *src,
822	const QColorVector buffer, const* qsizetype len,
823	const QColorTransformPrivate *d_ptr)
824	{
825	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
826	const __m128 vZero = _mm_set1_ps(w: `0.0f`);
827	const __m128 vOne = _mm_set1_ps(w: `1.0f`);
828	const __m128 viFF00 = _mm_set1_ps(w: `1.0f` / (`255` * `256`));
829	for (qsizetype i = `0`; i < len; ++i) {
830	const float a = src[i].a;
831	__m128 va = _mm_set1_ps(w: a);
832	__m128 vf = _mm_loadu_ps(p: &buffer[i].x);
833	const __m128 under = _mm_cmplt_ps(a: vf, b: vZero);
834	const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne);
835	if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == `0`) {
836	// Within gamut
837	va = _mm_mul_ps(a: va, b: viFF00);
838	__m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
839	const int ridx = _mm_extract_epi16(v, `0`);
840	const int gidx = _mm_extract_epi16(v, `2`);
841	const int bidx = _mm_extract_epi16(v, `4`);
842	v = _mm_setzero_si128();
843	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`0`]->m_fromLinear[ridx], `0`);
844	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`1`]->m_fromLinear[gidx], `2`);
845	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`2`]->m_fromLinear[bidx], `4`);
846	vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: va);
847	_mm_store_ps(p: &dst[i].r, a: vf);
848	} else {
849	dst[i].r = d_ptr->colorSpaceOut ->trc[`0`].applyInverseExtended(x: buffer[i].x);
850	dst[i].g = d_ptr->colorSpaceOut ->trc[`1`].applyInverseExtended(x: buffer[i].y);
851	dst[i].b = d_ptr->colorSpaceOut ->trc[`2`].applyInverseExtended(x: buffer[i].z);
852	vf = _mm_mul_ps(a: _mm_load_ps(p: &dst[i].r), b: va);
853	_mm_store_ps(p: &dst[i].r, a: vf);
854	}
855	dst[i].a = a;
856	}
857	}
858
859	template<typename T>
860	static inline void storePU(T &p, __m128i &v, int a);
861	template<>
862	inline void storePU<QRgb>(QRgb &p, __m128i &v, int a)
863	{
864	v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: `0x80`));
865	v = _mm_srli_epi16(a: v, count: `8`);
866	v = _mm_insert_epi16(v, a, `3`);
867	p = _mm_cvtsi128_si32(a: _mm_packus_epi16(a: v, b: v));
868	}
869	template<>
870	inline void storePU<QRgba64>(QRgba64 &p, __m128i &v, int a)
871	{
872	v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: `8`));
873	v = _mm_insert_epi16(v, a, `3`);
874	_mm_storel_epi64(p: (__m128i *)&p, a: v);
875	}
876
877	template<typename T>
878	static void storeUnpremultiplied(T dst, const* T src, const* QColorVector buffer, const* qsizetype len,
879	const QColorTransformPrivate *d_ptr)
880	{
881	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
882	constexpr bool isARGB = isArgb<T>();
883	for (qsizetype i = `0`; i < len; ++i) {
884	const int a = getAlpha<T>(src[i]);
885	__m128 vf = _mm_loadu_ps(p: &buffer[i].x);
886	__m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
887	const int ridx = _mm_extract_epi16(v, `0`);
888	const int gidx = _mm_extract_epi16(v, `2`);
889	const int bidx = _mm_extract_epi16(v, `4`);
890	v = _mm_setzero_si128();
891	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`0`]->m_fromLinear[ridx], isARGB ? `2` : `0`);
892	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`1`]->m_fromLinear[gidx], `1`);
893	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`2`]->m_fromLinear[bidx], isARGB ? `0` : `2`);
894	storePU<T>(dst[i], v, a);
895	}
896	}
897
898	template<>
899	void storeUnpremultiplied<QRgbaFloat32>(QRgbaFloat32 dst, const* QRgbaFloat32 *src,
900	const QColorVector buffer, const* qsizetype len,
901	const QColorTransformPrivate *d_ptr)
902	{
903	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
904	const __m128 vZero = _mm_set1_ps(w: `0.0f`);
905	const __m128 vOne = _mm_set1_ps(w: `1.0f`);
906	const __m128 viFF00 = _mm_set1_ps(w: `1.0f` / (`255` * `256`));
907	for (qsizetype i = `0`; i < len; ++i) {
908	const float a = src[i].a;
909	__m128 vf = _mm_loadu_ps(p: &buffer[i].x);
910	const __m128 under = _mm_cmplt_ps(a: vf, b: vZero);
911	const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne);
912	if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == `0`) {
913	// Within gamut
914	__m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
915	const int ridx = _mm_extract_epi16(v, `0`);
916	const int gidx = _mm_extract_epi16(v, `2`);
917	const int bidx = _mm_extract_epi16(v, `4`);
918	v = _mm_setzero_si128();
919	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`0`]->m_fromLinear[ridx], `0`);
920	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`1`]->m_fromLinear[gidx], `2`);
921	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`2`]->m_fromLinear[bidx], `4`);
922	vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: viFF00);
923	_mm_storeu_ps(p: &dst[i].r, a: vf);
924	} else {
925	dst[i].r = d_ptr->colorSpaceOut ->trc[`0`].applyInverseExtended(x: buffer[i].x);
926	dst[i].g = d_ptr->colorSpaceOut ->trc[`1`].applyInverseExtended(x: buffer[i].y);
927	dst[i].b = d_ptr->colorSpaceOut ->trc[`2`].applyInverseExtended(x: buffer[i].z);
928	}
929	dst[i].a = a;
930	}
931	}
932
933	template<typename T>
934	static void storeOpaque(T dst, const* T src, const* QColorVector buffer, const* qsizetype len,
935	const QColorTransformPrivate *d_ptr)
936	{
937	Q_UNUSED(src);
938	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
939	constexpr bool isARGB = isArgb<T>();
940	for (qsizetype i = `0`; i < len; ++i) {
941	__m128 vf = _mm_loadu_ps(p: &buffer[i].x);
942	__m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
943	const int ridx = _mm_extract_epi16(v, `0`);
944	const int gidx = _mm_extract_epi16(v, `2`);
945	const int bidx = _mm_extract_epi16(v, `4`);
946	v = _mm_setzero_si128();
947	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`0`]->m_fromLinear[ridx], isARGB ? `2` : `0`);
948	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`1`]->m_fromLinear[gidx], `1`);
949	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`2`]->m_fromLinear[bidx], isARGB ? `0` : `2`);
950	storePU<T>(dst[i], v, isARGB ? `255` : `0xffff`);
951	}
952	}
953
954	template<>
955	void storeOpaque<QRgbaFloat32>(QRgbaFloat32 dst, const* QRgbaFloat32 *src,
956	const QColorVector buffer, const* qsizetype len,
957	const QColorTransformPrivate *d_ptr)
958	{
959	Q_UNUSED(src);
960	const __m128 v4080 = _mm_set1_ps(w: `4080.f`);
961	const __m128 vZero = _mm_set1_ps(w: `0.0f`);
962	const __m128 vOne = _mm_set1_ps(w: `1.0f`);
963	const __m128 viFF00 = _mm_set1_ps(w: `1.0f` / (`255` * `256`));
964	for (qsizetype i = `0`; i < len; ++i) {
965	__m128 vf = _mm_loadu_ps(p: &buffer[i].x);
966	const __m128 under = _mm_cmplt_ps(a: vf, b: vZero);
967	const __m128 over = _mm_cmpgt_ps(a: vf, b: vOne);
968	if (_mm_movemask_ps(a: _mm_or_ps(a: under, b: over)) == `0`) {
969	// Within gamut
970	__m128i v = _mm_cvtps_epi32(a: _mm_mul_ps(a: vf, b: v4080));
971	const int ridx = _mm_extract_epi16(v, `0`);
972	const int gidx = _mm_extract_epi16(v, `2`);
973	const int bidx = _mm_extract_epi16(v, `4`);
974	v = _mm_setzero_si128();
975	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`0`]->m_fromLinear[ridx], `0`);
976	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`1`]->m_fromLinear[gidx], `2`);
977	v = _mm_insert_epi16(v, d_ptr->colorSpaceOut ->lut[`2`]->m_fromLinear[bidx], `4`);
978	vf = _mm_mul_ps(a: _mm_cvtepi32_ps(a: v), b: viFF00);
979	_mm_store_ps(p: &dst[i].r, a: vf);
980	} else {
981	dst[i].r = d_ptr->colorSpaceOut ->trc[`0`].applyInverseExtended(x: buffer[i].x);
982	dst[i].g = d_ptr->colorSpaceOut ->trc[`1`].applyInverseExtended(x: buffer[i].y);
983	dst[i].b = d_ptr->colorSpaceOut ->trc[`2`].applyInverseExtended(x: buffer[i].z);
984	}
985	dst[i].a = `1.0f`;
986	}
987	}
988
989	#elif defined(__ARM_NEON__)
990	template<typename T>
991	static inline void storeP(T &p, const uint16x4_t &v);
992	template<>
993	inline void storeP<QRgb>(QRgb &p, const uint16x4_t &v)
994	{
995	p = vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(v, v))), `0`);
996	}
997	template<>
998	inline void storeP<QRgba64>(QRgba64 &p, const uint16x4_t &v)
999	{
1000	vst1_u16((uint16_t *)&p, v);
1001	}
1002
1003	template<typename T>
1004	static void storePremultiplied(T dst, const* T src, const* QColorVector buffer, const* qsizetype len,
1005	const QColorTransformPrivate *d_ptr)
1006	{
1007	const float iFF00 = `1.0f` / (`255` * `256`);
1008	constexpr bool isARGB = isArgb<T>();
1009	for (qsizetype i = `0`; i < len; ++i) {
1010	const int a = getAlpha<T>(src[i]);
1011	float32x4_t vf = vld1q_f32(&buffer[i].x);
1012	uint32x4_t v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, `4080.f`), vdupq_n_f32(`0.5f`)));
1013	const int ridx = vgetq_lane_u32(v, `0`);
1014	const int gidx = vgetq_lane_u32(v, `1`);
1015	const int bidx = vgetq_lane_u32(v, `2`);
1016	v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[`0`]->m_fromLinear[ridx], v, isARGB ? `2` : `0`);
1017	v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[`1`]->m_fromLinear[gidx], v, `1`);
1018	v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[`2`]->m_fromLinear[bidx], v, isARGB ? `0` : `2`);
1019	vf = vcvtq_f32_u32(v);
1020	vf = vmulq_n_f32(vf, a * iFF00);
1021	vf = vaddq_f32(vf, vdupq_n_f32(`0.5f`));
1022	v = vcvtq_u32_f32(vf);
1023	uint16x4_t v16 = vmovn_u32(v);
1024	v16 = vset_lane_u16(a, v16, `3`);
1025	storeP<T>(dst[i], v16);
1026	}
1027	}
1028
1029	template<typename T>
1030	static inline void storePU(T &p, uint16x4_t &v, int a);
1031	template<>
1032	inline void storePU<QRgb>(QRgb &p, uint16x4_t &v, int a)
1033	{
1034	v = vadd_u16(v, vdup_n_u16(`0x80`));
1035	v = vshr_n_u16(v, `8`);
1036	v = vset_lane_u16(a, v, `3`);
1037	p = vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(v, v))), `0`);
1038	}
1039	template<>
1040	inline void storePU<QRgba64>(QRgba64 &p, uint16x4_t &v, int a)
1041	{
1042	v = vadd_u16(v, vshr_n_u16(v, `8`));
1043	v = vset_lane_u16(a, v, `3`);
1044	vst1_u16((uint16_t *)&p, v);
1045	}
1046
1047	template<typename T>
1048	static void storeUnpremultiplied(T dst, const* T src, const* QColorVector buffer, const* qsizetype len,
1049	const QColorTransformPrivate *d_ptr)
1050	{
1051	constexpr bool isARGB = isArgb<T>();
1052	for (qsizetype i = `0`; i < len; ++i) {
1053	const int a = getAlpha<T>(src[i]);
1054	float32x4_t vf = vld1q_f32(&buffer[i].x);
1055	uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, `4080.f`), vdupq_n_f32(`0.5f`))));
1056	const int ridx = vget_lane_u16(v, `0`);
1057	const int gidx = vget_lane_u16(v, `1`);
1058	const int bidx = vget_lane_u16(v, `2`);
1059	v = vset_lane_u16(d_ptr->colorSpaceOut->lut[`0`]->m_fromLinear[ridx], v, isARGB ? `2` : `0`);
1060	v = vset_lane_u16(d_ptr->colorSpaceOut->lut[`1`]->m_fromLinear[gidx], v, `1`);
1061	v = vset_lane_u16(d_ptr->colorSpaceOut->lut[`2`]->m_fromLinear[bidx], v, isARGB ? `0` : `2`);
1062	storePU<T>(dst[i], v, a);
1063	}
1064	}
1065
1066	template<typename T>
1067	static void storeOpaque(T dst, const* T src, const* QColorVector buffer, const* qsizetype len,
1068	const QColorTransformPrivate *d_ptr)
1069	{
1070	Q_UNUSED(src);
1071	constexpr bool isARGB = isArgb<T>();
1072	for (qsizetype i = `0`; i < len; ++i) {
1073	float32x4_t vf = vld1q_f32(&buffer[i].x);
1074	uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, `4080.f`), vdupq_n_f32(`0.5f`))));
1075	const int ridx = vget_lane_u16(v, `0`);
1076	const int gidx = vget_lane_u16(v, `1`);
1077	const int bidx = vget_lane_u16(v, `2`);
1078	v = vset_lane_u16(d_ptr->colorSpaceOut->lut[`0`]->m_fromLinear[ridx], v, isARGB ? `2` : `0`);
1079	v = vset_lane_u16(d_ptr->colorSpaceOut->lut[`1`]->m_fromLinear[gidx], v, `1`);
1080	v = vset_lane_u16(d_ptr->colorSpaceOut->lut[`2`]->m_fromLinear[bidx], v, isARGB ? `0` : `2`);
1081	storePU<T>(dst[i], v, isARGB ? `255` : `0xffff`);
1082	}
1083	}
1084	#else
1085	static void storePremultiplied(QRgb dst, const* QRgb src, const* QColorVector buffer, const* qsizetype len,
1086	const QColorTransformPrivate *d_ptr)
1087	{
1088	for (qsizetype i = `0`; i < len; ++i) {
1089	const int a = qAlpha(src[i]);
1090	const float fa = a / (`255.0f` * `256.0f`);
1091	const float r = d_ptr->colorSpaceOut->lut[`0`]->m_fromLinear[int(buffer[i].x * `4080.0f` + `0.5f`)];
1092	const float g = d_ptr->colorSpaceOut->lut[`1`]->m_fromLinear[int(buffer[i].y * `4080.0f` + `0.5f`)];
1093	const float b = d_ptr->colorSpaceOut->lut[`2`]->m_fromLinear[int(buffer[i].z * `4080.0f` + `0.5f`)];
1094	dst[i] = qRgba(r * fa + `0.5f`, g * fa + `0.5f`, b * fa + `0.5f`, a);
1095	}
1096	}
1097
1098	static void storeUnpremultiplied(QRgb dst, const* QRgb src, const* QColorVector buffer, const* qsizetype len,
1099	const QColorTransformPrivate *d_ptr)
1100	{
1101	for (qsizetype i = `0`; i < len; ++i) {
1102	const int r = d_ptr->colorSpaceOut->lut[`0`]->u8FromLinearF32(buffer[i].x);
1103	const int g = d_ptr->colorSpaceOut->lut[`1`]->u8FromLinearF32(buffer[i].y);
1104	const int b = d_ptr->colorSpaceOut->lut[`2`]->u8FromLinearF32(buffer[i].z);
1105	dst[i] = (src[i] & `0xff000000`) \| (r << `16`) \| (g << `8`) \| (b << `0`);
1106	}
1107	}
1108
1109	static void storeOpaque(QRgb dst, const* QRgb src, const* QColorVector buffer, const* qsizetype len,
1110	const QColorTransformPrivate *d_ptr)
1111	{
1112	Q_UNUSED(src);
1113	for (qsizetype i = `0`; i < len; ++i) {
1114	const int r = d_ptr->colorSpaceOut->lut[`0`]->u8FromLinearF32(buffer[i].x);
1115	const int g = d_ptr->colorSpaceOut->lut[`1`]->u8FromLinearF32(buffer[i].y);
1116	const int b = d_ptr->colorSpaceOut->lut[`2`]->u8FromLinearF32(buffer[i].z);
1117	dst[i] = `0xff000000` \| (r << `16`) \| (g << `8`) \| (b << `0`);
1118	}
1119	}
1120
1121	static void storePremultiplied(QRgba64 dst, const* QRgba64 src, const* QColorVector buffer, const* qsizetype len,
1122	const QColorTransformPrivate *d_ptr)
1123	{
1124	for (qsizetype i = `0`; i < len; ++i) {
1125	const int a = src[i].alpha();
1126	const float fa = a / (`255.0f` * `256.0f`);
1127	const float r = d_ptr->colorSpaceOut->lut[`0`]->m_fromLinear[int(buffer[i].x * `4080.0f` + `0.5f`)];
1128	const float g = d_ptr->colorSpaceOut->lut[`1`]->m_fromLinear[int(buffer[i].y * `4080.0f` + `0.5f`)];
1129	const float b = d_ptr->colorSpaceOut->lut[`2`]->m_fromLinear[int(buffer[i].z * `4080.0f` + `0.5f`)];
1130	dst[i] = qRgba64(r * fa + `0.5f`, g * fa + `0.5f`, b * fa + `0.5f`, a);
1131	}
1132	}
1133
1134	static void storeUnpremultiplied(QRgba64 dst, const* QRgba64 src, const* QColorVector buffer, const* qsizetype len,
1135	const QColorTransformPrivate *d_ptr)
1136	{
1137	for (qsizetype i = `0`; i < len; ++i) {
1138	const int r = d_ptr->colorSpaceOut->lut[`0`]->u16FromLinearF32(buffer[i].x);
1139	const int g = d_ptr->colorSpaceOut->lut[`1`]->u16FromLinearF32(buffer[i].y);
1140	const int b = d_ptr->colorSpaceOut->lut[`2`]->u16FromLinearF32(buffer[i].z);
1141	dst[i] = qRgba64(r, g, b, src[i].alpha());
1142	}
1143	}
1144
1145	static void storeOpaque(QRgba64 dst, const* QRgba64 src, const* QColorVector buffer, const* qsizetype len,
1146	const QColorTransformPrivate *d_ptr)
1147	{
1148	Q_UNUSED(src);
1149	for (qsizetype i = `0`; i < len; ++i) {
1150	const int r = d_ptr->colorSpaceOut->lut[`0`]->u16FromLinearF32(buffer[i].x);
1151	const int g = d_ptr->colorSpaceOut->lut[`1`]->u16FromLinearF32(buffer[i].y);
1152	const int b = d_ptr->colorSpaceOut->lut[`2`]->u16FromLinearF32(buffer[i].z);
1153	dst[i] = qRgba64(r, g, b, `0xFFFF`);
1154	}
1155	}
1156	#endif
1157	#if !defined(__SSE2__)
1158	static void storePremultiplied(QRgbaFloat32 dst, const* QRgbaFloat32 src, const* QColorVector *buffer,
1159	const qsizetype len, const QColorTransformPrivate *d_ptr)
1160	{
1161	for (qsizetype i = `0`; i < len; ++i) {
1162	const float a = src[i].a;
1163	dst[i].r = d_ptr->colorSpaceOut->trc[`0`].applyInverseExtended(buffer[i].x) * a;
1164	dst[i].g = d_ptr->colorSpaceOut->trc[`1`].applyInverseExtended(buffer[i].y) * a;
1165	dst[i].b = d_ptr->colorSpaceOut->trc[`2`].applyInverseExtended(buffer[i].z) * a;
1166	dst[i].a = a;
1167	}
1168	}
1169
1170	static void storeUnpremultiplied(QRgbaFloat32 dst, const* QRgbaFloat32 src, const* QColorVector *buffer,
1171	const qsizetype len, const QColorTransformPrivate *d_ptr)
1172	{
1173	for (qsizetype i = `0`; i < len; ++i) {
1174	const float a = src[i].a;
1175	dst[i].r = d_ptr->colorSpaceOut->trc[`0`].applyInverseExtended(buffer[i].x);
1176	dst[i].g = d_ptr->colorSpaceOut->trc[`1`].applyInverseExtended(buffer[i].y);
1177	dst[i].b = d_ptr->colorSpaceOut->trc[`2`].applyInverseExtended(buffer[i].z);
1178	dst[i].a = a;
1179	}
1180	}
1181
1182	static void storeOpaque(QRgbaFloat32 dst, const* QRgbaFloat32 src, const* QColorVector buffer, const* qsizetype len,
1183	const QColorTransformPrivate *d_ptr)
1184	{
1185	Q_UNUSED(src);
1186	for (qsizetype i = `0`; i < len; ++i) {
1187	dst[i].r = d_ptr->colorSpaceOut->trc[`0`].applyInverseExtended(buffer[i].x);
1188	dst[i].g = d_ptr->colorSpaceOut->trc[`1`].applyInverseExtended(buffer[i].y);
1189	dst[i].b = d_ptr->colorSpaceOut->trc[`2`].applyInverseExtended(buffer[i].z);
1190	dst[i].a = `1.0f`;
1191	}
1192	}
1193	#endif
1194	static void storeGray(quint8 dst, const* QRgb src, const* QColorVector buffer, const* qsizetype len,
1195	const QColorTransformPrivate *d_ptr)
1196	{
1197	Q_UNUSED(src);
1198	for (qsizetype i = `0`; i < len; ++i)
1199	dst[i] = d_ptr->colorSpaceOut ->lut [`1`]->u8FromLinearF32(f: buffer[i].y);
1200	}
1201
1202	static void storeGray(quint16 dst, const* QRgba64 src, const* QColorVector buffer, const* qsizetype len,
1203	const QColorTransformPrivate *d_ptr)
1204	{
1205	Q_UNUSED(src);
1206	for (qsizetype i = `0`; i < len; ++i)
1207	dst[i] = d_ptr->colorSpaceOut ->lut [`1`]->u16FromLinearF32(f: buffer[i].y);
1208	}
1209
1210	static constexpr qsizetype WorkBlockSize = `256`;
1211
/*
    Raw, suitably aligned storage for \c Count objects of type \c T.

    No constructor of \c T is run, so the contents are indeterminate until
    written. The object converts implicitly to a \c{T *} pointing at the start
    of the storage, which lets it be passed wherever a plain array pointer is
    expected.
*/
template <typename T, int Count = 1>
class QUninitialized
{
    static_assert(Count > 0, "QUninitialized needs room for at least one element");
public:
    // Returns a pointer to the first element slot of the storage.
    operator T *() { return reinterpret_cast<T *>(data); }
private:
    alignas(T) char data[sizeof(T) * Count];
};
1220
1221	template<typename T>
1222	void QColorTransformPrivate::apply(T dst, const* T src, qsizetype count, TransformFlags flags) const*
1223	{
1224	if (!colorMatrix.isValid())
1225	return;
1226
1227	updateLutsIn();
1228	updateLutsOut();
1229
1230	bool doApplyMatrix = !colorMatrix.isIdentity();
1231	constexpr bool DoClip = !std::is_same_v<T, QRgbaFloat16> && !std::is_same_v<T, QRgbaFloat32>;
1232
1233	QUninitialized<QColorVector, WorkBlockSize> buffer;
1234
1235	qsizetype i = `0`;
1236	while (i < count) {
1237	const qsizetype len = qMin(a: count - i, b: WorkBlockSize);
1238	if (flags & InputPremultiplied)
1239	loadPremultiplied(buffer, src + i, len, this);
1240	else
1241	loadUnpremultiplied(buffer, src + i, len, this);
1242
1243	if (doApplyMatrix)
1244	applyMatrix<DoClip>(buffer, len, colorMatrix);
1245
1246	if (flags & InputOpaque)
1247	storeOpaque(dst + i, src + i, buffer, len, this);
1248	else if (flags & OutputPremultiplied)
1249	storePremultiplied(dst + i, src + i, buffer, len, this);
1250	else
1251	storeUnpremultiplied(dst + i, src + i, buffer, len, this);
1252
1253	i += len;
1254	}
1255	}
1256
1257	template<typename D, typename S>
1258	void QColorTransformPrivate::applyReturnGray(D dst, const* S src, qsizetype count, TransformFlags flags) const*
1259	{
1260	if (!colorMatrix.isValid())
1261	return;
1262
1263	updateLutsIn();
1264	updateLutsOut();
1265
1266	QUninitialized<QColorVector, WorkBlockSize> buffer;
1267
1268	qsizetype i = `0`;
1269	while (i < count) {
1270	const qsizetype len = qMin(a: count - i, b: WorkBlockSize);
1271	if (flags & InputPremultiplied)
1272	loadPremultiplied(buffer, src + i, len, this);
1273	else
1274	loadUnpremultiplied(buffer, src + i, len, this);
1275
1276	applyMatrix(buffer, len, colorMatrix);
1277
1278	storeGray(dst + i, src + i, buffer, len, this);
1279
1280	i += len;
1281	}
1282	}
1283
/*!
    \internal
    \enum QColorTransformPrivate::TransformFlag

    Defines how the transform is to be applied.

    \value Unpremultiplied The input and output should both be unpremultiplied.
    \value InputOpaque The input is guaranteed to be opaque.
    \value InputPremultiplied The input is premultiplied.
    \value OutputPremultiplied The output should be premultiplied.
    \value Premultiplied Both input and output should be premultiplied.
*/
1296
1297	/!*
1298	\internal
1299	Prepares a color transformation for fast application. You do not need to
1300	call this explicitly as it will be called implicitly on the first transforms, but
1301	if you want predictable performance on the first transforms, you can perform it
1302	in advance.
1303
1304	\sa QColorTransform::map(), apply()
1305	*/
1306	void QColorTransformPrivate::prepare()
1307	{
1308	updateLutsIn();
1309	updateLutsOut();
1310	}
1311
1312	/!*
1313	\internal
1314	Applies the color transformation on \a count QRgb pixels starting from
1315	\a src and stores the result in \a dst.
1316
1317	Thread-safe if prepare() has been called first.
1318
1319	Assumes unpremultiplied data by default. Set \a flags to change defaults.
1320
1321	\sa prepare()
1322	*/
1323	void QColorTransformPrivate::apply(QRgb dst, const* QRgb src, qsizetype count, TransformFlags flags) const*
1324	{
1325	apply<QRgb>(dst, src, count, flags);
1326	}
1327
1328	/!*
1329	\internal
1330	Applies the color transformation on \a count QRgba64 pixels starting from
1331	\a src and stores the result in \a dst.
1332
1333	Thread-safe if prepare() has been called first.
1334
1335	Assumes unpremultiplied data by default. Set \a flags to change defaults.
1336
1337	\sa prepare()
1338	*/
1339	void QColorTransformPrivate::apply(QRgba64 dst, const* QRgba64 src, qsizetype count, TransformFlags flags) const*
1340	{
1341	apply<QRgba64>(dst, src, count, flags);
1342	}
1343
1344	/!*
1345	\internal
1346	Applies the color transformation on \a count QRgbaFloat32 pixels starting from
1347	\a src and stores the result in \a dst.
1348
1349	Thread-safe if prepare() has been called first.
1350
1351	Assumes unpremultiplied data by default. Set \a flags to change defaults.
1352
1353	\sa prepare()
1354	*/
1355	void QColorTransformPrivate::apply(QRgbaFloat32 dst, const* QRgbaFloat32 *src, qsizetype count,
1356	TransformFlags flags) const
1357	{
1358	apply<QRgbaFloat32>(dst, src, count, flags);
1359	}
1360
1361	/!*
1362	\internal
1363	Is to be called on a color-transform to XYZ, returns only luminance values.
1364
1365	*/
1366	void QColorTransformPrivate::apply(quint8 dst, const* QRgb src, qsizetype count, TransformFlags flags) const*
1367	{
1368	applyReturnGray<quint8, QRgb>(dst, src, count, flags);
1369	}
1370
1371	/!*
1372	\internal
1373	Is to be called on a color-transform to XYZ, returns only luminance values.
1374
1375	*/
1376	void QColorTransformPrivate::apply(quint16 dst, const* QRgba64 src, qsizetype count, TransformFlags flags) const*
1377	{
1378	applyReturnGray<quint16, QRgba64>(dst, src, count, flags);
1379	}
1380
1381
1382	/!*
1383	\internal
1384	*/
1385	bool QColorTransformPrivate::isIdentity() const
1386	{
1387	if (!colorMatrix.isIdentity())
1388	return false;
1389	if (colorSpaceIn && colorSpaceOut) {
1390	if (colorSpaceIn ->transferFunction != colorSpaceOut ->transferFunction)
1391	return false;
1392	if (colorSpaceIn ->transferFunction == QColorSpace::TransferFunction::Custom) {
1393	return colorSpaceIn ->trc[`0`] == colorSpaceOut ->trc[`0`]
1394	&& colorSpaceIn ->trc[`1`] == colorSpaceOut ->trc[`1`]
1395	&& colorSpaceIn ->trc[`2`] == colorSpaceOut ->trc[`2`];
1396	}
1397	} else {
1398	if (colorSpaceIn && colorSpaceIn ->transferFunction != QColorSpace::TransferFunction::Linear)
1399	return false;
1400	if (colorSpaceOut && colorSpaceOut ->transferFunction != QColorSpace::TransferFunction::Linear)
1401	return false;
1402	}
1403	return true;
1404	}
1405
1406	QT_END_NAMESPACE
1407

// End of qtbase/src/gui/painting/qcolortransform.cpp