/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include <private/qdrawhelper_p.h>
#include <private/qdrawingprimitive_sse2_p.h>
#include <private/qpaintengine_raster_p.h>

#if defined(QT_COMPILER_SUPPORTS_SSE4_1)

QT_BEGIN_NAMESPACE

#ifndef __AVX2__
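// Premultiplies four ARGB32 (or RGBA8888 when RGBA is true) pixels per
// iteration. Scalar equivalent, as in the SIMD_EPILOGUE below:
//     uint v = qPremultiply(src[i]);
//     buffer[i] = RGBA ? RGBA2ARGB(v) : v;
// PTEST lets fully transparent and fully opaque vectors skip the multiply.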
template<bool RGBA>
static void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count)
{
    int i = 0;
    const __m128i alphaMask = _mm_set1_epi32(0xff000000);
    const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
    const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15);
    const __m128i half = _mm_set1_epi16(0x0080);
    const __m128i zero = _mm_setzero_si128();

    for (; i < count - 3; i += 4) {
        __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
        // testz is true when all four alpha bytes are 0 (fully transparent),
        // testc when they are all 0xff (fully opaque):
        if (!_mm_testz_si128(srcVector, alphaMask)) {
            if (!_mm_testc_si128(srcVector, alphaMask)) {
                if (RGBA)
                    srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
                __m128i src1 = _mm_unpacklo_epi8(srcVector, zero);
                __m128i src2 = _mm_unpackhi_epi8(srcVector, zero);
                __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask);
                __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask);
                // v = x * a, then (v + (v >> 8) + 0x80) >> 8, which is
                // v / 255 correctly rounded for 16-bit v:
                src1 = _mm_mullo_epi16(src1, alpha1);
                src2 = _mm_mullo_epi16(src2, alpha2);
                src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 8));
                src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 8));
                src1 = _mm_add_epi16(src1, half);
                src2 = _mm_add_epi16(src2, half);
                src1 = _mm_srli_epi16(src1, 8);
                src2 = _mm_srli_epi16(src2, 8);
                // Restore the original alpha in every fourth 16-bit lane:
                src1 = _mm_blend_epi16(src1, alpha1, 0x88);
                src2 = _mm_blend_epi16(src2, alpha2, 0x88);
                srcVector = _mm_packus_epi16(src1, src2);
                _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
            } else {
                if (RGBA)
                    _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask));
                else if (buffer != src)
                    _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
            }
        } else {
            _mm_storeu_si128((__m128i *)&buffer[i], zero);
        }
    }

    SIMD_EPILOGUE(i, count, 3) {
        uint v = qPremultiply(src[i]);
        buffer[i] = RGBA ? RGBA2ARGB(v) : v;
    }
}

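// Widens ARGB32/RGBA8888 to premultiplied RGBA64 (16 bits per channel).
// Scalar equivalent, as in the SIMD_EPILOGUE below:
//     const uint s = RGBA ? RGBA2ARGB(src[i]) : src[i];
//     buffer[i] = QRgba64::fromArgb32(s).premultiplied();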
template<bool RGBA>
static void convertARGBToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count)
{
    int i = 0;
    const __m128i alphaMask = _mm_set1_epi32(0xff000000);
    const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
    const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15);
    const __m128i zero = _mm_setzero_si128();

    for (; i < count - 3; i += 4) {
        __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
        if (!_mm_testz_si128(srcVector, alphaMask)) {
            bool cf = _mm_testc_si128(srcVector, alphaMask); // all four pixels opaque?

            if (!RGBA)
                srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
            // Unpacking a byte with itself multiplies it by 257, mapping
            // 0..0xff onto 0..0xffff:
            const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector);
            const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector);
            if (!cf) {
                __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask);
                __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask);
                __m128i dst1 = _mm_mulhi_epu16(src1, alpha1);
                __m128i dst2 = _mm_mulhi_epu16(src2, alpha2);
                // Map 0->0xfffe onto 0->0xffff:
                dst1 = _mm_add_epi16(dst1, _mm_srli_epi16(dst1, 15));
                dst2 = _mm_add_epi16(dst2, _mm_srli_epi16(dst2, 15));
                // Restore the correct alpha value:
                dst1 = _mm_blend_epi16(dst1, src1, 0x88);
                dst2 = _mm_blend_epi16(dst2, src2, 0x88);
                _mm_storeu_si128((__m128i *)&buffer[i], dst1);
                _mm_storeu_si128((__m128i *)&buffer[i + 2], dst2);
            } else {
                _mm_storeu_si128((__m128i *)&buffer[i], src1);
                _mm_storeu_si128((__m128i *)&buffer[i + 2], src2);
            }
        } else {
            _mm_storeu_si128((__m128i *)&buffer[i], zero);
            _mm_storeu_si128((__m128i *)&buffer[i + 2], zero);
        }
    }

    SIMD_EPILOGUE(i, count, 3) {
        const uint s = RGBA ? RGBA2ARGB(src[i]) : src[i];
        buffer[i] = QRgba64::fromArgb32(s).premultiplied();
    }
}
#endif // __AVX2__

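// Computes mul / a per float lane. _mm_rcp_ps is accurate to roughly 12 bits;
// one Newton-Raphson step (ia' = ia * (2 - a * ia)) roughly doubles that,
// which is plenty for the 8- and 16-bit channel math in this file.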
static inline __m128 Q_DECL_VECTORCALL reciprocal_mul_ps(__m128 a, float mul)
{
    __m128 ia = _mm_rcp_ps(a); // Approximate 1/a
    // Improve precision of ia using one Newton-Raphson iteration
    ia = _mm_sub_ps(_mm_add_ps(ia, ia), _mm_mul_ps(ia, _mm_mul_ps(ia, a)));
    ia = _mm_mul_ps(ia, _mm_set1_ps(mul));
    return ia;
}

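// Unpremultiplies ARGB32PM into ARGB32, RGBA8888 or RGBX8888, dividing each
// color channel by its alpha with the vectorized reciprocal above. Scalar
// equivalent, as in the SIMD_EPILOGUE below:
//     uint v = qUnpremultiply_sse4(src[i]);
//     if (RGBx) v = 0xff000000 | v;
//     if (RGBA) v = ARGB2RGBA(v);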
template<bool RGBA, bool RGBx>
static inline void convertARGBFromARGB32PM_sse4(uint *buffer, const uint *src, int count)
{
    int i = 0;
    // When alpha is zero, the reciprocal path below produces Inf/NaN and
    // raises the invalid-operation FP exception. If that exception is
    // unmasked (and would therefore trap), use the scalar version instead.
    if ((_MM_GET_EXCEPTION_MASK() & _MM_MASK_INVALID) == 0) {
        for (; i < count; ++i) {
            uint v = qUnpremultiply(src[i]);
            if (RGBx)
                v = 0xff000000 | v;
            if (RGBA)
                v = ARGB2RGBA(v);
            buffer[i] = v;
        }
        return;
    }
    const __m128i alphaMask = _mm_set1_epi32(0xff000000);
    const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
    const __m128i zero = _mm_setzero_si128();

    for (; i < count - 3; i += 4) {
        __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
        if (!_mm_testz_si128(srcVector, alphaMask)) {
            if (!_mm_testc_si128(srcVector, alphaMask)) {
                __m128i srcVectorAlpha = _mm_srli_epi32(srcVector, 24);
                if (RGBA)
                    srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
                const __m128 a = _mm_cvtepi32_ps(srcVectorAlpha);
                const __m128 ia = reciprocal_mul_ps(a, 255.0f);
                // Widen the four pixels to one 32-bit lane per channel:
                __m128i src1 = _mm_unpacklo_epi8(srcVector, zero);
                __m128i src3 = _mm_unpackhi_epi8(srcVector, zero);
                __m128i src2 = _mm_unpackhi_epi16(src1, zero);
                __m128i src4 = _mm_unpackhi_epi16(src3, zero);
                src1 = _mm_unpacklo_epi16(src1, zero);
                src3 = _mm_unpacklo_epi16(src3, zero);
                // Broadcast each pixel's reciprocal alpha and multiply:
                __m128 ia1 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(0, 0, 0, 0));
                __m128 ia2 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(1, 1, 1, 1));
                __m128 ia3 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(2, 2, 2, 2));
                __m128 ia4 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(3, 3, 3, 3));
                src1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src1), ia1));
                src2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src2), ia2));
                src3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src3), ia3));
                src4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src4), ia4));
                src1 = _mm_packus_epi32(src1, src2);
                src3 = _mm_packus_epi32(src3, src4);
                src1 = _mm_packus_epi16(src1, src3);
                // Handle potential alpha == 0 values:
                __m128i srcVectorAlphaMask = _mm_cmpeq_epi32(srcVectorAlpha, zero);
                src1 = _mm_andnot_si128(srcVectorAlphaMask, src1);
                // Fixup alpha values:
                if (RGBx)
                    srcVector = _mm_or_si128(src1, alphaMask);
                else
                    srcVector = _mm_blendv_epi8(src1, srcVector, alphaMask);
                _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
            } else {
                if (RGBA)
                    _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask));
                else if (buffer != src)
                    _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
            }
        } else {
            if (RGBx)
                _mm_storeu_si128((__m128i *)&buffer[i], alphaMask);
            else
                _mm_storeu_si128((__m128i *)&buffer[i], zero);
        }
    }

    SIMD_EPILOGUE(i, count, 3) {
        uint v = qUnpremultiply_sse4(src[i]);
        if (RGBx)
            v = 0xff000000 | v;
        if (RGBA)
            v = ARGB2RGBA(v);
        buffer[i] = v;
    }
}

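// Unpremultiplies RGBA64PM and narrows it to ARGB32 or RGBA8888. Scalar
// equivalent, as in the SIMD_EPILOGUE below:
//     buffer[i] = qConvertRgba64ToRgb32_sse4<RGBA ? PixelOrderRGB : PixelOrderBGR>(src[i]);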
template<bool RGBA>
static inline void convertARGBFromRGBA64PM_sse4(uint *buffer, const QRgba64 *src, int count)
{
    int i = 0;
    // As above: fall back to the scalar version if the invalid-operation FP
    // exception raised by the reciprocal path would trap.
    if ((_MM_GET_EXCEPTION_MASK() & _MM_MASK_INVALID) == 0) {
        for (; i < count; ++i) {
            const QRgba64 v = src[i].unpremultiplied();
            buffer[i] = RGBA ? toRgba8888(v) : toArgb32(v);
        }
        return;
    }
    const __m128i alphaMask = _mm_set1_epi64x(qint64(Q_UINT64_C(0xffff) << 48));
    const __m128i alphaMask32 = _mm_set1_epi32(0xff000000);
    const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
    const __m128i zero = _mm_setzero_si128();

    for (; i < count - 3; i += 4) {
        __m128i srcVector1 = _mm_loadu_si128((const __m128i *)&src[i]);
        __m128i srcVector2 = _mm_loadu_si128((const __m128i *)&src[i + 2]);
        bool transparent1 = _mm_testz_si128(srcVector1, alphaMask);
        bool opaque1 = _mm_testc_si128(srcVector1, alphaMask);
        bool transparent2 = _mm_testz_si128(srcVector2, alphaMask);
        bool opaque2 = _mm_testc_si128(srcVector2, alphaMask);

        if (!(transparent1 && transparent2)) {
            if (!(opaque1 && opaque2)) {
                __m128i srcVector1Alpha = _mm_srli_epi64(srcVector1, 48);
                __m128i srcVector2Alpha = _mm_srli_epi64(srcVector2, 48);
                __m128i srcVectorAlpha = _mm_packus_epi32(srcVector1Alpha, srcVector2Alpha);
                const __m128 a = _mm_cvtepi32_ps(srcVectorAlpha);
                // Convert srcVectorAlpha to the final 8-bit alpha channel:
                // (a + 0x80 - ((a + 0x80) >> 8)) >> 8 is a / 257 correctly
                // rounded, mapping 0..0xffff onto 0..0xff.
                srcVectorAlpha = _mm_add_epi32(srcVectorAlpha, _mm_set1_epi32(128));
                srcVectorAlpha = _mm_sub_epi32(srcVectorAlpha, _mm_srli_epi32(srcVectorAlpha, 8));
                srcVectorAlpha = _mm_srli_epi32(srcVectorAlpha, 8);
                srcVectorAlpha = _mm_slli_epi32(srcVectorAlpha, 24);
                const __m128 ia = reciprocal_mul_ps(a, 255.0f);
                __m128i src1 = _mm_unpacklo_epi16(srcVector1, zero);
                __m128i src2 = _mm_unpackhi_epi16(srcVector1, zero);
                __m128i src3 = _mm_unpacklo_epi16(srcVector2, zero);
                __m128i src4 = _mm_unpackhi_epi16(srcVector2, zero);
                __m128 ia1 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(0, 0, 0, 0));
                __m128 ia2 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(1, 1, 1, 1));
                __m128 ia3 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(2, 2, 2, 2));
                __m128 ia4 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(3, 3, 3, 3));
                src1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src1), ia1));
                src2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src2), ia2));
                src3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src3), ia3));
                src4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src4), ia4));
                src1 = _mm_packus_epi32(src1, src2);
                src3 = _mm_packus_epi32(src3, src4);
                // Handle potential alpha == 0 values:
                __m128i srcVector1AlphaMask = _mm_cmpeq_epi64(srcVector1Alpha, zero);
                __m128i srcVector2AlphaMask = _mm_cmpeq_epi64(srcVector2Alpha, zero);
                src1 = _mm_andnot_si128(srcVector1AlphaMask, src1);
                src3 = _mm_andnot_si128(srcVector2AlphaMask, src3);
                src1 = _mm_packus_epi16(src1, src3);
                // Fixup alpha values:
                src1 = _mm_blendv_epi8(src1, srcVectorAlpha, alphaMask32);
                // Fix the RGB order:
                if (!RGBA)
                    src1 = _mm_shuffle_epi8(src1, rgbaMask);
                _mm_storeu_si128((__m128i *)&buffer[i], src1);
            } else {
                // All four pixels are opaque: just narrow each 16-bit channel
                // to 8 bits with the same rounded division by 257 as above.
                __m128i src1 = _mm_unpacklo_epi16(srcVector1, zero);
                __m128i src2 = _mm_unpackhi_epi16(srcVector1, zero);
                __m128i src3 = _mm_unpacklo_epi16(srcVector2, zero);
                __m128i src4 = _mm_unpackhi_epi16(srcVector2, zero);
                src1 = _mm_add_epi32(src1, _mm_set1_epi32(128));
                src2 = _mm_add_epi32(src2, _mm_set1_epi32(128));
                src3 = _mm_add_epi32(src3, _mm_set1_epi32(128));
                src4 = _mm_add_epi32(src4, _mm_set1_epi32(128));
                src1 = _mm_sub_epi32(src1, _mm_srli_epi32(src1, 8));
                src2 = _mm_sub_epi32(src2, _mm_srli_epi32(src2, 8));
                src3 = _mm_sub_epi32(src3, _mm_srli_epi32(src3, 8));
                src4 = _mm_sub_epi32(src4, _mm_srli_epi32(src4, 8));
                src1 = _mm_srli_epi32(src1, 8);
                src2 = _mm_srli_epi32(src2, 8);
                src3 = _mm_srli_epi32(src3, 8);
                src4 = _mm_srli_epi32(src4, 8);
                src1 = _mm_packus_epi32(src1, src2);
                src3 = _mm_packus_epi32(src3, src4);
                src1 = _mm_packus_epi16(src1, src3);
                if (!RGBA)
                    src1 = _mm_shuffle_epi8(src1, rgbaMask);
                _mm_storeu_si128((__m128i *)&buffer[i], src1);
            }
        } else {
            _mm_storeu_si128((__m128i *)&buffer[i], zero);
        }
    }

    SIMD_EPILOGUE(i, count, 3) {
        buffer[i] = qConvertRgba64ToRgb32_sse4<RGBA ? PixelOrderRGB : PixelOrderBGR>(src[i]);
    }
}

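// These entry points are only compiled when this file is not itself built
// with AVX2; an AVX2 build uses the AVX2 equivalents instead.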
#ifndef __AVX2__
void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *)
{
    convertARGBToARGB32PM_sse4<false>(buffer, buffer, count);
}

void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *)
{
    convertARGBToARGB32PM_sse4<true>(buffer, buffer, count);
}

const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
                                                         const QVector<QRgb> *, QDitherInfo *)
{
    convertARGBToRGBA64PM_sse4<false>(buffer, src, count);
    return buffer;
}

const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
                                                           const QVector<QRgb> *, QDitherInfo *)
{
    convertARGBToRGBA64PM_sse4<true>(buffer, src, count);
    return buffer;
}

const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
                                                   const QVector<QRgb> *, QDitherInfo *)
{
    convertARGBToARGB32PM_sse4<false>(buffer, reinterpret_cast<const uint *>(src) + index, count);
    return buffer;
}

const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
                                                     const QVector<QRgb> *, QDitherInfo *)
{
    convertARGBToARGB32PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count);
    return buffer;
}

const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
                                                      const QVector<QRgb> *, QDitherInfo *)
{
    convertARGBToRGBA64PM_sse4<false>(buffer, reinterpret_cast<const uint *>(src) + index, count);
    return buffer;
}

const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
                                                        const QVector<QRgb> *, QDitherInfo *)
{
    convertARGBToRGBA64PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count);
    return buffer;
}
#endif // __AVX2__

void QT_FASTCALL storeRGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                             const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    convertARGBFromARGB32PM_sse4<false,true>(d, src, count);
}

void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                              const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    convertARGBFromARGB32PM_sse4<false,false>(d, src, count);
}

void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                                const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    convertARGBFromARGB32PM_sse4<true,false>(d, src, count);
}

void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                            const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    convertARGBFromARGB32PM_sse4<true,true>(d, src, count);
}

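// Converts ARGB32PM pixels to the 10-bit-per-channel A2RGB30PM formats; the
// per-pixel conversion is done by qConvertArgb32ToA2rgb30_sse4.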
template<QtPixelOrder PixelOrder>
void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                                 const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    for (int i = 0; i < count; ++i)
        d[i] = qConvertArgb32ToA2rgb30_sse4<PixelOrder>(src[i]);
}

#if QT_CONFIG(raster_64bit)
void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
{
    uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
    convertARGBFromRGBA64PM_sse4<false>(dest, buffer, length);
}

void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
{
    uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
    convertARGBFromRGBA64PM_sse4<true>(dest, buffer, length);
}
#endif // QT_CONFIG(raster_64bit)

void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
                                              const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = (uint*)dest + index;
    convertARGBFromRGBA64PM_sse4<false>(d, src, count);
}

void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
                                                const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = (uint*)dest + index;
    convertARGBFromRGBA64PM_sse4<true>(d, src, count);
}

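// Explicit instantiations of the A2RGB30 store for both pixel orders: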
template
void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>(uchar *dest, const uint *src, int index, int count,
                                                                const QVector<QRgb> *, QDitherInfo *);
template
void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>(uchar *dest, const uint *src, int index, int count,
                                                                const QVector<QRgb> *, QDitherInfo *);

QT_END_NAMESPACE

#endif // QT_COMPILER_SUPPORTS_SSE4_1
458 | |