// qdrawhelper.cpp source code [qtbase/src/gui/painting/qdrawhelper.cpp]

/****************************************************************************
**
** Copyright (C) 2018 The Qt Company Ltd.
** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

41	#include <qglobal.h>
42
43	#include <qstylehints.h>
44	#include <qguiapplication.h>
45	#include <qatomic.h>
46	#include <private/qcolortrclut_p.h>
47	#include <private/qdrawhelper_p.h>
48	#include <private/qpaintengine_raster_p.h>
49	#include <private/qpainter_p.h>
50	#include <private/qdrawhelper_x86_p.h>
51	#include <private/qdrawingprimitive_sse2_p.h>
52	#include <private/qdrawhelper_neon_p.h>
53	#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) \|\| defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
54	#include <private/qdrawhelper_mips_dsp_p.h>
55	#endif
56	#include <private/qguiapplication_p.h>
57	#include <private/qrgba64_p.h>
58	#include <qendian.h>
59	#include <qloggingcategory.h>
60	#include <qmath.h>
61
62	QT_BEGIN_NAMESPACE
63
64	Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")
65
// Replaces `src` with BYTE_MUL(src, a). BYTE_MUL is defined outside this file.
// Note: expands to a bare assignment expression; the arguments are not parenthesized.
#define MASK(src, a) src = BYTE_MUL(src, a)

/*
  constants and structures
*/

enum {
    fixed_scale = 1 << 16, // 65536; presumably "1.0" in 16.16 fixed point - confirm at use sites
    half_point = 1 << 15   // 32768; half of fixed_scale
};

77	template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth();
78	template<QImage::Format> Q_DECL_CONSTEXPR uint redShift();
79	template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth();
80	template<QImage::Format> Q_DECL_CONSTEXPR uint greenShift();
81	template<QImage::Format> Q_DECL_CONSTEXPR uint blueWidth();
82	template<QImage::Format> Q_DECL_CONSTEXPR uint blueShift();
83	template<QImage::Format> Q_DECL_CONSTEXPR uint alphaWidth();
84	template<QImage::Format> Q_DECL_CONSTEXPR uint alphaShift();
85
86	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB16>() { return `5`; }
87	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB444>() { return `4`; }
88	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB555>() { return `5`; }
89	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB666>() { return `6`; }
90	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB888>() { return `8`; }
91	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_BGR888>() { return `8`; }
92	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB4444_Premultiplied>() { return `4`; }
93	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8555_Premultiplied>() { return `5`; }
94	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8565_Premultiplied>() { return `5`; }
95	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB6666_Premultiplied>() { return `6`; }
96	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBX8888>() { return `8`; }
97	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888>() { return `8`; }
98	template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888_Premultiplied>() { return `8`; }
99
100	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB16>() { return `11`; }
101	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB444>() { return `8`; }
102	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB555>() { return `10`; }
103	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB666>() { return `12`; }
104	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB888>() { return `16`; }
105	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_BGR888>() { return `0`; }
106	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB4444_Premultiplied>() { return `8`; }
107	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8555_Premultiplied>() { return `18`; }
108	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8565_Premultiplied>() { return `19`; }
109	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB6666_Premultiplied>() { return `12`; }
110	#if Q_BYTE_ORDER == Q_BIG_ENDIAN
111	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return `24`; }
112	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return `24`; }
113	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return `24`; }
114	#else
115	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return `0`; }
116	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return `0`; }
117	template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return `0`; }
118	#endif
119	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB16>() { return `6`; }
120	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB444>() { return `4`; }
121	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB555>() { return `5`; }
122	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB666>() { return `6`; }
123	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB888>() { return `8`; }
124	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_BGR888>() { return `8`; }
125	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB4444_Premultiplied>() { return `4`; }
126	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8555_Premultiplied>() { return `5`; }
127	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8565_Premultiplied>() { return `6`; }
128	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB6666_Premultiplied>() { return `6`; }
129	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBX8888>() { return `8`; }
130	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888>() { return `8`; }
131	template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888_Premultiplied>() { return `8`; }
132
133	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB16>() { return `5`; }
134	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB444>() { return `4`; }
135	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB555>() { return `5`; }
136	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB666>() { return `6`; }
137	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB888>() { return `8`; }
138	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_BGR888>() { return `8`; }
139	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB4444_Premultiplied>() { return `4`; }
140	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8555_Premultiplied>() { return `13`; }
141	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8565_Premultiplied>() { return `13`; }
142	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB6666_Premultiplied>() { return `6`; }
143	#if Q_BYTE_ORDER == Q_BIG_ENDIAN
144	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return `16`; }
145	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return `16`; }
146	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return `16`; }
147	#else
148	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return `8`; }
149	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return `8`; }
150	template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return `8`; }
151	#endif
152	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB16>() { return `5`; }
153	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB444>() { return `4`; }
154	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB555>() { return `5`; }
155	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB666>() { return `6`; }
156	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB888>() { return `8`; }
157	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_BGR888>() { return `8`; }
158	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB4444_Premultiplied>() { return `4`; }
159	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8555_Premultiplied>() { return `5`; }
160	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8565_Premultiplied>() { return `5`; }
161	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB6666_Premultiplied>() { return `6`; }
162	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBX8888>() { return `8`; }
163	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888>() { return `8`; }
164	template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888_Premultiplied>() { return `8`; }
165
166	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB16>() { return `0`; }
167	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB444>() { return `0`; }
168	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB555>() { return `0`; }
169	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB666>() { return `0`; }
170	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB888>() { return `0`; }
171	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_BGR888>() { return `16`; }
172	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB4444_Premultiplied>() { return `0`; }
173	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8555_Premultiplied>() { return `8`; }
174	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8565_Premultiplied>() { return `8`; }
175	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB6666_Premultiplied>() { return `0`; }
176	#if Q_BYTE_ORDER == Q_BIG_ENDIAN
177	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return `8`; }
178	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return `8`; }
179	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return `8`; }
180	#else
181	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return `16`; }
182	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return `16`; }
183	template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return `16`; }
184	#endif
185	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB16>() { return `0`; }
186	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB444>() { return `0`; }
187	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB555>() { return `0`; }
188	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB666>() { return `0`; }
189	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB888>() { return `0`; }
190	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_BGR888>() { return `0`; }
191	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB4444_Premultiplied>() { return `4`; }
192	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8555_Premultiplied>() { return `8`; }
193	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8565_Premultiplied>() { return `8`; }
194	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB6666_Premultiplied>() { return `6`; }
195	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBX8888>() { return `0`; }
196	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888>() { return `8`; }
197	template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888_Premultiplied>() { return `8`; }
198
199	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB16>() { return `0`; }
200	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB444>() { return `0`; }
201	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB555>() { return `0`; }
202	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB666>() { return `0`; }
203	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB888>() { return `0`; }
204	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_BGR888>() { return `0`; }
205	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB4444_Premultiplied>() { return `12`; }
206	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8555_Premultiplied>() { return `0`; }
207	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8565_Premultiplied>() { return `0`; }
208	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB6666_Premultiplied>() { return `18`; }
209	#if Q_BYTE_ORDER == Q_BIG_ENDIAN
210	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return `0`; }
211	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return `0`; }
212	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return `0`; }
213	#else
214	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return `24`; }
215	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return `24`; }
216	template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return `24`; }
217	#endif
218
219	template<QImage::Format> constexpr QPixelLayout::BPP bitsPerPixel();
220	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; }
221	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; }
222	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; }
223	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; }
224	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; }
225	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_BGR888>() { return QPixelLayout::BPP24; }
226	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; }
227	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; }
228	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; }
229	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; }
230	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBX8888>() { return QPixelLayout::BPP32; }
231	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888>() { return QPixelLayout::BPP32; }
232	template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888_Premultiplied>() { return QPixelLayout::BPP32; }
233
234
235	typedef const uint (QT_FASTCALL FetchPixelsFunc)(uint buffer, const* uchar src, int* index, int count);
236
237	template <QPixelLayout::BPP bpp> static
238	uint QT_FASTCALL fetchPixel(const uchar , int*)
239	{
240	Q_UNREACHABLE();
241	return `0`;
242	}
243
244	template <>
245	inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar src, int* index)
246	{
247	return (src[index >> `3`] >> (index & `7`)) & `1`;
248	}
249
250	template <>
251	inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar src, int* index)
252	{
253	return (src[index >> `3`] >> (~index & `7`)) & `1`;
254	}
255
256	template <>
257	inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar src, int* index)
258	{
259	return src[index];
260	}
261
262	template <>
263	inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar src, int* index)
264	{
265	return reinterpret_cast<const quint16 *>(src)[index];
266	}
267
268	template <>
269	inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar src, int* index)
270	{
271	return reinterpret_cast<const quint24 *>(src)[index];
272	}
273
274	template <>
275	inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar src, int* index)
276	{
277	return reinterpret_cast<const uint *>(src)[index];
278	}
279
280	template <>
281	inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP64>(const uchar src, int* index)
282	{
283	// We have to do the conversion in fetch to fit into a 32bit uint
284	QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
285	return c.toArgb32();
286	}
287
288	template <QPixelLayout::BPP bpp>
289	static quint64 QT_FASTCALL fetchPixel64(const uchar src, int* index)
290	{
291	Q_STATIC_ASSERT(bpp != QPixelLayout::BPP64);
292	return fetchPixel<bpp>(src, index);
293	}
294
295	template <QPixelLayout::BPP width> static
296	void QT_FASTCALL storePixel(uchar dest, int* index, uint pixel);
297
298	template <>
299	inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar dest, int* index, uint pixel)
300	{
301	reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel);
302	}
303
304	template <>
305	inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar dest, int* index, uint pixel)
306	{
307	reinterpret_cast<quint24 *>(dest)[index] = quint24 (pixel);
308	}
309
310	typedef uint (QT_FASTCALL FetchPixelFunc)(const* uchar src, int* index);
311
312	static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = {
313	nullptr, // BPPNone
314	fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB
315	fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB
316	fetchPixel<QPixelLayout::BPP8>, // BPP8
317	fetchPixel<QPixelLayout::BPP16>, // BPP16
318	fetchPixel<QPixelLayout::BPP24>, // BPP24
319	fetchPixel<QPixelLayout::BPP32>, // BPP32
320	fetchPixel<QPixelLayout::BPP64> // BPP64
321	};
322
323	template<QImage::Format Format>
324	static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s)
325	{
326	Q_CONSTEXPR uint redMask = ((`1` << redWidth<Format>()) - `1`);
327	Q_CONSTEXPR uint greenMask = ((`1` << greenWidth<Format>()) - `1`);
328	Q_CONSTEXPR uint blueMask = ((`1` << blueWidth<Format>()) - `1`);
329
330	Q_CONSTEXPR uchar redLeftShift = `8` - redWidth<Format>();
331	Q_CONSTEXPR uchar greenLeftShift = `8` - greenWidth<Format>();
332	Q_CONSTEXPR uchar blueLeftShift = `8` - blueWidth<Format>();
333
334	Q_CONSTEXPR uchar redRightShift = `2` * redWidth<Format>() - `8`;
335	Q_CONSTEXPR uchar greenRightShift = `2` * greenWidth<Format>() - `8`;
336	Q_CONSTEXPR uchar blueRightShift = `2` * blueWidth<Format>() - `8`;
337
338	uint red = (s >> redShift<Format>()) & redMask;
339	uint green = (s >> greenShift<Format>()) & greenMask;
340	uint blue = (s >> blueShift<Format>()) & blueMask;
341
342	red = ((red << redLeftShift) \| (red >> redRightShift)) << `16`;
343	green = ((green << greenLeftShift) \| (green >> greenRightShift)) << `8`;
344	blue = (blue << blueLeftShift) \| (blue >> blueRightShift);
345	return `0xff000000` \| red \| green \| blue;
346	}
347
348	template<QImage::Format Format>
349	static void QT_FASTCALL convertToRGB32(uint buffer, int* count, const QVector<QRgb> *)
350	{
351	for (int i = `0`; i < count; ++i)
352	buffer[i] = convertPixelToRGB32<Format>(buffer[i]);
353	}
354
// SSSE3 24-bit pixel fetcher, defined in another translation unit. Declared only
// when the build baseline has SSE2 but not SSSE3 and the compiler can build
// SSSE3 code; callers check qCpuHasFeature(SSSE3) before using it.
#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
extern const uint * QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar *src, int index, int count);
#endif

359	template<QImage::Format Format>
360	static const uint QT_FASTCALL fetchRGBToRGB32(uint buffer, const uchar src, int* index, int count,
361	const QVector<QRgb> , QDitherInfo )
362	{
363	constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
364	#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
365	if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
366	// With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
367	// to vectorize the deforested version below.
368	fetchPixelsBPP24_ssse3(dest: buffer, src, index, count);
369	convertToRGB32<Format>(buffer, count, nullptr);
370	return buffer;
371	}
372	#endif
373	for (int i = `0`; i < count; ++i)
374	buffer[i] = convertPixelToRGB32<Format>(fetchPixel<BPP>(src, index + i));
375	return buffer;
376	}
377
378	template<QImage::Format Format>
379	static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s)
380	{
381	return QRgba64::fromArgb32(rgb: convertPixelToRGB32<Format>(s));
382	}
383
384	template<QImage::Format Format>
385	static const QRgba64 QT_FASTCALL convertToRGB64(QRgba64 buffer, const uint src, int* count,
386	const QVector<QRgb> , QDitherInfo )
387	{
388	for (int i = `0`; i < count; ++i)
389	buffer[i] = convertPixelToRGB64<Format>(src[i]);
390	return buffer;
391	}
392
393	template<QImage::Format Format>
394	static const QRgba64 QT_FASTCALL fetchRGBToRGB64(QRgba64 buffer, const uchar src, int* index, int count,
395	const QVector<QRgb> , QDitherInfo )
396	{
397	for (int i = `0`; i < count; ++i)
398	buffer[i] = convertPixelToRGB64<Format>(fetchPixel<bitsPerPixel<Format>()>(src, index + i));
399	return buffer;
400	}
401
402	template<QImage::Format Format>
403	static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s)
404	{
405	Q_CONSTEXPR uint alphaMask = ((`1` << alphaWidth<Format>()) - `1`);
406	Q_CONSTEXPR uint redMask = ((`1` << redWidth<Format>()) - `1`);
407	Q_CONSTEXPR uint greenMask = ((`1` << greenWidth<Format>()) - `1`);
408	Q_CONSTEXPR uint blueMask = ((`1` << blueWidth<Format>()) - `1`);
409
410	Q_CONSTEXPR uchar alphaLeftShift = `8` - alphaWidth<Format>();
411	Q_CONSTEXPR uchar redLeftShift = `8` - redWidth<Format>();
412	Q_CONSTEXPR uchar greenLeftShift = `8` - greenWidth<Format>();
413	Q_CONSTEXPR uchar blueLeftShift = `8` - blueWidth<Format>();
414
415	Q_CONSTEXPR uchar alphaRightShift = `2` * alphaWidth<Format>() - `8`;
416	Q_CONSTEXPR uchar redRightShift = `2` * redWidth<Format>() - `8`;
417	Q_CONSTEXPR uchar greenRightShift = `2` * greenWidth<Format>() - `8`;
418	Q_CONSTEXPR uchar blueRightShift = `2` * blueWidth<Format>() - `8`;
419
420	Q_CONSTEXPR bool mustMin = (alphaWidth<Format>() != redWidth<Format>()) \|\|
421	(alphaWidth<Format>() != greenWidth<Format>()) \|\|
422	(alphaWidth<Format>() != blueWidth<Format>());
423
424	uint alpha = (s >> alphaShift<Format>()) & alphaMask;
425	uint red = (s >> redShift<Format>()) & redMask;
426	uint green = (s >> greenShift<Format>()) & greenMask;
427	uint blue = (s >> blueShift<Format>()) & blueMask;
428
429	alpha = (alpha << alphaLeftShift) \| (alpha >> alphaRightShift);
430	red = (red << redLeftShift) \| (red >> redRightShift);
431	green = (green << greenLeftShift) \| (green >> greenRightShift);
432	blue = (blue << blueLeftShift) \| (blue >> blueRightShift);
433
434	if (mustMin) {
435	red = qMin(a: alpha, b: red);
436	green = qMin(a: alpha, b: green);
437	blue = qMin(a: alpha, b: blue);
438	}
439
440	return (alpha << `24`) \| (red << `16`) \| (green << `8`) \| blue;
441	}
442
443	template<QImage::Format Format>
444	static void QT_FASTCALL convertARGBPMToARGB32PM(uint buffer, int* count, const QVector<QRgb> *)
445	{
446	for (int i = `0`; i < count; ++i)
447	buffer[i] = convertPixelToARGB32PM<Format>(buffer[i]);
448	}
449
450	template<QImage::Format Format>
451	static const uint QT_FASTCALL fetchARGBPMToARGB32PM(uint buffer, const uchar src, int* index, int count,
452	const QVector<QRgb> , QDitherInfo )
453	{
454	constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
455	#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
456	if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
457	// With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
458	// to vectorize the deforested version below.
459	fetchPixelsBPP24_ssse3(dest: buffer, src, index, count);
460	convertARGBPMToARGB32PM<Format>(buffer, count, nullptr);
461	return buffer;
462	}
463	#endif
464	for (int i = `0`; i < count; ++i)
465	buffer[i] = convertPixelToARGB32PM<Format>(fetchPixel<BPP>(src, index + i));
466	return buffer;
467	}
468
469	template<QImage::Format Format>
470	static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s)
471	{
472	return QRgba64::fromArgb32(rgb: convertPixelToARGB32PM<Format>(s));
473	}
474
475	template<QImage::Format Format>
476	static const QRgba64 QT_FASTCALL convertARGBPMToRGBA64PM(QRgba64 buffer, const uint src, int* count,
477	const QVector<QRgb> , QDitherInfo )
478	{
479	for (int i = `0`; i < count; ++i)
480	buffer[i] = convertPixelToRGB64<Format>(src[i]);
481	return buffer;
482	}
483
484	template<QImage::Format Format>
485	static const QRgba64 QT_FASTCALL fetchARGBPMToRGBA64PM(QRgba64 buffer, const uchar src, int* index, int count,
486	const QVector<QRgb> , QDitherInfo )
487	{
488	constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
489	for (int i = `0`; i < count; ++i)
490	buffer[i] = convertPixelToRGBA64PM<Format>(fetchPixel<bpp>(src, index + i));
491	return buffer;
492	}
493
494	template<QImage::Format Format, bool fromRGB>
495	static void QT_FASTCALL storeRGBFromARGB32PM(uchar dest, const* uint src, int* index, int count,
496	const QVector<QRgb> , QDitherInfo dither)
497	{
498	Q_CONSTEXPR uchar rWidth = redWidth<Format>();
499	Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
500	Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
501	constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
502
503	// RGB32 -> RGB888 is not a precision loss.
504	if (!dither \|\| (rWidth == `8` && gWidth == `8` && bWidth == `8`)) {
505	Q_CONSTEXPR uint rMask = (`1` << redWidth<Format>()) - `1`;
506	Q_CONSTEXPR uint gMask = (`1` << greenWidth<Format>()) - `1`;
507	Q_CONSTEXPR uint bMask = (`1` << blueWidth<Format>()) - `1`;
508	Q_CONSTEXPR uchar rRightShift = `24` - redWidth<Format>();
509	Q_CONSTEXPR uchar gRightShift = `16` - greenWidth<Format>();
510	Q_CONSTEXPR uchar bRightShift = `8` - blueWidth<Format>();
511
512	for (int i = `0`; i < count; ++i) {
513	const uint c = fromRGB ? src[i] : qUnpremultiply(p: src[i]);
514	const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
515	const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
516	const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
517	storePixel<BPP>(dest, index + i, r \| g \| b);
518	};
519	} else {
520	// We do ordered dither by using a rounding conversion, but instead of
521	// adding half of input precision, we add the adjusted result from the
522	// bayer matrix before narrowing.
523	// Note: Rounding conversion in itself is different from the naive
524	// conversion we do above for non-dithering.
525	const uint *bayer_line = qt_bayer_matrix[dither->y & `15`];
526	for (int i = `0`; i < count; ++i) {
527	const uint c = fromRGB ? src[i] : qUnpremultiply(p: src[i]);
528	const int d = bayer_line[(dither->x + i) & `15`];
529	const int dr = d - ((d + `1`) >> rWidth);
530	const int dg = d - ((d + `1`) >> gWidth);
531	const int db = d - ((d + `1`) >> bWidth);
532	int r = qRed(rgb: c);
533	int g = qGreen(rgb: c);
534	int b = qBlue(rgb: c);
535	r = (r + ((dr - r) >> rWidth) + `1`) >> (`8` - rWidth);
536	g = (g + ((dg - g) >> gWidth) + `1`) >> (`8` - gWidth);
537	b = (b + ((db - b) >> bWidth) + `1`) >> (`8` - bWidth);
538	const uint s = (r << redShift<Format>())
539	\| (g << greenShift<Format>())
540	\| (b << blueShift<Format>());
541	storePixel<BPP>(dest, index + i, s);
542	}
543	}
544	}
545
546	template<QImage::Format Format, bool fromRGB>
547	static void QT_FASTCALL storeARGBPMFromARGB32PM(uchar dest, const* uint src, int* index, int count,
548	const QVector<QRgb> , QDitherInfo dither)
549	{
550	constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
551	if (!dither) {
552	Q_CONSTEXPR uint aMask = (`1` << alphaWidth<Format>()) - `1`;
553	Q_CONSTEXPR uint rMask = (`1` << redWidth<Format>()) - `1`;
554	Q_CONSTEXPR uint gMask = (`1` << greenWidth<Format>()) - `1`;
555	Q_CONSTEXPR uint bMask = (`1` << blueWidth<Format>()) - `1`;
556
557	Q_CONSTEXPR uchar aRightShift = `32` - alphaWidth<Format>();
558	Q_CONSTEXPR uchar rRightShift = `24` - redWidth<Format>();
559	Q_CONSTEXPR uchar gRightShift = `16` - greenWidth<Format>();
560	Q_CONSTEXPR uchar bRightShift = `8` - blueWidth<Format>();
561
562	Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>();
563	for (int i = `0`; i < count; ++i) {
564	const uint c = src[i];
565	const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>());
566	const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
567	const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
568	const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
569	storePixel<BPP>(dest, index + i, a \| r \| g \| b);
570	};
571	} else {
572	Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
573	Q_CONSTEXPR uchar rWidth = redWidth<Format>();
574	Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
575	Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
576
577	const uint *bayer_line = qt_bayer_matrix[dither->y & `15`];
578	for (int i = `0`; i < count; ++i) {
579	const uint c = src[i];
580	const int d = bayer_line[(dither->x + i) & `15`];
581	const int da = d - ((d + `1`) >> aWidth);
582	const int dr = d - ((d + `1`) >> rWidth);
583	const int dg = d - ((d + `1`) >> gWidth);
584	const int db = d - ((d + `1`) >> bWidth);
585	int a = qAlpha(rgb: c);
586	int r = qRed(rgb: c);
587	int g = qGreen(rgb: c);
588	int b = qBlue(rgb: c);
589	if (fromRGB)
590	a = (`1` << aWidth) - `1`;
591	else
592	a = (a + ((da - a) >> aWidth) + `1`) >> (`8` - aWidth);
593	r = (r + ((dr - r) >> rWidth) + `1`) >> (`8` - rWidth);
594	g = (g + ((dg - g) >> gWidth) + `1`) >> (`8` - gWidth);
595	b = (b + ((db - b) >> bWidth) + `1`) >> (`8` - bWidth);
596	uint s = (a << alphaShift<Format>())
597	\| (r << redShift<Format>())
598	\| (g << greenShift<Format>())
599	\| (b << blueShift<Format>());
600	storePixel<BPP>(dest, index + i, s);
601	}
602	}
603	}
604
605	template<QImage::Format Format>
606	static void QT_FASTCALL rbSwap(uchar dst, const* uchar src, int* count)
607	{
608	Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
609	Q_CONSTEXPR uchar aShift = alphaShift<Format>();
610	Q_CONSTEXPR uchar rWidth = redWidth<Format>();
611	Q_CONSTEXPR uchar rShift = redShift<Format>();
612	Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
613	Q_CONSTEXPR uchar gShift = greenShift<Format>();
614	Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
615	Q_CONSTEXPR uchar bShift = blueShift<Format>();
616	#ifdef Q_COMPILER_CONSTEXPR
617	Q_STATIC_ASSERT(rWidth == bWidth);
618	#endif
619	Q_CONSTEXPR uint redBlueMask = (`1` << rWidth) - `1`;
620	Q_CONSTEXPR uint alphaGreenMask = (((`1` << aWidth) - `1`) << aShift)
621	\| (((`1` << gWidth) - `1`) << gShift);
622	constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
623
624	for (int i = `0`; i < count; ++i) {
625	const uint c = fetchPixel<bpp>(src, i);
626	const uint r = (c >> rShift) & redBlueMask;
627	const uint b = (c >> bShift) & redBlueMask;
628	const uint t = (c & alphaGreenMask)
629	\| (r << bShift)
630	\| (b << rShift);
631	storePixel<bpp>(dst, i, t);
632	}
633	}
634
635	static void QT_FASTCALL rbSwap_rgb32(uchar d, const* uchar s, int* count)
636	{
637	const uint src = reinterpret_cast<const* uint *>(s);
638	uint dest = reinterpret_cast<uint >(d);
639	for (int i = `0`; i < count; ++i) {
640	const uint c = src[i];
641	const uint ag = c & `0xff00ff00`;
642	const uint rb = c & `0x00ff00ff`;
643	dest[i] = ag \| (rb << `16`) \| (rb >> `16`);
644	}
645	}
646
647	#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
648	template<>
649	void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar d, const* uchar s, int* count)
650	{
651	return rbSwap_rgb32(d, s, count);
652	}
653	#else
654	template<>
655	void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar d, const* uchar s, int* count)
656	{
657	const uint src = reinterpret_cast<const* uint *>(s);
658	uint dest = reinterpret_cast<uint >(d);
659	for (int i = `0`; i < count; ++i) {
660	const uint c = src[i];
661	const uint rb = c & `0xff00ff00`;
662	const uint ga = c & `0x00ff00ff`;
663	dest[i] = ga \| (rb << `16`) \| (rb >> `16`);
664	}
665	}
666	#endif
667
668	static void QT_FASTCALL rbSwap_rgb30(uchar d, const* uchar s, int* count)
669	{
670	const uint src = reinterpret_cast<const* uint *>(s);
671	uint dest = reinterpret_cast<uint >(d);
672	UNALIASED_CONVERSION_LOOP(dest, src, count, qRgbSwapRgb30);
673	}
674
675	template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutRGB()
676	{
677	return QPixelLayout{
678	false,
679	false,
680	bitsPerPixel<Format>(),
681	rbSwap<Format>,
682	convertToRGB32<Format>,
683	convertToRGB64<Format>,
684	fetchRGBToRGB32<Format>,
685	fetchRGBToRGB64<Format>,
686	storeRGBFromARGB32PM<Format, false>,
687	storeRGBFromARGB32PM<Format, true>
688	};
689	}
690
691	template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutARGBPM()
692	{
693	return QPixelLayout{
694	true,
695	true,
696	bitsPerPixel<Format>(),
697	rbSwap<Format>,
698	convertARGBPMToARGB32PM<Format>,
699	convertARGBPMToRGBA64PM<Format>,
700	fetchARGBPMToARGB32PM<Format>,
701	fetchARGBPMToRGBA64PM<Format>,
702	storeARGBPMFromARGB32PM<Format, false>,
703	storeARGBPMFromARGB32PM<Format, true>
704	};
705	}
706
707	static void QT_FASTCALL convertIndexedToARGB32PM(uint buffer, int* count, const QVector<QRgb> *clut)
708	{
709	for (int i = `0`; i < count; ++i)
710	buffer[i] = qPremultiply(x: clut->at(i: buffer[i]));
711	}
712
713	template<QPixelLayout::BPP BPP>
714	static const uint QT_FASTCALL fetchIndexedToARGB32PM(uint buffer, const uchar src, int* index, int count,
715	const QVector<QRgb> clut, QDitherInfo )
716	{
717	for (int i = `0`; i < count; ++i) {
718	const uint s = fetchPixel<BPP>(src, index + i);
719	buffer[i] = qPremultiply(x: clut->at(i: s));
720	}
721	return buffer;
722	}
723
724	template<QPixelLayout::BPP BPP>
725	static const QRgba64 QT_FASTCALL fetchIndexedToRGBA64PM(QRgba64 buffer, const uchar src, int* index, int count,
726	const QVector<QRgb> clut, QDitherInfo )
727	{
728	for (int i = `0`; i < count; ++i) {
729	const uint s = fetchPixel<BPP>(src, index + i);
730	buffer[i] = QRgba64::fromArgb32(rgb: clut->at(i: s)).premultiplied();
731	}
732	return buffer;
733	}
734
735	static const QRgba64 QT_FASTCALL convertIndexedToRGBA64PM(QRgba64 buffer, const uint src, int* count,
736	const QVector<QRgb> clut, QDitherInfo )
737	{
738	for (int i = `0`; i < count; ++i)
739	buffer[i] = QRgba64::fromArgb32(rgb: clut->at(i: src[i])).premultiplied();
740	return buffer;
741	}
742
743	static void QT_FASTCALL convertPassThrough(uint , int, const* QVector<QRgb> *)
744	{
745	}
746
747	static const uint QT_FASTCALL fetchPassThrough(uint , const uchar src, int* index, int,
748	const QVector<QRgb> , QDitherInfo )
749	{
750	return reinterpret_cast<const uint *>(src) + index;
751	}
752
753	static const QRgba64 QT_FASTCALL fetchPassThrough64(QRgba64 , const uchar src, int* index, int,
754	const QVector<QRgb> , QDitherInfo )
755	{
756	return reinterpret_cast<const QRgba64 *>(src) + index;
757	}
758
759	static void QT_FASTCALL storePassThrough(uchar dest, const* uint src, int* index, int count,
760	const QVector<QRgb> , QDitherInfo )
761	{
762	uint d = reinterpret_cast<uint >(dest) + index;
763	if (d != src)
764	memcpy(dest: d, src: src, n: count * sizeof(uint));
765	}
766
767	static void QT_FASTCALL convertARGB32ToARGB32PM(uint buffer, int* count, const QVector<QRgb> *)
768	{
769	qt_convertARGB32ToARGB32PM(buffer, src: buffer, count);
770	}
771
772	static const uint QT_FASTCALL fetchARGB32ToARGB32PM(uint buffer, const uchar src, int* index, int count,
773	const QVector<QRgb> , QDitherInfo )
774	{
775	return qt_convertARGB32ToARGB32PM(buffer, src: reinterpret_cast<const uint *>(src) + index, count);
776	}
777
778	static void QT_FASTCALL convertRGBA8888PMToARGB32PM(uint buffer, int* count, const QVector<QRgb> *)
779	{
780	for (int i = `0`; i < count; ++i)
781	buffer[i] = RGBA2ARGB(x: buffer[i]);
782	}
783
784	static const uint QT_FASTCALL fetchRGBA8888PMToARGB32PM(uint buffer, const uchar src, int* index, int count,
785	const QVector<QRgb> , QDitherInfo )
786	{
787	const uint s = reinterpret_cast<const* uint *>(src) + index;
788	UNALIASED_CONVERSION_LOOP(buffer, s, count, RGBA2ARGB);
789	return buffer;
790	}
791
792	static void QT_FASTCALL convertRGBA8888ToARGB32PM(uint buffer, int* count, const QVector<QRgb> *)
793	{
794	qt_convertRGBA8888ToARGB32PM(buffer, src: buffer, count);
795	}
796
797	static const uint QT_FASTCALL fetchRGBA8888ToARGB32PM(uint buffer, const uchar src, int* index, int count,
798	const QVector<QRgb> , QDitherInfo )
799	{
800	return qt_convertRGBA8888ToARGB32PM(buffer, src: reinterpret_cast<const uint *>(src) + index, count);
801	}
802
803	static void QT_FASTCALL convertAlpha8ToRGB32(uint buffer, int* count, const QVector<QRgb> *)
804	{
805	for (int i = `0`; i < count; ++i)
806	buffer[i] = qRgba(r: `0`, g: `0`, b: `0`, a: buffer[i]);
807	}
808
809	static const uint QT_FASTCALL fetchAlpha8ToRGB32(uint buffer, const uchar src, int* index, int count,
810	const QVector<QRgb> , QDitherInfo )
811	{
812	for (int i = `0`; i < count; ++i)
813	buffer[i] = qRgba(r: `0`, g: `0`, b: `0`, a: src[index + i]);
814	return buffer;
815	}
816
817	static const QRgba64 QT_FASTCALL convertAlpha8ToRGB64(QRgba64 buffer, const uint src, int* count,
818	const QVector<QRgb> , QDitherInfo )
819	{
820	for (int i = `0`; i < count; ++i)
821	buffer[i] = QRgba64::fromRgba(red: `0`, green: `0`, blue: `0`, alpha: src[i]);
822	return buffer;
823	}
824	static const QRgba64 QT_FASTCALL fetchAlpha8ToRGB64(QRgba64 buffer, const uchar src, int* index, int count,
825	const QVector<QRgb> , QDitherInfo )
826	{
827	for (int i = `0`; i < count; ++i)
828	buffer[i] = QRgba64::fromRgba(red: `0`, green: `0`, blue: `0`, alpha: src[index + i]);
829	return buffer;
830	}
831
832	static void QT_FASTCALL convertGrayscale8ToRGB32(uint buffer, int* count, const QVector<QRgb> *)
833	{
834	for (int i = `0`; i < count; ++i) {
835	const uint s = buffer[i];
836	buffer[i] = qRgb(r: s, g: s, b: s);
837	}
838	}
839
840	static const uint QT_FASTCALL fetchGrayscale8ToRGB32(uint buffer, const uchar src, int* index, int count,
841	const QVector<QRgb> , QDitherInfo )
842	{
843	for (int i = `0`; i < count; ++i) {
844	const uint s = src[index + i];
845	buffer[i] = qRgb(r: s, g: s, b: s);
846	}
847	return buffer;
848	}
849
850	static const QRgba64 QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 buffer, const uint src, int* count,
851	const QVector<QRgb> , QDitherInfo )
852	{
853	for (int i = `0`; i < count; ++i)
854	buffer[i] = QRgba64::fromRgba(red: src[i], green: src[i], blue: src[i], alpha: `255`);
855	return buffer;
856	}
857
858	static const QRgba64 QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 buffer, const uchar src, int* index, int count,
859	const QVector<QRgb> , QDitherInfo )
860	{
861	for (int i = `0`; i < count; ++i) {
862	const uint s = src[index + i];
863	buffer[i] = QRgba64::fromRgba(red: s, green: s, blue: s, alpha: `255`);
864	}
865	return buffer;
866	}
867
868	static void QT_FASTCALL convertGrayscale16ToRGB32(uint buffer, int* count, const QVector<QRgb> *)
869	{
870	for (int i = `0`; i < count; ++i) {
871	const uint x = qt_div_257(x: buffer[i]);
872	buffer[i] = qRgb(r: x, g: x, b: x);
873	}
874	}
875
876	static const uint QT_FASTCALL fetchGrayscale16ToRGB32(uint buffer, const uchar src, int* index, int count,
877	const QVector<QRgb> , QDitherInfo )
878	{
879	const unsigned short s = reinterpret_cast<const* unsigned short *>(src) + index;
880	for (int i = `0`; i < count; ++i) {
881	const uint x = qt_div_257(x: s[i]);
882	buffer[i] = qRgb(r: x, g: x, b: x);
883	}
884	return buffer;
885	}
886
887	static const QRgba64 QT_FASTCALL convertGrayscale16ToRGBA64(QRgba64 buffer, const uint src, int* count,
888	const QVector<QRgb> , QDitherInfo )
889	{
890	for (int i = `0`; i < count; ++i)
891	buffer[i] = QRgba64::fromRgba64(red: src[i], green: src[i], blue: src[i], alpha: `65535`);
892	return buffer;
893	}
894
895	static const QRgba64 QT_FASTCALL fetchGrayscale16ToRGBA64(QRgba64 buffer, const uchar src, int* index, int count,
896	const QVector<QRgb> , QDitherInfo )
897	{
898	const unsigned short s = reinterpret_cast<const* unsigned short *>(src) + index;
899	for (int i = `0`; i < count; ++i) {
900	buffer[i] = QRgba64::fromRgba64(red: s[i], green: s[i], blue: s[i], alpha: `65535`);
901	}
902	return buffer;
903	}
904
905	static void QT_FASTCALL storeARGB32FromARGB32PM(uchar dest, const* uint src, int* index, int count,
906	const QVector<QRgb> , QDitherInfo )
907	{
908	uint d = reinterpret_cast<uint >(dest) + index;
909	UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return qUnpremultiply(c); });
910	}
911
912	static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar dest, const* uint src, int* index, int count,
913	const QVector<QRgb> , QDitherInfo )
914	{
915	uint d = reinterpret_cast<uint >(dest) + index;
916	UNALIASED_CONVERSION_LOOP(d, src, count, ARGB2RGBA);
917	}
918
919	#ifdef __SSE2__
920	template<bool RGBA, bool maskAlpha>
921	static inline void qConvertARGB32PMToRGBA64PM_sse2(QRgba64 buffer, const* uint src, int* count)
922	{
923	if (count <= `0`)
924	return;
925
926	const __m128i amask = _mm_set1_epi32(i: `0xff000000`);
927	int i = `0`;
928	for (; ((uintptr_t)buffer & `0xf`) && i < count; ++i) {
929	uint s = *src++;
930	if (maskAlpha)
931	s = s \| `0xff000000`;
932	if (RGBA)
933	s = RGBA2ARGB(x: s);
934	*buffer++ = QRgba64::fromArgb32(rgb: s);
935	}
936	for (; i < count-`3`; i += `4`) {
937	__m128i vs = _mm_loadu_si128(p: (const __m128i*)src);
938	if (maskAlpha)
939	vs = _mm_or_si128(a: vs, b: amask);
940	src += `4`;
941	__m128i v1 = _mm_unpacklo_epi8(a: vs, b: vs);
942	__m128i v2 = _mm_unpackhi_epi8(a: vs, b: vs);
943	if (!RGBA) {
944	v1 = _mm_shufflelo_epi16(v1, _MM_SHUFFLE(`3`, `0`, `1`, `2`));
945	v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(`3`, `0`, `1`, `2`));
946	v1 = _mm_shufflehi_epi16(v1, _MM_SHUFFLE(`3`, `0`, `1`, `2`));
947	v2 = _mm_shufflehi_epi16(v2, _MM_SHUFFLE(`3`, `0`, `1`, `2`));
948	}
949	_mm_store_si128(p: (__m128i*)(buffer), b: v1);
950	buffer += `2`;
951	_mm_store_si128(p: (__m128i*)(buffer), b: v2);
952	buffer += `2`;
953	}
954
955	SIMD_EPILOGUE(i, count, `3`) {
956	uint s = *src++;
957	if (maskAlpha)
958	s = s \| `0xff000000`;
959	if (RGBA)
960	s = RGBA2ARGB(x: s);
961	*buffer++ = QRgba64::fromArgb32(rgb: s);
962	}
963	}
964
965	template<QtPixelOrder PixelOrder>
966	static inline void qConvertRGBA64PMToA2RGB30PM_sse2(uint dest, const* QRgba64 buffer, int* count)
967	{
968	const __m128i gmask = _mm_set1_epi32(i: `0x000ffc00`);
969	const __m128i cmask = _mm_set1_epi32(i: `0x000003ff`);
970	int i = `0`;
971	__m128i vr, vg, vb, va;
972	for (; i < count && uintptr_t(buffer) & `0xF`; ++i) {
973	dest++ = qConvertRgb64ToRgb30<PixelOrder>(buffer++);
974	}
975
976	for (; i < count-`15`; i += `16`) {
977	// Repremultiplying is really expensive and hard to do in SIMD without AVX2,
978	// so we try to avoid it by checking if it is needed 16 samples at a time.
979	__m128i vOr = _mm_set1_epi32(i: `0`);
980	__m128i vAnd = _mm_set1_epi32(i: `0xffffffff`);
981	for (int j = `0`; j < `16`; j += `2`) {
982	__m128i vs = _mm_load_si128(p: (const __m128i*)(buffer + j));
983	vOr = _mm_or_si128(a: vOr, b: vs);
984	vAnd = _mm_and_si128(a: vAnd, b: vs);
985	}
986	const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, `3`)) \| ((uint)_mm_extract_epi16(vOr, `7`));
987	const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, `3`)) & ((uint)_mm_extract_epi16(vAnd, `7`));
988
989	if (andAlpha == `0xffff`) {
990	for (int j = `0`; j < `16`; j += `2`) {
991	__m128i vs = _mm_load_si128(p: (const __m128i*)buffer);
992	buffer += `2`;
993	vr = _mm_srli_epi64(a: vs, count: `6`);
994	vg = _mm_srli_epi64(a: vs, count: `16` + `6` - `10`);
995	vb = _mm_srli_epi64(a: vs, count: `32` + `6`);
996	vr = _mm_and_si128(a: vr, b: cmask);
997	vg = _mm_and_si128(a: vg, b: gmask);
998	vb = _mm_and_si128(a: vb, b: cmask);
999	va = _mm_srli_epi64(a: vs, count: `48` + `14`);
1000	if (PixelOrder == PixelOrderRGB)
1001	vr = _mm_slli_epi32(a: vr, count: `20`);
1002	else
1003	vb = _mm_slli_epi32(a: vb, count: `20`);
1004	va = _mm_slli_epi32(a: va, count: `30`);
1005	__m128i vd = _mm_or_si128(a: _mm_or_si128(a: vr, b: vg), b: _mm_or_si128(a: vb, b: va));
1006	vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(`3`, `1`, `2`, `0`));
1007	_mm_storel_epi64(p: (__m128i*)dest, a: vd);
1008	dest += `2`;
1009	}
1010	} else if (orAlpha == `0`) {
1011	for (int j = `0`; j < `16`; ++j) {
1012	*dest++ = `0`;
1013	buffer++;
1014	}
1015	} else {
1016	for (int j = `0`; j < `16`; ++j)
1017	dest++ = qConvertRgb64ToRgb30<PixelOrder>(buffer++);
1018	}
1019	}
1020
1021	SIMD_EPILOGUE(i, count, `15`)
1022	dest++ = qConvertRgb64ToRgb30<PixelOrder>(buffer++);
1023	}
1024	#elif defined(__ARM_NEON__)
1025	template<bool RGBA, bool maskAlpha>
1026	static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 buffer, const* uint src, int* count)
1027	{
1028	if (count <= `0`)
1029	return;
1030
1031	const uint32x4_t amask = vdupq_n_u32(`0xff000000`);
1032	#if defined(Q_PROCESSOR_ARM_64)
1033	const uint8x16_t rgbaMask = { `2`, `1`, `0`, `3`, `6`, `5`, `4`, `7`, `10`, `9`, `8`, `11`, `14`, `13`, `12`, `15`};
1034	#else
1035	const uint8x8_t rgbaMask = { `2`, `1`, `0`, `3`, `6`, `5`, `4`, `7` };
1036	#endif
1037	int i = `0`;
1038	for (; i < count-`3`; i += `4`) {
1039	uint32x4_t vs32 = vld1q_u32(src);
1040	src += `4`;
1041	if (maskAlpha)
1042	vs32 = vorrq_u32(vs32, amask);
1043	uint8x16_t vs8 = vreinterpretq_u8_u32(vs32);
1044	if (!RGBA) {
1045	#if defined(Q_PROCESSOR_ARM_64)
1046	vs8 = vqtbl1q_u8(vs8, rgbaMask);
1047	#else
1048	// no vqtbl1q_u8
1049	const uint8x8_t vlo = vtbl1_u8(vget_low_u8(vs8), rgbaMask);
1050	const uint8x8_t vhi = vtbl1_u8(vget_high_u8(vs8), rgbaMask);
1051	vs8 = vcombine_u8(vlo, vhi);
1052	#endif
1053	}
1054	uint8x16x2_t v = vzipq_u8(vs8, vs8);
1055
1056	vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[`0`]));
1057	buffer += `2`;
1058	vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[`1`]));
1059	buffer += `2`;
1060	}
1061
1062	SIMD_EPILOGUE(i, count, `3`) {
1063	uint s = *src++;
1064	if (maskAlpha)
1065	s = s \| `0xff000000`;
1066	if (RGBA)
1067	s = RGBA2ARGB(s);
1068	*buffer++ = QRgba64::fromArgb32(s);
1069	}
1070	}
1071	#endif
1072
1073	static const QRgba64 QT_FASTCALL convertRGB32ToRGB64(QRgba64 buffer, const uint src, int* count,
1074	const QVector<QRgb> , QDitherInfo )
1075	{
1076	#ifdef __SSE2__
1077	qConvertARGB32PMToRGBA64PM_sse2<false, true>(buffer, src, count);
1078	#elif defined(__ARM_NEON__)
1079	qConvertARGB32PMToRGBA64PM_neon<false, true>(buffer, src, count);
1080	#else
1081	for (int i = `0`; i < count; ++i)
1082	buffer[i] = QRgba64::fromArgb32(`0xff000000` \| src[i]);
1083	#endif
1084	return buffer;
1085	}
1086
1087	static const QRgba64 QT_FASTCALL fetchRGB32ToRGB64(QRgba64 buffer, const uchar src, int* index, int count,
1088	const QVector<QRgb> , QDitherInfo )
1089	{
1090	return convertRGB32ToRGB64(buffer, src: reinterpret_cast<const uint >(src) + index, count, nullptr, nullptr*);
1091	}
1092
1093	static const QRgba64 QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 buffer, const uint src, int* count,
1094	const QVector<QRgb> , QDitherInfo )
1095	{
1096	for (int i = `0`; i < count; ++i)
1097	buffer[i] = QRgba64::fromArgb32(rgb: src[i]).premultiplied();
1098	return buffer;
1099	}
1100
1101	static const QRgba64 QT_FASTCALL fetchARGB32ToRGBA64PM(QRgba64 buffer, const uchar src, int* index, int count,
1102	const QVector<QRgb> , QDitherInfo )
1103	{
1104	return convertARGB32ToRGBA64PM(buffer, src: reinterpret_cast<const uint >(src) + index, count, nullptr, nullptr*);
1105	}
1106
1107	static const QRgba64 QT_FASTCALL convertARGB32PMToRGBA64PM(QRgba64 buffer, const uint src, int* count,
1108	const QVector<QRgb> , QDitherInfo )
1109	{
1110	#ifdef __SSE2__
1111	qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count);
1112	#elif defined(__ARM_NEON__)
1113	qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count);
1114	#else
1115	for (int i = `0`; i < count; ++i)
1116	buffer[i] = QRgba64::fromArgb32(src[i]);
1117	#endif
1118	return buffer;
1119	}
1120
1121	static const QRgba64 QT_FASTCALL fetchARGB32PMToRGBA64PM(QRgba64 buffer, const uchar src, int* index, int count,
1122	const QVector<QRgb> , QDitherInfo )
1123	{
1124	return convertARGB32PMToRGBA64PM(buffer, src: reinterpret_cast<const uint >(src) + index, count, nullptr, nullptr*);
1125	}
1126
1127	#if QT_CONFIG(raster_64bit)
1128	static void convertRGBA64ToRGBA64PM(QRgba64 buffer, int* count)
1129	{
1130	for (int i = `0`; i < count; ++i)
1131	buffer[i] = buffer[i].premultiplied();
1132	}
1133
1134	static void convertRGBA64PMToRGBA64PM(QRgba64 , int*)
1135	{
1136	}
1137	#endif
1138
1139	static const QRgba64 QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 buffer, const uchar src, int* index, int count,
1140	const QVector<QRgb> , QDitherInfo )
1141	{
1142	const QRgba64 s = reinterpret_cast<const* QRgba64 *>(src) + index;
1143	for (int i = `0`; i < count; ++i)
1144	buffer[i] = QRgba64::fromRgba64(c: s[i]).premultiplied();
1145	return buffer;
1146	}
1147
1148	static const QRgba64 QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 buffer, const uint src, int* count,
1149	const QVector<QRgb> , QDitherInfo )
1150	{
1151	for (int i = `0`; i < count; ++i)
1152	buffer[i] = QRgba64::fromArgb32(rgb: RGBA2ARGB(x: src[i])).premultiplied();
1153	return buffer;
1154	}
1155
1156	static const QRgba64 QT_FASTCALL fetchRGBA8888ToRGBA64PM(QRgba64 buffer, const uchar src, int* index, int count,
1157	const QVector<QRgb> , QDitherInfo )
1158	{
1159	return convertRGBA8888ToRGBA64PM(buffer, src: reinterpret_cast<const uint >(src) + index, count, nullptr, nullptr*);
1160	}
1161
1162	static const QRgba64 QT_FASTCALL convertRGBA8888PMToRGBA64PM(QRgba64 buffer, const uint src, int* count,
1163	const QVector<QRgb> , QDitherInfo )
1164	{
1165	#ifdef __SSE2__
1166	qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count);
1167	#elif defined(__ARM_NEON__)
1168	qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count);
1169	#else
1170	for (int i = `0`; i < count; ++i)
1171	buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i]));
1172	#endif
1173	return buffer;
1174	}
1175
1176	static const QRgba64 QT_FASTCALL fetchRGBA8888PMToRGBA64PM(QRgba64 buffer, const uchar src, int* index, int count,
1177	const QVector<QRgb> , QDitherInfo )
1178	{
1179	return convertRGBA8888PMToRGBA64PM(buffer, src: reinterpret_cast<const uint >(src) + index, count, nullptr, nullptr*);
1180	}
1181
1182	static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar dest, const* uint src, int* index, int count,
1183	const QVector<QRgb> , QDitherInfo )
1184	{
1185	uint d = reinterpret_cast<uint >(dest) + index;
1186	UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(qUnpremultiply(c)); });
1187	}
1188
1189	static void QT_FASTCALL storeRGBXFromRGB32(uchar dest, const* uint src, int* index, int count,
1190	const QVector<QRgb> , QDitherInfo )
1191	{
1192	uint d = reinterpret_cast<uint >(dest) + index;
1193	UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(`0xff000000` \| c); });
1194	}
1195
1196	static void QT_FASTCALL storeRGBXFromARGB32PM(uchar dest, const* uint src, int* index, int count,
1197	const QVector<QRgb> , QDitherInfo )
1198	{
1199	uint d = reinterpret_cast<uint >(dest) + index;
1200	UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(`0xff000000` \| qUnpremultiply(c)); });
1201	}
1202
1203	template<QtPixelOrder PixelOrder>
1204	static void QT_FASTCALL convertA2RGB30PMToARGB32PM(uint buffer, int* count, const QVector<QRgb> *)
1205	{
1206	for (int i = `0`; i < count; ++i)
1207	buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(buffer[i]);
1208	}
1209
1210	template<QtPixelOrder PixelOrder>
1211	static const uint QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint buffer, const uchar s, int* index, int count,
1212	const QVector<QRgb> , QDitherInfo dither)
1213	{
1214	const uint src = reinterpret_cast<const* uint *>(s) + index;
1215	if (!dither) {
1216	UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>);
1217	} else {
1218	for (int i = `0`; i < count; ++i) {
1219	const uint c = src[i];
1220	short d10 = (qt_bayer_matrix[dither->y & `15`][(dither->x + i) & `15`] << `2`);
1221	short a10 = (c >> `30`) * `0x155`;
1222	short r10 = ((c >> `20`) & `0x3ff`);
1223	short g10 = ((c >> `10`) & `0x3ff`);
1224	short b10 = (c & `0x3ff`);
1225	if (PixelOrder == PixelOrderBGR)
1226	std::swap(a&: r10, b&: b10);
1227	short a8 = (a10 + ((d10 - a10) >> `8`)) >> `2`;
1228	short r8 = (r10 + ((d10 - r10) >> `8`)) >> `2`;
1229	short g8 = (g10 + ((d10 - g10) >> `8`)) >> `2`;
1230	short b8 = (b10 + ((d10 - b10) >> `8`)) >> `2`;
1231	buffer[i] = qRgba(r: r8, g: g8, b: b8, a: a8);
1232	}
1233	}
1234	return buffer;
1235	}
1236
1237	#ifdef __SSE2__
1238	template<QtPixelOrder PixelOrder>
1239	static inline void qConvertA2RGB30PMToRGBA64PM_sse2(QRgba64 buffer, const* uint src, int* count)
1240	{
1241	if (count <= `0`)
1242	return;
1243
1244	const __m128i rmask = _mm_set1_epi32(i: `0x3ff00000`);
1245	const __m128i gmask = _mm_set1_epi32(i: `0x000ffc00`);
1246	const __m128i bmask = _mm_set1_epi32(i: `0x000003ff`);
1247	const __m128i afactor = _mm_set1_epi16(w: `0x5555`);
1248	int i = `0`;
1249
1250	for (; ((uintptr_t)buffer & `0xf`) && i < count; ++i)
1251	buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(src++);
1252
1253	for (; i < count-`3`; i += `4`) {
1254	__m128i vs = _mm_loadu_si128(p: (const __m128i*)src);
1255	src += `4`;
1256	__m128i va = _mm_srli_epi32(a: vs, count: `30`);
1257	__m128i vr = _mm_and_si128(a: vs, b: rmask);
1258	__m128i vb = _mm_and_si128(a: vs, b: bmask);
1259	__m128i vg = _mm_and_si128(a: vs, b: gmask);
1260	va = _mm_mullo_epi16(a: va, b: afactor);
1261	vr = _mm_or_si128(a: _mm_srli_epi32(a: vr, count: `14`), b: _mm_srli_epi32(a: vr, count: `24`));
1262	vg = _mm_or_si128(a: _mm_srli_epi32(a: vg, count: `4`), b: _mm_srli_epi32(a: vg, count: `14`));
1263	vb = _mm_or_si128(a: _mm_slli_epi32(a: vb, count: `6`), b: _mm_srli_epi32(a: vb, count: `4`));
1264	__m128i vrb;
1265	if (PixelOrder == PixelOrderRGB)
1266	vrb = _mm_or_si128(a: vr, _mm_slli_si128(vb, `2`));
1267	else
1268	vrb = _mm_or_si128(a: vb, _mm_slli_si128(vr, `2`));
1269	__m128i vga = _mm_or_si128(a: vg, _mm_slli_si128(va, `2`));
1270	_mm_store_si128(p: (__m128i*)(buffer), b: _mm_unpacklo_epi16(a: vrb, b: vga));
1271	buffer += `2`;
1272	_mm_store_si128(p: (__m128i*)(buffer), b: _mm_unpackhi_epi16(a: vrb, b: vga));
1273	buffer += `2`;
1274	}
1275
1276	SIMD_EPILOGUE(i, count, `3`)
1277	buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(src++);
1278	}
1279	#endif
1280
1281	template<QtPixelOrder PixelOrder>
1282	static const QRgba64 QT_FASTCALL convertA2RGB30PMToRGBA64PM(QRgba64 buffer, const uint src, int* count,
1283	const QVector<QRgb> , QDitherInfo )
1284	{
1285	#ifdef __SSE2__
1286	qConvertA2RGB30PMToRGBA64PM_sse2<PixelOrder>(buffer, src, count);
1287	#else
1288	for (int i = `0`; i < count; ++i)
1289	buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]);
1290	#endif
1291	return buffer;
1292	}
1293
1294	template<QtPixelOrder PixelOrder>
1295	static const QRgba64 QT_FASTCALL fetchA2RGB30PMToRGBA64PM(QRgba64 buffer, const uchar src, int* index, int count,
1296	const QVector<QRgb> , QDitherInfo )
1297	{
1298	return convertA2RGB30PMToRGBA64PM<PixelOrder>(buffer, reinterpret_cast<const uint >(src) + index, count, nullptr, nullptr*);
1299	}
1300
1301	template<QtPixelOrder PixelOrder>
1302	static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar dest, const* uint src, int* index, int count,
1303	const QVector<QRgb> , QDitherInfo )
1304	{
1305	uint d = reinterpret_cast<uint >(dest) + index;
1306	UNALIASED_CONVERSION_LOOP(d, src, count, qConvertArgb32ToA2rgb30<PixelOrder>);
1307	}
1308
1309	template<QtPixelOrder PixelOrder>
1310	static void QT_FASTCALL storeRGB30FromRGB32(uchar dest, const* uint src, int* index, int count,
1311	const QVector<QRgb> , QDitherInfo )
1312	{
1313	uint d = reinterpret_cast<uint >(dest) + index;
1314	UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
1315	}
1316
1317	template<QtPixelOrder PixelOrder>
1318	static void QT_FASTCALL storeRGB30FromARGB32PM(uchar dest, const* uint src, int* index, int count,
1319	const QVector<QRgb> , QDitherInfo )
1320	{
1321	uint d = reinterpret_cast<uint >(dest) + index;
1322	UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
1323	}
1324
1325	template<bool RGBA>
1326	void qt_convertRGBA64ToARGB32(uint dst, const* QRgba64 src, int* count)
1327	{
1328	int i = `0`;
1329	#ifdef __SSE2__
1330	if (((uintptr_t)dst & `0x7`) && count > `0`) {
1331	uint s = (*src++).toArgb32();
1332	if (RGBA)
1333	s = ARGB2RGBA(x: s);
1334	*dst++ = s;
1335	i++;
1336	}
1337	const __m128i vhalf = _mm_set1_epi32(i: `0x80`);
1338	const __m128i vzero = _mm_setzero_si128();
1339	for (; i < count-`1`; i += `2`) {
1340	__m128i vs = _mm_loadu_si128(p: (const __m128i*)src);
1341	src += `2`;
1342	if (!RGBA) {
1343	vs = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(`3`, `0`, `1`, `2`));
1344	vs = _mm_shufflehi_epi16(vs, _MM_SHUFFLE(`3`, `0`, `1`, `2`));
1345	}
1346	__m128i v1 = _mm_unpacklo_epi16(a: vs, b: vzero);
1347	__m128i v2 = _mm_unpackhi_epi16(a: vs, b: vzero);
1348	v1 = _mm_add_epi32(a: v1, b: vhalf);
1349	v2 = _mm_add_epi32(a: v2, b: vhalf);
1350	v1 = _mm_sub_epi32(a: v1, b: _mm_srli_epi32(a: v1, count: `8`));
1351	v2 = _mm_sub_epi32(a: v2, b: _mm_srli_epi32(a: v2, count: `8`));
1352	v1 = _mm_srli_epi32(a: v1, count: `8`);
1353	v2 = _mm_srli_epi32(a: v2, count: `8`);
1354	v1 = _mm_packs_epi32(a: v1, b: v2);
1355	v1 = _mm_packus_epi16(a: v1, b: vzero);
1356	_mm_storel_epi64(p: (__m128i*)(dst), a: v1);
1357	dst += `2`;
1358	}
1359	#endif
1360	for (; i < count; i++) {
1361	uint s = (*src++).toArgb32();
1362	if (RGBA)
1363	s = ARGB2RGBA(x: s);
1364	*dst++ = s;
1365	}
1366	}
1367	template void qt_convertRGBA64ToARGB32<false>(uint dst, const* QRgba64 src, int* count);
1368	template void qt_convertRGBA64ToARGB32<true>(uint dst, const* QRgba64 src, int* count);
1369
1370
1371	static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar dest, const* uint src, int* index, int count,
1372	const QVector<QRgb> , QDitherInfo )
1373	{
1374	for (int i = `0`; i < count; ++i)
1375	dest[index + i] = qAlpha(rgb: src[i]);
1376	}
1377
1378	static void QT_FASTCALL storeGrayscale8FromRGB32(uchar dest, const* uint src, int* index, int count,
1379	const QVector<QRgb> , QDitherInfo )
1380	{
1381	for (int i = `0`; i < count; ++i)
1382	dest[index + i] = qGray(rgb: src[i]);
1383	}
1384
1385	static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar dest, const* uint src, int* index, int count,
1386	const QVector<QRgb> , QDitherInfo )
1387	{
1388	for (int i = `0`; i < count; ++i)
1389	dest[index + i] = qGray(rgb: qUnpremultiply(p: src[i]));
1390	}
1391
1392	static void QT_FASTCALL storeGrayscale16FromRGB32(uchar dest, const* uint src, int* index, int count,
1393	const QVector<QRgb> , QDitherInfo )
1394	{
1395	unsigned short d = reinterpret_cast<unsigned* short *>(dest) + index;
1396	for (int i = `0`; i < count; ++i)
1397	d[i] = qGray(rgb: src[i]) * `257`;
1398	}
1399
1400	static void QT_FASTCALL storeGrayscale16FromARGB32PM(uchar dest, const* uint src, int* index, int count,
1401	const QVector<QRgb> , QDitherInfo )
1402	{
1403	unsigned short d = reinterpret_cast<unsigned* short *>(dest) + index;
1404	for (int i = `0`; i < count; ++i)
1405	d[i] = qGray(rgb: qUnpremultiply(p: src[i])) * `257`;
1406	}
1407
1408	static const uint QT_FASTCALL fetchRGB64ToRGB32(uint buffer, const uchar src, int* index, int count,
1409	const QVector<QRgb> , QDitherInfo )
1410	{
1411	const QRgba64 s = reinterpret_cast<const* QRgba64 *>(src) + index;
1412	for (int i = `0`; i < count; ++i)
1413	buffer[i] = toArgb32(rgba64: s[i]);
1414	return buffer;
1415	}
1416
1417	static void QT_FASTCALL storeRGB64FromRGB32(uchar dest, const* uint src, int* index, int count,
1418	const QVector<QRgb> , QDitherInfo )
1419	{
1420	QRgba64 d = reinterpret_cast<QRgba64 >(dest) + index;
1421	for (int i = `0`; i < count; ++i)
1422	d[i] = QRgba64::fromArgb32(rgb: src[i] \| `0xff000000`);
1423	}
1424
1425	static const uint QT_FASTCALL fetchRGBA64ToARGB32PM(uint buffer, const uchar src, int* index, int count,
1426	const QVector<QRgb> , QDitherInfo )
1427	{
1428	const QRgba64 s = reinterpret_cast<const* QRgba64 *>(src) + index;
1429	for (int i = `0`; i < count; ++i)
1430	buffer[i] = toArgb32(rgba64: s[i].premultiplied());
1431	return buffer;
1432	}
1433
1434	template<bool Mask>
1435	static void QT_FASTCALL storeRGBA64FromARGB32PM(uchar dest, const* uint src, int* index, int count,
1436	const QVector<QRgb> , QDitherInfo )
1437	{
1438	QRgba64 d = reinterpret_cast<QRgba64 >(dest) + index;
1439	for (int i = `0`; i < count; ++i) {
1440	d[i] = QRgba64::fromArgb32(rgb: src[i]).unpremultiplied();
1441	if (Mask)
1442	d[i].setAlpha(`65535`);
1443	}
1444	}
1445
1446	static void QT_FASTCALL storeRGBA64FromARGB32(uchar dest, const* uint src, int* index, int count,
1447	const QVector<QRgb> , QDitherInfo )
1448	{
1449	QRgba64 d = reinterpret_cast<QRgba64 >(dest) + index;
1450	for (int i = `0`; i < count; ++i)
1451	d[i] = QRgba64::fromArgb32(rgb: src[i]);
1452	}
1453
1454	// Note:
1455	// convertToArgb32() assumes that no color channel is less than 4 bits.
1456	// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits.
1457	// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits.
1458	QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
1459	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPPNone, .rbSwap: nullptr, .convertToARGB32PM: nullptr, .convertToRGBA64PM: nullptr, .fetchToARGB32PM: nullptr, .fetchToRGBA64PM: nullptr, .storeFromARGB32PM: nullptr, .storeFromRGB32: nullptr }, // Format_Invalid
1460	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP1MSB, .rbSwap: nullptr,
1461	.convertToARGB32PM: convertIndexedToARGB32PM, .convertToRGBA64PM: convertIndexedToRGBA64PM,
1462	.fetchToARGB32PM: fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, .fetchToRGBA64PM: fetchIndexedToRGBA64PM<QPixelLayout::BPP1MSB>,
1463	.storeFromARGB32PM: nullptr, .storeFromRGB32: nullptr }, // Format_Mono
1464	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP1LSB, .rbSwap: nullptr,
1465	.convertToARGB32PM: convertIndexedToARGB32PM, .convertToRGBA64PM: convertIndexedToRGBA64PM,
1466	.fetchToARGB32PM: fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, .fetchToRGBA64PM: fetchIndexedToRGBA64PM<QPixelLayout::BPP1LSB>,
1467	.storeFromARGB32PM: nullptr, .storeFromRGB32: nullptr }, // Format_MonoLSB
1468	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP8, .rbSwap: nullptr,
1469	.convertToARGB32PM: convertIndexedToARGB32PM, .convertToRGBA64PM: convertIndexedToRGBA64PM,
1470	.fetchToARGB32PM: fetchIndexedToARGB32PM<QPixelLayout::BPP8>, .fetchToRGBA64PM: fetchIndexedToRGBA64PM<QPixelLayout::BPP8>,
1471	.storeFromARGB32PM: nullptr, .storeFromRGB32: nullptr }, // Format_Indexed8
1472	// Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong,
1473	// but everywhere this generic conversion would be wrong is currently overloaded.
1474	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb32, .convertToARGB32PM: convertPassThrough,
1475	.convertToRGBA64PM: convertRGB32ToRGB64, .fetchToARGB32PM: fetchPassThrough, .fetchToRGBA64PM: fetchRGB32ToRGB64, .storeFromARGB32PM: storePassThrough, .storeFromRGB32: storePassThrough }, // Format_RGB32
1476	{ .hasAlphaChannel: true, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb32, .convertToARGB32PM: convertARGB32ToARGB32PM,
1477	.convertToRGBA64PM: convertARGB32ToRGBA64PM, .fetchToARGB32PM: fetchARGB32ToARGB32PM, .fetchToRGBA64PM: fetchARGB32ToRGBA64PM, .storeFromARGB32PM: storeARGB32FromARGB32PM, .storeFromRGB32: storePassThrough }, // Format_ARGB32
1478	{ .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb32, .convertToARGB32PM: convertPassThrough,
1479	.convertToRGBA64PM: convertARGB32PMToRGBA64PM, .fetchToARGB32PM: fetchPassThrough, .fetchToRGBA64PM: fetchARGB32PMToRGBA64PM, .storeFromARGB32PM: storePassThrough, .storeFromRGB32: storePassThrough }, // Format_ARGB32_Premultiplied
1480	pixelLayoutRGB<QImage::Format_RGB16>(),
1481	pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(),
1482	pixelLayoutRGB<QImage::Format_RGB666>(),
1483	pixelLayoutARGBPM<QImage::Format_ARGB6666_Premultiplied>(),
1484	pixelLayoutRGB<QImage::Format_RGB555>(),
1485	pixelLayoutARGBPM<QImage::Format_ARGB8555_Premultiplied>(),
1486	pixelLayoutRGB<QImage::Format_RGB888>(),
1487	pixelLayoutRGB<QImage::Format_RGB444>(),
1488	pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(),
1489	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap<QImage::Format_RGBA8888>, .convertToARGB32PM: convertRGBA8888PMToARGB32PM,
1490	.convertToRGBA64PM: convertRGBA8888PMToRGBA64PM, .fetchToARGB32PM: fetchRGBA8888PMToARGB32PM, .fetchToRGBA64PM: fetchRGBA8888PMToRGBA64PM, .storeFromARGB32PM: storeRGBXFromARGB32PM, .storeFromRGB32: storeRGBXFromRGB32 }, // Format_RGBX8888
1491	{ .hasAlphaChannel: true, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap<QImage::Format_RGBA8888>, .convertToARGB32PM: convertRGBA8888ToARGB32PM,
1492	.convertToRGBA64PM: convertRGBA8888ToRGBA64PM, .fetchToARGB32PM: fetchRGBA8888ToARGB32PM, .fetchToRGBA64PM: fetchRGBA8888ToRGBA64PM, .storeFromARGB32PM: storeRGBA8888FromARGB32PM, .storeFromRGB32: storeRGBXFromRGB32 }, // Format_RGBA8888
1493	{ .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap<QImage::Format_RGBA8888>, .convertToARGB32PM: convertRGBA8888PMToARGB32PM,
1494	.convertToRGBA64PM: convertRGBA8888PMToRGBA64PM, .fetchToARGB32PM: fetchRGBA8888PMToARGB32PM, .fetchToRGBA64PM: fetchRGBA8888PMToRGBA64PM, .storeFromARGB32PM: storeRGBA8888PMFromARGB32PM, .storeFromRGB32: storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied
1495	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb30,
1496	.convertToARGB32PM: convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
1497	.convertToRGBA64PM: convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
1498	.fetchToARGB32PM: fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
1499	.fetchToRGBA64PM: fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
1500	.storeFromARGB32PM: storeRGB30FromARGB32PM<PixelOrderBGR>,
1501	.storeFromRGB32: storeRGB30FromRGB32<PixelOrderBGR>
1502	}, // Format_BGR30
1503	{ .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb30,
1504	.convertToARGB32PM: convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
1505	.convertToRGBA64PM: convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
1506	.fetchToARGB32PM: fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
1507	.fetchToRGBA64PM: fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
1508	.storeFromARGB32PM: storeA2RGB30PMFromARGB32PM<PixelOrderBGR>,
1509	.storeFromRGB32: storeRGB30FromRGB32<PixelOrderBGR>
1510	}, // Format_A2BGR30_Premultiplied
1511	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb30,
1512	.convertToARGB32PM: convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
1513	.convertToRGBA64PM: convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
1514	.fetchToARGB32PM: fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
1515	.fetchToRGBA64PM: fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
1516	.storeFromARGB32PM: storeRGB30FromARGB32PM<PixelOrderRGB>,
1517	.storeFromRGB32: storeRGB30FromRGB32<PixelOrderRGB>
1518	}, // Format_RGB30
1519	{ .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb30,
1520	.convertToARGB32PM: convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
1521	.convertToRGBA64PM: convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
1522	.fetchToARGB32PM: fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
1523	.fetchToRGBA64PM: fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
1524	.storeFromARGB32PM: storeA2RGB30PMFromARGB32PM<PixelOrderRGB>,
1525	.storeFromRGB32: storeRGB30FromRGB32<PixelOrderRGB>
1526	}, // Format_A2RGB30_Premultiplied
1527	{ .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP8, .rbSwap: nullptr,
1528	.convertToARGB32PM: convertAlpha8ToRGB32, .convertToRGBA64PM: convertAlpha8ToRGB64,
1529	.fetchToARGB32PM: fetchAlpha8ToRGB32, .fetchToRGBA64PM: fetchAlpha8ToRGB64,
1530	.storeFromARGB32PM: storeAlpha8FromARGB32PM, .storeFromRGB32: nullptr }, // Format_Alpha8
1531	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP8, .rbSwap: nullptr,
1532	.convertToARGB32PM: convertGrayscale8ToRGB32, .convertToRGBA64PM: convertGrayscale8ToRGB64,
1533	.fetchToARGB32PM: fetchGrayscale8ToRGB32, .fetchToRGBA64PM: fetchGrayscale8ToRGB64,
1534	.storeFromARGB32PM: storeGrayscale8FromARGB32PM, .storeFromRGB32: storeGrayscale8FromRGB32 }, // Format_Grayscale8
1535	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP64, .rbSwap: nullptr,
1536	.convertToARGB32PM: convertPassThrough, .convertToRGBA64PM: nullptr,
1537	.fetchToARGB32PM: fetchRGB64ToRGB32, .fetchToRGBA64PM: fetchPassThrough64,
1538	.storeFromARGB32PM: storeRGBA64FromARGB32PM<true>, .storeFromRGB32: storeRGB64FromRGB32 }, // Format_RGBX64
1539	{ .hasAlphaChannel: true, .premultiplied: false, .bpp: QPixelLayout::BPP64, .rbSwap: nullptr,
1540	.convertToARGB32PM: convertARGB32ToARGB32PM, .convertToRGBA64PM: nullptr,
1541	.fetchToARGB32PM: fetchRGBA64ToARGB32PM, .fetchToRGBA64PM: fetchRGBA64ToRGBA64PM,
1542	.storeFromARGB32PM: storeRGBA64FromARGB32PM<false>, .storeFromRGB32: storeRGB64FromRGB32 }, // Format_RGBA64
1543	{ .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP64, .rbSwap: nullptr,
1544	.convertToARGB32PM: convertPassThrough, .convertToRGBA64PM: nullptr,
1545	.fetchToARGB32PM: fetchRGB64ToRGB32, .fetchToRGBA64PM: fetchPassThrough64,
1546	.storeFromARGB32PM: storeRGBA64FromARGB32, .storeFromRGB32: storeRGB64FromRGB32 }, // Format_RGBA64_Premultiplied
1547	{ .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP16, .rbSwap: nullptr,
1548	.convertToARGB32PM: convertGrayscale16ToRGB32, .convertToRGBA64PM: convertGrayscale16ToRGBA64,
1549	.fetchToARGB32PM: fetchGrayscale16ToRGB32, .fetchToRGBA64PM: fetchGrayscale16ToRGBA64,
1550	.storeFromARGB32PM: storeGrayscale16FromARGB32PM, .storeFromRGB32: storeGrayscale16FromRGB32 }, // Format_Grayscale16
1551	pixelLayoutRGB<QImage::Format_BGR888>(),
1552	};
1553
1554	Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats);
1555
1556	static void QT_FASTCALL convertFromRgb64(uint dest, const* QRgba64 src, int* length)
1557	{
1558	for (int i = `0`; i < length; ++i) {
1559	dest[i] = toArgb32(rgba64: src[i]);
1560	}
1561	}
1562
1563	template<QImage::Format format>
1564	static void QT_FASTCALL storeGenericFromRGBA64PM(uchar dest, const* QRgba64 src, int* index, int count,
1565	const QVector<QRgb> clut, QDitherInfo dither)
1566	{
1567	uint buffer[BufferSize];
1568	convertFromRgb64(dest: buffer, src, length: count);
1569	qPixelLayouts[format].storeFromARGB32PM(dest, buffer, index, count, clut, dither);
1570	}
1571
1572	static void QT_FASTCALL storeARGB32FromRGBA64PM(uchar dest, const* QRgba64 src, int* index, int count,
1573	const QVector<QRgb> , QDitherInfo )
1574	{
1575	uint d = (uint)dest + index;
1576	for (int i = `0`; i < count; ++i)
1577	d[i] = toArgb32(rgba64: src[i].unpremultiplied());
1578	}
1579
1580	static void QT_FASTCALL storeRGBA8888FromRGBA64PM(uchar dest, const* QRgba64 src, int* index, int count,
1581	const QVector<QRgb> , QDitherInfo )
1582	{
1583	uint d = (uint)dest + index;
1584	for (int i = `0`; i < count; ++i)
1585	d[i] = toRgba8888(rgba64: src[i].unpremultiplied());
1586	}
1587
1588	template<QtPixelOrder PixelOrder>
1589	static void QT_FASTCALL storeRGB30FromRGBA64PM(uchar dest, const* QRgba64 src, int* index, int count,
1590	const QVector<QRgb> , QDitherInfo )
1591	{
1592	uint d = (uint)dest + index;
1593	#ifdef __SSE2__
1594	qConvertRGBA64PMToA2RGB30PM_sse2<PixelOrder>(d, src, count);
1595	#else
1596	for (int i = `0`; i < count; ++i)
1597	d[i] = qConvertRgb64ToRgb30<PixelOrder>(src[i]);
1598	#endif
1599	}
1600
1601	static void QT_FASTCALL storeRGBX64FromRGBA64PM(uchar dest, const* QRgba64 src, int* index, int count,
1602	const QVector<QRgb> , QDitherInfo )
1603	{
1604	QRgba64 d = reinterpret_cast<QRgba64>(dest) + index;
1605	for (int i = `0`; i < count; ++i) {
1606	d[i] = src[i].unpremultiplied();
1607	d[i].setAlpha(`65535`);
1608	}
1609	}
1610
1611	static void QT_FASTCALL storeRGBA64FromRGBA64PM(uchar dest, const* QRgba64 src, int* index, int count,
1612	const QVector<QRgb> , QDitherInfo )
1613	{
1614	QRgba64 d = reinterpret_cast<QRgba64>(dest) + index;
1615	for (int i = `0`; i < count; ++i)
1616	d[i] = src[i].unpremultiplied();
1617	}
1618
1619	static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar dest, const* QRgba64 src, int* index, int count,
1620	const QVector<QRgb> , QDitherInfo )
1621	{
1622	QRgba64 d = reinterpret_cast<QRgba64>(dest) + index;
1623	if (d != src)
1624	memcpy(dest: d, src: src, n: count * sizeof(QRgba64));
1625	}
1626
1627	static void QT_FASTCALL storeGray16FromRGBA64PM(uchar dest, const* QRgba64 src, int* index, int count,
1628	const QVector<QRgb> , QDitherInfo )
1629	{
1630	quint16 d = reinterpret_cast<quint16>(dest) + index;
1631	for (int i = `0`; i < count; ++i) {
1632	QRgba64 s = src[i].unpremultiplied();
1633	d[i] = qGray(r: s.red(), g: s.green(), b: s.blue());
1634	}
1635	}
1636
1637	ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = {
1638	nullptr,
1639	nullptr,
1640	nullptr,
1641	nullptr,
1642	storeGenericFromRGBA64PM<QImage::Format_RGB32>,
1643	storeARGB32FromRGBA64PM,
1644	storeGenericFromRGBA64PM<QImage::Format_ARGB32_Premultiplied>,
1645	storeGenericFromRGBA64PM<QImage::Format_RGB16>,
1646	storeGenericFromRGBA64PM<QImage::Format_ARGB8565_Premultiplied>,
1647	storeGenericFromRGBA64PM<QImage::Format_RGB666>,
1648	storeGenericFromRGBA64PM<QImage::Format_ARGB6666_Premultiplied>,
1649	storeGenericFromRGBA64PM<QImage::Format_RGB555>,
1650	storeGenericFromRGBA64PM<QImage::Format_ARGB8555_Premultiplied>,
1651	storeGenericFromRGBA64PM<QImage::Format_RGB888>,
1652	storeGenericFromRGBA64PM<QImage::Format_RGB444>,
1653	storeGenericFromRGBA64PM<QImage::Format_ARGB4444_Premultiplied>,
1654	storeGenericFromRGBA64PM<QImage::Format_RGBX8888>,
1655	storeRGBA8888FromRGBA64PM,
1656	storeGenericFromRGBA64PM<QImage::Format_RGBA8888_Premultiplied>,
1657	storeRGB30FromRGBA64PM<PixelOrderBGR>,
1658	storeRGB30FromRGBA64PM<PixelOrderBGR>,
1659	storeRGB30FromRGBA64PM<PixelOrderRGB>,
1660	storeRGB30FromRGBA64PM<PixelOrderRGB>,
1661	storeGenericFromRGBA64PM<QImage::Format_Alpha8>,
1662	storeGenericFromRGBA64PM<QImage::Format_Grayscale8>,
1663	storeRGBX64FromRGBA64PM,
1664	storeRGBA64FromRGBA64PM,
1665	storeRGBA64PMFromRGBA64PM,
1666	storeGray16FromRGBA64PM,
1667	storeGenericFromRGBA64PM<QImage::Format_BGR888>,
1668	};
1669
/*
  Destination fetch. This is simple as we don't have to do bounds checks or
  transformations.
*/
1674
1675	static uint * QT_FASTCALL destFetchMono(uint buffer, QRasterBuffer rasterBuffer, int x, int y, int length)
1676	{
1677	uchar Q_DECL_RESTRICT data = (uchar )rasterBuffer->scanLine(y);
1678	uint *start = buffer;
1679	const uint *end = buffer + length;
1680	while (buffer < end) {
1681	*buffer = data[x>>`3`] & (`0x80` >> (x & `7`)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1682	++buffer;
1683	++x;
1684	}
1685	return start;
1686	}
1687
1688	static uint * QT_FASTCALL destFetchMonoLsb(uint buffer, QRasterBuffer rasterBuffer, int x, int y, int length)
1689	{
1690	uchar Q_DECL_RESTRICT data = (uchar )rasterBuffer->scanLine(y);
1691	uint *start = buffer;
1692	const uint *end = buffer + length;
1693	while (buffer < end) {
1694	*buffer = data[x>>`3`] & (`0x1` << (x & `7`)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1695	++buffer;
1696	++x;
1697	}
1698	return start;
1699	}
1700
1701	static uint * QT_FASTCALL destFetchARGB32P(uint , QRasterBuffer rasterBuffer, int x, int y, int)
1702	{
1703	return (uint *)rasterBuffer->scanLine(y) + x;
1704	}
1705
1706	static uint * QT_FASTCALL destFetchRGB16(uint buffer, QRasterBuffer rasterBuffer, int x, int y, int length)
1707	{
1708	const ushort Q_DECL_RESTRICT data = (const* ushort *)rasterBuffer->scanLine(y) + x;
1709	for (int i = `0`; i < length; ++i)
1710	buffer[i] = qConvertRgb16To32(c: data[i]);
1711	return buffer;
1712	}
1713
1714	static uint QT_FASTCALL destFetch(uint buffer, QRasterBuffer rasterBuffer, int* x, int y, int length)
1715	{
1716	const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1717	return const_cast<uint >(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr*));
1718	}
1719
1720	static uint QT_FASTCALL destFetchUndefined(uint buffer, QRasterBuffer , int, int, int*)
1721	{
1722	return buffer;
1723	}
1724
1725	static DestFetchProc destFetchProc[QImage::NImageFormats] =
1726	{
1727	nullptr, // Format_Invalid
1728	destFetchMono, // Format_Mono,
1729	destFetchMonoLsb, // Format_MonoLSB
1730	nullptr, // Format_Indexed8
1731	destFetchARGB32P, // Format_RGB32
1732	destFetch, // Format_ARGB32,
1733	destFetchARGB32P, // Format_ARGB32_Premultiplied
1734	destFetchRGB16, // Format_RGB16
1735	destFetch, // Format_ARGB8565_Premultiplied
1736	destFetch, // Format_RGB666
1737	destFetch, // Format_ARGB6666_Premultiplied
1738	destFetch, // Format_RGB555
1739	destFetch, // Format_ARGB8555_Premultiplied
1740	destFetch, // Format_RGB888
1741	destFetch, // Format_RGB444
1742	destFetch, // Format_ARGB4444_Premultiplied
1743	destFetch, // Format_RGBX8888
1744	destFetch, // Format_RGBA8888
1745	destFetch, // Format_RGBA8888_Premultiplied
1746	destFetch, // Format_BGR30
1747	destFetch, // Format_A2BGR30_Premultiplied
1748	destFetch, // Format_RGB30
1749	destFetch, // Format_A2RGB30_Premultiplied
1750	destFetch, // Format_Alpha8
1751	destFetch, // Format_Grayscale8
1752	destFetch, // Format_RGBX64
1753	destFetch, // Format_RGBA64
1754	destFetch, // Format_RGBA64_Premultiplied
1755	destFetch, // Format_Grayscale16
1756	destFetch, // Format_BGR888
1757	};
1758
1759	#if QT_CONFIG(raster_64bit)
1760	static QRgba64 QT_FASTCALL destFetch64(QRgba64 buffer, QRasterBuffer rasterBuffer, int* x, int y, int length)
1761	{
1762	const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1763	return const_cast<QRgba64 >(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr*));
1764	}
1765
1766	static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 , QRasterBuffer rasterBuffer, int x, int y, int)
1767	{
1768	return (QRgba64 *)rasterBuffer->scanLine(y) + x;
1769	}
1770
1771	static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 buffer, QRasterBuffer , int, int, int)
1772	{
1773	return buffer;
1774	}
1775
1776	static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
1777	{
1778	nullptr, // Format_Invalid
1779	nullptr, // Format_Mono,
1780	nullptr, // Format_MonoLSB
1781	nullptr, // Format_Indexed8
1782	destFetch64, // Format_RGB32
1783	destFetch64, // Format_ARGB32,
1784	destFetch64, // Format_ARGB32_Premultiplied
1785	destFetch64, // Format_RGB16
1786	destFetch64, // Format_ARGB8565_Premultiplied
1787	destFetch64, // Format_RGB666
1788	destFetch64, // Format_ARGB6666_Premultiplied
1789	destFetch64, // Format_RGB555
1790	destFetch64, // Format_ARGB8555_Premultiplied
1791	destFetch64, // Format_RGB888
1792	destFetch64, // Format_RGB444
1793	destFetch64, // Format_ARGB4444_Premultiplied
1794	destFetch64, // Format_RGBX8888
1795	destFetch64, // Format_RGBA8888
1796	destFetch64, // Format_RGBA8888_Premultiplied
1797	destFetch64, // Format_BGR30
1798	destFetch64, // Format_A2BGR30_Premultiplied
1799	destFetch64, // Format_RGB30
1800	destFetch64, // Format_A2RGB30_Premultiplied
1801	destFetch64, // Format_Alpha8
1802	destFetch64, // Format_Grayscale8
1803	destFetchRGB64, // Format_RGBX64
1804	destFetch64, // Format_RGBA64
1805	destFetchRGB64, // Format_RGBA64_Premultiplied
1806	destFetch64, // Format_Grayscale16
1807	destFetch64, // Format_BGR888
1808	};
1809	#endif
1810
1811	/*
1812	Returns the color in the mono destination color table
1813	that is the "nearest" to /color/.
1814	*/
1815	static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf)
1816	{
1817	QRgb color_0 = qPremultiply(x: rbuf->destColor0);
1818	QRgb color_1 = qPremultiply(x: rbuf->destColor1);
1819	color = qPremultiply(x: color);
1820
1821	int r = qRed(rgb: color);
1822	int g = qGreen(rgb: color);
1823	int b = qBlue(rgb: color);
1824	int rx, gx, bx;
1825	int dist_0, dist_1;
1826
1827	rx = r - qRed(rgb: color_0);
1828	gx = g - qGreen(rgb: color_0);
1829	bx = b - qBlue(rgb: color_0);
1830	dist_0 = rxrx + gxgx + bx*bx;
1831
1832	rx = r - qRed(rgb: color_1);
1833	gx = g - qGreen(rgb: color_1);
1834	bx = b - qBlue(rgb: color_1);
1835	dist_1 = rxrx + gxgx + bx*bx;
1836
1837	if (dist_0 < dist_1)
1838	return color_0;
1839	return color_1;
1840	}
1841
/*
  Destination store.
*/
1845
1846	static void QT_FASTCALL destStoreMono(QRasterBuffer rasterBuffer, int* x, int y, const uint buffer, int* length)
1847	{
1848	uchar Q_DECL_RESTRICT data = (uchar )rasterBuffer->scanLine(y);
1849	if (rasterBuffer->monoDestinationWithClut) {
1850	for (int i = `0`; i < length; ++i) {
1851	if (buffer[i] == rasterBuffer->destColor0) {
1852	data[x >> `3`] &= ~(`0x80` >> (x & `7`));
1853	} else if (buffer[i] == rasterBuffer->destColor1) {
1854	data[x >> `3`] \|= `0x80` >> (x & `7`);
1855	} else if (findNearestColor(color: buffer[i], rbuf: rasterBuffer) == rasterBuffer->destColor0) {
1856	data[x >> `3`] &= ~(`0x80` >> (x & `7`));
1857	} else {
1858	data[x >> `3`] \|= `0x80` >> (x & `7`);
1859	}
1860	++x;
1861	}
1862	} else {
1863	for (int i = `0`; i < length; ++i) {
1864	if (qGray(rgb: buffer[i]) < int(qt_bayer_matrix[y & `15`][x & `15`]))
1865	data[x >> `3`] \|= `0x80` >> (x & `7`);
1866	else
1867	data[x >> `3`] &= ~(`0x80` >> (x & `7`));
1868	++x;
1869	}
1870	}
1871	}
1872
1873	static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer rasterBuffer, int* x, int y, const uint buffer, int* length)
1874	{
1875	uchar Q_DECL_RESTRICT data = (uchar )rasterBuffer->scanLine(y);
1876	if (rasterBuffer->monoDestinationWithClut) {
1877	for (int i = `0`; i < length; ++i) {
1878	if (buffer[i] == rasterBuffer->destColor0) {
1879	data[x >> `3`] &= ~(`1` << (x & `7`));
1880	} else if (buffer[i] == rasterBuffer->destColor1) {
1881	data[x >> `3`] \|= `1` << (x & `7`);
1882	} else if (findNearestColor(color: buffer[i], rbuf: rasterBuffer) == rasterBuffer->destColor0) {
1883	data[x >> `3`] &= ~(`1` << (x & `7`));
1884	} else {
1885	data[x >> `3`] \|= `1` << (x & `7`);
1886	}
1887	++x;
1888	}
1889	} else {
1890	for (int i = `0`; i < length; ++i) {
1891	if (qGray(rgb: buffer[i]) < int(qt_bayer_matrix[y & `15`][x & `15`]))
1892	data[x >> `3`] \|= `1` << (x & `7`);
1893	else
1894	data[x >> `3`] &= ~(`1` << (x & `7`));
1895	++x;
1896	}
1897	}
1898	}
1899
1900	static void QT_FASTCALL destStoreRGB16(QRasterBuffer rasterBuffer, int* x, int y, const uint buffer, int* length)
1901	{
1902	quint16 data = (quint16)rasterBuffer->scanLine(y) + x;
1903	for (int i = `0`; i < length; ++i)
1904	data[i] = qConvertRgb32To16(c: buffer[i]);
1905	}
1906
1907	static void QT_FASTCALL destStore(QRasterBuffer rasterBuffer, int* x, int y, const uint buffer, int* length)
1908	{
1909	const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1910	ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
1911	if (!layout->premultiplied && !layout->hasAlphaChannel)
1912	store = layout->storeFromRGB32;
1913	uchar *dest = rasterBuffer->scanLine(y);
1914	store(dest, buffer, x, length, nullptr, nullptr);
1915	}
1916
1917	static DestStoreProc destStoreProc[QImage::NImageFormats] =
1918	{
1919	nullptr, // Format_Invalid
1920	destStoreMono, // Format_Mono,
1921	destStoreMonoLsb, // Format_MonoLSB
1922	nullptr, // Format_Indexed8
1923	nullptr, // Format_RGB32
1924	destStore, // Format_ARGB32,
1925	nullptr, // Format_ARGB32_Premultiplied
1926	destStoreRGB16, // Format_RGB16
1927	destStore, // Format_ARGB8565_Premultiplied
1928	destStore, // Format_RGB666
1929	destStore, // Format_ARGB6666_Premultiplied
1930	destStore, // Format_RGB555
1931	destStore, // Format_ARGB8555_Premultiplied
1932	destStore, // Format_RGB888
1933	destStore, // Format_RGB444
1934	destStore, // Format_ARGB4444_Premultiplied
1935	destStore, // Format_RGBX8888
1936	destStore, // Format_RGBA8888
1937	destStore, // Format_RGBA8888_Premultiplied
1938	destStore, // Format_BGR30
1939	destStore, // Format_A2BGR30_Premultiplied
1940	destStore, // Format_RGB30
1941	destStore, // Format_A2RGB30_Premultiplied
1942	destStore, // Format_Alpha8
1943	destStore, // Format_Grayscale8
1944	destStore, // Format_RGBX64
1945	destStore, // Format_RGBA64
1946	destStore, // Format_RGBA64_Premultiplied
1947	destStore, // Format_Grayscale16
1948	destStore, // Format_BGR888
1949	};
1950
1951	#if QT_CONFIG(raster_64bit)
1952	static void QT_FASTCALL destStore64(QRasterBuffer rasterBuffer, int* x, int y, const QRgba64 buffer, int* length)
1953	{
1954	auto store = qStoreFromRGBA64PM[rasterBuffer->format];
1955	uchar *dest = rasterBuffer->scanLine(y);
1956	store(dest, buffer, x, length, nullptr, nullptr);
1957	}
1958
1959	static void QT_FASTCALL destStore64RGBA64(QRasterBuffer rasterBuffer, int* x, int y, const QRgba64 buffer, int* length)
1960	{
1961	QRgba64 dest = reinterpret_cast<QRgba64>(rasterBuffer->scanLine(y)) + x;
1962	for (int i = `0`; i < length; ++i) {
1963	dest[i] = buffer[i].unpremultiplied();
1964	}
1965	}
1966
1967	static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
1968	{
1969	nullptr, // Format_Invalid
1970	nullptr, // Format_Mono,
1971	nullptr, // Format_MonoLSB
1972	nullptr, // Format_Indexed8
1973	destStore64, // Format_RGB32
1974	destStore64, // Format_ARGB32,
1975	destStore64, // Format_ARGB32_Premultiplied
1976	destStore64, // Format_RGB16
1977	destStore64, // Format_ARGB8565_Premultiplied
1978	destStore64, // Format_RGB666
1979	destStore64, // Format_ARGB6666_Premultiplied
1980	destStore64, // Format_RGB555
1981	destStore64, // Format_ARGB8555_Premultiplied
1982	destStore64, // Format_RGB888
1983	destStore64, // Format_RGB444
1984	destStore64, // Format_ARGB4444_Premultiplied
1985	destStore64, // Format_RGBX8888
1986	destStore64, // Format_RGBA8888
1987	destStore64, // Format_RGBA8888_Premultiplied
1988	destStore64, // Format_BGR30
1989	destStore64, // Format_A2BGR30_Premultiplied
1990	destStore64, // Format_RGB30
1991	destStore64, // Format_A2RGB30_Premultiplied
1992	destStore64, // Format_Alpha8
1993	destStore64, // Format_Grayscale8
1994	nullptr, // Format_RGBX64
1995	destStore64RGBA64, // Format_RGBA64
1996	nullptr, // Format_RGBA64_Premultiplied
1997	destStore64, // Format_Grayscale16
1998	destStore64, // Format_BGR888
1999	};
2000	#endif
2001
/*
  Source fetches

  This is a bit more complicated, as we need several fetch routines for every surface type.

  We need 5 fetch methods per surface type:
  untransformed
  transformed (tiled and not tiled)
  transformed bilinear (tiled and not tiled)

  We don't need bounds checks for untransformed, but we need them for the other ones.

  The generic implementation does pixel-by-pixel fetches.
*/
2016
// Texture sampling modes. The enumerators are used as template arguments by
// the fetchTransformed* routines; NBlendTypes is the number of modes.
enum TextureBlendType {
    BlendUntransformed,
    BlendTiled,
    BlendTransformed,
    BlendTransformedTiled,
    BlendTransformedBilinear,
    BlendTransformedBilinearTiled,
    NBlendTypes
};
2026
2027	static const uint QT_FASTCALL fetchUntransformed(uint buffer, const Operator *,
2028	const QSpanData data, int* y, int x, int length)
2029	{
2030	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2031	return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2032	}
2033
2034	static const uint QT_FASTCALL fetchUntransformedARGB32PM(uint , const Operator *,
2035	const QSpanData data, int* y, int x, int)
2036	{
2037	const uchar *scanLine = data->texture.scanLine(y);
2038	return reinterpret_cast<const uint *>(scanLine) + x;
2039	}
2040
2041	static const uint QT_FASTCALL fetchUntransformedRGB16(uint buffer, const Operator *,
2042	const QSpanData data, int* y, int x,
2043	int length)
2044	{
2045	const quint16 scanLine = (const* quint16 *)data->texture.scanLine(y) + x;
2046	for (int i = `0`; i < length; ++i)
2047	buffer[i] = qConvertRgb16To32(c: scanLine[i]);
2048	return buffer;
2049	}
2050
2051	#if QT_CONFIG(raster_64bit)
2052	static const QRgba64 QT_FASTCALL fetchUntransformed64(QRgba64 buffer, const Operator *,
2053	const QSpanData data, int* y, int x, int length)
2054	{
2055	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2056	return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2057	}
2058
2059	static const QRgba64 QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 , const Operator *,
2060	const QSpanData data, int* y, int x, int)
2061	{
2062	const uchar *scanLine = data->texture.scanLine(y);
2063	return reinterpret_cast<const QRgba64 *>(scanLine) + x;
2064	}
2065	#endif
2066
2067	template<TextureBlendType blendType>
2068	inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
2069	{
2070	Q_STATIC_ASSERT(blendType == BlendTransformed \|\| blendType == BlendTransformedTiled);
2071	if (blendType == BlendTransformedTiled) {
2072	if (v < `0` \|\| v >= max) {
2073	v %= max;
2074	if (v < `0`) v += max;
2075	}
2076	} else {
2077	v = qBound(min: l1, val: v, max: l2);
2078	}
2079	}
2080
2081	static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data)
2082	{
2083	if (Q_UNLIKELY(!data->fast_matrix))
2084	return false;
2085
2086	qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale;
2087	qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale;
2088	qreal minc = std::min(a: fx, b: fy);
2089	qreal maxc = std::max(a: fx, b: fy);
2090	fx += std::trunc(x: data->m11 * fixed_scale) * length;
2091	fy += std::trunc(x: data->m12 * fixed_scale) * length;
2092	minc = std::min(a: minc, b: std::min(a: fx, b: fy));
2093	maxc = std::max(a: maxc, b: std::max(a: fx, b: fy));
2094
2095	return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max();
2096	}
2097
2098	template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
2099	static void QT_FASTCALL fetchTransformed_fetcher(T buffer, const* QSpanData *data,
2100	int y, int x, int length)
2101	{
2102	Q_STATIC_ASSERT(blendType == BlendTransformed \|\| blendType == BlendTransformedTiled);
2103	const QTextureData &image = data->texture;
2104
2105	const qreal cx = x + qreal(`0.5`);
2106	const qreal cy = y + qreal(`0.5`);
2107
2108	constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
2109	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2110	if (!useFetch)
2111	Q_ASSERT(layout->bpp == bpp);
2112	// When templated 'fetch' should be inlined at compile time:
2113	const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>);
2114
2115	if (canUseFastMatrixPath(cx, cy, length, data)) {
2116	// The increment pr x in the scanline
2117	int fdx = (int)(data->m11 * fixed_scale);
2118	int fdy = (int)(data->m12 * fixed_scale);
2119
2120	int fx = int((data->m21 * cy
2121	+ data->m11 * cx + data->dx) * fixed_scale);
2122	int fy = int((data->m22 * cy
2123	+ data->m12 * cx + data->dy) * fixed_scale);
2124
2125	if (fdy == `0`) { // simple scale, no rotation or shear
2126	int py = (fy >> `16`);
2127	fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, py);
2128	const uchar *src = image.scanLine(y: py);
2129
2130	int i = `0`;
2131	if (blendType == BlendTransformed) {
2132	int fastLen = length;
2133	if (fdx > `0`)
2134	fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - `1`) * fixed_scale - fx) / fdx));
2135	else if (fdx < `0`)
2136	fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
2137
2138	for (; i < fastLen; ++i) {
2139	int x1 = (fx >> `16`);
2140	int x2 = x1;
2141	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1);
2142	if (x1 == x2)
2143	break;
2144	if (useFetch)
2145	buffer[i] = fetch(src, x1);
2146	else
2147	buffer[i] = reinterpret_cast<const T*>(src)[x1];
2148	fx += fdx;
2149	}
2150
2151	for (; i < fastLen; ++i) {
2152	int px = (fx >> `16`);
2153	if (useFetch)
2154	buffer[i] = fetch(src, px);
2155	else
2156	buffer[i] = reinterpret_cast<const T*>(src)[px];
2157	fx += fdx;
2158	}
2159	}
2160
2161	for (; i < length; ++i) {
2162	int px = (fx >> `16`);
2163	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, px);
2164	if (useFetch)
2165	buffer[i] = fetch(src, px);
2166	else
2167	buffer[i] = reinterpret_cast<const T*>(src)[px];
2168	fx += fdx;
2169	}
2170	} else { // rotation or shear
2171	int i = `0`;
2172	if (blendType == BlendTransformed) {
2173	int fastLen = length;
2174	if (fdx > `0`)
2175	fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - `1`) * fixed_scale - fx) / fdx));
2176	else if (fdx < `0`)
2177	fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
2178	if (fdy > `0`)
2179	fastLen = qMin(a: fastLen, b: int((qint64(image.y2 - `1`) * fixed_scale - fy) / fdy));
2180	else if (fdy < `0`)
2181	fastLen = qMin(a: fastLen, b: int((qint64(image.y1) * fixed_scale - fy) / fdy));
2182
2183	for (; i < fastLen; ++i) {
2184	int x1 = (fx >> `16`);
2185	int y1 = (fy >> `16`);
2186	int x2 = x1;
2187	int y2 = y1;
2188	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1);
2189	fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1);
2190	if (x1 == x2 && y1 == y2)
2191	break;
2192	if (useFetch)
2193	buffer[i] = fetch(image.scanLine(y: y1), x1);
2194	else
2195	buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: y1))[x1];
2196	fx += fdx;
2197	fy += fdy;
2198	}
2199
2200	for (; i < fastLen; ++i) {
2201	int px = (fx >> `16`);
2202	int py = (fy >> `16`);
2203	if (useFetch)
2204	buffer[i] = fetch(image.scanLine(y: py), px);
2205	else
2206	buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
2207	fx += fdx;
2208	fy += fdy;
2209	}
2210	}
2211
2212	for (; i < length; ++i) {
2213	int px = (fx >> `16`);
2214	int py = (fy >> `16`);
2215	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, px);
2216	fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, py);
2217	if (useFetch)
2218	buffer[i] = fetch(image.scanLine(y: py), px);
2219	else
2220	buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
2221	fx += fdx;
2222	fy += fdy;
2223	}
2224	}
2225	} else {
2226	const qreal fdx = data->m11;
2227	const qreal fdy = data->m12;
2228	const qreal fdw = data->m13;
2229
2230	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2231	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2232	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2233
2234	T *const end = buffer + length;
2235	T *b = buffer;
2236	while (b < end) {
2237	const qreal iw = fw == `0` ? `1` : `1` / fw;
2238	const qreal tx = fx * iw;
2239	const qreal ty = fy * iw;
2240	int px = qFloor(v: tx);
2241	int py = qFloor(v: ty);
2242
2243	fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, py);
2244	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, px);
2245	if (useFetch)
2246	*b = fetch(image.scanLine(y: py), px);
2247	else
2248	b = reinterpret_cast<const* T*>(image.scanLine(y: py))[px];
2249
2250	fx += fdx;
2251	fy += fdy;
2252	fw += fdw;
2253	//force increment to avoid /0
2254	if (!fw) {
2255	fw += fdw;
2256	}
2257	++b;
2258	}
2259	}
2260	}
2261
2262	template<TextureBlendType blendType, QPixelLayout::BPP bpp>
2263	static const uint QT_FASTCALL fetchTransformed(uint buffer, const Operator , const* QSpanData *data,
2264	int y, int x, int length)
2265	{
2266	Q_STATIC_ASSERT(blendType == BlendTransformed \|\| blendType == BlendTransformedTiled);
2267	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2268	fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
2269	layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
2270	return buffer;
2271	}
2272
2273	#if QT_CONFIG(raster_64bit)
2274	template<TextureBlendType blendType> / either BlendTransformed or BlendTransformedTiled /
2275	static const QRgba64 QT_FASTCALL fetchTransformed64(QRgba64 buffer, const Operator , const* QSpanData *data,
2276	int y, int x, int length)
2277	{
2278	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2279	if (layout->bpp != QPixelLayout::BPP64) {
2280	uint buffer32[BufferSize];
2281	Q_ASSERT(length <= BufferSize);
2282	if (layout->bpp == QPixelLayout::BPP32)
2283	fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
2284	else
2285	fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
2286	return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
2287	}
2288
2289	fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length);
2290	if (data->texture.format == QImage::Format_RGBA64)
2291	convertRGBA64ToRGBA64PM(buffer, count: length);
2292	return buffer;
2293	}
2294	#endif
2295
/*! \internal
  Interpolates 4 argb pixels with the distx and disty factors.
  distx and disty must be between 0 and 16.

  tl/tr/bl/br are the top-left, top-right, bottom-left and bottom-right
  pixels. The four weights always sum to 16*16 = 256, so each 8-bit channel
  is scaled by at most 256 and the red/blue and alpha/green pairs can be
  processed two channels at a time inside one 32-bit word without carrying
  into the neighbouring channel.
 */
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    uint distxy = distx * disty;
    //idistx * disty = (16-distx) * disty = 16*disty - distxy
    //idistx * idisty = (16-distx) * (16-disty) = 16*16 - 16*distx -16*disty + distxy
    uint tlrb = (tl & 0x00ff00ff)         * (16*16 - 16*distx - 16*disty + distxy);
    uint tlag = ((tl & 0xff00ff00) >> 8)  * (16*16 - 16*distx - 16*disty + distxy);
    uint trrb = ((tr & 0x00ff00ff)        * (distx*16 - distxy));
    uint trag = (((tr & 0xff00ff00) >> 8) * (distx*16 - distxy));
    uint blrb = ((bl & 0x00ff00ff)        * (disty*16 - distxy));
    uint blag = (((bl & 0xff00ff00) >> 8) * (disty*16 - distxy));
    uint brrb = ((br & 0x00ff00ff)        * (distxy));
    uint brag = (((br & 0xff00ff00) >> 8) * (distxy));
    // RB sums sit 8 bits too high and are shifted back down; AG sums were
    // pre-shifted down by 8, so they are already in their final position.
    return (((tlrb + trrb + blrb + brrb) >> 8) & 0x00ff00ff) | ((tlag + trag + blag + brag) & 0xff00ff00);
}
2315
#if defined(__SSE2__)
// SSE2 counterpart of interpolate_4_pixels_16(): interpolates four output
// pixels at once and stores them, unaligned, at b.
//   tl, tr, bl, br : __m128i, each holding four 32-bit pixels.
//   distx, disty   : __m128i of 16-bit lanes with factors in 0..16; both
//                    16-bit halves of a pixel must carry that pixel's factor.
//   colorMask      : 0x00ff00ff in every 32-bit lane.
//   v_256          : 256 in every 16-bit lane.
// The four weights sum to 256, so every per-channel sum fits in 16 bits.
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
{ \
    const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
    const __m128i distx_ = _mm_slli_epi16(distx, 4); \
    const __m128i disty_ = _mm_slli_epi16(disty, 4); \
    const __m128i idxidy =  _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
    const __m128i dxidy =  _mm_sub_epi16(distx_, dxdy); \
    const __m128i idxdy =  _mm_sub_epi16(disty_, dxdy); \
 \
    __m128i tlAG = _mm_srli_epi16(tl, 8); \
    __m128i tlRB = _mm_and_si128(tl, colorMask); \
    __m128i trAG = _mm_srli_epi16(tr, 8); \
    __m128i trRB = _mm_and_si128(tr, colorMask); \
    __m128i blAG = _mm_srli_epi16(bl, 8); \
    __m128i blRB = _mm_and_si128(bl, colorMask); \
    __m128i brAG = _mm_srli_epi16(br, 8); \
    __m128i brRB = _mm_and_si128(br, colorMask); \
 \
    tlAG = _mm_mullo_epi16(tlAG, idxidy); \
    tlRB = _mm_mullo_epi16(tlRB, idxidy); \
    trAG = _mm_mullo_epi16(trAG, dxidy); \
    trRB = _mm_mullo_epi16(trRB, dxidy); \
    blAG = _mm_mullo_epi16(blAG, idxdy); \
    blRB = _mm_mullo_epi16(blRB, idxdy); \
    brAG = _mm_mullo_epi16(brAG, dxdy); \
    brRB = _mm_mullo_epi16(brRB, dxdy); \
 \
    /* Add the values, and shift to only keep 8 significant bits per colors */ \
    __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
    __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
    rAG = _mm_andnot_si128(colorMask, rAG); \
    rRB = _mm_srli_epi16(rRB, 8); \
    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
}
#endif
2352
#if defined(__ARM_NEON__)
// NEON counterpart of interpolate_4_pixels_16(): interpolates four output
// pixels at once and stores them at b.
//   tl, tr, bl, br : int16x8_t views of four 32-bit pixels each.
//   distx, disty   : int16x8_t factors in 0..16, duplicated into both 16-bit
//                    halves of every pixel.
//   disty_         : disty already shifted left by 4 (i.e. 16 * disty); the
//                    caller supplies it, distx_ is derived here.
//   colorMask      : 0x00ff in every 16-bit lane; invColorMask is its
//                    bitwise complement.
//   v_256          : 256 in every 16-bit lane.
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
{ \
    const int16x8_t dxdy = vmulq_s16(distx, disty); \
    const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
    const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
    const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
    const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
 \
    int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
    int16x8_t tlRB = vandq_s16(tl, colorMask); \
    int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
    int16x8_t trRB = vandq_s16(tr, colorMask); \
    int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
    int16x8_t blRB = vandq_s16(bl, colorMask); \
    int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
    int16x8_t brRB = vandq_s16(br, colorMask); \
 \
    int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
    int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
    rAG = vmlaq_s16(rAG, trAG, dxidy); \
    rRB = vmlaq_s16(rRB, trRB, dxidy); \
    rAG = vmlaq_s16(rAG, blAG, idxdy); \
    rRB = vmlaq_s16(rRB, blRB, idxdy); \
    rAG = vmlaq_s16(rAG, brAG, dxdy); \
    rRB = vmlaq_s16(rRB, brRB, dxdy); \
 \
    rAG = vandq_s16(invColorMask, rAG); \
    rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
    vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
}
#endif
2385
2386	template<TextureBlendType blendType>
2387	void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
2388
2389	template<>
2390	inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2)
2391	{
2392	v1 %= max;
2393	if (v1 < `0`)
2394	v1 += max;
2395	v2 = v1 + `1`;
2396	if (v2 == max)
2397	v2 = `0`;
2398	Q_ASSERT(v1 >= `0` && v1 < max);
2399	Q_ASSERT(v2 >= `0` && v2 < max);
2400	}
2401
2402	template<>
2403	inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2)
2404	{
2405	if (v1 < l1)
2406	v2 = v1 = l1;
2407	else if (v1 >= l2)
2408	v2 = v1 = l2;
2409	else
2410	v2 = v1 + `1`;
2411	Q_ASSERT(v1 >= l1 && v1 <= l2);
2412	Q_ASSERT(v2 >= l1 && v2 <= l2);
2413	}
2414
// Kinds of fixed-point bilinear transform; the helper functions below are
// named after these. NFastTransformTypes is the number of kinds and must
// stay last (presumably it sizes a helper table - confirm at the use site).
enum FastTransformTypes {
    SimpleScaleTransform,
    UpscaleTransform,
    DownscaleTransform,
    RotateTransform,
    FastRotateTransform,
    NFastTransformTypes
};
2423
2424	// Completes the partial interpolation stored in IntermediateBuffer.
2425	// by performing the x-axis interpolation and joining the RB and AG buffers.
2426	static void QT_FASTCALL intermediate_adder(uint b, uint end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
2427	{
2428	#if defined(QT_COMPILER_SUPPORTS_AVX2)
2429	extern void QT_FASTCALL intermediate_adder_avx2(uint b, uint end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
2430	if (qCpuHasFeature(ArchHaswell))
2431	return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
2432	#endif
2433
2434	// Switch to intermediate buffer coordinates
2435	fx -= offset * fixed_scale;
2436
2437	while (b < end) {
2438	const int x = (fx >> `16`);
2439
2440	const uint distx = (fx & `0x0000ffff`) >> `8`;
2441	const uint idistx = `256` - distx;
2442	const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + `1`] * distx) & `0xff00ff00`;
2443	const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + `1`] * distx) & `0xff00ff00`;
2444	*b = (rb >> `8`) \| ag;
2445	b++;
2446	fx += fdx;
2447	}
2448	fx += offset * fixed_scale;
2449	}
2450
2451	typedef void (QT_FASTCALL BilinearFastTransformHelper)(uint b, uint end, const* QTextureData &image, int &fx, int &fy, int fdx, int fdy);
2452
2453	template<TextureBlendType blendType>
2454	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint b, uint end, const QTextureData &image,
2455	int &fx, int &fy, int fdx, int /fdy/)
2456	{
2457	int y1 = (fy >> `16`);
2458	int y2;
2459	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
2460	const uint s1 = (const* uint *)image.scanLine(y: y1);
2461	const uint s2 = (const* uint *)image.scanLine(y: y2);
2462
2463	const int disty = (fy & `0x0000ffff`) >> `8`;
2464	const int idisty = `256` - disty;
2465	const int length = end - b;
2466
2467	// The intermediate buffer is generated in the positive direction
2468	const int adjust = (fdx < `0`) ? fdx * length : `0`;
2469	const int offset = (fx + adjust) >> `16`;
2470	int x = offset;
2471
2472	IntermediateBuffer intermediate;
2473	// count is the size used in the intermediate.buffer.
2474	int count = (qint64(length) * qAbs(t: fdx) + fixed_scale - `1`) / fixed_scale + `2`;
2475	// length is supposed to be <= BufferSize either because data->m11 < 1 or
2476	// data->m11 < 2, and any larger buffers split
2477	Q_ASSERT(count <= BufferSize + `2`);
2478	int f = `0`;
2479	int lim = count;
2480	if (blendType == BlendTransformedBilinearTiled) {
2481	x %= image.width;
2482	if (x < `0`) x += image.width;
2483	} else {
2484	lim = qMin(a: count, b: image.x2 - x);
2485	if (x < image.x1) {
2486	Q_ASSERT(x < image.x2);
2487	uint t = s1[image.x1];
2488	uint b = s2[image.x1];
2489	quint32 rb = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
2490	quint32 ag = ((((t>>`8`) & `0xff00ff`) * idisty + ((b>>`8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
2491	do {
2492	intermediate.buffer_rb[f] = rb;
2493	intermediate.buffer_ag[f] = ag;
2494	f++;
2495	x++;
2496	} while (x < image.x1 && f < lim);
2497	}
2498	}
2499
2500	if (blendType != BlendTransformedBilinearTiled) {
2501	#if defined(__SSE2__)
2502	const __m128i disty_ = _mm_set1_epi16(w: disty);
2503	const __m128i idisty_ = _mm_set1_epi16(w: idisty);
2504	const __m128i colorMask = _mm_set1_epi32(i: `0x00ff00ff`);
2505
2506	lim -= `3`;
2507	for (; f < lim; x += `4`, f += `4`) {
2508	// Load 4 pixels from s1, and split the alpha-green and red-blue component
2509	__m128i top = _mm_loadu_si128(p: (const __m128i)((const* uint *)(s1)+x));
2510	__m128i topAG = _mm_srli_epi16(a: top, count: `8`);
2511	__m128i topRB = _mm_and_si128(a: top, b: colorMask);
2512	// Multiplies each color component by idisty
2513	topAG = _mm_mullo_epi16 (a: topAG, b: idisty_);
2514	topRB = _mm_mullo_epi16 (a: topRB, b: idisty_);
2515
2516	// Same for the s2 vector
2517	__m128i bottom = _mm_loadu_si128(p: (const __m128i)((const* uint *)(s2)+x));
2518	__m128i bottomAG = _mm_srli_epi16(a: bottom, count: `8`);
2519	__m128i bottomRB = _mm_and_si128(a: bottom, b: colorMask);
2520	bottomAG = _mm_mullo_epi16 (a: bottomAG, b: disty_);
2521	bottomRB = _mm_mullo_epi16 (a: bottomRB, b: disty_);
2522
2523	// Add the values, and shift to only keep 8 significant bits per colors
2524	__m128i rAG =_mm_add_epi16(a: topAG, b: bottomAG);
2525	rAG = _mm_srli_epi16(a: rAG, count: `8`);
2526	_mm_storeu_si128(p: (__m128i*)(&intermediate.buffer_ag[f]), b: rAG);
2527	__m128i rRB =_mm_add_epi16(a: topRB, b: bottomRB);
2528	rRB = _mm_srli_epi16(a: rRB, count: `8`);
2529	_mm_storeu_si128(p: (__m128i*)(&intermediate.buffer_rb[f]), b: rRB);
2530	}
2531	#elif defined(__ARM_NEON__)
2532	const int16x8_t disty_ = vdupq_n_s16(disty);
2533	const int16x8_t idisty_ = vdupq_n_s16(idisty);
2534	const int16x8_t colorMask = vdupq_n_s16(`0x00ff`);
2535
2536	lim -= `3`;
2537	for (; f < lim; x += `4`, f += `4`) {
2538	// Load 4 pixels from s1, and split the alpha-green and red-blue component
2539	int16x8_t top = vld1q_s16((int16_t)((const* uint *)(s1)+x));
2540	int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), `8`));
2541	int16x8_t topRB = vandq_s16(top, colorMask);
2542	// Multiplies each color component by idisty
2543	topAG = vmulq_s16(topAG, idisty_);
2544	topRB = vmulq_s16(topRB, idisty_);
2545
2546	// Same for the s2 vector
2547	int16x8_t bottom = vld1q_s16((int16_t)((const* uint *)(s2)+x));
2548	int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), `8`));
2549	int16x8_t bottomRB = vandq_s16(bottom, colorMask);
2550	bottomAG = vmulq_s16(bottomAG, disty_);
2551	bottomRB = vmulq_s16(bottomRB, disty_);
2552
2553	// Add the values, and shift to only keep 8 significant bits per colors
2554	int16x8_t rAG = vaddq_s16(topAG, bottomAG);
2555	rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), `8`));
2556	vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG);
2557	int16x8_t rRB = vaddq_s16(topRB, bottomRB);
2558	rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), `8`));
2559	vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB);
2560	}
2561	#endif
2562	}
2563	for (; f < count; f++) { // Same as above but without simd
2564	if (blendType == BlendTransformedBilinearTiled) {
2565	if (x >= image.width) x -= image.width;
2566	} else {
2567	x = qMin(a: x, b: image.x2 - `1`);
2568	}
2569
2570	uint t = s1[x];
2571	uint b = s2[x];
2572
2573	intermediate.buffer_rb[f] = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
2574	intermediate.buffer_ag[f] = ((((t>>`8`) & `0xff00ff`) * idisty + ((b>>`8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
2575	x++;
2576	}
2577
2578	// Now interpolate the values from the intermediate.buffer to get the final result.
2579	intermediate_adder(b, end, intermediate, offset, fx, fdx);
2580	}
2581
2582	template<TextureBlendType blendType>
2583	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint b, uint end, const QTextureData &image,
2584	int &fx, int &fy, int fdx, int /fdy/)
2585	{
2586	int y1 = (fy >> `16`);
2587	int y2;
2588	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
2589	const uint s1 = (const* uint *)image.scanLine(y: y1);
2590	const uint s2 = (const* uint *)image.scanLine(y: y2);
2591	const int disty = (fy & `0x0000ffff`) >> `8`;
2592
2593	if (blendType != BlendTransformedBilinearTiled) {
2594	const qint64 min_fx = qint64(image.x1) * fixed_scale;
2595	const qint64 max_fx = qint64(image.x2 - `1`) * fixed_scale;
2596	while (b < end) {
2597	int x1 = (fx >> `16`);
2598	int x2;
2599	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
2600	if (x1 != x2)
2601	break;
2602	uint top = s1[x1];
2603	uint bot = s2[x1];
2604	*b = INTERPOLATE_PIXEL_256(x: top, a: `256` - disty, y: bot, b: disty);
2605	fx += fdx;
2606	++b;
2607	}
2608	uint *boundedEnd = end;
2609	if (fdx > `0`)
2610	boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
2611	else if (fdx < `0`)
2612	boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
2613
2614	// A fast middle part without boundary checks
2615	while (b < boundedEnd) {
2616	int x = (fx >> `16`);
2617	int distx = (fx & `0x0000ffff`) >> `8`;
2618	*b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx, disty);
2619	fx += fdx;
2620	++b;
2621	}
2622	}
2623
2624	while (b < end) {
2625	int x1 = (fx >> `16`);
2626	int x2;
2627	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1` , x1, x2);
2628	uint tl = s1[x1];
2629	uint tr = s1[x2];
2630	uint bl = s2[x1];
2631	uint br = s2[x2];
2632	int distx = (fx & `0x0000ffff`) >> `8`;
2633	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2634
2635	fx += fdx;
2636	++b;
2637	}
2638	}
2639
2640	template<TextureBlendType blendType>
2641	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint b, uint end, const QTextureData &image,
2642	int &fx, int &fy, int fdx, int /fdy/)
2643	{
2644	int y1 = (fy >> `16`);
2645	int y2;
2646	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
2647	const uint s1 = (const* uint *)image.scanLine(y: y1);
2648	const uint s2 = (const* uint *)image.scanLine(y: y2);
2649	const int disty8 = (fy & `0x0000ffff`) >> `8`;
2650	const int disty4 = (disty8 + `0x08`) >> `4`;
2651
2652	if (blendType != BlendTransformedBilinearTiled) {
2653	const qint64 min_fx = qint64(image.x1) * fixed_scale;
2654	const qint64 max_fx = qint64(image.x2 - `1`) * fixed_scale;
2655	while (b < end) {
2656	int x1 = (fx >> `16`);
2657	int x2;
2658	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
2659	if (x1 != x2)
2660	break;
2661	uint top = s1[x1];
2662	uint bot = s2[x1];
2663	*b = INTERPOLATE_PIXEL_256(x: top, a: `256` - disty8, y: bot, b: disty8);
2664	fx += fdx;
2665	++b;
2666	}
2667	uint *boundedEnd = end;
2668	if (fdx > `0`)
2669	boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
2670	else if (fdx < `0`)
2671	boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
2672	// A fast middle part without boundary checks
2673	#if defined(__SSE2__)
2674	const __m128i colorMask = _mm_set1_epi32(i: `0x00ff00ff`);
2675	const __m128i v_256 = _mm_set1_epi16(w: `256`);
2676	const __m128i v_disty = _mm_set1_epi16(w: disty4);
2677	const __m128i v_fdx = _mm_set1_epi32(i: fdx*`4`);
2678	const __m128i v_fx_r = _mm_set1_epi32(i: `0x8`);
2679	__m128i v_fx = _mm_setr_epi32(i0: fx, i1: fx + fdx, i2: fx + fdx + fdx, i3: fx + fdx + fdx + fdx);
2680
2681	while (b < boundedEnd - `3`) {
2682	__m128i offset = _mm_srli_epi32(a: v_fx, count: `16`);
2683	const int offset0 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, `4`);
2684	const int offset1 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, `4`);
2685	const int offset2 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, `4`);
2686	const int offset3 = _mm_cvtsi128_si32(a: offset);
2687	const __m128i tl = _mm_setr_epi32(i0: s1[offset0], i1: s1[offset1], i2: s1[offset2], i3: s1[offset3]);
2688	const __m128i tr = _mm_setr_epi32(i0: s1[offset0 + `1`], i1: s1[offset1 + `1`], i2: s1[offset2 + `1`], i3: s1[offset3 + `1`]);
2689	const __m128i bl = _mm_setr_epi32(i0: s2[offset0], i1: s2[offset1], i2: s2[offset2], i3: s2[offset3]);
2690	const __m128i br = _mm_setr_epi32(i0: s2[offset0 + `1`], i1: s2[offset1 + `1`], i2: s2[offset2 + `1`], i3: s2[offset3 + `1`]);
2691
2692	__m128i v_distx = _mm_srli_epi16(a: v_fx, count: `8`);
2693	v_distx = _mm_srli_epi16(a: _mm_add_epi32(a: v_distx, b: v_fx_r), count: `4`);
2694	v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
2695	v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
2696
2697	interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
2698	b += `4`;
2699	v_fx = _mm_add_epi32(a: v_fx, b: v_fdx);
2700	}
2701	fx = _mm_cvtsi128_si32(a: v_fx);
2702	#elif defined(__ARM_NEON__)
2703	const int16x8_t colorMask = vdupq_n_s16(`0x00ff`);
2704	const int16x8_t invColorMask = vmvnq_s16(colorMask);
2705	const int16x8_t v_256 = vdupq_n_s16(`256`);
2706	const int16x8_t v_disty = vdupq_n_s16(disty4);
2707	const int16x8_t v_disty_ = vshlq_n_s16(v_disty, `4`);
2708	int32x4_t v_fdx = vdupq_n_s32(fdx*`4`);
2709
2710	int32x4_t v_fx = vmovq_n_s32(fx);
2711	v_fx = vsetq_lane_s32(fx + fdx, v_fx, `1`);
2712	v_fx = vsetq_lane_s32(fx + fdx * `2`, v_fx, `2`);
2713	v_fx = vsetq_lane_s32(fx + fdx * `3`, v_fx, `3`);
2714
2715	const int32x4_t v_ffff_mask = vdupq_n_s32(`0x0000ffff`);
2716	const int32x4_t v_fx_r = vdupq_n_s32(`0x0800`);
2717
2718	while (b < boundedEnd - `3`) {
2719	uint32x4x2_t v_top, v_bot;
2720
2721	int x1 = (fx >> `16`);
2722	fx += fdx;
2723	v_top = vld2q_lane_u32(s1 + x1, v_top, `0`);
2724	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `0`);
2725	x1 = (fx >> `16`);
2726	fx += fdx;
2727	v_top = vld2q_lane_u32(s1 + x1, v_top, `1`);
2728	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `1`);
2729	x1 = (fx >> `16`);
2730	fx += fdx;
2731	v_top = vld2q_lane_u32(s1 + x1, v_top, `2`);
2732	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `2`);
2733	x1 = (fx >> `16`);
2734	fx += fdx;
2735	v_top = vld2q_lane_u32(s1 + x1, v_top, `3`);
2736	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `3`);
2737
2738	int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), `12`);
2739	v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, `16`));
2740
2741	interpolate_4_pixels_16_neon(
2742	vreinterpretq_s16_u32(v_top.val[`0`]), vreinterpretq_s16_u32(v_top.val[`1`]),
2743	vreinterpretq_s16_u32(v_bot.val[`0`]), vreinterpretq_s16_u32(v_bot.val[`1`]),
2744	vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
2745	colorMask, invColorMask, v_256, b);
2746	b+=`4`;
2747	v_fx = vaddq_s32(v_fx, v_fdx);
2748	}
2749	#endif
2750	while (b < boundedEnd) {
2751	int x = (fx >> `16`);
2752	if (hasFastInterpolate4()) {
2753	int distx8 = (fx & `0x0000ffff`) >> `8`;
2754	*b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx: distx8, disty: disty8);
2755	} else {
2756	int distx4 = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
2757	*b = interpolate_4_pixels_16(tl: s1[x], tr: s1[x + `1`], bl: s2[x], br: s2[x + `1`], distx: distx4, disty: disty4);
2758	}
2759	fx += fdx;
2760	++b;
2761	}
2762	}
2763
2764	while (b < end) {
2765	int x1 = (fx >> `16`);
2766	int x2;
2767	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
2768	uint tl = s1[x1];
2769	uint tr = s1[x2];
2770	uint bl = s2[x1];
2771	uint br = s2[x2];
2772	if (hasFastInterpolate4()) {
2773	int distx8 = (fx & `0x0000ffff`) >> `8`;
2774	*b = interpolate_4_pixels(tl, tr, bl, br, distx: distx8, disty: disty8);
2775	} else {
2776	int distx4 = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
2777	*b = interpolate_4_pixels_16(tl, tr, bl, br, distx: distx4, disty: disty4);
2778	}
2779	fx += fdx;
2780	++b;
2781	}
2782	}
2783
2784	template<TextureBlendType blendType>
2785	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint b, uint end, const QTextureData &image,
2786	int &fx, int &fy, int fdx, int fdy)
2787	{
2788	// if we are zooming more than 8 times, we use 8bit precision for the position.
2789	while (b < end) {
2790	int x1 = (fx >> `16`);
2791	int x2;
2792	int y1 = (fy >> `16`);
2793	int y2;
2794
2795	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
2796	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
2797
2798	const uint s1 = (const* uint *)image.scanLine(y: y1);
2799	const uint s2 = (const* uint *)image.scanLine(y: y2);
2800
2801	uint tl = s1[x1];
2802	uint tr = s1[x2];
2803	uint bl = s2[x1];
2804	uint br = s2[x2];
2805
2806	int distx = (fx & `0x0000ffff`) >> `8`;
2807	int disty = (fy & `0x0000ffff`) >> `8`;
2808
2809	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2810
2811	fx += fdx;
2812	fy += fdy;
2813	++b;
2814	}
2815	}
2816
2817	template<TextureBlendType blendType>
2818	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint b, uint end, const QTextureData &image,
2819	int &fx, int &fy, int fdx, int fdy)
2820	{
2821	//we are zooming less than 8x, use 4bit precision
2822	if (blendType != BlendTransformedBilinearTiled) {
2823	const qint64 min_fx = qint64(image.x1) * fixed_scale;
2824	const qint64 max_fx = qint64(image.x2 - `1`) * fixed_scale;
2825	const qint64 min_fy = qint64(image.y1) * fixed_scale;
2826	const qint64 max_fy = qint64(image.y2 - `1`) * fixed_scale;
2827	// first handle the possibly bounded part in the beginning
2828	while (b < end) {
2829	int x1 = (fx >> `16`);
2830	int x2;
2831	int y1 = (fy >> `16`);
2832	int y2;
2833	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
2834	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
2835	if (x1 != x2 && y1 != y2)
2836	break;
2837	const uint s1 = (const* uint *)image.scanLine(y: y1);
2838	const uint s2 = (const* uint *)image.scanLine(y: y2);
2839	uint tl = s1[x1];
2840	uint tr = s1[x2];
2841	uint bl = s2[x1];
2842	uint br = s2[x2];
2843	if (hasFastInterpolate4()) {
2844	int distx = (fx & `0x0000ffff`) >> `8`;
2845	int disty = (fy & `0x0000ffff`) >> `8`;
2846	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2847	} else {
2848	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
2849	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
2850	*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
2851	}
2852	fx += fdx;
2853	fy += fdy;
2854	++b;
2855	}
2856	uint *boundedEnd = end;
2857	if (fdx > `0`)
2858	boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
2859	else if (fdx < `0`)
2860	boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
2861	if (fdy > `0`)
2862	boundedEnd = qMin(a: boundedEnd, b: b + (max_fy - fy) / fdy);
2863	else if (fdy < `0`)
2864	boundedEnd = qMin(a: boundedEnd, b: b + (min_fy - fy) / fdy);
2865
2866	// until boundedEnd we can now have a fast middle part without boundary checks
2867	#if defined(__SSE2__)
2868	const __m128i colorMask = _mm_set1_epi32(i: `0x00ff00ff`);
2869	const __m128i v_256 = _mm_set1_epi16(w: `256`);
2870	const __m128i v_fdx = _mm_set1_epi32(i: fdx*`4`);
2871	const __m128i v_fdy = _mm_set1_epi32(i: fdy*`4`);
2872	const __m128i v_fxy_r = _mm_set1_epi32(i: `0x8`);
2873	__m128i v_fx = _mm_setr_epi32(i0: fx, i1: fx + fdx, i2: fx + fdx + fdx, i3: fx + fdx + fdx + fdx);
2874	__m128i v_fy = _mm_setr_epi32(i0: fy, i1: fy + fdy, i2: fy + fdy + fdy, i3: fy + fdy + fdy + fdy);
2875
2876	const uchar *textureData = image.imageData;
2877	const qsizetype bytesPerLine = image.bytesPerLine;
2878	const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/`4`), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
2879
2880	while (b < boundedEnd - `3`) {
2881	const __m128i vy = _mm_packs_epi32(a: _mm_srli_epi32(a: v_fy, count: `16`), b: _mm_setzero_si128());
2882	// 4x16bit 4x16bit -> 4x32bit*
2883	__m128i offset = _mm_unpacklo_epi16(a: _mm_mullo_epi16(a: vy, b: vbpl), b: _mm_mulhi_epi16(a: vy, b: vbpl));
2884	offset = _mm_add_epi32(a: offset, b: _mm_srli_epi32(a: v_fx, count: `16`));
2885	const int offset0 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, `4`);
2886	const int offset1 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, `4`);
2887	const int offset2 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, `4`);
2888	const int offset3 = _mm_cvtsi128_si32(a: offset);
2889	const uint topData = (const* uint *)(textureData);
2890	const __m128i tl = _mm_setr_epi32(i0: topData[offset0], i1: topData[offset1], i2: topData[offset2], i3: topData[offset3]);
2891	const __m128i tr = _mm_setr_epi32(i0: topData[offset0 + `1`], i1: topData[offset1 + `1`], i2: topData[offset2 + `1`], i3: topData[offset3 + `1`]);
2892	const uint bottomData = (const* uint *)(textureData + bytesPerLine);
2893	const __m128i bl = _mm_setr_epi32(i0: bottomData[offset0], i1: bottomData[offset1], i2: bottomData[offset2], i3: bottomData[offset3]);
2894	const __m128i br = _mm_setr_epi32(i0: bottomData[offset0 + `1`], i1: bottomData[offset1 + `1`], i2: bottomData[offset2 + `1`], i3: bottomData[offset3 + `1`]);
2895
2896	__m128i v_distx = _mm_srli_epi16(a: v_fx, count: `8`);
2897	__m128i v_disty = _mm_srli_epi16(a: v_fy, count: `8`);
2898	v_distx = _mm_srli_epi16(a: _mm_add_epi32(a: v_distx, b: v_fxy_r), count: `4`);
2899	v_disty = _mm_srli_epi16(a: _mm_add_epi32(a: v_disty, b: v_fxy_r), count: `4`);
2900	v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
2901	v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
2902	v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
2903	v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
2904
2905	interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
2906	b += `4`;
2907	v_fx = _mm_add_epi32(a: v_fx, b: v_fdx);
2908	v_fy = _mm_add_epi32(a: v_fy, b: v_fdy);
2909	}
2910	fx = _mm_cvtsi128_si32(a: v_fx);
2911	fy = _mm_cvtsi128_si32(a: v_fy);
2912	#elif defined(__ARM_NEON__)
2913	const int16x8_t colorMask = vdupq_n_s16(`0x00ff`);
2914	const int16x8_t invColorMask = vmvnq_s16(colorMask);
2915	const int16x8_t v_256 = vdupq_n_s16(`256`);
2916	int32x4_t v_fdx = vdupq_n_s32(fdx * `4`);
2917	int32x4_t v_fdy = vdupq_n_s32(fdy * `4`);
2918
2919	const uchar *textureData = image.imageData;
2920	const int bytesPerLine = image.bytesPerLine;
2921
2922	int32x4_t v_fx = vmovq_n_s32(fx);
2923	int32x4_t v_fy = vmovq_n_s32(fy);
2924	v_fx = vsetq_lane_s32(fx + fdx, v_fx, `1`);
2925	v_fy = vsetq_lane_s32(fy + fdy, v_fy, `1`);
2926	v_fx = vsetq_lane_s32(fx + fdx * `2`, v_fx, `2`);
2927	v_fy = vsetq_lane_s32(fy + fdy * `2`, v_fy, `2`);
2928	v_fx = vsetq_lane_s32(fx + fdx * `3`, v_fx, `3`);
2929	v_fy = vsetq_lane_s32(fy + fdy * `3`, v_fy, `3`);
2930
2931	const int32x4_t v_ffff_mask = vdupq_n_s32(`0x0000ffff`);
2932	const int32x4_t v_round = vdupq_n_s32(`0x0800`);
2933
2934	while (b < boundedEnd - `3`) {
2935	uint32x4x2_t v_top, v_bot;
2936
2937	int x1 = (fx >> `16`);
2938	int y1 = (fy >> `16`);
2939	fx += fdx; fy += fdy;
2940	const uchar sl = textureData + bytesPerLine y1;
2941	const uint s1 = reinterpret_cast<const* uint *>(sl);
2942	const uint s2 = reinterpret_cast<const* uint *>(sl + bytesPerLine);
2943	v_top = vld2q_lane_u32(s1 + x1, v_top, `0`);
2944	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `0`);
2945	x1 = (fx >> `16`);
2946	y1 = (fy >> `16`);
2947	fx += fdx; fy += fdy;
2948	sl = textureData + bytesPerLine * y1;
2949	s1 = reinterpret_cast<const uint *>(sl);
2950	s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
2951	v_top = vld2q_lane_u32(s1 + x1, v_top, `1`);
2952	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `1`);
2953	x1 = (fx >> `16`);
2954	y1 = (fy >> `16`);
2955	fx += fdx; fy += fdy;
2956	sl = textureData + bytesPerLine * y1;
2957	s1 = reinterpret_cast<const uint *>(sl);
2958	s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
2959	v_top = vld2q_lane_u32(s1 + x1, v_top, `2`);
2960	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `2`);
2961	x1 = (fx >> `16`);
2962	y1 = (fy >> `16`);
2963	fx += fdx; fy += fdy;
2964	sl = textureData + bytesPerLine * y1;
2965	s1 = reinterpret_cast<const uint *>(sl);
2966	s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
2967	v_top = vld2q_lane_u32(s1 + x1, v_top, `3`);
2968	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `3`);
2969
2970	int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), `12`);
2971	int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), `12`);
2972	v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, `16`));
2973	v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, `16`));
2974	int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), `4`);
2975
2976	interpolate_4_pixels_16_neon(
2977	vreinterpretq_s16_u32(v_top.val[`0`]), vreinterpretq_s16_u32(v_top.val[`1`]),
2978	vreinterpretq_s16_u32(v_bot.val[`0`]), vreinterpretq_s16_u32(v_bot.val[`1`]),
2979	vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
2980	v_disty_, colorMask, invColorMask, v_256, b);
2981	b += `4`;
2982	v_fx = vaddq_s32(v_fx, v_fdx);
2983	v_fy = vaddq_s32(v_fy, v_fdy);
2984	}
2985	#endif
2986	while (b < boundedEnd) {
2987	int x = (fx >> `16`);
2988	int y = (fy >> `16`);
2989
2990	const uint s1 = (const* uint *)image.scanLine(y);
2991	const uint s2 = (const* uint *)image.scanLine(y: y + `1`);
2992
2993	if (hasFastInterpolate4()) {
2994	int distx = (fx & `0x0000ffff`) >> `8`;
2995	int disty = (fy & `0x0000ffff`) >> `8`;
2996	*b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx, disty);
2997	} else {
2998	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
2999	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
3000	*b = interpolate_4_pixels_16(tl: s1[x], tr: s1[x + `1`], bl: s2[x], br: s2[x + `1`], distx, disty);
3001	}
3002
3003	fx += fdx;
3004	fy += fdy;
3005	++b;
3006	}
3007	}
3008
3009	while (b < end) {
3010	int x1 = (fx >> `16`);
3011	int x2;
3012	int y1 = (fy >> `16`);
3013	int y2;
3014
3015	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3016	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3017
3018	const uint s1 = (const* uint *)image.scanLine(y: y1);
3019	const uint s2 = (const* uint *)image.scanLine(y: y2);
3020
3021	uint tl = s1[x1];
3022	uint tr = s1[x2];
3023	uint bl = s2[x1];
3024	uint br = s2[x2];
3025
3026	if (hasFastInterpolate4()) {
3027	int distx = (fx & `0x0000ffff`) >> `8`;
3028	int disty = (fy & `0x0000ffff`) >> `8`;
3029	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
3030	} else {
3031	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
3032	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
3033	*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3034	}
3035
3036	fx += fdx;
3037	fy += fdy;
3038	++b;
3039	}
3040	}
3041
3042
3043	static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[`2`][NFastTransformTypes] = {
3044	{
3045	fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>,
3046	fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
3047	fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
3048	fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
3049	fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
3050	},
3051	{
3052	fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>,
3053	fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
3054	fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
3055	fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
3056	fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled>
3057	}
3058	};
3059
3060	template<TextureBlendType blendType> / blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled /
3061	static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint buffer, const* Operator *,
3062	const QSpanData data, int* y, int x,
3063	int length)
3064	{
3065	const qreal cx = x + qreal(`0.5`);
3066	const qreal cy = y + qreal(`0.5`);
3067	Q_CONSTEXPR int tiled = (blendType == BlendTransformedBilinearTiled) ? `1` : `0`;
3068
3069	uint *end = buffer + length;
3070	uint *b = buffer;
3071	if (canUseFastMatrixPath(cx, cy, length, data)) {
3072	// The increment pr x in the scanline
3073	int fdx = (int)(data->m11 * fixed_scale);
3074	int fdy = (int)(data->m12 * fixed_scale);
3075
3076	int fx = int((data->m21 * cy
3077	+ data->m11 * cx + data->dx) * fixed_scale);
3078	int fy = int((data->m22 * cy
3079	+ data->m12 * cx + data->dy) * fixed_scale);
3080
3081	fx -= half_point;
3082	fy -= half_point;
3083
3084	if (fdy == `0`) { // simple scale, no rotation or shear
3085	if (qAbs(t: fdx) <= fixed_scale) {
3086	// simple scale up on X
3087	bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3088	} else if (qAbs(t: fdx) <= `2` * fixed_scale) {
3089	// simple scale down on X, less than 2x
3090	const int mid = (length * `2` < BufferSize) ? length : ((length + `1`) / `2`);
3091	bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
3092	if (mid != length)
3093	bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
3094	} else if (qAbs(t: data->m22) < qreal(`1.`/`8.`)) {
3095	// scale up more than 8x (on Y)
3096	bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3097	} else {
3098	// scale down on X
3099	bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3100	}
3101	} else { // rotation or shear
3102	if (qAbs(t: data->m11) < qreal(`1.`/`8.`) \|\| qAbs(t: data->m22) < qreal(`1.`/`8.`) ) {
3103	// if we are zooming more than 8 times, we use 8bit precision for the position.
3104	bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
3105	} else {
3106	// we are zooming less than 8x, use 4bit precision
3107	bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
3108	}
3109	}
3110	} else {
3111	const QTextureData &image = data->texture;
3112
3113	const qreal fdx = data->m11;
3114	const qreal fdy = data->m12;
3115	const qreal fdw = data->m13;
3116
3117	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3118	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3119	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3120
3121	while (b < end) {
3122	const qreal iw = fw == `0` ? `1` : `1` / fw;
3123	const qreal px = fx * iw - qreal(`0.5`);
3124	const qreal py = fy * iw - qreal(`0.5`);
3125
3126	int x1 = int(px) - (px < `0`);
3127	int x2;
3128	int y1 = int(py) - (py < `0`);
3129	int y2;
3130
3131	int distx = int((px - x1) * `256`);
3132	int disty = int((py - y1) * `256`);
3133
3134	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3135	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3136
3137	const uint s1 = (const* uint *)data->texture.scanLine(y: y1);
3138	const uint s2 = (const* uint *)data->texture.scanLine(y: y2);
3139
3140	uint tl = s1[x1];
3141	uint tr = s1[x2];
3142	uint bl = s2[x1];
3143	uint br = s2[x2];
3144
3145	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
3146
3147	fx += fdx;
3148	fy += fdy;
3149	fw += fdw;
3150	//force increment to avoid /0
3151	if (!fw) {
3152	fw += fdw;
3153	}
3154	++b;
3155	}
3156	}
3157
3158	return buffer;
3159	}
3160
3161	template<TextureBlendType blendType>
3162	static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint b, uint end, const QTextureData &image,
3163	int &fx, int &fy, int fdx, int /fdy/)
3164	{
3165	const QPixelLayout *layout = &qPixelLayouts[image.format];
3166	const QVector<QRgb> *clut = image.colorTable;
3167	const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM;
3168
3169	int y1 = (fy >> `16`);
3170	int y2;
3171	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3172	const uchar *s1 = image.scanLine(y: y1);
3173	const uchar *s2 = image.scanLine(y: y2);
3174
3175	const int disty = (fy & `0x0000ffff`) >> `8`;
3176	const int idisty = `256` - disty;
3177	const int length = end - b;
3178
3179	// The intermediate buffer is generated in the positive direction
3180	const int adjust = (fdx < `0`) ? fdx * length : `0`;
3181	const int offset = (fx + adjust) >> `16`;
3182	int x = offset;
3183
3184	IntermediateBuffer intermediate;
3185	uint *buf1 = intermediate.buffer_rb;
3186	uint *buf2 = intermediate.buffer_ag;
3187	const uint *ptr1;
3188	const uint *ptr2;
3189
3190	int count = (qint64(length) * qAbs(t: fdx) + fixed_scale - `1`) / fixed_scale + `2`;
3191	Q_ASSERT(count <= BufferSize + `2`);
3192
3193	if (blendType == BlendTransformedBilinearTiled) {
3194	x %= image.width;
3195	if (x < `0`)
3196	x += image.width;
3197	int len1 = qMin(a: count, b: image.width - x);
3198	int len2 = qMin(a: x, b: count - len1);
3199
3200	ptr1 = fetch(buf1, s1, x, len1, clut, nullptr);
3201	ptr2 = fetch(buf2, s2, x, len1, clut, nullptr);
3202	for (int i = `0`; i < len1; ++i) {
3203	uint t = ptr1[i];
3204	uint b = ptr2[i];
3205	buf1[i] = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
3206	buf2[i] = ((((t >> `8`) & `0xff00ff`) * idisty + ((b >> `8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
3207	}
3208
3209	if (len2) {
3210	ptr1 = fetch(buf1 + len1, s1, `0`, len2, clut, nullptr);
3211	ptr2 = fetch(buf2 + len1, s2, `0`, len2, clut, nullptr);
3212	for (int i = `0`; i < len2; ++i) {
3213	uint t = ptr1[i];
3214	uint b = ptr2[i];
3215	buf1[i + len1] = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
3216	buf2[i + len1] = ((((t >> `8`) & `0xff00ff`) * idisty + ((b >> `8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
3217	}
3218	}
3219	// Generate the rest by repeatedly repeating the previous set of pixels
3220	for (int i = image.width; i < count; ++i) {
3221	buf1[i] = buf1[i - image.width];
3222	buf2[i] = buf2[i - image.width];
3223	}
3224	} else {
3225	int start = qMax(a: x, b: image.x1);
3226	int end = qMin(a: x + count, b: image.x2);
3227	int len = qMax(a: `1`, b: end - start);
3228	int leading = start - x;
3229
3230	ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr);
3231	ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr);
3232
3233	for (int i = `0`; i < len; ++i) {
3234	uint t = ptr1[i];
3235	uint b = ptr2[i];
3236	buf1[i + leading] = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
3237	buf2[i + leading] = ((((t >> `8`) & `0xff00ff`) * idisty + ((b >> `8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
3238	}
3239
3240	for (int i = `0`; i < leading; ++i) {
3241	buf1[i] = buf1[leading];
3242	buf2[i] = buf2[leading];
3243	}
3244	for (int i = leading + len; i < count; ++i) {
3245	buf1[i] = buf1[i - `1`];
3246	buf2[i] = buf2[i - `1`];
3247	}
3248	}
3249
3250	// Now interpolate the values from the intermediate.buffer to get the final result.
3251	intermediate_adder(b, end, intermediate, offset, fx, fdx);
3252	}
3253
3254
3255	template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
3256	static void QT_FASTCALL fetchTransformedBilinear_fetcher(T buf1, T buf2, const int len, const QTextureData &image,
3257	int fx, int fy, const int fdx, const int fdy)
3258	{
3259	const QPixelLayout &layout = qPixelLayouts[image.format];
3260	constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
3261	if (useFetch)
3262	Q_ASSERT(sizeof(T) == sizeof(uint));
3263	else
3264	Q_ASSERT(layout.bpp == bpp);
3265	const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel<bpp>;
3266	if (fdy == `0`) {
3267	int y1 = (fy >> `16`);
3268	int y2;
3269	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3270	const uchar *s1 = image.scanLine(y: y1);
3271	const uchar *s2 = image.scanLine(y: y2);
3272
3273	int i = `0`;
3274	if (blendType == BlendTransformedBilinear) {
3275	for (; i < len; ++i) {
3276	int x1 = (fx >> `16`);
3277	int x2;
3278	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3279	if (x1 != x2)
3280	break;
3281	if (useFetch) {
3282	buf1[i * `2` + `0`] = buf1[i * `2` + `1`] = fetch1(s1, x1);
3283	buf2[i * `2` + `0`] = buf2[i * `2` + `1`] = fetch1(s2, x1);
3284	} else {
3285	buf1[i * `2` + `0`] = buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x1];
3286	buf2[i * `2` + `0`] = buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x1];
3287	}
3288	fx += fdx;
3289	}
3290	int fastLen = len;
3291	if (fdx > `0`)
3292	fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - `1`) * fixed_scale - fx) / fdx));
3293	else if (fdx < `0`)
3294	fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
3295
3296	for (; i < fastLen; ++i) {
3297	int x = (fx >> `16`);
3298	if (useFetch) {
3299	buf1[i * `2` + `0`] = fetch1(s1, x);
3300	buf1[i * `2` + `1`] = fetch1(s1, x + `1`);
3301	buf2[i * `2` + `0`] = fetch1(s2, x);
3302	buf2[i * `2` + `1`] = fetch1(s2, x + `1`);
3303	} else {
3304	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x];
3305	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x + `1`];
3306	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x];
3307	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x + `1`];
3308	}
3309	fx += fdx;
3310	}
3311	}
3312
3313	for (; i < len; ++i) {
3314	int x1 = (fx >> `16`);
3315	int x2;
3316	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3317	if (useFetch) {
3318	buf1[i * `2` + `0`] = fetch1(s1, x1);
3319	buf1[i * `2` + `1`] = fetch1(s1, x2);
3320	buf2[i * `2` + `0`] = fetch1(s2, x1);
3321	buf2[i * `2` + `1`] = fetch1(s2, x2);
3322	} else {
3323	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x1];
3324	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x2];
3325	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x1];
3326	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x2];
3327	}
3328	fx += fdx;
3329	}
3330	} else {
3331	int i = `0`;
3332	if (blendType == BlendTransformedBilinear) {
3333	for (; i < len; ++i) {
3334	int x1 = (fx >> `16`);
3335	int x2;
3336	int y1 = (fy >> `16`);
3337	int y2;
3338	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3339	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3340	if (x1 != x2 && y1 != y2)
3341	break;
3342	const uchar *s1 = image.scanLine(y: y1);
3343	const uchar *s2 = image.scanLine(y: y2);
3344	if (useFetch) {
3345	buf1[i * `2` + `0`] = fetch1(s1, x1);
3346	buf1[i * `2` + `1`] = fetch1(s1, x2);
3347	buf2[i * `2` + `0`] = fetch1(s2, x1);
3348	buf2[i * `2` + `1`] = fetch1(s2, x2);
3349	} else {
3350	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x1];
3351	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x2];
3352	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x1];
3353	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x2];
3354	}
3355	fx += fdx;
3356	fy += fdy;
3357	}
3358	int fastLen = len;
3359	if (fdx > `0`)
3360	fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - `1`) * fixed_scale - fx) / fdx));
3361	else if (fdx < `0`)
3362	fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
3363	if (fdy > `0`)
3364	fastLen = qMin(a: fastLen, b: int((qint64(image.y2 - `1`) * fixed_scale - fy) / fdy));
3365	else if (fdy < `0`)
3366	fastLen = qMin(a: fastLen, b: int((qint64(image.y1) * fixed_scale - fy) / fdy));
3367
3368	for (; i < fastLen; ++i) {
3369	int x = (fx >> `16`);
3370	int y = (fy >> `16`);
3371	const uchar *s1 = image.scanLine(y);
3372	const uchar *s2 = s1 + image.bytesPerLine;
3373	if (useFetch) {
3374	buf1[i * `2` + `0`] = fetch1(s1, x);
3375	buf1[i * `2` + `1`] = fetch1(s1, x + `1`);
3376	buf2[i * `2` + `0`] = fetch1(s2, x);
3377	buf2[i * `2` + `1`] = fetch1(s2, x + `1`);
3378	} else {
3379	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x];
3380	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x + `1`];
3381	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x];
3382	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x + `1`];
3383	}
3384	fx += fdx;
3385	fy += fdy;
3386	}
3387	}
3388
3389	for (; i < len; ++i) {
3390	int x1 = (fx >> `16`);
3391	int x2;
3392	int y1 = (fy >> `16`);
3393	int y2;
3394	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3395	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3396
3397	const uchar *s1 = image.scanLine(y: y1);
3398	const uchar *s2 = image.scanLine(y: y2);
3399	if (useFetch) {
3400	buf1[i * `2` + `0`] = fetch1(s1, x1);
3401	buf1[i * `2` + `1`] = fetch1(s1, x2);
3402	buf2[i * `2` + `0`] = fetch1(s2, x1);
3403	buf2[i * `2` + `1`] = fetch1(s2, x2);
3404	} else {
3405	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x1];
3406	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x2];
3407	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x1];
3408	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x2];
3409	}
3410	fx += fdx;
3411	fy += fdy;
3412	}
3413	}
3414	}
3415
3416	// blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
3417	template<TextureBlendType blendType, QPixelLayout::BPP bpp>
3418	static const uint QT_FASTCALL fetchTransformedBilinear(uint buffer, const Operator *,
3419	const QSpanData data, int* y, int x, int length)
3420	{
3421	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
3422	const QVector<QRgb> *clut = data->texture.colorTable;
3423	Q_ASSERT(bpp == QPixelLayout::BPPNone \|\| layout->bpp == bpp);
3424
3425	const qreal cx = x + qreal(`0.5`);
3426	const qreal cy = y + qreal(`0.5`);
3427
3428	if (canUseFastMatrixPath(cx, cy, length, data)) {
3429	// The increment pr x in the scanline
3430	int fdx = (int)(data->m11 * fixed_scale);
3431	int fdy = (int)(data->m12 * fixed_scale);
3432
3433	int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3434	int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3435
3436	fx -= half_point;
3437	fy -= half_point;
3438
3439	if (fdy == `0`) { // simple scale, no rotation or shear
3440	if (qAbs(t: fdx) <= fixed_scale) { // scale up on X
3441	fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
3442	} else if (qAbs(t: fdx) <= `2` * fixed_scale) { // scale down on X less than 2x
3443	const int mid = (length * `2` < BufferSize) ? length : ((length + `1`) / `2`);
3444	fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
3445	if (mid != length)
3446	fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
3447	} else {
3448	const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
3449
3450	uint buf1[BufferSize];
3451	uint buf2[BufferSize];
3452	uint *b = buffer;
3453	while (length) {
3454	int len = qMin(a: length, b: BufferSize / `2`);
3455	fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, `0`);
3456	layout->convertToARGB32PM(buf1, len * `2`, clut);
3457	layout->convertToARGB32PM(buf2, len * `2`, clut);
3458
3459	if (hasFastInterpolate4() \|\| qAbs(t: data->m22) < qreal(`1.`/`8.`)) { // scale up more than 8x (on Y)
3460	int disty = (fy & `0x0000ffff`) >> `8`;
3461	for (int i = `0`; i < len; ++i) {
3462	int distx = (fx & `0x0000ffff`) >> `8`;
3463	b[i] = interpolate_4_pixels(t: buf1 + i * `2`, b: buf2 + i * `2`, distx, disty);
3464	fx += fdx;
3465	}
3466	} else {
3467	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
3468	for (int i = `0`; i < len; ++i) {
3469	uint tl = buf1[i * `2` + `0`];
3470	uint tr = buf1[i * `2` + `1`];
3471	uint bl = buf2[i * `2` + `0`];
3472	uint br = buf2[i * `2` + `1`];
3473	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
3474	b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3475	fx += fdx;
3476	}
3477	}
3478	length -= len;
3479	b += len;
3480	}
3481	}
3482	} else { // rotation or shear
3483	const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
3484
3485	uint buf1[BufferSize];
3486	uint buf2[BufferSize];
3487	uint *b = buffer;
3488	while (length) {
3489	int len = qMin(a: length, b: BufferSize / `2`);
3490	fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3491	layout->convertToARGB32PM(buf1, len * `2`, clut);
3492	layout->convertToARGB32PM(buf2, len * `2`, clut);
3493
3494	if (hasFastInterpolate4() \|\| qAbs(t: data->m11) < qreal(`1.`/`8.`) \|\| qAbs(t: data->m22) < qreal(`1.`/`8.`)) {
3495	// If we are zooming more than 8 times, we use 8bit precision for the position.
3496	for (int i = `0`; i < len; ++i) {
3497	int distx = (fx & `0x0000ffff`) >> `8`;
3498	int disty = (fy & `0x0000ffff`) >> `8`;
3499
3500	b[i] = interpolate_4_pixels(t: buf1 + i * `2`, b: buf2 + i * `2`, distx, disty);
3501	fx += fdx;
3502	fy += fdy;
3503	}
3504	} else {
3505	// We are zooming less than 8x, use 4bit precision
3506	for (int i = `0`; i < len; ++i) {
3507	uint tl = buf1[i * `2` + `0`];
3508	uint tr = buf1[i * `2` + `1`];
3509	uint bl = buf2[i * `2` + `0`];
3510	uint br = buf2[i * `2` + `1`];
3511
3512	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
3513	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
3514
3515	b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3516	fx += fdx;
3517	fy += fdy;
3518	}
3519	}
3520
3521	length -= len;
3522	b += len;
3523	}
3524	}
3525	} else {
3526	// When templated 'fetch' should be inlined at compile time:
3527	const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : fetchPixel<bpp>;
3528
3529	const QTextureData &image = data->texture;
3530
3531	const qreal fdx = data->m11;
3532	const qreal fdy = data->m12;
3533	const qreal fdw = data->m13;
3534
3535	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3536	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3537	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3538
3539	uint buf1[BufferSize];
3540	uint buf2[BufferSize];
3541	uint *b = buffer;
3542
3543	int distxs[BufferSize / `2`];
3544	int distys[BufferSize / `2`];
3545
3546	while (length) {
3547	int len = qMin(a: length, b: BufferSize / `2`);
3548	for (int i = `0`; i < len; ++i) {
3549	const qreal iw = fw == `0` ? `1` : `1` / fw;
3550	const qreal px = fx * iw - qreal(`0.5`);
3551	const qreal py = fy * iw - qreal(`0.5`);
3552
3553	int x1 = int(px) - (px < `0`);
3554	int x2;
3555	int y1 = int(py) - (py < `0`);
3556	int y2;
3557
3558	distxs[i] = int((px - x1) * `256`);
3559	distys[i] = int((py - y1) * `256`);
3560
3561	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3562	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3563
3564	const uchar *s1 = data->texture.scanLine(y: y1);
3565	const uchar *s2 = data->texture.scanLine(y: y2);
3566	buf1[i * `2` + `0`] = fetch1(s1, x1);
3567	buf1[i * `2` + `1`] = fetch1(s1, x2);
3568	buf2[i * `2` + `0`] = fetch1(s2, x1);
3569	buf2[i * `2` + `1`] = fetch1(s2, x2);
3570
3571	fx += fdx;
3572	fy += fdy;
3573	fw += fdw;
3574	//force increment to avoid /0
3575	if (!fw)
3576	fw += fdw;
3577	}
3578
3579	layout->convertToARGB32PM(buf1, len * `2`, clut);
3580	layout->convertToARGB32PM(buf2, len * `2`, clut);
3581
3582	for (int i = `0`; i < len; ++i) {
3583	int distx = distxs[i];
3584	int disty = distys[i];
3585
3586	b[i] = interpolate_4_pixels(t: buf1 + i * `2`, b: buf2 + i * `2`, distx, disty);
3587	}
3588	length -= len;
3589	b += len;
3590	}
3591	}
3592
3593	return buffer;
3594	}
3595
3596	#if QT_CONFIG(raster_64bit)
3597	template<TextureBlendType blendType>
3598	static const QRgba64 QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 buffer, const QSpanData *data,
3599	int y, int x, int length)
3600	{
3601	const QTextureData &texture = data->texture;
3602	const QPixelLayout *layout = &qPixelLayouts[texture.format];
3603	const QVector<QRgb> *clut = data->texture.colorTable;
3604
3605	const qreal cx = x + qreal(`0.5`);
3606	const qreal cy = y + qreal(`0.5`);
3607
3608	uint sbuf1[BufferSize];
3609	uint sbuf2[BufferSize];
3610	alignas(`8`) QRgba64 buf1[BufferSize];
3611	alignas(`8`) QRgba64 buf2[BufferSize];
3612	QRgba64 *end = buffer + length;
3613	QRgba64 *b = buffer;
3614
3615	if (canUseFastMatrixPath(cx, cy, length, data)) {
3616	// The increment pr x in the scanline
3617	const int fdx = (int)(data->m11 * fixed_scale);
3618	const int fdy = (int)(data->m12 * fixed_scale);
3619
3620	int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3621	int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3622
3623	fx -= half_point;
3624	fy -= half_point;
3625
3626	const auto fetcher =
3627	(layout->bpp == QPixelLayout::BPP32)
3628	? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
3629	: fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
3630
3631	if (fdy == `0`) { //simple scale, no rotation
3632	while (length) {
3633	int len = qMin(a: length, b: BufferSize / `2`);
3634	int disty = (fy & `0x0000ffff`);
3635	#if defined(__SSE2__)
3636	const __m128i vdy = _mm_set1_epi16(w: disty);
3637	const __m128i vidy = _mm_set1_epi16(w: `0x10000` - disty);
3638	#endif
3639	fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
3640
3641	layout->convertToRGBA64PM(buf1, sbuf1, len * `2`, clut, nullptr);
3642	if (disty)
3643	layout->convertToRGBA64PM(buf2, sbuf2, len * `2`, clut, nullptr);
3644
3645	for (int i = `0`; i < len; ++i) {
3646	int distx = (fx & `0x0000ffff`);
3647	#if defined(__SSE2__)
3648	__m128i vt = _mm_loadu_si128(p: (const __m128i)(buf1 + i`2`));
3649	if (disty) {
3650	__m128i vb = _mm_loadu_si128(p: (const __m128i)(buf2 + i`2`));
3651	vt = _mm_mulhi_epu16(a: vt, b: vidy);
3652	vb = _mm_mulhi_epu16(a: vb, b: vdy);
3653	vt = _mm_add_epi16(a: vt, b: vb);
3654	}
3655	if (distx) {
3656	const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
3657	const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(`0x10000` - distx), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
3658	vt = _mm_mulhi_epu16(a: vt, b: _mm_unpacklo_epi64(a: vidistx, b: vdistx));
3659	vt = _mm_add_epi16(a: vt, _mm_srli_si128(vt, `8`));
3660	}
3661	_mm_storel_epi64(p: (__m128i*)(b+i), a: vt);
3662	#else
3663	b[i] = interpolate_4_pixels_rgb64(buf1 + i`2`, buf2 + i`2`, distx, disty);
3664	#endif
3665	fx += fdx;
3666	}
3667	length -= len;
3668	b += len;
3669	}
3670	} else { // rotation or shear
3671	while (b < end) {
3672	int len = qMin(a: length, b: BufferSize / `2`);
3673
3674	fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
3675
3676	layout->convertToRGBA64PM(buf1, sbuf1, len * `2`, clut, nullptr);
3677	layout->convertToRGBA64PM(buf2, sbuf2, len * `2`, clut, nullptr);
3678
3679	for (int i = `0`; i < len; ++i) {
3680	int distx = (fx & `0x0000ffff`);
3681	int disty = (fy & `0x0000ffff`);
3682	b[i] = interpolate_4_pixels_rgb64(t: buf1 + i`2`, b: buf2 + i`2`, distx, disty);
3683	fx += fdx;
3684	fy += fdy;
3685	}
3686
3687	length -= len;
3688	b += len;
3689	}
3690	}
3691	} else { // !(data->fast_matrix)
3692	const QTextureData &image = data->texture;
3693
3694	const qreal fdx = data->m11;
3695	const qreal fdy = data->m12;
3696	const qreal fdw = data->m13;
3697
3698	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3699	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3700	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3701
3702	FetchPixelFunc fetch = qFetchPixel[layout->bpp];
3703
3704	int distxs[BufferSize / `2`];
3705	int distys[BufferSize / `2`];
3706
3707	while (b < end) {
3708	int len = qMin(a: length, b: BufferSize / `2`);
3709	for (int i = `0`; i < len; ++i) {
3710	const qreal iw = fw == `0` ? `1` : `1` / fw;
3711	const qreal px = fx * iw - qreal(`0.5`);
3712	const qreal py = fy * iw - qreal(`0.5`);
3713
3714	int x1 = qFloor(v: px);
3715	int x2;
3716	int y1 = qFloor(v: py);
3717	int y2;
3718
3719	distxs[i] = int((px - x1) * (`1`<<`16`));
3720	distys[i] = int((py - y1) * (`1`<<`16`));
3721
3722	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3723	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3724
3725	const uchar *s1 = texture.scanLine(y: y1);
3726	const uchar *s2 = texture.scanLine(y: y2);
3727
3728	sbuf1[i * `2` + `0`] = fetch(s1, x1);
3729	sbuf1[i * `2` + `1`] = fetch(s1, x2);
3730	sbuf2[i * `2` + `0`] = fetch(s2, x1);
3731	sbuf2[i * `2` + `1`] = fetch(s2, x2);
3732
3733	fx += fdx;
3734	fy += fdy;
3735	fw += fdw;
3736	//force increment to avoid /0
3737	if (!fw)
3738	fw += fdw;
3739	}
3740
3741	layout->convertToRGBA64PM(buf1, sbuf1, len * `2`, clut, nullptr);
3742	layout->convertToRGBA64PM(buf2, sbuf2, len * `2`, clut, nullptr);
3743
3744	for (int i = `0`; i < len; ++i) {
3745	int distx = distxs[i];
3746	int disty = distys[i];
3747	b[i] = interpolate_4_pixels_rgb64(t: buf1 + i`2`, b: buf2 + i`2`, distx, disty);
3748	}
3749
3750	length -= len;
3751	b += len;
3752	}
3753	}
3754	return buffer;
3755	}
3756
3757	template<TextureBlendType blendType>
3758	static const QRgba64 QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 buffer, const QSpanData *data,
3759	int y, int x, int length)
3760	{
3761	const QTextureData &texture = data->texture;
3762	Q_ASSERT(qPixelLayouts[texture.format].bpp == QPixelLayout::BPP64);
3763	const auto convert = (data->texture.format == QImage::Format_RGBA64) ? convertRGBA64ToRGBA64PM : convertRGBA64PMToRGBA64PM;
3764
3765	const qreal cx = x + qreal(`0.5`);
3766	const qreal cy = y + qreal(`0.5`);
3767
3768	alignas(`8`) QRgba64 buf1[BufferSize];
3769	alignas(`8`) QRgba64 buf2[BufferSize];
3770	QRgba64 *end = buffer + length;
3771	QRgba64 *b = buffer;
3772
3773	if (canUseFastMatrixPath(cx, cy, length, data)) {
3774	// The increment pr x in the scanline
3775	const int fdx = (int)(data->m11 * fixed_scale);
3776	const int fdy = (int)(data->m12 * fixed_scale);
3777
3778	int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3779	int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3780
3781	fx -= half_point;
3782	fy -= half_point;
3783	const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
3784
3785	if (fdy == `0`) { //simple scale, no rotation
3786	while (length) {
3787	int len = qMin(a: length, b: BufferSize / `2`);
3788	int disty = (fy & `0x0000ffff`);
3789	#if defined(__SSE2__)
3790	const __m128i vdy = _mm_set1_epi16(w: disty);
3791	const __m128i vidy = _mm_set1_epi16(w: `0x10000` - disty);
3792	#endif
3793	fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3794
3795	convert(buf1, len * `2`);
3796	if (disty)
3797	convert(buf2, len * `2`);
3798
3799	for (int i = `0`; i < len; ++i) {
3800	int distx = (fx & `0x0000ffff`);
3801	#if defined(__SSE2__)
3802	__m128i vt = _mm_loadu_si128(p: (const __m128i)(buf1 + i`2`));
3803	if (disty) {
3804	__m128i vb = _mm_loadu_si128(p: (const __m128i)(buf2 + i`2`));
3805	vt = _mm_mulhi_epu16(a: vt, b: vidy);
3806	vb = _mm_mulhi_epu16(a: vb, b: vdy);
3807	vt = _mm_add_epi16(a: vt, b: vb);
3808	}
3809	if (distx) {
3810	const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
3811	const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(`0x10000` - distx), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
3812	vt = _mm_mulhi_epu16(a: vt, b: _mm_unpacklo_epi64(a: vidistx, b: vdistx));
3813	vt = _mm_add_epi16(a: vt, _mm_srli_si128(vt, `8`));
3814	}
3815	_mm_storel_epi64(p: (__m128i*)(b+i), a: vt);
3816	#else
3817	b[i] = interpolate_4_pixels_rgb64(buf1 + i`2`, buf2 + i`2`, distx, disty);
3818	#endif
3819	fx += fdx;
3820	}
3821	length -= len;
3822	b += len;
3823	}
3824	} else { // rotation or shear
3825	while (b < end) {
3826	int len = qMin(a: length, b: BufferSize / `2`);
3827
3828	fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3829
3830	convert(buf1, len * `2`);
3831	convert(buf2, len * `2`);
3832
3833	for (int i = `0`; i < len; ++i) {
3834	int distx = (fx & `0x0000ffff`);
3835	int disty = (fy & `0x0000ffff`);
3836	b[i] = interpolate_4_pixels_rgb64(t: buf1 + i`2`, b: buf2 + i`2`, distx, disty);
3837	fx += fdx;
3838	fy += fdy;
3839	}
3840
3841	length -= len;
3842	b += len;
3843	}
3844	}
3845	} else { // !(data->fast_matrix)
3846	const QTextureData &image = data->texture;
3847
3848	const qreal fdx = data->m11;
3849	const qreal fdy = data->m12;
3850	const qreal fdw = data->m13;
3851
3852	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3853	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3854	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3855
3856	int distxs[BufferSize / `2`];
3857	int distys[BufferSize / `2`];
3858
3859	while (b < end) {
3860	int len = qMin(a: length, b: BufferSize / `2`);
3861	for (int i = `0`; i < len; ++i) {
3862	const qreal iw = fw == `0` ? `1` : `1` / fw;
3863	const qreal px = fx * iw - qreal(`0.5`);
3864	const qreal py = fy * iw - qreal(`0.5`);
3865
3866	int x1 = int(px) - (px < `0`);
3867	int x2;
3868	int y1 = int(py) - (py < `0`);
3869	int y2;
3870
3871	distxs[i] = int((px - x1) * (`1`<<`16`));
3872	distys[i] = int((py - y1) * (`1`<<`16`));
3873
3874	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
3875	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
3876
3877	const uchar *s1 = texture.scanLine(y: y1);
3878	const uchar *s2 = texture.scanLine(y: y2);
3879
3880	buf1[i * `2` + `0`] = reinterpret_cast<const QRgba64 *>(s1)[x1];
3881	buf1[i * `2` + `1`] = reinterpret_cast<const QRgba64 *>(s1)[x2];
3882	buf2[i * `2` + `0`] = reinterpret_cast<const QRgba64 *>(s2)[x1];
3883	buf2[i * `2` + `1`] = reinterpret_cast<const QRgba64 *>(s2)[x2];
3884
3885	fx += fdx;
3886	fy += fdy;
3887	fw += fdw;
3888	//force increment to avoid /0
3889	if (!fw)
3890	fw += fdw;
3891	}
3892
3893	convert(buf1, len * `2`);
3894	convert(buf2, len * `2`);
3895
3896	for (int i = `0`; i < len; ++i) {
3897	int distx = distxs[i];
3898	int disty = distys[i];
3899	b[i] = interpolate_4_pixels_rgb64(t: buf1 + i`2`, b: buf2 + i`2`, distx, disty);
3900	}
3901
3902	length -= len;
3903	b += len;
3904	}
3905	}
3906	return buffer;
3907	}
3908
3909	template<TextureBlendType blendType>
3910	static const QRgba64 QT_FASTCALL fetchTransformedBilinear64(QRgba64 buffer, const Operator *,
3911	const QSpanData data, int* y, int x, int length)
3912	{
3913	if (qPixelLayouts[data->texture.format].bpp == QPixelLayout::BPP64)
3914	return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length);
3915	return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length);
3916	}
3917	#endif
3918
3919	// FetchUntransformed can have more specialized methods added depending on SIMD features.
3920	static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = {
3921	nullptr, // Invalid
3922	fetchUntransformed, // Mono
3923	fetchUntransformed, // MonoLsb
3924	fetchUntransformed, // Indexed8
3925	fetchUntransformedARGB32PM, // RGB32
3926	fetchUntransformed, // ARGB32
3927	fetchUntransformedARGB32PM, // ARGB32_Premultiplied
3928	fetchUntransformedRGB16, // RGB16
3929	fetchUntransformed, // ARGB8565_Premultiplied
3930	fetchUntransformed, // RGB666
3931	fetchUntransformed, // ARGB6666_Premultiplied
3932	fetchUntransformed, // RGB555
3933	fetchUntransformed, // ARGB8555_Premultiplied
3934	fetchUntransformed, // RGB888
3935	fetchUntransformed, // RGB444
3936	fetchUntransformed, // ARGB4444_Premultiplied
3937	fetchUntransformed, // RGBX8888
3938	fetchUntransformed, // RGBA8888
3939	fetchUntransformed, // RGBA8888_Premultiplied
3940	fetchUntransformed, // Format_BGR30
3941	fetchUntransformed, // Format_A2BGR30_Premultiplied
3942	fetchUntransformed, // Format_RGB30
3943	fetchUntransformed, // Format_A2RGB30_Premultiplied
3944	fetchUntransformed, // Alpha8
3945	fetchUntransformed, // Grayscale8
3946	fetchUntransformed, // RGBX64
3947	fetchUntransformed, // RGBA64
3948	fetchUntransformed, // RGBA64_Premultiplied
3949	fetchUntransformed, // Grayscale16
3950	fetchUntransformed, // BGR888
3951	};
3952
3953	static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = {
3954	fetchUntransformed, // Untransformed
3955	fetchUntransformed, // Tiled
3956	fetchTransformed<BlendTransformed, QPixelLayout::BPPNone>, // Transformed
3957	fetchTransformed<BlendTransformedTiled, QPixelLayout::BPPNone>, // TransformedTiled
3958	fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPPNone>, // TransformedBilinear
3959	fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPPNone> // TransformedBilinearTiled
3960	};
3961
3962	static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = {
3963	fetchUntransformedARGB32PM, // Untransformed
3964	fetchUntransformedARGB32PM, // Tiled
3965	fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed
3966	fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>, // TransformedTiled
3967	fetchTransformedBilinearARGB32PM<BlendTransformedBilinear>, // Bilinear
3968	fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled
3969	};
3970
3971	static SourceFetchProc sourceFetchAny16[NBlendTypes] = {
3972	fetchUntransformed, // Untransformed
3973	fetchUntransformed, // Tiled
3974	fetchTransformed<BlendTransformed, QPixelLayout::BPP16>, // Transformed
3975	fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>, // TransformedTiled
3976	fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>, // TransformedBilinear
3977	fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16> // TransformedBilinearTiled
3978	};
3979
3980	static SourceFetchProc sourceFetchAny32[NBlendTypes] = {
3981	fetchUntransformed, // Untransformed
3982	fetchUntransformed, // Tiled
3983	fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed
3984	fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>, // TransformedTiled
3985	fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP32>, // TransformedBilinear
3986	fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP32> // TransformedBilinearTiled
3987	};
3988
3989	static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format)
3990	{
3991	if (format == QImage::Format_RGB32 \|\| format == QImage::Format_ARGB32_Premultiplied)
3992	return sourceFetchARGB32PM[blendType];
3993	if (blendType == BlendUntransformed \|\| blendType == BlendTiled)
3994	return sourceFetchUntransformed[format];
3995	if (qPixelLayouts[format].bpp == QPixelLayout::BPP16)
3996	return sourceFetchAny16[blendType];
3997	if (qPixelLayouts[format].bpp == QPixelLayout::BPP32)
3998	return sourceFetchAny32[blendType];
3999	return sourceFetchGeneric[blendType];
4000	}
4001
4002	#if QT_CONFIG(raster_64bit)
4003	static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = {
4004	fetchUntransformed64, // Untransformed
4005	fetchUntransformed64, // Tiled
4006	fetchTransformed64<BlendTransformed>, // Transformed
4007	fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
4008	fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
4009	fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
4010	};
4011
4012	static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = {
4013	fetchUntransformedRGBA64PM, // Untransformed
4014	fetchUntransformedRGBA64PM, // Tiled
4015	fetchTransformed64<BlendTransformed>, // Transformed
4016	fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
4017	fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
4018	fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
4019	};
4020
4021	static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format)
4022	{
4023	if (format == QImage::Format_RGBX64 \|\| format == QImage::Format_RGBA64_Premultiplied)
4024	return sourceFetchRGBA64PM[blendType];
4025	return sourceFetchGeneric64[blendType];
4026	}
4027	#endif
4028
4029
// Fixed-point format used for gradient positions: FIXPT_BITS fractional bits.
#define FIXPT_BITS 8
// Value of 1.0 in that fixed-point format.
#define FIXPT_SIZE (1<<FIXPT_BITS)
// Magnitude limit checked before converting a position to fixed point.
#define FIXPT_MAX (INT_MAX >> (FIXPT_BITS + 1))
4033
4034	static uint qt_gradient_pixel_fixed(const QGradientData data, int* fixed_pos)
4035	{
4036	int ipos = (fixed_pos + (FIXPT_SIZE / `2`)) >> FIXPT_BITS;
4037	return data->colorTable32[qt_gradient_clamp(data, ipos)];
4038	}
4039
4040	#if QT_CONFIG(raster_64bit)
4041	static const QRgba64& qt_gradient_pixel64_fixed(const QGradientData data, int* fixed_pos)
4042	{
4043	int ipos = (fixed_pos + (FIXPT_SIZE / `2`)) >> FIXPT_BITS;
4044	return data->colorTable64[qt_gradient_clamp(data, ipos)];
4045	}
4046	#endif
4047
4048	static void QT_FASTCALL getLinearGradientValues(LinearGradientValues v, const* QSpanData *data)
4049	{
4050	v->dx = data->gradient.linear.end.x - data->gradient.linear.origin.x;
4051	v->dy = data->gradient.linear.end.y - data->gradient.linear.origin.y;
4052	v->l = v->dx * v->dx + v->dy * v->dy;
4053	v->off = `0`;
4054	if (v->l != `0`) {
4055	v->dx /= v->l;
4056	v->dy /= v->l;
4057	v->off = -v->dx * data->gradient.linear.origin.x - v->dy * data->gradient.linear.origin.y;
4058	}
4059	}
4060
4061	class GradientBase32
4062	{
4063	public:
4064	typedef uint Type;
4065	static Type null() { return `0`; }
4066	static Type fetchSingle(const QGradientData& gradient, qreal v)
4067	{
4068	return qt_gradient_pixel(data: &gradient, pos: v);
4069	}
4070	static Type fetchSingle(const QGradientData& gradient, int v)
4071	{
4072	return qt_gradient_pixel_fixed(data: &gradient, fixed_pos: v);
4073	}
4074	static void memfill(Type buffer, Type fill, int* length)
4075	{
4076	qt_memfill32(buffer, fill, length);
4077	}
4078	};
4079
4080	#if QT_CONFIG(raster_64bit)
4081	class GradientBase64
4082	{
4083	public:
4084	typedef QRgba64 Type;
4085	static Type null() { return QRgba64::fromRgba64(c: `0`); }
4086	static Type fetchSingle(const QGradientData& gradient, qreal v)
4087	{
4088	return qt_gradient_pixel64(data: &gradient, pos: v);
4089	}
4090	static Type fetchSingle(const QGradientData& gradient, int v)
4091	{
4092	return qt_gradient_pixel64_fixed(data: &gradient, fixed_pos: v);
4093	}
4094	static void memfill(Type buffer, Type fill, int* length)
4095	{
4096	qt_memfill64((quint64*)buffer, fill, length);
4097	}
4098	};
4099	#endif
4100
4101	template<class GradientBase, typename BlendType>
4102	static inline const BlendType * QT_FASTCALL qt_fetch_linear_gradient_template(
4103	BlendType buffer, const* Operator op, const* QSpanData *data,
4104	int y, int x, int length)
4105	{
4106	const BlendType *b = buffer;
4107	qreal t, inc;
4108
4109	bool affine = true;
4110	qreal rx=`0`, ry=`0`;
4111	if (op->linear.l == `0`) {
4112	t = inc = `0`;
4113	} else {
4114	rx = data->m21 * (y + qreal(`0.5`)) + data->m11 * (x + qreal(`0.5`)) + data->dx;
4115	ry = data->m22 * (y + qreal(`0.5`)) + data->m12 * (x + qreal(`0.5`)) + data->dy;
4116	t = op->linear.dxrx + op->linear.dyry + op->linear.off;
4117	inc = op->linear.dx * data->m11 + op->linear.dy * data->m12;
4118	affine = !data->m13 && !data->m23;
4119
4120	if (affine) {
4121	t *= (GRADIENT_STOPTABLE_SIZE - `1`);
4122	inc *= (GRADIENT_STOPTABLE_SIZE - `1`);
4123	}
4124	}
4125
4126	const BlendType *end = buffer + length;
4127	if (affine) {
4128	if (inc > qreal(-`1e-5`) && inc < qreal(`1e-5`)) {
4129	if (std::abs(x: t) < FIXPT_MAX)
4130	GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, int(t * FIXPT_SIZE)), length);
4131	else
4132	GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, t / GRADIENT_STOPTABLE_SIZE), length);
4133	} else {
4134	if (std::abs(x: t) < FIXPT_MAX && std::abs(x: inc) < FIXPT_MAX && std::abs(x: t + inc * length) < FIXPT_MAX) {
4135	// we can use fixed point math
4136	int t_fixed = int(t * FIXPT_SIZE);
4137	int inc_fixed = int(inc * FIXPT_SIZE);
4138	while (buffer < end) {
4139	*buffer = GradientBase::fetchSingle(data->gradient, t_fixed);
4140	t_fixed += inc_fixed;
4141	++buffer;
4142	}
4143	} else {
4144	// we have to fall back to float math
4145	while (buffer < end) {
4146	*buffer = GradientBase::fetchSingle(data->gradient, t/GRADIENT_STOPTABLE_SIZE);
4147	t += inc;
4148	++buffer;
4149	}
4150	}
4151	}
4152	} else { // fall back to float math here as well
4153	qreal rw = data->m23 * (y + qreal(`0.5`)) + data->m13 * (x + qreal(`0.5`)) + data->m33;
4154	while (buffer < end) {
4155	qreal x = rx/rw;
4156	qreal y = ry/rw;
4157	t = (op->linear.dxx + op->linear.dy y) + op->linear.off;
4158
4159	*buffer = GradientBase::fetchSingle(data->gradient, t);
4160	rx += data->m11;
4161	ry += data->m12;
4162	rw += data->m13;
4163	if (!rw) {
4164	rw += data->m13;
4165	}
4166	++buffer;
4167	}
4168	}
4169
4170	return b;
4171	}
4172
4173	static const uint * QT_FASTCALL qt_fetch_linear_gradient(uint buffer, const* Operator op, const* QSpanData *data,
4174	int y, int x, int length)
4175	{
4176	return qt_fetch_linear_gradient_template<GradientBase32, uint>(buffer, op, data, y, x, length);
4177	}
4178
4179	#if QT_CONFIG(raster_64bit)
4180	static const QRgba64 * QT_FASTCALL qt_fetch_linear_gradient_rgb64(QRgba64 buffer, const* Operator op, const* QSpanData *data,
4181	int y, int x, int length)
4182	{
4183	return qt_fetch_linear_gradient_template<GradientBase64, QRgba64>(buffer, op, data, y, x, length);
4184	}
4185	#endif
4186
4187	static void QT_FASTCALL getRadialGradientValues(RadialGradientValues v, const* QSpanData *data)
4188	{
4189	v->dx = data->gradient.radial.center.x - data->gradient.radial.focal.x;
4190	v->dy = data->gradient.radial.center.y - data->gradient.radial.focal.y;
4191
4192	v->dr = data->gradient.radial.center.radius - data->gradient.radial.focal.radius;
4193	v->sqrfr = data->gradient.radial.focal.radius * data->gradient.radial.focal.radius;
4194
4195	v->a = v->dr * v->dr - v->dxv->dx - v->dyv->dy;
4196	v->inv2a = `1` / (`2` * v->a);
4197
4198	v->extended = !qFuzzyIsNull(d: data->gradient.radial.focal.radius) \|\| v->a <= `0`;
4199	}
4200
4201	template <class GradientBase>
4202	class RadialFetchPlain : public GradientBase
4203	{
4204	public:
4205	typedef typename GradientBase::Type BlendType;
4206	static void fetch(BlendType buffer, BlendType end,
4207	const Operator op, const* QSpanData *data, qreal det,
4208	qreal delta_det, qreal delta_delta_det, qreal b, qreal delta_b)
4209	{
4210	if (op->radial.extended) {
4211	while (buffer < end) {
4212	BlendType result = GradientBase::null();
4213	if (det >= `0`) {
4214	qreal w = qSqrt(v: det) - b;
4215	if (data->gradient.radial.focal.radius + op->radial.dr * w >= `0`)
4216	result = GradientBase::fetchSingle(data->gradient, w);
4217	}
4218
4219	*buffer = result;
4220
4221	det += delta_det;
4222	delta_det += delta_delta_det;
4223	b += delta_b;
4224
4225	++buffer;
4226	}
4227	} else {
4228	while (buffer < end) {
4229	*buffer++ = GradientBase::fetchSingle(data->gradient, qSqrt(v: det) - b);
4230
4231	det += delta_det;
4232	delta_det += delta_delta_det;
4233	b += delta_b;
4234	}
4235	}
4236	}
4237	};
4238
4239	const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint buffer, const* Operator op, const* QSpanData *data,
4240	int y, int x, int length)
4241	{
4242	return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase32>, uint>(buffer, op, data, y, x, length);
4243	}
4244
4245	static SourceFetchProc qt_fetch_radial_gradient = qt_fetch_radial_gradient_plain;
4246
4247	#if QT_CONFIG(raster_64bit)
4248	const QRgba64 * QT_FASTCALL qt_fetch_radial_gradient_rgb64(QRgba64 buffer, const* Operator op, const* QSpanData *data,
4249	int y, int x, int length)
4250	{
4251	return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase64>, QRgba64>(buffer, op, data, y, x, length);
4252	}
4253	#endif
4254
4255	template <class GradientBase, typename BlendType>
4256	static inline const BlendType * QT_FASTCALL qt_fetch_conical_gradient_template(
4257	BlendType buffer, const* QSpanData *data,
4258	int y, int x, int length)
4259	{
4260	const BlendType *b = buffer;
4261	qreal rx = data->m21 * (y + qreal(`0.5`))
4262	+ data->dx + data->m11 * (x + qreal(`0.5`));
4263	qreal ry = data->m22 * (y + qreal(`0.5`))
4264	+ data->dy + data->m12 * (x + qreal(`0.5`));
4265	bool affine = !data->m13 && !data->m23;
4266
4267	const qreal inv2pi = M_1_PI / `2.0`;
4268
4269	const BlendType *end = buffer + length;
4270	if (affine) {
4271	rx -= data->gradient.conical.center.x;
4272	ry -= data->gradient.conical.center.y;
4273	while (buffer < end) {
4274	qreal angle = qAtan2(y: ry, x: rx) + data->gradient.conical.angle;
4275
4276	buffer = GradientBase::fetchSingle(data->gradient, `1` - angle inv2pi);
4277
4278	rx += data->m11;
4279	ry += data->m12;
4280	++buffer;
4281	}
4282	} else {
4283	qreal rw = data->m23 * (y + qreal(`0.5`))
4284	+ data->m33 + data->m13 * (x + qreal(`0.5`));
4285	if (!rw)
4286	rw = `1`;
4287	while (buffer < end) {
4288	qreal angle = qAtan2(y: ry/rw - data->gradient.conical.center.x,
4289	x: rx/rw - data->gradient.conical.center.y)
4290	+ data->gradient.conical.angle;
4291
4292	buffer = GradientBase::fetchSingle(data->gradient, `1` - angle inv2pi);
4293
4294	rx += data->m11;
4295	ry += data->m12;
4296	rw += data->m13;
4297	if (!rw) {
4298	rw += data->m13;
4299	}
4300	++buffer;
4301	}
4302	}
4303	return b;
4304	}
4305
4306	static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint buffer, const* Operator , const* QSpanData *data,
4307	int y, int x, int length)
4308	{
4309	return qt_fetch_conical_gradient_template<GradientBase32, uint>(buffer, data, y, x, length);
4310	}
4311
4312	#if QT_CONFIG(raster_64bit)
4313	static const QRgba64 * QT_FASTCALL qt_fetch_conical_gradient_rgb64(QRgba64 buffer, const* Operator , const* QSpanData *data,
4314	int y, int x, int length)
4315	{
4316	return qt_fetch_conical_gradient_template<GradientBase64, QRgba64>(buffer, data, y, x, length);
4317	}
4318	#endif
4319
4320	extern CompositionFunctionSolid qt_functionForModeSolid_C[];
4321	extern CompositionFunctionSolid64 qt_functionForModeSolid64_C[];
4322
4323	static const CompositionFunctionSolid *functionForModeSolid = qt_functionForModeSolid_C;
4324	#if QT_CONFIG(raster_64bit)
4325	static const CompositionFunctionSolid64 *functionForModeSolid64 = qt_functionForModeSolid64_C;
4326	#endif
4327
4328	extern CompositionFunction qt_functionForMode_C[];
4329	extern CompositionFunction64 qt_functionForMode64_C[];
4330
4331	static const CompositionFunction *functionForMode = qt_functionForMode_C;
4332	#if QT_CONFIG(raster_64bit)
4333	static const CompositionFunction64 *functionForMode64 = qt_functionForMode64_C;
4334	#endif
4335
4336	static TextureBlendType getBlendType(const QSpanData *data)
4337	{
4338	TextureBlendType ft;
4339	if (data->txop <= QTransform::TxTranslate)
4340	if (data->texture.type == QTextureData::Tiled)
4341	ft = BlendTiled;
4342	else
4343	ft = BlendUntransformed;
4344	else if (data->bilinear)
4345	if (data->texture.type == QTextureData::Tiled)
4346	ft = BlendTransformedBilinearTiled;
4347	else
4348	ft = BlendTransformedBilinear;
4349	else
4350	if (data->texture.type == QTextureData::Tiled)
4351	ft = BlendTransformedTiled;
4352	else
4353	ft = BlendTransformed;
4354	return ft;
4355	}
4356
4357	static inline Operator getOperator(const QSpanData data, const* QSpan spans, int* spanCount)
4358	{
4359	Operator op;
4360	bool solidSource = false;
4361
4362	switch(data->type) {
4363	case QSpanData::Solid:
4364	solidSource = data->solidColor.isOpaque();
4365	op.srcFetch = nullptr;
4366	#if QT_CONFIG(raster_64bit)
4367	op.srcFetch64 = nullptr;
4368	#endif
4369	break;
4370	case QSpanData::LinearGradient:
4371	solidSource = !data->gradient.alphaColor;
4372	getLinearGradientValues(v: &op.linear, data);
4373	op.srcFetch = qt_fetch_linear_gradient;
4374	#if QT_CONFIG(raster_64bit)
4375	op.srcFetch64 = qt_fetch_linear_gradient_rgb64;
4376	#endif
4377	break;
4378	case QSpanData::RadialGradient:
4379	solidSource = !data->gradient.alphaColor;
4380	getRadialGradientValues(v: &op.radial, data);
4381	op.srcFetch = qt_fetch_radial_gradient;
4382	#if QT_CONFIG(raster_64bit)
4383	op.srcFetch64 = qt_fetch_radial_gradient_rgb64;
4384	#endif
4385	break;
4386	case QSpanData::ConicalGradient:
4387	solidSource = !data->gradient.alphaColor;
4388	op.srcFetch = qt_fetch_conical_gradient;
4389	#if QT_CONFIG(raster_64bit)
4390	op.srcFetch64 = qt_fetch_conical_gradient_rgb64;
4391	#endif
4392	break;
4393	case QSpanData::Texture:
4394	solidSource = !data->texture.hasAlpha;
4395	op.srcFetch = getSourceFetch(blendType: getBlendType(data), format: data->texture.format);
4396	#if QT_CONFIG(raster_64bit)
4397	op.srcFetch64 = getSourceFetch64(blendType: getBlendType(data), format: data->texture.format);;
4398	#endif
4399	break;
4400	default:
4401	Q_UNREACHABLE();
4402	break;
4403	}
4404	#if !QT_CONFIG(raster_64bit)
4405	op.srcFetch64 = `0`;
4406	#endif
4407
4408	op.mode = data->rasterBuffer->compositionMode;
4409	if (op.mode == QPainter::CompositionMode_SourceOver && solidSource)
4410	op.mode = QPainter::CompositionMode_Source;
4411
4412	op.destFetch = destFetchProc[data->rasterBuffer->format];
4413	#if QT_CONFIG(raster_64bit)
4414	op.destFetch64 = destFetchProc64[data->rasterBuffer->format];
4415	#else
4416	op.destFetch64 = `0`;
4417	#endif
4418	if (op.mode == QPainter::CompositionMode_Source &&
4419	(data->type != QSpanData::Texture \|\| data->texture.const_alpha == `256`)) {
4420	const QSpan *lastSpan = spans + spanCount;
4421	bool alphaSpans = false;
4422	while (spans < lastSpan) {
4423	if (spans->coverage != `255`) {
4424	alphaSpans = true;
4425	break;
4426	}
4427	++spans;
4428	}
4429	if (!alphaSpans && spanCount > `0`) {
4430	// If all spans are opaque we do not need to fetch dest.
4431	// But don't clear passthrough destFetch as they are just as fast and save destStore.
4432	if (op.destFetch != destFetchARGB32P)
4433	op.destFetch = destFetchUndefined;
4434	#if QT_CONFIG(raster_64bit)
4435	if (op.destFetch64 != destFetchRGB64)
4436	op.destFetch64 = destFetch64Undefined;
4437	#endif
4438	}
4439	}
4440
4441	op.destStore = destStoreProc[data->rasterBuffer->format];
4442	op.funcSolid = functionForModeSolid[op.mode];
4443	op.func = functionForMode[op.mode];
4444	#if QT_CONFIG(raster_64bit)
4445	op.destStore64 = destStoreProc64[data->rasterBuffer->format];
4446	op.funcSolid64 = functionForModeSolid64[op.mode];
4447	op.func64 = functionForMode64[op.mode];
4448	#else
4449	op.destStore64 = `0`;
4450	op.funcSolid64 = `0`;
4451	op.func64 = `0`;
4452	#endif
4453
4454	return op;
4455	}
4456
4457	static void spanfill_from_first(QRasterBuffer rasterBuffer, QPixelLayout::BPP bpp, int* x, int y, int length)
4458	{
4459	switch (bpp) {
4460	case QPixelLayout::BPP64: {
4461	quint64 dest = reinterpret_cast<quint64 >(rasterBuffer->scanLine(y)) + x;
4462	qt_memfill_template(dest: dest + `1`, color: dest[`0`], count: length - `1`);
4463	break;
4464	}
4465	case QPixelLayout::BPP32: {
4466	quint32 dest = reinterpret_cast<quint32 >(rasterBuffer->scanLine(y)) + x;
4467	qt_memfill_template(dest: dest + `1`, color: dest[`0`], count: length - `1`);
4468	break;
4469	}
4470	case QPixelLayout::BPP24: {
4471	quint24 dest = reinterpret_cast<quint24 >(rasterBuffer->scanLine(y)) + x;
4472	qt_memfill_template(dest: dest + `1`, color: dest[`0`], count: length - `1`);
4473	break;
4474	}
4475	case QPixelLayout::BPP16: {
4476	quint16 dest = reinterpret_cast<quint16 >(rasterBuffer->scanLine(y)) + x;
4477	qt_memfill_template(dest: dest + `1`, color: dest[`0`], count: length - `1`);
4478	break;
4479	}
4480	case QPixelLayout::BPP8: {
4481	uchar *dest = rasterBuffer->scanLine(y) + x;
4482	memset(s: dest + `1`, c: dest[`0`], n: length - `1`);
4483	break;
4484	}
4485	default:
4486	Q_UNREACHABLE();
4487	}
4488	}
4489
4490
4491	// -------------------- blend methods ---------------------
4492
4493	static void blend_color_generic(int count, const QSpan spans, void* *userData)
4494	{
4495	QSpanData data = reinterpret_cast<QSpanData >(userData);
4496	uint buffer[BufferSize];
4497	Operator op = getOperator(data, spans: nullptr, spanCount: `0`);
4498	const uint color = data->solidColor.toArgb32();
4499	const bool solidFill = op.mode == QPainter::CompositionMode_Source;
4500	const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
4501
4502	while (count--) {
4503	int x = spans->x;
4504	int length = spans->len;
4505	if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == `255` && length) {
4506	// If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
4507	op.destStore(data->rasterBuffer, x, spans->y, &color, `1`);
4508	spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans->y, length);
4509	length = `0`;
4510	}
4511
4512	while (length) {
4513	int l = qMin(a: BufferSize, b: length);
4514	uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
4515	op.funcSolid(dest, l, color, spans->coverage);
4516	if (op.destStore)
4517	op.destStore(data->rasterBuffer, x, spans->y, dest, l);
4518	length -= l;
4519	x += l;
4520	}
4521	++spans;
4522	}
4523	}
4524
4525	static void blend_color_argb(int count, const QSpan spans, void* *userData)
4526	{
4527	QSpanData data = reinterpret_cast<QSpanData >(userData);
4528
4529	const Operator op = getOperator(data, spans: nullptr, spanCount: `0`);
4530	const uint color = data->solidColor.toArgb32();
4531
4532	if (op.mode == QPainter::CompositionMode_Source) {
4533	// inline for performance
4534	while (count--) {
4535	uint target = ((uint )data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
4536	if (spans->coverage == `255`) {
4537	qt_memfill(dest: target, color, count: spans->len);
4538	#ifdef __SSE2__
4539	} else if (spans->len > `16`) {
4540	op.funcSolid(target, spans->len, color, spans->coverage);
4541	#endif
4542	} else {
4543	uint c = BYTE_MUL(x: color, a: spans->coverage);
4544	int ialpha = `255` - spans->coverage;
4545	for (int i = `0`; i < spans->len; ++i)
4546	target[i] = c + BYTE_MUL(x: target[i], a: ialpha);
4547	}
4548	++spans;
4549	}
4550	return;
4551	}
4552
4553	while (count--) {
4554	uint target = ((uint )data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
4555	op.funcSolid(target, spans->len, color, spans->coverage);
4556	++spans;
4557	}
4558	}
4559
4560	void blend_color_generic_rgb64(int count, const QSpan spans, void* *userData)
4561	{
4562	#if QT_CONFIG(raster_64bit)
4563	QSpanData data = reinterpret_cast<QSpanData >(userData);
4564	Operator op = getOperator(data, spans: nullptr, spanCount: `0`);
4565	if (!op.funcSolid64) {
4566	qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit");
4567	return blend_color_generic(count, spans, userData);
4568	}
4569
4570	alignas(`8`) QRgba64 buffer[BufferSize];
4571	const QRgba64 color = data->solidColor;
4572	const bool solidFill = op.mode == QPainter::CompositionMode_Source;
4573	const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
4574
4575	while (count--) {
4576	int x = spans->x;
4577	int length = spans->len;
4578	if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == `255` && length && op.destStore64) {
4579	// If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
4580	op.destStore64(data->rasterBuffer, x, spans->y, &color, `1`);
4581	spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans->y, length);
4582	length = `0`;
4583	}
4584
4585	while (length) {
4586	int l = qMin(a: BufferSize, b: length);
4587	QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
4588	op.funcSolid64(dest, l, color, spans->coverage);
4589	if (op.destStore64)
4590	op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
4591	length -= l;
4592	x += l;
4593	}
4594	++spans;
4595	}
4596	#else
4597	blend_color_generic(count, spans, userData);
4598	#endif
4599	}
4600
4601	static void blend_color_rgb16(int count, const QSpan spans, void* *userData)
4602	{
4603	QSpanData data = reinterpret_cast<QSpanData >(userData);
4604
4605	/*
4606	We duplicate a little logic from getOperator() and calculate the
4607	composition mode directly. This allows blend_color_rgb16 to be used
4608	from qt_gradient_quint16 with minimal overhead.
4609	*/
4610	QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
4611	if (mode == QPainter::CompositionMode_SourceOver && data->solidColor.isOpaque())
4612	mode = QPainter::CompositionMode_Source;
4613
4614	if (mode == QPainter::CompositionMode_Source) {
4615	// inline for performance
4616	ushort c = data->solidColor.toRgb16();
4617	for (; count--; spans++) {
4618	if (!spans->len)
4619	continue;
4620	ushort target = ((ushort )data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
4621	if (spans->coverage == `255`) {
4622	qt_memfill(dest: target, color: c, count: spans->len);
4623	} else {
4624	ushort color = BYTE_MUL_RGB16(x: c, a: spans->coverage);
4625	int ialpha = `255` - spans->coverage;
4626	const ushort *end = target + spans->len;
4627	while (target < end) {
4628	target = color + BYTE_MUL_RGB16(x: target, a: ialpha);
4629	++target;
4630	}
4631	}
4632	}
4633	return;
4634	}
4635
4636	if (mode == QPainter::CompositionMode_SourceOver) {
4637	for (; count--; spans++) {
4638	if (!spans->len)
4639	continue;
4640	uint color = BYTE_MUL(x: data->solidColor.toArgb32(), a: spans->coverage);
4641	int ialpha = qAlpha(rgb: ~color);
4642	ushort c = qConvertRgb32To16(c: color);
4643	ushort target = ((ushort )data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
4644	int len = spans->len;
4645	bool pre = (((quintptr)target) & `0x3`) != `0`;
4646	bool post = false;
4647	if (pre) {
4648	// skip to word boundary
4649	target = c + BYTE_MUL_RGB16(x: target, a: ialpha);
4650	++target;
4651	--len;
4652	}
4653	if (len & `0x1`) {
4654	post = true;
4655	--len;
4656	}
4657	uint target32 = (uint)target;
4658	uint c32 = c \| (c<<`16`);
4659	len >>= `1`;
4660	uint salpha = (ialpha+`1`) >> `3`; // calculate here rather than in loop
4661	while (len--) {
4662	// blend full words
4663	target32 = c32 + BYTE_MUL_RGB16_32(x: target32, a: salpha);
4664	++target32;
4665	target += `2`;
4666	}
4667	if (post) {
4668	// one last pixel beyond a full word
4669	target = c + BYTE_MUL_RGB16(x: target, a: ialpha);
4670	}
4671	}
4672	return;
4673	}
4674
4675	blend_color_generic(count, spans, userData);
4676	}
4677
4678	template <typename T>
4679	void handleSpans(int count, const QSpan spans, const* QSpanData *data, T &handler)
4680	{
4681	uint const_alpha = `256`;
4682	if (data->type == QSpanData::Texture)
4683	const_alpha = data->texture.const_alpha;
4684
4685	int coverage = `0`;
4686	while (count) {
4687	if (!spans->len) {
4688	++spans;
4689	--count;
4690	continue;
4691	}
4692	int x = spans->x;
4693	const int y = spans->y;
4694	int right = x + spans->len;
4695
4696	// compute length of adjacent spans
4697	for (int i = `1`; i < count && spans[i].y == y && spans[i].x == right; ++i)
4698	right += spans[i].len;
4699	int length = right - x;
4700
4701	while (length) {
4702	int l = qMin(a: BufferSize, b: length);
4703	length -= l;
4704
4705	int process_length = l;
4706	int process_x = x;
4707
4708	const typename T::BlendType *src = handler.fetch(process_x, y, process_length);
4709	int offset = `0`;
4710	while (l > `0`) {
4711	if (x == spans->x) // new span?
4712	coverage = (spans->coverage * const_alpha) >> `8`;
4713
4714	int right = spans->x + spans->len;
4715	int len = qMin(a: l, b: right - x);
4716
4717	handler.process(x, y, len, coverage, src, offset);
4718
4719	l -= len;
4720	x += len;
4721	offset += len;
4722
4723	if (x == right) { // done with current span?
4724	++spans;
4725	--count;
4726	}
4727	}
4728	handler.store(process_x, y, process_length);
4729	}
4730	}
4731	}
4732
4733	template<typename T>
4734	struct QBlendBase
4735	{
4736	typedef T BlendType;
4737	QBlendBase(QSpanData d, const* Operator &o)
4738	: data(d)
4739	, op (o)
4740	, dest(nullptr)
4741	{
4742	}
4743
4744	QSpanData *data;
4745	Operator op;
4746
4747	BlendType *dest;
4748
4749	alignas(`8`) BlendType buffer[BufferSize];
4750	alignas(`8`) BlendType src_buffer[BufferSize];
4751	};
4752
4753	class BlendSrcGeneric : public QBlendBase<uint>
4754	{
4755	public:
4756	BlendSrcGeneric(QSpanData d, const* Operator &o)
4757	: QBlendBase<uint>(d, o)
4758	{
4759	}
4760
4761	const uint fetch(int* x, int y, int len)
4762	{
4763	dest = op.destFetch(buffer, data->rasterBuffer, x, y, len);
4764	return op.srcFetch(src_buffer, &op, data, y, x, len);
4765	}
4766
4767	void process(int, int, int len, int coverage, const uint src, int* offset)
4768	{
4769	op.func(dest + offset, src + offset, len, coverage);
4770	}
4771
4772	void store(int x, int y, int len)
4773	{
4774	if (op.destStore)
4775	op.destStore(data->rasterBuffer, x, y, dest, len);
4776	}
4777	};
4778
4779	#if QT_CONFIG(raster_64bit)
4780	class BlendSrcGenericRGB64 : public QBlendBase<QRgba64>
4781	{
4782	public:
4783	BlendSrcGenericRGB64(QSpanData d, const* Operator &o)
4784	: QBlendBase<QRgba64>(d, o)
4785	{
4786	}
4787
4788	bool isSupported() const
4789	{
4790	return op.func64 && op.destFetch64;
4791	}
4792
4793	const QRgba64 fetch(int* x, int y, int len)
4794	{
4795	dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len);
4796	return op.srcFetch64(src_buffer, &op, data, y, x, len);
4797	}
4798
4799	void process(int, int, int len, int coverage, const QRgba64 src, int* offset)
4800	{
4801	op.func64(dest + offset, src + offset, len, coverage);
4802	}
4803
4804	void store(int x, int y, int len)
4805	{
4806	if (op.destStore64)
4807	op.destStore64(data->rasterBuffer, x, y, dest, len);
4808	}
4809	};
4810	#endif
4811
4812	static void blend_src_generic(int count, const QSpan spans, void* *userData)
4813	{
4814	QSpanData data = reinterpret_cast<QSpanData >(userData);
4815	BlendSrcGeneric blend(data, getOperator(data, spans, spanCount: count));
4816	handleSpans(count, spans, data, handler&: blend);
4817	}
4818
4819	#if QT_CONFIG(raster_64bit)
4820	static void blend_src_generic_rgb64(int count, const QSpan spans, void* *userData)
4821	{
4822	QSpanData data = reinterpret_cast<QSpanData >(userData);
4823	Operator op = getOperator(data, spans, spanCount: count);
4824	BlendSrcGenericRGB64 blend64(data, op);
4825	if (blend64.isSupported())
4826	handleSpans(count, spans, data, handler&: blend64);
4827	else {
4828	qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4829	BlendSrcGeneric blend32(data, op);
4830	handleSpans(count, spans, data, handler&: blend32);
4831	}
4832	}
4833	#endif
4834
4835	static void blend_untransformed_generic(int count, const QSpan spans, void* *userData)
4836	{
4837	QSpanData data = reinterpret_cast<QSpanData >(userData);
4838
4839	uint buffer[BufferSize];
4840	uint src_buffer[BufferSize];
4841	Operator op = getOperator(data, spans, spanCount: count);
4842
4843	const int image_width = data->texture.width;
4844	const int image_height = data->texture.height;
4845	int xoff = -qRound(d: -data->dx);
4846	int yoff = -qRound(d: -data->dy);
4847
4848	for (; count--; spans++) {
4849	if (!spans->len)
4850	continue;
4851	int x = spans->x;
4852	int length = spans->len;
4853	int sx = xoff + x;
4854	int sy = yoff + spans->y;
4855	if (sy >= `0` && sy < image_height && sx < image_width) {
4856	if (sx < `0`) {
4857	x -= sx;
4858	length += sx;
4859	sx = `0`;
4860	}
4861	if (sx + length > image_width)
4862	length = image_width - sx;
4863	if (length > `0`) {
4864	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
4865	while (length) {
4866	int l = qMin(a: BufferSize, b: length);
4867	const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
4868	uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
4869	op.func(dest, src, l, coverage);
4870	if (op.destStore)
4871	op.destStore(data->rasterBuffer, x, spans->y, dest, l);
4872	x += l;
4873	sx += l;
4874	length -= l;
4875	}
4876	}
4877	}
4878	}
4879	}
4880
4881	#if QT_CONFIG(raster_64bit)
4882	static void blend_untransformed_generic_rgb64(int count, const QSpan spans, void* *userData)
4883	{
4884	QSpanData data = reinterpret_cast<QSpanData >(userData);
4885
4886	Operator op = getOperator(data, spans, spanCount: count);
4887	if (!op.func64) {
4888	qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4889	return blend_untransformed_generic(count, spans, userData);
4890	}
4891	alignas(`8`) QRgba64 buffer[BufferSize];
4892	alignas(`8`) QRgba64 src_buffer[BufferSize];
4893
4894	const int image_width = data->texture.width;
4895	const int image_height = data->texture.height;
4896	int xoff = -qRound(d: -data->dx);
4897	int yoff = -qRound(d: -data->dy);
4898
4899	for (; count--; spans++) {
4900	if (!spans->len)
4901	continue;
4902	int x = spans->x;
4903	int length = spans->len;
4904	int sx = xoff + x;
4905	int sy = yoff + spans->y;
4906	if (sy >= `0` && sy < image_height && sx < image_width) {
4907	if (sx < `0`) {
4908	x -= sx;
4909	length += sx;
4910	sx = `0`;
4911	}
4912	if (sx + length > image_width)
4913	length = image_width - sx;
4914	if (length > `0`) {
4915	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
4916	while (length) {
4917	int l = qMin(a: BufferSize, b: length);
4918	const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4919	QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
4920	op.func64(dest, src, l, coverage);
4921	if (op.destStore64)
4922	op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
4923	x += l;
4924	sx += l;
4925	length -= l;
4926	}
4927	}
4928	}
4929	}
4930	}
4931	#endif
4932
4933	static void blend_untransformed_argb(int count, const QSpan spans, void* *userData)
4934	{
4935	QSpanData data = reinterpret_cast<QSpanData >(userData);
4936	if (data->texture.format != QImage::Format_ARGB32_Premultiplied
4937	&& data->texture.format != QImage::Format_RGB32) {
4938	blend_untransformed_generic(count, spans, userData);
4939	return;
4940	}
4941
4942	Operator op = getOperator(data, spans, spanCount: count);
4943
4944	const int image_width = data->texture.width;
4945	const int image_height = data->texture.height;
4946	int xoff = -qRound(d: -data->dx);
4947	int yoff = -qRound(d: -data->dy);
4948
4949	for (; count--; spans++) {
4950	if (!spans->len)
4951	continue;
4952	int x = spans->x;
4953	int length = spans->len;
4954	int sx = xoff + x;
4955	int sy = yoff + spans->y;
4956	if (sy >= `0` && sy < image_height && sx < image_width) {
4957	if (sx < `0`) {
4958	x -= sx;
4959	length += sx;
4960	sx = `0`;
4961	}
4962	if (sx + length > image_width)
4963	length = image_width - sx;
4964	if (length > `0`) {
4965	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
4966	const uint src = (const* uint *)data->texture.scanLine(y: sy) + sx;
4967	uint dest = ((uint )data->rasterBuffer->scanLine(y: spans->y)) + x;
4968	op.func(dest, src, length, coverage);
4969	}
4970	}
4971	}
4972	}
4973
4974	static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a,
4975	quint16 y, quint8 b)
4976	{
4977	quint16 t = ((((x & `0x07e0`) * a) + ((y & `0x07e0`) * b)) >> `5`) & `0x07e0`;
4978	t \|= ((((x & `0xf81f`) * a) + ((y & `0xf81f`) * b)) >> `5`) & `0xf81f`;
4979
4980	return t;
4981	}
4982
4983	static inline quint32 interpolate_pixel_rgb16x2_255(quint32 x, quint8 a,
4984	quint32 y, quint8 b)
4985	{
4986	uint t;
4987	t = ((((x & `0xf81f07e0`) >> `5`) * a) + (((y & `0xf81f07e0`) >> `5`) * b)) & `0xf81f07e0`;
4988	t \|= ((((x & `0x07e0f81f`) * a) + ((y & `0x07e0f81f`) * b)) >> `5`) & `0x07e0f81f`;
4989	return t;
4990	}
4991
4992	static inline void blend_sourceOver_rgb16_rgb16(quint16 *Q_DECL_RESTRICT dest,
4993	const quint16 *Q_DECL_RESTRICT src,
4994	int length,
4995	const quint8 alpha,
4996	const quint8 ialpha)
4997	{
4998	const int dstAlign = ((quintptr)dest) & `0x3`;
4999	if (dstAlign) {
5000	dest = interpolate_pixel_rgb16_255(x: src, a: alpha, y: *dest, b: ialpha);
5001	++dest;
5002	++src;
5003	--length;
5004	}
5005	const int srcAlign = ((quintptr)src) & `0x3`;
5006	int length32 = length >> `1`;
5007	if (length32 && srcAlign == `0`) {
5008	while (length32--) {
5009	const quint32 src32 = reinterpret_cast<const* quint32*>(src);
5010	quint32 dest32 = reinterpret_cast<quint32>(dest);
5011	dest32 = interpolate_pixel_rgb16x2_255(x: src32, a: alpha,
5012	y: *dest32, b: ialpha);
5013	dest += `2`;
5014	src += `2`;
5015	}
5016	length &= `0x1`;
5017	}
5018	while (length--) {
5019	dest = interpolate_pixel_rgb16_255(x: src, a: alpha, y: *dest, b: ialpha);
5020	++dest;
5021	++src;
5022	}
5023	}
5024
5025	static void blend_untransformed_rgb565(int count, const QSpan spans, void* *userData)
5026	{
5027	QSpanData data = reinterpret_cast<QSpanData>(userData);
5028	QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
5029
5030	if (data->texture.format != QImage::Format_RGB16
5031	\|\| (mode != QPainter::CompositionMode_SourceOver
5032	&& mode != QPainter::CompositionMode_Source))
5033	{
5034	blend_untransformed_generic(count, spans, userData);
5035	return;
5036	}
5037
5038	const int image_width = data->texture.width;
5039	const int image_height = data->texture.height;
5040	int xoff = -qRound(d: -data->dx);
5041	int yoff = -qRound(d: -data->dy);
5042
5043	const QSpan *end = spans + count;
5044	while (spans < end) {
5045	if (!spans->len) {
5046	++spans;
5047	continue;
5048	}
5049	const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> `8`;
5050	if (coverage == `0`) {
5051	++spans;
5052	continue;
5053	}
5054
5055	int x = spans->x;
5056	int length = spans->len;
5057	int sx = xoff + x;
5058	int sy = yoff + spans->y;
5059	if (sy >= `0` && sy < image_height && sx < image_width) {
5060	if (sx < `0`) {
5061	x -= sx;
5062	length += sx;
5063	sx = `0`;
5064	}
5065	if (sx + length > image_width)
5066	length = image_width - sx;
5067	if (length > `0`) {
5068	quint16 dest = (quint16 )data->rasterBuffer->scanLine(y: spans->y) + x;
5069	const quint16 src = (const* quint16 *)data->texture.scanLine(y: sy) + sx;
5070	if (coverage == `255`) {
5071	memcpy(dest: dest, src: src, n: length * sizeof(quint16));
5072	} else {
5073	const quint8 alpha = (coverage + `1`) >> `3`;
5074	const quint8 ialpha = `0x20` - alpha;
5075	if (alpha > `0`)
5076	blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha);
5077	}
5078	}
5079	}
5080	++spans;
5081	}
5082	}
5083
5084	static void blend_tiled_generic(int count, const QSpan spans, void* *userData)
5085	{
5086	QSpanData data = reinterpret_cast<QSpanData >(userData);
5087
5088	uint buffer[BufferSize];
5089	uint src_buffer[BufferSize];
5090	Operator op = getOperator(data, spans, spanCount: count);
5091
5092	const int image_width = data->texture.width;
5093	const int image_height = data->texture.height;
5094	int xoff = -qRound(d: -data->dx) % image_width;
5095	int yoff = -qRound(d: -data->dy) % image_height;
5096
5097	if (xoff < `0`)
5098	xoff += image_width;
5099	if (yoff < `0`)
5100	yoff += image_height;
5101
5102	while (count--) {
5103	int x = spans->x;
5104	int length = spans->len;
5105	int sx = (xoff + spans->x) % image_width;
5106	int sy = (spans->y + yoff) % image_height;
5107	if (sx < `0`)
5108	sx += image_width;
5109	if (sy < `0`)
5110	sy += image_height;
5111
5112	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
5113	while (length) {
5114	int l = qMin(a: image_width - sx, b: length);
5115	if (BufferSize < l)
5116	l = BufferSize;
5117	const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
5118	uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
5119	op.func(dest, src, l, coverage);
5120	if (op.destStore)
5121	op.destStore(data->rasterBuffer, x, spans->y, dest, l);
5122	x += l;
5123	sx += l;
5124	length -= l;
5125	if (sx >= image_width)
5126	sx = `0`;
5127	}
5128	++spans;
5129	}
5130	}
5131
5132	#if QT_CONFIG(raster_64bit)
5133	static void blend_tiled_generic_rgb64(int count, const QSpan spans, void* *userData)
5134	{
5135	QSpanData data = reinterpret_cast<QSpanData >(userData);
5136
5137	Operator op = getOperator(data, spans, spanCount: count);
5138	if (!op.func64) {
5139	qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
5140	return blend_tiled_generic(count, spans, userData);
5141	}
5142	alignas(`8`) QRgba64 buffer[BufferSize];
5143	alignas(`8`) QRgba64 src_buffer[BufferSize];
5144
5145	const int image_width = data->texture.width;
5146	const int image_height = data->texture.height;
5147	int xoff = -qRound(d: -data->dx) % image_width;
5148	int yoff = -qRound(d: -data->dy) % image_height;
5149
5150	if (xoff < `0`)
5151	xoff += image_width;
5152	if (yoff < `0`)
5153	yoff += image_height;
5154
5155	bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32;
5156	bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64;
5157	if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 \|\| isBpp64)) {
5158	// If destination isn't blended into the result, we can do the tiling directly on destination pixels.
5159	while (count--) {
5160	int x = spans->x;
5161	int y = spans->y;
5162	int length = spans->len;
5163	int sx = (xoff + spans->x) % image_width;
5164	int sy = (spans->y + yoff) % image_height;
5165	if (sx < `0`)
5166	sx += image_width;
5167	if (sy < `0`)
5168	sy += image_height;
5169
5170	int sl = qMin(a: image_width, b: length);
5171	if (sx > `0` && sl > `0`) {
5172	int l = qMin(a: image_width - sx, b: sl);
5173	const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
5174	op.destStore64(data->rasterBuffer, x, y, src, l);
5175	x += l;
5176	sx += l;
5177	sl -= l;
5178	if (sx >= image_width)
5179	sx = `0`;
5180	}
5181	if (sl > `0`) {
5182	Q_ASSERT(sx == `0`);
5183	const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, sl);
5184	op.destStore64(data->rasterBuffer, x, y, src, sl);
5185	x += sl;
5186	sx += sl;
5187	sl -= sl;
5188	if (sx >= image_width)
5189	sx = `0`;
5190	}
5191	if (isBpp32) {
5192	uint dest = reinterpret_cast<uint >(data->rasterBuffer->scanLine(y)) + x - image_width;
5193	for (int i = image_width; i < length; ++i)
5194	dest[i] = dest[i - image_width];
5195	} else {
5196	quint64 dest = reinterpret_cast<quint64 >(data->rasterBuffer->scanLine(y)) + x - image_width;
5197	for (int i = image_width; i < length; ++i)
5198	dest[i] = dest[i - image_width];
5199	}
5200	++spans;
5201	}
5202	return;
5203	}
5204
5205	while (count--) {
5206	int x = spans->x;
5207	int length = spans->len;
5208	int sx = (xoff + spans->x) % image_width;
5209	int sy = (spans->y + yoff) % image_height;
5210	if (sx < `0`)
5211	sx += image_width;
5212	if (sy < `0`)
5213	sy += image_height;
5214
5215	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
5216	while (length) {
5217	int l = qMin(a: image_width - sx, b: length);
5218	if (BufferSize < l)
5219	l = BufferSize;
5220	const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
5221	QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
5222	op.func64(dest, src, l, coverage);
5223	if (op.destStore64)
5224	op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
5225	x += l;
5226	sx += l;
5227	length -= l;
5228	if (sx >= image_width)
5229	sx = `0`;
5230	}
5231	++spans;
5232	}
5233	}
5234	#endif
5235
5236	static void blend_tiled_argb(int count, const QSpan spans, void* *userData)
5237	{
5238	QSpanData data = reinterpret_cast<QSpanData >(userData);
5239	if (data->texture.format != QImage::Format_ARGB32_Premultiplied
5240	&& data->texture.format != QImage::Format_RGB32) {
5241	blend_tiled_generic(count, spans, userData);
5242	return;
5243	}
5244
5245	Operator op = getOperator(data, spans, spanCount: count);
5246
5247	int image_width = data->texture.width;
5248	int image_height = data->texture.height;
5249	int xoff = -qRound(d: -data->dx) % image_width;
5250	int yoff = -qRound(d: -data->dy) % image_height;
5251
5252	if (xoff < `0`)
5253	xoff += image_width;
5254	if (yoff < `0`)
5255	yoff += image_height;
5256
5257	while (count--) {
5258	int x = spans->x;
5259	int length = spans->len;
5260	int sx = (xoff + spans->x) % image_width;
5261	int sy = (spans->y + yoff) % image_height;
5262	if (sx < `0`)
5263	sx += image_width;
5264	if (sy < `0`)
5265	sy += image_height;
5266
5267	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
5268	while (length) {
5269	int l = qMin(a: image_width - sx, b: length);
5270	if (BufferSize < l)
5271	l = BufferSize;
5272	const uint src = (const* uint *)data->texture.scanLine(y: sy) + sx;
5273	uint dest = ((uint )data->rasterBuffer->scanLine(y: spans->y)) + x;
5274	op.func(dest, src, l, coverage);
5275	x += l;
5276	sx += l;
5277	length -= l;
5278	if (sx >= image_width)
5279	sx = `0`;
5280	}
5281	++spans;
5282	}
5283	}
5284
5285	static void blend_tiled_rgb565(int count, const QSpan spans, void* *userData)
5286	{
5287	QSpanData data = reinterpret_cast<QSpanData>(userData);
5288	QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
5289
5290	if (data->texture.format != QImage::Format_RGB16
5291	\|\| (mode != QPainter::CompositionMode_SourceOver
5292	&& mode != QPainter::CompositionMode_Source))
5293	{
5294	blend_tiled_generic(count, spans, userData);
5295	return;
5296	}
5297
5298	const int image_width = data->texture.width;
5299	const int image_height = data->texture.height;
5300	int xoff = -qRound(d: -data->dx) % image_width;
5301	int yoff = -qRound(d: -data->dy) % image_height;
5302
5303	if (xoff < `0`)
5304	xoff += image_width;
5305	if (yoff < `0`)
5306	yoff += image_height;
5307
5308	while (count--) {
5309	const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> `8`;
5310	if (coverage == `0`) {
5311	++spans;
5312	continue;
5313	}
5314
5315	int x = spans->x;
5316	int length = spans->len;
5317	int sx = (xoff + spans->x) % image_width;
5318	int sy = (spans->y + yoff) % image_height;
5319	if (sx < `0`)
5320	sx += image_width;
5321	if (sy < `0`)
5322	sy += image_height;
5323
5324	if (coverage == `255`) {
5325	// Copy the first texture block
5326	length = qMin(a: image_width,b: length);
5327	int tx = x;
5328	while (length) {
5329	int l = qMin(a: image_width - sx, b: length);
5330	if (BufferSize < l)
5331	l = BufferSize;
5332	quint16 dest = ((quint16 )data->rasterBuffer->scanLine(y: spans->y)) + tx;
5333	const quint16 src = (const* quint16 *)data->texture.scanLine(y: sy) + sx;
5334	memcpy(dest: dest, src: src, n: l * sizeof(quint16));
5335	length -= l;
5336	tx += l;
5337	sx += l;
5338	if (sx >= image_width)
5339	sx = `0`;
5340	}
5341
5342	// Now use the rasterBuffer as the source of the texture,
5343	// We can now progressively copy larger blocks
5344	// - Less cpu time in code figuring out what to copy
5345	// We are dealing with one block of data
5346	// - More likely to fit in the cache
5347	// - can use memcpy
5348	int copy_image_width = qMin(a: image_width, b: int(spans->len));
5349	length = spans->len - copy_image_width;
5350	quint16 src = ((quint16 )data->rasterBuffer->scanLine(y: spans->y)) + x;
5351	quint16 *dest = src + copy_image_width;
5352	while (copy_image_width < length) {
5353	memcpy(dest: dest, src: src, n: copy_image_width * sizeof(quint16));
5354	dest += copy_image_width;
5355	length -= copy_image_width;
5356	copy_image_width *= `2`;
5357	}
5358	if (length > `0`)
5359	memcpy(dest: dest, src: src, n: length * sizeof(quint16));
5360	} else {
5361	const quint8 alpha = (coverage + `1`) >> `3`;
5362	const quint8 ialpha = `0x20` - alpha;
5363	if (alpha > `0`) {
5364	while (length) {
5365	int l = qMin(a: image_width - sx, b: length);
5366	if (BufferSize < l)
5367	l = BufferSize;
5368	quint16 dest = ((quint16 )data->rasterBuffer->scanLine(y: spans->y)) + x;
5369	const quint16 src = (const* quint16 *)data->texture.scanLine(y: sy) + sx;
5370	blend_sourceOver_rgb16_rgb16(dest, src, length: l, alpha, ialpha);
5371	x += l;
5372	sx += l;
5373	length -= l;
5374	if (sx >= image_width)
5375	sx = `0`;
5376	}
5377	}
5378	}
5379	++spans;
5380	}
5381	}
5382
/* Image formats here are target formats */
5384	static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = {
5385	blend_untransformed_argb, // Untransformed
5386	blend_tiled_argb, // Tiled
5387	blend_src_generic, // Transformed
5388	blend_src_generic, // TransformedTiled
5389	blend_src_generic, // TransformedBilinear
5390	blend_src_generic // TransformedBilinearTiled
5391	};
5392
5393	static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = {
5394	blend_untransformed_rgb565, // Untransformed
5395	blend_tiled_rgb565, // Tiled
5396	blend_src_generic, // Transformed
5397	blend_src_generic, // TransformedTiled
5398	blend_src_generic, // TransformedBilinear
5399	blend_src_generic // TransformedBilinearTiled
5400	};
5401
5402	static const ProcessSpans processTextureSpansGeneric[NBlendTypes] = {
5403	blend_untransformed_generic, // Untransformed
5404	blend_tiled_generic, // Tiled
5405	blend_src_generic, // Transformed
5406	blend_src_generic, // TransformedTiled
5407	blend_src_generic, // TransformedBilinear
5408	blend_src_generic // TransformedBilinearTiled
5409	};
5410
5411	#if QT_CONFIG(raster_64bit)
5412	static const ProcessSpans processTextureSpansGeneric64[NBlendTypes] = {
5413	blend_untransformed_generic_rgb64, // Untransformed
5414	blend_tiled_generic_rgb64, // Tiled
5415	blend_src_generic_rgb64, // Transformed
5416	blend_src_generic_rgb64, // TransformedTiled
5417	blend_src_generic_rgb64, // TransformedBilinear
5418	blend_src_generic_rgb64 // TransformedBilinearTiled
5419	};
5420	#endif
5421
5422	void qBlendTexture(int count, const QSpan spans, void* *userData)
5423	{
5424	QSpanData data = reinterpret_cast<QSpanData >(userData);
5425	TextureBlendType blendType = getBlendType(data);
5426	ProcessSpans proc;
5427	switch (data->rasterBuffer->format) {
5428	case QImage::Format_ARGB32_Premultiplied:
5429	proc = processTextureSpansARGB32PM[blendType];
5430	break;
5431	case QImage::Format_RGB16:
5432	proc = processTextureSpansRGB16[blendType];
5433	break;
5434	#if QT_CONFIG(raster_64bit)
5435	#if defined(__SSE2__) \|\| defined(__ARM_NEON__) \|\| (Q_PROCESSOR_WORDSIZE == 8)
5436	case QImage::Format_ARGB32:
5437	case QImage::Format_RGBA8888:
5438	#endif
5439	case QImage::Format_BGR30:
5440	case QImage::Format_A2BGR30_Premultiplied:
5441	case QImage::Format_RGB30:
5442	case QImage::Format_A2RGB30_Premultiplied:
5443	case QImage::Format_RGBX64:
5444	case QImage::Format_RGBA64:
5445	case QImage::Format_RGBA64_Premultiplied:
5446	case QImage::Format_Grayscale16:
5447	proc = processTextureSpansGeneric64[blendType];
5448	break;
5449	#endif // QT_CONFIG(raster_64bit)
5450	case QImage::Format_Invalid:
5451	Q_UNREACHABLE();
5452	return;
5453	default:
5454	proc = processTextureSpansGeneric[blendType];
5455	break;
5456	}
5457	proc(count, spans, userData);
5458	}
5459
5460	static void blend_vertical_gradient_argb(int count, const QSpan spans, void* *userData)
5461	{
5462	QSpanData data = reinterpret_cast<QSpanData >(userData);
5463
5464	LinearGradientValues linear;
5465	getLinearGradientValues(v: &linear, data);
5466
5467	CompositionFunctionSolid funcSolid =
5468	functionForModeSolid[data->rasterBuffer->compositionMode];
5469
5470	/*
5471	The logic for vertical gradient calculations is a mathematically
5472	reduced copy of that in fetchLinearGradient() - which is basically:
5473
5474	qreal ry = data->m22 (y + 0.5) + data->dy;*
5475	qreal t = linear.dyry + linear.off;*
5476	t = (GRADIENT_STOPTABLE_SIZE - 1);*
5477	quint32 color =
5478	qt_gradient_pixel_fixed(&data->gradient,
5479	int(t FIXPT_SIZE));*
5480
5481	This has then been converted to fixed point to improve performance.
5482	*/
5483	const int gss = GRADIENT_STOPTABLE_SIZE - `1`;
5484	int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5485	int off = int((((linear.dy * (data->m22 * qreal(`0.5`) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5486
5487	while (count--) {
5488	int y = spans->y;
5489	int x = spans->x;
5490
5491	quint32 dst = (quint32 )(data->rasterBuffer->scanLine(y)) + x;
5492	quint32 color =
5493	qt_gradient_pixel_fixed(data: &data->gradient, fixed_pos: yinc * y + off);
5494
5495	funcSolid(dst, spans->len, color, spans->coverage);
5496	++spans;
5497	}
5498	}
5499
5500	template<ProcessSpans blend_color>
5501	static void blend_vertical_gradient(int count, const QSpan spans, void* *userData)
5502	{
5503	QSpanData data = reinterpret_cast<QSpanData >(userData);
5504
5505	LinearGradientValues linear;
5506	getLinearGradientValues(v: &linear, data);
5507
5508	// Based on the same logic as blend_vertical_gradient_argb.
5509
5510	const int gss = GRADIENT_STOPTABLE_SIZE - `1`;
5511	int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5512	int off = int((((linear.dy * (data->m22 * qreal(`0.5`) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5513
5514	while (count--) {
5515	int y = spans->y;
5516
5517	#if QT_CONFIG(raster_64bit)
5518	data->solidColor = qt_gradient_pixel64_fixed(data: &data->gradient, fixed_pos: yinc * y + off);
5519	#else
5520	data->solidColor = QRgba64::fromArgb32(qt_gradient_pixel_fixed(&data->gradient, yinc * y + off));
5521	#endif
5522	blend_color(`1`, spans, userData);
5523	++spans;
5524	}
5525	}
5526
5527	void qBlendGradient(int count, const QSpan spans, void* *userData)
5528	{
5529	QSpanData data = reinterpret_cast<QSpanData >(userData);
5530	bool isVerticalGradient =
5531	data->txop <= QTransform::TxScale &&
5532	data->type == QSpanData::LinearGradient &&
5533	data->gradient.linear.end.x == data->gradient.linear.origin.x;
5534	switch (data->rasterBuffer->format) {
5535	case QImage::Format_RGB16:
5536	if (isVerticalGradient)
5537	return blend_vertical_gradient<blend_color_rgb16>(count, spans, userData);
5538	return blend_src_generic(count, spans, userData);
5539	case QImage::Format_RGB32:
5540	case QImage::Format_ARGB32_Premultiplied:
5541	if (isVerticalGradient)
5542	return blend_vertical_gradient_argb(count, spans, userData);
5543	return blend_src_generic(count, spans, userData);
5544	#if QT_CONFIG(raster_64bit)
5545	#if defined(__SSE2__) \|\| defined(__ARM_NEON__) \|\| (Q_PROCESSOR_WORDSIZE == 8)
5546	case QImage::Format_ARGB32:
5547	case QImage::Format_RGBA8888:
5548	#endif
5549	case QImage::Format_BGR30:
5550	case QImage::Format_A2BGR30_Premultiplied:
5551	case QImage::Format_RGB30:
5552	case QImage::Format_A2RGB30_Premultiplied:
5553	case QImage::Format_RGBX64:
5554	case QImage::Format_RGBA64:
5555	case QImage::Format_RGBA64_Premultiplied:
5556	if (isVerticalGradient)
5557	return blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData);
5558	return blend_src_generic_rgb64(count, spans, userData);
5559	#endif // QT_CONFIG(raster_64bit)
5560	case QImage::Format_Invalid:
5561	break;
5562	default:
5563	if (isVerticalGradient)
5564	return blend_vertical_gradient<blend_color_generic>(count, spans, userData);
5565	return blend_src_generic(count, spans, userData);
5566	}
5567	Q_UNREACHABLE();
5568	}
5569
5570	template <class DST> static
5571	inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer,
5572	int x, int y, DST color,
5573	const uchar *map,
5574	int mapWidth, int mapHeight, int mapStride)
5575	{
5576	DST dest = reinterpret_cast<DST >(rasterBuffer->scanLine(y)) + x;
5577	const int destStride = rasterBuffer->stride<DST>();
5578
5579	if (mapWidth > `8`) {
5580	while (mapHeight--) {
5581	int x0 = `0`;
5582	int n = `0`;
5583	for (int x = `0`; x < mapWidth; x += `8`) {
5584	uchar s = map[x >> `3`];
5585	for (int i = `0`; i < `8`; ++i) {
5586	if (s & `0x80`) {
5587	++n;
5588	} else {
5589	if (n) {
5590	qt_memfill(dest + x0, color, n);
5591	x0 += n + `1`;
5592	n = `0`;
5593	} else {
5594	++x0;
5595	}
5596	if (!s) {
5597	x0 += `8` - `1` - i;
5598	break;
5599	}
5600	}
5601	s <<= `1`;
5602	}
5603	}
5604	if (n)
5605	qt_memfill(dest + x0, color, n);
5606	dest += destStride;
5607	map += mapStride;
5608	}
5609	} else {
5610	while (mapHeight--) {
5611	int x0 = `0`;
5612	int n = `0`;
5613	for (uchar s = *map; s; s <<= `1`) {
5614	if (s & `0x80`) {
5615	++n;
5616	} else if (n) {
5617	qt_memfill(dest + x0, color, n);
5618	x0 += n + `1`;
5619	n = `0`;
5620	} else {
5621	++x0;
5622	}
5623	}
5624	if (n)
5625	qt_memfill(dest + x0, color, n);
5626	dest += destStride;
5627	map += mapStride;
5628	}
5629	}
5630	}
5631
5632	inline static void qt_bitmapblit_argb32(QRasterBuffer *rasterBuffer,
5633	int x, int y, const QRgba64 &color,
5634	const uchar *map,
5635	int mapWidth, int mapHeight, int mapStride)
5636	{
5637	qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color: color.toArgb32(),
5638	map, mapWidth, mapHeight, mapStride);
5639	}
5640
5641	inline static void qt_bitmapblit_rgba8888(QRasterBuffer *rasterBuffer,
5642	int x, int y, const QRgba64 &color,
5643	const uchar *map,
5644	int mapWidth, int mapHeight, int mapStride)
5645	{
5646	qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color: ARGB2RGBA(x: color.toArgb32()),
5647	map, mapWidth, mapHeight, mapStride);
5648	}
5649
5650	template<QtPixelOrder PixelOrder>
5651	inline static void qt_bitmapblit_rgb30(QRasterBuffer *rasterBuffer,
5652	int x, int y, const QRgba64 &color,
5653	const uchar *map,
5654	int mapWidth, int mapHeight, int mapStride)
5655	{
5656	qt_bitmapblit_template<quint32>(rasterBuffer, x, y, qConvertRgb64ToRgb30<PixelOrder>(color),
5657	map, mapWidth, mapHeight, mapStride);
5658	}
5659
5660	inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer,
5661	int x, int y, const QRgba64 &color,
5662	const uchar *map,
5663	int mapWidth, int mapHeight, int mapStride)
5664	{
5665	qt_bitmapblit_template<quint16>(rasterBuffer, x, y, color: color.toRgb16(),
5666	map, mapWidth, mapHeight, mapStride);
5667	}
5668
5669	static inline void grayBlendPixel(quint32 dst, int* coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5670	{
5671	// Do a gammacorrected gray alphablend...
5672	const QRgba64 dstLinear = colorProfile ? colorProfile->toLinear64(rgb32: dst) : QRgba64::fromArgb32(rgb: dst);
5673
5674	QRgba64 blend = interpolate255(x: srcLinear, alpha1: coverage, y: dstLinear, alpha2: `255` - coverage);
5675
5676	*dst = colorProfile ? colorProfile->fromLinear64(rgb64: blend) : toArgb32(rgba64: blend);
5677	}
5678
5679	static inline void alphamapblend_argb32(quint32 dst, int* coverage, QRgba64 srcLinear, quint32 src, const QColorTrcLut *colorProfile)
5680	{
5681	if (coverage == `0`) {
5682	// nothing
5683	} else if (coverage == `255` \|\| !colorProfile) {
5684	blend_pixel(dst&: *dst, src, const_alpha: coverage);
5685	} else if (*dst < `0xff000000`) {
5686	// Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
5687	blend_pixel(dst&: *dst, src, const_alpha: coverage);
5688	} else if (src >= `0xff000000`) {
5689	grayBlendPixel(dst, coverage, srcLinear, colorProfile);
5690	} else {
5691	// First do naive blend with text-color
5692	QRgb s = *dst;
5693	blend_pixel(dst&: s, src);
5694	// Then gamma-corrected blend with glyph shape
5695	QRgba64 s64 = colorProfile ? colorProfile->toLinear64(rgb32: s) : QRgba64::fromArgb32(rgb: s);
5696	grayBlendPixel(dst, coverage, srcLinear: s64, colorProfile);
5697	}
5698	}
5699
5700	#if QT_CONFIG(raster_64bit)
5701
5702	static inline void grayBlendPixel(QRgba64 &dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5703	{
5704	// Do a gammacorrected gray alphablend...
5705	QRgba64 dstColor = dst;
5706	if (colorProfile) {
5707	if (dstColor.isOpaque())
5708	dstColor = colorProfile->toLinear(rgb64: dstColor);
5709	else if (!dstColor.isTransparent())
5710	dstColor = colorProfile->toLinear(rgb64: dstColor.unpremultiplied()).premultiplied();
5711	}
5712
5713	blend_pixel(dst&: dstColor, src: srcLinear, const_alpha: coverage);
5714
5715	if (colorProfile) {
5716	if (dstColor.isOpaque())
5717	dstColor = colorProfile->fromLinear(rgb64: dstColor);
5718	else if (!dstColor.isTransparent())
5719	dstColor = colorProfile->fromLinear(rgb64: dstColor.unpremultiplied()).premultiplied();
5720	}
5721	dst = dstColor;
5722	}
5723
5724	static inline void alphamapblend_generic(int coverage, QRgba64 dest, int* x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
5725	{
5726	if (coverage == `0`) {
5727	// nothing
5728	} else if (coverage == `255`) {
5729	blend_pixel(dst&: dest[x], src);
5730	} else if (src.isOpaque()) {
5731	grayBlendPixel(dst&: dest[x], coverage, srcLinear, colorProfile);
5732	} else {
5733	// First do naive blend with text-color
5734	QRgba64 s = dest[x];
5735	blend_pixel(dst&: s, src);
5736	// Then gamma-corrected blend with glyph shape
5737	if (colorProfile)
5738	s = colorProfile->toLinear(rgb64: s);
5739	grayBlendPixel(dst&: dest[x], coverage, srcLinear: s, colorProfile);
5740	}
5741	}
5742
5743	static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5744	int x, int y, const QRgba64 &color,
5745	const uchar *map,
5746	int mapWidth, int mapHeight, int mapStride,
5747	const QClipData clip, bool* useGammaCorrection)
5748	{
5749	if (color.isTransparent())
5750	return;
5751
5752	const QColorTrcLut colorProfile = nullptr*;
5753
5754	if (useGammaCorrection)
5755	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5756
5757	QRgba64 srcColor = color;
5758	if (colorProfile && color.isOpaque())
5759	srcColor = colorProfile->toLinear(rgb64: srcColor);
5760
5761	alignas(`8`) QRgba64 buffer[BufferSize];
5762	const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5763	const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5764
5765	if (!clip) {
5766	for (int ly = `0`; ly < mapHeight; ++ly) {
5767	int i = x;
5768	int length = mapWidth;
5769	while (length > `0`) {
5770	int l = qMin(a: BufferSize, b: length);
5771	QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5772	for (int j=`0`; j < l; ++j) {
5773	const int coverage = map[j + (i - x)];
5774	alphamapblend_generic(coverage, dest, x: j, srcLinear: srcColor, src: color, colorProfile);
5775	}
5776	if (destStore64)
5777	destStore64(rasterBuffer, i, y + ly, dest, l);
5778	length -= l;
5779	i += l;
5780	}
5781	map += mapStride;
5782	}
5783	} else {
5784	int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5785
5786	int top = qMax(a: y, b: `0`);
5787	map += (top - y) * mapStride;
5788
5789	const_cast<QClipData *>(clip)->initialize();
5790	for (int yp = top; yp<bottom; ++yp) {
5791	const QClipData::ClipLine &line = clip->m_clipLines[yp];
5792
5793	for (int i=`0`; i<line.count; ++i) {
5794	const QSpan &clip = line.spans[i];
5795
5796	int start = qMax<int>(a: x, b: clip.x);
5797	int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5798	if (end <= start)
5799	continue;
5800	Q_ASSERT(end - start <= BufferSize);
5801	QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
5802
5803	for (int xp=start; xp<end; ++xp) {
5804	const int coverage = map[xp - x];
5805	alphamapblend_generic(coverage, dest, x: xp - start, srcLinear: srcColor, src: color, colorProfile);
5806	}
5807	if (destStore64)
5808	destStore64(rasterBuffer, start, clip.y, dest, end - start);
5809	} // for (i -> line.count)
5810	map += mapStride;
5811	} // for (yp -> bottom)
5812	}
5813	}
5814	#else
5815	static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5816	int x, int y, const QRgba64 &color,
5817	const uchar *map,
5818	int mapWidth, int mapHeight, int mapStride,
5819	const QClipData clip, bool* useGammaCorrection)
5820	{
5821	if (color.isTransparent())
5822	return;
5823
5824	const quint32 c = color.toArgb32();
5825
5826	const QColorTrcLut colorProfile = nullptr*;
5827
5828	if (useGammaCorrection)
5829	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5830
5831	QRgba64 srcColor = color;
5832	if (colorProfile && color.isOpaque())
5833	srcColor = colorProfile->toLinear(srcColor);
5834
5835	quint32 buffer[BufferSize];
5836	const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
5837	const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
5838
5839	if (!clip) {
5840	for (int ly = `0`; ly < mapHeight; ++ly) {
5841	int i = x;
5842	int length = mapWidth;
5843	while (length > `0`) {
5844	int l = qMin(BufferSize, length);
5845	quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
5846	for (int j=`0`; j < l; ++j) {
5847	const int coverage = map[j + (i - x)];
5848	alphamapblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
5849	}
5850	if (destStore)
5851	destStore(rasterBuffer, i, y + ly, dest, l);
5852	length -= l;
5853	i += l;
5854	}
5855	map += mapStride;
5856	}
5857	} else {
5858	int bottom = qMin(y + mapHeight, rasterBuffer->height());
5859
5860	int top = qMax(y, `0`);
5861	map += (top - y) * mapStride;
5862
5863	const_cast<QClipData *>(clip)->initialize();
5864	for (int yp = top; yp<bottom; ++yp) {
5865	const QClipData::ClipLine &line = clip->m_clipLines[yp];
5866
5867	for (int i=`0`; i<line.count; ++i) {
5868	const QSpan &clip = line.spans[i];
5869
5870	int start = qMax<int>(x, clip.x);
5871	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5872	if (end <= start)
5873	continue;
5874	Q_ASSERT(end - start <= BufferSize);
5875	quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
5876
5877	for (int xp=start; xp<end; ++xp) {
5878	const int coverage = map[xp - x];
5879	alphamapblend_argb32(dest + xp - x, coverage, srcColor, color, colorProfile);
5880	}
5881	if (destStore)
5882	destStore(rasterBuffer, start, clip.y, dest, end - start);
5883	} // for (i -> line.count)
5884	map += mapStride;
5885	} // for (yp -> bottom)
5886	}
5887	}
5888	#endif
5889
5890	static inline void alphamapblend_quint16(int coverage, quint16 dest, int* x, const quint16 srcColor)
5891	{
5892	if (coverage == `0`) {
5893	// nothing
5894	} else if (coverage == `255`) {
5895	dest[x] = srcColor;
5896	} else {
5897	dest[x] = BYTE_MUL_RGB16(x: srcColor, a: coverage)
5898	+ BYTE_MUL_RGB16(x: dest[x], a: `255` - coverage);
5899	}
5900	}
5901
5902	void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
5903	int x, int y, const QRgba64 &color,
5904	const uchar *map,
5905	int mapWidth, int mapHeight, int mapStride,
5906	const QClipData clip, bool* useGammaCorrection)
5907	{
5908	if (useGammaCorrection \|\| !color.isOpaque()) {
5909	qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
5910	return;
5911	}
5912
5913	const quint16 c = color.toRgb16();
5914
5915	if (!clip) {
5916	quint16 dest = reinterpret_cast<quint16>(rasterBuffer->scanLine(y)) + x;
5917	const int destStride = rasterBuffer->stride<quint16>();
5918	while (mapHeight--) {
5919	for (int i = `0`; i < mapWidth; ++i)
5920	alphamapblend_quint16(coverage: map[i], dest, x: i, srcColor: c);
5921	dest += destStride;
5922	map += mapStride;
5923	}
5924	} else {
5925	int top = qMax(a: y, b: `0`);
5926	int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5927	map += (top - y) * mapStride;
5928
5929	const_cast<QClipData *>(clip)->initialize();
5930	for (int yp = top; yp<bottom; ++yp) {
5931	const QClipData::ClipLine &line = clip->m_clipLines[yp];
5932
5933	quint16 dest = reinterpret_cast<quint16>(rasterBuffer->scanLine(y: yp));
5934
5935	for (int i=`0`; i<line.count; ++i) {
5936	const QSpan &clip = line.spans[i];
5937
5938	int start = qMax<int>(a: x, b: clip.x);
5939	int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5940
5941	for (int xp=start; xp<end; ++xp)
5942	alphamapblend_quint16(coverage: map[xp - x], dest, x: xp, srcColor: c);
5943	} // for (i -> line.count)
5944	map += mapStride;
5945	} // for (yp -> bottom)
5946	}
5947	}
5948
5949	static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer,
5950	int x, int y, const QRgba64 &color,
5951	const uchar *map,
5952	int mapWidth, int mapHeight, int mapStride,
5953	const QClipData clip, bool* useGammaCorrection)
5954	{
5955	const quint32 c = color.toArgb32();
5956	const int destStride = rasterBuffer->stride<quint32>();
5957
5958	if (color.isTransparent())
5959	return;
5960
5961	const QColorTrcLut colorProfile = nullptr*;
5962
5963	if (useGammaCorrection)
5964	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5965
5966	QRgba64 srcColor = color;
5967	if (colorProfile && color.isOpaque())
5968	srcColor = colorProfile->toLinear(rgb64: srcColor);
5969
5970	if (!clip) {
5971	quint32 dest = reinterpret_cast<quint32>(rasterBuffer->scanLine(y)) + x;
5972	while (mapHeight--) {
5973	for (int i = `0`; i < mapWidth; ++i) {
5974	const int coverage = map[i];
5975	alphamapblend_argb32(dst: dest + i, coverage, srcLinear: srcColor, src: c, colorProfile);
5976	}
5977	dest += destStride;
5978	map += mapStride;
5979	}
5980	} else {
5981	int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5982
5983	int top = qMax(a: y, b: `0`);
5984	map += (top - y) * mapStride;
5985
5986	const_cast<QClipData *>(clip)->initialize();
5987	for (int yp = top; yp<bottom; ++yp) {
5988	const QClipData::ClipLine &line = clip->m_clipLines[yp];
5989
5990	quint32 dest = reinterpret_cast<quint32 >(rasterBuffer->scanLine(y: yp));
5991
5992	for (int i=`0`; i<line.count; ++i) {
5993	const QSpan &clip = line.spans[i];
5994
5995	int start = qMax<int>(a: x, b: clip.x);
5996	int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5997
5998	for (int xp=start; xp<end; ++xp) {
5999	const int coverage = map[xp - x];
6000	alphamapblend_argb32(dst: dest + xp, coverage, srcLinear: srcColor, src: c, colorProfile);
6001	} // for (i -> line.count)
6002	} // for (yp -> bottom)
6003	map += mapStride;
6004	}
6005	}
6006	}
6007
6008	static inline int qRgbAvg(QRgb rgb)
6009	{
6010	return (qRed(rgb) * `5` + qGreen(rgb) * `6` + qBlue(rgb) * `5`) / `16`;
6011	}
6012
6013	static inline void rgbBlendPixel(quint32 dst, int* coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
6014	{
6015	// Do a gammacorrected RGB alphablend...
6016	const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(rgb32: dst) : QRgba64::fromArgb32(rgb: dst);
6017
6018	QRgba64 blend = rgbBlend(d: dlinear, s: slinear, rgbAlpha: coverage);
6019
6020	*dst = colorProfile ? colorProfile->fromLinear64(rgb64: blend) : toArgb32(rgba64: blend);
6021	}
6022
6023	static inline QRgb rgbBlend(QRgb d, QRgb s, uint rgbAlpha)
6024	{
6025	#if defined(__SSE2__)
6026	__m128i vd = _mm_cvtsi32_si128(a: d);
6027	__m128i vs = _mm_cvtsi32_si128(a: s);
6028	__m128i va = _mm_cvtsi32_si128(a: rgbAlpha);
6029	const __m128i vz = _mm_setzero_si128();
6030	vd = _mm_unpacklo_epi8(a: vd, b: vz);
6031	vs = _mm_unpacklo_epi8(a: vs, b: vz);
6032	va = _mm_unpacklo_epi8(a: va, b: vz);
6033	__m128i vb = _mm_xor_si128(a: _mm_set1_epi16(w: `255`), b: va);
6034	vs = _mm_mullo_epi16(a: vs, b: va);
6035	vd = _mm_mullo_epi16(a: vd, b: vb);
6036	vd = _mm_add_epi16(a: vd, b: vs);
6037	vd = _mm_add_epi16(a: vd, b: _mm_srli_epi16(a: vd, count: `8`));
6038	vd = _mm_add_epi16(a: vd, b: _mm_set1_epi16(w: `0x80`));
6039	vd = _mm_srli_epi16(a: vd, count: `8`);
6040	vd = _mm_packus_epi16(a: vd, b: vd);
6041	return _mm_cvtsi128_si32(a: vd);
6042	#else
6043	const int dr = qRed(d);
6044	const int dg = qGreen(d);
6045	const int db = qBlue(d);
6046
6047	const int sr = qRed(s);
6048	const int sg = qGreen(s);
6049	const int sb = qBlue(s);
6050
6051	const int mr = qRed(rgbAlpha);
6052	const int mg = qGreen(rgbAlpha);
6053	const int mb = qBlue(rgbAlpha);
6054
6055	const int nr = qt_div_255(sr * mr + dr * (`255` - mr));
6056	const int ng = qt_div_255(sg * mg + dg * (`255` - mg));
6057	const int nb = qt_div_255(sb * mb + db * (`255` - mb));
6058
6059	return `0xff000000` \| (nr << `16`) \| (ng << `8`) \| nb;
6060	#endif
6061	}
6062
6063	static inline void alphargbblend_argb32(quint32 dst, uint coverage, const* QRgba64 &srcLinear, quint32 src, const QColorTrcLut *colorProfile)
6064	{
6065	if (coverage == `0xff000000`) {
6066	// nothing
6067	} else if (coverage == `0xffffffff` && qAlpha(rgb: src) == `255`) {
6068	blend_pixel(dst&: *dst, src);
6069	} else if (*dst < `0xff000000`) {
6070	// Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
6071	blend_pixel(dst&: *dst, src, const_alpha: qRgbAvg(rgb: coverage));
6072	} else if (!colorProfile) {
6073	// First do naive blend with text-color
6074	QRgb s = *dst;
6075	blend_pixel(dst&: s, src);
6076	// Then a naive blend with glyph shape
6077	dst = rgbBlend(d: dst, s, rgbAlpha: coverage);
6078	} else if (srcLinear.isOpaque()) {
6079	rgbBlendPixel(dst, coverage, slinear: srcLinear, colorProfile);
6080	} else {
6081	// First do naive blend with text-color
6082	QRgb s = *dst;
6083	blend_pixel(dst&: s, src);
6084	// Then gamma-corrected blend with glyph shape
6085	QRgba64 s64 = colorProfile ? colorProfile->toLinear64(rgb32: s) : QRgba64::fromArgb32(rgb: s);
6086	rgbBlendPixel(dst, coverage, slinear: s64, colorProfile);
6087	}
6088	}
6089
6090	#if QT_CONFIG(raster_64bit)
6091	static inline void rgbBlendPixel(QRgba64 &dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
6092	{
6093	// Do a gammacorrected RGB alphablend...
6094	const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(rgb32: dst) : dst;
6095
6096	QRgba64 blend = rgbBlend(d: dlinear, s: slinear, rgbAlpha: coverage);
6097
6098	dst = colorProfile ? colorProfile->fromLinear(rgb64: blend) : blend;
6099	}
6100
6101	static inline void alphargbblend_generic(uint coverage, QRgba64 dest, int* x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
6102	{
6103	if (coverage == `0xff000000`) {
6104	// nothing
6105	} else if (coverage == `0xffffffff`) {
6106	blend_pixel(dst&: dest[x], src);
6107	} else if (!dest[x].isOpaque()) {
6108	// Do a gray alphablend.
6109	alphamapblend_generic(coverage: qRgbAvg(rgb: coverage), dest, x, srcLinear, src, colorProfile);
6110	} else if (src.isOpaque()) {
6111	rgbBlendPixel(dst&: dest[x], coverage, slinear: srcLinear, colorProfile);
6112	} else {
6113	// First do naive blend with text-color
6114	QRgba64 s = dest[x];
6115	blend_pixel(dst&: s, src);
6116	// Then gamma-corrected blend with glyph shape
6117	if (colorProfile)
6118	s = colorProfile->toLinear(rgb64: s);
6119	rgbBlendPixel(dst&: dest[x], coverage, slinear: s, colorProfile);
6120	}
6121	}
6122
6123	static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
6124	int x, int y, const QRgba64 &color,
6125	const uint src, int* mapWidth, int mapHeight, int srcStride,
6126	const QClipData clip, bool* useGammaCorrection)
6127	{
6128	if (color.isTransparent())
6129	return;
6130
6131	const QColorTrcLut colorProfile = nullptr*;
6132
6133	if (useGammaCorrection)
6134	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6135
6136	QRgba64 srcColor = color;
6137	if (colorProfile && color.isOpaque())
6138	srcColor = colorProfile->toLinear(rgb64: srcColor);
6139
6140	alignas(`8`) QRgba64 buffer[BufferSize];
6141	const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
6142	const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
6143
6144	if (!clip) {
6145	for (int ly = `0`; ly < mapHeight; ++ly) {
6146	int i = x;
6147	int length = mapWidth;
6148	while (length > `0`) {
6149	int l = qMin(a: BufferSize, b: length);
6150	QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
6151	for (int j=`0`; j < l; ++j) {
6152	const uint coverage = src[j + (i - x)];
6153	alphargbblend_generic(coverage, dest, x: j, srcLinear: srcColor, src: color, colorProfile);
6154	}
6155	if (destStore64)
6156	destStore64(rasterBuffer, i, y + ly, dest, l);
6157	length -= l;
6158	i += l;
6159	}
6160	src += srcStride;
6161	}
6162	} else {
6163	int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
6164
6165	int top = qMax(a: y, b: `0`);
6166	src += (top - y) * srcStride;
6167
6168	const_cast<QClipData *>(clip)->initialize();
6169	for (int yp = top; yp<bottom; ++yp) {
6170	const QClipData::ClipLine &line = clip->m_clipLines[yp];
6171
6172	for (int i=`0`; i<line.count; ++i) {
6173	const QSpan &clip = line.spans[i];
6174
6175	int start = qMax<int>(a: x, b: clip.x);
6176	int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
6177	if (end <= start)
6178	continue;
6179	Q_ASSERT(end - start <= BufferSize);
6180	QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
6181
6182	for (int xp=start; xp<end; ++xp) {
6183	const uint coverage = src[xp - x];
6184	alphargbblend_generic(coverage, dest, x: xp - start, srcLinear: srcColor, src: color, colorProfile);
6185	}
6186	if (destStore64)
6187	destStore64(rasterBuffer, start, clip.y, dest, end - start);
6188	} // for (i -> line.count)
6189	src += srcStride;
6190	} // for (yp -> bottom)
6191	}
6192	}
6193	#else
6194	static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
6195	int x, int y, const QRgba64 &color,
6196	const uint src, int* mapWidth, int mapHeight, int srcStride,
6197	const QClipData clip, bool* useGammaCorrection)
6198	{
6199	if (color.isTransparent())
6200	return;
6201
6202	const quint32 c = color.toArgb32();
6203
6204	const QColorTrcLut colorProfile = nullptr*;
6205
6206	if (useGammaCorrection)
6207	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6208
6209	QRgba64 srcColor = color;
6210	if (colorProfile && color.isOpaque())
6211	srcColor = colorProfile->toLinear(srcColor);
6212
6213	quint32 buffer[BufferSize];
6214	const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
6215	const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
6216
6217	if (!clip) {
6218	for (int ly = `0`; ly < mapHeight; ++ly) {
6219	int i = x;
6220	int length = mapWidth;
6221	while (length > `0`) {
6222	int l = qMin(BufferSize, length);
6223	quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
6224	for (int j=`0`; j < l; ++j) {
6225	const uint coverage = src[j + (i - x)];
6226	alphargbblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
6227	}
6228	if (destStore)
6229	destStore(rasterBuffer, i, y + ly, dest, l);
6230	length -= l;
6231	i += l;
6232	}
6233	src += srcStride;
6234	}
6235	} else {
6236	int bottom = qMin(y + mapHeight, rasterBuffer->height());
6237
6238	int top = qMax(y, `0`);
6239	src += (top - y) * srcStride;
6240
6241	const_cast<QClipData *>(clip)->initialize();
6242	for (int yp = top; yp<bottom; ++yp) {
6243	const QClipData::ClipLine &line = clip->m_clipLines[yp];
6244
6245	for (int i=`0`; i<line.count; ++i) {
6246	const QSpan &clip = line.spans[i];
6247
6248	int start = qMax<int>(x, clip.x);
6249	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
6250	if (end <= start)
6251	continue;
6252	Q_ASSERT(end - start <= BufferSize);
6253	quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
6254
6255	for (int xp=start; xp<end; ++xp) {
6256	const uint coverage = src[xp - x];
6257	alphargbblend_argb32(dest + xp - start, coverage, srcColor, c, colorProfile);
6258	}
6259	if (destStore)
6260	destStore(rasterBuffer, start, clip.y, dest, end - start);
6261	} // for (i -> line.count)
6262	src += srcStride;
6263	} // for (yp -> bottom)
6264	}
6265	}
6266	#endif
6267
6268	static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
6269	int x, int y, const QRgba64 &color,
6270	const uint src, int* mapWidth, int mapHeight, int srcStride,
6271	const QClipData clip, bool* useGammaCorrection)
6272	{
6273	if (color.isTransparent())
6274	return;
6275
6276	const quint32 c = color.toArgb32();
6277
6278	const QColorTrcLut colorProfile = nullptr*;
6279
6280	if (useGammaCorrection)
6281	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6282
6283	QRgba64 srcColor = color;
6284	if (colorProfile && color.isOpaque())
6285	srcColor = colorProfile->toLinear(rgb64: srcColor);
6286
6287	if (!clip) {
6288	quint32 dst = reinterpret_cast<quint32>(rasterBuffer->scanLine(y)) + x;
6289	const int destStride = rasterBuffer->stride<quint32>();
6290	while (mapHeight--) {
6291	for (int i = `0`; i < mapWidth; ++i) {
6292	const uint coverage = src[i];
6293	alphargbblend_argb32(dst: dst + i, coverage, srcLinear: srcColor, src: c, colorProfile);
6294	}
6295
6296	dst += destStride;
6297	src += srcStride;
6298	}
6299	} else {
6300	int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
6301
6302	int top = qMax(a: y, b: `0`);
6303	src += (top - y) * srcStride;
6304
6305	const_cast<QClipData *>(clip)->initialize();
6306	for (int yp = top; yp<bottom; ++yp) {
6307	const QClipData::ClipLine &line = clip->m_clipLines[yp];
6308
6309	quint32 dst = reinterpret_cast<quint32 >(rasterBuffer->scanLine(y: yp));
6310
6311	for (int i=`0`; i<line.count; ++i) {
6312	const QSpan &clip = line.spans[i];
6313
6314	int start = qMax<int>(a: x, b: clip.x);
6315	int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
6316
6317	for (int xp=start; xp<end; ++xp) {
6318	const uint coverage = src[xp - x];
6319	alphargbblend_argb32(dst: dst + xp, coverage, srcLinear: srcColor, src: c, colorProfile);
6320	}
6321	} // for (i -> line.count)
6322	src += srcStride;
6323	} // for (yp -> bottom)
6324
6325	}
6326	}
6327
6328	static void qt_rectfill_argb32(QRasterBuffer *rasterBuffer,
6329	int x, int y, int width, int height,
6330	const QRgba64 &color)
6331	{
6332	qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6333	value: color.toArgb32(), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6334	}
6335
6336	static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer,
6337	int x, int y, int width, int height,
6338	const QRgba64 &color)
6339	{
6340	const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6341	quint32 c32 = color.toArgb32();
6342	quint16 c16;
6343	layout.storeFromARGB32PM(reinterpret_cast<uchar >(&c16), &c32, `0`, `1`, nullptr, nullptr*);
6344	qt_rectfill<quint16>(dest: reinterpret_cast<quint16 *>(rasterBuffer->buffer()),
6345	value: c16, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6346	}
6347
6348	static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer,
6349	int x, int y, int width, int height,
6350	const QRgba64 &color)
6351	{
6352	const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6353	quint32 c32 = color.toArgb32();
6354	quint24 c24;
6355	layout.storeFromARGB32PM(reinterpret_cast<uchar >(&c24), &c32, `0`, `1`, nullptr, nullptr*);
6356	qt_rectfill<quint24>(dest: reinterpret_cast<quint24 *>(rasterBuffer->buffer()),
6357	value: c24, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6358	}
6359
6360	static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer,
6361	int x, int y, int width, int height,
6362	const QRgba64 &color)
6363	{
6364	qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6365	value: color.unpremultiplied().toArgb32(), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6366	}
6367
6368	static void qt_rectfill_rgba(QRasterBuffer *rasterBuffer,
6369	int x, int y, int width, int height,
6370	const QRgba64 &color)
6371	{
6372	qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6373	value: ARGB2RGBA(x: color.toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6374	}
6375
6376	static void qt_rectfill_nonpremul_rgba(QRasterBuffer *rasterBuffer,
6377	int x, int y, int width, int height,
6378	const QRgba64 &color)
6379	{
6380	qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6381	value: ARGB2RGBA(x: color.unpremultiplied().toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6382	}
6383
6384	template<QtPixelOrder PixelOrder>
6385	static void qt_rectfill_rgb30(QRasterBuffer *rasterBuffer,
6386	int x, int y, int width, int height,
6387	const QRgba64 &color)
6388	{
6389	qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6390	qConvertRgb64ToRgb30<PixelOrder>(color), x, y, width, height, rasterBuffer->bytesPerLine());
6391	}
6392
6393	static void qt_rectfill_alpha(QRasterBuffer *rasterBuffer,
6394	int x, int y, int width, int height,
6395	const QRgba64 &color)
6396	{
6397	qt_rectfill<quint8>(dest: reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6398	value: color.alpha() >> `8`, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6399	}
6400
6401	static void qt_rectfill_gray(QRasterBuffer *rasterBuffer,
6402	int x, int y, int width, int height,
6403	const QRgba64 &color)
6404	{
6405	qt_rectfill<quint8>(dest: reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6406	value: qGray(rgb: color.toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6407	}
6408
6409	static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer,
6410	int x, int y, int width, int height,
6411	const QRgba64 &color)
6412	{
6413	const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
6414	quint64 c64;
6415	store(reinterpret_cast<uchar >(&c64), &color, `0`, `1`, nullptr, nullptr*);
6416	qt_rectfill<quint64>(dest: reinterpret_cast<quint64 *>(rasterBuffer->buffer()),
6417	value: c64, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6418	}
6419
6420	// Map table for destination image format. Contains function pointers
6421	// for blends of various types unto the destination
6422
6423	DrawHelper qDrawHelper[QImage::NImageFormats] =
6424	{
6425	// Format_Invalid,
6426	{ .blendColor: nullptr, .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr },
6427	// Format_Mono,
6428	{
6429	.blendColor: blend_color_generic,
6430	.bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
6431	},
6432	// Format_MonoLSB,
6433	{
6434	.blendColor: blend_color_generic,
6435	.bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
6436	},
6437	// Format_Indexed8,
6438	{
6439	.blendColor: blend_color_generic,
6440	.bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
6441	},
6442	// Format_RGB32,
6443	{
6444	.blendColor: blend_color_argb,
6445	.bitmapBlit: qt_bitmapblit_argb32,
6446	.alphamapBlit: qt_alphamapblit_argb32,
6447	.alphaRGBBlit: qt_alphargbblit_argb32,
6448	.fillRect: qt_rectfill_argb32
6449	},
6450	// Format_ARGB32,
6451	{
6452	.blendColor: blend_color_generic,
6453	.bitmapBlit: qt_bitmapblit_argb32,
6454	.alphamapBlit: qt_alphamapblit_argb32,
6455	.alphaRGBBlit: qt_alphargbblit_argb32,
6456	.fillRect: qt_rectfill_nonpremul_argb32
6457	},
6458	// Format_ARGB32_Premultiplied
6459	{
6460	.blendColor: blend_color_argb,
6461	.bitmapBlit: qt_bitmapblit_argb32,
6462	.alphamapBlit: qt_alphamapblit_argb32,
6463	.alphaRGBBlit: qt_alphargbblit_argb32,
6464	.fillRect: qt_rectfill_argb32
6465	},
6466	// Format_RGB16
6467	{
6468	.blendColor: blend_color_rgb16,
6469	.bitmapBlit: qt_bitmapblit_quint16,
6470	.alphamapBlit: qt_alphamapblit_quint16,
6471	.alphaRGBBlit: qt_alphargbblit_generic,
6472	.fillRect: qt_rectfill_quint16
6473	},
6474	// Format_ARGB8565_Premultiplied
6475	{
6476	.blendColor: blend_color_generic,
6477	.bitmapBlit: nullptr,
6478	.alphamapBlit: qt_alphamapblit_generic,
6479	.alphaRGBBlit: qt_alphargbblit_generic,
6480	.fillRect: qt_rectfill_quint24
6481	},
6482	// Format_RGB666
6483	{
6484	.blendColor: blend_color_generic,
6485	.bitmapBlit: nullptr,
6486	.alphamapBlit: qt_alphamapblit_generic,
6487	.alphaRGBBlit: qt_alphargbblit_generic,
6488	.fillRect: qt_rectfill_quint24
6489	},
6490	// Format_ARGB6666_Premultiplied
6491	{
6492	.blendColor: blend_color_generic,
6493	.bitmapBlit: nullptr,
6494	.alphamapBlit: qt_alphamapblit_generic,
6495	.alphaRGBBlit: qt_alphargbblit_generic,
6496	.fillRect: qt_rectfill_quint24
6497	},
6498	// Format_RGB555
6499	{
6500	.blendColor: blend_color_generic,
6501	.bitmapBlit: nullptr,
6502	.alphamapBlit: qt_alphamapblit_generic,
6503	.alphaRGBBlit: qt_alphargbblit_generic,
6504	.fillRect: qt_rectfill_quint16
6505	},
6506	// Format_ARGB8555_Premultiplied
6507	{
6508	.blendColor: blend_color_generic,
6509	.bitmapBlit: nullptr,
6510	.alphamapBlit: qt_alphamapblit_generic,
6511	.alphaRGBBlit: qt_alphargbblit_generic,
6512	.fillRect: qt_rectfill_quint24
6513	},
6514	// Format_RGB888
6515	{
6516	.blendColor: blend_color_generic,
6517	.bitmapBlit: nullptr,
6518	.alphamapBlit: qt_alphamapblit_generic,
6519	.alphaRGBBlit: qt_alphargbblit_generic,
6520	.fillRect: qt_rectfill_quint24
6521	},
6522	// Format_RGB444
6523	{
6524	.blendColor: blend_color_generic,
6525	.bitmapBlit: nullptr,
6526	.alphamapBlit: qt_alphamapblit_generic,
6527	.alphaRGBBlit: qt_alphargbblit_generic,
6528	.fillRect: qt_rectfill_quint16
6529	},
6530	// Format_ARGB4444_Premultiplied
6531	{
6532	.blendColor: blend_color_generic,
6533	.bitmapBlit: nullptr,
6534	.alphamapBlit: qt_alphamapblit_generic,
6535	.alphaRGBBlit: qt_alphargbblit_generic,
6536	.fillRect: qt_rectfill_quint16
6537	},
6538	// Format_RGBX8888
6539	{
6540	.blendColor: blend_color_generic,
6541	.bitmapBlit: qt_bitmapblit_rgba8888,
6542	.alphamapBlit: qt_alphamapblit_generic,
6543	.alphaRGBBlit: qt_alphargbblit_generic,
6544	.fillRect: qt_rectfill_rgba
6545	},
6546	// Format_RGBA8888
6547	{
6548	.blendColor: blend_color_generic,
6549	.bitmapBlit: qt_bitmapblit_rgba8888,
6550	.alphamapBlit: qt_alphamapblit_generic,
6551	.alphaRGBBlit: qt_alphargbblit_generic,
6552	.fillRect: qt_rectfill_nonpremul_rgba
6553	},
6554	// Format_RGB8888_Premultiplied
6555	{
6556	.blendColor: blend_color_generic,
6557	.bitmapBlit: qt_bitmapblit_rgba8888,
6558	.alphamapBlit: qt_alphamapblit_generic,
6559	.alphaRGBBlit: qt_alphargbblit_generic,
6560	.fillRect: qt_rectfill_rgba
6561	},
6562	// Format_BGR30
6563	{
6564	.blendColor: blend_color_generic_rgb64,
6565	.bitmapBlit: qt_bitmapblit_rgb30<PixelOrderBGR>,
6566	.alphamapBlit: qt_alphamapblit_generic,
6567	.alphaRGBBlit: qt_alphargbblit_generic,
6568	.fillRect: qt_rectfill_rgb30<PixelOrderBGR>
6569	},
6570	// Format_A2BGR30_Premultiplied
6571	{
6572	.blendColor: blend_color_generic_rgb64,
6573	.bitmapBlit: qt_bitmapblit_rgb30<PixelOrderBGR>,
6574	.alphamapBlit: qt_alphamapblit_generic,
6575	.alphaRGBBlit: qt_alphargbblit_generic,
6576	.fillRect: qt_rectfill_rgb30<PixelOrderBGR>
6577	},
6578	// Format_RGB30
6579	{
6580	.blendColor: blend_color_generic_rgb64,
6581	.bitmapBlit: qt_bitmapblit_rgb30<PixelOrderRGB>,
6582	.alphamapBlit: qt_alphamapblit_generic,
6583	.alphaRGBBlit: qt_alphargbblit_generic,
6584	.fillRect: qt_rectfill_rgb30<PixelOrderRGB>
6585	},
6586	// Format_A2RGB30_Premultiplied
6587	{
6588	.blendColor: blend_color_generic_rgb64,
6589	.bitmapBlit: qt_bitmapblit_rgb30<PixelOrderRGB>,
6590	.alphamapBlit: qt_alphamapblit_generic,
6591	.alphaRGBBlit: qt_alphargbblit_generic,
6592	.fillRect: qt_rectfill_rgb30<PixelOrderRGB>
6593	},
6594	// Format_Alpha8
6595	{
6596	.blendColor: blend_color_generic,
6597	.bitmapBlit: nullptr,
6598	.alphamapBlit: qt_alphamapblit_generic,
6599	.alphaRGBBlit: qt_alphargbblit_generic,
6600	.fillRect: qt_rectfill_alpha
6601	},
6602	// Format_Grayscale8
6603	{
6604	.blendColor: blend_color_generic,
6605	.bitmapBlit: nullptr,
6606	.alphamapBlit: qt_alphamapblit_generic,
6607	.alphaRGBBlit: qt_alphargbblit_generic,
6608	.fillRect: qt_rectfill_gray
6609	},
6610	// Format_RGBX64
6611	{
6612	.blendColor: blend_color_generic_rgb64,
6613	.bitmapBlit: nullptr,
6614	.alphamapBlit: qt_alphamapblit_generic,
6615	.alphaRGBBlit: qt_alphargbblit_generic,
6616	.fillRect: qt_rectfill_quint64
6617	},
6618	// Format_RGBA64
6619	{
6620	.blendColor: blend_color_generic_rgb64,
6621	.bitmapBlit: nullptr,
6622	.alphamapBlit: qt_alphamapblit_generic,
6623	.alphaRGBBlit: qt_alphargbblit_generic,
6624	.fillRect: qt_rectfill_quint64
6625	},
6626	// Format_RGBA64_Premultiplied
6627	{
6628	.blendColor: blend_color_generic_rgb64,
6629	.bitmapBlit: nullptr,
6630	.alphamapBlit: qt_alphamapblit_generic,
6631	.alphaRGBBlit: qt_alphargbblit_generic,
6632	.fillRect: qt_rectfill_quint64
6633	},
6634	// Format_Grayscale16
6635	{
6636	.blendColor: blend_color_generic_rgb64,
6637	.bitmapBlit: nullptr,
6638	.alphamapBlit: qt_alphamapblit_generic,
6639	.alphaRGBBlit: qt_alphargbblit_generic,
6640	.fillRect: qt_rectfill_quint16
6641	},
6642	// Format_BGR888
6643	{
6644	.blendColor: blend_color_generic,
6645	.bitmapBlit: nullptr,
6646	.alphamapBlit: qt_alphamapblit_generic,
6647	.alphaRGBBlit: qt_alphargbblit_generic,
6648	.fillRect: qt_rectfill_quint24
6649	},
6650	};
6651
#ifndef __SSE2__
// Generic fallback used when the build does not target SSE2: fills `count`
// 64-bit elements starting at `dest` with `color` by delegating to the shared
// qt_memfill_template helper. (In SSE2 builds qt_memfill64 is instead a
// function pointer defined further down in this file.)
void qt_memfill64(quint64 *dest, quint64 color, qsizetype count)
{
    qt_memfill_template<quint64>(dest, color, count);
}
#endif // !__SSE2__
6658
6659	#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG)
6660	__attribute__((optimize("no-tree-vectorize")))
6661	#endif
6662	void qt_memfill24(quint24 *dest, quint24 color, qsizetype count)
6663	{
6664	# ifdef QT_COMPILER_SUPPORTS_SSSE3
6665	extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype);
6666	if (qCpuHasFeature(SSSE3))
6667	return qt_memfill24_ssse3(dest, color, count);
6668	# endif
6669
6670	const quint32 v = color;
6671	quint24 *end = dest + count;
6672
6673	// prolog: align dest to 32bit
6674	while ((quintptr(dest) & `0x3`) && dest < end) {
6675	*dest++ = v;
6676	}
6677	if (dest >= end)
6678	return;
6679
6680	const uint val1 = qFromBigEndian(source: (v << `8`) \| (v >> `16`));
6681	const uint val2 = qFromBigEndian(source: (v << `16`) \| (v >> `8`));
6682	const uint val3 = qFromBigEndian(source: (v << `24`) \| (v >> `0`));
6683
6684	for ( ; dest <= (end - `4`); dest += `4`) {
6685	quint32 dst = reinterpret_cast<quint32 >(dest);
6686	dst[`0`] = val1;
6687	dst[`1`] = val2;
6688	dst[`2`] = val3;
6689	}
6690
6691	// less than 4px left
6692	switch (end - dest) {
6693	case `3`:
6694	*dest++ = v;
6695	Q_FALLTHROUGH();
6696	case `2`:
6697	*dest++ = v;
6698	Q_FALLTHROUGH();
6699	case `1`:
6700	*dest++ = v;
6701	}
6702	}
6703
6704	void qt_memfill16(quint16 *dest, quint16 value, qsizetype count)
6705	{
6706	const int align = quintptr(dest) & `0x3`;
6707	if (align) {
6708	*dest++ = value;
6709	--count;
6710	}
6711
6712	if (count & `0x1`)
6713	dest[count - `1`] = value;
6714
6715	const quint32 value32 = (value << `16`) \| value;
6716	qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / `2`);
6717	}
6718
#if !(defined(__SSE2__) || defined(__ARM_NEON__) || defined(__MIPS_DSP__))
// Generic fallback for builds targeting none of SSE2, NEON or MIPS DSP: fills
// `count` 32-bit elements starting at `dest` with `color` by delegating to the
// shared qt_memfill_template helper.
void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
{
    qt_memfill_template<quint32>(dest, color, count);
}
#endif
6725	#ifdef __SSE2__
6726	decltype(qt_memfill32_sse2) qt_memfill32 = nullptr*;
6727	decltype(qt_memfill64_sse2) qt_memfill64 = nullptr*;
6728	#endif
6729
// Declaration only (no body in this part of the file). When SSE4.1 is
// available at run time, qInitDrawhelperFunctions() installs the
// PixelOrderBGR and PixelOrderRGB instantiations of this template as the
// storeFromARGB32PM routine of the A2BGR30/A2RGB30 premultiplied layouts.
6730	#ifdef QT_COMPILER_SUPPORTS_SSE4_1
6731	template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar dest, const* uint src, int* index, int count, const QVector<QRgb> , QDitherInfo );
6732	#endif
6733
// Declared here and defined elsewhere; it is the first call made by
// qInitDrawhelperFunctions() and sets up the basic blend function tables.
6734	extern void qInitBlendFunctions();
6735
// Populates the draw-helper dispatch tables with architecture-specific
// implementations for this build and CPU.
//
// qInitBlendFunctions() runs first. After that, entries of qDrawHelper,
// qBlendFunctions, qScaleFunctions, qTransformFunctions, qPixelLayouts,
// qStoreFromRGBA64PM, destFetchProc/destStoreProc(64), sourceFetchUntransformed,
// qt_functionForMode*_C and the qt_memfill32/64 and qt_fetch_radial_gradient
// pointers are overwritten depending on:
//   - __SSE2__:     SSE2 variants unconditionally, then SSSE3, SSE4.1 and AVX2
//                   variants when the compiler supports them and
//                   qCpuHasFeature() reports the feature at run time;
//   - __ARM_NEON__: NEON variants (plus RGB16 helpers when
//                   ENABLE_PIXMAN_DRAWHELPERS is defined);
//   - __MIPS_DSP__: MIPS DSP variants (DSPr2 variants when __MIPS_DSPR2__).
//
// Statement order matters: later sections overwrite entries assigned by
// earlier ones (for example the SSSE3 and AVX2 blend functions replace the
// SSE2 ones).
6736	static void qInitDrawhelperFunctions()
6737	{
6738	// Set up basic blend function tables.
6739	qInitBlendFunctions();
6740
6741	#ifdef __SSE2__
// Builds that target AVX2 as a whole do not install the SSE2 memfill
// routines here; the AVX2 section further down assigns qt_memfill32/64.
6742	# ifndef __AVX2__
6743	qt_memfill32 = qt_memfill32_sse2;
6744	qt_memfill64 = qt_memfill64_sse2;
6745	# endif
// SSE2 bitmap blits for the 32-bit and RGB16 destination formats.
6746	qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
6747	qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
6748	qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
6749	qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
6750	qDrawHelper[QImage::Format_RGBX8888].bitmapBlit = qt_bitmapblit8888_sse2;
6751	qDrawHelper[QImage::Format_RGBA8888].bitmapBlit = qt_bitmapblit8888_sse2;
6752	qDrawHelper[QImage::Format_RGBA8888_Premultiplied].bitmapBlit = qt_bitmapblit8888_sse2;
6753
// One SSE2 scaling routine serves both the ARGB32 and the RGBA8888 families.
6754	extern void qt_scale_image_argb32_on_argb32_sse2(uchar destPixels, int* dbpl,
6755	const uchar srcPixels, int* sbpl, int srch,
6756	const QRectF &targetRect,
6757	const QRectF &sourceRect,
6758	const QRect &clip,
6759	int const_alpha);
6760	qScaleFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6761	qScaleFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6762	qScaleFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6763	qScaleFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6764
6765	extern void qt_blend_rgb32_on_rgb32_sse2(uchar destPixels, int* dbpl,
6766	const uchar srcPixels, int* sbpl,
6767	int w, int h,
6768	int const_alpha);
6769	extern void qt_blend_argb32_on_argb32_sse2(uchar destPixels, int* dbpl,
6770	const uchar srcPixels, int* sbpl,
6771	int w, int h,
6772	int const_alpha);
6773
// SSE2 blend functions; the SSSE3 and AVX2 sections below may replace them.
6774	qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6775	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6776	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6777	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6778	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6779	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6780	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6781	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6782
6783	extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint buffer, const* Operator op, const* QSpanData *data,
6784	int y, int x, int length);
6785
6786	qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
6787
// SSE2 composition functions for the SourceOver, Source and Plus modes.
6788	extern void QT_FASTCALL comp_func_SourceOver_sse2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
6789	extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint destPixels, int* length, uint color, uint const_alpha);
6790	extern void QT_FASTCALL comp_func_Source_sse2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
6791	extern void QT_FASTCALL comp_func_solid_Source_sse2(uint destPixels, int* length, uint color, uint const_alpha);
6792	extern void QT_FASTCALL comp_func_Plus_sse2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
6793	qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_sse2;
6794	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_sse2;
6795	qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
6796	qt_functionForModeSolid_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_sse2;
6797	qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
6798
// SSSE3 (run-time checked): replaces the SSE2 argb32-on-argb32 blend entries
// assigned above and adds RGB888 fetch and red/blue swap routines.
6799	#ifdef QT_COMPILER_SUPPORTS_SSSE3
6800	if (qCpuHasFeature(SSSE3)) {
6801	extern void qt_blend_argb32_on_argb32_ssse3(uchar destPixels, int* dbpl,
6802	const uchar srcPixels, int* sbpl,
6803	int w, int h,
6804	int const_alpha);
6805
6806	extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint buffer, const* Operator , const* QSpanData *data,
6807	int y, int x, int length);
6808	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6809	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6810	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6811	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6812	sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3;
6813	extern void QT_FASTCALL rbSwap_888_ssse3(uchar dst, const* uchar src, int* count);
6814	qPixelLayouts[QImage::Format_RGB888].rbSwap = rbSwap_888_ssse3;
6815	qPixelLayouts[QImage::Format_BGR888].rbSwap = rbSwap_888_ssse3;
6816	}
6817	#endif // SSSE3
6818
// SSE4.1 (run-time checked): pixel-layout fetch/convert/store routines.
6819	#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
6820	if (qCpuHasFeature(SSE4_1)) {
6821	extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint buffer, int* count, const QVector<QRgb> *);
6822	extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint buffer, int* count, const QVector<QRgb> *);
6823	extern const uint QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint buffer, const uchar src, int* index, int count,
6824	const QVector<QRgb> , QDitherInfo );
6825	extern const uint QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint buffer, const uchar src, int* index, int count,
6826	const QVector<QRgb> , QDitherInfo );
6827	extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 buffer, const* uint src, int* count,
6828	const QVector<QRgb> , QDitherInfo );
6829	extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 buffer, const* uint src, int* count,
6830	const QVector<QRgb> , QDitherInfo );
6831	extern const QRgba64 QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 buffer, const uchar src, int* index, int count,
6832	const QVector<QRgb> , QDitherInfo );
6833	extern const QRgba64 QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 buffer, const uchar src, int* index, int count,
6834	const QVector<QRgb> , QDitherInfo );
6835	extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar dest, const* uint src, int* index, int count,
6836	const QVector<QRgb> , QDitherInfo );
6837	extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar dest, const* uint src, int* index, int count,
6838	const QVector<QRgb> , QDitherInfo );
6839	extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar dest, const* uint src, int* index, int count,
6840	const QVector<QRgb> , QDitherInfo );
6841	extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar dest, const* QRgba64 src, int* index, int count,
6842	const QVector<QRgb> , QDitherInfo );
6843	extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar dest, const* QRgba64 src, int* index, int count,
6844	const QVector<QRgb> , QDitherInfo );
6845	extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer rasterBuffer, int* x, int y, const QRgba64 buffer, int* length);
6846	extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer rasterBuffer, int* x, int y, const QRgba64 buffer, int* length);
// The fetch/convert entries below are skipped in builds that target AVX2.
// NOTE(review): the AVX2 section further down does not assign the
// Format_RGBA8888 fetchToRGBA64PM/convertToRGBA64PM entries that are set
// here - confirm whether that omission is intended.
6847	# ifndef __AVX2__
6848	qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
6849	qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
6850	qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
6851	qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
6852	qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_sse4;
6853	qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_sse4;
6854	qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6855	qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6856	qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6857	qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6858	# endif
// Store routines are installed regardless of __AVX2__.
6859	qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
6860	qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;
6861	qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4;
6862	qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>;
6863	qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>;
6864	qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4;
6865	qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4;
6866	#if QT_CONFIG(raster_64bit)
6867	destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4;
6868	destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4;
6869	#endif
6870	}
6871	#endif
6872
// AVX2 (run-time checked via ArchHaswell): replaces the memfill pointers and
// the blend, composition and pixel-layout entries assigned by earlier sections.
6873	#if defined(QT_COMPILER_SUPPORTS_AVX2)
6874	if (qCpuHasFeature(ArchHaswell)) {
6875	qt_memfill32 = qt_memfill32_avx2;
6876	qt_memfill64 = qt_memfill64_avx2;
6877	extern void qt_blend_rgb32_on_rgb32_avx2(uchar destPixels, int* dbpl,
6878	const uchar srcPixels, int* sbpl,
6879	int w, int h, int const_alpha);
6880	extern void qt_blend_argb32_on_argb32_avx2(uchar destPixels, int* dbpl,
6881	const uchar srcPixels, int* sbpl,
6882	int w, int h, int const_alpha);
6883	qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6884	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6885	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6886	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6887	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6888	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6889	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6890	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6891
6892	extern void QT_FASTCALL comp_func_Source_avx2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
6893	extern void QT_FASTCALL comp_func_SourceOver_avx2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
6894	extern void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint destPixels, int* length, uint color, uint const_alpha);
6895	qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_avx2;
6896	qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_avx2;
6897	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2;
6898	#if QT_CONFIG(raster_64bit)
6899	extern void QT_FASTCALL comp_func_Source_rgb64_avx2(QRgba64 destPixels, const* QRgba64 srcPixels, int* length, uint const_alpha);
6900	extern void QT_FASTCALL comp_func_SourceOver_rgb64_avx2(QRgba64 destPixels, const* QRgba64 srcPixels, int* length, uint const_alpha);
6901	extern void QT_FASTCALL comp_func_solid_SourceOver_rgb64_avx2(QRgba64 destPixels, int* length, QRgba64 color, uint const_alpha);
6902	qt_functionForMode64_C[QPainter::CompositionMode_Source] = comp_func_Source_rgb64_avx2;
6903	qt_functionForMode64_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgb64_avx2;
6904	qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2;
6905	#endif
6906
6907	extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint b, uint end, const QTextureData &image,
6908	int &fx, int &fy, int fdx, int /fdy/);
6909	extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint b, uint end, const QTextureData &image,
6910	int &fx, int &fy, int fdx, int /fdy/);
6911	extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint b, uint end, const QTextureData &image,
6912	int &fx, int &fy, int fdx, int fdy);
6913
// Only row 0 of the bilinear helper table is replaced here.
6914	bilinearFastTransformHelperARGB32PM[`0`][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2;
6915	bilinearFastTransformHelperARGB32PM[`0`][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2;
6916	bilinearFastTransformHelperARGB32PM[`0`][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2;
6917
6918	extern void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint buffer, int* count, const QVector<QRgb> *);
6919	extern void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint buffer, int* count, const QVector<QRgb> *);
6920	extern const uint QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint buffer, const uchar src, int* index, int count,
6921	const QVector<QRgb> , QDitherInfo );
6922	extern const uint QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint buffer, const uchar src, int* index, int count,
6923	const QVector<QRgb> , QDitherInfo );
6924	qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_avx2;
6925	qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
6926	qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2;
6927	qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
6928
// RGBA64 conversions are only installed when the 64-bit raster feature is on.
6929	#if QT_CONFIG(raster_64bit)
6930	extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 , const* uint , int, const* QVector<QRgb> , QDitherInfo );
6931	extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 , const* uint , int* count, const QVector<QRgb> , QDitherInfo );
6932	extern const QRgba64 QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 , const uchar , int, int, const* QVector<QRgb> , QDitherInfo );
6933	extern const QRgba64 QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 , const uchar , int, int, const* QVector<QRgb> , QDitherInfo );
6934	qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2;
6935	qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2;
6936	qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2;
6937	qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2;
6938	#endif
6939	}
6940	#endif
6941
6942	#endif // SSE2
6943
// NEON: installed unconditionally when the build targets __ARM_NEON__
// (no run-time feature check in this section).
6944	#if defined(__ARM_NEON__)
6945	qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6946	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6947	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6948	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
// The RGBA8888-family blend entries are only replaced on little-endian targets.
6949	#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6950	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6951	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6952	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6953	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6954	#endif
6955
6956	qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
6957	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
6958	qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
6959
6960	extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint buffer, const* Operator op, const* QSpanData *data,
6961	int y, int x, int length);
6962
6963	qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
6964
6965	sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
6966
// NEON pixel-layout fetch/convert/store routines, little-endian targets only.
6967	#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6968	extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint buffer, int* count, const QVector<QRgb> *);
6969	extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint buffer, int* count, const QVector<QRgb> *);
6970	extern const uint QT_FASTCALL fetchARGB32ToARGB32PM_neon(uint buffer, const uchar src, int* index, int count,
6971	const QVector<QRgb> , QDitherInfo );
6972	extern const uint QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint buffer, const uchar src, int* index, int count,
6973	const QVector<QRgb> , QDitherInfo );
6974	extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 buffer, const* uint src, int* count,
6975	const QVector<QRgb> , QDitherInfo );
6976	extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 buffer, const* uint src, int* count,
6977	const QVector<QRgb> , QDitherInfo );
6978	extern const QRgba64 QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 buffer, const uchar src, int* index, int count,
6979	const QVector<QRgb> , QDitherInfo );
6980	extern const QRgba64 QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 buffer, const uchar src, int* index, int count,
6981	const QVector<QRgb> , QDitherInfo );
6982	extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar dest, const* uint src, int* index, int count,
6983	const QVector<QRgb> , QDitherInfo );
6984	extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar dest, const* uint src, int* index, int count,
6985	const QVector<QRgb> , QDitherInfo );
6986	extern void QT_FASTCALL storeRGBXFromARGB32PM_neon(uchar dest, const* uint src, int* index, int count,
6987	const QVector<QRgb> , QDitherInfo );
6988	qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon;
6989	qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
6990	qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon;
6991	qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_neon;
6992	qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_neon;
6993	qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon;
6994	qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
6995	qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon;
6996	qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6997	qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6998	qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon;
6999	qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
7000	qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
7001	#endif
7002
7003	#if defined(ENABLE_PIXMAN_DRAWHELPERS)
7004	// The RGB16 helpers use Arm32 assembly that has not been ported to AArch64
7005	qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
7006	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
7007	qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
7008
7009	qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
7010	qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
7011
7012	qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
7013	qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
7014
7015	qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
7016
7017	destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
7018	destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
7019
// 16 bpp memory rotation: slot 0 gets the 90-degree routine and slot 2 the
// 270-degree routine (per the function names).
7020	qMemRotateFunctions[QPixelLayout::BPP16][`0`] = qt_memrotate90_16_neon;
7021	qMemRotateFunctions[QPixelLayout::BPP16][`2`] = qt_memrotate270_16_neon;
7022	#endif
7023	#endif // defined(__ARM_NEON__)
7024
// MIPS DSP: installed unconditionally when the build targets __MIPS_DSP__.
7025	#if defined(__MIPS_DSP__)
7026	// Composition functions are all DSP r1
7027	qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
7028	qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
7029	qt_functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
7030	qt_functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
7031	qt_functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
7032	qt_functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
7033	qt_functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
7034	qt_functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
7035	qt_functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
7036	qt_functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
7037
7038	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
7039	qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
7040	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
7041	qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
7042	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
7043	qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
7044	qt_functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
7045	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
7046
7047	qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
7048	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
7049	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
7050	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
7051
7052	destFetchProc[QImage::Format_ARGB32] = qt_destFetchARGB32_mips_dsp;
7053
7054	destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
7055
7056	sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
7057	sourceFetchUntransformed[QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
7058	sourceFetchUntransformed[QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
7059
// RGB16: DSPr2 builds get a DSPr2 blend plus an untransformed fetch routine;
// other DSP builds only get the DSP blend.
7060	#if defined(__MIPS_DSPR2__)
7061	qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
7062	sourceFetchUntransformed[QImage::Format_RGB16] = qt_fetchUntransformedRGB16_mips_dspr2;
7063	#else
7064	qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
7065	#endif // defined(__MIPS_DSPR2__)
7066	#endif // defined(__MIPS_DSP__)
7067	}
7068
7069	// Ensure initialization if this object file is linked: registers
// qInitDrawhelperFunctions() through Q_CONSTRUCTOR_FUNCTION so the dispatch
// tables are populated without an explicit call (presumably during static
// initialization - confirm against the macro's definition).
7070	Q_CONSTRUCTOR_FUNCTION(qInitDrawhelperFunctions);
7071
7072	QT_END_NAMESPACE
7073

source code of qtbase/src/gui/painting/qdrawhelper.cpp