1// Copyright (C) 2022 The Qt Company Ltd.
2// Copyright (C) 2018 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#include "qdrawhelper_p.h"
6
7#include <qstylehints.h>
8#include <qguiapplication.h>
9#include <qatomic.h>
10#include <private/qcolortransform_p.h>
11#include <private/qcolortrclut_p.h>
12#include <private/qdrawhelper_p.h>
13#include <private/qdrawhelper_x86_p.h>
14#include <private/qdrawingprimitive_sse2_p.h>
15#include <private/qdrawhelper_neon_p.h>
16#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
17#include <private/qdrawhelper_mips_dsp_p.h>
18#endif
19#include <private/qguiapplication_p.h>
20#include <private/qpaintengine_raster_p.h>
21#include <private/qpainter_p.h>
22#include <private/qpixellayout_p.h>
23#include <private/qrgba64_p.h>
24#include <qendian.h>
25#include <qloggingcategory.h>
26#include <qmath.h>
27
28#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
29#define QT_USE_THREAD_PARALLEL_FILLS
30#endif
31
32#if defined(QT_USE_THREAD_PARALLEL_FILLS)
33#include <qsemaphore.h>
34#include <qthreadpool.h>
35#include <private/qthreadpool_p.h>
36#endif
37
38QT_BEGIN_NAMESPACE
39
40Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")
41
42#define MASK(src, a) src = BYTE_MUL(src, a)
43
44/*
45 constants and structures
46*/
47
// Fixed-point helpers with 16 fractional bits: fixed_scale is the value 1.0
// (coordinates are multiplied by it and the integer part is recovered with
// ">> 16"), half_point is the value 0.5.
constexpr int fixed_scale = 0x10000;
constexpr int half_point = fixed_scale / 2;
50
51template <QPixelLayout::BPP bpp> static
52inline uint QT_FASTCALL fetch1Pixel(const uchar *, int)
53{
54 Q_UNREACHABLE_RETURN(0);
55}
56
57template <>
58inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP1LSB>(const uchar *src, int index)
59{
60 return (src[index >> 3] >> (index & 7)) & 1;
61}
62
63template <>
64inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP1MSB>(const uchar *src, int index)
65{
66 return (src[index >> 3] >> (~index & 7)) & 1;
67}
68
69template <>
70inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP8>(const uchar *src, int index)
71{
72 return src[index];
73}
74
75template <>
76inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP16>(const uchar *src, int index)
77{
78 return reinterpret_cast<const quint16 *>(src)[index];
79}
80
81template <>
82inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP24>(const uchar *src, int index)
83{
84 return reinterpret_cast<const quint24 *>(src)[index];
85}
86
87template <>
88inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP32>(const uchar *src, int index)
89{
90 return reinterpret_cast<const uint *>(src)[index];
91}
92
93template <>
94inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP64>(const uchar *src, int index)
95{
96 // We have to do the conversion in fetch to fit into a 32bit uint
97 QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
98 return c.toArgb32();
99}
100
101template <>
102inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP16FPx4>(const uchar *src, int index)
103{
104 // We have to do the conversion in fetch to fit into a 32bit uint
105 QRgbaFloat16 c = reinterpret_cast<const QRgbaFloat16 *>(src)[index];
106 return c.toArgb32();
107}
108
109template <>
110inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP32FPx4>(const uchar *src, int index)
111{
112 // We have to do the conversion in fetch to fit into a 32bit uint
113 QRgbaFloat32 c = reinterpret_cast<const QRgbaFloat32 *>(src)[index];
114 return c.toArgb32();
115}
116
117typedef uint (QT_FASTCALL *Fetch1PixelFunc)(const uchar *src, int index);
118
119constexpr Fetch1PixelFunc fetch1PixelTable[QPixelLayout::BPPCount] = {
120 nullptr, // BPPNone
121 fetch1Pixel<QPixelLayout::BPP1MSB>,
122 fetch1Pixel<QPixelLayout::BPP1LSB>,
123 fetch1Pixel<QPixelLayout::BPP8>,
124 fetch1Pixel<QPixelLayout::BPP16>,
125 fetch1Pixel<QPixelLayout::BPP24>,
126 fetch1Pixel<QPixelLayout::BPP32>,
127 fetch1Pixel<QPixelLayout::BPP64>,
128 fetch1Pixel<QPixelLayout::BPP16FPx4>,
129 fetch1Pixel<QPixelLayout::BPP32FPx4>,
130};
131
132#if QT_CONFIG(raster_64bit)
133static void QT_FASTCALL convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count)
134{
135 for (int i = 0; i < count; ++i)
136 buffer[i] = buffer[i].premultiplied();
137}
138
139static void QT_FASTCALL convertRGBA64PMToRGBA64PM(QRgba64 *, int)
140{
141}
142
143static void QT_FASTCALL convertRGBA16FToRGBA64PM(QRgba64 *buffer, int count)
144{
145 const QRgbaFloat16 *in = reinterpret_cast<const QRgbaFloat16 *>(buffer);
146 for (int i = 0; i < count; ++i) {
147 QRgbaFloat16 c = in[i];
148 buffer[i] = QRgba64::fromRgba64(red: c.red16(), green: c.green16(), blue: c.blue16(), alpha: c.alpha16()).premultiplied();
149 }
150}
151
152static void QT_FASTCALL convertRGBA16FPMToRGBA64PM(QRgba64 *buffer, int count)
153{
154 const QRgbaFloat16 *in = reinterpret_cast<const QRgbaFloat16 *>(buffer);
155 for (int i = 0; i < count; ++i) {
156 QRgbaFloat16 c = in[i];
157 buffer[i] = QRgba64::fromRgba64(red: c.red16(), green: c.green16(), blue: c.blue16(), alpha: c.alpha16());
158 }
159}
160
161static void QT_FASTCALL convertRGBA32FToRGBA64PM(QRgba64 *buffer, int count)
162{
163 const QRgbaFloat32 *in = reinterpret_cast<const QRgbaFloat32 *>(buffer);
164 for (int i = 0; i < count; ++i) {
165 QRgbaFloat32 c = in[i];
166 buffer[i] = QRgba64::fromRgba64(red: c.red16(), green: c.green16(), blue: c.blue16(), alpha: c.alpha16()).premultiplied();
167 }
168}
169
170static void QT_FASTCALL convertRGBA32FPMToRGBA64PM(QRgba64 *buffer, int count)
171{
172 const QRgbaFloat32 *in = reinterpret_cast<const QRgbaFloat32 *>(buffer);
173 for (int i = 0; i < count; ++i) {
174 QRgbaFloat32 c = in[i];
175 buffer[i] = QRgba64::fromRgba64(red: c.red16(), green: c.green16(), blue: c.blue16(), alpha: c.alpha16());
176 }
177}
178
179static Convert64Func convert64ToRGBA64PM[] = {
180 nullptr,
181 nullptr,
182 nullptr,
183 nullptr,
184 nullptr,
185 nullptr,
186 nullptr,
187 nullptr,
188 nullptr,
189 nullptr,
190 nullptr,
191 nullptr,
192 nullptr,
193 nullptr,
194 nullptr,
195 nullptr,
196 nullptr,
197 nullptr,
198 nullptr,
199 nullptr,
200 nullptr,
201 nullptr,
202 nullptr,
203 nullptr,
204 nullptr,
205 convertRGBA64PMToRGBA64PM,
206 convertRGBA64ToRGBA64PM,
207 convertRGBA64PMToRGBA64PM,
208 nullptr,
209 nullptr,
210 convertRGBA16FPMToRGBA64PM,
211 convertRGBA16FToRGBA64PM,
212 convertRGBA16FPMToRGBA64PM,
213 convertRGBA32FPMToRGBA64PM,
214 convertRGBA32FToRGBA64PM,
215 convertRGBA32FPMToRGBA64PM,
216 nullptr,
217};
218
219static_assert(std::size(convert64ToRGBA64PM) == QImage::NImageFormats);
220#endif
221
222#if QT_CONFIG(raster_fp)
223static void QT_FASTCALL convertRGBA64PMToRGBA32F(QRgbaFloat32 *buffer, const quint64 *src, int count)
224{
225 const auto *in = reinterpret_cast<const QRgba64 *>(src);
226 for (int i = 0; i < count; ++i) {
227 auto c = in[i];
228 buffer[i] = QRgbaFloat32::fromRgba64(red: c.red(), green: c.green(), blue: c.blue(), alpha: c.alpha()).premultiplied();
229 }
230}
231
232static void QT_FASTCALL convertRGBA64ToRGBA32F(QRgbaFloat32 *buffer, const quint64 *src, int count)
233{
234 const auto *in = reinterpret_cast<const QRgba64 *>(src);
235 for (int i = 0; i < count; ++i) {
236 auto c = in[i];
237 buffer[i] = QRgbaFloat32::fromRgba64(red: c.red(), green: c.green(), blue: c.blue(), alpha: c.alpha());
238 }
239}
240
241static void QT_FASTCALL convertRGBA16FPMToRGBA32F(QRgbaFloat32 *buffer, const quint64 *src, int count)
242{
243 qFloatFromFloat16((float *)buffer, (const qfloat16 *)src, length: count * 4);
244 for (int i = 0; i < count; ++i)
245 buffer[i] = buffer[i].premultiplied();
246}
247
248static void QT_FASTCALL convertRGBA16FToRGBA32F(QRgbaFloat32 *buffer, const quint64 *src, int count)
249{
250 qFloatFromFloat16((float *)buffer, (const qfloat16 *)src, length: count * 4);
251}
252
253static Convert64ToFPFunc convert64ToRGBA32F[] = {
254 nullptr,
255 nullptr,
256 nullptr,
257 nullptr,
258 nullptr,
259 nullptr,
260 nullptr,
261 nullptr,
262 nullptr,
263 nullptr,
264 nullptr,
265 nullptr,
266 nullptr,
267 nullptr,
268 nullptr,
269 nullptr,
270 nullptr,
271 nullptr,
272 nullptr,
273 nullptr,
274 nullptr,
275 nullptr,
276 nullptr,
277 nullptr,
278 nullptr,
279 convertRGBA64ToRGBA32F,
280 convertRGBA64PMToRGBA32F,
281 convertRGBA64ToRGBA32F,
282 nullptr,
283 nullptr,
284 convertRGBA16FToRGBA32F,
285 convertRGBA16FPMToRGBA32F,
286 convertRGBA16FToRGBA32F,
287 nullptr,
288 nullptr,
289 nullptr,
290 nullptr,
291};
292
293static_assert(std::size(convert64ToRGBA32F) == QImage::NImageFormats);
294
295static void convertRGBA32FToRGBA32FPM(QRgbaFloat32 *buffer, int count)
296{
297 for (int i = 0; i < count; ++i)
298 buffer[i] = buffer[i].premultiplied();
299}
300
301static void convertRGBA32FToRGBA32F(QRgbaFloat32 *, int)
302{
303}
304
305#endif
306
307/*
308 Destination fetch. This is simple as we don't have to do bounds checks or
309 transformations
310*/
311
312static uint * QT_FASTCALL destFetchMono(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
313{
314 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
315 uint *start = buffer;
316 const uint *end = buffer + length;
317 while (buffer < end) {
318 *buffer = data[x>>3] & (0x80 >> (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
319 ++buffer;
320 ++x;
321 }
322 return start;
323}
324
325static uint * QT_FASTCALL destFetchMonoLsb(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
326{
327 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
328 uint *start = buffer;
329 const uint *end = buffer + length;
330 while (buffer < end) {
331 *buffer = data[x>>3] & (0x1 << (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
332 ++buffer;
333 ++x;
334 }
335 return start;
336}
337
338static uint * QT_FASTCALL destFetchARGB32P(uint *, QRasterBuffer *rasterBuffer, int x, int y, int)
339{
340 return (uint *)rasterBuffer->scanLine(y) + x;
341}
342
343static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
344{
345 const ushort *Q_DECL_RESTRICT data = (const ushort *)rasterBuffer->scanLine(y) + x;
346 for (int i = 0; i < length; ++i)
347 buffer[i] = qConvertRgb16To32(c: data[i]);
348 return buffer;
349}
350
351static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
352{
353 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
354 return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
355}
356
357static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int)
358{
359 return buffer;
360}
361
362static DestFetchProc destFetchProc[] =
363{
364 nullptr, // Format_Invalid
365 destFetchMono, // Format_Mono,
366 destFetchMonoLsb, // Format_MonoLSB
367 nullptr, // Format_Indexed8
368 destFetchARGB32P, // Format_RGB32
369 destFetch, // Format_ARGB32,
370 destFetchARGB32P, // Format_ARGB32_Premultiplied
371 destFetchRGB16, // Format_RGB16
372 destFetch, // Format_ARGB8565_Premultiplied
373 destFetch, // Format_RGB666
374 destFetch, // Format_ARGB6666_Premultiplied
375 destFetch, // Format_RGB555
376 destFetch, // Format_ARGB8555_Premultiplied
377 destFetch, // Format_RGB888
378 destFetch, // Format_RGB444
379 destFetch, // Format_ARGB4444_Premultiplied
380 destFetch, // Format_RGBX8888
381 destFetch, // Format_RGBA8888
382 destFetch, // Format_RGBA8888_Premultiplied
383 destFetch, // Format_BGR30
384 destFetch, // Format_A2BGR30_Premultiplied
385 destFetch, // Format_RGB30
386 destFetch, // Format_A2RGB30_Premultiplied
387 destFetch, // Format_Alpha8
388 destFetch, // Format_Grayscale8
389 destFetch, // Format_RGBX64
390 destFetch, // Format_RGBA64
391 destFetch, // Format_RGBA64_Premultiplied
392 destFetch, // Format_Grayscale16
393 destFetch, // Format_BGR888
394 destFetch, // Format_RGBX16FPx4
395 destFetch, // Format_RGBA16FPx4
396 destFetch, // Format_RGBA16FPx4_Premultiplied
397 destFetch, // Format_RGBX32FPx4
398 destFetch, // Format_RGBA32FPx4
399 destFetch, // Format_RGBA32FPx4_Premultiplied
400 destFetch, // Format_CMYK8888
401};
402
403static_assert(std::size(destFetchProc) == QImage::NImageFormats);
404
405#if QT_CONFIG(raster_64bit)
406static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
407{
408 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
409 return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
410}
411
412static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int)
413{
414 return (QRgba64 *)rasterBuffer->scanLine(y) + x;
415}
416
417static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int)
418{
419 return buffer;
420}
421
422static DestFetchProc64 destFetchProc64[] =
423{
424 nullptr, // Format_Invalid
425 nullptr, // Format_Mono,
426 nullptr, // Format_MonoLSB
427 nullptr, // Format_Indexed8
428 destFetch64, // Format_RGB32
429 destFetch64, // Format_ARGB32,
430 destFetch64, // Format_ARGB32_Premultiplied
431 destFetch64, // Format_RGB16
432 destFetch64, // Format_ARGB8565_Premultiplied
433 destFetch64, // Format_RGB666
434 destFetch64, // Format_ARGB6666_Premultiplied
435 destFetch64, // Format_RGB555
436 destFetch64, // Format_ARGB8555_Premultiplied
437 destFetch64, // Format_RGB888
438 destFetch64, // Format_RGB444
439 destFetch64, // Format_ARGB4444_Premultiplied
440 destFetch64, // Format_RGBX8888
441 destFetch64, // Format_RGBA8888
442 destFetch64, // Format_RGBA8888_Premultiplied
443 destFetch64, // Format_BGR30
444 destFetch64, // Format_A2BGR30_Premultiplied
445 destFetch64, // Format_RGB30
446 destFetch64, // Format_A2RGB30_Premultiplied
447 destFetch64, // Format_Alpha8
448 destFetch64, // Format_Grayscale8
449 destFetchRGB64, // Format_RGBX64
450 destFetch64, // Format_RGBA64
451 destFetchRGB64, // Format_RGBA64_Premultiplied
452 destFetch64, // Format_Grayscale16
453 destFetch64, // Format_BGR888
454 destFetch64, // Format_RGBX16FPx4
455 destFetch64, // Format_RGBA16FPx4
456 destFetch64, // Format_RGBA16FPx4_Premultiplied
457 destFetch64, // Format_RGBX32FPx4
458 destFetch64, // Format_RGBA32FPx4
459 destFetch64, // Format_RGBA32FPx4_Premultiplied
460 destFetch64, // Format_CMYK8888
461};
462
463static_assert(std::size(destFetchProc64) == QImage::NImageFormats);
464#endif
465
466#if QT_CONFIG(raster_fp)
467static QRgbaFloat32 *QT_FASTCALL destFetchFP(QRgbaFloat32 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
468{
469 return const_cast<QRgbaFloat32 *>(qFetchToRGBA32F[rasterBuffer->format](buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
470}
471
472static QRgbaFloat32 *QT_FASTCALL destFetchRGBFP(QRgbaFloat32 *, QRasterBuffer *rasterBuffer, int x, int y, int)
473{
474 return reinterpret_cast<QRgbaFloat32 *>(rasterBuffer->scanLine(y)) + x;
475}
476
477static QRgbaFloat32 *QT_FASTCALL destFetchFPUndefined(QRgbaFloat32 *buffer, QRasterBuffer *, int, int, int)
478{
479 return buffer;
480}
481static DestFetchProcFP destFetchProcFP[] =
482{
483 nullptr, // Format_Invalid
484 nullptr, // Format_Mono,
485 nullptr, // Format_MonoLSB
486 nullptr, // Format_Indexed8
487 destFetchFP, // Format_RGB32
488 destFetchFP, // Format_ARGB32,
489 destFetchFP, // Format_ARGB32_Premultiplied
490 destFetchFP, // Format_RGB16
491 destFetchFP, // Format_ARGB8565_Premultiplied
492 destFetchFP, // Format_RGB666
493 destFetchFP, // Format_ARGB6666_Premultiplied
494 destFetchFP, // Format_RGB555
495 destFetchFP, // Format_ARGB8555_Premultiplied
496 destFetchFP, // Format_RGB888
497 destFetchFP, // Format_RGB444
498 destFetchFP, // Format_ARGB4444_Premultiplied
499 destFetchFP, // Format_RGBX8888
500 destFetchFP, // Format_RGBA8888
501 destFetchFP, // Format_RGBA8888_Premultiplied
502 destFetchFP, // Format_BGR30
503 destFetchFP, // Format_A2BGR30_Premultiplied
504 destFetchFP, // Format_RGB30
505 destFetchFP, // Format_A2RGB30_Premultiplied
506 destFetchFP, // Format_Alpha8
507 destFetchFP, // Format_Grayscale8
508 destFetchFP, // Format_RGBX64
509 destFetchFP, // Format_RGBA64
510 destFetchFP, // Format_RGBA64_Premultiplied
511 destFetchFP, // Format_Grayscale16
512 destFetchFP, // Format_BGR888
513 destFetchFP, // Format_RGBX16FPx4
514 destFetchFP, // Format_RGBA16FPx4
515 destFetchFP, // Format_RGBA16FPx4_Premultiplied
516 destFetchRGBFP, // Format_RGBX32FPx4
517 destFetchFP, // Format_RGBA32FPx4
518 destFetchRGBFP, // Format_RGBA32FPx4_Premultiplied
519 destFetchFP, // Format_CMYK8888
520};
521
522static_assert(std::size(destFetchProcFP) == QImage::NImageFormats);
523#endif
524
525/*
526 Returns the color in the mono destination color table
527 that is the "nearest" to /color/.
528*/
529static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf)
530{
531 const QRgb color_0 = rbuf->destColor0;
532 const QRgb color_1 = rbuf->destColor1;
533
534 int r = qRed(rgb: color);
535 int g = qGreen(rgb: color);
536 int b = qBlue(rgb: color);
537 int rx, gx, bx;
538 int dist_0, dist_1;
539
540 rx = r - qRed(rgb: color_0);
541 gx = g - qGreen(rgb: color_0);
542 bx = b - qBlue(rgb: color_0);
543 dist_0 = rx*rx + gx*gx + bx*bx;
544
545 rx = r - qRed(rgb: color_1);
546 gx = g - qGreen(rgb: color_1);
547 bx = b - qBlue(rgb: color_1);
548 dist_1 = rx*rx + gx*gx + bx*bx;
549
550 if (dist_0 < dist_1)
551 return color_0;
552 return color_1;
553}
554
555/*
556 Destination store.
557*/
558
559static void QT_FASTCALL destStoreMono(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
560{
561 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
562 if (rasterBuffer->monoDestinationWithClut) {
563 for (int i = 0; i < length; ++i) {
564 if (buffer[i] == rasterBuffer->destColor0) {
565 data[x >> 3] &= ~(0x80 >> (x & 7));
566 } else if (buffer[i] == rasterBuffer->destColor1) {
567 data[x >> 3] |= 0x80 >> (x & 7);
568 } else if (findNearestColor(color: buffer[i], rbuf: rasterBuffer) == rasterBuffer->destColor0) {
569 data[x >> 3] &= ~(0x80 >> (x & 7));
570 } else {
571 data[x >> 3] |= 0x80 >> (x & 7);
572 }
573 ++x;
574 }
575 } else {
576 for (int i = 0; i < length; ++i) {
577 if (qGray(rgb: buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
578 data[x >> 3] |= 0x80 >> (x & 7);
579 else
580 data[x >> 3] &= ~(0x80 >> (x & 7));
581 ++x;
582 }
583 }
584}
585
586static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
587{
588 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
589 if (rasterBuffer->monoDestinationWithClut) {
590 for (int i = 0; i < length; ++i) {
591 if (buffer[i] == rasterBuffer->destColor0) {
592 data[x >> 3] &= ~(1 << (x & 7));
593 } else if (buffer[i] == rasterBuffer->destColor1) {
594 data[x >> 3] |= 1 << (x & 7);
595 } else if (findNearestColor(color: buffer[i], rbuf: rasterBuffer) == rasterBuffer->destColor0) {
596 data[x >> 3] &= ~(1 << (x & 7));
597 } else {
598 data[x >> 3] |= 1 << (x & 7);
599 }
600 ++x;
601 }
602 } else {
603 for (int i = 0; i < length; ++i) {
604 if (qGray(rgb: buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
605 data[x >> 3] |= 1 << (x & 7);
606 else
607 data[x >> 3] &= ~(1 << (x & 7));
608 ++x;
609 }
610 }
611}
612
613static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
614{
615 quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x;
616 for (int i = 0; i < length; ++i)
617 data[i] = qConvertRgb32To16(c: buffer[i]);
618}
619
620static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
621{
622 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
623 ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
624 if (!layout->premultiplied && !layout->hasAlphaChannel)
625 store = layout->storeFromRGB32;
626 uchar *dest = rasterBuffer->scanLine(y);
627 store(dest, buffer, x, length, nullptr, nullptr);
628}
629
630static void QT_FASTCALL destStoreGray8(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
631{
632 uchar *data = rasterBuffer->scanLine(y) + x;
633
634 bool failed = false;
635 for (int k = 0; k < length; ++k) {
636 if (!qIsGray(rgb: buffer[k])) {
637 failed = true;
638 break;
639 }
640 data[k] = qRed(rgb: buffer[k]);
641 }
642 if (failed) { // Non-gray colors
643 QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb;
644 QColorTransform tf = QColorSpacePrivate::get(colorSpace&: fromCS)->transformationToXYZ();
645 QColorTransformPrivate *tfd = QColorTransformPrivate::get(q: tf);
646
647 tfd->apply(dst: data, src: buffer, count: length, flags: QColorTransformPrivate::InputPremultiplied);
648 }
649}
650
651static void QT_FASTCALL destStoreGray16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
652{
653 quint16 *data = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
654
655 bool failed = false;
656 for (int k = 0; k < length; ++k) {
657 if (!qIsGray(rgb: buffer[k])) {
658 failed = true;
659 break;
660 }
661 data[k] = qRed(rgb: buffer[k]) * 257;
662 }
663 if (failed) { // Non-gray colors
664 QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb;
665 QColorTransform tf = QColorSpacePrivate::get(colorSpace&: fromCS)->transformationToXYZ();
666 QColorTransformPrivate *tfd = QColorTransformPrivate::get(q: tf);
667
668 QRgba64 tmp_line[BufferSize];
669 for (int k = 0; k < length; ++k)
670 tmp_line[k] = QRgba64::fromArgb32(rgb: buffer[k]);
671 tfd->apply(dst: data, src: tmp_line, count: length, flags: QColorTransformPrivate::InputPremultiplied);
672 }
673}
674
675static DestStoreProc destStoreProc[] =
676{
677 nullptr, // Format_Invalid
678 destStoreMono, // Format_Mono,
679 destStoreMonoLsb, // Format_MonoLSB
680 nullptr, // Format_Indexed8
681 nullptr, // Format_RGB32
682 destStore, // Format_ARGB32,
683 nullptr, // Format_ARGB32_Premultiplied
684 destStoreRGB16, // Format_RGB16
685 destStore, // Format_ARGB8565_Premultiplied
686 destStore, // Format_RGB666
687 destStore, // Format_ARGB6666_Premultiplied
688 destStore, // Format_RGB555
689 destStore, // Format_ARGB8555_Premultiplied
690 destStore, // Format_RGB888
691 destStore, // Format_RGB444
692 destStore, // Format_ARGB4444_Premultiplied
693 destStore, // Format_RGBX8888
694 destStore, // Format_RGBA8888
695 destStore, // Format_RGBA8888_Premultiplied
696 destStore, // Format_BGR30
697 destStore, // Format_A2BGR30_Premultiplied
698 destStore, // Format_RGB30
699 destStore, // Format_A2RGB30_Premultiplied
700 destStore, // Format_Alpha8
701 destStoreGray8, // Format_Grayscale8
702 destStore, // Format_RGBX64
703 destStore, // Format_RGBA64
704 destStore, // Format_RGBA64_Premultiplied
705 destStoreGray16, // Format_Grayscale16
706 destStore, // Format_BGR888
707 destStore, // Format_RGBX16FPx4
708 destStore, // Format_RGBA16FPx4
709 destStore, // Format_RGBA16FPx4_Premultiplied
710 destStore, // Format_RGBX32FPx4
711 destStore, // Format_RGBA32FPx4
712 destStore, // Format_RGBA32FPx4_Premultiplied
713 destStore, // Format_CMYK8888
714};
715
716static_assert(std::size(destStoreProc) == QImage::NImageFormats);
717
718#if QT_CONFIG(raster_64bit)
719static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
720{
721 auto store = qStoreFromRGBA64PM[rasterBuffer->format];
722 uchar *dest = rasterBuffer->scanLine(y);
723 store(dest, buffer, x, length, nullptr, nullptr);
724}
725
726static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
727{
728 QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x;
729 for (int i = 0; i < length; ++i) {
730 dest[i] = buffer[i].unpremultiplied();
731 }
732}
733
734static void QT_FASTCALL destStore64Gray8(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
735{
736 uchar *data = rasterBuffer->scanLine(y) + x;
737
738 bool failed = false;
739 for (int k = 0; k < length; ++k) {
740 if (buffer[k].red() != buffer[k].green() || buffer[k].red() != buffer[k].blue()) {
741 failed = true;
742 break;
743 }
744 data[k] = buffer[k].red8();
745 }
746 if (failed) { // Non-gray colors
747 QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb;
748 QColorTransform tf = QColorSpacePrivate::get(colorSpace&: fromCS)->transformationToXYZ();
749 QColorTransformPrivate *tfd = QColorTransformPrivate::get(q: tf);
750
751 quint16 gray_line[BufferSize];
752 tfd->apply(dst: gray_line, src: buffer, count: length, flags: QColorTransformPrivate::InputPremultiplied);
753 for (int k = 0; k < length; ++k)
754 data[k] = qt_div_257(x: gray_line[k]);
755 }
756}
757
758static void QT_FASTCALL destStore64Gray16(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
759{
760 quint16 *data = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
761
762 bool failed = false;
763 for (int k = 0; k < length; ++k) {
764 if (buffer[k].red() != buffer[k].green() || buffer[k].red() != buffer[k].blue()) {
765 failed = true;
766 break;
767 }
768 data[k] = buffer[k].red();
769 }
770 if (failed) { // Non-gray colors
771 QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb;
772 QColorTransform tf = QColorSpacePrivate::get(colorSpace&: fromCS)->transformationToXYZ();
773 QColorTransformPrivate *tfd = QColorTransformPrivate::get(q: tf);
774 tfd->apply(dst: data, src: buffer, count: length, flags: QColorTransformPrivate::InputPremultiplied);
775 }
776}
777
778static DestStoreProc64 destStoreProc64[] =
779{
780 nullptr, // Format_Invalid
781 nullptr, // Format_Mono,
782 nullptr, // Format_MonoLSB
783 nullptr, // Format_Indexed8
784 destStore64, // Format_RGB32
785 destStore64, // Format_ARGB32,
786 destStore64, // Format_ARGB32_Premultiplied
787 destStore64, // Format_RGB16
788 destStore64, // Format_ARGB8565_Premultiplied
789 destStore64, // Format_RGB666
790 destStore64, // Format_ARGB6666_Premultiplied
791 destStore64, // Format_RGB555
792 destStore64, // Format_ARGB8555_Premultiplied
793 destStore64, // Format_RGB888
794 destStore64, // Format_RGB444
795 destStore64, // Format_ARGB4444_Premultiplied
796 destStore64, // Format_RGBX8888
797 destStore64, // Format_RGBA8888
798 destStore64, // Format_RGBA8888_Premultiplied
799 destStore64, // Format_BGR30
800 destStore64, // Format_A2BGR30_Premultiplied
801 destStore64, // Format_RGB30
802 destStore64, // Format_A2RGB30_Premultiplied
803 destStore64, // Format_Alpha8
804 destStore64Gray8, // Format_Grayscale8
805 nullptr, // Format_RGBX64
806 destStore64RGBA64, // Format_RGBA64
807 nullptr, // Format_RGBA64_Premultiplied
808 destStore64Gray16, // Format_Grayscale16
809 destStore64, // Format_BGR888
810 destStore64, // Format_RGBX16FPx4
811 destStore64, // Format_RGBA16FPx4
812 destStore64, // Format_RGBA16FPx4_Premultiplied
813 destStore64, // Format_RGBX32FPx4
814 destStore64, // Format_RGBA32FPx4
815 destStore64, // Format_RGBA32FPx4_Premultiplied
816 destStore64, // Format_CMYK8888
817};
818
819static_assert(std::size(destStoreProc64) == QImage::NImageFormats);
820#endif
821
822#if QT_CONFIG(raster_fp)
823static void QT_FASTCALL destStoreFP(QRasterBuffer *rasterBuffer, int x, int y, const QRgbaFloat32 *buffer, int length)
824{
825 auto store = qStoreFromRGBA32F[rasterBuffer->format];
826 uchar *dest = rasterBuffer->scanLine(y);
827 store(dest, buffer, x, length, nullptr, nullptr);
828}
829#endif
830
831/*
832 Source fetches
833
834 This is a bit more complicated, as we need several fetch routines for every surface type
835
836 We need 5 fetch methods per surface type:
837 untransformed
838 transformed (tiled and not tiled)
839 transformed bilinear (tiled and not tiled)
840
841 We don't need bounds checks for untransformed, but we need them for the other ones.
842
843 The generic implementation does pixel by pixel fetches
844*/
845
// The ways a texture can be sampled when it is used as a source. The
// enumerators are consecutive starting at 0; NBlendTypes is their count and
// must remain the last entry.
enum TextureBlendType {
    BlendUntransformed = 0,
    BlendTiled,
    BlendTransformed,
    BlendTransformedTiled,
    BlendTransformedBilinear,
    BlendTransformedBilinearTiled,

    NBlendTypes
};
855
856static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator *,
857 const QSpanData *data, int y, int x, int length)
858{
859 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
860 return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
861}
862
863static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *,
864 const QSpanData *data, int y, int x, int)
865{
866 const uchar *scanLine = data->texture.scanLine(y);
867 return reinterpret_cast<const uint *>(scanLine) + x;
868}
869
870static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *,
871 const QSpanData *data, int y, int x,
872 int length)
873{
874 const quint16 *scanLine = (const quint16 *)data->texture.scanLine(y) + x;
875 for (int i = 0; i < length; ++i)
876 buffer[i] = qConvertRgb16To32(c: scanLine[i]);
877 return buffer;
878}
879
880#if QT_CONFIG(raster_64bit)
881static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Operator *,
882 const QSpanData *data, int y, int x, int length)
883{
884 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
885 return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
886}
887
888static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *,
889 const QSpanData *data, int y, int x, int)
890{
891 const uchar *scanLine = data->texture.scanLine(y);
892 return reinterpret_cast<const QRgba64 *>(scanLine) + x;
893}
894#endif
895
896#if QT_CONFIG(raster_fp)
897static const QRgbaFloat32 *QT_FASTCALL fetchUntransformedFP(QRgbaFloat32 *buffer, const Operator *,
898 const QSpanData *data, int y, int x, int length)
899{
900 const auto fetch = qFetchToRGBA32F[data->texture.format];
901 return fetch(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
902}
903#endif
904
905template<TextureBlendType blendType>
906inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
907{
908 static_assert(blendType == BlendTransformed || blendType == BlendTransformedTiled);
909 if (blendType == BlendTransformedTiled) {
910 if (v < 0 || v >= max) {
911 v %= max;
912 if (v < 0) v += max;
913 }
914 } else {
915 v = qBound(min: l1, val: v, max: l2);
916 }
917}
918
919static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data)
920{
921 if (Q_UNLIKELY(!data->fast_matrix))
922 return false;
923
924 qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale;
925 qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale;
926 qreal minc = std::min(a: fx, b: fy);
927 qreal maxc = std::max(a: fx, b: fy);
928 fx += std::trunc(x: data->m11 * fixed_scale) * length;
929 fy += std::trunc(x: data->m12 * fixed_scale) * length;
930 minc = std::min(a: minc, b: std::min(a: fx, b: fy));
931 maxc = std::max(a: maxc, b: std::max(a: fx, b: fy));
932
933 return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max();
934}
935
936template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
937static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data,
938 int y, int x, int length)
939{
940 static_assert(blendType == BlendTransformed || blendType == BlendTransformedTiled);
941 const QTextureData &image = data->texture;
942
943 const qreal cx = x + qreal(0.5);
944 const qreal cy = y + qreal(0.5);
945
946 constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
947 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
948 if (!useFetch)
949 Q_ASSERT(layout->bpp == bpp || (layout->bpp == QPixelLayout::BPP16FPx4 && bpp == QPixelLayout::BPP64));
950 // When templated 'fetch' should be inlined at compile time:
951 const Fetch1PixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? fetch1PixelTable[layout->bpp] : Fetch1PixelFunc(fetch1Pixel<bpp>);
952
953 if (canUseFastMatrixPath(cx, cy, length, data)) {
954 // The increment pr x in the scanline
955 int fdx = (int)(data->m11 * fixed_scale);
956 int fdy = (int)(data->m12 * fixed_scale);
957
958 int fx = int((data->m21 * cy
959 + data->m11 * cx + data->dx) * fixed_scale);
960 int fy = int((data->m22 * cy
961 + data->m12 * cx + data->dy) * fixed_scale);
962
963 if (fdy == 0) { // simple scale, no rotation or shear
964 int py = (fy >> 16);
965 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
966 const uchar *src = image.scanLine(y: py);
967
968 int i = 0;
969 if (blendType == BlendTransformed) {
970 int fastLen = length;
971 if (fdx > 0)
972 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
973 else if (fdx < 0)
974 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
975
976 for (; i < fastLen; ++i) {
977 int x1 = (fx >> 16);
978 int x2 = x1;
979 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
980 if (x1 == x2)
981 break;
982 if constexpr (useFetch)
983 buffer[i] = fetch1(src, x1);
984 else
985 buffer[i] = reinterpret_cast<const T*>(src)[x1];
986 fx += fdx;
987 }
988
989 for (; i < fastLen; ++i) {
990 int px = (fx >> 16);
991 if constexpr (useFetch)
992 buffer[i] = fetch1(src, px);
993 else
994 buffer[i] = reinterpret_cast<const T*>(src)[px];
995 fx += fdx;
996 }
997 }
998
999 for (; i < length; ++i) {
1000 int px = (fx >> 16);
1001 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
1002 if constexpr (useFetch)
1003 buffer[i] = fetch1(src, px);
1004 else
1005 buffer[i] = reinterpret_cast<const T*>(src)[px];
1006 fx += fdx;
1007 }
1008 } else { // rotation or shear
1009 int i = 0;
1010 if (blendType == BlendTransformed) {
1011 int fastLen = length;
1012 if (fdx > 0)
1013 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
1014 else if (fdx < 0)
1015 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
1016 if (fdy > 0)
1017 fastLen = qMin(a: fastLen, b: int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
1018 else if (fdy < 0)
1019 fastLen = qMin(a: fastLen, b: int((qint64(image.y1) * fixed_scale - fy) / fdy));
1020
1021 for (; i < fastLen; ++i) {
1022 int x1 = (fx >> 16);
1023 int y1 = (fy >> 16);
1024 int x2 = x1;
1025 int y2 = y1;
1026 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
1027 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1);
1028 if (x1 == x2 && y1 == y2)
1029 break;
1030 if constexpr (useFetch)
1031 buffer[i] = fetch1(image.scanLine(y: y1), x1);
1032 else
1033 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: y1))[x1];
1034 fx += fdx;
1035 fy += fdy;
1036 }
1037
1038 for (; i < fastLen; ++i) {
1039 int px = (fx >> 16);
1040 int py = (fy >> 16);
1041 if constexpr (useFetch)
1042 buffer[i] = fetch1(image.scanLine(y: py), px);
1043 else
1044 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
1045 fx += fdx;
1046 fy += fdy;
1047 }
1048 }
1049
1050 for (; i < length; ++i) {
1051 int px = (fx >> 16);
1052 int py = (fy >> 16);
1053 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
1054 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
1055 if constexpr (useFetch)
1056 buffer[i] = fetch1(image.scanLine(y: py), px);
1057 else
1058 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
1059 fx += fdx;
1060 fy += fdy;
1061 }
1062 }
1063 } else {
1064 const qreal fdx = data->m11;
1065 const qreal fdy = data->m12;
1066 const qreal fdw = data->m13;
1067
1068 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
1069 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
1070 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
1071
1072 T *const end = buffer + length;
1073 T *b = buffer;
1074 while (b < end) {
1075 const qreal iw = fw == 0 ? 1 : 1 / fw;
1076 const qreal tx = fx * iw;
1077 const qreal ty = fy * iw;
1078 int px = qFloor(v: tx);
1079 int py = qFloor(v: ty);
1080
1081 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
1082 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
1083 if constexpr (useFetch)
1084 *b = fetch1(image.scanLine(y: py), px);
1085 else
1086 *b = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
1087
1088 fx += fdx;
1089 fy += fdy;
1090 fw += fdw;
1091 //force increment to avoid /0
1092 if (!fw) {
1093 fw += fdw;
1094 }
1095 ++b;
1096 }
1097 }
1098}
1099
1100template<TextureBlendType blendType, QPixelLayout::BPP bpp>
1101static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data,
1102 int y, int x, int length)
1103{
1104 static_assert(blendType == BlendTransformed || blendType == BlendTransformedTiled);
1105 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
1106 fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
1107 layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
1108 return buffer;
1109}
1110
1111#if QT_CONFIG(raster_64bit)
1112template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
1113static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data,
1114 int y, int x, int length)
1115{
1116 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
1117 if (layout->bpp < QPixelLayout::BPP64) {
1118 uint buffer32[BufferSize];
1119 Q_ASSERT(length <= BufferSize);
1120 if (layout->bpp == QPixelLayout::BPP32)
1121 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
1122 else
1123 fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
1124 return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
1125 }
1126
1127 fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, quint64>(reinterpret_cast<quint64*>(buffer), data, y, x, length);
1128 if (auto convert = convert64ToRGBA64PM[data->texture.format])
1129 convert(buffer, length);
1130 return buffer;
1131}
1132#endif
1133
1134#if QT_CONFIG(raster_fp)
1135template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
1136static const QRgbaFloat32 *QT_FASTCALL fetchTransformedFP(QRgbaFloat32 *buffer, const Operator *, const QSpanData *data,
1137 int y, int x, int length)
1138{
1139 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
1140 if (layout->bpp < QPixelLayout::BPP64) {
1141 uint buffer32[BufferSize];
1142 Q_ASSERT(length <= BufferSize);
1143 if (layout->bpp == QPixelLayout::BPP32)
1144 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
1145 else
1146 fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
1147 qConvertToRGBA32F[data->texture.format](buffer, buffer32, length, data->texture.colorTable, nullptr);
1148 } else if (layout->bpp < QPixelLayout::BPP32FPx4) {
1149 quint64 buffer64[BufferSize];
1150 fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, quint64>(buffer64, data, y, x, length);
1151 convert64ToRGBA32F[data->texture.format](buffer, buffer64, length);
1152 } else {
1153 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>(buffer, data, y, x, length);
1154 if (data->texture.format == QImage::Format_RGBA32FPx4)
1155 convertRGBA32FToRGBA32FPM(buffer, count: length);
1156 return buffer;
1157 }
1158 return buffer;
1159}
1160#endif
1161
/** \internal
  Blends the four argb pixels \a tl, \a tr, \a bl and \a br using the
  horizontal factor \a distx and the vertical factor \a disty.
  distx and disty must be between 0 and 16
 */
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    constexpr uint rbMask = 0x00ff00ff;
    constexpr uint agMask = 0xff00ff00;

    // Corner weights, they add up to 16*16:
    //   top-left:     (16-distx) * (16-disty) = 16*16 - 16*distx - 16*disty + distxy
    //   top-right:    distx * (16-disty)      = 16*distx - distxy
    //   bottom-left:  (16-distx) * disty      = 16*disty - distxy
    //   bottom-right: distx * disty           = distxy
    const uint distxy = distx * disty;
    const uint weightTL = 16*16 - 16*distx - 16*disty + distxy;
    const uint weightTR = distx*16 - distxy;
    const uint weightBL = disty*16 - distxy;
    const uint weightBR = distxy;

    // Red and blue are weighted in the low byte of each 16-bit half
    uint rb = (tl & rbMask) * weightTL;
    rb += (tr & rbMask) * weightTR;
    rb += (bl & rbMask) * weightBL;
    rb += (br & rbMask) * weightBR;

    // Alpha and green are shifted down first so they can be weighted the same way
    uint ag = ((tl & agMask) >> 8) * weightTL;
    ag += ((tr & agMask) >> 8) * weightTR;
    ag += ((bl & agMask) >> 8) * weightBL;
    ag += ((br & agMask) >> 8) * weightBR;

    return ((rb >> 8) & rbMask) | (ag & agMask);
}
1181
1182#if defined(__SSE2__)
/* SSE2 counterpart of interpolate_4_pixels_16(): blends the pixels held in the
   vectors tl, tr, bl and br with the 16-bit lane factors distx and disty and
   stores the result (unaligned) at b. colorMask selects the low byte of every
   16-bit lane; callers in this file pass 256 in every lane for v_256. */
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
{ \
    /* Corner weights per 16-bit lane */ \
    const __m128i distx16 = _mm_slli_epi16(distx, 4); \
    const __m128i disty16 = _mm_slli_epi16(disty, 4); \
    const __m128i weightBR = _mm_mullo_epi16(distx, disty); \
    const __m128i weightTR = _mm_sub_epi16(distx16, weightBR); \
    const __m128i weightBL = _mm_sub_epi16(disty16, weightBR); \
    const __m128i weightTL = _mm_add_epi16(weightBR, _mm_sub_epi16(v_256, _mm_add_epi16(distx16, disty16))); \
    \
    /* Weighted alpha/green (taken from the high byte of each lane) */ \
    const __m128i topAG = _mm_add_epi16(_mm_mullo_epi16(_mm_srli_epi16(tl, 8), weightTL), \
                                        _mm_mullo_epi16(_mm_srli_epi16(tr, 8), weightTR)); \
    const __m128i bottomAG = _mm_add_epi16(_mm_mullo_epi16(_mm_srli_epi16(bl, 8), weightBL), \
                                           _mm_mullo_epi16(_mm_srli_epi16(br, 8), weightBR)); \
    \
    /* Weighted red/blue (taken from the low byte of each lane) */ \
    const __m128i topRB = _mm_add_epi16(_mm_mullo_epi16(_mm_and_si128(tl, colorMask), weightTL), \
                                        _mm_mullo_epi16(_mm_and_si128(tr, colorMask), weightTR)); \
    const __m128i bottomRB = _mm_add_epi16(_mm_mullo_epi16(_mm_and_si128(bl, colorMask), weightBL), \
                                           _mm_mullo_epi16(_mm_and_si128(br, colorMask), weightBR)); \
    \
    /* Add the values, and only keep 8 significant bits per color: */ \
    /* AG stays in the high byte, RB is shifted down to the low byte */ \
    const __m128i sumAG = _mm_andnot_si128(colorMask, _mm_add_epi16(topAG, bottomAG)); \
    const __m128i sumRB = _mm_srli_epi16(_mm_add_epi16(topRB, bottomRB), 8); \
    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(sumAG, sumRB)); \
}
1217#endif
1218
1219#if defined(__ARM_NEON__)
/* NEON counterpart of interpolate_4_pixels_16(): blends the pixels held in the
   vectors tl, tr, bl and br and stores the result at b. disty_ is used where
   the SSE2 variant uses disty shifted left by 4 (the caller in this file passes
   exactly that); colorMask / invColorMask select the low / high byte of every
   16-bit lane. */
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
{ \
    /* Corner weights per 16-bit lane */ \
    const int16x8_t distx16 = vshlq_n_s16(distx, 4); \
    const int16x8_t weightBR = vmulq_s16(distx, disty); \
    const int16x8_t weightTR = vsubq_s16(distx16, weightBR); \
    const int16x8_t weightBL = vsubq_s16(disty_, weightBR); \
    const int16x8_t weightTL = vaddq_s16(weightBR, vsubq_s16(v_256, vaddq_s16(distx16, disty_))); \
    \
    /* Weighted sum of alpha/green (taken from the high byte of each lane) */ \
    int16x8_t sumAG = vmulq_s16(vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)), weightTL); \
    sumAG = vmlaq_s16(sumAG, vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)), weightTR); \
    sumAG = vmlaq_s16(sumAG, vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)), weightBL); \
    sumAG = vmlaq_s16(sumAG, vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)), weightBR); \
    \
    /* Weighted sum of red/blue (taken from the low byte of each lane) */ \
    int16x8_t sumRB = vmulq_s16(vandq_s16(tl, colorMask), weightTL); \
    sumRB = vmlaq_s16(sumRB, vandq_s16(tr, colorMask), weightTR); \
    sumRB = vmlaq_s16(sumRB, vandq_s16(bl, colorMask), weightBL); \
    sumRB = vmlaq_s16(sumRB, vandq_s16(br, colorMask), weightBR); \
    \
    /* AG stays in the high byte, RB is shifted down to the low byte */ \
    sumAG = vandq_s16(invColorMask, sumAG); \
    sumRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(sumRB), 8)); \
    vst1q_s16((int16_t*)(b), vorrq_s16(sumAG, sumRB)); \
}
1250#endif
1251
1252template<TextureBlendType blendType>
1253void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
1254
1255template<>
1256inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2)
1257{
1258 v1 %= max;
1259 if (v1 < 0)
1260 v1 += max;
1261 v2 = v1 + 1;
1262 if (v2 == max)
1263 v2 = 0;
1264 Q_ASSERT(v1 >= 0 && v1 < max);
1265 Q_ASSERT(v2 >= 0 && v2 < max);
1266}
1267
1268template<>
1269inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2)
1270{
1271 if (v1 < l1)
1272 v2 = v1 = l1;
1273 else if (v1 >= l2)
1274 v2 = v1 = l2;
1275 else
1276 v2 = v1 + 1;
1277 Q_ASSERT(v1 >= l1 && v1 <= l2);
1278 Q_ASSERT(v2 >= l1 && v2 <= l2);
1279}
1280
// Kinds of fast transform. The names mirror the
// fetchTransformedBilinearARGB32PM_*_helper functions in this file;
// presumably the values index per-transform helper tables -- verify at the
// use sites. NFastTransformTypes is the number of kinds and must stay last.
enum FastTransformTypes {
    SimpleScaleTransform = 0,
    UpscaleTransform     = 1,
    DownscaleTransform   = 2,
    RotateTransform      = 3,
    FastRotateTransform  = 4,
    NFastTransformTypes  = 5
};
1289
1290// Completes the partial interpolation stored in IntermediateBuffer.
1291// by performing the x-axis interpolation and joining the RB and AG buffers.
1292static void QT_FASTCALL intermediate_adder(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
1293{
1294#if defined(QT_COMPILER_SUPPORTS_AVX2)
1295 extern void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
1296 if (qCpuHasFeature(ArchHaswell))
1297 return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
1298#endif
1299
1300 // Switch to intermediate buffer coordinates
1301 fx -= offset * fixed_scale;
1302
1303 while (b < end) {
1304 const int x = (fx >> 16);
1305
1306 const uint distx = (fx & 0x0000ffff) >> 8;
1307 const uint idistx = 256 - distx;
1308 const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00;
1309 const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00;
1310 *b = (rb >> 8) | ag;
1311 b++;
1312 fx += fdx;
1313 }
1314 fx += offset * fixed_scale;
1315}
1316
// Function pointer type matching the fetchTransformedBilinearARGB32PM_*_helper
// templates in this file: the helper writes the pixels [b, end) sampled from
// image. fx/fy are the 16.16 fixed-point texture position, passed by reference,
// and fdx/fdy the per-pixel increments.
typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy);
1318
1319template<TextureBlendType blendType>
1320static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
1321 int &fx, int &fy, int fdx, int /*fdy*/)
1322{
1323 int y1 = (fy >> 16);
1324 int y2;
1325 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1326 const uint *s1 = (const uint *)image.scanLine(y: y1);
1327 const uint *s2 = (const uint *)image.scanLine(y: y2);
1328
1329 const int disty = (fy & 0x0000ffff) >> 8;
1330 const int idisty = 256 - disty;
1331 const int length = end - b;
1332
1333 // The intermediate buffer is generated in the positive direction
1334 const int adjust = (fdx < 0) ? fdx * length : 0;
1335 const int offset = (fx + adjust) >> 16;
1336 int x = offset;
1337
1338 IntermediateBuffer intermediate;
1339 // count is the size used in the intermediate.buffer.
1340 int count = (qint64(length) * qAbs(t: fdx) + fixed_scale - 1) / fixed_scale + 2;
1341 // length is supposed to be <= BufferSize either because data->m11 < 1 or
1342 // data->m11 < 2, and any larger buffers split
1343 Q_ASSERT(count <= BufferSize + 2);
1344 int f = 0;
1345 int lim = count;
1346 if (blendType == BlendTransformedBilinearTiled) {
1347 x %= image.width;
1348 if (x < 0) x += image.width;
1349 } else {
1350 lim = qMin(a: count, b: image.x2 - x);
1351 if (x < image.x1) {
1352 Q_ASSERT(x < image.x2);
1353 uint t = s1[image.x1];
1354 uint b = s2[image.x1];
1355 quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
1356 quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
1357 do {
1358 intermediate.buffer_rb[f] = rb;
1359 intermediate.buffer_ag[f] = ag;
1360 f++;
1361 x++;
1362 } while (x < image.x1 && f < lim);
1363 }
1364 }
1365
1366 if (blendType != BlendTransformedBilinearTiled) {
1367#if defined(__SSE2__)
1368 const __m128i disty_ = _mm_set1_epi16(w: disty);
1369 const __m128i idisty_ = _mm_set1_epi16(w: idisty);
1370 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
1371
1372 lim -= 3;
1373 for (; f < lim; x += 4, f += 4) {
1374 // Load 4 pixels from s1, and split the alpha-green and red-blue component
1375 __m128i top = _mm_loadu_si128(p: (const __m128i*)((const uint *)(s1)+x));
1376 __m128i topAG = _mm_srli_epi16(a: top, count: 8);
1377 __m128i topRB = _mm_and_si128(a: top, b: colorMask);
1378 // Multiplies each color component by idisty
1379 topAG = _mm_mullo_epi16 (a: topAG, b: idisty_);
1380 topRB = _mm_mullo_epi16 (a: topRB, b: idisty_);
1381
1382 // Same for the s2 vector
1383 __m128i bottom = _mm_loadu_si128(p: (const __m128i*)((const uint *)(s2)+x));
1384 __m128i bottomAG = _mm_srli_epi16(a: bottom, count: 8);
1385 __m128i bottomRB = _mm_and_si128(a: bottom, b: colorMask);
1386 bottomAG = _mm_mullo_epi16 (a: bottomAG, b: disty_);
1387 bottomRB = _mm_mullo_epi16 (a: bottomRB, b: disty_);
1388
1389 // Add the values, and shift to only keep 8 significant bits per colors
1390 __m128i rAG =_mm_add_epi16(a: topAG, b: bottomAG);
1391 rAG = _mm_srli_epi16(a: rAG, count: 8);
1392 _mm_storeu_si128(p: (__m128i*)(&intermediate.buffer_ag[f]), b: rAG);
1393 __m128i rRB =_mm_add_epi16(a: topRB, b: bottomRB);
1394 rRB = _mm_srli_epi16(a: rRB, count: 8);
1395 _mm_storeu_si128(p: (__m128i*)(&intermediate.buffer_rb[f]), b: rRB);
1396 }
1397#elif defined(__ARM_NEON__)
1398 const int16x8_t disty_ = vdupq_n_s16(disty);
1399 const int16x8_t idisty_ = vdupq_n_s16(idisty);
1400 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
1401
1402 lim -= 3;
1403 for (; f < lim; x += 4, f += 4) {
1404 // Load 4 pixels from s1, and split the alpha-green and red-blue component
1405 int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
1406 int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
1407 int16x8_t topRB = vandq_s16(top, colorMask);
1408 // Multiplies each color component by idisty
1409 topAG = vmulq_s16(topAG, idisty_);
1410 topRB = vmulq_s16(topRB, idisty_);
1411
1412 // Same for the s2 vector
1413 int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
1414 int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
1415 int16x8_t bottomRB = vandq_s16(bottom, colorMask);
1416 bottomAG = vmulq_s16(bottomAG, disty_);
1417 bottomRB = vmulq_s16(bottomRB, disty_);
1418
1419 // Add the values, and shift to only keep 8 significant bits per colors
1420 int16x8_t rAG = vaddq_s16(topAG, bottomAG);
1421 rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
1422 vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG);
1423 int16x8_t rRB = vaddq_s16(topRB, bottomRB);
1424 rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
1425 vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB);
1426 }
1427#endif
1428 }
1429 for (; f < count; f++) { // Same as above but without simd
1430 if (blendType == BlendTransformedBilinearTiled) {
1431 if (x >= image.width) x -= image.width;
1432 } else {
1433 x = qMin(a: x, b: image.x2 - 1);
1434 }
1435
1436 uint t = s1[x];
1437 uint b = s2[x];
1438
1439 intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
1440 intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
1441 x++;
1442 }
1443
1444 // Now interpolate the values from the intermediate.buffer to get the final result.
1445 intermediate_adder(b, end, intermediate, offset, fx, fdx);
1446}
1447
1448template<TextureBlendType blendType>
1449static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint *b, uint *end, const QTextureData &image,
1450 int &fx, int &fy, int fdx, int /*fdy*/)
1451{
1452 int y1 = (fy >> 16);
1453 int y2;
1454 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1455 const uint *s1 = (const uint *)image.scanLine(y: y1);
1456 const uint *s2 = (const uint *)image.scanLine(y: y2);
1457 const int disty = (fy & 0x0000ffff) >> 8;
1458
1459 if (blendType != BlendTransformedBilinearTiled) {
1460 const qint64 min_fx = qint64(image.x1) * fixed_scale;
1461 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
1462 while (b < end) {
1463 int x1 = (fx >> 16);
1464 int x2;
1465 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1466 if (x1 != x2)
1467 break;
1468 uint top = s1[x1];
1469 uint bot = s2[x1];
1470 *b = INTERPOLATE_PIXEL_256(x: top, a: 256 - disty, y: bot, b: disty);
1471 fx += fdx;
1472 ++b;
1473 }
1474 uint *boundedEnd = end;
1475 if (fdx > 0)
1476 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
1477 else if (fdx < 0)
1478 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
1479
1480 // A fast middle part without boundary checks
1481 while (b < boundedEnd) {
1482 int x = (fx >> 16);
1483 int distx = (fx & 0x0000ffff) >> 8;
1484 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx, disty);
1485 fx += fdx;
1486 ++b;
1487 }
1488 }
1489
1490 while (b < end) {
1491 int x1 = (fx >> 16);
1492 int x2;
1493 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1 , x1, x2);
1494 uint tl = s1[x1];
1495 uint tr = s1[x2];
1496 uint bl = s2[x1];
1497 uint br = s2[x2];
1498 int distx = (fx & 0x0000ffff) >> 8;
1499 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1500
1501 fx += fdx;
1502 ++b;
1503 }
1504}
1505
1506template<TextureBlendType blendType>
1507static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *b, uint *end, const QTextureData &image,
1508 int &fx, int &fy, int fdx, int /*fdy*/)
1509{
1510 int y1 = (fy >> 16);
1511 int y2;
1512 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1513 const uint *s1 = (const uint *)image.scanLine(y: y1);
1514 const uint *s2 = (const uint *)image.scanLine(y: y2);
1515 const int disty8 = (fy & 0x0000ffff) >> 8;
1516 const int disty4 = (disty8 + 0x08) >> 4;
1517
1518 if (blendType != BlendTransformedBilinearTiled) {
1519 const qint64 min_fx = qint64(image.x1) * fixed_scale;
1520 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
1521 while (b < end) {
1522 int x1 = (fx >> 16);
1523 int x2;
1524 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1525 if (x1 != x2)
1526 break;
1527 uint top = s1[x1];
1528 uint bot = s2[x1];
1529 *b = INTERPOLATE_PIXEL_256(x: top, a: 256 - disty8, y: bot, b: disty8);
1530 fx += fdx;
1531 ++b;
1532 }
1533 uint *boundedEnd = end;
1534 if (fdx > 0)
1535 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
1536 else if (fdx < 0)
1537 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
1538 // A fast middle part without boundary checks
1539#if defined(__SSE2__)
1540 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
1541 const __m128i v_256 = _mm_set1_epi16(w: 256);
1542 const __m128i v_disty = _mm_set1_epi16(w: disty4);
1543 const __m128i v_fdx = _mm_set1_epi32(i: fdx*4);
1544 const __m128i v_fx_r = _mm_set1_epi32(i: 0x8);
1545 __m128i v_fx = _mm_setr_epi32(i0: fx, i1: fx + fdx, i2: fx + fdx + fdx, i3: fx + fdx + fdx + fdx);
1546
1547 while (b < boundedEnd - 3) {
1548 __m128i offset = _mm_srli_epi32(a: v_fx, count: 16);
1549 const int offset0 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1550 const int offset1 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1551 const int offset2 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1552 const int offset3 = _mm_cvtsi128_si32(a: offset);
1553 const __m128i tl = _mm_setr_epi32(i0: s1[offset0], i1: s1[offset1], i2: s1[offset2], i3: s1[offset3]);
1554 const __m128i tr = _mm_setr_epi32(i0: s1[offset0 + 1], i1: s1[offset1 + 1], i2: s1[offset2 + 1], i3: s1[offset3 + 1]);
1555 const __m128i bl = _mm_setr_epi32(i0: s2[offset0], i1: s2[offset1], i2: s2[offset2], i3: s2[offset3]);
1556 const __m128i br = _mm_setr_epi32(i0: s2[offset0 + 1], i1: s2[offset1 + 1], i2: s2[offset2 + 1], i3: s2[offset3 + 1]);
1557
1558 __m128i v_distx = _mm_srli_epi16(a: v_fx, count: 8);
1559 v_distx = _mm_srli_epi16(a: _mm_add_epi32(a: v_distx, b: v_fx_r), count: 4);
1560 v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
1561 v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
1562
1563 interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
1564 b += 4;
1565 v_fx = _mm_add_epi32(a: v_fx, b: v_fdx);
1566 }
1567 fx = _mm_cvtsi128_si32(a: v_fx);
1568#elif defined(__ARM_NEON__)
1569 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
1570 const int16x8_t invColorMask = vmvnq_s16(colorMask);
1571 const int16x8_t v_256 = vdupq_n_s16(256);
1572 const int16x8_t v_disty = vdupq_n_s16(disty4);
1573 const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
1574 int32x4_t v_fdx = vdupq_n_s32(fdx*4);
1575
1576 int32x4_t v_fx = vmovq_n_s32(fx);
1577 v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
1578 v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
1579 v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
1580
1581 const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
1582 const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
1583
1584 // Pre-initialize to work-around code-analysis warnings/crashes in MSVC:
1585 uint32x4x2_t v_top = {};
1586 uint32x4x2_t v_bot = {};
1587 while (b < boundedEnd - 3) {
1588 int x1 = (fx >> 16);
1589 fx += fdx;
1590 v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
1591 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
1592 x1 = (fx >> 16);
1593 fx += fdx;
1594 v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
1595 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
1596 x1 = (fx >> 16);
1597 fx += fdx;
1598 v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
1599 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
1600 x1 = (fx >> 16);
1601 fx += fdx;
1602 v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
1603 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
1604
1605 int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12);
1606 v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
1607
1608 interpolate_4_pixels_16_neon(
1609 vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
1610 vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
1611 vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
1612 colorMask, invColorMask, v_256, b);
1613 b+=4;
1614 v_fx = vaddq_s32(v_fx, v_fdx);
1615 }
1616#endif
1617 while (b < boundedEnd) {
1618 int x = (fx >> 16);
1619 if (hasFastInterpolate4()) {
1620 int distx8 = (fx & 0x0000ffff) >> 8;
1621 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx: distx8, disty: disty8);
1622 } else {
1623 int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
1624 *b = interpolate_4_pixels_16(tl: s1[x], tr: s1[x + 1], bl: s2[x], br: s2[x + 1], distx: distx4, disty: disty4);
1625 }
1626 fx += fdx;
1627 ++b;
1628 }
1629 }
1630
1631 while (b < end) {
1632 int x1 = (fx >> 16);
1633 int x2;
1634 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1635 uint tl = s1[x1];
1636 uint tr = s1[x2];
1637 uint bl = s2[x1];
1638 uint br = s2[x2];
1639 if (hasFastInterpolate4()) {
1640 int distx8 = (fx & 0x0000ffff) >> 8;
1641 *b = interpolate_4_pixels(tl, tr, bl, br, distx: distx8, disty: disty8);
1642 } else {
1643 int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
1644 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx: distx4, disty: disty4);
1645 }
1646 fx += fdx;
1647 ++b;
1648 }
1649}
1650
1651template<TextureBlendType blendType>
1652static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint *b, uint *end, const QTextureData &image,
1653 int &fx, int &fy, int fdx, int fdy)
1654{
1655 // if we are zooming more than 8 times, we use 8bit precision for the position.
1656 while (b < end) {
1657 int x1 = (fx >> 16);
1658 int x2;
1659 int y1 = (fy >> 16);
1660 int y2;
1661
1662 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1663 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1664
1665 const uint *s1 = (const uint *)image.scanLine(y: y1);
1666 const uint *s2 = (const uint *)image.scanLine(y: y2);
1667
1668 uint tl = s1[x1];
1669 uint tr = s1[x2];
1670 uint bl = s2[x1];
1671 uint br = s2[x2];
1672
1673 int distx = (fx & 0x0000ffff) >> 8;
1674 int disty = (fy & 0x0000ffff) >> 8;
1675
1676 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1677
1678 fx += fdx;
1679 fy += fdy;
1680 ++b;
1681 }
1682}
1683
1684template<TextureBlendType blendType>
1685static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint *b, uint *end, const QTextureData &image,
1686 int &fx, int &fy, int fdx, int fdy)
1687{
1688 //we are zooming less than 8x, use 4bit precision
1689 if (blendType != BlendTransformedBilinearTiled) {
1690 const qint64 min_fx = qint64(image.x1) * fixed_scale;
1691 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
1692 const qint64 min_fy = qint64(image.y1) * fixed_scale;
1693 const qint64 max_fy = qint64(image.y2 - 1) * fixed_scale;
1694 // first handle the possibly bounded part in the beginning
1695 while (b < end) {
1696 int x1 = (fx >> 16);
1697 int x2;
1698 int y1 = (fy >> 16);
1699 int y2;
1700 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1701 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1702 if (x1 != x2 && y1 != y2)
1703 break;
1704 const uint *s1 = (const uint *)image.scanLine(y: y1);
1705 const uint *s2 = (const uint *)image.scanLine(y: y2);
1706 uint tl = s1[x1];
1707 uint tr = s1[x2];
1708 uint bl = s2[x1];
1709 uint br = s2[x2];
1710 if (hasFastInterpolate4()) {
1711 int distx = (fx & 0x0000ffff) >> 8;
1712 int disty = (fy & 0x0000ffff) >> 8;
1713 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1714 } else {
1715 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
1716 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
1717 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
1718 }
1719 fx += fdx;
1720 fy += fdy;
1721 ++b;
1722 }
1723 uint *boundedEnd = end;
1724 if (fdx > 0)
1725 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
1726 else if (fdx < 0)
1727 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
1728 if (fdy > 0)
1729 boundedEnd = qMin(a: boundedEnd, b: b + (max_fy - fy) / fdy);
1730 else if (fdy < 0)
1731 boundedEnd = qMin(a: boundedEnd, b: b + (min_fy - fy) / fdy);
1732
1733 // until boundedEnd we can now have a fast middle part without boundary checks
1734#if defined(__SSE2__)
1735 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
1736 const __m128i v_256 = _mm_set1_epi16(w: 256);
1737 const __m128i v_fdx = _mm_set1_epi32(i: fdx*4);
1738 const __m128i v_fdy = _mm_set1_epi32(i: fdy*4);
1739 const __m128i v_fxy_r = _mm_set1_epi32(i: 0x8);
1740 __m128i v_fx = _mm_setr_epi32(i0: fx, i1: fx + fdx, i2: fx + fdx + fdx, i3: fx + fdx + fdx + fdx);
1741 __m128i v_fy = _mm_setr_epi32(i0: fy, i1: fy + fdy, i2: fy + fdy + fdy, i3: fy + fdy + fdy + fdy);
1742
1743 const uchar *textureData = image.imageData;
1744 const qsizetype bytesPerLine = image.bytesPerLine;
1745 const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));
1746
1747 while (b < boundedEnd - 3) {
1748 const __m128i vy = _mm_packs_epi32(a: _mm_srli_epi32(a: v_fy, count: 16), b: _mm_setzero_si128());
1749 // 4x16bit * 4x16bit -> 4x32bit
1750 __m128i offset = _mm_unpacklo_epi16(a: _mm_mullo_epi16(a: vy, b: vbpl), b: _mm_mulhi_epi16(a: vy, b: vbpl));
1751 offset = _mm_add_epi32(a: offset, b: _mm_srli_epi32(a: v_fx, count: 16));
1752 const int offset0 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1753 const int offset1 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1754 const int offset2 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1755 const int offset3 = _mm_cvtsi128_si32(a: offset);
1756 const uint *topData = (const uint *)(textureData);
1757 const __m128i tl = _mm_setr_epi32(i0: topData[offset0], i1: topData[offset1], i2: topData[offset2], i3: topData[offset3]);
1758 const __m128i tr = _mm_setr_epi32(i0: topData[offset0 + 1], i1: topData[offset1 + 1], i2: topData[offset2 + 1], i3: topData[offset3 + 1]);
1759 const uint *bottomData = (const uint *)(textureData + bytesPerLine);
1760 const __m128i bl = _mm_setr_epi32(i0: bottomData[offset0], i1: bottomData[offset1], i2: bottomData[offset2], i3: bottomData[offset3]);
1761 const __m128i br = _mm_setr_epi32(i0: bottomData[offset0 + 1], i1: bottomData[offset1 + 1], i2: bottomData[offset2 + 1], i3: bottomData[offset3 + 1]);
1762
1763 __m128i v_distx = _mm_srli_epi16(a: v_fx, count: 8);
1764 __m128i v_disty = _mm_srli_epi16(a: v_fy, count: 8);
1765 v_distx = _mm_srli_epi16(a: _mm_add_epi32(a: v_distx, b: v_fxy_r), count: 4);
1766 v_disty = _mm_srli_epi16(a: _mm_add_epi32(a: v_disty, b: v_fxy_r), count: 4);
1767 v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
1768 v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
1769 v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
1770 v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
1771
1772 interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
1773 b += 4;
1774 v_fx = _mm_add_epi32(a: v_fx, b: v_fdx);
1775 v_fy = _mm_add_epi32(a: v_fy, b: v_fdy);
1776 }
1777 fx = _mm_cvtsi128_si32(a: v_fx);
1778 fy = _mm_cvtsi128_si32(a: v_fy);
1779#elif defined(__ARM_NEON__)
1780 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
1781 const int16x8_t invColorMask = vmvnq_s16(colorMask);
1782 const int16x8_t v_256 = vdupq_n_s16(256);
1783 int32x4_t v_fdx = vdupq_n_s32(fdx * 4);
1784 int32x4_t v_fdy = vdupq_n_s32(fdy * 4);
1785
1786 const uchar *textureData = image.imageData;
1787 const qsizetype bytesPerLine = image.bytesPerLine;
1788
1789 int32x4_t v_fx = vmovq_n_s32(fx);
1790 int32x4_t v_fy = vmovq_n_s32(fy);
1791 v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
1792 v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1);
1793 v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
1794 v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2);
1795 v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
1796 v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3);
1797
1798 const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
1799 const int32x4_t v_round = vdupq_n_s32(0x0800);
1800
1801 // Pre-initialize to work-around code-analysis warnings/crashes in MSVC:
1802 uint32x4x2_t v_top = {};
1803 uint32x4x2_t v_bot = {};
1804 while (b < boundedEnd - 3) {
1805 int x1 = (fx >> 16);
1806 int y1 = (fy >> 16);
1807 fx += fdx; fy += fdy;
1808 const uchar *sl = textureData + bytesPerLine * y1;
1809 const uint *s1 = reinterpret_cast<const uint *>(sl);
1810 const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1811 v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
1812 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
1813 x1 = (fx >> 16);
1814 y1 = (fy >> 16);
1815 fx += fdx; fy += fdy;
1816 sl = textureData + bytesPerLine * y1;
1817 s1 = reinterpret_cast<const uint *>(sl);
1818 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1819 v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
1820 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
1821 x1 = (fx >> 16);
1822 y1 = (fy >> 16);
1823 fx += fdx; fy += fdy;
1824 sl = textureData + bytesPerLine * y1;
1825 s1 = reinterpret_cast<const uint *>(sl);
1826 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1827 v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
1828 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
1829 x1 = (fx >> 16);
1830 y1 = (fy >> 16);
1831 fx += fdx; fy += fdy;
1832 sl = textureData + bytesPerLine * y1;
1833 s1 = reinterpret_cast<const uint *>(sl);
1834 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1835 v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
1836 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
1837
1838 int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12);
1839 int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12);
1840 v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
1841 v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16));
1842 int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4);
1843
1844 interpolate_4_pixels_16_neon(
1845 vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
1846 vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
1847 vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
1848 v_disty_, colorMask, invColorMask, v_256, b);
1849 b += 4;
1850 v_fx = vaddq_s32(v_fx, v_fdx);
1851 v_fy = vaddq_s32(v_fy, v_fdy);
1852 }
1853#endif
1854 while (b < boundedEnd) {
1855 int x = (fx >> 16);
1856 int y = (fy >> 16);
1857
1858 const uint *s1 = (const uint *)image.scanLine(y);
1859 const uint *s2 = (const uint *)image.scanLine(y: y + 1);
1860
1861 if (hasFastInterpolate4()) {
1862 int distx = (fx & 0x0000ffff) >> 8;
1863 int disty = (fy & 0x0000ffff) >> 8;
1864 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx, disty);
1865 } else {
1866 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
1867 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
1868 *b = interpolate_4_pixels_16(tl: s1[x], tr: s1[x + 1], bl: s2[x], br: s2[x + 1], distx, disty);
1869 }
1870
1871 fx += fdx;
1872 fy += fdy;
1873 ++b;
1874 }
1875 }
1876
1877 while (b < end) {
1878 int x1 = (fx >> 16);
1879 int x2;
1880 int y1 = (fy >> 16);
1881 int y2;
1882
1883 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1884 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1885
1886 const uint *s1 = (const uint *)image.scanLine(y: y1);
1887 const uint *s2 = (const uint *)image.scanLine(y: y2);
1888
1889 uint tl = s1[x1];
1890 uint tr = s1[x2];
1891 uint bl = s2[x1];
1892 uint br = s2[x2];
1893
1894 if (hasFastInterpolate4()) {
1895 int distx = (fx & 0x0000ffff) >> 8;
1896 int disty = (fy & 0x0000ffff) >> 8;
1897 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1898 } else {
1899 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
1900 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
1901 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
1902 }
1903
1904 fx += fdx;
1905 fy += fdy;
1906 ++b;
1907 }
1908}
1909
1910
1911static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = {
1912 {
1913 fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>,
1914 fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
1915 fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
1916 fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
1917 fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
1918 },
1919 {
1920 fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>,
1921 fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
1922 fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
1923 fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
1924 fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled>
1925 }
1926};
1927
1928template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */
1929static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *,
1930 const QSpanData *data, int y, int x,
1931 int length)
1932{
1933 const qreal cx = x + qreal(0.5);
1934 const qreal cy = y + qreal(0.5);
1935 constexpr int tiled = (blendType == BlendTransformedBilinearTiled) ? 1 : 0;
1936
1937 uint *end = buffer + length;
1938 uint *b = buffer;
1939 if (canUseFastMatrixPath(cx, cy, length, data)) {
1940 // The increment pr x in the scanline
1941 int fdx = (int)(data->m11 * fixed_scale);
1942 int fdy = (int)(data->m12 * fixed_scale);
1943
1944 int fx = int((data->m21 * cy
1945 + data->m11 * cx + data->dx) * fixed_scale);
1946 int fy = int((data->m22 * cy
1947 + data->m12 * cx + data->dy) * fixed_scale);
1948
1949 fx -= half_point;
1950 fy -= half_point;
1951
1952 if (fdy == 0) { // simple scale, no rotation or shear
1953 if (qAbs(t: fdx) <= fixed_scale) {
1954 // simple scale up on X
1955 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1956 } else if (qAbs(t: fdx) <= 2 * fixed_scale) {
1957 // simple scale down on X, less than 2x
1958 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
1959 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
1960 if (mid != length)
1961 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
1962 } else if (qAbs(t: data->m22) < qreal(1./8.)) {
1963 // scale up more than 8x (on Y)
1964 bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1965 } else {
1966 // scale down on X
1967 bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1968 }
1969 } else { // rotation or shear
1970 if (qAbs(t: data->m11) < qreal(1./8.) || qAbs(t: data->m22) < qreal(1./8.) ) {
1971 // if we are zooming more than 8 times, we use 8bit precision for the position.
1972 bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
1973 } else {
1974 // we are zooming less than 8x, use 4bit precision
1975 bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
1976 }
1977 }
1978 } else {
1979 const QTextureData &image = data->texture;
1980
1981 const qreal fdx = data->m11;
1982 const qreal fdy = data->m12;
1983 const qreal fdw = data->m13;
1984
1985 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
1986 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
1987 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
1988
1989 while (b < end) {
1990 const qreal iw = fw == 0 ? 1 : 1 / fw;
1991 const qreal px = fx * iw - qreal(0.5);
1992 const qreal py = fy * iw - qreal(0.5);
1993
1994 int x1 = int(px) - (px < 0);
1995 int x2;
1996 int y1 = int(py) - (py < 0);
1997 int y2;
1998
1999 int distx = int((px - x1) * 256);
2000 int disty = int((py - y1) * 256);
2001
2002 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2003 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2004
2005 const uint *s1 = (const uint *)data->texture.scanLine(y: y1);
2006 const uint *s2 = (const uint *)data->texture.scanLine(y: y2);
2007
2008 uint tl = s1[x1];
2009 uint tr = s1[x2];
2010 uint bl = s2[x1];
2011 uint br = s2[x2];
2012
2013 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2014
2015 fx += fdx;
2016 fy += fdy;
2017 fw += fdw;
2018 //force increment to avoid /0
2019 if (!fw) {
2020 fw += fdw;
2021 }
2022 ++b;
2023 }
2024 }
2025
2026 return buffer;
2027}
2028
2029template<TextureBlendType blendType>
2030static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
2031 int &fx, int &fy, int fdx, int /*fdy*/)
2032{
2033 const QPixelLayout *layout = &qPixelLayouts[image.format];
2034 const QList<QRgb> *clut = image.colorTable;
2035 const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM;
2036
2037 int y1 = (fy >> 16);
2038 int y2;
2039 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2040 const uchar *s1 = image.scanLine(y: y1);
2041 const uchar *s2 = image.scanLine(y: y2);
2042
2043 const int disty = (fy & 0x0000ffff) >> 8;
2044 const int idisty = 256 - disty;
2045 const int length = end - b;
2046
2047 // The intermediate buffer is generated in the positive direction
2048 const int adjust = (fdx < 0) ? fdx * length : 0;
2049 const int offset = (fx + adjust) >> 16;
2050 int x = offset;
2051
2052 IntermediateBuffer intermediate;
2053 uint *buf1 = intermediate.buffer_rb;
2054 uint *buf2 = intermediate.buffer_ag;
2055 const uint *ptr1;
2056 const uint *ptr2;
2057
2058 int count = (qint64(length) * qAbs(t: fdx) + fixed_scale - 1) / fixed_scale + 2;
2059 Q_ASSERT(count <= BufferSize + 2);
2060
2061 if (blendType == BlendTransformedBilinearTiled) {
2062 x %= image.width;
2063 if (x < 0)
2064 x += image.width;
2065 int len1 = qMin(a: count, b: image.width - x);
2066 int len2 = qMin(a: x, b: count - len1);
2067
2068 ptr1 = fetch(buf1, s1, x, len1, clut, nullptr);
2069 ptr2 = fetch(buf2, s2, x, len1, clut, nullptr);
2070 for (int i = 0; i < len1; ++i) {
2071 uint t = ptr1[i];
2072 uint b = ptr2[i];
2073 buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
2074 buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
2075 }
2076
2077 if (len2) {
2078 ptr1 = fetch(buf1 + len1, s1, 0, len2, clut, nullptr);
2079 ptr2 = fetch(buf2 + len1, s2, 0, len2, clut, nullptr);
2080 for (int i = 0; i < len2; ++i) {
2081 uint t = ptr1[i];
2082 uint b = ptr2[i];
2083 buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
2084 buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
2085 }
2086 }
2087 // Generate the rest by repeatedly repeating the previous set of pixels
2088 for (int i = image.width; i < count; ++i) {
2089 buf1[i] = buf1[i - image.width];
2090 buf2[i] = buf2[i - image.width];
2091 }
2092 } else {
2093 int start = qMax(a: x, b: image.x1);
2094 int end = qMin(a: x + count, b: image.x2);
2095 int len = qMax(a: 1, b: end - start);
2096 int leading = start - x;
2097
2098 ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr);
2099 ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr);
2100
2101 for (int i = 0; i < len; ++i) {
2102 uint t = ptr1[i];
2103 uint b = ptr2[i];
2104 buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
2105 buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
2106 }
2107
2108 for (int i = 0; i < leading; ++i) {
2109 buf1[i] = buf1[leading];
2110 buf2[i] = buf2[leading];
2111 }
2112 for (int i = leading + len; i < count; ++i) {
2113 buf1[i] = buf1[i - 1];
2114 buf2[i] = buf2[i - 1];
2115 }
2116 }
2117
2118 // Now interpolate the values from the intermediate.buffer to get the final result.
2119 intermediate_adder(b, end, intermediate, offset, fx, fdx);
2120}
2121
2122
2123template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
2124static void QT_FASTCALL fetchTransformedBilinear_fetcher(T *buf1, T *buf2, const int len, const QTextureData &image,
2125 int fx, int fy, const int fdx, const int fdy)
2126{
2127 const QPixelLayout &layout = qPixelLayouts[image.format];
2128 constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
2129 if (useFetch)
2130 Q_ASSERT(sizeof(T) == sizeof(uint));
2131 else
2132 Q_ASSERT(layout.bpp == bpp || (layout.bpp == QPixelLayout::BPP16FPx4 && bpp == QPixelLayout::BPP64));
2133 const Fetch1PixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? fetch1PixelTable[layout.bpp] : fetch1Pixel<bpp>;
2134 if (fdy == 0) {
2135 int y1 = (fy >> 16);
2136 int y2;
2137 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2138 const uchar *s1 = image.scanLine(y: y1);
2139 const uchar *s2 = image.scanLine(y: y2);
2140
2141 int i = 0;
2142 if (blendType == BlendTransformedBilinear) {
2143 for (; i < len; ++i) {
2144 int x1 = (fx >> 16);
2145 int x2;
2146 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2147 if (x1 != x2)
2148 break;
2149 if constexpr (useFetch) {
2150 buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
2151 buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
2152 } else {
2153 buf1[i * 2 + 0] = buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x1];
2154 buf2[i * 2 + 0] = buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x1];
2155 }
2156 fx += fdx;
2157 }
2158 int fastLen = len;
2159 if (fdx > 0)
2160 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
2161 else if (fdx < 0)
2162 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
2163
2164 for (; i < fastLen; ++i) {
2165 int x = (fx >> 16);
2166 if constexpr (useFetch) {
2167 buf1[i * 2 + 0] = fetch1(s1, x);
2168 buf1[i * 2 + 1] = fetch1(s1, x + 1);
2169 buf2[i * 2 + 0] = fetch1(s2, x);
2170 buf2[i * 2 + 1] = fetch1(s2, x + 1);
2171 } else {
2172 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
2173 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
2174 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
2175 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
2176 }
2177 fx += fdx;
2178 }
2179 }
2180
2181 for (; i < len; ++i) {
2182 int x1 = (fx >> 16);
2183 int x2;
2184 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2185 if constexpr (useFetch) {
2186 buf1[i * 2 + 0] = fetch1(s1, x1);
2187 buf1[i * 2 + 1] = fetch1(s1, x2);
2188 buf2[i * 2 + 0] = fetch1(s2, x1);
2189 buf2[i * 2 + 1] = fetch1(s2, x2);
2190 } else {
2191 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
2192 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
2193 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
2194 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
2195 }
2196 fx += fdx;
2197 }
2198 } else {
2199 int i = 0;
2200 if (blendType == BlendTransformedBilinear) {
2201 for (; i < len; ++i) {
2202 int x1 = (fx >> 16);
2203 int x2;
2204 int y1 = (fy >> 16);
2205 int y2;
2206 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2207 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2208 if (x1 != x2 && y1 != y2)
2209 break;
2210 const uchar *s1 = image.scanLine(y: y1);
2211 const uchar *s2 = image.scanLine(y: y2);
2212 if constexpr (useFetch) {
2213 buf1[i * 2 + 0] = fetch1(s1, x1);
2214 buf1[i * 2 + 1] = fetch1(s1, x2);
2215 buf2[i * 2 + 0] = fetch1(s2, x1);
2216 buf2[i * 2 + 1] = fetch1(s2, x2);
2217 } else {
2218 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
2219 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
2220 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
2221 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
2222 }
2223 fx += fdx;
2224 fy += fdy;
2225 }
2226 int fastLen = len;
2227 if (fdx > 0)
2228 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
2229 else if (fdx < 0)
2230 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
2231 if (fdy > 0)
2232 fastLen = qMin(a: fastLen, b: int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
2233 else if (fdy < 0)
2234 fastLen = qMin(a: fastLen, b: int((qint64(image.y1) * fixed_scale - fy) / fdy));
2235
2236 for (; i < fastLen; ++i) {
2237 int x = (fx >> 16);
2238 int y = (fy >> 16);
2239 const uchar *s1 = image.scanLine(y);
2240 const uchar *s2 = s1 + image.bytesPerLine;
2241 if constexpr (useFetch) {
2242 buf1[i * 2 + 0] = fetch1(s1, x);
2243 buf1[i * 2 + 1] = fetch1(s1, x + 1);
2244 buf2[i * 2 + 0] = fetch1(s2, x);
2245 buf2[i * 2 + 1] = fetch1(s2, x + 1);
2246 } else {
2247 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
2248 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
2249 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
2250 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
2251 }
2252 fx += fdx;
2253 fy += fdy;
2254 }
2255 }
2256
2257 for (; i < len; ++i) {
2258 int x1 = (fx >> 16);
2259 int x2;
2260 int y1 = (fy >> 16);
2261 int y2;
2262 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2263 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2264
2265 const uchar *s1 = image.scanLine(y: y1);
2266 const uchar *s2 = image.scanLine(y: y2);
2267 if constexpr (useFetch) {
2268 buf1[i * 2 + 0] = fetch1(s1, x1);
2269 buf1[i * 2 + 1] = fetch1(s1, x2);
2270 buf2[i * 2 + 0] = fetch1(s2, x1);
2271 buf2[i * 2 + 1] = fetch1(s2, x2);
2272 } else {
2273 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
2274 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
2275 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
2276 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
2277 }
2278 fx += fdx;
2279 fy += fdy;
2280 }
2281 }
2282}
2283
2284template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
2285static void QT_FASTCALL fetchTransformedBilinear_slow_fetcher(T *buf1, T *buf2, ushort *distxs, ushort *distys,
2286 const int len, const QTextureData &image,
2287 qreal &fx, qreal &fy, qreal &fw,
2288 const qreal fdx, const qreal fdy, const qreal fdw)
2289{
2290 const QPixelLayout &layout = qPixelLayouts[image.format];
2291 constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
2292 if (useFetch)
2293 Q_ASSERT(sizeof(T) == sizeof(uint));
2294 else
2295 Q_ASSERT(layout.bpp == bpp);
2296
2297 const Fetch1PixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? fetch1PixelTable[layout.bpp] : fetch1Pixel<bpp>;
2298
2299 for (int i = 0; i < len; ++i) {
2300 const qreal iw = fw == 0 ? 16384 : 1 / fw;
2301 const qreal px = fx * iw - qreal(0.5);
2302 const qreal py = fy * iw - qreal(0.5);
2303
2304 int x1 = qFloor(v: px);
2305 int x2;
2306 int y1 = qFloor(v: py);
2307 int y2;
2308
2309 distxs[i] = ushort((px - x1) * (1<<16));
2310 distys[i] = ushort((py - y1) * (1<<16));
2311
2312 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2313 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2314
2315 const uchar *s1 = image.scanLine(y: y1);
2316 const uchar *s2 = image.scanLine(y: y2);
2317 if constexpr (useFetch) {
2318 buf1[i * 2 + 0] = fetch1(s1, x1);
2319 buf1[i * 2 + 1] = fetch1(s1, x2);
2320 buf2[i * 2 + 0] = fetch1(s2, x1);
2321 buf2[i * 2 + 1] = fetch1(s2, x2);
2322 } else {
2323 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
2324 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
2325 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
2326 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
2327 }
2328
2329 fx += fdx;
2330 fy += fdy;
2331 fw += fdw;
2332 }
2333}
2334
2335// blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
2336template<TextureBlendType blendType, QPixelLayout::BPP bpp>
2337static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *,
2338 const QSpanData *data, int y, int x, int length)
2339{
2340 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2341 const QList<QRgb> *clut = data->texture.colorTable;
2342 Q_ASSERT(bpp == QPixelLayout::BPPNone || layout->bpp == bpp);
2343
2344 const qreal cx = x + qreal(0.5);
2345 const qreal cy = y + qreal(0.5);
2346
2347 if (canUseFastMatrixPath(cx, cy, length, data)) {
2348 // The increment pr x in the scanline
2349 int fdx = (int)(data->m11 * fixed_scale);
2350 int fdy = (int)(data->m12 * fixed_scale);
2351
2352 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2353 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2354
2355 fx -= half_point;
2356 fy -= half_point;
2357
2358 if (fdy == 0) { // simple scale, no rotation or shear
2359 if (qAbs(t: fdx) <= fixed_scale) { // scale up on X
2360 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
2361 } else if (qAbs(t: fdx) <= 2 * fixed_scale) { // scale down on X less than 2x
2362 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
2363 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
2364 if (mid != length)
2365 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
2366 } else {
2367 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
2368
2369 uint buf1[BufferSize];
2370 uint buf2[BufferSize];
2371 uint *b = buffer;
2372 while (length) {
2373 int len = qMin(a: length, b: BufferSize / 2);
2374 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0);
2375 layout->convertToARGB32PM(buf1, len * 2, clut);
2376 layout->convertToARGB32PM(buf2, len * 2, clut);
2377
2378 if (hasFastInterpolate4() || qAbs(t: data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y)
2379 int disty = (fy & 0x0000ffff) >> 8;
2380 for (int i = 0; i < len; ++i) {
2381 int distx = (fx & 0x0000ffff) >> 8;
2382 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
2383 fx += fdx;
2384 }
2385 } else {
2386 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
2387 for (int i = 0; i < len; ++i) {
2388 uint tl = buf1[i * 2 + 0];
2389 uint tr = buf1[i * 2 + 1];
2390 uint bl = buf2[i * 2 + 0];
2391 uint br = buf2[i * 2 + 1];
2392 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
2393 b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
2394 fx += fdx;
2395 }
2396 }
2397 length -= len;
2398 b += len;
2399 }
2400 }
2401 } else { // rotation or shear
2402 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
2403
2404 uint buf1[BufferSize];
2405 uint buf2[BufferSize];
2406 uint *b = buffer;
2407 while (length) {
2408 int len = qMin(a: length, b: BufferSize / 2);
2409 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
2410 layout->convertToARGB32PM(buf1, len * 2, clut);
2411 layout->convertToARGB32PM(buf2, len * 2, clut);
2412
2413 if (hasFastInterpolate4() || qAbs(t: data->m11) < qreal(1./8.) || qAbs(t: data->m22) < qreal(1./8.)) {
2414 // If we are zooming more than 8 times, we use 8bit precision for the position.
2415 for (int i = 0; i < len; ++i) {
2416 int distx = (fx & 0x0000ffff) >> 8;
2417 int disty = (fy & 0x0000ffff) >> 8;
2418
2419 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
2420 fx += fdx;
2421 fy += fdy;
2422 }
2423 } else {
2424 // We are zooming less than 8x, use 4bit precision
2425 for (int i = 0; i < len; ++i) {
2426 uint tl = buf1[i * 2 + 0];
2427 uint tr = buf1[i * 2 + 1];
2428 uint bl = buf2[i * 2 + 0];
2429 uint br = buf2[i * 2 + 1];
2430
2431 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
2432 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
2433
2434 b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
2435 fx += fdx;
2436 fy += fdy;
2437 }
2438 }
2439
2440 length -= len;
2441 b += len;
2442 }
2443 }
2444 } else {
2445 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType,bpp,uint>;
2446
2447 const qreal fdx = data->m11;
2448 const qreal fdy = data->m12;
2449 const qreal fdw = data->m13;
2450
2451 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2452 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2453 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2454
2455 uint buf1[BufferSize];
2456 uint buf2[BufferSize];
2457 uint *b = buffer;
2458
2459 ushort distxs[BufferSize / 2];
2460 ushort distys[BufferSize / 2];
2461
2462 while (length) {
2463 const int len = qMin(a: length, b: BufferSize / 2);
2464 fetcher(buf1, buf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2465
2466 layout->convertToARGB32PM(buf1, len * 2, clut);
2467 layout->convertToARGB32PM(buf2, len * 2, clut);
2468
2469 for (int i = 0; i < len; ++i) {
2470 const int distx = distxs[i] >> 8;
2471 const int disty = distys[i] >> 8;
2472
2473 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
2474 }
2475 length -= len;
2476 b += len;
2477 }
2478 }
2479
2480 return buffer;
2481}
2482
2483#if QT_CONFIG(raster_64bit)
2484template<TextureBlendType blendType>
2485static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 *buffer, const QSpanData *data,
2486 int y, int x, int length)
2487{
2488 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2489 const auto *clut = data->texture.colorTable;
2490 const auto convert = layout->convertToRGBA64PM;
2491
2492 const qreal cx = x + qreal(0.5);
2493 const qreal cy = y + qreal(0.5);
2494
2495 uint sbuf1[BufferSize];
2496 uint sbuf2[BufferSize];
2497 alignas(8) QRgba64 buf1[BufferSize];
2498 alignas(8) QRgba64 buf2[BufferSize];
2499 QRgba64 *b = buffer;
2500
2501 if (canUseFastMatrixPath(cx, cy, length, data)) {
2502 // The increment pr x in the scanline
2503 const int fdx = (int)(data->m11 * fixed_scale);
2504 const int fdy = (int)(data->m12 * fixed_scale);
2505
2506 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2507 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2508
2509 fx -= half_point;
2510 fy -= half_point;
2511
2512 const auto fetcher =
2513 (layout->bpp == QPixelLayout::BPP32)
2514 ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
2515 : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2516
2517 if (fdy == 0) { //simple scale, no rotation
2518 while (length) {
2519 const int len = qMin(a: length, b: BufferSize / 2);
2520 const int disty = (fy & 0x0000ffff);
2521#if defined(__SSE2__)
2522 const __m128i vdy = _mm_set1_epi16(w: disty);
2523 const __m128i vidy = _mm_set1_epi16(w: 0x10000 - disty);
2524#endif
2525 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2526
2527 convert(buf1, sbuf1, len * 2, clut, nullptr);
2528 if (disty)
2529 convert(buf2, sbuf2, len * 2, clut, nullptr);
2530
2531 for (int i = 0; i < len; ++i) {
2532 const int distx = (fx & 0x0000ffff);
2533#if defined(__SSE2__)
2534 __m128i vt = _mm_loadu_si128(p: (const __m128i*)(buf1 + i*2));
2535 if (disty) {
2536 __m128i vb = _mm_loadu_si128(p: (const __m128i*)(buf2 + i*2));
2537 vt = _mm_mulhi_epu16(a: vt, b: vidy);
2538 vb = _mm_mulhi_epu16(a: vb, b: vdy);
2539 vt = _mm_add_epi16(a: vt, b: vb);
2540 }
2541 if (distx) {
2542 const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
2543 const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
2544 vt = _mm_mulhi_epu16(a: vt, b: _mm_unpacklo_epi64(a: vidistx, b: vdistx));
2545 vt = _mm_add_epi16(a: vt, _mm_srli_si128(vt, 8));
2546 }
2547 _mm_storel_epi64(p: (__m128i*)(b+i), a: vt);
2548#else
2549 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
2550#endif
2551 fx += fdx;
2552 }
2553 length -= len;
2554 b += len;
2555 }
2556 } else { // rotation or shear
2557 while (length) {
2558 const int len = qMin(a: length, b: BufferSize / 2);
2559
2560 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2561
2562 convert(buf1, sbuf1, len * 2, clut, nullptr);
2563 convert(buf2, sbuf2, len * 2, clut, nullptr);
2564
2565 for (int i = 0; i < len; ++i) {
2566 const int distx = (fx & 0x0000ffff);
2567 const int disty = (fy & 0x0000ffff);
2568 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2569 fx += fdx;
2570 fy += fdy;
2571 }
2572
2573 length -= len;
2574 b += len;
2575 }
2576 }
2577 } else { // !(data->fast_matrix)
2578 const auto fetcher =
2579 (layout->bpp == QPixelLayout::BPP32)
2580 ? fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32, uint>
2581 : fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2582
2583 const qreal fdx = data->m11;
2584 const qreal fdy = data->m12;
2585 const qreal fdw = data->m13;
2586
2587 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2588 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2589 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2590
2591 ushort distxs[BufferSize / 2];
2592 ushort distys[BufferSize / 2];
2593
2594 while (length) {
2595 const int len = qMin(a: length, b: BufferSize / 2);
2596 fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2597
2598 convert(buf1, sbuf1, len * 2, clut, nullptr);
2599 convert(buf2, sbuf2, len * 2, clut, nullptr);
2600
2601 for (int i = 0; i < len; ++i) {
2602 const int distx = distxs[i];
2603 const int disty = distys[i];
2604 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2605 }
2606
2607 length -= len;
2608 b += len;
2609 }
2610 }
2611 return buffer;
2612}
2613
2614template<TextureBlendType blendType>
2615static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 *buffer, const QSpanData *data,
2616 int y, int x, int length)
2617{
2618 const auto convert = convert64ToRGBA64PM[data->texture.format];
2619
2620 const qreal cx = x + qreal(0.5);
2621 const qreal cy = y + qreal(0.5);
2622
2623 alignas(8) QRgba64 buf1[BufferSize];
2624 alignas(8) QRgba64 buf2[BufferSize];
2625 QRgba64 *end = buffer + length;
2626 QRgba64 *b = buffer;
2627
2628 if (canUseFastMatrixPath(cx, cy, length, data)) {
2629 // The increment pr x in the scanline
2630 const int fdx = (int)(data->m11 * fixed_scale);
2631 const int fdy = (int)(data->m12 * fixed_scale);
2632
2633 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2634 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2635
2636 fx -= half_point;
2637 fy -= half_point;
2638 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
2639
2640 if (fdy == 0) { //simple scale, no rotation
2641 while (length) {
2642 int len = qMin(a: length, b: BufferSize / 2);
2643 int disty = (fy & 0x0000ffff);
2644#if defined(__SSE2__)
2645 const __m128i vdy = _mm_set1_epi16(w: disty);
2646 const __m128i vidy = _mm_set1_epi16(w: 0x10000 - disty);
2647#endif
2648 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
2649
2650 convert(buf1, len * 2);
2651 if (disty)
2652 convert(buf2, len * 2);
2653
2654 for (int i = 0; i < len; ++i) {
2655 int distx = (fx & 0x0000ffff);
2656#if defined(__SSE2__)
2657 __m128i vt = _mm_loadu_si128(p: (const __m128i*)(buf1 + i*2));
2658 if (disty) {
2659 __m128i vb = _mm_loadu_si128(p: (const __m128i*)(buf2 + i*2));
2660 vt = _mm_mulhi_epu16(a: vt, b: vidy);
2661 vb = _mm_mulhi_epu16(a: vb, b: vdy);
2662 vt = _mm_add_epi16(a: vt, b: vb);
2663 }
2664 if (distx) {
2665 const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
2666 const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
2667 vt = _mm_mulhi_epu16(a: vt, b: _mm_unpacklo_epi64(a: vidistx, b: vdistx));
2668 vt = _mm_add_epi16(a: vt, _mm_srli_si128(vt, 8));
2669 }
2670 _mm_storel_epi64(p: (__m128i*)(b+i), a: vt);
2671#else
2672 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
2673#endif
2674 fx += fdx;
2675 }
2676 length -= len;
2677 b += len;
2678 }
2679 } else { // rotation or shear
2680 while (b < end) {
2681 int len = qMin(a: length, b: BufferSize / 2);
2682
2683 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
2684
2685 convert(buf1, len * 2);
2686 convert(buf2, len * 2);
2687
2688 for (int i = 0; i < len; ++i) {
2689 int distx = (fx & 0x0000ffff);
2690 int disty = (fy & 0x0000ffff);
2691 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2692 fx += fdx;
2693 fy += fdy;
2694 }
2695
2696 length -= len;
2697 b += len;
2698 }
2699 }
2700 } else { // !(data->fast_matrix)
2701 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
2702
2703 const qreal fdx = data->m11;
2704 const qreal fdy = data->m12;
2705 const qreal fdw = data->m13;
2706
2707 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2708 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2709 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2710
2711 ushort distxs[BufferSize / 2];
2712 ushort distys[BufferSize / 2];
2713
2714 while (length) {
2715 const int len = qMin(a: length, b: BufferSize / 2);
2716 fetcher(buf1, buf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2717
2718 convert(buf1, len * 2);
2719 convert(buf2, len * 2);
2720
2721 for (int i = 0; i < len; ++i) {
2722 const int distx = distxs[i];
2723 const int disty = distys[i];
2724 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2725 }
2726
2727 length -= len;
2728 b += len;
2729 }
2730 }
2731 return buffer;
2732}
2733
2734template<TextureBlendType blendType>
2735static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_f32x4(QRgba64 *buffer, const QSpanData *data,
2736 int y, int x, int length)
2737{
2738 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2739 const auto *clut = data->texture.colorTable;
2740 const auto convert = layout->fetchToRGBA64PM;
2741
2742 const qreal cx = x + qreal(0.5);
2743 const qreal cy = y + qreal(0.5);
2744
2745 QRgbaFloat32 sbuf1[BufferSize];
2746 QRgbaFloat32 sbuf2[BufferSize];
2747 alignas(8) QRgba64 buf1[BufferSize];
2748 alignas(8) QRgba64 buf2[BufferSize];
2749 QRgba64 *b = buffer;
2750
2751 if (canUseFastMatrixPath(cx, cy, length, data)) {
2752 // The increment pr x in the scanline
2753 const int fdx = (int)(data->m11 * fixed_scale);
2754 const int fdy = (int)(data->m12 * fixed_scale);
2755
2756 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2757 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2758
2759 fx -= half_point;
2760 fy -= half_point;
2761
2762 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>;
2763
2764 const bool skipsecond = (fdy == 0) && ((fy & 0x0000ffff) == 0);
2765 while (length) {
2766 const int len = qMin(a: length, b: BufferSize / 2);
2767
2768 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2769
2770 convert(buf1, (const uchar *)sbuf1, 0, len * 2, clut, nullptr);
2771 if (!skipsecond)
2772 convert(buf2, (const uchar *)sbuf2, 0, len * 2, clut, nullptr);
2773
2774 for (int i = 0; i < len; ++i) {
2775 const int distx = (fx & 0x0000ffff);
2776 const int disty = (fy & 0x0000ffff);
2777 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2778 fx += fdx;
2779 fy += fdy;
2780 }
2781
2782 length -= len;
2783 b += len;
2784 }
2785 } else { // !(data->fast_matrix)
2786 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>;
2787
2788 const qreal fdx = data->m11;
2789 const qreal fdy = data->m12;
2790 const qreal fdw = data->m13;
2791
2792 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2793 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2794 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2795
2796 ushort distxs[BufferSize / 2];
2797 ushort distys[BufferSize / 2];
2798
2799 while (length) {
2800 const int len = qMin(a: length, b: BufferSize / 2);
2801 fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2802
2803 convert(buf1, (const uchar *)sbuf1, 0, len * 2, clut, nullptr);
2804 convert(buf2, (const uchar *)sbuf2, 0, len * 2, clut, nullptr);
2805
2806 for (int i = 0; i < len; ++i) {
2807 const int distx = distxs[i];
2808 const int disty = distys[i];
2809 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2810 }
2811
2812 length -= len;
2813 b += len;
2814 }
2815 }
2816 return buffer;
2817}
2818
2819template<TextureBlendType blendType>
2820static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *,
2821 const QSpanData *data, int y, int x, int length)
2822{
2823 switch (qPixelLayouts[data->texture.format].bpp) {
2824 case QPixelLayout::BPP64:
2825 case QPixelLayout::BPP16FPx4:
2826 return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length);
2827 case QPixelLayout::BPP32FPx4:
2828 return fetchTransformedBilinear64_f32x4<blendType>(buffer, data, y, x, length);
2829 default:
2830 return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length);
2831 }
2832}
2833#endif
2834
2835#if QT_CONFIG(raster_fp)
2836static void interpolate_simple_rgba32f(QRgbaFloat32 *b, const QRgbaFloat32 *buf1, const QRgbaFloat32 *buf2, int len,
2837 int &fx, int fdx,
2838 int &fy, int fdy)
2839{
2840 for (int i = 0; i < len; ++i) {
2841 const int distx = (fx & 0x0000ffff);
2842 const int disty = (fy & 0x0000ffff);
2843 b[i] = interpolate_4_pixels_rgba32f(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2844 fx += fdx;
2845 fy += fdy;
2846 }
2847}
2848
2849static void interpolate_perspective_rgba32f(QRgbaFloat32 *b, const QRgbaFloat32 *buf1, const QRgbaFloat32 *buf2, int len,
2850 unsigned short *distxs,
2851 unsigned short *distys)
2852{
2853 for (int i = 0; i < len; ++i) {
2854 const int dx = distxs[i];
2855 const int dy = distys[i];
2856 b[i] = interpolate_4_pixels_rgba32f(t: buf1 + i*2, b: buf2 + i*2, distx: dx, disty: dy);
2857 }
2858}
2859
2860template<TextureBlendType blendType>
2861static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP_uint32(QRgbaFloat32 *buffer, const QSpanData *data,
2862 int y, int x, int length)
2863{
2864 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2865 const auto *clut = data->texture.colorTable;
2866 const auto convert = qConvertToRGBA32F[data->texture.format];
2867
2868 const qreal cx = x + qreal(0.5);
2869 const qreal cy = y + qreal(0.5);
2870
2871 uint sbuf1[BufferSize];
2872 uint sbuf2[BufferSize];
2873 QRgbaFloat32 buf1[BufferSize];
2874 QRgbaFloat32 buf2[BufferSize];
2875 QRgbaFloat32 *b = buffer;
2876
2877 if (canUseFastMatrixPath(cx, cy, length, data)) {
2878 // The increment pr x in the scanline
2879 const int fdx = (int)(data->m11 * fixed_scale);
2880 const int fdy = (int)(data->m12 * fixed_scale);
2881
2882 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2883 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2884
2885 fx -= half_point;
2886 fy -= half_point;
2887
2888 const auto fetcher =
2889 (layout->bpp == QPixelLayout::BPP32)
2890 ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
2891 : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2892
2893 const bool skipsecond = (fdy == 0) && ((fy & 0x0000ffff) == 0);
2894 while (length) {
2895 const int len = qMin(a: length, b: BufferSize / 2);
2896 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2897
2898 convert(buf1, sbuf1, len * 2, clut, nullptr);
2899 if (!skipsecond)
2900 convert(buf2, sbuf2, len * 2, clut, nullptr);
2901
2902 interpolate_simple_rgba32f(b, buf1, buf2, len, fx, fdx, fy, fdy);
2903
2904 length -= len;
2905 b += len;
2906 }
2907 } else { // !(data->fast_matrix)
2908 const auto fetcher =
2909 (layout->bpp == QPixelLayout::BPP32)
2910 ? fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32, uint>
2911 : fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2912
2913 const qreal fdx = data->m11;
2914 const qreal fdy = data->m12;
2915 const qreal fdw = data->m13;
2916 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2917 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2918 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2919 ushort distxs[BufferSize / 2];
2920 ushort distys[BufferSize / 2];
2921
2922 while (length) {
2923 const int len = qMin(a: length, b: BufferSize / 2);
2924 fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2925
2926 convert(buf1, sbuf1, len * 2, clut, nullptr);
2927 convert(buf2, sbuf2, len * 2, clut, nullptr);
2928
2929 interpolate_perspective_rgba32f(b, buf1, buf2, len, distxs, distys);
2930
2931 length -= len;
2932 b += len;
2933 }
2934 }
2935 return buffer;
2936}
2937
2938template<TextureBlendType blendType>
2939static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP_uint64(QRgbaFloat32 *buffer, const QSpanData *data,
2940 int y, int x, int length)
2941{
2942 const auto convert = convert64ToRGBA32F[data->texture.format];
2943
2944 const qreal cx = x + qreal(0.5);
2945 const qreal cy = y + qreal(0.5);
2946
2947 quint64 sbuf1[BufferSize];
2948 quint64 sbuf2[BufferSize];
2949 QRgbaFloat32 buf1[BufferSize];
2950 QRgbaFloat32 buf2[BufferSize];
2951 QRgbaFloat32 *b = buffer;
2952
2953 if (canUseFastMatrixPath(cx, cy, length, data)) {
2954 // The increment pr x in the scanline
2955 const int fdx = (int)(data->m11 * fixed_scale);
2956 const int fdy = (int)(data->m12 * fixed_scale);
2957
2958 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2959 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2960
2961 fx -= half_point;
2962 fy -= half_point;
2963 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, quint64>;
2964
2965 const bool skipsecond = (fdy == 0) && ((fy & 0x0000ffff) == 0);
2966 while (length) {
2967 const int len = qMin(a: length, b: BufferSize / 2);
2968 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2969
2970 convert(buf1, sbuf1, len * 2);
2971 if (!skipsecond)
2972 convert(buf2, sbuf2, len * 2);
2973
2974 interpolate_simple_rgba32f(b, buf1, buf2, len, fx, fdx, fy, fdy);
2975
2976 length -= len;
2977 b += len;
2978 }
2979 } else { // !(data->fast_matrix)
2980 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP64, quint64>;
2981
2982 const qreal fdx = data->m11;
2983 const qreal fdy = data->m12;
2984 const qreal fdw = data->m13;
2985
2986 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2987 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2988 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2989
2990 ushort distxs[BufferSize / 2];
2991 ushort distys[BufferSize / 2];
2992
2993 while (length) {
2994 const int len = qMin(a: length, b: BufferSize / 2);
2995 fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2996
2997 convert(buf1, sbuf1, len * 2);
2998 convert(buf2, sbuf2, len * 2);
2999
3000 interpolate_perspective_rgba32f(b, buf1, buf2, len, distxs, distys);
3001
3002 length -= len;
3003 b += len;
3004 }
3005 }
3006 return buffer;
3007}
3008
3009template<TextureBlendType blendType>
3010static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP(QRgbaFloat32 *buffer, const QSpanData *data,
3011 int y, int x, int length)
3012{
3013 const auto convert = data->rasterBuffer->format == QImage::Format_RGBA32FPx4 ? convertRGBA32FToRGBA32FPM
3014 : convertRGBA32FToRGBA32F;
3015
3016 const qreal cx = x + qreal(0.5);
3017 const qreal cy = y + qreal(0.5);
3018
3019 QRgbaFloat32 buf1[BufferSize];
3020 QRgbaFloat32 buf2[BufferSize];
3021 QRgbaFloat32 *b = buffer;
3022
3023 if (canUseFastMatrixPath(cx, cy, length, data)) {
3024 // The increment pr x in the scanline
3025 const int fdx = (int)(data->m11 * fixed_scale);
3026 const int fdy = (int)(data->m12 * fixed_scale);
3027
3028 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3029 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3030
3031 fx -= half_point;
3032 fy -= half_point;
3033 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>;
3034
3035 const bool skipsecond = (fdy == 0) && ((fy & 0x0000ffff) == 0);
3036 while (length) {
3037 const int len = qMin(a: length, b: BufferSize / 2);
3038 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3039
3040 convert(buf1, len * 2);
3041 if (!skipsecond)
3042 convert(buf2, len * 2);
3043
3044 interpolate_simple_rgba32f(b, buf1, buf2, len, fx, fdx, fy, fdy);
3045
3046 length -= len;
3047 b += len;
3048 }
3049 } else { // !(data->fast_matrix)
3050 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>;
3051
3052 const qreal fdx = data->m11;
3053 const qreal fdy = data->m12;
3054 const qreal fdw = data->m13;
3055
3056 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3057 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3058 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3059
3060 ushort distxs[BufferSize / 2];
3061 ushort distys[BufferSize / 2];
3062
3063 while (length) {
3064 const int len = qMin(a: length, b: BufferSize / 2);
3065 fetcher(buf1, buf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
3066
3067 convert(buf1, len * 2);
3068 convert(buf2, len * 2);
3069
3070 interpolate_perspective_rgba32f(b, buf1, buf2, len, distxs, distys);
3071
3072 length -= len;
3073 b += len;
3074 }
3075 }
3076 return buffer;
3077}
3078
3079template<TextureBlendType blendType>
3080static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP(QRgbaFloat32 *buffer, const Operator *,
3081 const QSpanData *data, int y, int x, int length)
3082{
3083 switch (qPixelLayouts[data->texture.format].bpp) {
3084 case QPixelLayout::BPP64:
3085 case QPixelLayout::BPP16FPx4:
3086 return fetchTransformedBilinearFP_uint64<blendType>(buffer, data, y, x, length);
3087 case QPixelLayout::BPP32FPx4:
3088 return fetchTransformedBilinearFP<blendType>(buffer, data, y, x, length);
3089 default:
3090 return fetchTransformedBilinearFP_uint32<blendType>(buffer, data, y, x, length);
3091 }
3092}
3093#endif // QT_CONFIG(raster_fp)
3094
3095// FetchUntransformed can have more specialized methods added depending on SIMD features.
3096static SourceFetchProc sourceFetchUntransformed[] = {
3097 nullptr, // Invalid
3098 fetchUntransformed, // Mono
3099 fetchUntransformed, // MonoLsb
3100 fetchUntransformed, // Indexed8
3101 fetchUntransformedARGB32PM, // RGB32
3102 fetchUntransformed, // ARGB32
3103 fetchUntransformedARGB32PM, // ARGB32_Premultiplied
3104 fetchUntransformedRGB16, // RGB16
3105 fetchUntransformed, // ARGB8565_Premultiplied
3106 fetchUntransformed, // RGB666
3107 fetchUntransformed, // ARGB6666_Premultiplied
3108 fetchUntransformed, // RGB555
3109 fetchUntransformed, // ARGB8555_Premultiplied
3110 fetchUntransformed, // RGB888
3111 fetchUntransformed, // RGB444
3112 fetchUntransformed, // ARGB4444_Premultiplied
3113 fetchUntransformed, // RGBX8888
3114 fetchUntransformed, // RGBA8888
3115 fetchUntransformed, // RGBA8888_Premultiplied
3116 fetchUntransformed, // Format_BGR30
3117 fetchUntransformed, // Format_A2BGR30_Premultiplied
3118 fetchUntransformed, // Format_RGB30
3119 fetchUntransformed, // Format_A2RGB30_Premultiplied
3120 fetchUntransformed, // Alpha8
3121 fetchUntransformed, // Grayscale8
3122 fetchUntransformed, // RGBX64
3123 fetchUntransformed, // RGBA64
3124 fetchUntransformed, // RGBA64_Premultiplied
3125 fetchUntransformed, // Grayscale16
3126 fetchUntransformed, // BGR888
3127 fetchUntransformed, // RGBX16FPx4
3128 fetchUntransformed, // RGBA16FPx4
3129 fetchUntransformed, // RGBA16FPx4_Premultiplied
3130 fetchUntransformed, // RGBX32Px4
3131 fetchUntransformed, // RGBA32FPx4
3132 fetchUntransformed, // RGBA32FPx4_Premultiplied
3133 fetchUntransformed, // CMYK8888
3134};
3135
3136static_assert(std::size(sourceFetchUntransformed) == QImage::NImageFormats);
3137
3138static const SourceFetchProc sourceFetchGeneric[] = {
3139 fetchUntransformed, // Untransformed
3140 fetchUntransformed, // Tiled
3141 fetchTransformed<BlendTransformed, QPixelLayout::BPPNone>, // Transformed
3142 fetchTransformed<BlendTransformedTiled, QPixelLayout::BPPNone>, // TransformedTiled
3143 fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPPNone>, // TransformedBilinear
3144 fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPPNone> // TransformedBilinearTiled
3145};
3146
3147static_assert(std::size(sourceFetchGeneric) == NBlendTypes);
3148
3149static SourceFetchProc sourceFetchARGB32PM[] = {
3150 fetchUntransformedARGB32PM, // Untransformed
3151 fetchUntransformedARGB32PM, // Tiled
3152 fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed
3153 fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>, // TransformedTiled
3154 fetchTransformedBilinearARGB32PM<BlendTransformedBilinear>, // Bilinear
3155 fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled
3156};
3157
3158static_assert(std::size(sourceFetchARGB32PM) == NBlendTypes);
3159
3160static SourceFetchProc sourceFetchAny16[] = {
3161 fetchUntransformed, // Untransformed
3162 fetchUntransformed, // Tiled
3163 fetchTransformed<BlendTransformed, QPixelLayout::BPP16>, // Transformed
3164 fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>, // TransformedTiled
3165 fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>, // TransformedBilinear
3166 fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16> // TransformedBilinearTiled
3167};
3168
3169static_assert(std::size(sourceFetchAny16) == NBlendTypes);
3170
3171static SourceFetchProc sourceFetchAny32[] = {
3172 fetchUntransformed, // Untransformed
3173 fetchUntransformed, // Tiled
3174 fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed
3175 fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>, // TransformedTiled
3176 fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP32>, // TransformedBilinear
3177 fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP32> // TransformedBilinearTiled
3178};
3179
3180static_assert(std::size(sourceFetchAny32) == NBlendTypes);
3181
3182static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format)
3183{
3184 if (format == QImage::Format_RGB32 || format == QImage::Format_ARGB32_Premultiplied)
3185 return sourceFetchARGB32PM[blendType];
3186 if (blendType == BlendUntransformed || blendType == BlendTiled)
3187 return sourceFetchUntransformed[format];
3188 if (qPixelLayouts[format].bpp == QPixelLayout::BPP16)
3189 return sourceFetchAny16[blendType];
3190 if (qPixelLayouts[format].bpp == QPixelLayout::BPP32)
3191 return sourceFetchAny32[blendType];
3192 return sourceFetchGeneric[blendType];
3193}
3194
3195#if QT_CONFIG(raster_64bit)
3196static const SourceFetchProc64 sourceFetchGeneric64[] = {
3197 fetchUntransformed64, // Untransformed
3198 fetchUntransformed64, // Tiled
3199 fetchTransformed64<BlendTransformed>, // Transformed
3200 fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
3201 fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
3202 fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
3203};
3204
3205static_assert(std::size(sourceFetchGeneric64) == NBlendTypes);
3206
3207static const SourceFetchProc64 sourceFetchRGBA64PM[] = {
3208 fetchUntransformedRGBA64PM, // Untransformed
3209 fetchUntransformedRGBA64PM, // Tiled
3210 fetchTransformed64<BlendTransformed>, // Transformed
3211 fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
3212 fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
3213 fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
3214};
3215
3216static_assert(std::size(sourceFetchRGBA64PM) == NBlendTypes);
3217
3218static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format)
3219{
3220 if (format == QImage::Format_RGBX64 || format == QImage::Format_RGBA64_Premultiplied)
3221 return sourceFetchRGBA64PM[blendType];
3222 return sourceFetchGeneric64[blendType];
3223}
3224#endif
3225
3226#if QT_CONFIG(raster_fp)
3227static const SourceFetchProcFP sourceFetchGenericFP[] = {
3228 fetchUntransformedFP, // Untransformed
3229 fetchUntransformedFP, // Tiled
3230 fetchTransformedFP<BlendTransformed>, // Transformed
3231 fetchTransformedFP<BlendTransformedTiled>, // TransformedTiled
3232 fetchTransformedBilinearFP<BlendTransformedBilinear>, // Bilinear
3233 fetchTransformedBilinearFP<BlendTransformedBilinearTiled> // BilinearTiled
3234};
3235
3236static_assert(std::size(sourceFetchGenericFP) == NBlendTypes);
3237
3238static inline SourceFetchProcFP getSourceFetchFP(TextureBlendType blendType, QImage::Format /*format*/)
3239{
3240 return sourceFetchGenericFP[blendType];
3241}
3242#endif
3243
3244#define FIXPT_BITS 8
3245#define FIXPT_SIZE (1<<FIXPT_BITS)
3246#define FIXPT_MAX (INT_MAX >> (FIXPT_BITS + 1))
3247
3248static uint qt_gradient_pixel_fixed(const QGradientData *data, int fixed_pos)
3249{
3250 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
3251 return data->colorTable32[qt_gradient_clamp(data, ipos)];
3252}
3253
3254#if QT_CONFIG(raster_64bit)
3255static const QRgba64& qt_gradient_pixel64_fixed(const QGradientData *data, int fixed_pos)
3256{
3257 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
3258 return data->colorTable64[qt_gradient_clamp(data, ipos)];
3259}
3260#endif
3261
3262#if QT_CONFIG(raster_fp)
3263static inline QRgbaFloat32 qt_gradient_pixelFP(const QGradientData *data, qreal pos)
3264{
3265 int ipos = int(pos * (GRADIENT_STOPTABLE_SIZE - 1) + qreal(0.5));
3266 QRgba64 rgb64 = data->colorTable64[qt_gradient_clamp(data, ipos)];
3267 return QRgbaFloat32::fromRgba64(red: rgb64.red(),green: rgb64.green(), blue: rgb64.blue(), alpha: rgb64.alpha());
3268}
3269
3270static inline QRgbaFloat32 qt_gradient_pixelFP_fixed(const QGradientData *data, int fixed_pos)
3271{
3272 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
3273 QRgba64 rgb64 = data->colorTable64[qt_gradient_clamp(data, ipos)];
3274 return QRgbaFloat32::fromRgba64(red: rgb64.red(), green: rgb64.green(), blue: rgb64.blue(), alpha: rgb64.alpha());
3275}
3276#endif
3277
3278static void QT_FASTCALL getLinearGradientValues(LinearGradientValues *v, const QSpanData *data)
3279{
3280 v->dx = data->gradient.linear.end.x - data->gradient.linear.origin.x;
3281 v->dy = data->gradient.linear.end.y - data->gradient.linear.origin.y;
3282 v->l = v->dx * v->dx + v->dy * v->dy;
3283 v->off = 0;
3284 if (v->l != 0) {
3285 v->dx /= v->l;
3286 v->dy /= v->l;
3287 v->off = -v->dx * data->gradient.linear.origin.x - v->dy * data->gradient.linear.origin.y;
3288 }
3289}
3290
3291class GradientBase32
3292{
3293public:
3294 typedef uint Type;
3295 static Type null() { return 0; }
3296 static Type fetchSingle(const QGradientData& gradient, qreal v)
3297 {
3298 Q_ASSERT(std::isfinite(v));
3299 return qt_gradient_pixel(data: &gradient, pos: v);
3300 }
3301 static Type fetchSingle(const QGradientData& gradient, int v)
3302 {
3303 return qt_gradient_pixel_fixed(data: &gradient, fixed_pos: v);
3304 }
3305 static void memfill(Type *buffer, Type fill, int length)
3306 {
3307 qt_memfill32(buffer, fill, length);
3308 }
3309};
3310
3311#if QT_CONFIG(raster_64bit)
3312class GradientBase64
3313{
3314public:
3315 typedef QRgba64 Type;
3316 static Type null() { return QRgba64::fromRgba64(c: 0); }
3317 static Type fetchSingle(const QGradientData& gradient, qreal v)
3318 {
3319 Q_ASSERT(std::isfinite(v));
3320 return qt_gradient_pixel64(data: &gradient, pos: v);
3321 }
3322 static Type fetchSingle(const QGradientData& gradient, int v)
3323 {
3324 return qt_gradient_pixel64_fixed(data: &gradient, fixed_pos: v);
3325 }
3326 static void memfill(Type *buffer, Type fill, int length)
3327 {
3328 qt_memfill64((quint64*)buffer, fill, length);
3329 }
3330};
3331#endif
3332
3333#if QT_CONFIG(raster_fp)
3334class GradientBaseFP
3335{
3336public:
3337 typedef QRgbaFloat32 Type;
3338 static Type null() { return QRgbaFloat32::fromRgba64(red: 0,green: 0,blue: 0,alpha: 0); }
3339 static Type fetchSingle(const QGradientData& gradient, qreal v)
3340 {
3341 Q_ASSERT(std::isfinite(v));
3342 return qt_gradient_pixelFP(data: &gradient, pos: v);
3343 }
3344 static Type fetchSingle(const QGradientData& gradient, int v)
3345 {
3346 return qt_gradient_pixelFP_fixed(data: &gradient, fixed_pos: v);
3347 }
3348 static void memfill(Type *buffer, Type fill, int length)
3349 {
3350 quint64 fillCopy;
3351 memcpy(dest: &fillCopy, src: &fill, n: sizeof(quint64));
3352 qt_memfill64((quint64*)buffer, fillCopy, length);
3353 }
3354};
3355#endif
3356
3357template<class GradientBase, typename BlendType>
3358static inline const BlendType * QT_FASTCALL qt_fetch_linear_gradient_template(
3359 BlendType *buffer, const Operator *op, const QSpanData *data,
3360 int y, int x, int length)
3361{
3362 const BlendType *b = buffer;
3363 qreal t, inc;
3364
3365 bool affine = true;
3366 qreal rx=0, ry=0;
3367 if (op->linear.l == 0) {
3368 t = inc = 0;
3369 } else {
3370 rx = data->m21 * (y + qreal(0.5)) + data->m11 * (x + qreal(0.5)) + data->dx;
3371 ry = data->m22 * (y + qreal(0.5)) + data->m12 * (x + qreal(0.5)) + data->dy;
3372 t = op->linear.dx*rx + op->linear.dy*ry + op->linear.off;
3373 inc = op->linear.dx * data->m11 + op->linear.dy * data->m12;
3374 affine = !data->m13 && !data->m23;
3375
3376 if (affine) {
3377 t *= (GRADIENT_STOPTABLE_SIZE - 1);
3378 inc *= (GRADIENT_STOPTABLE_SIZE - 1);
3379 }
3380 }
3381
3382 const BlendType *end = buffer + length;
3383 if (affine) {
3384 if (inc > qreal(-1e-5) && inc < qreal(1e-5)) {
3385 if (std::abs(x: t) < FIXPT_MAX)
3386 GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, int(t * FIXPT_SIZE)), length);
3387 else
3388 GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, t / GRADIENT_STOPTABLE_SIZE), length);
3389 } else {
3390 if (std::abs(x: t) < FIXPT_MAX && std::abs(x: inc) < FIXPT_MAX && std::abs(x: t + inc * length) < FIXPT_MAX) {
3391 // we can use fixed point math
3392 int t_fixed = int(t * FIXPT_SIZE);
3393 int inc_fixed = int(inc * FIXPT_SIZE);
3394 while (buffer < end) {
3395 *buffer = GradientBase::fetchSingle(data->gradient, t_fixed);
3396 t_fixed += inc_fixed;
3397 ++buffer;
3398 }
3399 } else {
3400 // we have to fall back to float math
3401 while (buffer < end) {
3402 *buffer = GradientBase::fetchSingle(data->gradient, t/GRADIENT_STOPTABLE_SIZE);
3403 t += inc;
3404 ++buffer;
3405 }
3406 }
3407 }
3408 } else { // fall back to float math here as well
3409 qreal rw = data->m23 * (y + qreal(0.5)) + data->m13 * (x + qreal(0.5)) + data->m33;
3410 while (buffer < end) {
3411 qreal x = rx/rw;
3412 qreal y = ry/rw;
3413 t = (op->linear.dx*x + op->linear.dy *y) + op->linear.off;
3414
3415 *buffer = GradientBase::fetchSingle(data->gradient, t);
3416 rx += data->m11;
3417 ry += data->m12;
3418 rw += data->m13;
3419 if (!rw) {
3420 rw += data->m13;
3421 }
3422 ++buffer;
3423 }
3424 }
3425
3426 return b;
3427}
3428
3429static const uint * QT_FASTCALL qt_fetch_linear_gradient(uint *buffer, const Operator *op, const QSpanData *data,
3430 int y, int x, int length)
3431{
3432 return qt_fetch_linear_gradient_template<GradientBase32, uint>(buffer, op, data, y, x, length);
3433}
3434
3435#if QT_CONFIG(raster_64bit)
3436static const QRgba64 * QT_FASTCALL qt_fetch_linear_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
3437 int y, int x, int length)
3438{
3439 return qt_fetch_linear_gradient_template<GradientBase64, QRgba64>(buffer, op, data, y, x, length);
3440}
3441#endif
3442#if QT_CONFIG(raster_fp)
3443static const QRgbaFloat32 * QT_FASTCALL qt_fetch_linear_gradient_rgbfp(QRgbaFloat32 *buffer, const Operator *op, const QSpanData *data,
3444 int y, int x, int length)
3445{
3446 return qt_fetch_linear_gradient_template<GradientBaseFP, QRgbaFloat32>(buffer, op, data, y, x, length);
3447}
3448#endif
3449
3450static void QT_FASTCALL getRadialGradientValues(RadialGradientValues *v, const QSpanData *data)
3451{
3452 v->dx = data->gradient.radial.center.x - data->gradient.radial.focal.x;
3453 v->dy = data->gradient.radial.center.y - data->gradient.radial.focal.y;
3454
3455 v->dr = data->gradient.radial.center.radius - data->gradient.radial.focal.radius;
3456 v->sqrfr = data->gradient.radial.focal.radius * data->gradient.radial.focal.radius;
3457
3458 v->a = v->dr * v->dr - v->dx*v->dx - v->dy*v->dy;
3459
3460 v->extended = !qFuzzyIsNull(d: data->gradient.radial.focal.radius) || v->a <= 0;
3461}
3462
3463template <class GradientBase>
3464class RadialFetchPlain : public GradientBase
3465{
3466public:
3467 typedef typename GradientBase::Type BlendType;
3468 static void fetch(BlendType *buffer, BlendType *end,
3469 const Operator *op, const QSpanData *data, qreal det,
3470 qreal delta_det, qreal delta_delta_det, qreal b, qreal delta_b)
3471 {
3472 if (op->radial.extended) {
3473 while (buffer < end) {
3474 BlendType result = GradientBase::null();
3475 if (det >= 0) {
3476 qreal w = qSqrt(v: det) - b;
3477 if (data->gradient.radial.focal.radius + op->radial.dr * w >= 0)
3478 result = GradientBase::fetchSingle(data->gradient, w);
3479 }
3480
3481 *buffer = result;
3482
3483 det += delta_det;
3484 delta_det += delta_delta_det;
3485 b += delta_b;
3486
3487 ++buffer;
3488 }
3489 } else {
3490 while (buffer < end) {
3491 BlendType result = GradientBase::null();
3492 if (det >= 0) {
3493 qreal w = qSqrt(v: det) - b;
3494 result = GradientBase::fetchSingle(data->gradient, w);
3495 }
3496
3497 *buffer++ = result;
3498
3499 det += delta_det;
3500 delta_det += delta_delta_det;
3501 b += delta_b;
3502 }
3503 }
3504 }
3505};
3506
3507const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint *buffer, const Operator *op, const QSpanData *data,
3508 int y, int x, int length)
3509{
3510 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase32>, uint>(buffer, op, data, y, x, length);
3511}
3512
// Currently selected 32-bit radial gradient fetcher; getOperator() installs it
// as op.srcFetch. Defaults to the plain implementation. The pointer is not
// const, so it is presumably re-pointed elsewhere (e.g. to a CPU-specific
// variant) - TODO confirm.
static SourceFetchProc qt_fetch_radial_gradient = qt_fetch_radial_gradient_plain;
3514
3515#if QT_CONFIG(raster_64bit)
3516const QRgba64 * QT_FASTCALL qt_fetch_radial_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
3517 int y, int x, int length)
3518{
3519 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase64>, QRgba64>(buffer, op, data, y, x, length);
3520}
3521#endif
3522
3523#if QT_CONFIG(raster_fp)
3524static const QRgbaFloat32 * QT_FASTCALL qt_fetch_radial_gradient_rgbfp(QRgbaFloat32 *buffer, const Operator *op, const QSpanData *data,
3525 int y, int x, int length)
3526{
3527 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBaseFP>, QRgbaFloat32>(buffer, op, data, y, x, length);
3528}
3529#endif
3530
3531template <class GradientBase, typename BlendType>
3532static inline const BlendType * QT_FASTCALL qt_fetch_conical_gradient_template(
3533 BlendType *buffer, const QSpanData *data,
3534 int y, int x, int length)
3535{
3536 const BlendType *b = buffer;
3537 qreal rx = data->m21 * (y + qreal(0.5))
3538 + data->dx + data->m11 * (x + qreal(0.5));
3539 qreal ry = data->m22 * (y + qreal(0.5))
3540 + data->dy + data->m12 * (x + qreal(0.5));
3541 bool affine = !data->m13 && !data->m23;
3542
3543 const qreal inv2pi = M_1_PI / 2.0;
3544
3545 const BlendType *end = buffer + length;
3546 if (affine) {
3547 rx -= data->gradient.conical.center.x;
3548 ry -= data->gradient.conical.center.y;
3549 while (buffer < end) {
3550 qreal angle = qAtan2(y: ry, x: rx) + data->gradient.conical.angle;
3551
3552 *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
3553
3554 rx += data->m11;
3555 ry += data->m12;
3556 ++buffer;
3557 }
3558 } else {
3559 qreal rw = data->m23 * (y + qreal(0.5))
3560 + data->m33 + data->m13 * (x + qreal(0.5));
3561 if (!rw)
3562 rw = 1;
3563 while (buffer < end) {
3564 qreal angle = qAtan2(y: ry/rw - data->gradient.conical.center.x,
3565 x: rx/rw - data->gradient.conical.center.y)
3566 + data->gradient.conical.angle;
3567
3568 *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
3569
3570 rx += data->m11;
3571 ry += data->m12;
3572 rw += data->m13;
3573 if (!rw) {
3574 rw += data->m13;
3575 }
3576 ++buffer;
3577 }
3578 }
3579 return b;
3580}
3581
3582static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint *buffer, const Operator *, const QSpanData *data,
3583 int y, int x, int length)
3584{
3585 return qt_fetch_conical_gradient_template<GradientBase32, uint>(buffer, data, y, x, length);
3586}
3587
3588#if QT_CONFIG(raster_64bit)
3589static const QRgba64 * QT_FASTCALL qt_fetch_conical_gradient_rgb64(QRgba64 *buffer, const Operator *, const QSpanData *data,
3590 int y, int x, int length)
3591{
3592 return qt_fetch_conical_gradient_template<GradientBase64, QRgba64>(buffer, data, y, x, length);
3593}
3594#endif
3595
3596#if QT_CONFIG(raster_fp)
3597static const QRgbaFloat32 * QT_FASTCALL qt_fetch_conical_gradient_rgbfp(QRgbaFloat32 *buffer, const Operator *, const QSpanData *data,
3598 int y, int x, int length)
3599{
3600 return qt_fetch_conical_gradient_template<GradientBaseFP, QRgbaFloat32>(buffer, data, y, x, length);
3601}
3602#endif
3603
// Composition function tables for solid-color sources, defined outside this
// section. getOperator() indexes them with the composition mode (op.mode).
extern CompositionFunctionSolid qt_functionForModeSolid_C[];
extern CompositionFunctionSolid64 qt_functionForModeSolid64_C[];
extern CompositionFunctionSolidFP qt_functionForModeSolidFP_C[];

// File-local pointers to the active solid-color tables; they start out pointing
// at the generic "_C" tables.
static const CompositionFunctionSolid *functionForModeSolid = qt_functionForModeSolid_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunctionSolid64 *functionForModeSolid64 = qt_functionForModeSolid64_C;
#endif
#if QT_CONFIG(raster_fp)
static const CompositionFunctionSolidFP *functionForModeSolidFP = qt_functionForModeSolidFP_C;
#endif

// Composition function tables for per-pixel (buffer) sources, defined outside
// this section and likewise indexed by composition mode.
extern CompositionFunction qt_functionForMode_C[];
extern CompositionFunction64 qt_functionForMode64_C[];
extern CompositionFunctionFP qt_functionForModeFP_C[];

// File-local pointers to the active per-pixel tables.
static const CompositionFunction *functionForMode = qt_functionForMode_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunction64 *functionForMode64 = qt_functionForMode64_C;
#endif
#if QT_CONFIG(raster_fp)
static const CompositionFunctionFP *functionForModeFP = qt_functionForModeFP_C;
#endif
3627
3628static TextureBlendType getBlendType(const QSpanData *data)
3629{
3630 TextureBlendType ft;
3631 if (data->texture.type == QTextureData::Pattern)
3632 ft = BlendTiled;
3633 else if (data->txop <= QTransform::TxTranslate)
3634 if (data->texture.type == QTextureData::Tiled)
3635 ft = BlendTiled;
3636 else
3637 ft = BlendUntransformed;
3638 else if (data->bilinear)
3639 if (data->texture.type == QTextureData::Tiled)
3640 ft = BlendTransformedBilinearTiled;
3641 else
3642 ft = BlendTransformedBilinear;
3643 else
3644 if (data->texture.type == QTextureData::Tiled)
3645 ft = BlendTransformedTiled;
3646 else
3647 ft = BlendTransformed;
3648 return ft;
3649}
3650
3651static inline Operator getOperator(const QSpanData *data, const QT_FT_Span *spans, int spanCount)
3652{
3653 Operator op;
3654 bool solidSource = false;
3655 switch(data->type) {
3656 case QSpanData::Solid:
3657 solidSource = data->solidColor.alphaF() >= 1.0f;
3658 op.srcFetch = nullptr;
3659 op.srcFetch64 = nullptr;
3660 op.srcFetchFP = nullptr;
3661 break;
3662 case QSpanData::LinearGradient:
3663 solidSource = !data->gradient.alphaColor;
3664 getLinearGradientValues(v: &op.linear, data);
3665 op.srcFetch = qt_fetch_linear_gradient;
3666#if QT_CONFIG(raster_64bit)
3667 op.srcFetch64 = qt_fetch_linear_gradient_rgb64;
3668#endif
3669#if QT_CONFIG(raster_fp)
3670 op.srcFetchFP = qt_fetch_linear_gradient_rgbfp;
3671#endif
3672 break;
3673 case QSpanData::RadialGradient:
3674 solidSource = !data->gradient.alphaColor;
3675 getRadialGradientValues(v: &op.radial, data);
3676 op.srcFetch = qt_fetch_radial_gradient;
3677#if QT_CONFIG(raster_64bit)
3678 op.srcFetch64 = qt_fetch_radial_gradient_rgb64;
3679#endif
3680#if QT_CONFIG(raster_fp)
3681 op.srcFetchFP = qt_fetch_radial_gradient_rgbfp;
3682#endif
3683 break;
3684 case QSpanData::ConicalGradient:
3685 solidSource = !data->gradient.alphaColor;
3686 op.srcFetch = qt_fetch_conical_gradient;
3687#if QT_CONFIG(raster_64bit)
3688 op.srcFetch64 = qt_fetch_conical_gradient_rgb64;
3689#endif
3690#if QT_CONFIG(raster_fp)
3691 op.srcFetchFP = qt_fetch_conical_gradient_rgbfp;
3692#endif
3693 break;
3694 case QSpanData::Texture:
3695 solidSource = !data->texture.hasAlpha;
3696 op.srcFetch = getSourceFetch(blendType: getBlendType(data), format: data->texture.format);
3697#if QT_CONFIG(raster_64bit)
3698 op.srcFetch64 = getSourceFetch64(blendType: getBlendType(data), format: data->texture.format);
3699#endif
3700#if QT_CONFIG(raster_fp)
3701 op.srcFetchFP = getSourceFetchFP(blendType: getBlendType(data), data->texture.format);
3702#endif
3703 break;
3704 default:
3705 Q_UNREACHABLE();
3706 break;
3707 }
3708#if !QT_CONFIG(raster_64bit)
3709 op.srcFetch64 = nullptr;
3710#endif
3711#if !QT_CONFIG(raster_fp)
3712 op.srcFetchFP = nullptr;
3713#endif
3714
3715 op.mode = data->rasterBuffer->compositionMode;
3716 if (op.mode == QPainter::CompositionMode_SourceOver && solidSource)
3717 op.mode = QPainter::CompositionMode_Source;
3718
3719 op.destFetch = destFetchProc[data->rasterBuffer->format];
3720#if QT_CONFIG(raster_64bit)
3721 op.destFetch64 = destFetchProc64[data->rasterBuffer->format];
3722#else
3723 op.destFetch64 = nullptr;
3724#endif
3725#if QT_CONFIG(raster_fp)
3726 op.destFetchFP = destFetchProcFP[data->rasterBuffer->format];
3727#else
3728 op.destFetchFP = nullptr;
3729#endif
3730 if (op.mode == QPainter::CompositionMode_Source &&
3731 (data->type != QSpanData::Texture || data->texture.const_alpha == 256)) {
3732 const QT_FT_Span *lastSpan = spans + spanCount;
3733 bool alphaSpans = false;
3734 while (spans < lastSpan) {
3735 if (spans->coverage != 255) {
3736 alphaSpans = true;
3737 break;
3738 }
3739 ++spans;
3740 }
3741 if (!alphaSpans && spanCount > 0) {
3742 // If all spans are opaque we do not need to fetch dest.
3743 // But don't clear passthrough destFetch as they are just as fast and save destStore.
3744 if (op.destFetch != destFetchARGB32P)
3745 op.destFetch = destFetchUndefined;
3746#if QT_CONFIG(raster_64bit)
3747 if (op.destFetch64 != destFetchRGB64)
3748 op.destFetch64 = destFetch64Undefined;
3749#endif
3750#if QT_CONFIG(raster_fp)
3751 if (op.destFetchFP != destFetchRGBFP)
3752 op.destFetchFP = destFetchFPUndefined;
3753#endif
3754 }
3755 }
3756
3757 op.destStore = destStoreProc[data->rasterBuffer->format];
3758 op.funcSolid = functionForModeSolid[op.mode];
3759 op.func = functionForMode[op.mode];
3760#if QT_CONFIG(raster_64bit)
3761 op.destStore64 = destStoreProc64[data->rasterBuffer->format];
3762 op.funcSolid64 = functionForModeSolid64[op.mode];
3763 op.func64 = functionForMode64[op.mode];
3764#else
3765 op.destStore64 = nullptr;
3766 op.funcSolid64 = nullptr;
3767 op.func64 = nullptr;
3768#endif
3769#if QT_CONFIG(raster_fp)
3770 op.destStoreFP = destStoreFP;
3771 op.funcSolidFP = functionForModeSolidFP[op.mode];
3772 op.funcFP = functionForModeFP[op.mode];
3773#else
3774 op.destStoreFP = nullptr;
3775 op.funcSolidFP = nullptr;
3776 op.funcFP = nullptr;
3777#endif
3778
3779 return op;
3780}
3781
3782static void spanfill_from_first(QRasterBuffer *rasterBuffer, QPixelLayout::BPP bpp, int x, int y, int length)
3783{
3784 switch (bpp) {
3785 case QPixelLayout::BPP32FPx4: {
3786 QRgbaFloat32 *dest = reinterpret_cast<QRgbaFloat32 *>(rasterBuffer->scanLine(y)) + x;
3787 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3788 break;
3789 }
3790 case QPixelLayout::BPP16FPx4:
3791 case QPixelLayout::BPP64: {
3792 quint64 *dest = reinterpret_cast<quint64 *>(rasterBuffer->scanLine(y)) + x;
3793 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3794 break;
3795 }
3796 case QPixelLayout::BPP32: {
3797 quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y)) + x;
3798 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3799 break;
3800 }
3801 case QPixelLayout::BPP24: {
3802 quint24 *dest = reinterpret_cast<quint24 *>(rasterBuffer->scanLine(y)) + x;
3803 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3804 break;
3805 }
3806 case QPixelLayout::BPP16: {
3807 quint16 *dest = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
3808 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3809 break;
3810 }
3811 case QPixelLayout::BPP8: {
3812 uchar *dest = rasterBuffer->scanLine(y) + x;
3813 memset(s: dest + 1, c: dest[0], n: length - 1);
3814 break;
3815 }
3816 default:
3817 Q_UNREACHABLE();
3818 }
3819}
3820
3821
3822// -------------------- blend methods ---------------------
3823
/*
    QT_THREAD_PARALLEL_FILLS(function) runs function(first, last) over the span
    index range [0, count).

    The expansion expects `count` and `data` to be in scope at the call site.
    With thread support, the range is cut into (count + 32) / 64 segments of
    near-equal size; each segment is started on the Qt GUI thread pool and the
    caller blocks on a semaphore until all of them have finished. The call is
    made inline on the current thread instead when there would be at most one
    segment, when the destination format has fewer than 8 bits per pixel
    (presumably because neighboring pixels then share bytes - TODO confirm),
    when no pool is available, or when the current thread already belongs to
    the pool.

    Without thread support the macro is a plain function(0, count) call.
*/
#if defined(QT_USE_THREAD_PARALLEL_FILLS)
#define QT_THREAD_PARALLEL_FILLS(function) \
    const int segments = (count + 32) / 64; \
    QThreadPool *threadPool = QThreadPoolPrivate::qtGuiInstance(); \
    if (segments > 1 && qPixelLayouts[data->rasterBuffer->format].bpp >= QPixelLayout::BPP8 \
             && threadPool && !threadPool->contains(QThread::currentThread())) { \
        QSemaphore semaphore; \
        int c = 0; \
        for (int i = 0; i < segments; ++i) { \
            int cn = (count - c) / (segments - i); \
            threadPool->start([&, c, cn]() { \
                function(c, c + cn); \
                semaphore.release(1); \
            }, 1); \
            c += cn; \
        } \
        semaphore.acquire(segments); \
    } else \
        function(0, count)
#else
#define QT_THREAD_PARALLEL_FILLS(function) function(0, count)
#endif
3846
3847static void blend_color_generic(int count, const QT_FT_Span *spans, void *userData)
3848{
3849 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
3850 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
3851 const uint color = data->solidColor.rgba();
3852 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
3853 const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
3854
3855 auto function = [=] (int cStart, int cEnd) {
3856 alignas(16) uint buffer[BufferSize];
3857 for (int c = cStart; c < cEnd; ++c) {
3858 int x = spans[c].x;
3859 int length = spans[c].len;
3860 if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStore) {
3861 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
3862 op.destStore(data->rasterBuffer, x, spans[c].y, &color, 1);
3863 spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans[c].y, length);
3864 length = 0;
3865 }
3866
3867 while (length) {
3868 int l = qMin(a: BufferSize, b: length);
3869 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l);
3870 op.funcSolid(dest, l, color, spans[c].coverage);
3871 if (op.destStore)
3872 op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
3873 length -= l;
3874 x += l;
3875 }
3876 }
3877 };
3878 QT_THREAD_PARALLEL_FILLS(function);
3879}
3880
3881static void blend_color_argb(int count, const QT_FT_Span *spans, void *userData)
3882{
3883 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
3884
3885 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
3886 const uint color = data->solidColor.rgba();
3887
3888 if (op.mode == QPainter::CompositionMode_Source) {
3889 // inline for performance
3890 while (count--) {
3891 uint *target = ((uint *)data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
3892 if (spans->coverage == 255) {
3893 qt_memfill(dest: target, color, count: spans->len);
3894#ifdef __SSE2__
3895 } else if (spans->len > 16) {
3896 op.funcSolid(target, spans->len, color, spans->coverage);
3897#endif
3898 } else {
3899 uint c = BYTE_MUL(x: color, a: spans->coverage);
3900 int ialpha = 255 - spans->coverage;
3901 for (int i = 0; i < spans->len; ++i)
3902 target[i] = c + BYTE_MUL(x: target[i], a: ialpha);
3903 }
3904 ++spans;
3905 }
3906 return;
3907 }
3908 const auto funcSolid = op.funcSolid;
3909 auto function = [=] (int cStart, int cEnd) {
3910 for (int c = cStart; c < cEnd; ++c) {
3911 uint *target = ((uint *)data->rasterBuffer->scanLine(y: spans[c].y)) + spans[c].x;
3912 funcSolid(target, spans[c].len, color, spans[c].coverage);
3913 }
3914 };
3915 QT_THREAD_PARALLEL_FILLS(function);
3916}
3917
3918static void blend_color_generic_rgb64(int count, const QT_FT_Span *spans, void *userData)
3919{
3920#if QT_CONFIG(raster_64bit)
3921 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
3922 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
3923 if (!op.funcSolid64) {
3924 qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit");
3925 return blend_color_generic(count, spans, userData);
3926 }
3927
3928 const QRgba64 color = data->solidColor.rgba64();
3929 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
3930 const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
3931
3932 auto function = [=, &op] (int cStart, int cEnd)
3933 {
3934 alignas(16) QRgba64 buffer[BufferSize];
3935 for (int c = cStart; c < cEnd; ++c) {
3936 int x = spans[c].x;
3937 int length = spans[c].len;
3938 if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStore64) {
3939 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
3940 op.destStore64(data->rasterBuffer, x, spans[c].y, &color, 1);
3941 spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans[c].y, length);
3942 length = 0;
3943 }
3944
3945 while (length) {
3946 int l = qMin(a: BufferSize, b: length);
3947 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l);
3948 op.funcSolid64(dest, l, color, spans[c].coverage);
3949 if (op.destStore64)
3950 op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
3951 length -= l;
3952 x += l;
3953 }
3954 }
3955 };
3956 QT_THREAD_PARALLEL_FILLS(function);
3957#else
3958 blend_color_generic(count, spans, userData);
3959#endif
3960}
3961
3962static void blend_color_generic_fp(int count, const QT_FT_Span *spans, void *userData)
3963{
3964#if QT_CONFIG(raster_fp)
3965 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
3966 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
3967 if (!op.funcSolidFP || !op.destFetchFP) {
3968 qCDebug(lcQtGuiDrawHelper, "blend_color_generic_fp: unsupported 4xF16 blend attempted, falling back to 32-bit");
3969 return blend_color_generic(count, spans, userData);
3970 }
3971
3972 float r, g, b, a;
3973 data->solidColor.getRgbF(r: &r, g: &g, b: &b, a: &a);
3974 const QRgbaFloat32 color{.r: r, .g: g, .b: b, .a: a};
3975 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
3976 QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
3977
3978 auto function = [=, &op] (int cStart, int cEnd)
3979 {
3980 alignas(16) QRgbaFloat32 buffer[BufferSize];
3981 for (int c = cStart; c < cEnd; ++c) {
3982 int x = spans[c].x;
3983 int length = spans[c].len;
3984 if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStoreFP) {
3985 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
3986 op.destStoreFP(data->rasterBuffer, x, spans[c].y, &color, 1);
3987 spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans[c].y, length);
3988 length = 0;
3989 }
3990
3991 while (length) {
3992 int l = qMin(a: BufferSize, b: length);
3993 QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l);
3994 op.funcSolidFP(dest, l, color, spans[c].coverage);
3995 if (op.destStoreFP)
3996 op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
3997 length -= l;
3998 x += l;
3999 }
4000 }
4001 };
4002 QT_THREAD_PARALLEL_FILLS(function);
4003#else
4004 blend_color_generic(count, spans, userData);
4005#endif
4006}
4007
4008template <typename T>
4009void handleSpans(int count, const QT_FT_Span *spans, const QSpanData *data, const Operator &op)
4010{
4011 const int const_alpha = (data->type == QSpanData::Texture) ? data->texture.const_alpha : 256;
4012 const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256;
4013
4014 auto function = [=, &op] (int cStart, int cEnd)
4015 {
4016 T handler(data, op);
4017 int coverage = 0;
4018 for (int c = cStart; c < cEnd;) {
4019 if (!spans[c].len) {
4020 ++c;
4021 continue;
4022 }
4023 int x = spans[c].x;
4024 const int y = spans[c].y;
4025 int right = x + spans[c].len;
4026 const bool fetchDest = !solidSource || spans[c].coverage < 255;
4027
4028 // compute length of adjacent spans
4029 for (int i = c + 1; i < cEnd && spans[i].y == y && spans[i].x == right && fetchDest == (!solidSource || spans[i].coverage < 255); ++i)
4030 right += spans[i].len;
4031 int length = right - x;
4032
4033 while (length) {
4034 int l = qMin(a: BufferSize, b: length);
4035 length -= l;
4036
4037 int process_length = l;
4038 int process_x = x;
4039
4040 const auto *src = handler.fetch(process_x, y, process_length, fetchDest);
4041 int offset = 0;
4042 while (l > 0) {
4043 if (x == spans[c].x) // new span?
4044 coverage = (spans[c].coverage * const_alpha) >> 8;
4045
4046 int right = spans[c].x + spans[c].len;
4047 int len = qMin(a: l, b: right - x);
4048
4049 handler.process(x, y, len, coverage, src, offset);
4050
4051 l -= len;
4052 x += len;
4053 offset += len;
4054
4055 if (x == right) // done with current span?
4056 ++c;
4057 }
4058 handler.store(process_x, y, process_length);
4059 }
4060 }
4061 };
4062 QT_THREAD_PARALLEL_FILLS(function);
4063}
4064
// State shared by the handleSpans() handler classes. The handlers initialize
// it as an aggregate, so the member order matters. Both members refer to
// objects owned by the caller.
struct QBlendBase
{
    const QSpanData *data; // span data being drawn
    const Operator &op;    // operator providing the fetch/blend/store functions
};
4070
4071class BlendSrcGeneric : public QBlendBase
4072{
4073public:
4074 uint *dest = nullptr;
4075 alignas(16) uint buffer[BufferSize];
4076 alignas(16) uint src_buffer[BufferSize];
4077 BlendSrcGeneric(const QSpanData *d, const Operator &o)
4078 : QBlendBase{.data: d, .op: o}
4079 {
4080 }
4081
4082 const uint *fetch(int x, int y, int len, bool fetchDest)
4083 {
4084 if (fetchDest || op.destFetch == destFetchARGB32P)
4085 dest = op.destFetch(buffer, data->rasterBuffer, x, y, len);
4086 else
4087 dest = buffer;
4088 return op.srcFetch(src_buffer, &op, data, y, x, len);
4089 }
4090
4091 void process(int, int, int len, int coverage, const uint *src, int offset)
4092 {
4093 op.func(dest + offset, src + offset, len, coverage);
4094 }
4095
4096 void store(int x, int y, int len)
4097 {
4098 if (op.destStore)
4099 op.destStore(data->rasterBuffer, x, y, dest, len);
4100 }
4101};
4102
4103#if QT_CONFIG(raster_64bit)
4104class BlendSrcGenericRGB64 : public QBlendBase
4105{
4106public:
4107 QRgba64 *dest = nullptr;
4108 alignas(16) QRgba64 buffer[BufferSize];
4109 alignas(16) QRgba64 src_buffer[BufferSize];
4110 BlendSrcGenericRGB64(const QSpanData *d, const Operator &o)
4111 : QBlendBase{.data: d, .op: o}
4112 {
4113 }
4114
4115 bool isSupported() const
4116 {
4117 return op.func64 && op.destFetch64;
4118 }
4119
4120 const QRgba64 *fetch(int x, int y, int len, bool fetchDest)
4121 {
4122 if (fetchDest || op.destFetch64 == destFetchRGB64)
4123 dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len);
4124 else
4125 dest = buffer;
4126 return op.srcFetch64(src_buffer, &op, data, y, x, len);
4127 }
4128
4129 void process(int, int, int len, int coverage, const QRgba64 *src, int offset)
4130 {
4131 op.func64(dest + offset, src + offset, len, coverage);
4132 }
4133
4134 void store(int x, int y, int len)
4135 {
4136 if (op.destStore64)
4137 op.destStore64(data->rasterBuffer, x, y, dest, len);
4138 }
4139};
4140#endif
4141
4142#if QT_CONFIG(raster_fp)
4143class BlendSrcGenericRGBFP : public QBlendBase
4144{
4145public:
4146 QRgbaFloat32 *dest = nullptr;
4147 alignas(16) QRgbaFloat32 buffer[BufferSize];
4148 alignas(16) QRgbaFloat32 src_buffer[BufferSize];
4149 BlendSrcGenericRGBFP(const QSpanData *d, const Operator &o)
4150 : QBlendBase{.data: d, .op: o}
4151 {
4152 }
4153
4154 bool isSupported() const
4155 {
4156 return op.funcFP && op.destFetchFP && op.srcFetchFP;
4157 }
4158
4159 const QRgbaFloat32 *fetch(int x, int y, int len, bool fetchDest)
4160 {
4161 if (fetchDest || op.destFetchFP == destFetchRGBFP)
4162 dest = op.destFetchFP(buffer, data->rasterBuffer, x, y, len);
4163 else
4164 dest = buffer;
4165 return op.srcFetchFP(src_buffer, &op, data, y, x, len);
4166 }
4167
4168 void process(int, int, int len, int coverage, const QRgbaFloat32 *src, int offset)
4169 {
4170 op.funcFP(dest + offset, src + offset, len, coverage);
4171 }
4172
4173 void store(int x, int y, int len)
4174 {
4175 if (op.destStoreFP)
4176 op.destStoreFP(data->rasterBuffer, x, y, dest, len);
4177 }
4178};
4179#endif
4180
4181static void blend_src_generic(int count, const QT_FT_Span *spans, void *userData)
4182{
4183 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4184 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
4185 handleSpans<BlendSrcGeneric>(count, spans, data, op);
4186}
4187
4188#if QT_CONFIG(raster_64bit)
4189static void blend_src_generic_rgb64(int count, const QT_FT_Span *spans, void *userData)
4190{
4191 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4192 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
4193 if (op.func64 && op.destFetch64) {
4194 handleSpans<BlendSrcGenericRGB64>(count, spans, data, op);
4195 } else {
4196 qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4197 handleSpans<BlendSrcGeneric>(count, spans, data, op);
4198 }
4199}
4200#endif
4201
4202#if QT_CONFIG(raster_fp)
4203static void blend_src_generic_fp(int count, const QT_FT_Span *spans, void *userData)
4204{
4205 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4206 const Operator op = getOperator(data, spans, spanCount: count);
4207 if (op.funcFP && op.destFetchFP && op.srcFetchFP) {
4208 handleSpans<BlendSrcGenericRGBFP>(count, spans, data, op);
4209 } else {
4210 qCDebug(lcQtGuiDrawHelper, "blend_src_generic_fp: unsupported 4xFP blend attempted, falling back to 32-bit");
4211 handleSpans<BlendSrcGeneric>(count, spans, data, op);
4212 }
4213}
4214#endif
4215
4216static void blend_untransformed_generic(int count, const QT_FT_Span *spans, void *userData)
4217{
4218 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4219
4220 const Operator op = getOperator(data, spans, spanCount: count);
4221
4222 const int image_width = data->texture.width;
4223 const int image_height = data->texture.height;
4224 const int const_alpha = data->texture.const_alpha;
4225 const int xoff = -qRound(d: -data->dx);
4226 const int yoff = -qRound(d: -data->dy);
4227 const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256 && op.destFetch != destFetchARGB32P;
4228
4229 auto function = [=, &op] (int cStart, int cEnd)
4230 {
4231 alignas(16) uint buffer[BufferSize];
4232 alignas(16) uint src_buffer[BufferSize];
4233 for (int c = cStart; c < cEnd; ++c) {
4234 if (!spans[c].len)
4235 continue;
4236 int x = spans[c].x;
4237 int length = spans[c].len;
4238 int sx = xoff + x;
4239 int sy = yoff + spans[c].y;
4240 const bool fetchDest = !solidSource || spans[c].coverage < 255;
4241 if (sy >= 0 && sy < image_height && sx < image_width) {
4242 if (sx < 0) {
4243 x -= sx;
4244 length += sx;
4245 sx = 0;
4246 }
4247 if (sx + length > image_width)
4248 length = image_width - sx;
4249 if (length > 0) {
4250 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4251 while (length) {
4252 int l = qMin(a: BufferSize, b: length);
4253 const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
4254 uint *dest = fetchDest ? op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
4255 op.func(dest, src, l, coverage);
4256 if (op.destStore)
4257 op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
4258 x += l;
4259 sx += l;
4260 length -= l;
4261 }
4262 }
4263 }
4264 }
4265 };
4266 QT_THREAD_PARALLEL_FILLS(function);
4267}
4268
4269#if QT_CONFIG(raster_64bit)
4270static void blend_untransformed_generic_rgb64(int count, const QT_FT_Span *spans, void *userData)
4271{
4272 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4273
4274 const Operator op = getOperator(data, spans, spanCount: count);
4275 if (!op.func64) {
4276 qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4277 return blend_untransformed_generic(count, spans, userData);
4278 }
4279
4280 const int image_width = data->texture.width;
4281 const int image_height = data->texture.height;
4282 const int const_alpha = data->texture.const_alpha;
4283 const int xoff = -qRound(d: -data->dx);
4284 const int yoff = -qRound(d: -data->dy);
4285 const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256 && op.destFetch64 != destFetchRGB64;
4286
4287 auto function = [=, &op] (int cStart, int cEnd)
4288 {
4289 alignas(16) QRgba64 buffer[BufferSize];
4290 alignas(16) QRgba64 src_buffer[BufferSize];
4291 for (int c = cStart; c < cEnd; ++c) {
4292 if (!spans[c].len)
4293 continue;
4294 int x = spans[c].x;
4295 int length = spans[c].len;
4296 int sx = xoff + x;
4297 int sy = yoff + spans[c].y;
4298 const bool fetchDest = !solidSource || spans[c].coverage < 255;
4299 if (sy >= 0 && sy < image_height && sx < image_width) {
4300 if (sx < 0) {
4301 x -= sx;
4302 length += sx;
4303 sx = 0;
4304 }
4305 if (sx + length > image_width)
4306 length = image_width - sx;
4307 if (length > 0) {
4308 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4309 while (length) {
4310 int l = qMin(a: BufferSize, b: length);
4311 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4312 QRgba64 *dest = fetchDest ? op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
4313 op.func64(dest, src, l, coverage);
4314 if (op.destStore64)
4315 op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
4316 x += l;
4317 sx += l;
4318 length -= l;
4319 }
4320 }
4321 }
4322 }
4323 };
4324 QT_THREAD_PARALLEL_FILLS(function);
4325}
4326#endif
4327
4328#if QT_CONFIG(raster_fp)
4329static void blend_untransformed_generic_fp(int count, const QT_FT_Span *spans, void *userData)
4330{
4331 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4332
4333 const Operator op = getOperator(data, spans, spanCount: count);
4334 if (!op.funcFP) {
4335 qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgbaf16: unsupported 4xFP16 blend attempted, falling back to 32-bit");
4336 return blend_untransformed_generic(count, spans, userData);
4337 }
4338
4339 const int image_width = data->texture.width;
4340 const int image_height = data->texture.height;
4341 const int xoff = -qRound(d: -data->dx);
4342 const int yoff = -qRound(d: -data->dy);
4343 const bool solidSource = op.mode == QPainter::CompositionMode_Source && data->texture.const_alpha == 256 && op.destFetchFP != destFetchRGBFP;
4344
4345 auto function = [=, &op] (int cStart, int cEnd)
4346 {
4347 alignas(16) QRgbaFloat32 buffer[BufferSize];
4348 alignas(16) QRgbaFloat32 src_buffer[BufferSize];
4349 for (int c = cStart; c < cEnd; ++c) {
4350 if (!spans[c].len)
4351 continue;
4352 int x = spans[c].x;
4353 int length = spans[c].len;
4354 int sx = xoff + x;
4355 int sy = yoff + spans[c].y;
4356 const bool fetchDest = !solidSource || spans[c].coverage < 255;
4357 if (sy >= 0 && sy < image_height && sx < image_width) {
4358 if (sx < 0) {
4359 x -= sx;
4360 length += sx;
4361 sx = 0;
4362 }
4363 if (sx + length > image_width)
4364 length = image_width - sx;
4365 if (length > 0) {
4366 const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
4367 while (length) {
4368 int l = qMin(a: BufferSize, b: length);
4369 const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l);
4370 QRgbaFloat32 *dest = fetchDest ? op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
4371 op.funcFP(dest, src, l, coverage);
4372 if (op.destStoreFP)
4373 op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
4374 x += l;
4375 sx += l;
4376 length -= l;
4377 }
4378 }
4379 }
4380 }
4381 };
4382 QT_THREAD_PARALLEL_FILLS(function);
4383}
4384#endif
4385
4386static void blend_untransformed_argb(int count, const QT_FT_Span *spans, void *userData)
4387{
4388 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4389 if (data->texture.format != QImage::Format_ARGB32_Premultiplied
4390 && data->texture.format != QImage::Format_RGB32) {
4391 blend_untransformed_generic(count, spans, userData);
4392 return;
4393 }
4394
4395 const Operator op = getOperator(data, spans, spanCount: count);
4396
4397 const int image_width = data->texture.width;
4398 const int image_height = data->texture.height;
4399 const int const_alpha = data->texture.const_alpha;
4400 const int xoff = -qRound(d: -data->dx);
4401 const int yoff = -qRound(d: -data->dy);
4402
4403 auto function = [=, &op] (int cStart, int cEnd)
4404 {
4405 for (int c = cStart; c < cEnd; ++c) {
4406 if (!spans[c].len)
4407 continue;
4408 int x = spans[c].x;
4409 int length = spans[c].len;
4410 int sx = xoff + x;
4411 int sy = yoff + spans[c].y;
4412 if (sy >= 0 && sy < image_height && sx < image_width) {
4413 if (sx < 0) {
4414 x -= sx;
4415 length += sx;
4416 sx = 0;
4417 }
4418 if (sx + length > image_width)
4419 length = image_width - sx;
4420 if (length > 0) {
4421 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4422 const uint *src = (const uint *)data->texture.scanLine(y: sy) + sx;
4423 uint *dest = ((uint *)data->rasterBuffer->scanLine(y: spans[c].y)) + x;
4424 op.func(dest, src, length, coverage);
4425 }
4426 }
4427 }
4428 };
4429 QT_THREAD_PARALLEL_FILLS(function);
4430}
4431
4432static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a,
4433 quint16 y, quint8 b)
4434{
4435 quint16 t = ((((x & 0x07e0) * a) + ((y & 0x07e0) * b)) >> 5) & 0x07e0;
4436 t |= ((((x & 0xf81f) * a) + ((y & 0xf81f) * b)) >> 5) & 0xf81f;
4437
4438 return t;
4439}
4440
4441static inline quint32 interpolate_pixel_rgb16x2_255(quint32 x, quint8 a,
4442 quint32 y, quint8 b)
4443{
4444 uint t;
4445 t = ((((x & 0xf81f07e0) >> 5) * a) + (((y & 0xf81f07e0) >> 5) * b)) & 0xf81f07e0;
4446 t |= ((((x & 0x07e0f81f) * a) + ((y & 0x07e0f81f) * b)) >> 5) & 0x07e0f81f;
4447 return t;
4448}
4449
4450static inline void blend_sourceOver_rgb16_rgb16(quint16 *Q_DECL_RESTRICT dest,
4451 const quint16 *Q_DECL_RESTRICT src,
4452 int length,
4453 const quint8 alpha,
4454 const quint8 ialpha)
4455{
4456 const int dstAlign = ((quintptr)dest) & 0x3;
4457 if (dstAlign) {
4458 *dest = interpolate_pixel_rgb16_255(x: *src, a: alpha, y: *dest, b: ialpha);
4459 ++dest;
4460 ++src;
4461 --length;
4462 }
4463 const int srcAlign = ((quintptr)src) & 0x3;
4464 int length32 = length >> 1;
4465 if (length32 && srcAlign == 0) {
4466 while (length32--) {
4467 const quint32 *src32 = reinterpret_cast<const quint32*>(src);
4468 quint32 *dest32 = reinterpret_cast<quint32*>(dest);
4469 *dest32 = interpolate_pixel_rgb16x2_255(x: *src32, a: alpha,
4470 y: *dest32, b: ialpha);
4471 dest += 2;
4472 src += 2;
4473 }
4474 length &= 0x1;
4475 }
4476 while (length--) {
4477 *dest = interpolate_pixel_rgb16_255(x: *src, a: alpha, y: *dest, b: ialpha);
4478 ++dest;
4479 ++src;
4480 }
4481}
4482
4483static void blend_untransformed_rgb565(int count, const QT_FT_Span *spans, void *userData)
4484{
4485 QSpanData *data = reinterpret_cast<QSpanData*>(userData);
4486 QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
4487
4488 if (data->texture.format != QImage::Format_RGB16
4489 || (mode != QPainter::CompositionMode_SourceOver
4490 && mode != QPainter::CompositionMode_Source))
4491 {
4492 blend_untransformed_generic(count, spans, userData);
4493 return;
4494 }
4495
4496 const int image_width = data->texture.width;
4497 const int image_height = data->texture.height;
4498 int xoff = -qRound(d: -data->dx);
4499 int yoff = -qRound(d: -data->dy);
4500
4501 auto function = [=](int cStart, int cEnd)
4502 {
4503 for (int c = cStart; c < cEnd; ++c) {
4504 if (!spans[c].len)
4505 continue;
4506 const quint8 coverage = (data->texture.const_alpha * spans[c].coverage) >> 8;
4507 if (coverage == 0)
4508 continue;
4509
4510 int x = spans[c].x;
4511 int length = spans[c].len;
4512 int sx = xoff + x;
4513 int sy = yoff + spans[c].y;
4514 if (sy >= 0 && sy < image_height && sx < image_width) {
4515 if (sx < 0) {
4516 x -= sx;
4517 length += sx;
4518 sx = 0;
4519 }
4520 if (sx + length > image_width)
4521 length = image_width - sx;
4522 if (length > 0) {
4523 quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(y: spans[c].y) + x;
4524 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
4525 if (coverage == 255) {
4526 memcpy(dest: dest, src: src, n: length * sizeof(quint16));
4527 } else {
4528 const quint8 alpha = (coverage + 1) >> 3;
4529 const quint8 ialpha = 0x20 - alpha;
4530 if (alpha > 0)
4531 blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha);
4532 }
4533 }
4534 }
4535 }
4536 };
4537 QT_THREAD_PARALLEL_FILLS(function);
4538}
4539
4540static void blend_tiled_generic(int count, const QT_FT_Span *spans, void *userData)
4541{
4542 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4543
4544 const Operator op = getOperator(data, spans, spanCount: count);
4545
4546 const int image_width = data->texture.width;
4547 const int image_height = data->texture.height;
4548 const int const_alpha = data->texture.const_alpha;
4549 int xoff = -qRound(d: -data->dx) % image_width;
4550 int yoff = -qRound(d: -data->dy) % image_height;
4551
4552 if (xoff < 0)
4553 xoff += image_width;
4554 if (yoff < 0)
4555 yoff += image_height;
4556
4557 auto function = [=, &op](int cStart, int cEnd)
4558 {
4559 alignas(16) uint buffer[BufferSize];
4560 alignas(16) uint src_buffer[BufferSize];
4561 for (int c = cStart; c < cEnd; ++c) {
4562 int x = spans[c].x;
4563 int length = spans[c].len;
4564 int sx = (xoff + spans[c].x) % image_width;
4565 int sy = (spans[c].y + yoff) % image_height;
4566 if (sx < 0)
4567 sx += image_width;
4568 if (sy < 0)
4569 sy += image_height;
4570
4571 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4572 while (length) {
4573 int l = qMin(a: image_width - sx, b: length);
4574 if (BufferSize < l)
4575 l = BufferSize;
4576 const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
4577 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l);
4578 op.func(dest, src, l, coverage);
4579 if (op.destStore)
4580 op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
4581 x += l;
4582 sx += l;
4583 length -= l;
4584 if (sx >= image_width)
4585 sx = 0;
4586 }
4587 }
4588 };
4589 QT_THREAD_PARALLEL_FILLS(function);
4590}
4591
4592#if QT_CONFIG(raster_64bit)
4593static void blend_tiled_generic_rgb64(int count, const QT_FT_Span *spans, void *userData)
4594{
4595 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4596
4597 const Operator op = getOperator(data, spans, spanCount: count);
4598 if (!op.func64) {
4599 qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4600 return blend_tiled_generic(count, spans, userData);
4601 }
4602
4603 const int image_width = data->texture.width;
4604 const int image_height = data->texture.height;
4605 int xoff = -qRound(d: -data->dx) % image_width;
4606 int yoff = -qRound(d: -data->dy) % image_height;
4607
4608 if (xoff < 0)
4609 xoff += image_width;
4610 if (yoff < 0)
4611 yoff += image_height;
4612
4613 bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32;
4614 bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64;
4615 if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 || isBpp64)) {
4616 alignas(16) QRgba64 src_buffer[BufferSize];
4617 // If destination isn't blended into the result, we can do the tiling directly on destination pixels.
4618 while (count--) {
4619 int x = spans->x;
4620 int y = spans->y;
4621 int length = spans->len;
4622 int sx = (xoff + spans->x) % image_width;
4623 int sy = (spans->y + yoff) % image_height;
4624 if (sx < 0)
4625 sx += image_width;
4626 if (sy < 0)
4627 sy += image_height;
4628
4629 int sl = qMin(a: image_width, b: length);
4630 if (sx > 0 && sl > 0) {
4631 int l = qMin(a: image_width - sx, b: sl);
4632 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4633 op.destStore64(data->rasterBuffer, x, y, src, l);
4634 x += l;
4635 sx += l;
4636 sl -= l;
4637 if (sx >= image_width)
4638 sx = 0;
4639 }
4640 if (sl > 0) {
4641 Q_ASSERT(sx == 0);
4642 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, sl);
4643 op.destStore64(data->rasterBuffer, x, y, src, sl);
4644 x += sl;
4645 sx += sl;
4646 sl -= sl;
4647 if (sx >= image_width)
4648 sx = 0;
4649 }
4650 if (isBpp32) {
4651 uint *dest = reinterpret_cast<uint *>(data->rasterBuffer->scanLine(y)) + x - image_width;
4652 for (int i = image_width; i < length; ++i)
4653 dest[i] = dest[i - image_width];
4654 } else {
4655 quint64 *dest = reinterpret_cast<quint64 *>(data->rasterBuffer->scanLine(y)) + x - image_width;
4656 for (int i = image_width; i < length; ++i)
4657 dest[i] = dest[i - image_width];
4658 }
4659 ++spans;
4660 }
4661 return;
4662 }
4663
4664 auto function = [=, &op](int cStart, int cEnd)
4665 {
4666 alignas(16) QRgba64 buffer[BufferSize];
4667 alignas(16) QRgba64 src_buffer[BufferSize];
4668 for (int c = cStart; c < cEnd; ++c) {
4669 int x = spans[c].x;
4670 int length = spans[c].len;
4671 int sx = (xoff + spans[c].x) % image_width;
4672 int sy = (spans[c].y + yoff) % image_height;
4673 if (sx < 0)
4674 sx += image_width;
4675 if (sy < 0)
4676 sy += image_height;
4677
4678 const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
4679 while (length) {
4680 int l = qMin(a: image_width - sx, b: length);
4681 if (BufferSize < l)
4682 l = BufferSize;
4683 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4684 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l);
4685 op.func64(dest, src, l, coverage);
4686 if (op.destStore64)
4687 op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
4688 x += l;
4689 sx += l;
4690 length -= l;
4691 if (sx >= image_width)
4692 sx = 0;
4693 }
4694 }
4695 };
4696 QT_THREAD_PARALLEL_FILLS(function);
4697}
4698#endif
4699
4700#if QT_CONFIG(raster_fp)
4701static void blend_tiled_generic_fp(int count, const QT_FT_Span *spans, void *userData)
4702{
4703 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4704
4705 const Operator op = getOperator(data, spans, spanCount: count);
4706 if (!op.funcFP) {
4707 qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_fp: unsupported 4xFP blend attempted, falling back to 32-bit");
4708 return blend_tiled_generic(count, spans, userData);
4709 }
4710
4711 const int image_width = data->texture.width;
4712 const int image_height = data->texture.height;
4713 int xoff = -qRound(d: -data->dx) % image_width;
4714 int yoff = -qRound(d: -data->dy) % image_height;
4715
4716 if (xoff < 0)
4717 xoff += image_width;
4718 if (yoff < 0)
4719 yoff += image_height;
4720
4721 // Consider tiling optimizing like the other versions.
4722
4723 auto function = [=, &op](int cStart, int cEnd)
4724 {
4725 alignas(16) QRgbaFloat32 buffer[BufferSize];
4726 alignas(16) QRgbaFloat32 src_buffer[BufferSize];
4727 for (int c = cStart; c < cEnd; ++c) {
4728 int x = spans[c].x;
4729 int length = spans[c].len;
4730 int sx = (xoff + spans[c].x) % image_width;
4731 int sy = (spans[c].y + yoff) % image_height;
4732 if (sx < 0)
4733 sx += image_width;
4734 if (sy < 0)
4735 sy += image_height;
4736
4737 const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
4738 while (length) {
4739 int l = qMin(a: image_width - sx, b: length);
4740 if (BufferSize < l)
4741 l = BufferSize;
4742 const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l);
4743 QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l);
4744 op.funcFP(dest, src, l, coverage);
4745 if (op.destStoreFP)
4746 op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
4747 x += l;
4748 sx += l;
4749 length -= l;
4750 if (sx >= image_width)
4751 sx = 0;
4752 }
4753 }
4754 };
4755 QT_THREAD_PARALLEL_FILLS(function);
4756}
4757#endif
4758
4759static void blend_tiled_argb(int count, const QT_FT_Span *spans, void *userData)
4760{
4761 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4762 if (data->texture.format != QImage::Format_ARGB32_Premultiplied
4763 && data->texture.format != QImage::Format_RGB32) {
4764 blend_tiled_generic(count, spans, userData);
4765 return;
4766 }
4767
4768 const Operator op = getOperator(data, spans, spanCount: count);
4769
4770 const int image_width = data->texture.width;
4771 const int image_height = data->texture.height;
4772 int xoff = -qRound(d: -data->dx) % image_width;
4773 int yoff = -qRound(d: -data->dy) % image_height;
4774
4775 if (xoff < 0)
4776 xoff += image_width;
4777 if (yoff < 0)
4778 yoff += image_height;
4779 const auto func = op.func;
4780 const int const_alpha = data->texture.const_alpha;
4781
4782 auto function = [=] (int cStart, int cEnd) {
4783 for (int c = cStart; c < cEnd; ++c) {
4784 int x = spans[c].x;
4785 int length = spans[c].len;
4786 int sx = (xoff + spans[c].x) % image_width;
4787 int sy = (spans[c].y + yoff) % image_height;
4788 if (sx < 0)
4789 sx += image_width;
4790 if (sy < 0)
4791 sy += image_height;
4792
4793 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4794 while (length) {
4795 int l = qMin(a: image_width - sx, b: length);
4796 if (BufferSize < l)
4797 l = BufferSize;
4798 const uint *src = (const uint *)data->texture.scanLine(y: sy) + sx;
4799 uint *dest = ((uint *)data->rasterBuffer->scanLine(y: spans[c].y)) + x;
4800 func(dest, src, l, coverage);
4801 x += l;
4802 sx += l;
4803 length -= l;
4804 if (sx >= image_width)
4805 sx = 0;
4806 }
4807 }
4808 };
4809 QT_THREAD_PARALLEL_FILLS(function);
4810}
4811
4812static void blend_tiled_rgb565(int count, const QT_FT_Span *spans, void *userData)
4813{
4814 QSpanData *data = reinterpret_cast<QSpanData*>(userData);
4815 QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
4816
4817 if (data->texture.format != QImage::Format_RGB16
4818 || (mode != QPainter::CompositionMode_SourceOver
4819 && mode != QPainter::CompositionMode_Source))
4820 {
4821 blend_tiled_generic(count, spans, userData);
4822 return;
4823 }
4824
4825 const int image_width = data->texture.width;
4826 const int image_height = data->texture.height;
4827 int xoff = -qRound(d: -data->dx) % image_width;
4828 int yoff = -qRound(d: -data->dy) % image_height;
4829
4830 if (xoff < 0)
4831 xoff += image_width;
4832 if (yoff < 0)
4833 yoff += image_height;
4834
4835 const int const_alpha = data->texture.const_alpha;
4836 auto function = [=] (int cStart, int cEnd) {
4837 for (int c = cStart; c < cEnd; ++c) {
4838 const quint8 coverage = (const_alpha * spans[c].coverage) >> 8;
4839 if (coverage == 0)
4840 continue;
4841
4842 int x = spans[c].x;
4843 int length = spans[c].len;
4844 int sx = (xoff + spans[c].x) % image_width;
4845 int sy = (spans[c].y + yoff) % image_height;
4846 if (sx < 0)
4847 sx += image_width;
4848 if (sy < 0)
4849 sy += image_height;
4850
4851 if (coverage == 255) {
4852 // Copy the first texture block
4853 length = qMin(a: image_width,b: length);
4854 int tx = x;
4855 while (length) {
4856 int l = qMin(a: image_width - sx, b: length);
4857 if (BufferSize < l)
4858 l = BufferSize;
4859 quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(y: spans[c].y)) + tx;
4860 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
4861 memcpy(dest: dest, src: src, n: l * sizeof(quint16));
4862 length -= l;
4863 tx += l;
4864 sx += l;
4865 if (sx >= image_width)
4866 sx = 0;
4867 }
4868
4869 // Now use the rasterBuffer as the source of the texture,
4870 // We can now progressively copy larger blocks
4871 // - Less cpu time in code figuring out what to copy
4872 // We are dealing with one block of data
4873 // - More likely to fit in the cache
4874 // - can use memcpy
4875 int copy_image_width = qMin(a: image_width, b: int(spans[c].len));
4876 length = spans[c].len - copy_image_width;
4877 quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(y: spans[c].y)) + x;
4878 quint16 *dest = src + copy_image_width;
4879 while (copy_image_width < length) {
4880 memcpy(dest: dest, src: src, n: copy_image_width * sizeof(quint16));
4881 dest += copy_image_width;
4882 length -= copy_image_width;
4883 copy_image_width *= 2;
4884 }
4885 if (length > 0)
4886 memcpy(dest: dest, src: src, n: length * sizeof(quint16));
4887 } else {
4888 const quint8 alpha = (coverage + 1) >> 3;
4889 const quint8 ialpha = 0x20 - alpha;
4890 if (alpha > 0) {
4891 while (length) {
4892 int l = qMin(a: image_width - sx, b: length);
4893 if (BufferSize < l)
4894 l = BufferSize;
4895 quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(y: spans[c].y)) + x;
4896 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
4897 blend_sourceOver_rgb16_rgb16(dest, src, length: l, alpha, ialpha);
4898 x += l;
4899 sx += l;
4900 length -= l;
4901 if (sx >= image_width)
4902 sx = 0;
4903 }
4904 }
4905 }
4906 }
4907 };
4908 QT_THREAD_PARALLEL_FILLS(function);
4909}
4910
/* Image formats here are target formats */
// Span processors used by qBlendTexture() when the raster buffer format is
// Format_ARGB32_Premultiplied. The table is indexed by the TextureBlendType
// returned by getBlendType(); the trailing comments name each entry.
static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = {
    blend_untransformed_argb,           // Untransformed
    blend_tiled_argb,                   // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};
4920
// Span processors used by qBlendTexture() when the raster buffer format is
// Format_RGB16, indexed by TextureBlendType. Only the two untransformed
// entries have 16-bit specific implementations.
static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = {
    blend_untransformed_rgb565,         // Untransformed
    blend_tiled_rgb565,                 // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};
4929
// Span processors used by qBlendTexture() for every raster buffer format that
// has no dedicated table (the default case of its switch), indexed by
// TextureBlendType.
static const ProcessSpans processTextureSpansGeneric[NBlendTypes] = {
    blend_untransformed_generic,        // Untransformed
    blend_tiled_generic,                // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};
4938
4939#if QT_CONFIG(raster_64bit)
// Span processors that use the QRgba64 code paths, indexed by TextureBlendType.
// qBlendTexture() selects this table for the formats listed in front of its
// raster_64bit section. Only compiled when raster_64bit is configured.
static const ProcessSpans processTextureSpansGeneric64[NBlendTypes] = {
    blend_untransformed_generic_rgb64,  // Untransformed
    blend_tiled_generic_rgb64,          // Tiled
    blend_src_generic_rgb64,            // Transformed
    blend_src_generic_rgb64,            // TransformedTiled
    blend_src_generic_rgb64,            // TransformedBilinear
    blend_src_generic_rgb64             // TransformedBilinearTiled
};
4948#endif
4949
4950#if QT_CONFIG(raster_fp)
// Span processors that use the floating-point code paths, indexed by
// TextureBlendType. qBlendTexture() selects this table for the FPx4 formats.
// Only compiled when raster_fp is configured.
static const ProcessSpans processTextureSpansGenericFP[NBlendTypes] = {
    blend_untransformed_generic_fp,     // Untransformed
    blend_tiled_generic_fp,             // Tiled
    blend_src_generic_fp,               // Transformed
    blend_src_generic_fp,               // TransformedTiled
    blend_src_generic_fp,               // TransformedBilinear
    blend_src_generic_fp                // TransformedBilinearTiled
};
4959#endif
/*
  Entry point for texture fills: picks the span processor that matches the
  raster buffer format and the blend type returned by getBlendType(), then
  runs it on the given spans.

  count/spans are the spans to fill; userData is the QSpanData.

  Note that the case labels are interleaved with preprocessor guards: a group
  of labels whose "proc = ...; break;" statement is compiled out falls
  through to the next group that is compiled in.
*/
void qBlendTexture(int count, const QT_FT_Span *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData *>(userData);
    TextureBlendType blendType = getBlendType(data);
    ProcessSpans proc;
    switch (data->rasterBuffer->format) {
    case QImage::Format_Invalid:
        // An invalid target format is treated as unreachable.
        Q_UNREACHABLE_RETURN();
    case QImage::Format_ARGB32_Premultiplied:
        proc = processTextureSpansARGB32PM[blendType];
        break;
    case QImage::Format_RGB16:
        proc = processTextureSpansRGB16[blendType];
        break;
    // ARGB32 and RGBA8888 join the 64-bit group only on the targets selected by this guard.
#if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
    case QImage::Format_ARGB32:
    case QImage::Format_RGBA8888:
#endif
    case QImage::Format_BGR30:
    case QImage::Format_A2BGR30_Premultiplied:
    case QImage::Format_RGB30:
    case QImage::Format_A2RGB30_Premultiplied:
    case QImage::Format_RGBX64:
    case QImage::Format_RGBA64:
    case QImage::Format_RGBA64_Premultiplied:
    case QImage::Format_Grayscale16:
    // Without raster_fp the floating-point formats are handled by the 64-bit group as well.
#if !QT_CONFIG(raster_fp)
    case QImage::Format_RGBX16FPx4:
    case QImage::Format_RGBA16FPx4:
    case QImage::Format_RGBA16FPx4_Premultiplied:
    case QImage::Format_RGBX32FPx4:
    case QImage::Format_RGBA32FPx4:
    case QImage::Format_RGBA32FPx4_Premultiplied:
#endif
#if QT_CONFIG(raster_64bit)
        proc = processTextureSpansGeneric64[blendType];
        break;
#endif // QT_CONFIG(raster_64bit)
#if QT_CONFIG(raster_fp)
    case QImage::Format_RGBX16FPx4:
    case QImage::Format_RGBA16FPx4:
    case QImage::Format_RGBA16FPx4_Premultiplied:
    case QImage::Format_RGBX32FPx4:
    case QImage::Format_RGBA32FPx4:
    case QImage::Format_RGBA32FPx4_Premultiplied:
        proc = processTextureSpansGenericFP[blendType];
        break;
#endif
    default:
        // Every remaining format uses the generic 32-bit processors.
        proc = processTextureSpansGeneric[blendType];
        break;
    }
    proc(count, spans, userData);
}
5014
5015static inline bool calculate_fixed_gradient_factors(int count, const QT_FT_Span *spans,
5016 const QSpanData *data,
5017 const LinearGradientValues &linear,
5018 int *pyinc, int *poff)
5019{
5020 /*
5021 The logic for vertical gradient calculations is a mathematically
5022 reduced copy of that in fetchLinearGradient() - which is basically:
5023
5024 qreal ry = data->m22 * (y + 0.5) + data->dy;
5025 qreal t = linear.dy*ry + linear.off;
5026 t *= (GRADIENT_STOPTABLE_SIZE - 1);
5027 quint32 color =
5028 qt_gradient_pixel_fixed(&data->gradient,
5029 int(t * FIXPT_SIZE));
5030
5031 This has then been converted to fixed point to improve performance.
5032 */
5033 const int gss = GRADIENT_STOPTABLE_SIZE - 1;
5034 qreal ryinc = linear.dy * data->m22 * gss * FIXPT_SIZE;
5035 qreal roff = (linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss * FIXPT_SIZE;
5036 const int limit = std::numeric_limits<int>::max() - FIXPT_SIZE;
5037 if (count && (std::fabs(x: ryinc) < limit) && (std::fabs(x: roff) < limit)
5038 && (std::fabs(x: ryinc * spans->y + roff) < limit)
5039 && (std::fabs(x: ryinc * (spans + count - 1)->y + roff) < limit)) {
5040 *pyinc = int(ryinc);
5041 *poff = int(roff);
5042 return true;
5043 }
5044 return false;
5045}
5046
5047static bool blend_vertical_gradient_argb(int count, const QT_FT_Span *spans, void *userData)
5048{
5049 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5050
5051 LinearGradientValues linear;
5052 getLinearGradientValues(v: &linear, data);
5053
5054 CompositionFunctionSolid funcSolid =
5055 functionForModeSolid[data->rasterBuffer->compositionMode];
5056
5057 int yinc(0), off(0);
5058 if (!calculate_fixed_gradient_factors(count, spans, data, linear, pyinc: &yinc, poff: &off))
5059 return false;
5060
5061 while (count--) {
5062 int y = spans->y;
5063 int x = spans->x;
5064
5065 quint32 *dst = (quint32 *)(data->rasterBuffer->scanLine(y)) + x;
5066 quint32 color =
5067 qt_gradient_pixel_fixed(data: &data->gradient, fixed_pos: yinc * y + off);
5068
5069 funcSolid(dst, spans->len, color, spans->coverage);
5070 ++spans;
5071 }
5072 return true;
5073}
5074
5075template<ProcessSpans blend_color>
5076static bool blend_vertical_gradient(int count, const QT_FT_Span *spans, void *userData)
5077{
5078 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5079
5080 LinearGradientValues linear;
5081 getLinearGradientValues(v: &linear, data);
5082
5083 int yinc(0), off(0);
5084 if (!calculate_fixed_gradient_factors(count, spans, data, linear, pyinc: &yinc, poff: &off))
5085 return false;
5086
5087 while (count--) {
5088 int y = spans->y;
5089
5090#if QT_CONFIG(raster_64bit)
5091 data->solidColor = qt_gradient_pixel64_fixed(data: &data->gradient, fixed_pos: yinc * y + off);
5092#else
5093 data->solidColor = qt_gradient_pixel_fixed(&data->gradient, yinc * y + off);
5094#endif
5095 blend_color(1, spans, userData);
5096 ++spans;
5097 }
5098 return true;
5099}
5100
/*
  Entry point for gradient fills: dispatches on the raster buffer format.

  For a linear gradient whose end and origin share the same x coordinate,
  drawn with a transform of at most TxScale, the per-span vertical-gradient
  shortcut is tried first. If it returns false, or the gradient does not
  qualify, the matching blend_src_generic* function is used.

  As in qBlendTexture(), the case labels are interleaved with preprocessor
  guards, so a group whose statements are compiled out falls through to the
  next group that is compiled in.
*/
void qBlendGradient(int count, const QT_FT_Span *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData *>(userData);
    bool isVerticalGradient =
        data->txop <= QTransform::TxScale &&
        data->type == QSpanData::LinearGradient &&
        data->gradient.linear.end.x == data->gradient.linear.origin.x;
    switch (data->rasterBuffer->format) {
    case QImage::Format_Invalid:
        // Leaves the switch and reaches the Q_UNREACHABLE() below.
        break;
    case QImage::Format_RGB32:
    case QImage::Format_ARGB32_Premultiplied:
        if (isVerticalGradient && blend_vertical_gradient_argb(count, spans, userData))
            return;
        return blend_src_generic(count, spans, userData);
    // ARGB32 and RGBA8888 join the 64-bit group only on the targets selected by this guard.
#if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
    case QImage::Format_ARGB32:
    case QImage::Format_RGBA8888:
#endif
    case QImage::Format_BGR30:
    case QImage::Format_A2BGR30_Premultiplied:
    case QImage::Format_RGB30:
    case QImage::Format_A2RGB30_Premultiplied:
    case QImage::Format_RGBX64:
    case QImage::Format_RGBA64:
    case QImage::Format_RGBA64_Premultiplied:
    // Without raster_fp the floating-point formats are handled by the 64-bit group as well.
#if !QT_CONFIG(raster_fp)
    case QImage::Format_RGBX16FPx4:
    case QImage::Format_RGBA16FPx4:
    case QImage::Format_RGBA16FPx4_Premultiplied:
    case QImage::Format_RGBX32FPx4:
    case QImage::Format_RGBA32FPx4:
    case QImage::Format_RGBA32FPx4_Premultiplied:
#endif
#if QT_CONFIG(raster_64bit)
        if (isVerticalGradient && blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData))
            return;
        return blend_src_generic_rgb64(count, spans, userData);
#endif // QT_CONFIG(raster_64bit)
#if QT_CONFIG(raster_fp)
    case QImage::Format_RGBX16FPx4:
    case QImage::Format_RGBA16FPx4:
    case QImage::Format_RGBA16FPx4_Premultiplied:
    case QImage::Format_RGBX32FPx4:
    case QImage::Format_RGBA32FPx4:
    case QImage::Format_RGBA32FPx4_Premultiplied:
        if (isVerticalGradient && blend_vertical_gradient<blend_color_generic_fp>(count, spans, userData))
            return;
        return blend_src_generic_fp(count, spans, userData);
#endif
    default:
        // Every remaining format uses the generic 32-bit path.
        if (isVerticalGradient && blend_vertical_gradient<blend_color_generic>(count, spans, userData))
            return;
        return blend_src_generic(count, spans, userData);
    }
    Q_UNREACHABLE();
}
5158
/*
  Paints the set bits of a 1-bit-per-pixel bitmap onto the raster buffer in
  a single color.

  (x, y) is the destination position of the bitmap's top-left pixel, color
  is the value written for each set bit, and map points to mapHeight rows
  that are mapStride bytes apart. Within a byte the most significant bit is
  the leftmost pixel. Horizontal runs of set bits are collected and written
  with one qt_memfill() call each; cleared bits leave the destination
  untouched.
*/
template <class DST> static
inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer,
                                   int x, int y, DST color,
                                   const uchar *map,
                                   int mapWidth, int mapHeight, int mapStride)
{
    DST *dest = reinterpret_cast<DST *>(rasterBuffer->scanLine(y)) + x;
    const int destStride = rasterBuffer->stride<DST>();

    if (mapWidth > 8) {
        // Rows span several bytes; a run of set bits may continue across byte boundaries.
        while (--mapHeight >= 0) {
            int x0 = 0; // destination column where the pending run starts
            int n = 0;  // length of the pending run of set bits
            // Note: this x is a byte-aligned bit offset into the row and shadows the parameter x.
            for (int x = 0; x < mapWidth; x += 8) {
                uchar s = map[x >> 3];
                for (int i = 0; i < 8; ++i) {
                    if (s & 0x80) {
                        ++n;
                    } else {
                        if (n) {
                            // A cleared bit ends the run: fill it, then step past the run and this bit.
                            qt_memfill(dest + x0, color, n);
                            x0 += n + 1;
                            n = 0;
                        } else {
                            ++x0;
                        }
                        if (!s) {
                            // No set bits left in this byte: skip its remaining columns.
                            x0 += 8 - 1 - i;
                            break;
                        }
                    }
                    s <<= 1;
                }
            }
            // Flush a run that reaches the end of the row.
            if (n)
                qt_memfill(dest + x0, color, n);
            dest += destStride;
            map += mapStride;
        }
    } else {
        // Each row fits in one byte; scanning stops as soon as no set bits remain.
        while (--mapHeight >= 0) {
            int x0 = 0;
            int n = 0;
            for (uchar s = *map; s; s <<= 1) {
                if (s & 0x80) {
                    ++n;
                } else if (n) {
                    qt_memfill(dest + x0, color, n);
                    x0 += n + 1;
                    n = 0;
                } else {
                    ++x0;
                }
            }
            // Flush a run that was still open when the byte ran out of set bits.
            if (n)
                qt_memfill(dest + x0, color, n);
            dest += destStride;
            map += mapStride;
        }
    }
}
5220
5221inline static void qt_bitmapblit_argb32(QRasterBuffer *rasterBuffer,
5222 int x, int y, const QRgba64 &color,
5223 const uchar *map,
5224 int mapWidth, int mapHeight, int mapStride)
5225{
5226 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color: color.toArgb32(),
5227 map, mapWidth, mapHeight, mapStride);
5228}
5229
5230inline static void qt_bitmapblit_rgba8888(QRasterBuffer *rasterBuffer,
5231 int x, int y, const QRgba64 &color,
5232 const uchar *map,
5233 int mapWidth, int mapHeight, int mapStride)
5234{
5235 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color: ARGB2RGBA(x: color.toArgb32()),
5236 map, mapWidth, mapHeight, mapStride);
5237}
5238
5239template<QtPixelOrder PixelOrder>
5240inline static void qt_bitmapblit_rgb30(QRasterBuffer *rasterBuffer,
5241 int x, int y, const QRgba64 &color,
5242 const uchar *map,
5243 int mapWidth, int mapHeight, int mapStride)
5244{
5245 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, qConvertRgb64ToRgb30<PixelOrder>(color),
5246 map, mapWidth, mapHeight, mapStride);
5247}
5248
5249inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer,
5250 int x, int y, const QRgba64 &color,
5251 const uchar *map,
5252 int mapWidth, int mapHeight, int mapStride)
5253{
5254 qt_bitmapblit_template<quint16>(rasterBuffer, x, y, color: color.toRgb16(),
5255 map, mapWidth, mapHeight, mapStride);
5256}
5257
5258static inline void grayBlendPixel(quint32 *dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5259{
5260 // Do a gammacorrected gray alphablend...
5261 const QRgba64 dstLinear = colorProfile ? colorProfile->toLinear64(rgb32: *dst) : QRgba64::fromArgb32(rgb: *dst);
5262
5263 QRgba64 blend = interpolate255(x: srcLinear, alpha1: coverage, y: dstLinear, alpha2: 255 - coverage);
5264
5265 *dst = colorProfile ? colorProfile->fromLinear64(rgb64: blend) : toArgb32(rgba64: blend);
5266}
5267
5268static inline void alphamapblend_argb32(quint32 *dst, int coverage, QRgba64 srcLinear, quint32 src, const QColorTrcLut *colorProfile)
5269{
5270 if (coverage == 0) {
5271 // nothing
5272 } else if (coverage == 255 || !colorProfile) {
5273 blend_pixel(dst&: *dst, src, const_alpha: coverage);
5274 } else if (*dst < 0xff000000) {
5275 // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
5276 blend_pixel(dst&: *dst, src, const_alpha: coverage);
5277 } else if (src >= 0xff000000) {
5278 grayBlendPixel(dst, coverage, srcLinear, colorProfile);
5279 } else {
5280 // First do naive blend with text-color
5281 QRgb s = *dst;
5282 blend_pixel(dst&: s, src);
5283 // Then gamma-corrected blend with glyph shape
5284 QRgba64 s64 = colorProfile ? colorProfile->toLinear64(rgb32: s) : QRgba64::fromArgb32(rgb: s);
5285 grayBlendPixel(dst, coverage, srcLinear: s64, colorProfile);
5286 }
5287}
5288
5289#if QT_CONFIG(raster_64bit)
5290
5291static inline void grayBlendPixel(QRgba64 &dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5292{
5293 // Do a gammacorrected gray alphablend...
5294 QRgba64 dstColor = dst;
5295 if (colorProfile) {
5296 if (dstColor.isOpaque())
5297 dstColor = colorProfile->toLinear(rgb64: dstColor);
5298 else if (!dstColor.isTransparent())
5299 dstColor = colorProfile->toLinear(rgb64: dstColor.unpremultiplied()).premultiplied();
5300 }
5301
5302 blend_pixel(dst&: dstColor, src: srcLinear, const_alpha: coverage);
5303
5304 if (colorProfile) {
5305 if (dstColor.isOpaque())
5306 dstColor = colorProfile->fromLinear(rgb64: dstColor);
5307 else if (!dstColor.isTransparent())
5308 dstColor = colorProfile->fromLinear(rgb64: dstColor.unpremultiplied()).premultiplied();
5309 }
5310 dst = dstColor;
5311}
5312
5313static inline void alphamapblend_generic(int coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
5314{
5315 if (coverage == 0) {
5316 // nothing
5317 } else if (coverage == 255) {
5318 blend_pixel(dst&: dest[x], src);
5319 } else if (src.isOpaque()) {
5320 grayBlendPixel(dst&: dest[x], coverage, srcLinear, colorProfile);
5321 } else {
5322 // First do naive blend with text-color
5323 QRgba64 s = dest[x];
5324 blend_pixel(dst&: s, src);
5325 // Then gamma-corrected blend with glyph shape
5326 if (colorProfile)
5327 s = colorProfile->toLinear(rgb64: s);
5328 grayBlendPixel(dst&: dest[x], coverage, srcLinear: s, colorProfile);
5329 }
5330}
5331
5332static void qt_alphamapblit_generic_oneline(const uchar *map, int len,
5333 const QRgba64 srcColor, QRgba64 *dest,
5334 const QRgba64 color,
5335 const QColorTrcLut *colorProfile)
5336{
5337 for (int j = 0; j < len; ++j)
5338 alphamapblend_generic(coverage: map[j], dest, x: j, srcLinear: srcColor, src: color, colorProfile);
5339}
5340
5341static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5342 int x, int y, const QRgba64 &color,
5343 const uchar *map,
5344 int mapWidth, int mapHeight, int mapStride,
5345 const QClipData *clip, bool useGammaCorrection)
5346{
5347 if (color.isTransparent())
5348 return;
5349
5350 const QColorTrcLut *colorProfile = nullptr;
5351
5352 if (useGammaCorrection)
5353 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5354
5355 QRgba64 srcColor = color;
5356 if (colorProfile && color.isOpaque())
5357 srcColor = colorProfile->toLinear(rgb64: srcColor);
5358
5359 alignas(8) QRgba64 buffer[BufferSize];
5360 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5361 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5362
5363 if (!clip) {
5364 for (int ly = 0; ly < mapHeight; ++ly) {
5365 int i = x;
5366 int length = mapWidth;
5367 while (length > 0) {
5368 int l = qMin(a: BufferSize, b: length);
5369
5370 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5371 qt_alphamapblit_generic_oneline(map: map + i - x, len: l,
5372 srcColor, dest, color,
5373 colorProfile);
5374 if (destStore64)
5375 destStore64(rasterBuffer, i, y + ly, dest, l);
5376 length -= l;
5377 i += l;
5378 }
5379 map += mapStride;
5380 }
5381 } else {
5382 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5383
5384 int top = qMax(a: y, b: 0);
5385 map += (top - y) * mapStride;
5386
5387 const_cast<QClipData *>(clip)->initialize();
5388 for (int yp = top; yp<bottom; ++yp) {
5389 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5390
5391 for (int i=0; i<line.count; ++i) {
5392 const QT_FT_Span &clip = line.spans[i];
5393
5394 int start = qMax<int>(a: x, b: clip.x);
5395 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5396 if (end <= start)
5397 continue;
5398 Q_ASSERT(end - start <= BufferSize);
5399 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
5400 qt_alphamapblit_generic_oneline(map: map + start - x, len: end - start,
5401 srcColor, dest, color,
5402 colorProfile);
5403 if (destStore64)
5404 destStore64(rasterBuffer, start, clip.y, dest, end - start);
5405 } // for (i -> line.count)
5406 map += mapStride;
5407 } // for (yp -> bottom)
5408 }
5409}
5410#else
5411static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5412 int x, int y, const QRgba64 &color,
5413 const uchar *map,
5414 int mapWidth, int mapHeight, int mapStride,
5415 const QClipData *clip, bool useGammaCorrection)
5416{
5417 if (color.isTransparent())
5418 return;
5419
5420 const quint32 c = color.toArgb32();
5421
5422 const QColorTrcLut *colorProfile = nullptr;
5423
5424 if (useGammaCorrection)
5425 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5426
5427 QRgba64 srcColor = color;
5428 if (colorProfile && color.isOpaque())
5429 srcColor = colorProfile->toLinear(srcColor);
5430
5431 quint32 buffer[BufferSize];
5432 const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
5433 const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
5434
5435 if (!clip) {
5436 for (int ly = 0; ly < mapHeight; ++ly) {
5437 int i = x;
5438 int length = mapWidth;
5439 while (length > 0) {
5440 int l = qMin(BufferSize, length);
5441 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
5442 for (int j=0; j < l; ++j) {
5443 const int coverage = map[j + (i - x)];
5444 alphamapblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
5445 }
5446 if (destStore)
5447 destStore(rasterBuffer, i, y + ly, dest, l);
5448 length -= l;
5449 i += l;
5450 }
5451 map += mapStride;
5452 }
5453 } else {
5454 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5455
5456 int top = qMax(y, 0);
5457 map += (top - y) * mapStride;
5458
5459 const_cast<QClipData *>(clip)->initialize();
5460 for (int yp = top; yp<bottom; ++yp) {
5461 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5462
5463 for (int i=0; i<line.count; ++i) {
5464 const QT_FT_Span &clip = line.spans[i];
5465
5466 int start = qMax<int>(x, clip.x);
5467 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5468 if (end <= start)
5469 continue;
5470 Q_ASSERT(end - start <= BufferSize);
5471 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
5472
5473 for (int xp=start; xp<end; ++xp) {
5474 const int coverage = map[xp - x];
5475 alphamapblend_argb32(dest + xp - x, coverage, srcColor, color, colorProfile);
5476 }
5477 if (destStore)
5478 destStore(rasterBuffer, start, clip.y, dest, end - start);
5479 } // for (i -> line.count)
5480 map += mapStride;
5481 } // for (yp -> bottom)
5482 }
5483}
5484#endif
5485
5486static inline void alphamapblend_quint16(int coverage, quint16 *dest, int x, const quint16 srcColor)
5487{
5488 if (coverage == 0) {
5489 // nothing
5490 } else if (coverage == 255) {
5491 dest[x] = srcColor;
5492 } else {
5493 dest[x] = BYTE_MUL_RGB16(x: srcColor, a: coverage)
5494 + BYTE_MUL_RGB16(x: dest[x], a: 255 - coverage);
5495 }
5496}
5497
5498void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
5499 int x, int y, const QRgba64 &color,
5500 const uchar *map,
5501 int mapWidth, int mapHeight, int mapStride,
5502 const QClipData *clip, bool useGammaCorrection)
5503{
5504 if (useGammaCorrection || !color.isOpaque()) {
5505 qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
5506 return;
5507 }
5508
5509 const quint16 c = color.toRgb16();
5510
5511 if (!clip) {
5512 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
5513 const int destStride = rasterBuffer->stride<quint16>();
5514 while (--mapHeight >= 0) {
5515 for (int i = 0; i < mapWidth; ++i)
5516 alphamapblend_quint16(coverage: map[i], dest, x: i, srcColor: c);
5517 dest += destStride;
5518 map += mapStride;
5519 }
5520 } else {
5521 int top = qMax(a: y, b: 0);
5522 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5523 map += (top - y) * mapStride;
5524
5525 const_cast<QClipData *>(clip)->initialize();
5526 for (int yp = top; yp<bottom; ++yp) {
5527 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5528
5529 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y: yp));
5530
5531 for (int i=0; i<line.count; ++i) {
5532 const QT_FT_Span &clip = line.spans[i];
5533
5534 int start = qMax<int>(a: x, b: clip.x);
5535 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5536
5537 for (int xp=start; xp<end; ++xp)
5538 alphamapblend_quint16(coverage: map[xp - x], dest, x: xp, srcColor: c);
5539 } // for (i -> line.count)
5540 map += mapStride;
5541 } // for (yp -> bottom)
5542 }
5543}
5544
5545static void qt_alphamapblit_argb32_oneline(const uchar *map,
5546 int mapWidth, const QRgba64 &srcColor,
5547 quint32 *dest, const quint32 c,
5548 const QColorTrcLut *colorProfile)
5549{
5550 for (int i = 0; i < mapWidth; ++i)
5551 alphamapblend_argb32(dst: dest + i, coverage: map[i], srcLinear: srcColor, src: c, colorProfile);
5552}
5553
5554static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer,
5555 int x, int y, const QRgba64 &color,
5556 const uchar *map,
5557 int mapWidth, int mapHeight, int mapStride,
5558 const QClipData *clip, bool useGammaCorrection)
5559{
5560 const quint32 c = color.toArgb32();
5561 const int destStride = rasterBuffer->stride<quint32>();
5562
5563 if (color.isTransparent())
5564 return;
5565
5566 const QColorTrcLut *colorProfile = nullptr;
5567
5568 if (useGammaCorrection)
5569 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5570
5571 QRgba64 srcColor = color;
5572 if (colorProfile && color.isOpaque())
5573 srcColor = colorProfile->toLinear(rgb64: srcColor);
5574
5575 if (!clip) {
5576 quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
5577 while (--mapHeight >= 0) {
5578 qt_alphamapblit_argb32_oneline(map, mapWidth, srcColor, dest, c, colorProfile);
5579 dest += destStride;
5580 map += mapStride;
5581 }
5582 } else {
5583 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5584
5585 int top = qMax(a: y, b: 0);
5586 map += (top - y) * mapStride;
5587
5588 const_cast<QClipData *>(clip)->initialize();
5589 for (int yp = top; yp<bottom; ++yp) {
5590 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5591
5592 quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y: yp));
5593
5594 for (int i=0; i<line.count; ++i) {
5595 const QT_FT_Span &clip = line.spans[i];
5596 int start = qMax<int>(a: x, b: clip.x);
5597 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5598 qt_alphamapblit_argb32_oneline(map: map + start - x, mapWidth: end - start, srcColor, dest: dest + start, c, colorProfile);
5599 } // for (yp -> bottom)
5600 map += mapStride;
5601 }
5602 }
5603}
5604
5605#if QT_CONFIG(raster_64bit)
5606static void qt_alphamapblit_nonpremul_argb32(QRasterBuffer *rasterBuffer,
5607 int x, int y, const QRgba64 &color,
5608 const uchar *map,
5609 int mapWidth, int mapHeight, int mapStride,
5610 const QClipData *clip, bool useGammaCorrection)
5611{
5612 if (clip)
5613 return qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight,
5614 mapStride, clip, useGammaCorrection);
5615
5616 if (color.isTransparent())
5617 return;
5618
5619 const QColorTrcLut *colorProfile = nullptr;
5620
5621 if (useGammaCorrection)
5622 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5623
5624 const quint32 c = color.toArgb32();
5625 QRgba64 srcColor = color;
5626 if (colorProfile && color.isOpaque())
5627 srcColor = colorProfile->toLinear(rgb64: srcColor);
5628
5629 alignas(8) QRgba64 buffer[BufferSize];
5630 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5631 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5632
5633 for (int ly = 0; ly < mapHeight; ++ly) {
5634 bool dstFullyOpaque = true;
5635 int i = x;
5636 int length = mapWidth;
5637 while (length > 0) {
5638 int l = qMin(a: BufferSize, b: length);
5639 quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y: y + ly)) + i;
5640 for (int j = 0; j < l && dstFullyOpaque; ++j)
5641 dstFullyOpaque = (dest[j] & 0xff000000) == 0xff000000;
5642 if (dstFullyOpaque) {
5643 // Use RGB/ARGB32PM optimized version
5644 qt_alphamapblit_argb32_oneline(map: map + i - x, mapWidth: l, srcColor, dest, c, colorProfile);
5645 } else {
5646 // Use generic version
5647 QRgba64 *dest64 = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5648 qt_alphamapblit_generic_oneline(map: map + i - x, len: l,
5649 srcColor, dest: dest64, color,
5650 colorProfile);
5651 if (destStore64)
5652 destStore64(rasterBuffer, i, y + ly, dest64, l);
5653 }
5654 length -= l;
5655 i += l;
5656 }
5657 map += mapStride;
5658 }
5659}
5660#endif
5661
5662static inline int qRgbAvg(QRgb rgb)
5663{
5664 return (qRed(rgb) * 5 + qGreen(rgb) * 6 + qBlue(rgb) * 5) / 16;
5665}
5666
5667static inline void rgbBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
5668{
5669 // Do a gammacorrected RGB alphablend...
5670 const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(rgb32: *dst) : QRgba64::fromArgb32(rgb: *dst);
5671
5672 QRgba64 blend = rgbBlend(d: dlinear, s: slinear, rgbAlpha: coverage);
5673
5674 *dst = colorProfile ? colorProfile->fromLinear64(rgb64: blend) : toArgb32(rgba64: blend);
5675}
5676
5677static inline QRgb rgbBlend(QRgb d, QRgb s, uint rgbAlpha)
5678{
5679#if defined(__SSE2__)
5680 __m128i vd = _mm_cvtsi32_si128(a: d);
5681 __m128i vs = _mm_cvtsi32_si128(a: s);
5682 __m128i va = _mm_cvtsi32_si128(a: rgbAlpha);
5683 const __m128i vz = _mm_setzero_si128();
5684 vd = _mm_unpacklo_epi8(a: vd, b: vz);
5685 vs = _mm_unpacklo_epi8(a: vs, b: vz);
5686 va = _mm_unpacklo_epi8(a: va, b: vz);
5687 __m128i vb = _mm_xor_si128(a: _mm_set1_epi16(w: 255), b: va);
5688 vs = _mm_mullo_epi16(a: vs, b: va);
5689 vd = _mm_mullo_epi16(a: vd, b: vb);
5690 vd = _mm_add_epi16(a: vd, b: vs);
5691 vd = _mm_add_epi16(a: vd, b: _mm_srli_epi16(a: vd, count: 8));
5692 vd = _mm_add_epi16(a: vd, b: _mm_set1_epi16(w: 0x80));
5693 vd = _mm_srli_epi16(a: vd, count: 8);
5694 vd = _mm_packus_epi16(a: vd, b: vd);
5695 return _mm_cvtsi128_si32(a: vd);
5696#else
5697 const int dr = qRed(d);
5698 const int dg = qGreen(d);
5699 const int db = qBlue(d);
5700
5701 const int sr = qRed(s);
5702 const int sg = qGreen(s);
5703 const int sb = qBlue(s);
5704
5705 const int mr = qRed(rgbAlpha);
5706 const int mg = qGreen(rgbAlpha);
5707 const int mb = qBlue(rgbAlpha);
5708
5709 const int nr = qt_div_255(sr * mr + dr * (255 - mr));
5710 const int ng = qt_div_255(sg * mg + dg * (255 - mg));
5711 const int nb = qt_div_255(sb * mb + db * (255 - mb));
5712
5713 return 0xff000000 | (nr << 16) | (ng << 8) | nb;
5714#endif
5715}
5716
5717static inline void alphargbblend_argb32(quint32 *dst, uint coverage, const QRgba64 &srcLinear, quint32 src, const QColorTrcLut *colorProfile)
5718{
5719 if (coverage == 0xff000000) {
5720 // nothing
5721 } else if (coverage == 0xffffffff && qAlpha(rgb: src) == 255) {
5722 blend_pixel(dst&: *dst, src);
5723 } else if (*dst < 0xff000000) {
5724 // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
5725 blend_pixel(dst&: *dst, src, const_alpha: qRgbAvg(rgb: coverage));
5726 } else if (!colorProfile) {
5727 // First do naive blend with text-color
5728 QRgb s = *dst;
5729 blend_pixel(dst&: s, src);
5730 // Then a naive blend with glyph shape
5731 *dst = rgbBlend(d: *dst, s, rgbAlpha: coverage);
5732 } else if (srcLinear.isOpaque()) {
5733 rgbBlendPixel(dst, coverage, slinear: srcLinear, colorProfile);
5734 } else {
5735 // First do naive blend with text-color
5736 QRgb s = *dst;
5737 blend_pixel(dst&: s, src);
5738 // Then gamma-corrected blend with glyph shape
5739 QRgba64 s64 = colorProfile ? colorProfile->toLinear64(rgb32: s) : QRgba64::fromArgb32(rgb: s);
5740 rgbBlendPixel(dst, coverage, slinear: s64, colorProfile);
5741 }
5742}
5743
5744#if QT_CONFIG(raster_64bit)
5745static inline void rgbBlendPixel(QRgba64 &dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
5746{
5747 // Do a gammacorrected RGB alphablend...
5748 const QRgba64 dlinear = colorProfile ? colorProfile->toLinear(rgb64: dst) : dst;
5749
5750 QRgba64 blend = rgbBlend(d: dlinear, s: slinear, rgbAlpha: coverage);
5751
5752 dst = colorProfile ? colorProfile->fromLinear(rgb64: blend) : blend;
5753}
5754
5755static inline void alphargbblend_generic(uint coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
5756{
5757 if (coverage == 0xff000000) {
5758 // nothing
5759 } else if (coverage == 0xffffffff) {
5760 blend_pixel(dst&: dest[x], src);
5761 } else if (!dest[x].isOpaque()) {
5762 // Do a gray alphablend.
5763 alphamapblend_generic(coverage: qRgbAvg(rgb: coverage), dest, x, srcLinear, src, colorProfile);
5764 } else if (src.isOpaque()) {
5765 rgbBlendPixel(dst&: dest[x], coverage, slinear: srcLinear, colorProfile);
5766 } else {
5767 // First do naive blend with text-color
5768 QRgba64 s = dest[x];
5769 blend_pixel(dst&: s, src);
5770 // Then gamma-corrected blend with glyph shape
5771 if (colorProfile)
5772 s = colorProfile->toLinear(rgb64: s);
5773 rgbBlendPixel(dst&: dest[x], coverage, slinear: s, colorProfile);
5774 }
5775}
5776
5777static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
5778 int x, int y, const QRgba64 &color,
5779 const uint *src, int mapWidth, int mapHeight, int srcStride,
5780 const QClipData *clip, bool useGammaCorrection)
5781{
5782 if (color.isTransparent())
5783 return;
5784
5785 const QColorTrcLut *colorProfile = nullptr;
5786
5787 if (useGammaCorrection)
5788 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
5789
5790 QRgba64 srcColor = color;
5791 if (colorProfile && color.isOpaque())
5792 srcColor = colorProfile->toLinear(rgb64: srcColor);
5793
5794 alignas(8) QRgba64 buffer[BufferSize];
5795 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5796 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5797
5798 if (!clip) {
5799 for (int ly = 0; ly < mapHeight; ++ly) {
5800 int i = x;
5801 int length = mapWidth;
5802 while (length > 0) {
5803 int l = qMin(a: BufferSize, b: length);
5804 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5805 for (int j=0; j < l; ++j) {
5806 const uint coverage = src[j + (i - x)];
5807 alphargbblend_generic(coverage, dest, x: j, srcLinear: srcColor, src: color, colorProfile);
5808 }
5809 if (destStore64)
5810 destStore64(rasterBuffer, i, y + ly, dest, l);
5811 length -= l;
5812 i += l;
5813 }
5814 src += srcStride;
5815 }
5816 } else {
5817 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5818
5819 int top = qMax(a: y, b: 0);
5820 src += (top - y) * srcStride;
5821
5822 const_cast<QClipData *>(clip)->initialize();
5823 for (int yp = top; yp<bottom; ++yp) {
5824 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5825
5826 for (int i=0; i<line.count; ++i) {
5827 const QT_FT_Span &clip = line.spans[i];
5828
5829 int start = qMax<int>(a: x, b: clip.x);
5830 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5831 if (end <= start)
5832 continue;
5833 Q_ASSERT(end - start <= BufferSize);
5834 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
5835
5836 for (int xp=start; xp<end; ++xp) {
5837 const uint coverage = src[xp - x];
5838 alphargbblend_generic(coverage, dest, x: xp - start, srcLinear: srcColor, src: color, colorProfile);
5839 }
5840 if (destStore64)
5841 destStore64(rasterBuffer, start, clip.y, dest, end - start);
5842 } // for (i -> line.count)
5843 src += srcStride;
5844 } // for (yp -> bottom)
5845 }
5846}
5847#else
5848static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
5849 int x, int y, const QRgba64 &color,
5850 const uint *src, int mapWidth, int mapHeight, int srcStride,
5851 const QClipData *clip, bool useGammaCorrection)
5852{
5853 if (color.isTransparent())
5854 return;
5855
5856 const quint32 c = color.toArgb32();
5857
5858 const QColorTrcLut *colorProfile = nullptr;
5859
5860 if (useGammaCorrection)
5861 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
5862
5863 QRgba64 srcColor = color;
5864 if (colorProfile && color.isOpaque())
5865 srcColor = colorProfile->toLinear(srcColor);
5866
5867 quint32 buffer[BufferSize];
5868 const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
5869 const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
5870
5871 if (!clip) {
5872 for (int ly = 0; ly < mapHeight; ++ly) {
5873 int i = x;
5874 int length = mapWidth;
5875 while (length > 0) {
5876 int l = qMin(BufferSize, length);
5877 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
5878 for (int j=0; j < l; ++j) {
5879 const uint coverage = src[j + (i - x)];
5880 alphargbblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
5881 }
5882 if (destStore)
5883 destStore(rasterBuffer, i, y + ly, dest, l);
5884 length -= l;
5885 i += l;
5886 }
5887 src += srcStride;
5888 }
5889 } else {
5890 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5891
5892 int top = qMax(y, 0);
5893 src += (top - y) * srcStride;
5894
5895 const_cast<QClipData *>(clip)->initialize();
5896 for (int yp = top; yp<bottom; ++yp) {
5897 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5898
5899 for (int i=0; i<line.count; ++i) {
5900 const QT_FT_Span &clip = line.spans[i];
5901
5902 int start = qMax<int>(x, clip.x);
5903 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5904 if (end <= start)
5905 continue;
5906 Q_ASSERT(end - start <= BufferSize);
5907 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
5908
5909 for (int xp=start; xp<end; ++xp) {
5910 const uint coverage = src[xp - x];
5911 alphargbblend_argb32(dest + xp - start, coverage, srcColor, c, colorProfile);
5912 }
5913 if (destStore)
5914 destStore(rasterBuffer, start, clip.y, dest, end - start);
5915 } // for (i -> line.count)
5916 src += srcStride;
5917 } // for (yp -> bottom)
5918 }
5919}
5920#endif
5921
5922static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
5923 int x, int y, const QRgba64 &color,
5924 const uint *src, int mapWidth, int mapHeight, int srcStride,
5925 const QClipData *clip, bool useGammaCorrection)
5926{
5927 if (color.isTransparent())
5928 return;
5929
5930 const quint32 c = color.toArgb32();
5931
5932 const QColorTrcLut *colorProfile = nullptr;
5933
5934 if (useGammaCorrection)
5935 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
5936
5937 QRgba64 srcColor = color;
5938 if (colorProfile && color.isOpaque())
5939 srcColor = colorProfile->toLinear(rgb64: srcColor);
5940
5941 if (!clip) {
5942 quint32 *dst = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
5943 const int destStride = rasterBuffer->stride<quint32>();
5944 while (--mapHeight >= 0) {
5945 for (int i = 0; i < mapWidth; ++i) {
5946 const uint coverage = src[i];
5947 alphargbblend_argb32(dst: dst + i, coverage, srcLinear: srcColor, src: c, colorProfile);
5948 }
5949
5950 dst += destStride;
5951 src += srcStride;
5952 }
5953 } else {
5954 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5955
5956 int top = qMax(a: y, b: 0);
5957 src += (top - y) * srcStride;
5958
5959 const_cast<QClipData *>(clip)->initialize();
5960 for (int yp = top; yp<bottom; ++yp) {
5961 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5962
5963 quint32 *dst = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y: yp));
5964
5965 for (int i=0; i<line.count; ++i) {
5966 const QT_FT_Span &clip = line.spans[i];
5967
5968 int start = qMax<int>(a: x, b: clip.x);
5969 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5970
5971 for (int xp=start; xp<end; ++xp) {
5972 const uint coverage = src[xp - x];
5973 alphargbblend_argb32(dst: dst + xp, coverage, srcLinear: srcColor, src: c, colorProfile);
5974 }
5975 } // for (i -> line.count)
5976 src += srcStride;
5977 } // for (yp -> bottom)
5978
5979 }
5980}
5981
5982static void qt_rectfill_argb32(QRasterBuffer *rasterBuffer,
5983 int x, int y, int width, int height,
5984 const QRgba64 &color)
5985{
5986 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
5987 value: color.toArgb32(), x, y, width, height, stride: rasterBuffer->bytesPerLine());
5988}
5989
5990static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer,
5991 int x, int y, int width, int height,
5992 const QRgba64 &color)
5993{
5994 const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
5995 quint32 c32 = color.toArgb32();
5996 quint16 c16;
5997 layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c16), &c32, 0, 1, nullptr, nullptr);
5998 qt_rectfill<quint16>(dest: reinterpret_cast<quint16 *>(rasterBuffer->buffer()),
5999 value: c16, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6000}
6001
6002static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer,
6003 int x, int y, int width, int height,
6004 const QRgba64 &color)
6005{
6006 const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6007 quint32 c32 = color.toArgb32();
6008 quint24 c24;
6009 layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c24), &c32, 0, 1, nullptr, nullptr);
6010 qt_rectfill<quint24>(dest: reinterpret_cast<quint24 *>(rasterBuffer->buffer()),
6011 value: c24, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6012}
6013
6014static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer,
6015 int x, int y, int width, int height,
6016 const QRgba64 &color)
6017{
6018 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6019 value: color.unpremultiplied().toArgb32(), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6020}
6021
6022static void qt_rectfill_rgba(QRasterBuffer *rasterBuffer,
6023 int x, int y, int width, int height,
6024 const QRgba64 &color)
6025{
6026 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6027 value: ARGB2RGBA(x: color.toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6028}
6029
6030static void qt_rectfill_nonpremul_rgba(QRasterBuffer *rasterBuffer,
6031 int x, int y, int width, int height,
6032 const QRgba64 &color)
6033{
6034 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6035 value: ARGB2RGBA(x: color.unpremultiplied().toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6036}
6037
6038template<QtPixelOrder PixelOrder>
6039static void qt_rectfill_rgb30(QRasterBuffer *rasterBuffer,
6040 int x, int y, int width, int height,
6041 const QRgba64 &color)
6042{
6043 qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6044 qConvertRgb64ToRgb30<PixelOrder>(color), x, y, width, height, rasterBuffer->bytesPerLine());
6045}
6046
6047static void qt_rectfill_alpha(QRasterBuffer *rasterBuffer,
6048 int x, int y, int width, int height,
6049 const QRgba64 &color)
6050{
6051 qt_rectfill<quint8>(dest: reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6052 value: color.alpha() >> 8, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6053}
6054
6055static void qt_rectfill_gray(QRasterBuffer *rasterBuffer,
6056 int x, int y, int width, int height,
6057 const QRgba64 &color)
6058{
6059 qt_rectfill<quint8>(dest: reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6060 value: qGray(rgb: color.toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6061}
6062
6063static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer,
6064 int x, int y, int width, int height,
6065 const QRgba64 &color)
6066{
6067 const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
6068 quint64 c64;
6069 store(reinterpret_cast<uchar *>(&c64), &color, 0, 1, nullptr, nullptr);
6070 qt_rectfill<quint64>(dest: reinterpret_cast<quint64 *>(rasterBuffer->buffer()),
6071 value: c64, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6072}
6073
6074static void qt_rectfill_fp32x4(QRasterBuffer *rasterBuffer,
6075 int x, int y, int width, int height,
6076 const QRgba64 &color)
6077{
6078 const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
6079 QRgbaFloat32 c;
6080 store(reinterpret_cast<uchar *>(&c), &color, 0, 1, nullptr, nullptr);
6081 qt_rectfill<QRgbaFloat32>(dest: reinterpret_cast<QRgbaFloat32 *>(rasterBuffer->buffer()),
6082 value: c, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6083}
6084
// Map table for destination image format. Contains function pointers
// for blends of various types onto the destination
6087
6088DrawHelper qDrawHelper[] =
6089{
6090 // Format_Invalid,
6091 { .blendColor: nullptr, .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr },
6092 // Format_Mono,
6093 {
6094 .blendColor: blend_color_generic,
6095 .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
6096 },
6097 // Format_MonoLSB,
6098 {
6099 .blendColor: blend_color_generic,
6100 .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
6101 },
6102 // Format_Indexed8,
6103 {
6104 .blendColor: blend_color_generic,
6105 .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
6106 },
6107 // Format_RGB32,
6108 {
6109 .blendColor: blend_color_argb,
6110 .bitmapBlit: qt_bitmapblit_argb32,
6111 .alphamapBlit: qt_alphamapblit_argb32,
6112 .alphaRGBBlit: qt_alphargbblit_argb32,
6113 .fillRect: qt_rectfill_argb32
6114 },
6115 // Format_ARGB32,
6116 {
6117 .blendColor: blend_color_generic,
6118 .bitmapBlit: qt_bitmapblit_argb32,
6119#if QT_CONFIG(raster_64bit)
6120 .alphamapBlit: qt_alphamapblit_nonpremul_argb32,
6121#else
6122 qt_alphamapblit_generic,
6123#endif
6124 .alphaRGBBlit: qt_alphargbblit_generic,
6125 .fillRect: qt_rectfill_nonpremul_argb32
6126 },
6127 // Format_ARGB32_Premultiplied
6128 {
6129 .blendColor: blend_color_argb,
6130 .bitmapBlit: qt_bitmapblit_argb32,
6131 .alphamapBlit: qt_alphamapblit_argb32,
6132 .alphaRGBBlit: qt_alphargbblit_argb32,
6133 .fillRect: qt_rectfill_argb32
6134 },
6135 // Format_RGB16
6136 {
6137 .blendColor: blend_color_generic,
6138 .bitmapBlit: qt_bitmapblit_quint16,
6139 .alphamapBlit: qt_alphamapblit_quint16,
6140 .alphaRGBBlit: qt_alphargbblit_generic,
6141 .fillRect: qt_rectfill_quint16
6142 },
6143 // Format_ARGB8565_Premultiplied
6144 {
6145 .blendColor: blend_color_generic,
6146 .bitmapBlit: nullptr,
6147 .alphamapBlit: qt_alphamapblit_generic,
6148 .alphaRGBBlit: qt_alphargbblit_generic,
6149 .fillRect: qt_rectfill_quint24
6150 },
6151 // Format_RGB666
6152 {
6153 .blendColor: blend_color_generic,
6154 .bitmapBlit: nullptr,
6155 .alphamapBlit: qt_alphamapblit_generic,
6156 .alphaRGBBlit: qt_alphargbblit_generic,
6157 .fillRect: qt_rectfill_quint24
6158 },
6159 // Format_ARGB6666_Premultiplied
6160 {
6161 .blendColor: blend_color_generic,
6162 .bitmapBlit: nullptr,
6163 .alphamapBlit: qt_alphamapblit_generic,
6164 .alphaRGBBlit: qt_alphargbblit_generic,
6165 .fillRect: qt_rectfill_quint24
6166 },
6167 // Format_RGB555
6168 {
6169 .blendColor: blend_color_generic,
6170 .bitmapBlit: nullptr,
6171 .alphamapBlit: qt_alphamapblit_generic,
6172 .alphaRGBBlit: qt_alphargbblit_generic,
6173 .fillRect: qt_rectfill_quint16
6174 },
6175 // Format_ARGB8555_Premultiplied
6176 {
6177 .blendColor: blend_color_generic,
6178 .bitmapBlit: nullptr,
6179 .alphamapBlit: qt_alphamapblit_generic,
6180 .alphaRGBBlit: qt_alphargbblit_generic,
6181 .fillRect: qt_rectfill_quint24
6182 },
6183 // Format_RGB888
6184 {
6185 .blendColor: blend_color_generic,
6186 .bitmapBlit: nullptr,
6187 .alphamapBlit: qt_alphamapblit_generic,
6188 .alphaRGBBlit: qt_alphargbblit_generic,
6189 .fillRect: qt_rectfill_quint24
6190 },
6191 // Format_RGB444
6192 {
6193 .blendColor: blend_color_generic,
6194 .bitmapBlit: nullptr,
6195 .alphamapBlit: qt_alphamapblit_generic,
6196 .alphaRGBBlit: qt_alphargbblit_generic,
6197 .fillRect: qt_rectfill_quint16
6198 },
6199 // Format_ARGB4444_Premultiplied
6200 {
6201 .blendColor: blend_color_generic,
6202 .bitmapBlit: nullptr,
6203 .alphamapBlit: qt_alphamapblit_generic,
6204 .alphaRGBBlit: qt_alphargbblit_generic,
6205 .fillRect: qt_rectfill_quint16
6206 },
6207 // Format_RGBX8888
6208 {
6209 .blendColor: blend_color_generic,
6210 .bitmapBlit: qt_bitmapblit_rgba8888,
6211 .alphamapBlit: qt_alphamapblit_generic,
6212 .alphaRGBBlit: qt_alphargbblit_generic,
6213 .fillRect: qt_rectfill_rgba
6214 },
6215 // Format_RGBA8888
6216 {
6217 .blendColor: blend_color_generic,
6218 .bitmapBlit: qt_bitmapblit_rgba8888,
6219 .alphamapBlit: qt_alphamapblit_generic,
6220 .alphaRGBBlit: qt_alphargbblit_generic,
6221 .fillRect: qt_rectfill_nonpremul_rgba
6222 },
6223 // Format_RGB8888_Premultiplied
6224 {
6225 .blendColor: blend_color_generic,
6226 .bitmapBlit: qt_bitmapblit_rgba8888,
6227 .alphamapBlit: qt_alphamapblit_generic,
6228 .alphaRGBBlit: qt_alphargbblit_generic,
6229 .fillRect: qt_rectfill_rgba
6230 },
6231 // Format_BGR30
6232 {
6233 .blendColor: blend_color_generic_rgb64,
6234 .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderBGR>,
6235 .alphamapBlit: qt_alphamapblit_generic,
6236 .alphaRGBBlit: qt_alphargbblit_generic,
6237 .fillRect: qt_rectfill_rgb30<PixelOrderBGR>
6238 },
6239 // Format_A2BGR30_Premultiplied
6240 {
6241 .blendColor: blend_color_generic_rgb64,
6242 .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderBGR>,
6243 .alphamapBlit: qt_alphamapblit_generic,
6244 .alphaRGBBlit: qt_alphargbblit_generic,
6245 .fillRect: qt_rectfill_rgb30<PixelOrderBGR>
6246 },
6247 // Format_RGB30
6248 {
6249 .blendColor: blend_color_generic_rgb64,
6250 .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderRGB>,
6251 .alphamapBlit: qt_alphamapblit_generic,
6252 .alphaRGBBlit: qt_alphargbblit_generic,
6253 .fillRect: qt_rectfill_rgb30<PixelOrderRGB>
6254 },
6255 // Format_A2RGB30_Premultiplied
6256 {
6257 .blendColor: blend_color_generic_rgb64,
6258 .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderRGB>,
6259 .alphamapBlit: qt_alphamapblit_generic,
6260 .alphaRGBBlit: qt_alphargbblit_generic,
6261 .fillRect: qt_rectfill_rgb30<PixelOrderRGB>
6262 },
6263 // Format_Alpha8
6264 {
6265 .blendColor: blend_color_generic,
6266 .bitmapBlit: nullptr,
6267 .alphamapBlit: qt_alphamapblit_generic,
6268 .alphaRGBBlit: qt_alphargbblit_generic,
6269 .fillRect: qt_rectfill_alpha
6270 },
6271 // Format_Grayscale8
6272 {
6273 .blendColor: blend_color_generic,
6274 .bitmapBlit: nullptr,
6275 .alphamapBlit: qt_alphamapblit_generic,
6276 .alphaRGBBlit: qt_alphargbblit_generic,
6277 .fillRect: qt_rectfill_gray
6278 },
6279 // Format_RGBX64
6280 {
6281 .blendColor: blend_color_generic_rgb64,
6282 .bitmapBlit: nullptr,
6283 .alphamapBlit: qt_alphamapblit_generic,
6284 .alphaRGBBlit: qt_alphargbblit_generic,
6285 .fillRect: qt_rectfill_quint64
6286 },
6287 // Format_RGBA64
6288 {
6289 .blendColor: blend_color_generic_rgb64,
6290 .bitmapBlit: nullptr,
6291 .alphamapBlit: qt_alphamapblit_generic,
6292 .alphaRGBBlit: qt_alphargbblit_generic,
6293 .fillRect: qt_rectfill_quint64
6294 },
6295 // Format_RGBA64_Premultiplied
6296 {
6297 .blendColor: blend_color_generic_rgb64,
6298 .bitmapBlit: nullptr,
6299 .alphamapBlit: qt_alphamapblit_generic,
6300 .alphaRGBBlit: qt_alphargbblit_generic,
6301 .fillRect: qt_rectfill_quint64
6302 },
6303 // Format_Grayscale16
6304 {
6305 .blendColor: blend_color_generic_rgb64,
6306 .bitmapBlit: nullptr,
6307 .alphamapBlit: qt_alphamapblit_generic,
6308 .alphaRGBBlit: qt_alphargbblit_generic,
6309 .fillRect: qt_rectfill_quint16
6310 },
6311 // Format_BGR888
6312 {
6313 .blendColor: blend_color_generic,
6314 .bitmapBlit: nullptr,
6315 .alphamapBlit: qt_alphamapblit_generic,
6316 .alphaRGBBlit: qt_alphargbblit_generic,
6317 .fillRect: qt_rectfill_quint24
6318 },
6319 // Format_RGBX16FPx4
6320 {
6321 .blendColor: blend_color_generic_fp,
6322 .bitmapBlit: nullptr,
6323 .alphamapBlit: qt_alphamapblit_generic,
6324 .alphaRGBBlit: qt_alphargbblit_generic,
6325 .fillRect: qt_rectfill_quint64
6326 },
6327 // Format_RGBA16FPx4
6328 {
6329 .blendColor: blend_color_generic_fp,
6330 .bitmapBlit: nullptr,
6331 .alphamapBlit: qt_alphamapblit_generic,
6332 .alphaRGBBlit: qt_alphargbblit_generic,
6333 .fillRect: qt_rectfill_quint64
6334 },
6335 // Format_RGBA16FPx4_Premultiplied
6336 {
6337 .blendColor: blend_color_generic_fp,
6338 .bitmapBlit: nullptr,
6339 .alphamapBlit: qt_alphamapblit_generic,
6340 .alphaRGBBlit: qt_alphargbblit_generic,
6341 .fillRect: qt_rectfill_quint64
6342 },
6343 // Format_RGBX32FPx4
6344 {
6345 .blendColor: blend_color_generic_fp,
6346 .bitmapBlit: nullptr,
6347 .alphamapBlit: qt_alphamapblit_generic,
6348 .alphaRGBBlit: qt_alphargbblit_generic,
6349 .fillRect: qt_rectfill_fp32x4
6350 },
6351 // Format_RGBA32FPx4
6352 {
6353 .blendColor: blend_color_generic_fp,
6354 .bitmapBlit: nullptr,
6355 .alphamapBlit: qt_alphamapblit_generic,
6356 .alphaRGBBlit: qt_alphargbblit_generic,
6357 .fillRect: qt_rectfill_fp32x4
6358 },
6359 // Format_RGBA32FPx4_Premultiplied
6360 {
6361 .blendColor: blend_color_generic_fp,
6362 .bitmapBlit: nullptr,
6363 .alphamapBlit: qt_alphamapblit_generic,
6364 .alphaRGBBlit: qt_alphargbblit_generic,
6365 .fillRect: qt_rectfill_fp32x4
6366 },
6367};
6368
6369static_assert(std::size(qDrawHelper) == QImage::NImageFormats);
6370
6371#if !defined(Q_PROCESSOR_X86)
6372void qt_memfill64(quint64 *dest, quint64 color, qsizetype count)
6373{
6374 qt_memfill_template<quint64>(dest, color, count);
6375}
6376#endif
6377
6378#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_CLANG)
6379__attribute__((optimize("no-tree-vectorize")))
6380#endif
6381void qt_memfill24(quint24 *dest, quint24 color, qsizetype count)
6382{
6383# ifdef QT_COMPILER_SUPPORTS_SSSE3
6384 extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype);
6385 if (qCpuHasFeature(SSSE3))
6386 return qt_memfill24_ssse3(dest, color, count);
6387# endif
6388
6389 const quint32 v = color;
6390 quint24 *end = dest + count;
6391
6392 // prolog: align dest to 32bit
6393 while ((quintptr(dest) & 0x3) && dest < end) {
6394 *dest++ = v;
6395 }
6396 if (dest >= end)
6397 return;
6398
6399 const uint val1 = qFromBigEndian(source: (v << 8) | (v >> 16));
6400 const uint val2 = qFromBigEndian(source: (v << 16) | (v >> 8));
6401 const uint val3 = qFromBigEndian(source: (v << 24) | (v >> 0));
6402
6403 for ( ; dest <= (end - 4); dest += 4) {
6404 quint32 *dst = reinterpret_cast<quint32 *>(dest);
6405 dst[0] = val1;
6406 dst[1] = val2;
6407 dst[2] = val3;
6408 }
6409
6410 // less than 4px left
6411 switch (end - dest) {
6412 case 3:
6413 *dest++ = v;
6414 Q_FALLTHROUGH();
6415 case 2:
6416 *dest++ = v;
6417 Q_FALLTHROUGH();
6418 case 1:
6419 *dest++ = v;
6420 }
6421}
6422
6423void qt_memfill16(quint16 *dest, quint16 value, qsizetype count)
6424{
6425 const int align = quintptr(dest) & 0x3;
6426 if (align) {
6427 *dest++ = value;
6428 --count;
6429 }
6430
6431 if (count & 0x1)
6432 dest[count - 1] = value;
6433
6434 const quint32 value32 = (value << 16) | value;
6435 qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / 2);
6436}
6437
6438#if defined(Q_PROCESSOR_X86)
6439void (*qt_memfill32)(quint32 *dest, quint32 value, qsizetype count) = nullptr;
6440void (*qt_memfill64)(quint64 *dest, quint64 value, qsizetype count) = nullptr;
6441#elif !defined(__ARM_NEON__) && !defined(__MIPS_DSP__)
6442void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
6443{
6444 qt_memfill_template<quint32>(dest, color, count);
6445}
6446#endif
6447
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
// Forward declaration of the SSE4.1 store routine, parameterised on pixel
// order. qInitDrawhelperFunctions() installs the PixelOrderBGR and
// PixelOrderRGB instantiations into qPixelLayouts.
template<QtPixelOrder>
void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src,
                                                 int index, int count,
                                                 const QList<QRgb> *, QDitherInfo *);
#endif

// Called at the start of qInitDrawhelperFunctions() to set up the basic blend
// function tables.
extern void qInitBlendFunctions();
6453
6454static void qInitDrawhelperFunctions()
6455{
6456 // Set up basic blend function tables.
6457 qInitBlendFunctions();
6458
6459#if defined(Q_PROCESSOR_X86) && !defined(__SSE2__)
6460 qt_memfill32 = qt_memfill_template<quint32>;
6461 qt_memfill64 = qt_memfill_template<quint64>;
6462#elif defined(__SSE2__)
6463# ifndef __haswell__
6464 qt_memfill32 = qt_memfill32_sse2;
6465 qt_memfill64 = qt_memfill64_sse2;
6466# endif
6467 qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
6468 qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
6469 qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
6470 qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
6471 qDrawHelper[QImage::Format_RGBX8888].bitmapBlit = qt_bitmapblit8888_sse2;
6472 qDrawHelper[QImage::Format_RGBA8888].bitmapBlit = qt_bitmapblit8888_sse2;
6473 qDrawHelper[QImage::Format_RGBA8888_Premultiplied].bitmapBlit = qt_bitmapblit8888_sse2;
6474
6475 extern void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6476 const uchar *srcPixels, int sbpl, int srch,
6477 const QRectF &targetRect,
6478 const QRectF &sourceRect,
6479 const QRect &clip,
6480 int const_alpha);
6481 qScaleFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6482 qScaleFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6483 qScaleFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6484 qScaleFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6485
6486 extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
6487 const uchar *srcPixels, int sbpl,
6488 int w, int h,
6489 int const_alpha);
6490 extern void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6491 const uchar *srcPixels, int sbpl,
6492 int w, int h,
6493 int const_alpha);
6494
6495 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6496 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6497 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6498 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6499 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6500 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6501 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6502 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6503
6504 extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
6505 int y, int x, int length);
6506
6507 qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
6508
6509 extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6510 extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6511 extern void QT_FASTCALL comp_func_Source_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6512 extern void QT_FASTCALL comp_func_solid_Source_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6513 extern void QT_FASTCALL comp_func_Plus_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6514 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_sse2;
6515 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_sse2;
6516 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
6517 qt_functionForModeSolid_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_sse2;
6518 qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
6519
6520#ifdef QT_COMPILER_SUPPORTS_SSSE3
6521 if (qCpuHasFeature(SSSE3)) {
6522 extern void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
6523 const uchar *srcPixels, int sbpl,
6524 int w, int h,
6525 int const_alpha);
6526
6527 extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data,
6528 int y, int x, int length);
6529 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6530 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6531 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6532 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6533 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3;
6534 extern void QT_FASTCALL rbSwap_888_ssse3(uchar *dst, const uchar *src, int count);
6535 qPixelLayouts[QImage::Format_RGB888].rbSwap = rbSwap_888_ssse3;
6536 qPixelLayouts[QImage::Format_BGR888].rbSwap = rbSwap_888_ssse3;
6537 }
6538#endif // SSSE3
6539
6540#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
6541 if (qCpuHasFeature(SSE4_1)) {
6542 extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QList<QRgb> *);
6543 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QList<QRgb> *);
6544 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6545 const QList<QRgb> *, QDitherInfo *);
6546 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6547 const QList<QRgb> *, QDitherInfo *);
6548 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6549 const QList<QRgb> *, QDitherInfo *);
6550 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6551 const QList<QRgb> *, QDitherInfo *);
6552 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6553 const QList<QRgb> *, QDitherInfo *);
6554 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6555 const QList<QRgb> *, QDitherInfo *);
6556 extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6557 const QList<QRgb> *, QDitherInfo *);
6558 extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6559 const QList<QRgb> *, QDitherInfo *);
6560 extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6561 const QList<QRgb> *, QDitherInfo *);
6562 extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6563 const QList<QRgb> *, QDitherInfo *);
6564 extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6565 const QList<QRgb> *, QDitherInfo *);
6566 extern void QT_FASTCALL storeRGBA64FromRGBA64PM_sse4(uchar *, const QRgba64 *, int, int, const QList<QRgb> *, QDitherInfo *);
6567 extern void QT_FASTCALL storeRGBx64FromRGBA64PM_sse4(uchar *, const QRgba64 *, int, int, const QList<QRgb> *, QDitherInfo *);
6568 extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6569 extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6570# ifndef __haswell__
6571 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
6572 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
6573 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
6574 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
6575 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_sse4;
6576 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_sse4;
6577 qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6578 qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6579 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6580 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6581# endif
6582 qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
6583 qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;
6584 qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4;
6585 qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>;
6586 qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>;
6587 qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4;
6588 qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4;
6589 qStoreFromRGBA64PM[QImage::Format_RGBX64] = storeRGBx64FromRGBA64PM_sse4;
6590 qStoreFromRGBA64PM[QImage::Format_RGBA64] = storeRGBA64FromRGBA64PM_sse4;
6591#if QT_CONFIG(raster_64bit)
6592 destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4;
6593 destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4;
6594#endif
6595#if QT_CONFIG(raster_fp)
6596 extern const QRgbaFloat32 *QT_FASTCALL fetchRGBA32FToRGBA32F_sse4(QRgbaFloat32 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6597 extern void QT_FASTCALL storeRGBX32FFromRGBA32F_sse4(uchar *dest, const QRgbaFloat32 *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6598 extern void QT_FASTCALL storeRGBA32FFromRGBA32F_sse4(uchar *dest, const QRgbaFloat32 *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6599 qFetchToRGBA32F[QImage::Format_RGBA32FPx4] = fetchRGBA32FToRGBA32F_sse4;
6600 qStoreFromRGBA32F[QImage::Format_RGBX32FPx4] = storeRGBX32FFromRGBA32F_sse4;
6601 qStoreFromRGBA32F[QImage::Format_RGBA32FPx4] = storeRGBA32FFromRGBA32F_sse4;
6602#endif // QT_CONFIG(raster_fp)
6603 }
6604#endif
6605
6606#if defined(QT_COMPILER_SUPPORTS_AVX2)
6607 if (qCpuHasFeature(ArchHaswell)) {
6608 qt_memfill32 = qt_memfill32_avx2;
6609 qt_memfill64 = qt_memfill64_avx2;
6610 extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
6611 const uchar *srcPixels, int sbpl,
6612 int w, int h, int const_alpha);
6613 extern void qt_blend_argb32_on_argb32_avx2(uchar *destPixels, int dbpl,
6614 const uchar *srcPixels, int sbpl,
6615 int w, int h, int const_alpha);
6616 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6617 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6618 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6619 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6620 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6621 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6622 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6623 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6624
6625 extern void QT_FASTCALL comp_func_Source_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6626 extern void QT_FASTCALL comp_func_SourceOver_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6627 extern void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint *destPixels, int length, uint color, uint const_alpha);
6628 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_avx2;
6629 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_avx2;
6630 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2;
6631#if QT_CONFIG(raster_64bit)
6632 extern void QT_FASTCALL comp_func_Source_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6633 extern void QT_FASTCALL comp_func_SourceOver_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6634 extern void QT_FASTCALL comp_func_solid_SourceOver_rgb64_avx2(QRgba64 *destPixels, int length, QRgba64 color, uint const_alpha);
6635 qt_functionForMode64_C[QPainter::CompositionMode_Source] = comp_func_Source_rgb64_avx2;
6636 qt_functionForMode64_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgb64_avx2;
6637 qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2;
6638#endif
6639#if QT_CONFIG(raster_fp)
6640 extern void QT_FASTCALL comp_func_Source_rgbafp_avx2(QRgbaFloat32 *destPixels, const QRgbaFloat32 *srcPixels, int length, uint const_alpha);
6641 extern void QT_FASTCALL comp_func_SourceOver_rgbafp_avx2(QRgbaFloat32 *destPixels, const QRgbaFloat32 *srcPixels, int length, uint const_alpha);
6642 extern void QT_FASTCALL comp_func_solid_Source_rgbafp_avx2(QRgbaFloat32 *destPixels, int length, QRgbaFloat32 color, uint const_alpha);
6643 extern void QT_FASTCALL comp_func_solid_SourceOver_rgbafp_avx2(QRgbaFloat32 *destPixels, int length, QRgbaFloat32 color, uint const_alpha);
6644 qt_functionForModeFP_C[QPainter::CompositionMode_Source] = comp_func_Source_rgbafp_avx2;
6645 qt_functionForModeFP_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgbafp_avx2;
6646 qt_functionForModeSolidFP_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_rgbafp_avx2;
6647 qt_functionForModeSolidFP_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgbafp_avx2;
6648#endif
6649
6650 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6651 int &fx, int &fy, int fdx, int /*fdy*/);
6652 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6653 int &fx, int &fy, int fdx, int /*fdy*/);
6654 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image,
6655 int &fx, int &fy, int fdx, int fdy);
6656
6657 bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2;
6658 bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2;
6659 bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2;
6660
6661 extern void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, int count, const QList<QRgb> *);
6662 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, int count, const QList<QRgb> *);
6663 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6664 const QList<QRgb> *, QDitherInfo *);
6665 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6666 const QList<QRgb> *, QDitherInfo *);
6667 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_avx2;
6668 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
6669 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2;
6670 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
6671
6672 extern const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *, const uint *, int, const QList<QRgb> *, QDitherInfo *);
6673 extern const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uint *, int count, const QList<QRgb> *, QDitherInfo *);
6674 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QList<QRgb> *, QDitherInfo *);
6675 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QList<QRgb> *, QDitherInfo *);
6676 extern const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6677 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2;
6678 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2;
6679 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2;
6680 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2;
6681 qPixelLayouts[QImage::Format_RGBA64].fetchToRGBA64PM = fetchRGBA64ToRGBA64PM_avx2;
6682
6683 extern const uint *QT_FASTCALL fetchRGB16FToRGB32_avx2(uint *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6684 extern const uint *QT_FASTCALL fetchRGBA16FToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6685 extern const QRgba64 *QT_FASTCALL fetchRGBA16FPMToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6686 extern const QRgba64 *QT_FASTCALL fetchRGBA16FToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6687 extern void QT_FASTCALL storeRGB16FFromRGB32_avx2(uchar *dest, const uint *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6688 extern void QT_FASTCALL storeRGBA16FFromARGB32PM_avx2(uchar *dest, const uint *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6689 qPixelLayouts[QImage::Format_RGBX16FPx4].fetchToARGB32PM = fetchRGB16FToRGB32_avx2;
6690 qPixelLayouts[QImage::Format_RGBX16FPx4].fetchToRGBA64PM = fetchRGBA16FPMToRGBA64PM_avx2;
6691 qPixelLayouts[QImage::Format_RGBX16FPx4].storeFromARGB32PM = storeRGB16FFromRGB32_avx2;
6692 qPixelLayouts[QImage::Format_RGBX16FPx4].storeFromRGB32 = storeRGB16FFromRGB32_avx2;
6693 qPixelLayouts[QImage::Format_RGBA16FPx4].fetchToARGB32PM = fetchRGBA16FToARGB32PM_avx2;
6694 qPixelLayouts[QImage::Format_RGBA16FPx4].fetchToRGBA64PM = fetchRGBA16FToRGBA64PM_avx2;
6695 qPixelLayouts[QImage::Format_RGBA16FPx4].storeFromARGB32PM = storeRGBA16FFromARGB32PM_avx2;
6696 qPixelLayouts[QImage::Format_RGBA16FPx4].storeFromRGB32 = storeRGB16FFromRGB32_avx2;
6697 qPixelLayouts[QImage::Format_RGBA16FPx4_Premultiplied].fetchToARGB32PM = fetchRGB16FToRGB32_avx2;
6698 qPixelLayouts[QImage::Format_RGBA16FPx4_Premultiplied].fetchToRGBA64PM = fetchRGBA16FPMToRGBA64PM_avx2;
6699 qPixelLayouts[QImage::Format_RGBA16FPx4_Premultiplied].storeFromARGB32PM = storeRGB16FFromRGB32_avx2;
6700 qPixelLayouts[QImage::Format_RGBA16FPx4_Premultiplied].storeFromRGB32 = storeRGB16FFromRGB32_avx2;
6701#if QT_CONFIG(raster_fp)
6702 extern const QRgbaFloat32 *QT_FASTCALL fetchRGBA16FToRGBA32F_avx2(QRgbaFloat32 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6703 extern void QT_FASTCALL storeRGBX16FFromRGBA32F_avx2(uchar *dest, const QRgbaFloat32 *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6704 extern void QT_FASTCALL storeRGBA16FFromRGBA32F_avx2(uchar *dest, const QRgbaFloat32 *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6705 qFetchToRGBA32F[QImage::Format_RGBA16FPx4] = fetchRGBA16FToRGBA32F_avx2;
6706 qStoreFromRGBA32F[QImage::Format_RGBX16FPx4] = storeRGBX16FFromRGBA32F_avx2;
6707 qStoreFromRGBA32F[QImage::Format_RGBA16FPx4] = storeRGBA16FFromRGBA32F_avx2;
6708#endif // QT_CONFIG(raster_fp)
6709 }
6710
6711#endif
6712
6713#endif // SSE2
6714
6715#if defined(__ARM_NEON__)
6716 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6717 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6718 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6719 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6720#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6721 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6722 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6723 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6724 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6725#endif
6726
6727 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
6728 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
6729 qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
6730
6731 extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
6732 int y, int x, int length);
6733
6734 qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
6735
6736 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
6737
6738#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6739 extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, int count, const QList<QRgb> *);
6740 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, int count, const QList<QRgb> *);
6741 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6742 const QList<QRgb> *, QDitherInfo *);
6743 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6744 const QList<QRgb> *, QDitherInfo *);
6745 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6746 const QList<QRgb> *, QDitherInfo *);
6747 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6748 const QList<QRgb> *, QDitherInfo *);
6749 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6750 const QList<QRgb> *, QDitherInfo *);
6751 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6752 const QList<QRgb> *, QDitherInfo *);
6753 extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6754 const QList<QRgb> *, QDitherInfo *);
6755 extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6756 const QList<QRgb> *, QDitherInfo *);
6757 extern void QT_FASTCALL storeRGBXFromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6758 const QList<QRgb> *, QDitherInfo *);
6759 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon;
6760 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
6761 qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon;
6762 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_neon;
6763 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_neon;
6764 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon;
6765 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
6766 qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon;
6767 qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6768 qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6769 qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon;
6770 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6771 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6772#endif
6773
6774#if defined(ENABLE_PIXMAN_DRAWHELPERS)
6775 // The RGB16 helpers are using Arm32 assembly that has not been ported to AArch64
6776 qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
6777 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
6778 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
6779
6780 qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
6781 qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
6782
6783 qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
6784 qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
6785
6786 qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
6787
6788 destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
6789 destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
6790
6791 qMemRotateFunctions[QPixelLayout::BPP16][0] = qt_memrotate90_16_neon;
6792 qMemRotateFunctions[QPixelLayout::BPP16][2] = qt_memrotate270_16_neon;
6793#endif
6794#endif // defined(__ARM_NEON__)
6795
6796#if defined(__MIPS_DSP__)
6797 // Composition functions are all DSP r1
6798 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
6799 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
6800 qt_functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
6801 qt_functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
6802 qt_functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
6803 qt_functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
6804 qt_functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
6805 qt_functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
6806 qt_functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
6807 qt_functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
6808
6809 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
6810 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
6811 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
6812 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
6813 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
6814 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
6815 qt_functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
6816 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
6817
6818 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
6819 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
6820 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
6821 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
6822
6823 destFetchProc[QImage::Format_ARGB32] = qt_destFetchARGB32_mips_dsp;
6824
6825 destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
6826
6827 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
6828 sourceFetchUntransformed[QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
6829 sourceFetchUntransformed[QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
6830
6831#if defined(__MIPS_DSPR2__)
6832 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
6833 sourceFetchUntransformed[QImage::Format_RGB16] = qt_fetchUntransformedRGB16_mips_dspr2;
6834#else
6835 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
6836#endif // defined(__MIPS_DSPR2__)
6837#endif // defined(__MIPS_DSP__)
6838}
6839
6840// Ensure initialization if this object file is linked.
// qInitDrawhelperFunctions() is the routine defined directly above; the part of
// it visible here overwrites entries in the dispatch tables (qPixelLayouts,
// qBlendFunctions, qScaleFunctions, qTransformFunctions, qDrawHelper,
// destFetchProc, destStoreProc, qMemRotateFunctions, qt_functionForMode_C,
// qt_functionForModeSolid_C, sourceFetchUntransformed) with NEON or MIPS DSP
// variants, depending on which preprocessor guards are active.
// NOTE(review): Q_CONSTRUCTOR_FUNCTION is defined outside this file; presumably
// it arranges for the named function to be called once during static
// initialization of this translation unit -- confirm against the macro definition.
6841Q_CONSTRUCTOR_FUNCTION(qInitDrawhelperFunctions);
6842
6843QT_END_NAMESPACE
6844

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of qtbase/src/gui/painting/qdrawhelper.cpp