1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include <qimage.h> |
5 | #include <private/qimage_p.h> |
6 | #include <private/qsimd_p.h> |
7 | |
8 | #ifdef QT_COMPILER_SUPPORTS_SSSE3 |
9 | |
10 | QT_BEGIN_NAMESPACE |
11 | |
12 | // Convert a scanline of RGB888 (src) to RGB32 (dst) |
13 | // src must be at least len * 3 bytes |
14 | // dst must be at least len * 4 bytes |
15 | Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len) |
16 | { |
17 | int i = 0; |
18 | |
19 | // Prologue, align dst to 16 bytes. |
20 | ALIGNMENT_PROLOGUE_16BYTES(dst, i, len) { |
21 | dst[i] = qRgb(r: src[0], g: src[1], b: src[2]); |
22 | src += 3; |
23 | } |
24 | |
25 | // Mask the 4 first colors of the RGB888 vector |
26 | const __m128i shuffleMask = _mm_set_epi8(b15: char(0xff), b14: 9, b13: 10, b12: 11, b11: char(0xff), b10: 6, b9: 7, b8: 8, b7: char(0xff), b6: 3, b5: 4, b4: 5, b3: char(0xff), b2: 0, b1: 1, b0: 2); |
27 | |
28 | // Mask the 4 last colors of a RGB888 vector with an offset of 1 (so the last 3 bytes are RGB) |
29 | const __m128i shuffleMaskEnd = _mm_set_epi8(b15: char(0xff), b14: 13, b13: 14, b12: 15, b11: char(0xff), b10: 10, b9: 11, b8: 12, b7: char(0xff), b6: 7, b5: 8, b4: 9, b3: char(0xff), b2: 4, b1: 5, b0: 6); |
30 | |
31 | // Mask to have alpha = 0xff |
32 | const __m128i alphaMask = _mm_set1_epi32(i: 0xff000000); |
33 | |
34 | const __m128i *inVectorPtr = (const __m128i *)src; |
35 | __m128i *dstVectorPtr = (__m128i *)(dst + i); |
36 | |
37 | for (; i < (len - 15); i += 16) { // one iteration in the loop converts 16 pixels |
38 | /* |
39 | RGB888 has 5 pixels per vector, + 1 byte from the next pixel. The idea here is |
40 | to load vectors of RGB888 and use palignr to select a vector out of two vectors. |
41 | |
42 | After 3 loads of RGB888 and 3 stores of RGB32, we have 4 pixels left in the last |
43 | vector of RGB888, we can mask it directly to get a last store or RGB32. After that, |
44 | the first next byte is a R, and we can loop for the next 16 pixels. |
45 | |
46 | The conversion itself is done with a byte permutation (pshufb). |
47 | */ |
48 | __m128i firstSrcVector = _mm_lddqu_si128(p: inVectorPtr); |
49 | __m128i outputVector = _mm_shuffle_epi8(a: firstSrcVector, b: shuffleMask); |
50 | _mm_store_si128(p: dstVectorPtr, b: _mm_or_si128(a: outputVector, b: alphaMask)); |
51 | ++inVectorPtr; |
52 | ++dstVectorPtr; |
53 | |
54 | // There are 4 unused bytes left in srcVector, we need to load the next 16 bytes |
55 | // and load the next input with palignr |
56 | __m128i secondSrcVector = _mm_lddqu_si128(p: inVectorPtr); |
57 | __m128i srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 12); |
58 | outputVector = _mm_shuffle_epi8(a: srcVector, b: shuffleMask); |
59 | _mm_store_si128(p: dstVectorPtr, b: _mm_or_si128(a: outputVector, b: alphaMask)); |
60 | ++inVectorPtr; |
61 | ++dstVectorPtr; |
62 | firstSrcVector = secondSrcVector; |
63 | |
64 | // We now have 8 unused bytes left in firstSrcVector |
65 | secondSrcVector = _mm_lddqu_si128(p: inVectorPtr); |
66 | srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 8); |
67 | outputVector = _mm_shuffle_epi8(a: srcVector, b: shuffleMask); |
68 | _mm_store_si128(p: dstVectorPtr, b: _mm_or_si128(a: outputVector, b: alphaMask)); |
69 | ++inVectorPtr; |
70 | ++dstVectorPtr; |
71 | |
72 | // There are now 12 unused bytes in firstSrcVector. |
73 | // We can mask them directly, almost there. |
74 | outputVector = _mm_shuffle_epi8(a: secondSrcVector, b: shuffleMaskEnd); |
75 | _mm_store_si128(p: dstVectorPtr, b: _mm_or_si128(a: outputVector, b: alphaMask)); |
76 | ++dstVectorPtr; |
77 | } |
78 | src = (const uchar *)inVectorPtr; |
79 | |
80 | SIMD_EPILOGUE(i, len, 15) { |
81 | dst[i] = qRgb(r: src[0], g: src[1], b: src[2]); |
82 | src += 3; |
83 | } |
84 | } |
85 | |
86 | void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) |
87 | { |
88 | Q_ASSERT(src->format == QImage::Format_RGB888 || src->format == QImage::Format_BGR888); |
89 | if (src->format == QImage::Format_BGR888) |
90 | Q_ASSERT(dest->format == QImage::Format_RGBX8888 || dest->format == QImage::Format_RGBA8888 || dest->format == QImage::Format_RGBA8888_Premultiplied); |
91 | else |
92 | Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied); |
93 | Q_ASSERT(src->width == dest->width); |
94 | Q_ASSERT(src->height == dest->height); |
95 | |
96 | const uchar *src_data = (uchar *) src->data; |
97 | quint32 *dest_data = (quint32 *) dest->data; |
98 | |
99 | for (int i = 0; i < src->height; ++i) { |
100 | qt_convert_rgb888_to_rgb32_ssse3(dst: dest_data, src: src_data, len: src->width); |
101 | src_data += src->bytes_per_line; |
102 | dest_data = (quint32 *)((uchar*)dest_data + dest->bytes_per_line); |
103 | } |
104 | } |
105 | |
106 | QT_END_NAMESPACE |
107 | |
108 | #endif // QT_COMPILER_SUPPORTS_SSSE3 |
109 | |