/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#ifndef QRGBA64_P_H
#define QRGBA64_P_H

//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//

#include "qrgba64.h"
#include "qdrawhelper_p.h"

#include <QtCore/private/qsimd_p.h>
#include <QtGui/private/qtguiglobal_p.h>

QT_BEGIN_NAMESPACE

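// Scales the alpha channel of rgba64 by alpha256 (a 0..256 factor, where 256
// preserves the original alpha); the color channels are left untouched.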
inline QRgba64 combineAlpha256(QRgba64 rgba64, uint alpha256)
{
    return QRgba64::fromRgba64(rgba64.red(), rgba64.green(), rgba64.blue(), (rgba64.alpha() * alpha256) >> 8);
}

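// Multiplies all four channels by a 16-bit factor (0..65535), dividing each
// product by 65535 so that 65535 acts as the identity weight.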
inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
{
    return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535),
                               qt_div_65535(rgba64.green() * alpha65535),
                               qt_div_65535(rgba64.blue() * alpha65535),
                               qt_div_65535(rgba64.alpha() * alpha65535));
}

#ifdef __SSE2__
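// SSE2 version of the scalar function above: multiplies the four 16-bit
// channels in the low half of rgba64 by the matching lanes of va, computing
// x / 65535 as (x + (x >> 16) + 0x8000) >> 16 like qt_div_65535 does.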
Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, __m128i va)
{
    __m128i vs = rgba64;
    vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va)); // vs = vs * va (32-bit)
    vs = _mm_add_epi32(vs, _mm_srli_epi32(vs, 16));                            // vs = vs + (vs >> 16)
    vs = _mm_add_epi32(vs, _mm_set1_epi32(0x8000));                            // vs = vs + 0x8000
    vs = _mm_srai_epi32(vs, 16);                                               // vs = vs >> 16
    vs = _mm_packs_epi32(vs, _mm_setzero_si128());
    return vs;
}
Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, uint alpha65535)
{
    // Broadcast the 16-bit alpha into the four low lanes.
    const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
    return multiplyAlpha65535(rgba64, va);
}
#endif

#if defined(__ARM_NEON__)
Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535)
{
    uint32x4_t vs32 = vmull_u16(rgba64, alpha65535); // vs = vs * alpha
    vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
    return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
}
Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535)
{
    uint32x4_t vs32 = vmull_n_u16(rgba64, alpha65535); // vs = vs * alpha
    vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
    return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
}
#endif

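// Multiplies all channels by an 8-bit alpha (0..255). On SIMD targets the
// 8-bit weight is widened to 16 bits first: alpha255 * 257 replicates the
// byte into both halves, so 255 maps to the 65535 identity weight.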
template<typename T>
inline T multiplyAlpha255(T rgba64, uint alpha255)
{
#if defined(__SSE2__) || defined(__ARM_NEON__)
    return multiplyAlpha65535(rgba64, alpha255 * 257);
#else
    return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255),
                               qt_div_255(rgba64.green() * alpha255),
                               qt_div_255(rgba64.blue() * alpha255),
                               qt_div_255(rgba64.alpha() * alpha255));
#endif
}

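// Weighted sum x * alpha1 / 255 + y * alpha2 / 255 per channel; the weights
// are expected to satisfy alpha1 + alpha2 <= 255 so the sum cannot overflow.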
inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
{
    return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2));
}

#if defined __SSE2__
Q_ALWAYS_INLINE __m128i interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2)
{
    return _mm_add_epi32(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
}
#endif

#if defined __ARM_NEON__
Q_ALWAYS_INLINE uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
{
    return vadd_u16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
}
#endif

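// 16-bit variant of interpolate255: x * alpha1 / 65535 + y * alpha2 / 65535
// per channel, with alpha1 + alpha2 expected not to exceed 65535.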
inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
{
    return QRgba64::fromRgba64(multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2));
}

#if defined __SSE2__
Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2)
{
    return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
}
// alpha2 below is const-ref because otherwise MSVC2015 complains that it can't 16-byte align the argument.
Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2)
{
    return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
}
#endif

#if defined __ARM_NEON__
Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
{
    return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
}
Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2)
{
    return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
}
#endif

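// Adds two colors channel-wise, clamping each 16-bit channel at 65535
// instead of letting it wrap around.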
inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
{
    return QRgba64::fromRgba64(qMin(a.red() + b.red(), 65535),
                               qMin(a.green() + b.green(), 65535),
                               qMin(a.blue() + b.blue(), 65535),
                               qMin(a.alpha() + b.alpha(), 65535));
}

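// The SIMD helpers below narrow four 16-bit channels to 8 bits each with
// rounding; the 16-to-8-bit conversion computes round(x / 257), the exact
// inverse of the "* 257" widening used in multiplyAlpha255.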
#if QT_COMPILER_SUPPORTS_HERE(SSE2)
QT_FUNCTION_TARGET(SSE2)
Q_ALWAYS_INLINE uint toArgb32(__m128i v)
{
    v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); // widen each 16-bit channel to 32 bits
    v = _mm_add_epi32(v, _mm_set1_epi32(128));
    v = _mm_sub_epi32(v, _mm_srli_epi32(v, 8));
    v = _mm_srli_epi32(v, 8);                       // v = ((v + 128) - ((v + 128) >> 8)) >> 8
    v = _mm_packs_epi32(v, v);
    v = _mm_packus_epi16(v, v);                     // pack down to four 8-bit channels
    return _mm_cvtsi128_si32(v);
}
#elif defined __ARM_NEON__
Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v)
{
    v = vsub_u16(v, vrshr_n_u16(v, 8)); // v = v - ((v + 128) >> 8)
    v = vrshr_n_u16(v, 8);              // v = (v + 128) >> 8
    uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v));
    return vget_lane_u32(vreinterpret_u32_u8(v8), 0);
}
#endif

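// Converts a QRgba64 to packed 32-bit ARGB. The SIMD paths first shuffle the
// channels from RGBA memory order into ARGB order, then narrow with rounding.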
Q_ALWAYS_INLINE uint toArgb32(QRgba64 rgba64)
{
#if defined __SSE2__
    __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
    v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2));
    return toArgb32(v);
#elif defined __ARM_NEON__
    uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
    const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 };
    v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask));
#else
    v = vext_u16(v, v, 3);
#endif
    return toArgb32(v);
#else
    return rgba64.toArgb32();
#endif
}

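// Converts a QRgba64 to packed 32-bit RGBA. The channel order in memory
// already matches, so the SIMD paths reuse toArgb32(v) without any shuffle.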
Q_ALWAYS_INLINE uint toRgba8888(QRgba64 rgba64)
{
#if defined __SSE2__
    __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
    return toArgb32(v);
#elif defined __ARM_NEON__
    uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
    return toArgb32(v);
#else
    return ARGB2RGBA(toArgb32(rgba64));
#endif
}

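// Blends s over d with an independent 8-bit weight for each color channel,
// as used for subpixel-antialiased (RGB mask) text rendering.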
inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
{
    QRgba64 blend;
#if defined(__SSE2__)
    __m128i vd = _mm_loadl_epi64((const __m128i *)&d);
    __m128i vs = _mm_loadl_epi64((const __m128i *)&s);
    __m128i va = _mm_cvtsi32_si128(rgbAlpha);
    va = _mm_unpacklo_epi8(va, va);                        // widen the weight bytes to 16 bits
    va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(3, 0, 1, 2)); // reorder from ARGB into RGBA lane order
    __m128i vb = _mm_xor_si128(_mm_set1_epi16(-1), va);    // vb = 65535 - va

    vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
    vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb));
    vd = _mm_add_epi32(vd, vs);
    vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16));
    vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000));
    vd = _mm_srai_epi32(vd, 16);
    vd = _mm_packs_epi32(vd, _mm_setzero_si128());

    _mm_storel_epi64((__m128i *)&blend, vd);
#elif defined(__ARM_NEON__)
    uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d));
    uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s));
    uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(ARGB2RGBA(rgbAlpha)));
    uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]);
    uint16x4_t vb = vdup_n_u16(0xffff);
    vb = vsub_u16(vb, va);

    uint32x4_t vs32 = vmull_u16(vs, va);
    uint32x4_t vd32 = vmull_u16(vd, vb);
    vd32 = vaddq_u32(vd32, vs32);
    vd32 = vsraq_n_u32(vd32, vd32, 16);
    vd = vrshrn_n_u32(vd32, 16);
    vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd));
#else
    const int mr = qRed(rgbAlpha);
    const int mg = qGreen(rgbAlpha);
    const int mb = qBlue(rgbAlpha);
    blend = qRgba64(qt_div_255(s.red() * mr + d.red() * (255 - mr)),
                    qt_div_255(s.green() * mg + d.green() * (255 - mg)),
                    qt_div_255(s.blue() * mb + d.blue() * (255 - mb)),
                    s.alpha());
#endif
    return blend;
}

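// Source-over composition of one premultiplied pixel: an opaque source
// replaces the destination, a fully transparent one leaves it unchanged,
// everything else blends as src + dst * (65535 - src.alpha) / 65535.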
static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src)
{
    if (src.isOpaque())
        dst = src;
    else if (!src.isTransparent())
        dst = src + multiplyAlpha65535(dst, 65535 - src.alpha());
}

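// Same source-over composition, but with an additional constant opacity
// (0..255) applied to the source pixel first.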
static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha)
{
    if (const_alpha == 255)
        return blend_pixel(dst, src);
    if (!src.isTransparent()) {
        src = multiplyAlpha255(src, const_alpha);
        dst = src + multiplyAlpha65535(dst, 65535 - src.alpha());
    }
}

QT_END_NAMESPACE

#endif // QRGBA64_P_H