/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#ifndef QRGBA64_P_H
#define QRGBA64_P_H

//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//

#include "qrgba64.h"
#include "qdrawhelper_p.h"

#include <QtCore/private/qsimd_p.h>
#include <QtGui/private/qtguiglobal_p.h>

QT_BEGIN_NAMESPACE

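// Scales the alpha channel of rgba64 by alpha256 (a 0..256 factor, where 256
// preserves the original alpha); the color channels are left untouched.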
inline QRgba64 combineAlpha256(QRgba64 rgba64, uint alpha256)
{
    return QRgba64::fromRgba64(rgba64.red(), rgba64.green(), rgba64.blue(), (rgba64.alpha() * alpha256) >> 8);
}

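// Multiplies all four channels by a 16-bit factor (0..65535), dividing each
// product by 65535 so that 65535 acts as the identity weight.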
inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
{
    return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535),
                               qt_div_65535(rgba64.green() * alpha65535),
                               qt_div_65535(rgba64.blue() * alpha65535),
                               qt_div_65535(rgba64.alpha() * alpha65535));
}

#ifdef __SSE2__
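// SSE2 version of the scalar function above: multiplies the four 16-bit
// channels in the low half of rgba64 by the matching lanes of va, computing
// x / 65535 as (x + (x >> 16) + 0x8000) >> 16 like qt_div_65535 does.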
Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, __m128i va)
{
    __m128i vs = rgba64;
    vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va)); // vs = vs * va (32-bit)
    vs = _mm_add_epi32(vs, _mm_srli_epi32(vs, 16));                            // vs = vs + (vs >> 16)
    vs = _mm_add_epi32(vs, _mm_set1_epi32(0x8000));                            // vs = vs + 0x8000
    vs = _mm_srai_epi32(vs, 16);                                               // vs = vs >> 16
    vs = _mm_packs_epi32(vs, _mm_setzero_si128());
    return vs;
}
Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, uint alpha65535)
{
    // Broadcast the 16-bit alpha into the four low lanes.
    const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
    return multiplyAlpha65535(rgba64, va);
}
#endif

#if defined(__ARM_NEON__)
Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535)
{
    uint32x4_t vs32 = vmull_u16(rgba64, alpha65535); // vs = vs * alpha
    vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
    return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
}
Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535)
{
    uint32x4_t vs32 = vmull_n_u16(rgba64, alpha65535); // vs = vs * alpha
    vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
    return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
}
#endif

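// Multiplies all channels by an 8-bit alpha (0..255). On SIMD targets the
// 8-bit weight is widened to 16 bits first: alpha255 * 257 replicates the
// byte into both halves, so 255 maps to the 65535 identity weight.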
template<typename T>
inline T multiplyAlpha255(T rgba64, uint alpha255)
{
#if defined(__SSE2__) || defined(__ARM_NEON__)
    return multiplyAlpha65535(rgba64, alpha255 * 257);
#else
    return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255),
                               qt_div_255(rgba64.green() * alpha255),
                               qt_div_255(rgba64.blue() * alpha255),
                               qt_div_255(rgba64.alpha() * alpha255));
#endif
}

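// Weighted sum x * alpha1 / 255 + y * alpha2 / 255 per channel; the weights
// are expected to satisfy alpha1 + alpha2 <= 255 so the sum cannot overflow.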
inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
{
    return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2));
}

#if defined __SSE2__
Q_ALWAYS_INLINE __m128i interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2)
{
    return _mm_add_epi32(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
}
#endif

#if defined __ARM_NEON__
Q_ALWAYS_INLINE uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
{
    return vadd_u16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
}
#endif

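// 16-bit variant of interpolate255: x * alpha1 / 65535 + y * alpha2 / 65535
// per channel, with alpha1 + alpha2 expected not to exceed 65535.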
inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
{
    return QRgba64::fromRgba64(multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2));
}

#if defined __SSE2__
Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2)
{
    return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
}
// alpha2 below is const-ref because otherwise MSVC2015 complains that it can't 16-byte align the argument.
Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2)
{
    return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
}
#endif

#if defined __ARM_NEON__
Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
{
    return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
}
Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2)
{
    return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
}
#endif

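// Adds two colors channel-wise, clamping each 16-bit channel at 65535
// instead of letting it wrap around.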
inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
{
    return QRgba64::fromRgba64(qMin(a.red() + b.red(), 65535),
                               qMin(a.green() + b.green(), 65535),
                               qMin(a.blue() + b.blue(), 65535),
                               qMin(a.alpha() + b.alpha(), 65535));
}

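// The SIMD helpers below narrow four 16-bit channels to 8 bits each with
// rounding; the 16-to-8-bit conversion computes round(x / 257), the exact
// inverse of the "* 257" widening used in multiplyAlpha255.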
#if QT_COMPILER_SUPPORTS_HERE(SSE2)
QT_FUNCTION_TARGET(SSE2)
Q_ALWAYS_INLINE uint toArgb32(__m128i v)
{
    v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); // widen each 16-bit channel to 32 bits
    v = _mm_add_epi32(v, _mm_set1_epi32(128));
    v = _mm_sub_epi32(v, _mm_srli_epi32(v, 8));
    v = _mm_srli_epi32(v, 8);                       // v = ((v + 128) - ((v + 128) >> 8)) >> 8
    v = _mm_packs_epi32(v, v);
    v = _mm_packus_epi16(v, v);                     // pack down to four 8-bit channels
    return _mm_cvtsi128_si32(v);
}
#elif defined __ARM_NEON__
Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v)
{
    v = vsub_u16(v, vrshr_n_u16(v, 8)); // v = v - ((v + 128) >> 8)
    v = vrshr_n_u16(v, 8);              // v = (v + 128) >> 8
    uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v));
    return vget_lane_u32(vreinterpret_u32_u8(v8), 0);
}
#endif

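// Converts a QRgba64 to packed 32-bit ARGB. The SIMD paths first shuffle the
// channels from RGBA memory order into ARGB order, then narrow with rounding.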
Q_ALWAYS_INLINE uint toArgb32(QRgba64 rgba64)
{
#if defined __SSE2__
    __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
    v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2));
    return toArgb32(v);
#elif defined __ARM_NEON__
    uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
    const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 };
    v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask));
#else
    v = vext_u16(v, v, 3);
#endif
    return toArgb32(v);
#else
    return rgba64.toArgb32();
#endif
}

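// Converts a QRgba64 to packed 32-bit RGBA. The channel order in memory
// already matches, so the SIMD paths reuse toArgb32(v) without any shuffle.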
Q_ALWAYS_INLINE uint toRgba8888(QRgba64 rgba64)
{
#if defined __SSE2__
    __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
    return toArgb32(v);
#elif defined __ARM_NEON__
    uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
    return toArgb32(v);
#else
    return ARGB2RGBA(toArgb32(rgba64));
#endif
}

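// Blends s over d with an independent 8-bit weight for each color channel,
// as used for subpixel-antialiased (RGB mask) text rendering.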
inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
{
    QRgba64 blend;
#if defined(__SSE2__)
    __m128i vd = _mm_loadl_epi64((const __m128i *)&d);
    __m128i vs = _mm_loadl_epi64((const __m128i *)&s);
    __m128i va = _mm_cvtsi32_si128(rgbAlpha);
    va = _mm_unpacklo_epi8(va, va);                        // widen the weight bytes to 16 bits
    va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(3, 0, 1, 2)); // reorder from ARGB into RGBA lane order
    __m128i vb = _mm_xor_si128(_mm_set1_epi16(-1), va);    // vb = 65535 - va

    vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
    vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb));
    vd = _mm_add_epi32(vd, vs);
    vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16));
    vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000));
    vd = _mm_srai_epi32(vd, 16);
    vd = _mm_packs_epi32(vd, _mm_setzero_si128());

    _mm_storel_epi64((__m128i *)&blend, vd);
#elif defined(__ARM_NEON__)
    uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d));
    uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s));
    uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(ARGB2RGBA(rgbAlpha)));
    uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]);
    uint16x4_t vb = vdup_n_u16(0xffff);
    vb = vsub_u16(vb, va);

    uint32x4_t vs32 = vmull_u16(vs, va);
    uint32x4_t vd32 = vmull_u16(vd, vb);
    vd32 = vaddq_u32(vd32, vs32);
    vd32 = vsraq_n_u32(vd32, vd32, 16);
    vd = vrshrn_n_u32(vd32, 16);
    vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd));
#else
    const int mr = qRed(rgbAlpha);
    const int mg = qGreen(rgbAlpha);
    const int mb = qBlue(rgbAlpha);
    blend = qRgba64(qt_div_255(s.red() * mr + d.red() * (255 - mr)),
                    qt_div_255(s.green() * mg + d.green() * (255 - mg)),
                    qt_div_255(s.blue() * mb + d.blue() * (255 - mb)),
                    s.alpha());
#endif
    return blend;
}

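// Source-over composition of one premultiplied pixel: an opaque source
// replaces the destination, a fully transparent one leaves it unchanged,
// everything else blends as src + dst * (65535 - src.alpha) / 65535.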
static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src)
{
    if (src.isOpaque())
        dst = src;
    else if (!src.isTransparent())
        dst = src + multiplyAlpha65535(dst, 65535 - src.alpha());
}

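// Same source-over composition, but with an additional constant opacity
// (0..255) applied to the source pixel first.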
static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha)
{
    if (const_alpha == 255)
        return blend_pixel(dst, src);
    if (!src.isTransparent()) {
        src = multiplyAlpha255(src, const_alpha);
        dst = src + multiplyAlpha65535(dst, 65535 - src.alpha());
    }
}

QT_END_NAMESPACE

#endif // QRGBA64_P_H