vector4d_sse_p.h source code [qt3d/src/core/transforms/vector4d_sse_p.h]

1	/****************************************************************************
2	**
3	** Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
4	** Contact: https://www.qt.io/licensing/
5	**
6	** This file is part of the Qt3D module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial License Usage
10	** Licensees holding valid commercial Qt licenses may use this file in
11	** accordance with the commercial license agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and The Qt Company. For licensing terms
14	** and conditions see https://www.qt.io/terms-conditions. For further
15	** information use the contact form at https://www.qt.io/contact-us.
16	**
17	** GNU Lesser General Public License Usage
18	** Alternatively, this file may be used under the terms of the GNU Lesser
19	** General Public License version 3 as published by the Free Software
20	** Foundation and appearing in the file LICENSE.LGPL3 included in the
21	** packaging of this file. Please review the following information to
22	** ensure the GNU Lesser General Public License version 3 requirements
23	** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24	**
25	** GNU General Public License Usage
26	** Alternatively, this file may be used under the terms of the GNU
27	** General Public License version 2.0 or (at your option) the GNU General
28	** Public license version 3 or any later version approved by the KDE Free
29	** Qt Foundation. The licenses are as published by the Free Software
30	** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31	** included in the packaging of this file. Please review the following
32	** information to ensure the GNU General Public License requirements will
33	** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34	** https://www.gnu.org/licenses/gpl-3.0.html.
35	**
36	** $QT_END_LICENSE$
37	**
38	****************************************************************************/
39
40	#ifndef QT3DCORE_VECTOR4D_SSE_P_H
41	#define QT3DCORE_VECTOR4D_SSE_P_H
42
43	//
44	// W A R N I N G
45	// -------------
46	//
47	// This file is not part of the Qt3D API. It exists purely as an
48	// implementation detail. This header file may change from version to
49	// version without notice, or even be removed.
50	//
51	// We mean it.
52	//
53
54	#include <Qt3DCore/private/vector3d_p.h>
55	#include <QtGui/qvector4d.h>
56
57	#ifdef QT_COMPILER_SUPPORTS_SSE2
58
59	QT_BEGIN_NAMESPACE
60
61	namespace Qt3DCore {
62
63	class Matrix4x4_SSE;
64	class Matrix4x4_AVX2;
65
66	class Vector4D_SSE
67	{
68	public:
69	Q_ALWAYS_INLINE Vector4D_SSE()
70	: m_xyzw(_mm_setzero_ps())
71	{
72	}
73
74	explicit Q_ALWAYS_INLINE Vector4D_SSE(Qt::Initialization) {}
75
76	explicit Q_ALWAYS_INLINE Vector4D_SSE(float x, float y, float z, float w)
77	: m_xyzw(_mm_set_ps(z: w, y: z, x: y, w: x))
78	{
79	}
80
81	explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector4D v)
82	: m_xyzw(_mm_set_ps(z: v.w(), y: v.z(), x: v.y(), w: v.x()))
83	{
84	}
85
86	explicit Q_ALWAYS_INLINE Vector4D_SSE(const Vector3D_SSE &vec3, float w = `0.0f`)
87	: m_xyzw(vec3.m_xyzw)
88	{
89	setW(w);
90	}
91
92	explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector3D v, float w = `0.0f`)
93	: m_xyzw(_mm_set_ps(z: w, y: v.z(), x: v.y(), w: v.x()))
94	{
95	}
96
97	Q_ALWAYS_INLINE Vector4D_SSE &operator+=(Vector4D_SSE vector)
98	{
99	m_xyzw = _mm_add_ps(a: m_xyzw, b: vector.m_xyzw);
100	return *this;
101	}
102
103	Q_ALWAYS_INLINE Vector4D_SSE &operator-=(Vector4D_SSE vector)
104	{
105	m_xyzw = _mm_sub_ps(a: m_xyzw, b: vector.m_xyzw);
106	return *this;
107	}
108
109	Q_ALWAYS_INLINE Vector4D_SSE &operator*=(Vector4D_SSE vector)
110	{
111	m_xyzw = _mm_mul_ps(a: m_xyzw, b: vector.m_xyzw);
112	return *this;
113	}
114
115	Q_ALWAYS_INLINE Vector4D_SSE &operator/=(Vector4D_SSE vector)
116	{
117	m_xyzw = _mm_div_ps(a: m_xyzw, b: vector.m_xyzw);
118	return *this;
119	}
120
121	Q_ALWAYS_INLINE Vector4D_SSE &operator=(float* factor)
122	{
123	m_xyzw = _mm_mul_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
124	return *this;
125	}
126
127	Q_ALWAYS_INLINE Vector4D_SSE &operator/=(float factor)
128	{
129	m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
130	return *this;
131	}
132
133	Q_ALWAYS_INLINE bool operator==(Vector4D_SSE other) const
134	{
135	// 0b1111 == 0xf
136	return (_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: other.m_xyzw)) == `0xf`);
137	}
138
139	Q_ALWAYS_INLINE bool operator!=(Vector4D_SSE other) const
140	{
141	return !(*this == other);
142	}
143
144	Q_ALWAYS_INLINE QVector4D toQVector4D() const
145	{
146	return QVector4D (x(), y(), z(), w());
147	}
148
149	// TODO: Uncomment when we introduce Vector3D_SSE
150	//Q_ALWAYS_INLINE Vector3D_SSE toVector3D() const { return Vector3D_SSE(this); }*
151
152	Q_ALWAYS_INLINE float lengthSquared() const
153	{
154	return dotProduct(a: *this, b: *this);
155	}
156
157	Q_ALWAYS_INLINE float length() const
158	{
159	return sqrt(x: dotProduct(a: *this, b: *this));
160	}
161
162	Q_ALWAYS_INLINE void normalize()
163	{
164	const float len = length();
165	m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set_ps1(w: len));
166	}
167
168	Q_ALWAYS_INLINE Vector4D_SSE normalized() const
169	{
170	Vector4D_SSE v = *this;
171	v.normalize();
172	return v;
173	}
174
175	Q_ALWAYS_INLINE bool isNull() const
176	{
177	// 0b1111 == 0xf
178	return _mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: _mm_setzero_ps())) == `0xf`;
179	}
180
181	Q_ALWAYS_INLINE float x() const { return _mm_cvtss_f32(a: m_xyzw); }
182
183	Q_ALWAYS_INLINE float y() const
184	{
185	// 0b01010101 = 0x55
186	return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, `0x55`));
187	}
188
189	Q_ALWAYS_INLINE float z() const
190	{
191	// 0b10101010 = 0xaa
192	return _mm_cvtss_f32(a: _mm_unpackhi_ps(a: m_xyzw, b: m_xyzw));
193	}
194
195	Q_ALWAYS_INLINE float w() const
196	{
197	// 0b11111111 = 0xff
198	return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, `0xff`));
199	}
200
201	Q_ALWAYS_INLINE void setX(float x)
202	{
203	m_xyzw = _mm_move_ss(a: m_xyzw, b: _mm_set_ss(w: x));
204	}
205
206	Q_ALWAYS_INLINE void setY(float y)
207	{
208	// m_xyzw = a, b, c, d
209
210	// y, y, y, y
211	const __m128 yVec = _mm_set_ps1(w: y);
212
213	// y, y, a, a
214	// 0b00000000 == 0x0
215	const __m128 yaVec = _mm_shuffle_ps(yVec, m_xyzw, `0x0`);
216
217	// a, y, c, d
218	// 0b11100010 == 0xe2
219	m_xyzw = _mm_shuffle_ps(yaVec, m_xyzw, `0xe2`);
220	}
221
222	Q_ALWAYS_INLINE void setZ(float z)
223	{
224	// m_xyzw = a, b, c, d
225
226	// z, z, z, z
227	const __m128 zVec = _mm_set_ps1(w: z);
228
229	// z, z, d, d
230	// 0b11110000 == 0xf0
231	const __m128 zdVec = _mm_shuffle_ps(zVec, m_xyzw, `0xf0`);
232
233	// a, b, z, d
234	// 0b10000100 == 0x84
235	m_xyzw = _mm_shuffle_ps(m_xyzw, zdVec, `0x84`);
236	}
237
238	Q_ALWAYS_INLINE void setW(float w)
239	{
240	#ifdef __SSE4_1__
241	const __m128 wVec = _mm_set_ss(w);
242	// insert element 0 of wVec into position 3 in vec3, don't zero anything
243	m_xyzw = _mm_insert_ps(m_xyzw, wVec, `0x30`);
244	#else
245	// m_xyzw = a, b, c, d
246
247	// w, w, w, w
248	const __m128 wVec = _mm_set_ps1(w: w);
249
250	// c, c, w, w
251	const __m128 cwVec = _mm_shuffle_ps(m_xyzw, wVec, _MM_SHUFFLE(`0`, `0`, `2`, `2`));
252
253	// a, b, c, w
254	m_xyzw = _mm_shuffle_ps(m_xyzw, cwVec, _MM_SHUFFLE(`2`, `0`, `1`, `0`));
255	#endif
256	}
257
258	Q_ALWAYS_INLINE float operator[](int idx) const
259	{
260	Q_DECL_ALIGN(`16`) float vec[`4`];
261	_mm_store_ps(p: vec, a: m_xyzw);
262	return vec[idx];
263	}
264
265	struct DigitWrapper
266	{
267	explicit DigitWrapper(int idx, Vector4D_SSE *vec)
268	: m_vec(vec)
269	, m_idx(idx)
270	{}
271
272	operator float() const
273	{
274	switch (m_idx) {
275	case `0`:
276	return m_vec->x();
277	case `1`:
278	return m_vec->y();
279	case `2`:
280	return m_vec->z();
281	case `3`:
282	return m_vec->w();
283	default:
284	Q_UNREACHABLE();
285	return `0.0f`;
286	}
287	}
288	void operator =(float value)
289	{
290	switch (m_idx) {
291	case `0`:
292	m_vec->setX(value);
293	break;
294	case `1`:
295	m_vec->setY(value);
296	break;
297	case `2`:
298	m_vec->setZ(value);
299	break;
300	case `3`:
301	m_vec->setW(value);
302	break;
303	default:
304	Q_UNREACHABLE();
305	}
306	}
307
308	private:
309	Vector4D_SSE *m_vec;
310	const int m_idx;
311	};
312
313	Q_ALWAYS_INLINE DigitWrapper operator[](int idx)
314	{
315	return DigitWrapper (idx, this);
316	}
317
318	static Q_ALWAYS_INLINE float dotProduct(Vector4D_SSE a, Vector4D_SSE b)
319	{
320	#if defined(__SSE4_1__)
321	// 0b11111111 = 0xff
322	return _mm_cvtss_f32(_mm_dp_ps(a.m_xyzw, b.m_xyzw, `0xff`));
323	#elif defined(__SSE3__)
324	const __m128 mult = _mm_mul_ps(a.m_xyzw, b.m_xyzw);
325	// a + b, c + d, a + d, c + d
326	const __m128 partialSum = _mm_hadd_ps(mult, mult);
327	// c + d, ......
328	// 0x00000001 =
329	const __m128 partialSumShuffle = _mm_shuffle_ps(partialSum, partialSum, `0x1`);
330	return _mm_cvtss_f32(_mm_hadd_ps(partialSum, partialSumShuffle));
331	#else
332	const __m128 mult = _mm_mul_ps(a: a.m_xyzw, b: b.m_xyzw);
333	// (multX, multY, 0, 0) + (multZ, multW, 0, 0) -> (multX + multZ, multY + multW, 0, 0)
334	// 0b00001110 == 0xe
335	const __m128 shuffled = _mm_shuffle_ps(mult, mult, `0xe`);
336	__m128 result = _mm_add_ps(a: shuffled, b: mult);
337	// (multX + multZ, 0, 0, 0) + (multY + multW, 0, 0, 0);
338	// 0b00000001 == 0x1
339	const __m128 shuffled2 = _mm_shuffle_ps(result, result, `0x1`);
340	result = _mm_add_ps(a: result, b: shuffled2);
341	return _mm_cvtss_f32(a: result);
342	#endif
343	}
344
345	friend class Matrix4x4_SSE;
346
347	#ifdef __AVX2__
348	friend class Matrix4x4_AVX2;
349	friend Vector4D_SSE operator(const* Vector4D_SSE &vector, const Matrix4x4_AVX2 &matrix);
350	friend Vector4D_SSE operator(const* Matrix4x4_AVX2 &matrix, const Vector4D_SSE &vector);
351	#endif
352
353	friend class Vector3D_SSE;
354	friend Vector4D_SSE operator(const* Vector4D_SSE &vector, const Matrix4x4_SSE &matrix);
355	friend Vector4D_SSE operator(const* Matrix4x4_SSE &matrix, const Vector4D_SSE &vector);
356
357	friend Q_ALWAYS_INLINE const Vector4D_SSE operator+(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 += v2; }
358	friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 -= v2; }
359	friend Q_ALWAYS_INLINE const Vector4D_SSE operator(float* factor, Vector4D_SSE vector) { return vector *= factor; }
360	friend Q_ALWAYS_INLINE const Vector4D_SSE operator(Vector4D_SSE vector, float* factor) { return vector *= factor; }
361	friend Q_ALWAYS_INLINE const Vector4D_SSE operator(Vector4D_SSE v1, Vector4D_SSE v2) { return* v1 *= v2; }
362	friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE vector)
363	{
364	Vector4D_SSE c(Qt::Uninitialized);
365
366	c.m_xyzw = _mm_xor_ps(a: vector.m_xyzw, b: _mm_set1_ps(w: -`0.0f`));
367
368	return c;
369	}
370
371	friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, float divisor) { return vector /= divisor; }
372	friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, Vector4D_SSE divisor) { return vector /= divisor; }
373
374	friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Vector4D_SSE &v);
375	friend Q_ALWAYS_INLINE bool qFuzzyCompare(const Vector4D_SSE& v1, const Vector4D_SSE& v2)
376	{
377	return ::qFuzzyCompare(p1: v1.x(), p2: v2.x()) &&
378	::qFuzzyCompare(p1: v1.y(), p2: v2.y()) &&
379	::qFuzzyCompare(p1: v1.z(), p2: v2.z()) &&
380	::qFuzzyCompare(p1: v1.w(), p2: v2.w());
381	}
382
383	private:
384	// Q_DECL_ALIGN(16) float m[4];// for SSE support
385	__m128 m_xyzw;
386	};
387
388	} // Qt3DCore
389
// Declares Vector4D_SSE to Qt's type-info system with the Q_PRIMITIVE_TYPE flag.
390	Q_DECLARE_TYPEINFO(Qt3DCore::Vector4D_SSE, Q_PRIMITIVE_TYPE);
391
392	QT_END_NAMESPACE
393
// Declares Vector4D_SSE to Qt's meta-type system; placed after QT_END_NAMESPACE
// and spelled with the fully qualified type name.
394	Q_DECLARE_METATYPE(Qt3DCore::Vector4D_SSE)
395
396	#endif // QT_COMPILER_SUPPORTS_SSE2
397
398	#endif // QT3DCORE_VECTOR4D_SSE_P_H
399

Provided by KDAB

Definitions

source code of qt3d/src/core/transforms/vector4d_sse_p.h