vector3d_sse_p.h source code [qt3d/src/core/transforms/vector3d_sse_p.h]

1	// Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
2	// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4	#ifndef QT3DCORE_VECTOR3D_SSE_P_H
5	#define QT3DCORE_VECTOR3D_SSE_P_H
6
7	//
8	// W A R N I N G
9	// -------------
10	//
11	// This file is not part of the Qt3D API. It exists purely as an
12	// implementation detail. This header file may change from version to
13	// version without notice, or even be removed.
14	//
15	// We mean it.
16	//
17
18	#include <Qt3DCore/private/qt3dcore_global_p.h>
19	#include <QtCore/private/qsimd_p.h>
20	#include <QtCore/QtGlobal>
21	#include <QtGui/qvector3d.h>
22	#include <QDebug>
23	#include <math.h>
24
25	#ifdef __SSE2__
26
27	QT_BEGIN_NAMESPACE
28
29	namespace Qt3DCore {
30
31	class Matrix4x4_SSE;
32	class Vector4D_SSE;
33
34	class Vector3D_SSE
35	{
36	public:
37
38	Q_ALWAYS_INLINE Vector3D_SSE()
39	: m_xyzw(_mm_setzero_ps())
40	{
41	}
42
43	explicit Q_ALWAYS_INLINE Vector3D_SSE(Qt::Initialization) {}
44
45	explicit Q_ALWAYS_INLINE Vector3D_SSE(float x, float y, float z)
46	: m_xyzw(_mm_set_ps(z: `0.0f`, y: z, x: y, w: x))
47	{
48	}
49
50	explicit Q_ALWAYS_INLINE Vector3D_SSE(QVector3D v)
51	: m_xyzw(_mm_set_ps(z: `0.0f`, y: v.z(), x: v.y(), w: v.x()))
52	{
53	}
54
55	explicit Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE(const Vector4D_SSE &v);
56
57	Q_ALWAYS_INLINE Vector3D_SSE &operator+=(Vector3D_SSE vector)
58	{
59	m_xyzw = _mm_add_ps(a: m_xyzw, b: vector.m_xyzw);
60	return *this;
61	}
62
63	Q_ALWAYS_INLINE Vector3D_SSE &operator-=(Vector3D_SSE vector)
64	{
65	m_xyzw = _mm_sub_ps(a: m_xyzw, b: vector.m_xyzw);
66	return *this;
67	}
68
69	Q_ALWAYS_INLINE Vector3D_SSE &operator*=(Vector3D_SSE vector)
70	{
71	m_xyzw = _mm_mul_ps(a: m_xyzw, b: vector.m_xyzw);
72	return *this;
73	}
74
75	Q_ALWAYS_INLINE Vector3D_SSE &operator/=(Vector3D_SSE vector)
76	{
77	m_xyzw = _mm_div_ps(a: m_xyzw, b: vector.m_xyzw);
78	return *this;
79	}
80
81	Q_ALWAYS_INLINE Vector3D_SSE &operator=(float* factor)
82	{
83	m_xyzw = _mm_mul_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
84	return *this;
85	}
86
87	Q_ALWAYS_INLINE Vector3D_SSE &operator/=(float factor)
88	{
89	m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
90	return *this;
91	}
92
93	Q_ALWAYS_INLINE bool operator==(Vector3D_SSE other) const
94	{
95	// 0b111 == 0x7
96	return ((_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: other.m_xyzw)) & `0x7`) == `0x7`);
97	}
98
99	Q_ALWAYS_INLINE bool operator!=(Vector3D_SSE other) const
100	{
101	return !(*this == other);
102	}
103
104	Q_ALWAYS_INLINE QVector3D toQVector3D() const
105	{
106	return QVector3D (x(), y(), z());
107	}
108
109	Q_ALWAYS_INLINE float lengthSquared() const
110	{
111	return Qt3DCore::Vector3D_SSE::dotProduct(a: *this, b: *this);
112	}
113
114	Q_ALWAYS_INLINE float length() const
115	{
116	return sqrt(x: Qt3DCore::Vector3D_SSE::dotProduct(a: *this, b: *this));
117	}
118
119	Q_ALWAYS_INLINE float distanceToPoint(const Vector3D_SSE &point) const
120	{
121	return (*this - point).length();
122	}
123
124	Q_ALWAYS_INLINE void normalize()
125	{
126	const float len = length();
127	m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set_ps1(w: len));
128	}
129
130	Q_ALWAYS_INLINE Vector3D_SSE normalized() const
131	{
132	Vector3D_SSE v = *this;
133	v.normalize();
134	return v;
135	}
136
137	Q_ALWAYS_INLINE bool isNull() const
138	{
139	// Ignore last bit
140	// 0b111 = 0x7
141	return ((_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: _mm_set_ps1(w: `0.0f`))) & `0x7`) == `0x7`);
142	}
143
144	Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE unproject(const Matrix4x4_SSE &modelView, const Matrix4x4_SSE &projection, const QRect &viewport) const;
145	Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE project(const Matrix4x4_SSE &modelView, const Matrix4x4_SSE &projection, const QRect &viewport) const;
146
147	Q_ALWAYS_INLINE float x() const { return _mm_cvtss_f32(a: m_xyzw); }
148
149	Q_ALWAYS_INLINE float y() const
150	{
151	// 0b01010101 = 0x55
152	return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, `0x55`));
153	}
154
155	Q_ALWAYS_INLINE float z() const
156	{
157	// 0b10101010 = 0xaa
158	return _mm_cvtss_f32(a: _mm_unpackhi_ps(a: m_xyzw, b: m_xyzw));
159	}
160
161	Q_ALWAYS_INLINE void setX(float x)
162	{
163	m_xyzw = _mm_move_ss(a: m_xyzw, b: _mm_set_ss(w: x));
164	}
165
166	Q_ALWAYS_INLINE void setY(float y)
167	{
168	// m_xyzw = a, b, c, d
169
170	// y, y, y, y
171	const __m128 yVec = _mm_set_ps1(w: y);
172
173	// y, y, a, a
174	// 0b00000000 == 0x0
175	const __m128 yaVec = _mm_shuffle_ps(yVec, m_xyzw, `0x0`);
176
177	// a, y, c, d
178	// 0b11100010 == 0xe2
179	m_xyzw = _mm_shuffle_ps(yaVec, m_xyzw, `0xe2`);
180	}
181
182	Q_ALWAYS_INLINE void setZ(float z)
183	{
184	// m_xyzw = a, b, c, d
185
186	// z, z, z, z
187	const __m128 zVec = _mm_set_ps1(w: z);
188
189	// z, z, d, d
190	// 0b11110000 == 0xf0
191	const __m128 zdVec = _mm_shuffle_ps(zVec, m_xyzw, `0xf0`);
192
193	// a, b, z, d
194	// 0b10000100 == 0x84
195	m_xyzw = _mm_shuffle_ps(m_xyzw, zdVec, `0x84`);
196	}
197
198	Q_ALWAYS_INLINE float operator[](int idx) const
199	{
200	switch (idx) {
201	case `0`:
202	return x();
203	case `1`:
204	return y();
205	case `2`:
206	return z();
207	default:
208	Q_UNREACHABLE_RETURN(`0.0f`);
209	}
210	}
211
212	struct DigitWrapper
213	{
214	explicit DigitWrapper(int idx, Vector3D_SSE *vec)
215	: m_vec(vec)
216	, m_idx(idx)
217	{}
218
219	operator float() const
220	{
221	switch (m_idx) {
222	case `0`:
223	return m_vec->x();
224	case `1`:
225	return m_vec->y();
226	case `2`:
227	return m_vec->z();
228	default:
229	Q_UNREACHABLE_RETURN(`0.0f`);
230	}
231	}
232
233	void operator =(float value)
234	{
235	switch (m_idx) {
236	case `0`:
237	m_vec->setX(value);
238	break;
239	case `1`:
240	m_vec->setY(value);
241	break;
242	case `2`:
243	m_vec->setZ(value);
244	break;
245	default:
246	Q_UNREACHABLE();
247	}
248	}
249
250	private:
251	Vector3D_SSE *m_vec;
252	const int m_idx;
253	};
254
255	Q_ALWAYS_INLINE DigitWrapper operator[](int idx)
256	{
257	return DigitWrapper (idx, this);
258	}
259
260	static Q_ALWAYS_INLINE float dotProduct(Vector3D_SSE a, Vector3D_SSE b)
261	{
262	#if defined(__SSE4_1__)
263	// 0b01111111 = 0x7f
264	return _mm_cvtss_f32(_mm_dp_ps(a.m_xyzw, b.m_xyzw, `0x7f`));
265	#elif defined(__SSE3__)
266	const __m128 mult = _mm_mul_ps(a.m_xyzw, b.m_xyzw);
267	// a + b, c + d, a + d, c + d
268	const __m128 partialSum = _mm_hadd_ps(mult, mult);
269	// c + d, ......
270	// 0x00000001 =
271	const __m128 partialSumShuffle = _mm_shuffle_ps(partialSum, partialSum, `0x1`);
272	return _mm_cvtss_f32(_mm_hadd_ps(partialSum, partialSumShuffle));
273	#else
274	const __m128 mult = _mm_mul_ps(a: a.m_xyzw, b: b.m_xyzw);
275
276	// (multX, 0, 0, 0) + (multY, 0, 0, 0) -> (multX + multY, 0, 0, 0)
277	// 0b11111101 == 0xfd
278	const __m128 shuffled = _mm_shuffle_ps(mult, mult, `0xfd`);
279	// (multX + multY, 0, 0, 0) + (multZ, 0, 0, 0);
280	// 0b11111110 == 0xfe
281	const __m128 shuffled2 = _mm_shuffle_ps(mult, mult, `0xfe`);
282	const __m128 result = _mm_add_ps(a: _mm_add_ps(a: shuffled, b: mult), b: shuffled2);
283	return _mm_cvtss_f32(a: result);
284	#endif
285	}
286
287	static Q_ALWAYS_INLINE Vector3D_SSE crossProduct(Vector3D_SSE a, Vector3D_SSE b)
288	{
289	// a.y b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x*
290	// (a.y, a.z, a.z, a.x, a.x, a.y) (b.z, b.y, b.x, b.z, b.y, b.x)
291	// (a.y, a.z, a.x) (b.z, b.x, b.y) - (a.z, a.x, a.y) (b.y, b.z, b.x)*
292
293	// 0b11001001 == 0xc9
294	const __m128 a1 = _mm_shuffle_ps(a.m_xyzw, a.m_xyzw, `0xc9`);
295	const __m128 b2 = _mm_shuffle_ps(b.m_xyzw, b.m_xyzw, `0xc9`);
296	// 0b11010010 == 0xd2
297	const __m128 a2 = _mm_shuffle_ps(a.m_xyzw, a.m_xyzw, `0xd2`);
298	const __m128 b1 = _mm_shuffle_ps(b.m_xyzw, b.m_xyzw, `0xd2`);
299
300	Vector3D_SSE v(Qt::Uninitialized);
301	v.m_xyzw = _mm_sub_ps(a: _mm_mul_ps(a: a1, b: b1), b: _mm_mul_ps(a: a2, b: b2));
302	return v;
303	}
304
305	friend class Vector4D_SSE;
306	friend class Matrix4x4_SSE;
307	friend Vector3D_SSE operator(const* Vector3D_SSE &vector, const Matrix4x4_SSE &matrix);
308	friend Vector3D_SSE operator(const* Matrix4x4_SSE &matrix, const Vector3D_SSE &vector);
309
310	friend Q_ALWAYS_INLINE const Vector3D_SSE operator+(Vector3D_SSE v1, Vector3D_SSE v2) { return v1 += v2; }
311	friend Q_ALWAYS_INLINE const Vector3D_SSE operator-(Vector3D_SSE v1, Vector3D_SSE v2) { return v1 -= v2; }
312	friend Q_ALWAYS_INLINE const Vector3D_SSE operator(float* factor, Vector3D_SSE vector) { return vector *= factor; }
313	friend Q_ALWAYS_INLINE const Vector3D_SSE operator(Vector3D_SSE vector, float* factor) { return vector *= factor; }
314	friend Q_ALWAYS_INLINE const Vector3D_SSE operator(Vector3D_SSE v1, Vector3D_SSE v2) { return* v1 *= v2; }
315	friend Q_ALWAYS_INLINE const Vector3D_SSE operator-(Vector3D_SSE vector)
316	{
317	Vector3D_SSE c(Qt::Uninitialized);
318
319	c.m_xyzw = _mm_xor_ps(a: vector.m_xyzw, b: _mm_set1_ps(w: -`0.0f`));
320
321	return c;
322	}
323
324	friend Q_ALWAYS_INLINE const Vector3D_SSE operator/(Vector3D_SSE vector, float divisor) { return vector /= divisor; }
325	friend Q_ALWAYS_INLINE const Vector3D_SSE operator/(Vector3D_SSE vector, Vector3D_SSE divisor) { return vector /= divisor; }
326
327	friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Vector3D_SSE &v);
328	friend Q_ALWAYS_INLINE bool qFuzzyCompare(const Vector3D_SSE& v1, const Vector3D_SSE& v2)
329	{
330	return ::qFuzzyCompare(p1: v1.x(), p2: v2.x()) &&
331	::qFuzzyCompare(p1: v1.y(), p2: v2.y()) &&
332	::qFuzzyCompare(p1: v1.z(), p2: v2.z());
333	}
334
335	private:
336	// Q_DECL_ALIGN(16) float m[4];// for SSE support
337	__m128 m_xyzw;
338	};
339
340	} // Qt3DCore
341
342	Q_DECLARE_TYPEINFO(Qt3DCore::Vector3D_SSE, Q_PRIMITIVE_TYPE);
343
344	QT_END_NAMESPACE
345
346	Q_DECLARE_METATYPE(Qt3DCore::Vector3D_SSE)
347
348	#endif // __SSE2__
349
350	#endif // QT3DCORE_VECTOR3D_SSE_P_H
351

source code of qt3d/src/core/transforms/vector3d_sse_p.h