vector4d_sse_p.h source code [qt3d/src/core/transforms/vector4d_sse_p.h]

1	// Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
2	// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4	#ifndef QT3DCORE_VECTOR4D_SSE_P_H
5	#define QT3DCORE_VECTOR4D_SSE_P_H
6
7	//
8	// W A R N I N G
9	// -------------
10	//
11	// This file is not part of the Qt3D API. It exists purely as an
12	// implementation detail. This header file may change from version to
13	// version without notice, or even be removed.
14	//
15	// We mean it.
16	//
17
18	#include <Qt3DCore/private/vector3d_p.h>
19	#include <QtGui/qvector4d.h>
20
21	#ifdef __SSE2__
22
23	QT_BEGIN_NAMESPACE
24
25	namespace Qt3DCore {
26
27	class Matrix4x4_SSE;
28
29	class Vector4D_SSE
30	{
31	public:
32	Q_ALWAYS_INLINE Vector4D_SSE()
33	: m_xyzw(_mm_setzero_ps())
34	{
35	}
36
37	explicit Q_ALWAYS_INLINE Vector4D_SSE(Qt::Initialization) {}
38
39	explicit Q_ALWAYS_INLINE Vector4D_SSE(float x, float y, float z, float w)
40	: m_xyzw(_mm_set_ps(z: w, y: z, x: y, w: x))
41	{
42	}
43
44	explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector4D v)
45	: m_xyzw(_mm_set_ps(z: v.w(), y: v.z(), x: v.y(), w: v.x()))
46	{
47	}
48
49	explicit Q_ALWAYS_INLINE Vector4D_SSE(const Vector3D_SSE &vec3, float w = `0.0f`)
50	: m_xyzw(vec3.m_xyzw)
51	{
52	setW(w);
53	}
54
55	explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector3D v, float w = `0.0f`)
56	: m_xyzw(_mm_set_ps(z: w, y: v.z(), x: v.y(), w: v.x()))
57	{
58	}
59
60	Q_ALWAYS_INLINE Vector4D_SSE &operator+=(Vector4D_SSE vector)
61	{
62	m_xyzw = _mm_add_ps(a: m_xyzw, b: vector.m_xyzw);
63	return *this;
64	}
65
66	Q_ALWAYS_INLINE Vector4D_SSE &operator-=(Vector4D_SSE vector)
67	{
68	m_xyzw = _mm_sub_ps(a: m_xyzw, b: vector.m_xyzw);
69	return *this;
70	}
71
72	Q_ALWAYS_INLINE Vector4D_SSE &operator*=(Vector4D_SSE vector)
73	{
74	m_xyzw = _mm_mul_ps(a: m_xyzw, b: vector.m_xyzw);
75	return *this;
76	}
77
78	Q_ALWAYS_INLINE Vector4D_SSE &operator/=(Vector4D_SSE vector)
79	{
80	m_xyzw = _mm_div_ps(a: m_xyzw, b: vector.m_xyzw);
81	return *this;
82	}
83
84	Q_ALWAYS_INLINE Vector4D_SSE &operator=(float* factor)
85	{
86	m_xyzw = _mm_mul_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
87	return *this;
88	}
89
90	Q_ALWAYS_INLINE Vector4D_SSE &operator/=(float factor)
91	{
92	m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
93	return *this;
94	}
95
96	Q_ALWAYS_INLINE bool operator==(Vector4D_SSE other) const
97	{
98	// 0b1111 == 0xf
99	return (_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: other.m_xyzw)) == `0xf`);
100	}
101
102	Q_ALWAYS_INLINE bool operator!=(Vector4D_SSE other) const
103	{
104	return !(*this == other);
105	}
106
107	Q_ALWAYS_INLINE QVector4D toQVector4D() const
108	{
109	return QVector4D (x(), y(), z(), w());
110	}
111
112	// TODO: Uncomment when we introduce Vector3D_SSE
113	//Q_ALWAYS_INLINE Vector3D_SSE toVector3D() const { return Vector3D_SSE(this); }*
114
115	Q_ALWAYS_INLINE float lengthSquared() const
116	{
117	return dotProduct(a: *this, b: *this);
118	}
119
120	Q_ALWAYS_INLINE float length() const
121	{
122	return sqrt(x: dotProduct(a: *this, b: *this));
123	}
124
125	Q_ALWAYS_INLINE void normalize()
126	{
127	const float len = length();
128	m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set_ps1(w: len));
129	}
130
131	Q_ALWAYS_INLINE Vector4D_SSE normalized() const
132	{
133	Vector4D_SSE v = *this;
134	v.normalize();
135	return v;
136	}
137
138	Q_ALWAYS_INLINE bool isNull() const
139	{
140	// 0b1111 == 0xf
141	return _mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: _mm_setzero_ps())) == `0xf`;
142	}
143
144	Q_ALWAYS_INLINE float x() const { return _mm_cvtss_f32(a: m_xyzw); }
145
146	Q_ALWAYS_INLINE float y() const
147	{
148	// 0b01010101 = 0x55
149	return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, `0x55`));
150	}
151
152	Q_ALWAYS_INLINE float z() const
153	{
154	// 0b10101010 = 0xaa
155	return _mm_cvtss_f32(a: _mm_unpackhi_ps(a: m_xyzw, b: m_xyzw));
156	}
157
158	Q_ALWAYS_INLINE float w() const
159	{
160	// 0b11111111 = 0xff
161	return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, `0xff`));
162	}
163
164	Q_ALWAYS_INLINE void setX(float x)
165	{
166	m_xyzw = _mm_move_ss(a: m_xyzw, b: _mm_set_ss(w: x));
167	}
168
169	Q_ALWAYS_INLINE void setY(float y)
170	{
171	// m_xyzw = a, b, c, d
172
173	// y, y, y, y
174	const __m128 yVec = _mm_set_ps1(w: y);
175
176	// y, y, a, a
177	// 0b00000000 == 0x0
178	const __m128 yaVec = _mm_shuffle_ps(yVec, m_xyzw, `0x0`);
179
180	// a, y, c, d
181	// 0b11100010 == 0xe2
182	m_xyzw = _mm_shuffle_ps(yaVec, m_xyzw, `0xe2`);
183	}
184
185	Q_ALWAYS_INLINE void setZ(float z)
186	{
187	// m_xyzw = a, b, c, d
188
189	// z, z, z, z
190	const __m128 zVec = _mm_set_ps1(w: z);
191
192	// z, z, d, d
193	// 0b11110000 == 0xf0
194	const __m128 zdVec = _mm_shuffle_ps(zVec, m_xyzw, `0xf0`);
195
196	// a, b, z, d
197	// 0b10000100 == 0x84
198	m_xyzw = _mm_shuffle_ps(m_xyzw, zdVec, `0x84`);
199	}
200
201	Q_ALWAYS_INLINE void setW(float w)
202	{
203	#ifdef __SSE4_1__
204	const __m128 wVec = _mm_set_ss(w);
205	// insert element 0 of wVec into position 3 in vec3, don't zero anything
206	m_xyzw = _mm_insert_ps(m_xyzw, wVec, `0x30`);
207	#else
208	// m_xyzw = a, b, c, d
209
210	// w, w, w, w
211	const __m128 wVec = _mm_set_ps1(w: w);
212
213	// c, c, w, w
214	const __m128 cwVec = _mm_shuffle_ps(m_xyzw, wVec, _MM_SHUFFLE(`0`, `0`, `2`, `2`));
215
216	// a, b, c, w
217	m_xyzw = _mm_shuffle_ps(m_xyzw, cwVec, _MM_SHUFFLE(`2`, `0`, `1`, `0`));
218	#endif
219	}
220
221	Q_ALWAYS_INLINE float operator[](int idx) const
222	{
223	Q_DECL_ALIGN(`16`) float vec[`4`];
224	_mm_store_ps(p: vec, a: m_xyzw);
225	return vec[idx];
226	}
227
228	struct DigitWrapper
229	{
230	explicit DigitWrapper(int idx, Vector4D_SSE *vec)
231	: m_vec(vec)
232	, m_idx(idx)
233	{}
234
235	operator float() const
236	{
237	switch (m_idx) {
238	case `0`:
239	return m_vec->x();
240	case `1`:
241	return m_vec->y();
242	case `2`:
243	return m_vec->z();
244	case `3`:
245	return m_vec->w();
246	default:
247	Q_UNREACHABLE_RETURN(`0.0f`);
248	}
249	}
250	void operator =(float value)
251	{
252	switch (m_idx) {
253	case `0`:
254	m_vec->setX(value);
255	break;
256	case `1`:
257	m_vec->setY(value);
258	break;
259	case `2`:
260	m_vec->setZ(value);
261	break;
262	case `3`:
263	m_vec->setW(value);
264	break;
265	default:
266	Q_UNREACHABLE();
267	}
268	}
269
270	private:
271	Vector4D_SSE *m_vec;
272	const int m_idx;
273	};
274
275	Q_ALWAYS_INLINE DigitWrapper operator[](int idx)
276	{
277	return DigitWrapper (idx, this);
278	}
279
280	static Q_ALWAYS_INLINE float dotProduct(Vector4D_SSE a, Vector4D_SSE b)
281	{
282	#if defined(__SSE4_1__)
283	// 0b11111111 = 0xff
284	return _mm_cvtss_f32(_mm_dp_ps(a.m_xyzw, b.m_xyzw, `0xff`));
285	#elif defined(__SSE3__)
286	const __m128 mult = _mm_mul_ps(a.m_xyzw, b.m_xyzw);
287	// a + b, c + d, a + d, c + d
288	const __m128 partialSum = _mm_hadd_ps(mult, mult);
289	// c + d, ......
290	// 0x00000001 =
291	const __m128 partialSumShuffle = _mm_shuffle_ps(partialSum, partialSum, `0x1`);
292	return _mm_cvtss_f32(_mm_hadd_ps(partialSum, partialSumShuffle));
293	#else
294	const __m128 mult = _mm_mul_ps(a: a.m_xyzw, b: b.m_xyzw);
295	// (multX, multY, 0, 0) + (multZ, multW, 0, 0) -> (multX + multZ, multY + multW, 0, 0)
296	// 0b00001110 == 0xe
297	const __m128 shuffled = _mm_shuffle_ps(mult, mult, `0xe`);
298	__m128 result = _mm_add_ps(a: shuffled, b: mult);
299	// (multX + multZ, 0, 0, 0) + (multY + multW, 0, 0, 0);
300	// 0b00000001 == 0x1
301	const __m128 shuffled2 = _mm_shuffle_ps(result, result, `0x1`);
302	result = _mm_add_ps(a: result, b: shuffled2);
303	return _mm_cvtss_f32(a: result);
304	#endif
305	}
306
307	friend class Matrix4x4_SSE;
308	friend class Vector3D_SSE;
309	friend Vector4D_SSE operator(const* Vector4D_SSE &vector, const Matrix4x4_SSE &matrix);
310	friend Vector4D_SSE operator(const* Matrix4x4_SSE &matrix, const Vector4D_SSE &vector);
311
312	friend Q_ALWAYS_INLINE const Vector4D_SSE operator+(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 += v2; }
313	friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 -= v2; }
314	friend Q_ALWAYS_INLINE const Vector4D_SSE operator(float* factor, Vector4D_SSE vector) { return vector *= factor; }
315	friend Q_ALWAYS_INLINE const Vector4D_SSE operator(Vector4D_SSE vector, float* factor) { return vector *= factor; }
316	friend Q_ALWAYS_INLINE const Vector4D_SSE operator(Vector4D_SSE v1, Vector4D_SSE v2) { return* v1 *= v2; }
317	friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE vector)
318	{
319	Vector4D_SSE c(Qt::Uninitialized);
320
321	c.m_xyzw = _mm_xor_ps(a: vector.m_xyzw, b: _mm_set1_ps(w: -`0.0f`));
322
323	return c;
324	}
325
326	friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, float divisor) { return vector /= divisor; }
327	friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, Vector4D_SSE divisor) { return vector /= divisor; }
328
329	friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Vector4D_SSE &v);
330	friend Q_ALWAYS_INLINE bool qFuzzyCompare(const Vector4D_SSE& v1, const Vector4D_SSE& v2)
331	{
332	return ::qFuzzyCompare(p1: v1.x(), p2: v2.x()) &&
333	::qFuzzyCompare(p1: v1.y(), p2: v2.y()) &&
334	::qFuzzyCompare(p1: v1.z(), p2: v2.z()) &&
335	::qFuzzyCompare(p1: v1.w(), p2: v2.w());
336	}
337
338	private:
339	// Q_DECL_ALIGN(16) float m[4];// for SSE support
340	__m128 m_xyzw;
341	};
342
343	} // Qt3DCore
344
345	Q_DECLARE_TYPEINFO(Qt3DCore::Vector4D_SSE, Q_PRIMITIVE_TYPE);
346
347	QT_END_NAMESPACE
348
349	Q_DECLARE_METATYPE(Qt3DCore::Vector4D_SSE)
350
351	#endif // __SSE2__
352
353	#endif // QT3DCORE_VECTOR4D_SSE_P_H
354

Provided by KDAB

Definitions

source code of qt3d/src/core/transforms/vector4d_sse_p.h