PsVecMath.h source code [qtquick3dphysics/src/3rdparty/PhysX/source/foundation/include/PsVecMath.h]

1	//
2	// Redistribution and use in source and binary forms, with or without
3	// modification, are permitted provided that the following conditions
4	// are met:
5	//  * Redistributions of source code must retain the above copyright
6	//    notice, this list of conditions and the following disclaimer.
7	//  * Redistributions in binary form must reproduce the above copyright
8	//    notice, this list of conditions and the following disclaimer in the
9	//    documentation and/or other materials provided with the distribution.
10	//  * Neither the name of NVIDIA CORPORATION nor the names of its
11	//    contributors may be used to endorse or promote products derived
12	//    from this software without specific prior written permission.
13	//
14	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
15	// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17	// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
18	// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19	// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20	// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21	// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22	// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	//
26	// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
27	// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
28	// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
29
30	#ifndef PSFOUNDATION_PSVECMATH_H
31	#define PSFOUNDATION_PSVECMATH_H
32
33	#include "Ps.h"
34	#include "PsIntrinsics.h"
35	#include "foundation/PxVec3.h"
36	#include "foundation/PxVec4.h"
37	#include "foundation/PxMat33.h"
38	#include "foundation/PxUnionCast.h"
39
40	// We can opt to use the scalar version of vectorised functions.
41	// This can catch type safety issues and might even work out more optimal on pc.
42	// It will also be useful for benchmarking and testing.
43	// NEVER submit with vector intrinsics deactivated without good reason.
44	// AM: deactivating SIMD for debug win64 just so autobuild will also exercise
45	// non-SIMD path, until a dedicated non-SIMD platform such as Arm comes online.
46	// TODO: dima: reference all platforms with SIMD support here,
47	// all unknown/experimental cases should better default to NO SIMD.
48
// enable/disable SIMD
// COMPILE_VECTOR_INTRINSICS is defined to 1 when the vector-intrinsic AoS
// implementation is selected and to 0 when the scalar implementation is used.
// Defining PX_SIMD_DISABLED forces the scalar path regardless of platform.
#if !defined(PX_SIMD_DISABLED)
#if PX_INTEL_FAMILY && (!defined(__EMSCRIPTEN__) || defined(__SSE2__))
// Intel family; under Emscripten only when __SSE2__ is defined
#define COMPILE_VECTOR_INTRINSICS 1
#elif PX_ANDROID && PX_NEON
#define COMPILE_VECTOR_INTRINSICS 1
#elif PX_UWP && PX_NEON
#define COMPILE_VECTOR_INTRINSICS 1
#elif PX_IOS && PX_NEON
#define COMPILE_VECTOR_INTRINSICS 1
#elif PX_SWITCH
#define COMPILE_VECTOR_INTRINSICS 1
#else
// every platform not listed above falls back to the scalar path
#define COMPILE_VECTOR_INTRINSICS 0
#endif
#else
#define COMPILE_VECTOR_INTRINSICS 0
#endif

#if COMPILE_VECTOR_INTRINSICS && PX_INTEL_FAMILY && (PX_UNIX_FAMILY || PX_PS4)
// only SSE2 compatible platforms should reach this
#if PX_EMSCRIPTEN
#include <emmintrin.h>
#endif
#include <xmmintrin.h>
#endif
75
76	#if COMPILE_VECTOR_INTRINSICS
77	#include "PsAoS.h"
78	#else
79	#include "PsVecMathAoSScalar.h"
80	#endif
81
82	namespace physx
83	{
84	namespace shdfnd
85	{
86	namespace aos
87	{
88
89	// Basic AoS types are
90	// FloatV - 16-byte aligned representation of float.
91	// Vec3V - 16-byte aligned representation of PxVec3 stored as (x y z 0).
92	// Vec4V - 16-byte aligned representation of vector of 4 floats stored as (x y z w).
93	// BoolV - 16-byte aligned representation of vector of 4 bools stored as (x y z w).
94	// VecU32V - 16-byte aligned representation of 4 unsigned ints stored as (x y z w).
95	// VecI32V - 16-byte aligned representation of 4 signed ints stored as (x y z w).
96	// Mat33V - 16-byte aligned representation of any 3x3 matrix.
97	// Mat34V - 16-byte aligned representation of transformation matrix (rotation in col1,col2,col3 and translation in
98	// col4).
99	// Mat44V - 16-byte aligned representation of any 4x4 matrix.
100
101	//////////////////////////////////////////
102	// Construct a simd type from a scalar type
103	//////////////////////////////////////////
104
105	// FloatV
106	//(f,f,f,f)
107	PX_FORCE_INLINE FloatV FLoad(const PxF32 f);
108
109	// Vec3V
110	//(f,f,f,0)
111	PX_FORCE_INLINE Vec3V V3Load(const PxF32 f);
112	//(f.x,f.y,f.z,0)
113	PX_FORCE_INLINE Vec3V V3LoadU(const PxVec3& f);
114	//(f.x,f.y,f.z,0), f must be 16-byte aligned
115	PX_FORCE_INLINE Vec3V V3LoadA(const PxVec3& f);
116	//(f.x,f.y,f.z,w_undefined), f must be 16-byte aligned
117	PX_FORCE_INLINE Vec3V V3LoadUnsafeA(const PxVec3& f);
118	//(f.x,f.y,f.z,0)
119	PX_FORCE_INLINE Vec3V V3LoadU(const PxF32* f);
120	//(f.x,f.y,f.z,0), f must be 16-byte aligned
121	PX_FORCE_INLINE Vec3V V3LoadA(const PxF32* f);
122
123	// Vec4V
124	//(f,f,f,f)
125	PX_FORCE_INLINE Vec4V V4Load(const PxF32 f);
126	//(f[0],f[1],f[2],f[3])
127	PX_FORCE_INLINE Vec4V V4LoadU(const PxF32* const f);
128	//(f[0],f[1],f[2],f[3]), f must be 16-byte aligned
129	PX_FORCE_INLINE Vec4V V4LoadA(const PxF32* const f);
130	//(x,y,z,w)
131	PX_FORCE_INLINE Vec4V V4LoadXYZW(const PxF32& x, const PxF32& y, const PxF32& z, const PxF32& w);
132
133	// BoolV
134	//(f,f,f,f)
135	PX_FORCE_INLINE BoolV BLoad(const bool f);
136	//(f[0],f[1],f[2],f[3])
137	PX_FORCE_INLINE BoolV BLoad(const bool* const f);
138
139	// VecU32V
140	//(f,f,f,f)
141	PX_FORCE_INLINE VecU32V U4Load(const PxU32 f);
142	//(f[0],f[1],f[2],f[3])
143	PX_FORCE_INLINE VecU32V U4LoadU(const PxU32* f);
144	//(f[0],f[1],f[2],f[3]), f must be 16-byte aligned
145	PX_FORCE_INLINE VecU32V U4LoadA(const PxU32* f);
146	//((U32)x, (U32)y, (U32)z, (U32)w)
147	PX_FORCE_INLINE VecU32V U4LoadXYZW(PxU32 x, PxU32 y, PxU32 z, PxU32 w);
148
149	// VecI32V
150	//(i,i,i,i)
151	PX_FORCE_INLINE VecI32V I4Load(const PxI32 i);
152	//(i[0],i[1],i[2],i[3])
153	PX_FORCE_INLINE VecI32V I4LoadU(const PxI32* i);
154	//(i[0],i[1],i[2],i[3]), i must be 16-byte aligned
155	PX_FORCE_INLINE VecI32V I4LoadA(const PxI32* i);
156
157	// QuatV
158	//(x = v[0], y = v[1], z = v[2], w = v[3]); the array does not need to be aligned
159	PX_FORCE_INLINE QuatV QuatVLoadU(const PxF32* v);
160	//(x = v[0], y = v[1], z = v[2], w = v[3]); the array must be aligned, fast load
161	PX_FORCE_INLINE QuatV QuatVLoadA(const PxF32* v);
162	//(x, y, z, w)
163	PX_FORCE_INLINE QuatV QuatVLoadXYZW(const PxF32 x, const PxF32 y, const PxF32 z, const PxF32 w);
164
165	// not added to public api
166	Vec4V Vec4V_From_PxVec3_WUndefined(const PxVec3& v);
167
168	///////////////////////////////////////////////////
169	// Construct a simd type from a different simd type
170	///////////////////////////////////////////////////
171
172	// Vec3V
173	//(v.x,v.y,v.z,0)
174	PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V(Vec4V v);
175	//(v.x,v.y,v.z,undefined) - be very careful with w!=0 because many functions require w==0 for correct operation eg V3Dot, V3Length, V3Cross etc etc.
176	PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V_WUndefined(const Vec4V v);
177
178	// Vec4V
179	//(f.x,f.y,f.z,f.w)
180	PX_FORCE_INLINE Vec4V Vec4V_From_Vec3V(Vec3V f);
181	//((PxF32)f.x, (PxF32)f.y, (PxF32)f.z, (PxF32)f.w)
182	PX_FORCE_INLINE Vec4V Vec4V_From_VecU32V(VecU32V a);
183	//((PxF32)f.x, (PxF32)f.y, (PxF32)f.z, (PxF32)f.w)
184	PX_FORCE_INLINE Vec4V Vec4V_From_VecI32V(VecI32V a);
185	//(*reinterpret_cast<PxF32*>(&f.x), *reinterpret_cast<PxF32*>(&f.y), *reinterpret_cast<PxF32*>(&f.z),
186	// *reinterpret_cast<PxF32*>(&f.w))
187	PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecU32V(VecU32V a);
188	//(*reinterpret_cast<PxF32*>(&f.x), *reinterpret_cast<PxF32*>(&f.y), *reinterpret_cast<PxF32*>(&f.z),
189	// *reinterpret_cast<PxF32*>(&f.w))
190	PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecI32V(VecI32V a);
191
192	// VecU32V
193	//(*reinterpret_cast<PxU32*>(&f.x), *reinterpret_cast<PxU32*>(&f.y), *reinterpret_cast<PxU32*>(&f.z),
194	// *reinterpret_cast<PxU32*>(&f.w))
195	PX_FORCE_INLINE VecU32V VecU32V_ReinterpretFrom_Vec4V(Vec4V a);
196	//(b[0], b[1], b[2], b[3])
197	PX_FORCE_INLINE VecU32V VecU32V_From_BoolV(const BoolVArg b);
198
199	// VecI32V
200	//(*reinterpret_cast<PxI32*>(&f.x), *reinterpret_cast<PxI32*>(&f.y), *reinterpret_cast<PxI32*>(&f.z),
201	// *reinterpret_cast<PxI32*>(&f.w))
202	PX_FORCE_INLINE VecI32V VecI32V_ReinterpretFrom_Vec4V(Vec4V a);
203	//((I32)a.x, (I32)a.y, (I32)a.z, (I32)a.w)
204	PX_FORCE_INLINE VecI32V VecI32V_From_Vec4V(Vec4V a);
205	//((I32)b.x, (I32)b.y, (I32)b.z, (I32)b.w)
206	PX_FORCE_INLINE VecI32V VecI32V_From_BoolV(const BoolVArg b);
207
208	///////////////////////////////////////////////////
209	// Convert from a simd type back to a scalar type
210	///////////////////////////////////////////////////
211
212	// FloatV
213	// a.x
214	PX_FORCE_INLINE void FStore(const FloatV a, PxF32* PX_RESTRICT f);
215
216	// Vec3V
217	//(a.x,a.y,a.z)
218	PX_FORCE_INLINE void V3StoreA(const Vec3V a, PxVec3& f);
219	//(a.x,a.y,a.z)
220	PX_FORCE_INLINE void V3StoreU(const Vec3V a, PxVec3& f);
221
222	// Vec4V
223	PX_FORCE_INLINE void V4StoreA(const Vec4V a, PxF32* f);
224	PX_FORCE_INLINE void V4StoreU(const Vec4V a, PxF32* f);
225
226	// BoolV
227	PX_FORCE_INLINE void BStoreA(const BoolV b, PxU32* f);
228
229	// VecU32V
230	PX_FORCE_INLINE void U4StoreA(const VecU32V uv, PxU32* u);
231
232	// VecI32V
233	PX_FORCE_INLINE void I4StoreA(const VecI32V iv, PxI32* i);
234
235	//////////////////////////////////////////////////////////////////
236	// Test that simd types have elements in the floating point range
237	//////////////////////////////////////////////////////////////////
238
239	// check for each component is valid ie in floating point range
240	PX_FORCE_INLINE bool isFiniteFloatV(const FloatV a);
241	// check for each component is valid ie in floating point range
242	PX_FORCE_INLINE bool isFiniteVec3V(const Vec3V a);
243	// check for each component is valid ie in floating point range
244	PX_FORCE_INLINE bool isFiniteVec4V(const Vec4V a);
245
246	// Check that w-component is zero.
247	PX_FORCE_INLINE bool isValidVec3V(const Vec3V a);
248
249	//////////////////////////////////////////////////////////////////
250	// Tests that all elements of two 16-byte types are completely equivalent.
251	// Use these tests for unit testing and asserts only.
252	//////////////////////////////////////////////////////////////////
253
254	namespace _VecMathTests
255	{
256	PX_FORCE_INLINE Vec3V getInvalidVec3V();
257	PX_FORCE_INLINE bool allElementsEqualFloatV(const FloatV a, const FloatV b);
258	PX_FORCE_INLINE bool allElementsEqualVec3V(const Vec3V a, const Vec3V b);
259	PX_FORCE_INLINE bool allElementsEqualVec4V(const Vec4V a, const Vec4V b);
260	PX_FORCE_INLINE bool allElementsEqualBoolV(const BoolV a, const BoolV b);
261	PX_FORCE_INLINE bool allElementsEqualVecU32V(const VecU32V a, const VecU32V b);
262	PX_FORCE_INLINE bool allElementsEqualVecI32V(const VecI32V a, const VecI32V b);
263
264	PX_FORCE_INLINE bool allElementsEqualMat33V(const Mat33V& a, const Mat33V& b)
265	{
266	return (allElementsEqualVec3V(a: a.col0, b: b.col0) && allElementsEqualVec3V(a: a.col1, b: b.col1) &&
267	allElementsEqualVec3V(a: a.col2, b: b.col2));
268	}
269	PX_FORCE_INLINE bool allElementsEqualMat34V(const Mat34V& a, const Mat34V& b)
270	{
271	return (allElementsEqualVec3V(a: a.col0, b: b.col0) && allElementsEqualVec3V(a: a.col1, b: b.col1) &&
272	allElementsEqualVec3V(a: a.col2, b: b.col2) && allElementsEqualVec3V(a: a.col3, b: b.col3));
273	}
274	PX_FORCE_INLINE bool allElementsEqualMat44V(const Mat44V& a, const Mat44V& b)
275	{
276	return (allElementsEqualVec4V(a: a.col0, b: b.col0) && allElementsEqualVec4V(a: a.col1, b: b.col1) &&
277	allElementsEqualVec4V(a: a.col2, b: b.col2) && allElementsEqualVec4V(a: a.col3, b: b.col3));
278	}
279
280	PX_FORCE_INLINE bool allElementsNearEqualFloatV(const FloatV a, const FloatV b);
281	PX_FORCE_INLINE bool allElementsNearEqualVec3V(const Vec3V a, const Vec3V b);
282	PX_FORCE_INLINE bool allElementsNearEqualVec4V(const Vec4V a, const Vec4V b);
283	PX_FORCE_INLINE bool allElementsNearEqualMat33V(const Mat33V& a, const Mat33V& b)
284	{
285	return (allElementsNearEqualVec3V(a: a.col0, b: b.col0) && allElementsNearEqualVec3V(a: a.col1, b: b.col1) &&
286	allElementsNearEqualVec3V(a: a.col2, b: b.col2));
287	}
288	PX_FORCE_INLINE bool allElementsNearEqualMat34V(const Mat34V& a, const Mat34V& b)
289	{
290	return (allElementsNearEqualVec3V(a: a.col0, b: b.col0) && allElementsNearEqualVec3V(a: a.col1, b: b.col1) &&
291	allElementsNearEqualVec3V(a: a.col2, b: b.col2) && allElementsNearEqualVec3V(a: a.col3, b: b.col3));
292	}
293	PX_FORCE_INLINE bool allElementsNearEqualMat44V(const Mat44V& a, const Mat44V& b)
294	{
295	return (allElementsNearEqualVec4V(a: a.col0, b: b.col0) && allElementsNearEqualVec4V(a: a.col1, b: b.col1) &&
296	allElementsNearEqualVec4V(a: a.col2, b: b.col2) && allElementsNearEqualVec4V(a: a.col3, b: b.col3));
297	}
298	}
299
300	//////////////////////////////////////////////////////////////////
301	// Math operations on FloatV
302	//////////////////////////////////////////////////////////////////
303
304	//(0,0,0,0)
305	PX_FORCE_INLINE FloatV FZero();
306	//(1,1,1,1)
307	PX_FORCE_INLINE FloatV FOne();
308	//(0.5,0.5,0.5,0.5)
309	PX_FORCE_INLINE FloatV FHalf();
310	//(PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL)
311	PX_FORCE_INLINE FloatV FEps();
312	//(PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL)
313	PX_FORCE_INLINE FloatV FMax();
314	//(-PX_MAX_REAL, -PX_MAX_REAL, -PX_MAX_REAL, -PX_MAX_REAL)
315	PX_FORCE_INLINE FloatV FNegMax();
316	//(1e-6f, 1e-6f, 1e-6f, 1e-6f)
317	PX_FORCE_INLINE FloatV FEps6();
318	//((PxF32)&1, (PxF32)&1, (PxF32)&1, (PxF32)&1)
319
320	//-f (per component)
321	PX_FORCE_INLINE FloatV FNeg(const FloatV f);
322	// a+b (per component)
323	PX_FORCE_INLINE FloatV FAdd(const FloatV a, const FloatV b);
324	// a-b (per component)
325	PX_FORCE_INLINE FloatV FSub(const FloatV a, const FloatV b);
326	// a*b (per component)
327	PX_FORCE_INLINE FloatV FMul(const FloatV a, const FloatV b);
328	// a/b (per component)
329	PX_FORCE_INLINE FloatV FDiv(const FloatV a, const FloatV b);
330	// a/b (per component)
331	PX_FORCE_INLINE FloatV FDivFast(const FloatV a, const FloatV b);
332	// 1.0f/a
333	PX_FORCE_INLINE FloatV FRecip(const FloatV a);
334	// 1.0f/a
335	PX_FORCE_INLINE FloatV FRecipFast(const FloatV a);
336	// 1.0f/sqrt(a)
337	PX_FORCE_INLINE FloatV FRsqrt(const FloatV a);
338	// 1.0f/sqrt(a)
339	PX_FORCE_INLINE FloatV FRsqrtFast(const FloatV a);
340	// sqrt(a)
341	PX_FORCE_INLINE FloatV FSqrt(const FloatV a);
342	// a*b+c
343	PX_FORCE_INLINE FloatV FScaleAdd(const FloatV a, const FloatV b, const FloatV c);
344	// c-a*b
345	PX_FORCE_INLINE FloatV FNegScaleSub(const FloatV a, const FloatV b, const FloatV c);
346	// fabs(a)
347	PX_FORCE_INLINE FloatV FAbs(const FloatV a);
348	// c ? a : b (per component)
349	PX_FORCE_INLINE FloatV FSel(const BoolV c, const FloatV a, const FloatV b);
350	// a>b (per component)
351	PX_FORCE_INLINE BoolV FIsGrtr(const FloatV a, const FloatV b);
352	// a>=b (per component)
353	PX_FORCE_INLINE BoolV FIsGrtrOrEq(const FloatV a, const FloatV b);
354	// a==b (per component)
355	PX_FORCE_INLINE BoolV FIsEq(const FloatV a, const FloatV b);
356	// Max(a,b) (per component)
357	PX_FORCE_INLINE FloatV FMax(const FloatV a, const FloatV b);
358	// Min(a,b) (per component)
359	PX_FORCE_INLINE FloatV FMin(const FloatV a, const FloatV b);
360	// Clamp(a,b) (per component)
361	PX_FORCE_INLINE FloatV FClamp(const FloatV a, const FloatV minV, const FloatV maxV);
362
363	// a.x>b.x
364	PX_FORCE_INLINE PxU32 FAllGrtr(const FloatV a, const FloatV b);
365	// a.x>=b.x
366	PX_FORCE_INLINE PxU32 FAllGrtrOrEq(const FloatV a, const FloatV b);
367	// a.x==b.x
368	PX_FORCE_INLINE PxU32 FAllEq(const FloatV a, const FloatV b);
369	// a<min || a>max
370	PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV min, const FloatV max);
371	// a>=min && a<=max
372	PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV min, const FloatV max);
373	// a<-bounds || a>bounds
374	PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV bounds);
375	// a>=-bounds && a<=bounds
376	PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV bounds);
377
378	// round float a to the nearest int
379	PX_FORCE_INLINE FloatV FRound(const FloatV a);
380	// calculate the sin of float a
381	PX_FORCE_INLINE FloatV FSin(const FloatV a);
382	// calculate the cos of float a
383	PX_FORCE_INLINE FloatV FCos(const FloatV a);
384
385	//////////////////////////////////////////////////////////////////
386	// Math operations on Vec3V
387	//////////////////////////////////////////////////////////////////
388
389	//(f,f,f,f)
390	PX_FORCE_INLINE Vec3V V3Splat(const FloatV f);
391
392	//(x,y,z)
393	PX_FORCE_INLINE Vec3V V3Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z);
394
395	//(1,0,0,0)
396	PX_FORCE_INLINE Vec3V V3UnitX();
397	//(0,1,0,0)
398	PX_FORCE_INLINE Vec3V V3UnitY();
399	//(0,0,1,0)
400	PX_FORCE_INLINE Vec3V V3UnitZ();
401
402	//(f.x,f.x,f.x,f.x)
403	PX_FORCE_INLINE FloatV V3GetX(const Vec3V f);
404	//(f.y,f.y,f.y,f.y)
405	PX_FORCE_INLINE FloatV V3GetY(const Vec3V f);
406	//(f.z,f.z,f.z,f.z)
407	PX_FORCE_INLINE FloatV V3GetZ(const Vec3V f);
408
409	//(f,v.y,v.z,v.w)
410	PX_FORCE_INLINE Vec3V V3SetX(const Vec3V v, const FloatV f);
411	//(v.x,f,v.z,v.w)
412	PX_FORCE_INLINE Vec3V V3SetY(const Vec3V v, const FloatV f);
413	//(v.x,v.y,f,v.w)
414	PX_FORCE_INLINE Vec3V V3SetZ(const Vec3V v, const FloatV f);
415
416	// v.x=f
417	PX_FORCE_INLINE void V3WriteX(Vec3V& v, const PxF32 f);
418	// v.y=f
419	PX_FORCE_INLINE void V3WriteY(Vec3V& v, const PxF32 f);
420	// v.z=f
421	PX_FORCE_INLINE void V3WriteZ(Vec3V& v, const PxF32 f);
422	// v.x=f.x, v.y=f.y, v.z=f.z
423	PX_FORCE_INLINE void V3WriteXYZ(Vec3V& v, const PxVec3& f);
424	// return v.x
425	PX_FORCE_INLINE PxF32 V3ReadX(const Vec3V& v);
426	// return v.y
427	PX_FORCE_INLINE PxF32 V3ReadY(const Vec3V& v);
428	// return v.z
429	PX_FORCE_INLINE PxF32 V3ReadZ(const Vec3V& v);
430	// return (v.x,v.y,v.z)
431	PX_FORCE_INLINE const PxVec3& V3ReadXYZ(const Vec3V& v);
432
433	//(a.x, b.x, c.x)
434	PX_FORCE_INLINE Vec3V V3ColX(const Vec3V a, const Vec3V b, const Vec3V c);
435	//(a.y, b.y, c.y)
436	PX_FORCE_INLINE Vec3V V3ColY(const Vec3V a, const Vec3V b, const Vec3V c);
437	//(a.z, b.z, c.z)
438	PX_FORCE_INLINE Vec3V V3ColZ(const Vec3V a, const Vec3V b, const Vec3V c);
439
440	//(0,0,0,0)
441	PX_FORCE_INLINE Vec3V V3Zero();
442	//(1,1,1,1)
443	PX_FORCE_INLINE Vec3V V3One();
444	//(PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL)
445	PX_FORCE_INLINE Vec3V V3Eps();
446	//-c (per component)
447	PX_FORCE_INLINE Vec3V V3Neg(const Vec3V c);
448	// a+b (per component)
449	PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const Vec3V b);
450	// a-b (per component)
451	PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const Vec3V b);
452	// a*b (per component)
453	PX_FORCE_INLINE Vec3V V3Scale(const Vec3V a, const FloatV b);
454	// a*b (per component)
455	PX_FORCE_INLINE Vec3V V3Mul(const Vec3V a, const Vec3V b);
456	// a/b (per component)
457	PX_FORCE_INLINE Vec3V V3ScaleInv(const Vec3V a, const FloatV b);
458	// a/b (per component)
459	PX_FORCE_INLINE Vec3V V3Div(const Vec3V a, const Vec3V b);
460	// a/b (per component)
461	PX_FORCE_INLINE Vec3V V3ScaleInvFast(const Vec3V a, const FloatV b);
462	// a/b (per component)
463	PX_FORCE_INLINE Vec3V V3DivFast(const Vec3V a, const Vec3V b);
464	// 1.0f/a
465	PX_FORCE_INLINE Vec3V V3Recip(const Vec3V a);
466	// 1.0f/a
467	PX_FORCE_INLINE Vec3V V3RecipFast(const Vec3V a);
468	// 1.0f/sqrt(a)
469	PX_FORCE_INLINE Vec3V V3Rsqrt(const Vec3V a);
470	// 1.0f/sqrt(a)
471	PX_FORCE_INLINE Vec3V V3RsqrtFast(const Vec3V a);
472	// a*b+c
473	PX_FORCE_INLINE Vec3V V3ScaleAdd(const Vec3V a, const FloatV b, const Vec3V c);
474	// c-a*b
475	PX_FORCE_INLINE Vec3V V3NegScaleSub(const Vec3V a, const FloatV b, const Vec3V c);
476	// a*b+c
477	PX_FORCE_INLINE Vec3V V3MulAdd(const Vec3V a, const Vec3V b, const Vec3V c);
478	// c-a*b
479	PX_FORCE_INLINE Vec3V V3NegMulSub(const Vec3V a, const Vec3V b, const Vec3V c);
480	// fabs(a)
481	PX_FORCE_INLINE Vec3V V3Abs(const Vec3V a);
482
483	// a.b
484	// Note: a.w and b.w must have value zero
485	PX_FORCE_INLINE FloatV V3Dot(const Vec3V a, const Vec3V b);
486	// aXb
487	// Note: a.w and b.w must have value zero
488	PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const Vec3V b);
489	// |a.a|^1/2
490	// Note: a.w must have value zero
491	PX_FORCE_INLINE FloatV V3Length(const Vec3V a);
492	// a.a
493	// Note: a.w must have value zero
494	PX_FORCE_INLINE FloatV V3LengthSq(const Vec3V a);
495	// a*|a.a|^-1/2
496	// Note: a.w must have value zero
497	PX_FORCE_INLINE Vec3V V3Normalize(const Vec3V a);
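498	// NOTE(review): repeated declaration of V3Length (|a.a|^1/2); the formula written here, a.a>0 ? a*|a.a|^-1/2 : (0,0,0,0), describes a zero-fallback normalize - confirm intent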
499	// Note: a.w must have value zero
500	PX_FORCE_INLINE FloatV V3Length(const Vec3V a);
501	// a.a>0 ? a*|a.a|^-1/2 : unsafeReturnValue
502	// Note: a.w must have value zero
503	PX_FORCE_INLINE Vec3V V3NormalizeSafe(const Vec3V a, const Vec3V unsafeReturnValue);
504	// a.x + a.y + a.z
505	// Note: a.w must have value zero
506	PX_FORCE_INLINE FloatV V3SumElems(const Vec3V a);
507
508	// c ? a : b (per component)
509	PX_FORCE_INLINE Vec3V V3Sel(const BoolV c, const Vec3V a, const Vec3V b);
510	// a>b (per component)
511	PX_FORCE_INLINE BoolV V3IsGrtr(const Vec3V a, const Vec3V b);
512	// a>=b (per component)
513	PX_FORCE_INLINE BoolV V3IsGrtrOrEq(const Vec3V a, const Vec3V b);
514	// a==b (per component)
515	PX_FORCE_INLINE BoolV V3IsEq(const Vec3V a, const Vec3V b);
516	// Max(a,b) (per component)
517	PX_FORCE_INLINE Vec3V V3Max(const Vec3V a, const Vec3V b);
518	// Min(a,b) (per component)
519	PX_FORCE_INLINE Vec3V V3Min(const Vec3V a, const Vec3V b);
520
521	// Extract the maximum value from a
522	// Note: a.w must have value zero
523	PX_FORCE_INLINE FloatV V3ExtractMax(const Vec3V a);
524
525	// Extract the minimum value from a
526	// Note: a.w must have value zero
527	PX_FORCE_INLINE FloatV V3ExtractMin(const Vec3V a);
528
529	// Clamp(a,b) (per component)
530	PX_FORCE_INLINE Vec3V V3Clamp(const Vec3V a, const Vec3V minV, const Vec3V maxV);
531
532	// Extract the sign for each component
533	PX_FORCE_INLINE Vec3V V3Sign(const Vec3V a);
534
535	// Test all components.
536	// (a.x>b.x && a.y>b.y && a.z>b.z)
537	// Note: a.w and b.w must have value zero
538	PX_FORCE_INLINE PxU32 V3AllGrtr(const Vec3V a, const Vec3V b);
539	// (a.x>=b.x && a.y>=b.y && a.z>=b.z)
540	// Note: a.w and b.w must have value zero
541	PX_FORCE_INLINE PxU32 V3AllGrtrOrEq(const Vec3V a, const Vec3V b);
542	// (a.x==b.x && a.y==b.y && a.z==b.z)
543	// Note: a.w and b.w must have value zero
544	PX_FORCE_INLINE PxU32 V3AllEq(const Vec3V a, const Vec3V b);
545	// a.x<min.x || a.y<min.y || a.z<min.z || a.x>max.x || a.y>max.y || a.z>max.z
546	// Note: a.w and min.w and max.w must have value zero
547	PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V min, const Vec3V max);
548	// a.x>=min.x && a.y>=min.y && a.z>=min.z && a.x<=max.x && a.y<=max.y && a.z<=max.z
549	// Note: a.w and min.w and max.w must have value zero
550	PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V min, const Vec3V max);
551	// a.x<-bounds.x || a.y<-bounds.y || a.z<-bounds.z || a.x>bounds.x || a.y>bounds.y || a.z>bounds.z
552	// Note: a.w and bounds.w must have value zero
553	PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V bounds);
554	// a.x>=-bounds.x && a.y>=-bounds.y && a.z>=-bounds.z && a.x<=bounds.x && a.y<=bounds.y && a.z<=bounds.z
555	// Note: a.w and bounds.w must have value zero
556	PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V bounds);
557
558	//(floor(a.x + 0.5f), floor(a.y + 0.5f), floor(a.z + 0.5f))
559	PX_FORCE_INLINE Vec3V V3Round(const Vec3V a);
560
561	//(sinf(a.x), sinf(a.y), sinf(a.z))
562	PX_FORCE_INLINE Vec3V V3Sin(const Vec3V a);
563	//(cosf(a.x), cosf(a.y), cosf(a.z))
564	PX_FORCE_INLINE Vec3V V3Cos(const Vec3V a);
565
566	//(a.y,a.z,a.z)
567	PX_FORCE_INLINE Vec3V V3PermYZZ(const Vec3V a);
568	//(a.x,a.y,a.x)
569	PX_FORCE_INLINE Vec3V V3PermXYX(const Vec3V a);
570	//(a.y,a.z,a.x)
571	PX_FORCE_INLINE Vec3V V3PermYZX(const Vec3V a);
572	//(a.z, a.x, a.y)
573	PX_FORCE_INLINE Vec3V V3PermZXY(const Vec3V a);
574	//(a.z,a.z,a.y)
575	PX_FORCE_INLINE Vec3V V3PermZZY(const Vec3V a);
576	//(a.y,a.x,a.x)
577	PX_FORCE_INLINE Vec3V V3PermYXX(const Vec3V a);
578	//(0, v1.z, v0.y)
579	PX_FORCE_INLINE Vec3V V3Perm_Zero_1Z_0Y(const Vec3V v0, const Vec3V v1);
580	//(v0.z, 0, v1.x)
581	PX_FORCE_INLINE Vec3V V3Perm_0Z_Zero_1X(const Vec3V v0, const Vec3V v1);
582	//(v1.y, v0.x, 0)
583	PX_FORCE_INLINE Vec3V V3Perm_1Y_0X_Zero(const Vec3V v0, const Vec3V v1);
584
585	// Transpose 3 Vec3Vs inplace. Sets the w component to zero
586	// [ x0, y0, z0, w0] [ x1, y1, z1, w1] [ x2, y2, z2, w2] -> [x0 x1 x2 0] [y0 y1 y2 0] [z0 z1 z2 0]
587	PX_FORCE_INLINE void V3Transpose(Vec3V& col0, Vec3V& col1, Vec3V& col2);
588
589	//////////////////////////////////////////////////////////////////
590	// Math operations on Vec4V
591	//////////////////////////////////////////////////////////////////
592
593	//(f,f,f,f)
594	PX_FORCE_INLINE Vec4V V4Splat(const FloatV f);
595
596	//(f[0],f[1],f[2],f[3])
597	PX_FORCE_INLINE Vec4V V4Merge(const FloatV* const f);
598	//(x,y,z,w)
599	PX_FORCE_INLINE Vec4V V4Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w);
600	//(x.w, y.w, z.w, w.w)
601	PX_FORCE_INLINE Vec4V V4MergeW(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w);
602	//(x.z, y.z, z.z, w.z)
603	PX_FORCE_INLINE Vec4V V4MergeZ(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w);
604	//(x.y, y.y, z.y, w.y)
605	PX_FORCE_INLINE Vec4V V4MergeY(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w);
606	//(x.x, y.x, z.x, w.x)
607	PX_FORCE_INLINE Vec4V V4MergeX(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w);
608
609	//(a.x, b.x, a.y, b.y)
610	PX_FORCE_INLINE Vec4V V4UnpackXY(const Vec4VArg a, const Vec4VArg b);
611	//(a.z, b.z, a.w, b.w)
612	PX_FORCE_INLINE Vec4V V4UnpackZW(const Vec4VArg a, const Vec4VArg b);
613
614	//(1,0,0,0)
615	PX_FORCE_INLINE Vec4V V4UnitW();
616	//(0,1,0,0)
617	PX_FORCE_INLINE Vec4V V4UnitY();
618	//(0,0,1,0)
619	PX_FORCE_INLINE Vec4V V4UnitZ();
620	//(0,0,0,1)
621	PX_FORCE_INLINE Vec4V V4UnitW();
622
623	//(f.x,f.x,f.x,f.x)
624	PX_FORCE_INLINE FloatV V4GetX(const Vec4V f);
625	//(f.y,f.y,f.y,f.y)
626	PX_FORCE_INLINE FloatV V4GetY(const Vec4V f);
627	//(f.z,f.z,f.z,f.z)
628	PX_FORCE_INLINE FloatV V4GetZ(const Vec4V f);
629	//(f.w,f.w,f.w,f.w)
630	PX_FORCE_INLINE FloatV V4GetW(const Vec4V f);
631
632	//(f,v.y,v.z,v.w)
633	PX_FORCE_INLINE Vec4V V4SetX(const Vec4V v, const FloatV f);
634	//(v.x,f,v.z,v.w)
635	PX_FORCE_INLINE Vec4V V4SetY(const Vec4V v, const FloatV f);
636	//(v.x,v.y,f,v.w)
637	PX_FORCE_INLINE Vec4V V4SetZ(const Vec4V v, const FloatV f);
638	//(v.x,v.y,v.z,f)
639	PX_FORCE_INLINE Vec4V V4SetW(const Vec4V v, const FloatV f);
640
641	//(v.x,v.y,v.z,0)
642	PX_FORCE_INLINE Vec4V V4ClearW(const Vec4V v);
643
644	//(a[elementIndex], a[elementIndex], a[elementIndex], a[elementIndex])
645	template <int elementIndex>
646	PX_FORCE_INLINE Vec4V V4SplatElement(Vec4V a);
647
648	// v.x=f
649	PX_FORCE_INLINE void V4WriteX(Vec4V& v, const PxF32 f);
650	// v.y=f
651	PX_FORCE_INLINE void V4WriteY(Vec4V& v, const PxF32 f);
652	// v.z=f
653	PX_FORCE_INLINE void V4WriteZ(Vec4V& v, const PxF32 f);
654	// v.w=f
655	PX_FORCE_INLINE void V4WriteW(Vec4V& v, const PxF32 f);
656	// v.x=f.x, v.y=f.y, v.z=f.z
657	PX_FORCE_INLINE void V4WriteXYZ(Vec4V& v, const PxVec3& f);
658	// return v.x
659	PX_FORCE_INLINE PxF32 V4ReadX(const Vec4V& v);
660	// return v.y
661	PX_FORCE_INLINE PxF32 V4ReadY(const Vec4V& v);
662	// return v.z
663	PX_FORCE_INLINE PxF32 V4ReadZ(const Vec4V& v);
664	// return v.w
665	PX_FORCE_INLINE PxF32 V4ReadW(const Vec4V& v);
666	// return (v.x,v.y,v.z)
667	PX_FORCE_INLINE const PxVec3& V4ReadXYZ(const Vec4V& v);
668
669	//(0,0,0,0)
670	PX_FORCE_INLINE Vec4V V4Zero();
671	//(1,1,1,1)
672	PX_FORCE_INLINE Vec4V V4One();
673	//(PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL)
674	PX_FORCE_INLINE Vec4V V4Eps();
675
676	//-c (per component)
677	PX_FORCE_INLINE Vec4V V4Neg(const Vec4V c);
678	// a+b (per component)
679	PX_FORCE_INLINE Vec4V V4Add(const Vec4V a, const Vec4V b);
680	// a-b (per component)
681	PX_FORCE_INLINE Vec4V V4Sub(const Vec4V a, const Vec4V b);
682	// a*b (per component)
683	PX_FORCE_INLINE Vec4V V4Scale(const Vec4V a, const FloatV b);
684	// a*b (per component)
685	PX_FORCE_INLINE Vec4V V4Mul(const Vec4V a, const Vec4V b);
686	// a/b (per component)
687	PX_FORCE_INLINE Vec4V V4ScaleInv(const Vec4V a, const FloatV b);
688	// a/b (per component)
689	PX_FORCE_INLINE Vec4V V4Div(const Vec4V a, const Vec4V b);
690	// a/b (per component)
691	PX_FORCE_INLINE Vec4V V4ScaleInvFast(const Vec4V a, const FloatV b);
692	// a/b (per component)
693	PX_FORCE_INLINE Vec4V V4DivFast(const Vec4V a, const Vec4V b);
694	// 1.0f/a
695	PX_FORCE_INLINE Vec4V V4Recip(const Vec4V a);
696	// 1.0f/a
697	PX_FORCE_INLINE Vec4V V4RecipFast(const Vec4V a);
698	// 1.0f/sqrt(a)
699	PX_FORCE_INLINE Vec4V V4Rsqrt(const Vec4V a);
700	// 1.0f/sqrt(a)
701	PX_FORCE_INLINE Vec4V V4RsqrtFast(const Vec4V a);
702	// a*b+c
703	PX_FORCE_INLINE Vec4V V4ScaleAdd(const Vec4V a, const FloatV b, const Vec4V c);
704	// c-a*b
705	PX_FORCE_INLINE Vec4V V4NegScaleSub(const Vec4V a, const FloatV b, const Vec4V c);
706	// a*b+c
707	PX_FORCE_INLINE Vec4V V4MulAdd(const Vec4V a, const Vec4V b, const Vec4V c);
708	// c-a*b
709	PX_FORCE_INLINE Vec4V V4NegMulSub(const Vec4V a, const Vec4V b, const Vec4V c);
710
711	// fabs(a)
712	PX_FORCE_INLINE Vec4V V4Abs(const Vec4V a);
713	// bitwise a & ~b
714	PX_FORCE_INLINE Vec4V V4Andc(const Vec4V a, const VecU32V b);
715
716	// a.b (W is taken into account)
717	PX_FORCE_INLINE FloatV V4Dot(const Vec4V a, const Vec4V b);
718	// a.b (same computation as V3Dot. W is ignored in input)
719	PX_FORCE_INLINE FloatV V4Dot3(const Vec4V a, const Vec4V b);
720	// aXb (same computation as V3Cross. W is ignored in input and undefined in output)
721	PX_FORCE_INLINE Vec4V V4Cross(const Vec4V a, const Vec4V b);
722
723	//|a.a|^1/2
724	PX_FORCE_INLINE FloatV V4Length(const Vec4V a);
725	// a.a
726	PX_FORCE_INLINE FloatV V4LengthSq(const Vec4V a);
727
728	// a*|a.a|^-1/2
729	PX_FORCE_INLINE Vec4V V4Normalize(const Vec4V a);
730	// a.a>0 ? a*|a.a|^-1/2 : unsafeReturnValue
731	PX_FORCE_INLINE Vec4V V4NormalizeSafe(const Vec4V a, const Vec4V unsafeReturnValue);
732	// a*|a.a|^-1/2
733	PX_FORCE_INLINE Vec4V V4NormalizeFast(const Vec4V a);
734
735	// c ? a : b (per component)
736	PX_FORCE_INLINE Vec4V V4Sel(const BoolV c, const Vec4V a, const Vec4V b);
737	// a>b (per component)
738	PX_FORCE_INLINE BoolV V4IsGrtr(const Vec4V a, const Vec4V b);
739	// a>=b (per component)
740	PX_FORCE_INLINE BoolV V4IsGrtrOrEq(const Vec4V a, const Vec4V b);
741	// a==b (per component)
742	PX_FORCE_INLINE BoolV V4IsEq(const Vec4V a, const Vec4V b);
743	// Max(a,b) (per component)
744	PX_FORCE_INLINE Vec4V V4Max(const Vec4V a, const Vec4V b);
745	// Min(a,b) (per component)
746	PX_FORCE_INLINE Vec4V V4Min(const Vec4V a, const Vec4V b);
747	// Get the maximum component from a
748	PX_FORCE_INLINE FloatV V4ExtractMax(const Vec4V a);
749	// Get the minimum component from a
750	PX_FORCE_INLINE FloatV V4ExtractMin(const Vec4V a);
751
752	// Clamp(a,b) (per component)
753	PX_FORCE_INLINE Vec4V V4Clamp(const Vec4V a, const Vec4V minV, const Vec4V maxV);
754
755	// return 1 if all components of a are greater than all components of b.
756	PX_FORCE_INLINE PxU32 V4AllGrtr(const Vec4V a, const Vec4V b);
757	// return 1 if all components of a are greater than or equal to all components of b
758	PX_FORCE_INLINE PxU32 V4AllGrtrOrEq(const Vec4V a, const Vec4V b);
759	// return 1 if XYZ components of a are greater than or equal to XYZ components of b. W is ignored.
760	PX_FORCE_INLINE PxU32 V4AllGrtrOrEq3(const Vec4V a, const Vec4V b);
761	// return 1 if all components of a are equal to all components of b
762	PX_FORCE_INLINE PxU32 V4AllEq(const Vec4V a, const Vec4V b);
763	// return 1 if any XYZ component of a is greater than the corresponding component of b. W is ignored.
764	PX_FORCE_INLINE PxU32 V4AnyGrtr3(const Vec4V a, const Vec4V b);
765
766	// round(a)(per component)
767	PX_FORCE_INLINE Vec4V V4Round(const Vec4V a);
768	// sin(a) (per component)
769	PX_FORCE_INLINE Vec4V V4Sin(const Vec4V a);
770	// cos(a) (per component)
771	PX_FORCE_INLINE Vec4V V4Cos(const Vec4V a);
772
773	// Permute v into a new vec4v with YXWZ format
774	PX_FORCE_INLINE Vec4V V4PermYXWZ(const Vec4V v);
775	// Permute v into a new vec4v with XZXZ format
776	PX_FORCE_INLINE Vec4V V4PermXZXZ(const Vec4V v);
777	// Permute v into a new vec4v with YWYW format
778	PX_FORCE_INLINE Vec4V V4PermYWYW(const Vec4V v);
779	// Permute v into a new vec4v with YZXW format
780	PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V v);
781	// Permute v into a new vec4v with ZWXY format - equivalent to a swap of the two 64bit parts of the vector
782	PX_FORCE_INLINE Vec4V V4PermZWXY(const Vec4V a);
783
784	// Permute v into a new vec4v with format {a[x], a[y], a[z], a[w]}
785	// V4Perm<1,3,1,3> is equal to V4PermYWYW
786	// V4Perm<0,2,0,2> is equal to V4PermXZXZ
787	// V4Perm<1,0,3,2> is equal to V4PermYXWZ
788	template <PxU8 x, PxU8 y, PxU8 z, PxU8 w>
789	PX_FORCE_INLINE Vec4V V4Perm(const Vec4V a);
790
791	// Transpose 3 Vec3Vs inplace.
792	// [ x0, y0, z0] [ x1, y1, z1] [ x2, y2, z2] ->
793	// [ x0, x1, x2] [ y0, y1, y2] [ z0, z1, z2]
794	PX_FORCE_INLINE void V3Transpose(Vec3V& col0, Vec3V& col1, Vec3V& col2);
795
796	// q = cos(a/2) + u*sin(a/2)
797	PX_FORCE_INLINE QuatV QuatV_From_RotationAxisAngle(const Vec3V u, const FloatV a);
798	// convert q to a unit quaternion
799	PX_FORCE_INLINE QuatV QuatNormalize(const QuatV q);
800	//|q.q|^1/2
801	PX_FORCE_INLINE FloatV QuatLength(const QuatV q);
802	// q.q
803	PX_FORCE_INLINE FloatV QuatLengthSq(const QuatV q);
804	// a.b
805	PX_FORCE_INLINE FloatV QuatDot(const QuatV a, const QuatV b);
806	//(-q.x, -q.y, -q.z, q.w)
807	PX_FORCE_INLINE QuatV QuatConjugate(const QuatV q);
808	//(q.x, q.y, q.z)
809	PX_FORCE_INLINE Vec3V QuatGetImaginaryPart(const QuatV q);
810	// convert quaternion to matrix 33
811	PX_FORCE_INLINE Mat33V QuatGetMat33V(const QuatVArg q);
812	// convert quaternion to matrix 33
813	PX_FORCE_INLINE void QuatGetMat33V(const QuatVArg q, Vec3V& column0, Vec3V& column1, Vec3V& column2);
814	// convert matrix 33 to quaternion
815	PX_FORCE_INLINE QuatV Mat33GetQuatV(const Mat33V& a);
816	// brief computes rotation of x-axis
817	PX_FORCE_INLINE Vec3V QuatGetBasisVector0(const QuatV q);
818	// brief computes rotation of y-axis
819	PX_FORCE_INLINE Vec3V QuatGetBasisVector1(const QuatV q);
820	// brief computes rotation of z-axis
821	PX_FORCE_INLINE Vec3V QuatGetBasisVector2(const QuatV q);
822	// calculate the rotation vector from q and v
823	PX_FORCE_INLINE Vec3V QuatRotate(const QuatV q, const Vec3V v);
824	// calculate the rotation vector from the conjugate quaternion and v
825	PX_FORCE_INLINE Vec3V QuatRotateInv(const QuatV q, const Vec3V v);
826	// quaternion multiplication
827	PX_FORCE_INLINE QuatV QuatMul(const QuatV a, const QuatV b);
828	// quaternion add
829	PX_FORCE_INLINE QuatV QuatAdd(const QuatV a, const QuatV b);
830	// (-q.x, -q.y, -q.z, -q.w)
831	PX_FORCE_INLINE QuatV QuatNeg(const QuatV q);
832	// (a.x - b.x, a.y-b.y, a.z-b.z, a.w-b.w )
833	PX_FORCE_INLINE QuatV QuatSub(const QuatV a, const QuatV b);
834	// (a.x*b, a.y*b, a.z*b, a.w*b)
835	PX_FORCE_INLINE QuatV QuatScale(const QuatV a, const FloatV b);
836	// (x = v[0], y = v[1], z = v[2], w =v[3])
837	PX_FORCE_INLINE QuatV QuatMerge(const FloatV* const v);
838	// (x = v[0], y = v[1], z = v[2], w =v[3])
839	PX_FORCE_INLINE QuatV QuatMerge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w);
840	// (x = 0.f, y = 0.f, z = 0.f, w = 1.f)
841	PX_FORCE_INLINE QuatV QuatIdentity();
842	// check for each component is valid
843	PX_FORCE_INLINE bool isFiniteQuatV(const QuatV q);
844	// check for each component is valid
845	PX_FORCE_INLINE bool isValidQuatV(const QuatV q);
846	// check for each component is valid
847	PX_FORCE_INLINE bool isSaneQuatV(const QuatV q);
848
849	// Math operations on 16-byte aligned booleans.
850	// x=false y=false z=false w=false
851	PX_FORCE_INLINE BoolV BFFFF();
852	// x=false y=false z=false w=true
853	PX_FORCE_INLINE BoolV BFFFT();
854	// x=false y=false z=true w=false
855	PX_FORCE_INLINE BoolV BFFTF();
856	// x=false y=false z=true w=true
857	PX_FORCE_INLINE BoolV BFFTT();
858	// x=false y=true z=false w=false
859	PX_FORCE_INLINE BoolV BFTFF();
860	// x=false y=true z=false w=true
861	PX_FORCE_INLINE BoolV BFTFT();
862	// x=false y=true z=true w=false
863	PX_FORCE_INLINE BoolV BFTTF();
864	// x=false y=true z=true w=true
865	PX_FORCE_INLINE BoolV BFTTT();
866	// x=true y=false z=false w=false
867	PX_FORCE_INLINE BoolV BTFFF();
868	// x=true y=false z=false w=true
869	PX_FORCE_INLINE BoolV BTFFT();
870	// x=true y=false z=true w=false
871	PX_FORCE_INLINE BoolV BTFTF();
872	// x=true y=false z=true w=true
873	PX_FORCE_INLINE BoolV BTFTT();
874	// x=true y=true z=false w=false
875	PX_FORCE_INLINE BoolV BTTFF();
876	// x=true y=true z=false w=true
877	PX_FORCE_INLINE BoolV BTTFT();
878	// x=true y=true z=true w=false
879	PX_FORCE_INLINE BoolV BTTTF();
880	// x=true y=true z=true w=true
881	PX_FORCE_INLINE BoolV BTTTT();
882
883	// x=false y=false z=false w=true
884	PX_FORCE_INLINE BoolV BWMask();
885	// x=true y=false z=false w=false
886	PX_FORCE_INLINE BoolV BXMask();
887	// x=false y=true z=false w=false
888	PX_FORCE_INLINE BoolV BYMask();
889	// x=false y=false z=true w=false
890	PX_FORCE_INLINE BoolV BZMask();
891
892	// get x component
893	PX_FORCE_INLINE BoolV BGetX(const BoolV f);
894	// get y component
895	PX_FORCE_INLINE BoolV BGetY(const BoolV f);
896	// get z component
897	PX_FORCE_INLINE BoolV BGetZ(const BoolV f);
898	// get w component
899	PX_FORCE_INLINE BoolV BGetW(const BoolV f);
900
901	// Use elementIndex to splat xxxx or yyyy or zzzz or wwww
902	template <int elementIndex>
903	PX_FORCE_INLINE BoolV BSplatElement(Vec4V a);
904
905	// component-wise && (AND)
906	PX_FORCE_INLINE BoolV BAnd(const BoolV a, const BoolV b);
907	// component-wise || (OR)
908	PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b);
909	// component-wise not
910	PX_FORCE_INLINE BoolV BNot(const BoolV a);
911
912	// if all four components are true, return true, otherwise return false
913	PX_FORCE_INLINE BoolV BAllTrue4(const BoolV a);
914
915	// if any of the four components is true, return true, otherwise return false
916	PX_FORCE_INLINE BoolV BAnyTrue4(const BoolV a);
917
918	// if all three(0, 1, 2) components are true, return true, otherwise return false
919	PX_FORCE_INLINE BoolV BAllTrue3(const BoolV a);
920
921	// if any of the three (0, 1, 2) components is true, return true, otherwise return false
922	PX_FORCE_INLINE BoolV BAnyTrue3(const BoolV a);
923
924	// Return 1 if all components equal, zero otherwise.
925	PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b);
926
927	// Specialized/faster BAllEq function for b==TTTT
928	PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a);
929	// Specialized/faster BAllEq function for b==FFFF
930	PX_FORCE_INLINE PxU32 BAllEqFFFF(const BoolV a);
931
932	/// Get BoolV as bits set in a PxU32. A bit in the output is set if the element is 'true' in the input.
933	/// There is a bit for each element in a, with element 0's value held in bit 0, element 1's in bit 1 and so forth.
934	/// If nothing is true in the input it will return 0, and if all are true it will return 0xf.
935	/// NOTE! The performance of the function varies considerably by platform, thus it is recommended to use it only
936	/// where your algorithm really needs a BoolV in an integer variable.
937	PX_FORCE_INLINE PxU32 BGetBitMask(const BoolV a);
938
939	// VecI32V stuff
940
941	PX_FORCE_INLINE VecI32V VecI32V_Zero();
942
943	PX_FORCE_INLINE VecI32V VecI32V_One();
944
945	PX_FORCE_INLINE VecI32V VecI32V_Two();
946
947	PX_FORCE_INLINE VecI32V VecI32V_MinusOne();
948
949	// Compute a shift parameter for VecI32V_LeftShift and VecI32V_RightShift
950	// Each element of shift must be identical ie the vector must have form {count, count, count, count} with count>=0
951	PX_FORCE_INLINE VecShiftV VecI32V_PrepareShift(const VecI32VArg shift);
952
953	// Shift each element of a leftwards by the same amount
954	// Compute shift with VecI32V_PrepareShift
955	//{a.x<<shift[0], a.y<<shift[0], a.z<<shift[0], a.w<<shift[0]}
956	PX_FORCE_INLINE VecI32V VecI32V_LeftShift(const VecI32VArg a, const VecShiftVArg shift);
957
958	// Shift each element of a rightwards by the same amount
959	// Compute shift with VecI32V_PrepareShift
960	//{a.x>>shift[0], a.y>>shift[0], a.z>>shift[0], a.w>>shift[0]}
961	PX_FORCE_INLINE VecI32V VecI32V_RightShift(const VecI32VArg a, const VecShiftVArg shift);
962
963	PX_FORCE_INLINE VecI32V VecI32V_Add(const VecI32VArg a, const VecI32VArg b);
964
965	PX_FORCE_INLINE VecI32V VecI32V_Or(const VecI32VArg a, const VecI32VArg b);
966
967	PX_FORCE_INLINE VecI32V VecI32V_GetX(const VecI32VArg a);
968
969	PX_FORCE_INLINE VecI32V VecI32V_GetY(const VecI32VArg a);
970
971	PX_FORCE_INLINE VecI32V VecI32V_GetZ(const VecI32VArg a);
972
973	PX_FORCE_INLINE VecI32V VecI32V_GetW(const VecI32VArg a);
974
975	PX_FORCE_INLINE VecI32V VecI32V_Sub(const VecI32VArg a, const VecI32VArg b);
976
977	PX_FORCE_INLINE BoolV VecI32V_IsGrtr(const VecI32VArg a, const VecI32VArg b);
978
979	PX_FORCE_INLINE BoolV VecI32V_IsEq(const VecI32VArg a, const VecI32VArg b);
980
981	PX_FORCE_INLINE VecI32V V4I32Sel(const BoolV c, const VecI32V a, const VecI32V b);
982
983	// VecU32V stuff
984
985	PX_FORCE_INLINE VecU32V U4Zero();
986
987	PX_FORCE_INLINE VecU32V U4One();
988
989	PX_FORCE_INLINE VecU32V U4Two();
990
991	PX_FORCE_INLINE BoolV V4IsEqU32(const VecU32V a, const VecU32V b);
992
993	PX_FORCE_INLINE VecU32V V4U32Sel(const BoolV c, const VecU32V a, const VecU32V b);
994
995	PX_FORCE_INLINE VecU32V V4U32or(VecU32V a, VecU32V b);
996
997	PX_FORCE_INLINE VecU32V V4U32xor(VecU32V a, VecU32V b);
998
999	PX_FORCE_INLINE VecU32V V4U32and(VecU32V a, VecU32V b);
1000
1001	PX_FORCE_INLINE VecU32V V4U32Andc(VecU32V a, VecU32V b);
1002
1003	// VecU32 - why does this not return a bool?
1004	PX_FORCE_INLINE VecU32V V4IsGrtrV32u(const Vec4V a, const Vec4V b);
1005
1006	// Math operations on 16-byte aligned Mat33s (represents any 3x3 matrix)
1007	PX_FORCE_INLINE Mat33V M33Load(const PxMat33& m)
1008	{
1009	return Mat33V (Vec3V_From_Vec4V(v: V4LoadU(f: &m.column0.x)),
1010	Vec3V_From_Vec4V(v: V4LoadU(f: &m.column1.x)), V3LoadU(f: m.column2));
1011	}
1012	// a*b
1013	PX_FORCE_INLINE Vec3V M33MulV3(const Mat33V& a, const Vec3V b);
1014	// A*b + c
1015	PX_FORCE_INLINE Vec3V M33MulV3AddV3(const Mat33V& A, const Vec3V b, const Vec3V c);
1016	// transpose(a) * b
1017	PX_FORCE_INLINE Vec3V M33TrnspsMulV3(const Mat33V& a, const Vec3V b);
1018	// a*b
1019	PX_FORCE_INLINE Mat33V M33MulM33(const Mat33V& a, const Mat33V& b);
1020	// a+b
1021	PX_FORCE_INLINE Mat33V M33Add(const Mat33V& a, const Mat33V& b);
1022	// a-b
1023	PX_FORCE_INLINE Mat33V M33Sub(const Mat33V& a, const Mat33V& b);
1024	//-a
1025	PX_FORCE_INLINE Mat33V M33Neg(const Mat33V& a);
1026	// absolute value of the matrix
1027	PX_FORCE_INLINE Mat33V M33Abs(const Mat33V& a);
1028	// inverse mat
1029	PX_FORCE_INLINE Mat33V M33Inverse(const Mat33V& a);
1030	// transpose(a)
1031	PX_FORCE_INLINE Mat33V M33Trnsps(const Mat33V& a);
1032	// create an identity matrix
1033	PX_FORCE_INLINE Mat33V M33Identity();
1034
1035	// create an M33 from a vec3 that holds its diagonal elements
1036	PX_FORCE_INLINE Mat33V M33Diagonal(const Vec3VArg);
1037
1038	// Not implemented
1039	// return 1 if all components of a are equal to all components of b
1040	// PX_FORCE_INLINE PxU32 V4U32AllEq(const VecU32V a, const VecU32V b);
1041	// v.w=f
1042	// PX_FORCE_INLINE void V3WriteW(Vec3V& v, const PxF32 f);
1043	// PX_FORCE_INLINE PxF32 V3ReadW(const Vec3V& v);
1044
1045	// Not used
1046	// PX_FORCE_INLINE Vec4V V4LoadAligned(Vec4V* addr);
1047	// PX_FORCE_INLINE Vec4V V4LoadUnaligned(Vec4V* addr);
1048	// floor(a)(per component)
1049	// PX_FORCE_INLINE Vec4V V4Floor(Vec4V a);
1050	// ceil(a) (per component)
1051	// PX_FORCE_INLINE Vec4V V4Ceil(Vec4V a);
1052	// PX_FORCE_INLINE VecU32V V4ConvertToU32VSaturate(const Vec4V a, PxU32 power);
1053
1054	// Math operations on 16-byte aligned Mat34s (represents transformation matrix - rotation and translation).
1055	// namespace _Mat34V
1056	//{
1057	// //a*b
1058	// PX_FORCE_INLINE Vec3V multiplyV(const Mat34V& a, const Vec3V b);
1059	// //a_rotation * b
1060	// PX_FORCE_INLINE Vec3V multiply3X3V(const Mat34V& a, const Vec3V b);
1061	// //transpose(a_rotation)*b
1062	// PX_FORCE_INLINE Vec3V multiplyTranspose3X3V(const Mat34V& a, const Vec3V b);
1063	// //a*b
1064	// PX_FORCE_INLINE Mat34V multiplyV(const Mat34V& a, const Mat34V& b);
1065	// //a_rotation*b
1066	// PX_FORCE_INLINE Mat33V multiply3X3V(const Mat34V& a, const Mat33V& b);
1067	// //a_rotation*b_rotation
1068	// PX_FORCE_INLINE Mat33V multiply3X3V(const Mat34V& a, const Mat34V& b);
1069	// //a+b
1070	// PX_FORCE_INLINE Mat34V addV(const Mat34V& a, const Mat34V& b);
1071	// //a^-1
1072	// PX_FORCE_INLINE Mat34V getInverseV(const Mat34V& a);
1073	// //transpose(a_rotation)
1074	// PX_FORCE_INLINE Mat33V getTranspose3X3(const Mat34V& a);
1075	//}; //namespace _Mat34V
1076
1077	// a*b
1078	//#define M34MulV3(a,b) (M34MulV3(a,b))
1079	////a_rotation * b
1080	//#define M34Mul33V3(a,b) (M34Mul33V3(a,b))
1081	////transpose(a_rotation)*b
1082	//#define M34TrnspsMul33V3(a,b) (M34TrnspsMul33V3(a,b))
1083	////a*b
1084	//#define M34MulM34(a,b) (_Mat34V::multiplyV(a,b))
1085	// a_rotation*b
1086	//#define M34MulM33(a,b) (M34MulM33(a,b))
1087	// a_rotation*b_rotation
1088	//#define M34Mul33MM34(a,b) (M34MulM33(a,b))
1089	// a+b
1090	//#define M34Add(a,b) (M34Add(a,b))
1091	////a^-1
1092	//#define M34Inverse(a,b) (M34Inverse(a))
1093	// transpose(a_rotation)
1094	//#define M34Trnsps33(a) (M33Trnsps3X3(a))
1095
1096	// Math operations on 16-byte aligned Mat44s (represents any 4x4 matrix)
1097	// namespace _Mat44V
1098	//{
1099	// //a*b
1100	// PX_FORCE_INLINE Vec4V multiplyV(const Mat44V& a, const Vec4V b);
1101	// //transpose(a)*b
1102	// PX_FORCE_INLINE Vec4V multiplyTransposeV(const Mat44V& a, const Vec4V b);
1103	// //a*b
1104	// PX_FORCE_INLINE Mat44V multiplyV(const Mat44V& a, const Mat44V& b);
1105	// //a+b
1106	// PX_FORCE_INLINE Mat44V addV(const Mat44V& a, const Mat44V& b);
1107	// //a^-1
1108	// PX_FORCE_INLINE Mat44V getInverseV(const Mat44V& a);
1109	// //transpose(a)
1110	// PX_FORCE_INLINE Mat44V getTransposeV(const Mat44V& a);
1111	//}; //namespace _Mat44V
1112
1113	// namespace _VecU32V
1114	//{
1115	// // pack 8 U32s to 8 U16s with saturation
1116	// PX_FORCE_INLINE VecU16V pack2U32VToU16VSaturate(VecU32V a, VecU32V b);
1117	// PX_FORCE_INLINE VecU32V orV(VecU32V a, VecU32V b);
1118	// PX_FORCE_INLINE VecU32V andV(VecU32V a, VecU32V b);
1119	// PX_FORCE_INLINE VecU32V andcV(VecU32V a, VecU32V b);
1120	// // conversion from integer to float
1121	// PX_FORCE_INLINE Vec4V convertToVec4V(VecU32V a);
1122	// // splat a[elementIndex] into all fields of a
1123	// template<int elementIndex>
1124	// PX_FORCE_INLINE VecU32V splatElement(VecU32V a);
1125	// PX_FORCE_INLINE void storeAligned(VecU32V a, VecU32V* address);
1126	//};
1127
1128	// namespace _VecI32V
1129	//{
1130	// template<int a> PX_FORCE_INLINE VecI32V splatI32();
1131	//};
1132	//
1133	// namespace _VecU16V
1134	//{
1135	// PX_FORCE_INLINE VecU16V orV(VecU16V a, VecU16V b);
1136	// PX_FORCE_INLINE VecU16V andV(VecU16V a, VecU16V b);
1137	// PX_FORCE_INLINE VecU16V andcV(VecU16V a, VecU16V b);
1138	// PX_FORCE_INLINE void storeAligned(VecU16V val, VecU16V* address);
1139	// PX_FORCE_INLINE VecU16V loadAligned(VecU16V* addr);
1140	// PX_FORCE_INLINE VecU16V loadUnaligned(VecU16V* addr);
1141	// PX_FORCE_INLINE VecU16V compareGt(VecU16V a, VecU16V b);
1142	// template<int elementIndex>
1143	// PX_FORCE_INLINE VecU16V splatElement(VecU16V a);
1144	// PX_FORCE_INLINE VecU16V subtractModulo(VecU16V a, VecU16V b);
1145	// PX_FORCE_INLINE VecU16V addModulo(VecU16V a, VecU16V b);
1146	// PX_FORCE_INLINE VecU32V getLo16(VecU16V a); // [0,2,4,6] 16-bit values to [0,1,2,3] 32-bit vector
1147	// PX_FORCE_INLINE VecU32V getHi16(VecU16V a); // [1,3,5,7] 16-bit values to [0,1,2,3] 32-bit vector
1148	//};
1149	//
1150	// namespace _VecI16V
1151	//{
1152	// template <int val> PX_FORCE_INLINE VecI16V splatImmediate();
1153	//};
1154	//
1155	// namespace _VecU8V
1156	//{
1157	//};
1158
1159	// a*b
1160	//#define M44MulV4(a,b) (M44MulV4(a,b))
1161	////transpose(a)*b
1162	//#define M44TrnspsMulV4(a,b) (M44TrnspsMulV4(a,b))
1163	////a*b
1164	//#define M44MulM44(a,b) (M44MulM44(a,b))
1165	////a+b
1166	//#define M44Add(a,b) (M44Add(a,b))
1167	////a^-1
1168	//#define M44Inverse(a) (M44Inverse(a))
1169	////transpose(a)
1170	//#define M44Trnsps(a) (M44Trnsps(a))
1171
1172	// dsequeira: these used to be assert'd out in SIMD builds, but they're necessary if
1173	// we want to be able to write some scalar functions which run using SIMD data structures
1174
1175	PX_FORCE_INLINE void V3WriteX(Vec3V& v, const PxF32 f)
1176	{
1177	reinterpret_cast<PxVec3&>(v).x = f;
1178	}
1179
1180	PX_FORCE_INLINE void V3WriteY(Vec3V& v, const PxF32 f)
1181	{
1182	reinterpret_cast<PxVec3&>(v).y = f;
1183	}
1184
1185	PX_FORCE_INLINE void V3WriteZ(Vec3V& v, const PxF32 f)
1186	{
1187	reinterpret_cast<PxVec3&>(v).z = f;
1188	}
1189
1190	PX_FORCE_INLINE void V3WriteXYZ(Vec3V& v, const PxVec3& f)
1191	{
1192	reinterpret_cast<PxVec3&>(v) = f;
1193	}
1194
1195	PX_FORCE_INLINE PxF32 V3ReadX(const Vec3V& v)
1196	{
1197	return reinterpret_cast<const PxVec3&>(v).x;
1198	}
1199
1200	PX_FORCE_INLINE PxF32 V3ReadY(const Vec3V& v)
1201	{
1202	return reinterpret_cast<const PxVec3&>(v).y;
1203	}
1204
1205	PX_FORCE_INLINE PxF32 V3ReadZ(const Vec3V& v)
1206	{
1207	return reinterpret_cast<const PxVec3&>(v).z;
1208	}
1209
1210	PX_FORCE_INLINE const PxVec3& V3ReadXYZ(const Vec3V& v)
1211	{
1212	return reinterpret_cast<const PxVec3&>(v);
1213	}
1214
1215	PX_FORCE_INLINE void V4WriteX(Vec4V& v, const PxF32 f)
1216	{
1217	reinterpret_cast<PxVec4&>(v).x = f;
1218	}
1219
1220	PX_FORCE_INLINE void V4WriteY(Vec4V& v, const PxF32 f)
1221	{
1222	reinterpret_cast<PxVec4&>(v).y = f;
1223	}
1224
1225	PX_FORCE_INLINE void V4WriteZ(Vec4V& v, const PxF32 f)
1226	{
1227	reinterpret_cast<PxVec4&>(v).z = f;
1228	}
1229
1230	PX_FORCE_INLINE void V4WriteW(Vec4V& v, const PxF32 f)
1231	{
1232	reinterpret_cast<PxVec4&>(v).w = f;
1233	}
1234
1235	PX_FORCE_INLINE void V4WriteXYZ(Vec4V& v, const PxVec3& f)
1236	{
1237	reinterpret_cast<PxVec3&>(v) = f;
1238	}
1239
1240	PX_FORCE_INLINE PxF32 V4ReadX(const Vec4V& v)
1241	{
1242	return reinterpret_cast<const PxVec4&>(v).x;
1243	}
1244
1245	PX_FORCE_INLINE PxF32 V4ReadY(const Vec4V& v)
1246	{
1247	return reinterpret_cast<const PxVec4&>(v).y;
1248	}
1249
1250	PX_FORCE_INLINE PxF32 V4ReadZ(const Vec4V& v)
1251	{
1252	return reinterpret_cast<const PxVec4&>(v).z;
1253	}
1254
1255	PX_FORCE_INLINE PxF32 V4ReadW(const Vec4V& v)
1256	{
1257	return reinterpret_cast<const PxVec4&>(v).w;
1258	}
1259
1260	PX_FORCE_INLINE const PxVec3& V4ReadXYZ(const Vec4V& v)
1261	{
1262	return reinterpret_cast<const PxVec3&>(v);
1263	}
1264
// Transposes 4 Vec4V (inA..inD) into 3 Vec4V (outA..outC), assuming that the W component of
// the inputs can be ignored.
// inA, inB and inC are assigned to and used as scratch, so they must be modifiable lvalues
// and no longer hold their original values afterwards; inD is only read.
// The expansion is a plain sequence of statements (no enclosing braces or do/while), so it
// must not be used as the unbraced body of an if/else/loop.
#define PX_TRANSPOSE_44_34(inA, inB, inC, inD, outA, outB, outC) \
	outA = V4UnpackXY(inA, inC);                                 \
	inA = V4UnpackZW(inA, inC);                                  \
	inC = V4UnpackXY(inB, inD);                                  \
	inB = V4UnpackZW(inB, inD);                                  \
	outB = V4UnpackZW(outA, inC);                                \
	outA = V4UnpackXY(outA, inC);                                \
	outC = V4UnpackXY(inA, inB);
1281
// Transposes 3 Vec4V (inA..inC) into 4 Vec4V (outA..outD); the W components of the outputs
// are garbage.
// inA and inC are assigned to and used as scratch, so they must be modifiable lvalues and no
// longer hold their original values afterwards; inB is only read. outC is also written twice
// (first as a temporary, then with its final value).
// The expansion is a plain sequence of statements (no enclosing braces or do/while), so it
// must not be used as the unbraced body of an if/else/loop.
#define PX_TRANSPOSE_34_44(inA, inB, inC, outA, outB, outC, outD) \
	outA = V4UnpackXY(inA, inC);                                  \
	inA = V4UnpackZW(inA, inC);                                   \
	outC = V4UnpackXY(inB, inB);                                  \
	inC = V4UnpackZW(inB, inB);                                   \
	outB = V4UnpackZW(outA, outC);                                \
	outA = V4UnpackXY(outA, outC);                                \
	outC = V4UnpackXY(inA, inC);                                  \
	outD = V4UnpackZW(inA, inC);
1292
// Transposes 4 Vec4V (inA..inD) into 4 Vec4V (outA..outD).
// inA, inB and inC are assigned to and used as scratch, so they must be modifiable lvalues
// and no longer hold their original values afterwards; inD is only read.
// The expansion is a plain sequence of statements (no enclosing braces or do/while), so it
// must not be used as the unbraced body of an if/else/loop.
#define PX_TRANSPOSE_44(inA, inB, inC, inD, outA, outB, outC, outD) \
	outA = V4UnpackXY(inA, inC);                                    \
	inA = V4UnpackZW(inA, inC);                                     \
	inC = V4UnpackXY(inB, inD);                                     \
	inB = V4UnpackZW(inB, inD);                                     \
	outB = V4UnpackZW(outA, inC);                                   \
	outA = V4UnpackXY(outA, inC);                                   \
	outC = V4UnpackXY(inA, inB);                                    \
	outD = V4UnpackZW(inA, inB);
1302
1303	// This function returns a Vec4V, where each element is the dot product of one pair of Vec3Vs. On PC, each element in
1304	// the result should be identical to the results if V3Dot was performed
1305	// for each pair of Vec3V.
1306	// However, on other platforms, the result might diverge by some small margin due to differences in FP rounding, e.g. if
1307	// _mm_dp_ps was used or some other approximate dot product or fused madd operations
1308	// were used.
1309	// Where there does not exist a hw-accelerated dot-product operation, this approach should be the fastest way to compute
1310	// the dot product of 4 vectors.
1311	PX_FORCE_INLINE Vec4V V3Dot4(const Vec3VArg a0, const Vec3VArg b0, const Vec3VArg a1, const Vec3VArg b1,
1312	const Vec3VArg a2, const Vec3VArg b2, const Vec3VArg a3, const Vec3VArg b3)
1313	{
1314	Vec4V a0b0 = Vec4V_From_Vec3V(f: V3Mul(a: a0, b: b0));
1315	Vec4V a1b1 = Vec4V_From_Vec3V(f: V3Mul(a: a1, b: b1));
1316	Vec4V a2b2 = Vec4V_From_Vec3V(f: V3Mul(a: a2, b: b2));
1317	Vec4V a3b3 = Vec4V_From_Vec3V(f: V3Mul(a: a3, b: b3));
1318
1319	Vec4V aTrnsps, bTrnsps, cTrnsps;
1320
1321	PX_TRANSPOSE_44_34(a0b0, a1b1, a2b2, a3b3, aTrnsps, bTrnsps, cTrnsps);
1322
1323	return V4Add(a: V4Add(a: aTrnsps, b: bTrnsps), b: cTrnsps);
1324	}
1325
1326	//(f.x,f.y,f.z,0) - Alternative/faster V3LoadU implementation when it is safe to read "W", i.e. the 32bits after the PxVec3.
1327	PX_FORCE_INLINE Vec3V V3LoadU_SafeReadW(const PxVec3& f)
1328	{
1329	return Vec3V_From_Vec4V(v: V4LoadU(f: &f.x));
1330	}
1331
1332	} // namespace aos
1333	} // namespace shdfnd
1334	} // namespace physx
1335
1336	// Now for the cross-platform implementations of the 16-byte aligned maths functions (win32/360/ppu/spu etc).
1337	#if COMPILE_VECTOR_INTRINSICS
1338	#include "PsInlineAoS.h"
1339	#else // #if COMPILE_VECTOR_INTRINSICS
1340	#include "PsVecMathAoSScalarInline.h"
1341	#endif // #if !COMPILE_VECTOR_INTRINSICS
1342	#include "PsVecQuat.h"
1343
1344	#endif // PSFOUNDATION_PSVECMATH_H
1345

Provided by KDAB

Definitions

source code of qtquick3dphysics/src/3rdparty/PhysX/source/foundation/include/PsVecMath.h