1 | // |
2 | // Redistribution and use in source and binary forms, with or without |
3 | // modification, are permitted provided that the following conditions |
4 | // are met: |
5 | // * Redistributions of source code must retain the above copyright |
6 | // notice, this list of conditions and the following disclaimer. |
7 | // * Redistributions in binary form must reproduce the above copyright |
8 | // notice, this list of conditions and the following disclaimer in the |
9 | // documentation and/or other materials provided with the distribution. |
10 | // * Neither the name of NVIDIA CORPORATION nor the names of its |
11 | // contributors may be used to endorse or promote products derived |
12 | // from this software without specific prior written permission. |
13 | // |
14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY |
15 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
17 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
18 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
24 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | // |
26 | // Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved. |
27 | // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. |
28 | // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. |
29 | |
30 | #ifndef PSFOUNDATION_PSVECQUAT_H |
31 | #define PSFOUNDATION_PSVECQUAT_H |
32 | |
33 | //#include "PsInlineAoS.h" |
34 | |
35 | namespace physx |
36 | { |
37 | namespace shdfnd |
38 | { |
39 | namespace aos |
40 | { |
41 | |
// Single-precision pi/2; only defined here when no earlier header provided it.
#if !defined(PX_PIDIV2)
#define PX_PIDIV2 1.570796327f
#endif
45 | |
46 | ////////////////////////////////// |
47 | // QuatV |
48 | ////////////////////////////////// |
49 | PX_FORCE_INLINE QuatV QuatVLoadXYZW(const PxF32 x, const PxF32 y, const PxF32 z, const PxF32 w) |
50 | { |
51 | return V4LoadXYZW(x, y, z, w); |
52 | } |
53 | |
54 | PX_FORCE_INLINE QuatV QuatVLoadU(const PxF32* v) |
55 | { |
56 | return V4LoadU(f: v); |
57 | } |
58 | |
59 | PX_FORCE_INLINE QuatV QuatVLoadA(const PxF32* v) |
60 | { |
61 | return V4LoadA(f: v); |
62 | } |
63 | |
64 | PX_FORCE_INLINE QuatV QuatV_From_RotationAxisAngle(const Vec3V u, const FloatV a) |
65 | { |
66 | // q = cos(a/2) + u*sin(a/2) |
67 | const FloatV half = FLoad(f: 0.5f); |
68 | const FloatV hangle = FMul(a, b: half); |
69 | const FloatV piByTwo(FLoad(PX_PIDIV2)); |
70 | const FloatV PiByTwoMinHangle(FSub(a: piByTwo, b: hangle)); |
71 | const Vec4V hangle2(Vec4V_From_Vec3V(f: V3Merge(x: hangle, y: PiByTwoMinHangle, z: hangle))); |
72 | |
73 | /*const FloatV sina = FSin(hangle); |
74 | const FloatV cosa = FCos(hangle);*/ |
75 | |
76 | const Vec4V _sina = V4Sin(a: hangle2); |
77 | const FloatV sina = V4GetX(f: _sina); |
78 | const FloatV cosa = V4GetY(f: _sina); |
79 | |
80 | const Vec3V v = V3Scale(a: u, b: sina); |
81 | // return V4Sel(BTTTF(), Vec4V_From_Vec3V(v), V4Splat(cosa)); |
82 | return V4SetW(v: Vec4V_From_Vec3V(f: v), f: cosa); |
83 | } |
84 | |
85 | // Normalize |
86 | PX_FORCE_INLINE QuatV QuatNormalize(const QuatV q) |
87 | { |
88 | return V4Normalize(a: q); |
89 | } |
90 | |
91 | PX_FORCE_INLINE FloatV QuatLength(const QuatV q) |
92 | { |
93 | return V4Length(a: q); |
94 | } |
95 | |
96 | PX_FORCE_INLINE FloatV QuatLengthSq(const QuatV q) |
97 | { |
98 | return V4LengthSq(a: q); |
99 | } |
100 | |
101 | PX_FORCE_INLINE FloatV QuatDot(const QuatV a, const QuatV b) // convert this PxQuat to a unit quaternion |
102 | { |
103 | return V4Dot(a, b); |
104 | } |
105 | |
106 | PX_FORCE_INLINE QuatV QuatConjugate(const QuatV q) |
107 | { |
108 | return V4SetW(v: V4Neg(f: q), f: V4GetW(f: q)); |
109 | } |
110 | |
111 | PX_FORCE_INLINE Vec3V QuatGetImaginaryPart(const QuatV q) |
112 | { |
113 | return Vec3V_From_Vec4V(v: q); |
114 | } |
115 | |
116 | /** brief computes rotation of x-axis */ |
117 | PX_FORCE_INLINE Vec3V QuatGetBasisVector0(const QuatV q) |
118 | { |
119 | /*const PxF32 x2 = x*2.0f; |
120 | const PxF32 w2 = w*2.0f; |
121 | return PxVec3( (w * w2) - 1.0f + x*x2, |
122 | (z * w2) + y*x2, |
123 | (-y * w2) + z*x2);*/ |
124 | |
125 | const FloatV two = FLoad(f: 2.f); |
126 | const FloatV w = V4GetW(f: q); |
127 | const Vec3V u = Vec3V_From_Vec4V(v: q); |
128 | |
129 | const FloatV x2 = FMul(a: V3GetX(f: u), b: two); |
130 | const FloatV w2 = FMul(a: w, b: two); |
131 | |
132 | const Vec3V a = V3Scale(a: u, b: x2); |
133 | const Vec3V tmp = V3Merge(x: w, y: V3GetZ(f: u), z: FNeg(f: V3GetY(f: u))); |
134 | // const Vec3V b = V3Scale(tmp, w2); |
135 | // const Vec3V ab = V3Add(a, b); |
136 | const Vec3V ab = V3ScaleAdd(a: tmp, b: w2, c: a); |
137 | return V3SetX(v: ab, f: FSub(a: V3GetX(f: ab), b: FOne())); |
138 | } |
139 | |
140 | /** brief computes rotation of y-axis */ |
141 | PX_FORCE_INLINE Vec3V QuatGetBasisVector1(const QuatV q) |
142 | { |
143 | /*const PxF32 y2 = y*2.0f; |
144 | const PxF32 w2 = w*2.0f; |
145 | return PxVec3( (-z * w2) + x*y2, |
146 | (w * w2) - 1.0f + y*y2, |
147 | (x * w2) + z*y2);*/ |
148 | |
149 | const FloatV two = FLoad(f: 2.f); |
150 | const FloatV w = V4GetW(f: q); |
151 | const Vec3V u = Vec3V_From_Vec4V(v: q); |
152 | |
153 | const FloatV y2 = FMul(a: V3GetY(f: u), b: two); |
154 | const FloatV w2 = FMul(a: w, b: two); |
155 | |
156 | const Vec3V a = V3Scale(a: u, b: y2); |
157 | const Vec3V tmp = V3Merge(x: FNeg(f: V3GetZ(f: u)), y: w, z: V3GetX(f: u)); |
158 | // const Vec3V b = V3Scale(tmp, w2); |
159 | // const Vec3V ab = V3Add(a, b); |
160 | const Vec3V ab = V3ScaleAdd(a: tmp, b: w2, c: a); |
161 | return V3SetY(v: ab, f: FSub(a: V3GetY(f: ab), b: FOne())); |
162 | } |
163 | |
164 | /** brief computes rotation of z-axis */ |
165 | PX_FORCE_INLINE Vec3V QuatGetBasisVector2(const QuatV q) |
166 | { |
167 | /*const PxF32 z2 = z*2.0f; |
168 | const PxF32 w2 = w*2.0f; |
169 | return PxVec3( (y * w2) + x*z2, |
170 | (-x * w2) + y*z2, |
171 | (w * w2) - 1.0f + z*z2);*/ |
172 | |
173 | const FloatV two = FLoad(f: 2.f); |
174 | const FloatV w = V4GetW(f: q); |
175 | const Vec3V u = Vec3V_From_Vec4V(v: q); |
176 | |
177 | const FloatV z2 = FMul(a: V3GetZ(f: u), b: two); |
178 | const FloatV w2 = FMul(a: w, b: two); |
179 | |
180 | const Vec3V a = V3Scale(a: u, b: z2); |
181 | const Vec3V tmp = V3Merge(x: V3GetY(f: u), y: FNeg(f: V3GetX(f: u)), z: w); |
182 | /*const Vec3V b = V3Scale(tmp, w2); |
183 | const Vec3V ab = V3Add(a, b);*/ |
184 | const Vec3V ab = V3ScaleAdd(a: tmp, b: w2, c: a); |
185 | return V3SetZ(v: ab, f: FSub(a: V3GetZ(f: ab), b: FOne())); |
186 | } |
187 | |
188 | PX_FORCE_INLINE Vec3V QuatRotate(const QuatV q, const Vec3V v) |
189 | { |
190 | /* |
191 | const PxVec3 qv(x,y,z); |
192 | return (v*(w*w-0.5f) + (qv.cross(v))*w + qv*(qv.dot(v)))*2; |
193 | */ |
194 | |
195 | const FloatV two = FLoad(f: 2.f); |
196 | // const FloatV half = FloatV_From_F32(0.5f); |
197 | const FloatV nhalf = FLoad(f: -0.5f); |
198 | const Vec3V u = Vec3V_From_Vec4V(v: q); |
199 | const FloatV w = V4GetW(f: q); |
200 | // const FloatV w2 = FSub(FMul(w, w), half); |
201 | const FloatV w2 = FScaleAdd(a: w, b: w, c: nhalf); |
202 | const Vec3V a = V3Scale(a: v, b: w2); |
203 | // const Vec3V b = V3Scale(V3Cross(u, v), w); |
204 | // const Vec3V c = V3Scale(u, V3Dot(u, v)); |
205 | // return V3Scale(V3Add(V3Add(a, b), c), two); |
206 | const Vec3V temp = V3ScaleAdd(a: V3Cross(a: u, b: v), b: w, c: a); |
207 | return V3Scale(a: V3ScaleAdd(a: u, b: V3Dot(a: u, b: v), c: temp), b: two); |
208 | } |
209 | |
210 | PX_FORCE_INLINE Vec3V QuatTransform(const QuatV q, const Vec3V p, const Vec3V v) |
211 | { |
212 | // p + q.rotate(v) |
213 | const FloatV two = FLoad(f: 2.f); |
214 | // const FloatV half = FloatV_From_F32(0.5f); |
215 | const FloatV nhalf = FLoad(f: -0.5f); |
216 | const Vec3V u = Vec3V_From_Vec4V(v: q); |
217 | const FloatV w = V4GetW(f: q); |
218 | // const FloatV w2 = FSub(FMul(w, w), half); |
219 | const FloatV w2 = FScaleAdd(a: w, b: w, c: nhalf); |
220 | const Vec3V a = V3Scale(a: v, b: w2); |
221 | /*const Vec3V b = V3Scale(V3Cross(u, v), w); |
222 | const Vec3V c = V3Scale(u, V3Dot(u, v)); |
223 | return V3ScaleAdd(V3Add(V3Add(a, b), c), two, p);*/ |
224 | const Vec3V temp = V3ScaleAdd(a: V3Cross(a: u, b: v), b: w, c: a); |
225 | const Vec3V z = V3ScaleAdd(a: u, b: V3Dot(a: u, b: v), c: temp); |
226 | return V3ScaleAdd(a: z, b: two, c: p); |
227 | } |
228 | |
229 | PX_FORCE_INLINE Vec3V QuatRotateInv(const QuatV q, const Vec3V v) |
230 | { |
231 | |
232 | // const PxVec3 qv(x,y,z); |
233 | // return (v*(w*w-0.5f) - (qv.cross(v))*w + qv*(qv.dot(v)))*2; |
234 | |
235 | const FloatV two = FLoad(f: 2.f); |
236 | const FloatV nhalf = FLoad(f: -0.5f); |
237 | const Vec3V u = Vec3V_From_Vec4V(v: q); |
238 | const FloatV w = V4GetW(f: q); |
239 | const FloatV w2 = FScaleAdd(a: w, b: w, c: nhalf); |
240 | const Vec3V a = V3Scale(a: v, b: w2); |
241 | /*const Vec3V b = V3Scale(V3Cross(u, v), w); |
242 | const Vec3V c = V3Scale(u, V3Dot(u, v)); |
243 | return V3Scale(V3Add(V3Sub(a, b), c), two);*/ |
244 | const Vec3V temp = V3NegScaleSub(a: V3Cross(a: u, b: v), b: w, c: a); |
245 | return V3Scale(a: V3ScaleAdd(a: u, b: V3Dot(a: u, b: v), c: temp), b: two); |
246 | } |
247 | |
248 | PX_FORCE_INLINE QuatV QuatMul(const QuatV a, const QuatV b) |
249 | { |
250 | const Vec3V imagA = Vec3V_From_Vec4V(v: a); |
251 | const Vec3V imagB = Vec3V_From_Vec4V(v: b); |
252 | const FloatV rA = V4GetW(f: a); |
253 | const FloatV rB = V4GetW(f: b); |
254 | |
255 | const FloatV real = FSub(a: FMul(a: rA, b: rB), b: V3Dot(a: imagA, b: imagB)); |
256 | const Vec3V v0 = V3Scale(a: imagA, b: rB); |
257 | const Vec3V v1 = V3Scale(a: imagB, b: rA); |
258 | const Vec3V v2 = V3Cross(a: imagA, b: imagB); |
259 | const Vec3V imag = V3Add(a: V3Add(a: v0, b: v1), b: v2); |
260 | |
261 | return V4SetW(v: Vec4V_From_Vec3V(f: imag), f: real); |
262 | } |
263 | |
264 | PX_FORCE_INLINE QuatV QuatAdd(const QuatV a, const QuatV b) |
265 | { |
266 | return V4Add(a, b); |
267 | } |
268 | |
269 | PX_FORCE_INLINE QuatV QuatNeg(const QuatV q) |
270 | { |
271 | return V4Neg(f: q); |
272 | } |
273 | |
274 | PX_FORCE_INLINE QuatV QuatSub(const QuatV a, const QuatV b) |
275 | { |
276 | return V4Sub(a, b); |
277 | } |
278 | |
279 | PX_FORCE_INLINE QuatV QuatScale(const QuatV a, const FloatV b) |
280 | { |
281 | return V4Scale(a, b); |
282 | } |
283 | |
284 | PX_FORCE_INLINE QuatV QuatMerge(const FloatV* const floatVArray) |
285 | { |
286 | return V4Merge(floatVArray); |
287 | } |
288 | |
289 | PX_FORCE_INLINE QuatV QuatMerge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w) |
290 | { |
291 | return V4Merge(x, y, z, w); |
292 | } |
293 | |
294 | PX_FORCE_INLINE QuatV QuatIdentity() |
295 | { |
296 | return V4SetW(v: V4Zero(), f: FOne()); |
297 | } |
298 | |
299 | PX_FORCE_INLINE bool isFiniteQuatV(const QuatV q) |
300 | { |
301 | return isFiniteVec4V(a: q); |
302 | } |
303 | |
304 | PX_FORCE_INLINE bool isValidQuatV(const QuatV q) |
305 | { |
306 | const FloatV unitTolerance = FLoad(f: 1e-4f); |
307 | const FloatV tmp = FAbs(a: FSub(a: QuatLength(q), b: FOne())); |
308 | const BoolV con = FIsGrtr(a: unitTolerance, b: tmp); |
309 | return isFiniteVec4V(a: q) & (BAllEqTTTT(a: con) == 1); |
310 | } |
311 | |
312 | PX_FORCE_INLINE bool isSaneQuatV(const QuatV q) |
313 | { |
314 | const FloatV unitTolerance = FLoad(f: 1e-2f); |
315 | const FloatV tmp = FAbs(a: FSub(a: QuatLength(q), b: FOne())); |
316 | const BoolV con = FIsGrtr(a: unitTolerance, b: tmp); |
317 | return isFiniteVec4V(a: q) & (BAllEqTTTT(a: con) == 1); |
318 | } |
319 | |
320 | PX_FORCE_INLINE Mat33V QuatGetMat33V(const QuatVArg q) |
321 | { |
322 | // const FloatV two = FloatV_From_F32(2.f); |
323 | // const FloatV one = FOne(); |
324 | |
325 | // const FloatV x = V4GetX(q); |
326 | // const FloatV y = V4GetY(q); |
327 | // const FloatV z = V4GetZ(q); |
328 | // const Vec4V _q = V4Mul(q, two); |
329 | // |
330 | ////const FloatV w = V4GetW(q); |
331 | |
332 | // const Vec4V t0 = V4Mul(_q, x); // 2xx, 2xy, 2xz, 2xw |
333 | // const Vec4V t1 = V4Mul(_q, y); // 2xy, 2yy, 2yz, 2yw |
334 | // const Vec4V t2 = V4Mul(_q, z); // 2xz, 2yz, 2zz, 2zw |
335 | ////const Vec4V t3 = V4Mul(_q, w); // 2xw, 2yw, 2zw, 2ww |
336 | |
337 | // const FloatV xx2 = V4GetX(t0); |
338 | // const FloatV xy2 = V4GetY(t0); |
339 | // const FloatV xz2 = V4GetZ(t0); |
340 | // const FloatV xw2 = V4GetW(t0); |
341 | |
342 | // const FloatV yy2 = V4GetY(t1); |
343 | // const FloatV yz2 = V4GetZ(t1); |
344 | // const FloatV yw2 = V4GetW(t1); |
345 | |
346 | // const FloatV zz2 = V4GetZ(t2); |
347 | // const FloatV zw2 = V4GetW(t2); |
348 | |
349 | ////const FloatV ww2 = V4GetW(t3); |
350 | |
351 | // const FloatV c00 = FSub(one, FAdd(yy2, zz2)); |
352 | // const FloatV c01 = FSub(xy2, zw2); |
353 | // const FloatV c02 = FAdd(xz2, yw2); |
354 | |
355 | // const FloatV c10 = FAdd(xy2, zw2); |
356 | // const FloatV c11 = FSub(one, FAdd(xx2, zz2)); |
357 | // const FloatV c12 = FSub(yz2, xw2); |
358 | |
359 | // const FloatV c20 = FSub(xz2, yw2); |
360 | // const FloatV c21 = FAdd(yz2, xw2); |
361 | // const FloatV c22 = FSub(one, FAdd(xx2, yy2)); |
362 | |
363 | // const Vec3V c0 = V3Merge(c00, c10, c20); |
364 | // const Vec3V c1 = V3Merge(c01, c11, c21); |
365 | // const Vec3V c2 = V3Merge(c02, c12, c22); |
366 | |
367 | // return Mat33V(c0, c1, c2); |
368 | |
369 | const FloatV one = FOne(); |
370 | const FloatV x = V4GetX(f: q); |
371 | const FloatV y = V4GetY(f: q); |
372 | const FloatV z = V4GetZ(f: q); |
373 | const FloatV w = V4GetW(f: q); |
374 | |
375 | const FloatV x2 = FAdd(a: x, b: x); |
376 | const FloatV y2 = FAdd(a: y, b: y); |
377 | const FloatV z2 = FAdd(a: z, b: z); |
378 | |
379 | const FloatV xx = FMul(a: x2, b: x); |
380 | const FloatV yy = FMul(a: y2, b: y); |
381 | const FloatV zz = FMul(a: z2, b: z); |
382 | |
383 | const FloatV xy = FMul(a: x2, b: y); |
384 | const FloatV xz = FMul(a: x2, b: z); |
385 | const FloatV xw = FMul(a: x2, b: w); |
386 | |
387 | const FloatV yz = FMul(a: y2, b: z); |
388 | const FloatV yw = FMul(a: y2, b: w); |
389 | const FloatV zw = FMul(a: z2, b: w); |
390 | |
391 | const FloatV v = FSub(a: one, b: xx); |
392 | |
393 | const Vec3V column0 = V3Merge(x: FSub(a: FSub(a: one, b: yy), b: zz), y: FAdd(a: xy, b: zw), z: FSub(a: xz, b: yw)); |
394 | const Vec3V column1 = V3Merge(x: FSub(a: xy, b: zw), y: FSub(a: v, b: zz), z: FAdd(a: yz, b: xw)); |
395 | const Vec3V column2 = V3Merge(x: FAdd(a: xz, b: yw), y: FSub(a: yz, b: xw), z: FSub(a: v, b: yy)); |
396 | return Mat33V(column0, column1, column2); |
397 | } |
398 | |
399 | PX_FORCE_INLINE QuatV Mat33GetQuatV(const Mat33V& a) |
400 | { |
401 | const FloatV one = FOne(); |
402 | const FloatV zero = FZero(); |
403 | const FloatV half = FLoad(f: 0.5f); |
404 | const FloatV two = FLoad(f: 2.f); |
405 | const FloatV scale = FLoad(f: 0.25f); |
406 | const FloatV a00 = V3GetX(f: a.col0); |
407 | const FloatV a11 = V3GetY(f: a.col1); |
408 | const FloatV a22 = V3GetZ(f: a.col2); |
409 | |
410 | const FloatV a21 = V3GetZ(f: a.col1); // row=2, col=1; |
411 | const FloatV a12 = V3GetY(f: a.col2); // row=1, col=2; |
412 | const FloatV a02 = V3GetX(f: a.col2); // row=0, col=2; |
413 | const FloatV a20 = V3GetZ(f: a.col0); // row=2, col=0; |
414 | const FloatV a10 = V3GetY(f: a.col0); // row=1, col=0; |
415 | const FloatV a01 = V3GetX(f: a.col1); // row=0, col=1; |
416 | |
417 | const Vec3V vec0 = V3Merge(x: a21, y: a02, z: a10); |
418 | const Vec3V vec1 = V3Merge(x: a12, y: a20, z: a01); |
419 | const Vec3V v = V3Sub(a: vec0, b: vec1); |
420 | const Vec3V g = V3Add(a: vec0, b: vec1); |
421 | |
422 | const FloatV trace = FAdd(a: a00, b: FAdd(a: a11, b: a22)); |
423 | |
424 | if(FAllGrtrOrEq(a: trace, b: zero)) |
425 | { |
426 | const FloatV h = FSqrt(a: FAdd(a: trace, b: one)); |
427 | const FloatV w = FMul(a: half, b: h); |
428 | const FloatV s = FMul(a: half, b: FRecip(a: h)); |
429 | const Vec3V u = V3Scale(a: v, b: s); |
430 | return V4SetW(v: Vec4V_From_Vec3V(f: u), f: w); |
431 | } |
432 | else |
433 | { |
434 | const FloatV ntrace = FNeg(f: trace); |
435 | const Vec3V d = V3Merge(x: a00, y: a11, z: a22); |
436 | const BoolV con0 = BAllTrue3(a: V3IsGrtrOrEq(a: V3Splat(f: a00), b: d)); |
437 | const BoolV con1 = BAllTrue3(a: V3IsGrtrOrEq(a: V3Splat(f: a11), b: d)); |
438 | |
439 | const FloatV t0 = FAdd(a: one, b: FScaleAdd(a: a00, b: two, c: ntrace)); |
440 | const FloatV t1 = FAdd(a: one, b: FScaleAdd(a: a11, b: two, c: ntrace)); |
441 | const FloatV t2 = FAdd(a: one, b: FScaleAdd(a: a22, b: two, c: ntrace)); |
442 | |
443 | const FloatV t = FSel(c: con0, a: t0, b: FSel(c: con1, a: t1, b: t2)); |
444 | |
445 | const FloatV h = FMul(a: two, b: FSqrt(a: t)); |
446 | const FloatV s = FRecip(a: h); |
447 | const FloatV g0 = FMul(a: scale, b: h); |
448 | const Vec3V vs = V3Scale(a: v, b: s); |
449 | const Vec3V gs = V3Scale(a: g, b: s); |
450 | const FloatV gsx = V3GetX(f: gs); |
451 | const FloatV gsy = V3GetY(f: gs); |
452 | const FloatV gsz = V3GetZ(f: gs); |
453 | // vs.x= (a21 - a12)*s; vs.y=(a02 - a20)*s; vs.z=(a10 - a01)*s; |
454 | // gs.x= (a21 + a12)*s; gs.y=(a02 + a20)*s; gs.z=(a10 + a01)*s; |
455 | const Vec4V v0 = V4Merge(x: g0, y: gsz, z: gsy, w: V3GetX(f: vs)); |
456 | const Vec4V v1 = V4Merge(x: gsz, y: g0, z: gsx, w: V3GetY(f: vs)); |
457 | const Vec4V v2 = V4Merge(x: gsy, y: gsx, z: g0, w: V3GetZ(f: vs)); |
458 | return V4Sel(c: con0, a: v0, b: V4Sel(c: con1, a: v1, b: v2)); |
459 | } |
460 | } |
461 | |
462 | } // namespace aos |
463 | } // namespace shdfnd |
464 | } // namespace physx |
465 | |
466 | #endif |
467 | |