// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../sys/alloc.h"
#include "math.h"
#include "../simd/sse.h"

namespace embree
{
  ////////////////////////////////////////////////////////////////////////////////
  /// SSE Vec3fa Type
  ////////////////////////////////////////////////////////////////////////////////

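  /* Three floats stored in one 16-byte SSE register. The fourth lane is
     padding: loads and stores touch it, but its contents are unspecified and
     all Vec3fa operations mask it out or ignore it. */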
  struct __aligned(16) Vec3fa
  {
    ALIGNED_STRUCT_(16);

    typedef float Scalar;
    enum { N = 3 };
    union {
      __m128 m128;
      struct { float x,y,z; };
    };

    ////////////////////////////////////////////////////////////////////////////////
    /// Constructors, Assignment & Cast Operators
    ////////////////////////////////////////////////////////////////////////////////

    __forceinline Vec3fa( ) {}
    __forceinline Vec3fa( const __m128 a ) : m128(a) {}

    __forceinline Vec3fa ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
    //__forceinline Vec3fa& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }

    __forceinline Vec3fa ( const Vec3fa& other ) { m128 = other.m128; }
    __forceinline Vec3fa& operator =( const Vec3fa& other ) { m128 = other.m128; return *this; }

    __forceinline explicit Vec3fa( const float a ) : m128(_mm_set1_ps(a)) {}
    __forceinline Vec3fa( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}

    __forceinline explicit Vec3fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}

    __forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
    __forceinline explicit operator const vint4()   const { return vint4(_mm_cvtps_epi32(m128)); }
    __forceinline explicit operator const Vec2fa()  const { return Vec2fa(m128); }
    __forceinline explicit operator const Vec3ia()  const { return Vec3ia(_mm_cvtps_epi32(m128)); }

    //__forceinline operator const __m128&() const { return m128; }
    //__forceinline operator       __m128&()       { return m128; }

    ////////////////////////////////////////////////////////////////////////////////
    /// Loads and Stores
    ////////////////////////////////////////////////////////////////////////////////

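    /* load() requires 16-byte aligned memory and clears the padding lane via
       an integer mask; loadu() and storeu() read/write a full 16 bytes, so the
       caller must guarantee the 4 bytes past z are accessible. */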
    static __forceinline Vec3fa load( const void* const a ) {
      return Vec3fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
    }

    static __forceinline Vec3fa loadu( const void* const a ) {
      return Vec3fa(_mm_loadu_ps((float*)a));
    }

    static __forceinline void storeu ( void* ptr, const Vec3fa& v ) {
      _mm_storeu_ps((float*)ptr,v.m128);
    }

    ////////////////////////////////////////////////////////////////////////////////
    /// Constants
    ////////////////////////////////////////////////////////////////////////////////

    __forceinline Vec3fa( ZeroTy   ) : m128(_mm_setzero_ps()) {}
    __forceinline Vec3fa( OneTy    ) : m128(_mm_set1_ps(1.0f)) {}
    __forceinline Vec3fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
    __forceinline Vec3fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}

    ////////////////////////////////////////////////////////////////////////////////
    /// Array Access
    ////////////////////////////////////////////////////////////////////////////////

    __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
    __forceinline       float& operator []( const size_t index )       { assert(index < 3); return (&x)[index]; }
  };

  ////////////////////////////////////////////////////////////////////////////////
  /// Unary Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
  __forceinline Vec3fa operator -( const Vec3fa& a ) {
    const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
    return _mm_xor_ps(a.m128, mask);
  }
  __forceinline Vec3fa abs ( const Vec3fa& a ) {
    const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
    return _mm_and_ps(a.m128, mask);
  }
  __forceinline Vec3fa sign ( const Vec3fa& a ) {
    return blendv_ps(Vec3fa(one).m128, (-Vec3fa(one)).m128, _mm_cmplt_ps(a.m128,Vec3fa(zero).m128));
  }

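  /* The hardware reciprocal estimate (about 12 correct bits from _mm_rcp_ps,
     14 from _mm_rcp14_ps) is refined with one Newton-Raphson step,
     r' = r + r*(1 - a*r), which roughly doubles the number of correct bits. */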
  __forceinline Vec3fa rcp ( const Vec3fa& a )
  {
#if defined(__AVX512VL__)
    const Vec3fa r = _mm_rcp14_ps(a.m128);
#else
    const Vec3fa r = _mm_rcp_ps(a.m128);
#endif

#if defined(__AVX2__)
    const Vec3fa h_n = _mm_fnmadd_ps(a.m128, r.m128, vfloat4(1.0f)); // First, compute 1 - a * r (which will be very close to 0)
    const Vec3fa res = _mm_fmadd_ps(r.m128, h_n.m128, r.m128);       // Then compute r + r * h_n
#else
    const Vec3fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a.m128, r.m128)); // First, compute 1 - a * r (which will be very close to 0)
    const Vec3fa res = _mm_add_ps(r.m128,_mm_mul_ps(r.m128, h_n.m128));       // Then compute r + r * h_n
#endif

    return res;
  }

  __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return _mm_sqrt_ps(a.m128); }
  __forceinline Vec3fa sqr  ( const Vec3fa& a ) { return _mm_mul_ps(a.m128,a.m128); }

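  /* Same idea for the reciprocal square root: one Newton-Raphson step,
     y' = y*(1.5 - 0.5*a*y*y), written below as 1.5*y + (-0.5*a)*y*(y*y). */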
  __forceinline Vec3fa rsqrt( const Vec3fa& a )
  {
#if defined(__AVX512VL__)
    __m128 r = _mm_rsqrt14_ps(a.m128);
#else
    __m128 r = _mm_rsqrt_ps(a.m128);
#endif
    return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
  }

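  /* zero_fix() replaces any component whose magnitude is below min_rcp_input
     with +min_rcp_input (the sign is discarded), so rcp_safe() can never
     produce an infinity. */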
  __forceinline Vec3fa zero_fix(const Vec3fa& a) {
    return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps(abs(a).m128, _mm_set1_ps(min_rcp_input)));
  }
  __forceinline Vec3fa rcp_safe(const Vec3fa& a) {
    return rcp(zero_fix(a));
  }
  __forceinline Vec3fa log ( const Vec3fa& a ) {
    return Vec3fa(logf(a.x),logf(a.y),logf(a.z));
  }

  __forceinline Vec3fa exp ( const Vec3fa& a ) {
    return Vec3fa(expf(a.x),expf(a.y),expf(a.z));
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Binary Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return _mm_add_ps(a.m128, b.m128); }
  __forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
  __forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
  __forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
  __forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
  __forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return _mm_div_ps(a.m128,b.m128); }
  __forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
  __forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }

  __forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { return _mm_min_ps(a.m128,b.m128); }
  __forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { return _mm_max_ps(a.m128,b.m128); }

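  /* mini()/maxi() reinterpret the float bits as signed 32-bit integers.
     Integer ordering agrees with float ordering only for non-negative finite
     values, so these are only valid when both inputs are known to be >= 0. */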
#if defined(__SSE4_1__)
  __forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
    const vint4 ai = _mm_castps_si128(a.m128);
    const vint4 bi = _mm_castps_si128(b.m128);
    const vint4 ci = _mm_min_epi32(ai,bi);
    return _mm_castsi128_ps(ci);
  }
#endif

#if defined(__SSE4_1__)
  __forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
    const vint4 ai = _mm_castps_si128(a.m128);
    const vint4 bi = _mm_castps_si128(b.m128);
    const vint4 ci = _mm_max_epi32(ai,bi);
    return _mm_castsi128_ps(ci);
  }
#endif

  __forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
    return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Ternary Operators
  ////////////////////////////////////////////////////////////////////////////////

#if defined(__AVX2__)
  __forceinline Vec3fa madd  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
  __forceinline Vec3fa msub  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
  __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
  __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
#else
  __forceinline Vec3fa madd  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b+c; }
  __forceinline Vec3fa msub  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; }
  __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b+c; }
  __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b-c; }
#endif

  __forceinline Vec3fa madd  ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
  __forceinline Vec3fa msub  ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
  __forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
  __forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }

  ////////////////////////////////////////////////////////////////////////////////
  /// Assignment Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
  __forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
  __forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
  __forceinline Vec3fa& operator *=( Vec3fa& a, const float   b ) { return a = a * b; }
  __forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
  __forceinline Vec3fa& operator /=( Vec3fa& a, const float   b ) { return a = a / b; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Reductions
  ////////////////////////////////////////////////////////////////////////////////

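  /* Horizontal sum: shuffle<i> broadcasts lane i, so lane 0 of a+b+c holds
     x+y+z and _mm_cvtss_f32 extracts it. The padding lane never contributes. */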
  __forceinline float reduce_add(const Vec3fa& v) {
    const vfloat4 a(v.m128);
    const vfloat4 b = shuffle<1>(a);
    const vfloat4 c = shuffle<2>(a);
    return _mm_cvtss_f32(a+b+c);
  }

  __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
  __forceinline float reduce_min(const Vec3fa& v) { return min(v.x,v.y,v.z); }
  __forceinline float reduce_max(const Vec3fa& v) { return max(v.x,v.y,v.z); }

  ////////////////////////////////////////////////////////////////////////////////
  /// Comparison Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
  __forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }

  __forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
  __forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
  __forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
  __forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmple_ps (a.m128, b.m128); }
  __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpnle_ps(a.m128, b.m128); }
  __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); }

  __forceinline bool isvalid ( const Vec3fa& v ) {
    return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
  }

  __forceinline bool is_finite ( const Vec3fa& a ) {
    return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
  }

  __forceinline bool isvalid4 ( const Vec3fa& v ) {
    return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
  }

  __forceinline bool is_finite4 ( const Vec3fa& a ) {
    return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Euclidean Space Operators
  ////////////////////////////////////////////////////////////////////////////////

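  /* _mm_dp_ps immediate 0x7F: the high nibble (0x7) multiplies lanes x,y,z
     only, so the padding lane cannot contaminate the result; the low nibble
     (0xF) broadcasts the sum to all lanes, and _mm_cvtss_f32 reads lane 0. */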
#if defined(__SSE4_1__)
  __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
    return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
  }
#else
  __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
    return reduce_add(a*b);
  }
#endif

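  /* Cross product with three shuffles instead of the naive four: compute
     a*rot(b) - rot(a)*b, whose lanes hold the cross product rotated by one
     position, then rotate the result back, where rot cycles lanes <1,2,0>. */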
  __forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b )
  {
    vfloat4 a0 = vfloat4(a.m128);
    vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128));
    vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128));
    vfloat4 b1 = vfloat4(b.m128);
    return Vec3fa(shuffle<1,2,0,3>(msub(a0,b0,a1*b1)));
  }

  __forceinline float  sqr_length ( const Vec3fa& a )                { return dot(a,a); }
  __forceinline float  rcp_length ( const Vec3fa& a )                { return rsqrt(dot(a,a)); }
  __forceinline float  rcp_length2( const Vec3fa& a )                { return rcp(dot(a,a)); }
  __forceinline float  length    ( const Vec3fa& a )                 { return sqrt(dot(a,a)); }
  __forceinline Vec3fa normalize ( const Vec3fa& a )                 { return a*rsqrt(dot(a,a)); }
  __forceinline float  distance  ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
  __forceinline float  halfArea  ( const Vec3fa& d )                 { return madd(d.x,(d.y+d.z),d.y*d.z); }
  __forceinline float  area      ( const Vec3fa& d )                 { return 2.0f*halfArea(d); }

  __forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
    const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
  }

  /*! differentiated normalization: d/dt normalize(p) = (dot(p,p)*dp - dot(p,dp)*p) / |p|^3 */
  __forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
  {
    const float pp  = dot(p,p);
    const float pdp = dot(p,dp);
    return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Select
  ////////////////////////////////////////////////////////////////////////////////

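  /* The bool overload expands s into an all-ones (true) or all-zeros mask;
     _mm_cmpeq_epi32(zero,zero) is the idiomatic way to materialize all-ones
     without loading a constant from memory. */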
  __forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
    __m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
    return blendv_ps(f.m128, t.m128, mask);
  }

  __forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
    return blendv_ps(f.m128, t.m128, s);
  }

  __forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
    return madd(1.0f-t,v0,t*v1);
  }

  __forceinline int maxDim ( const Vec3fa& a )
  {
    const Vec3fa b = abs(a);
    if (b.x > b.y) {
      if (b.x > b.z) return 0; else return 2;
    } else {
      if (b.y > b.z) return 1; else return 2;
    }
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Rounding Functions
  ////////////////////////////////////////////////////////////////////////////////

#if defined (__SSE4_1__)
  __forceinline Vec3fa trunc( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_ZERO); } // truncate toward zero, matching the scalar truncf fallback
  __forceinline Vec3fa floor( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF); }
  __forceinline Vec3fa ceil ( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF); }
#else
  __forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
  __forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(floorf(a.x),floorf(a.y),floorf(a.z)); }
  __forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
#endif

  ////////////////////////////////////////////////////////////////////////////////
  /// Output Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
    return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
  }

  typedef Vec3fa Vec3fa_t;


  ////////////////////////////////////////////////////////////////////////////////
  /// SSE Vec3fx Type
  ////////////////////////////////////////////////////////////////////////////////

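  /* Vec3fx has the same 16-byte layout as Vec3fa, but the fourth lane is an
     active payload rather than padding, aliased as int a, unsigned u, or
     float w. */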
  struct __aligned(16) Vec3fx
  {
    ALIGNED_STRUCT_(16);

    typedef float Scalar;
    enum { N = 3 };
    union {
      __m128 m128;
      struct { float x,y,z; union { int a; unsigned u; float w; }; };
    };

    ////////////////////////////////////////////////////////////////////////////////
    /// Constructors, Assignment & Cast Operators
    ////////////////////////////////////////////////////////////////////////////////

    __forceinline Vec3fx( ) {}
    __forceinline Vec3fx( const __m128 a ) : m128(a) {}

    __forceinline explicit Vec3fx(const Vec3fa& v) : m128(v.m128) {}
    __forceinline operator Vec3fa () const { return Vec3fa(m128); }

    __forceinline explicit Vec3fx ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
    //__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }

    __forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; }

    __forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; }

    __forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {}
    __forceinline Vec3fx( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}

    __forceinline Vec3fx( const Vec3fa& other, const int      a1) { m128 = other.m128; a = a1; }
    __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) { m128 = other.m128; u = a1; }
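    /* On SSE4.1 the immediate 3<<4 of _mm_insert_ps selects destination lane 3
       (source lane 0, no zeroing); the fallback blends lanes 0..2 of 'other'
       with w1 in lane 3 via the (-1,-1,-1,0) mask. */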
    __forceinline Vec3fx( const Vec3fa& other, const float w1) {
#if defined (__SSE4_1__)
      m128 = _mm_insert_ps(other.m128, _mm_set_ss(w1), 3 << 4);
#else
      const vint4 mask(-1,-1,-1,0);
      m128 = select(vboolf4(_mm_castsi128_ps(mask)),vfloat4(other.m128),vfloat4(w1));
#endif
    }
    //__forceinline Vec3fx( const float x, const float y, const float z, const int      a) : x(x), y(y), z(z), a(a) {} // not working properly!
    //__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
    __forceinline Vec3fx( const float x, const float y, const float z, const float w) : m128(_mm_set_ps(w, z, y, x)) {}

    //__forceinline explicit Vec3fx( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}

    __forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
    __forceinline explicit operator const vint4()   const { return vint4(_mm_cvtps_epi32(m128)); }
    __forceinline explicit operator const Vec2fa()  const { return Vec2fa(m128); }
    __forceinline explicit operator const Vec3ia()  const { return Vec3ia(_mm_cvtps_epi32(m128)); }

    //__forceinline operator const __m128&() const { return m128; }
    //__forceinline operator       __m128&()       { return m128; }

    ////////////////////////////////////////////////////////////////////////////////
    /// Loads and Stores
    ////////////////////////////////////////////////////////////////////////////////

    static __forceinline Vec3fx load( const void* const a ) {
      return Vec3fx(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
    }

    static __forceinline Vec3fx loadu( const void* const a ) {
      return Vec3fx(_mm_loadu_ps((float*)a));
    }

    static __forceinline void storeu ( void* ptr, const Vec3fx& v ) {
      _mm_storeu_ps((float*)ptr,v.m128);
    }

    ////////////////////////////////////////////////////////////////////////////////
    /// Constants
    ////////////////////////////////////////////////////////////////////////////////

    __forceinline Vec3fx( ZeroTy   ) : m128(_mm_setzero_ps()) {}
    __forceinline Vec3fx( OneTy    ) : m128(_mm_set1_ps(1.0f)) {}
    __forceinline Vec3fx( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
    __forceinline Vec3fx( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}

    ////////////////////////////////////////////////////////////////////////////////
    /// Array Access
    ////////////////////////////////////////////////////////////////////////////////

    __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
    __forceinline       float& operator []( const size_t index )       { assert(index < 3); return (&x)[index]; }
  };

  ////////////////////////////////////////////////////////////////////////////////
  /// Unary Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
  __forceinline Vec3fx operator -( const Vec3fx& a ) {
    const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
    return _mm_xor_ps(a.m128, mask);
  }
  __forceinline Vec3fx abs ( const Vec3fx& a ) {
    const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
    return _mm_and_ps(a.m128, mask);
  }
  __forceinline Vec3fx sign ( const Vec3fx& a ) {
    return blendv_ps(Vec3fx(one).m128, (-Vec3fx(one)).m128, _mm_cmplt_ps(a.m128,Vec3fx(zero).m128));
  }

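  /* This rcp() uses the algebraically equivalent Newton-Raphson form
     r' = r*(2 - a*r) instead of the r + r*(1 - a*r) form used for Vec3fa. */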
  __forceinline Vec3fx rcp ( const Vec3fx& a )
  {
#if defined(__AVX512VL__)
    const Vec3fx r = _mm_rcp14_ps(a.m128);
#else
    const Vec3fx r = _mm_rcp_ps(a.m128);
#endif

#if defined(__AVX2__)
    const Vec3fx res = _mm_mul_ps(r.m128,_mm_fnmadd_ps(r.m128, a.m128, vfloat4(2.0f)));
#else
    const Vec3fx res = _mm_mul_ps(r.m128,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r.m128, a.m128)));
    //return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
#endif

    return res;
  }

  __forceinline Vec3fx sqrt ( const Vec3fx& a ) { return _mm_sqrt_ps(a.m128); }
  __forceinline Vec3fx sqr  ( const Vec3fx& a ) { return _mm_mul_ps(a.m128,a.m128); }

  __forceinline Vec3fx rsqrt( const Vec3fx& a )
  {
#if defined(__AVX512VL__)
    __m128 r = _mm_rsqrt14_ps(a.m128);
#else
    __m128 r = _mm_rsqrt_ps(a.m128);
#endif
    return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
  }

  __forceinline Vec3fx zero_fix(const Vec3fx& a) {
    return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps(abs(a).m128, _mm_set1_ps(min_rcp_input)));
  }
  __forceinline Vec3fx rcp_safe(const Vec3fx& a) {
    return rcp(zero_fix(a));
  }
  __forceinline Vec3fx log ( const Vec3fx& a ) {
    return Vec3fx(logf(a.x),logf(a.y),logf(a.z));
  }

  __forceinline Vec3fx exp ( const Vec3fx& a ) {
    return Vec3fx(expf(a.x),expf(a.y),expf(a.z));
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Binary Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return _mm_add_ps(a.m128, b.m128); }
  __forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return _mm_sub_ps(a.m128, b.m128); }
  __forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return _mm_mul_ps(a.m128, b.m128); }
  __forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
  __forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
  __forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return _mm_div_ps(a.m128,b.m128); }
  __forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
  __forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }

  __forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { return _mm_min_ps(a.m128,b.m128); }
  __forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { return _mm_max_ps(a.m128,b.m128); }

#if defined(__SSE4_1__)
  __forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
    const vint4 ai = _mm_castps_si128(a.m128);
    const vint4 bi = _mm_castps_si128(b.m128);
    const vint4 ci = _mm_min_epi32(ai,bi);
    return _mm_castsi128_ps(ci);
  }
#endif

#if defined(__SSE4_1__)
  __forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
    const vint4 ai = _mm_castps_si128(a.m128);
    const vint4 bi = _mm_castps_si128(b.m128);
    const vint4 ci = _mm_max_epi32(ai,bi);
    return _mm_castsi128_ps(ci);
  }
#endif

  __forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
    return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Ternary Operators
  ////////////////////////////////////////////////////////////////////////////////

#if defined(__AVX2__)
  __forceinline Vec3fx madd  ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
  __forceinline Vec3fx msub  ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
  __forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
  __forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
#else
  __forceinline Vec3fx madd  ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b+c; }
  __forceinline Vec3fx msub  ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b-c; }
  __forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b+c; }
  __forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b-c; }
#endif

  __forceinline Vec3fx madd  ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
  __forceinline Vec3fx msub  ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
  __forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
  __forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }

  ////////////////////////////////////////////////////////////////////////////////
  /// Assignment Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
  __forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
  __forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
  __forceinline Vec3fx& operator *=( Vec3fx& a, const float   b ) { return a = a * b; }
  __forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
  __forceinline Vec3fx& operator /=( Vec3fx& a, const float   b ) { return a = a / b; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Reductions
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline float reduce_add(const Vec3fx& v) {
    const vfloat4 a(v.m128);
    const vfloat4 b = shuffle<1>(a);
    const vfloat4 c = shuffle<2>(a);
    return _mm_cvtss_f32(a+b+c);
  }

  __forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
  __forceinline float reduce_min(const Vec3fx& v) { return min(v.x,v.y,v.z); }
  __forceinline float reduce_max(const Vec3fx& v) { return max(v.x,v.y,v.z); }

  ////////////////////////////////////////////////////////////////////////////////
  /// Comparison Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
  __forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }

  __forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
  __forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
  __forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
  __forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmple_ps (a.m128, b.m128); }
  __forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnle_ps(a.m128, b.m128); }
  __forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); }

  __forceinline bool isvalid ( const Vec3fx& v ) {
    return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
  }

  __forceinline bool is_finite ( const Vec3fx& a ) {
    return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
  }

  __forceinline bool isvalid4 ( const Vec3fx& v ) {
    return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
  }

  __forceinline bool is_finite4 ( const Vec3fx& a ) {
    return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Euclidean Space Operators
  ////////////////////////////////////////////////////////////////////////////////

#if defined(__SSE4_1__)
  __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
    return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
  }
#else
  __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
    return reduce_add(a*b);
  }
#endif

  __forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b )
  {
    vfloat4 a0 = vfloat4(a.m128);
    vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128));
    vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128));
    vfloat4 b1 = vfloat4(b.m128);
    return Vec3fx(shuffle<1,2,0,3>(msub(a0,b0,a1*b1)));
  }

  __forceinline float  sqr_length ( const Vec3fx& a )                { return dot(a,a); }
  __forceinline float  rcp_length ( const Vec3fx& a )                { return rsqrt(dot(a,a)); }
  __forceinline float  rcp_length2( const Vec3fx& a )                { return rcp(dot(a,a)); }
  __forceinline float  length    ( const Vec3fx& a )                 { return sqrt(dot(a,a)); }
  __forceinline Vec3fx normalize ( const Vec3fx& a )                 { return a*rsqrt(dot(a,a)); }
  __forceinline float  distance  ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
  __forceinline float  halfArea  ( const Vec3fx& d )                 { return madd(d.x,(d.y+d.z),d.y*d.z); }
  __forceinline float  area      ( const Vec3fx& d )                 { return 2.0f*halfArea(d); }

  __forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
    const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
  }

  /*! differentiated normalization */
  __forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
  {
    const float pp  = dot(p,p);
    const float pdp = dot(p,dp);
    return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Select
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
    __m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
    return blendv_ps(f.m128, t.m128, mask);
  }

  __forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
    return blendv_ps(f.m128, t.m128, s);
  }

  __forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
    return madd(1.0f-t,v0,t*v1);
  }

  __forceinline int maxDim ( const Vec3fx& a )
  {
    const Vec3fx b = abs(a);
    if (b.x > b.y) {
      if (b.x > b.z) return 0; else return 2;
    } else {
      if (b.y > b.z) return 1; else return 2;
    }
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Rounding Functions
  ////////////////////////////////////////////////////////////////////////////////

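  /* On AArch64: vrndq_f32 rounds toward zero, vrndmq_f32 toward -infinity,
     and vrndpq_f32 toward +infinity, matching truncf/floorf/ceilf. */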
#if defined(__aarch64__)
  __forceinline Vec3fx trunc(const Vec3fx& a) { return vrndq_f32(a.m128); }
  __forceinline Vec3fx floor(const Vec3fx& a) { return vrndmq_f32(a.m128); }
  __forceinline Vec3fx ceil (const Vec3fx& a) { return vrndpq_f32(a.m128); }
#elif defined (__SSE4_1__)
  __forceinline Vec3fx trunc( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_ZERO); } // truncate toward zero, matching the scalar truncf fallback
  __forceinline Vec3fx floor( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF); }
  __forceinline Vec3fx ceil ( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF); }
#else
  __forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(truncf(a.x),truncf(a.y),truncf(a.z)); }
  __forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(floorf(a.x),floorf(a.y),floorf(a.z)); }
  __forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
#endif

  ////////////////////////////////////////////////////////////////////////////////
  /// Output Operators
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
    return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
  }

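  /* Vec3ff names the common use of Vec3fx where the fourth lane carries a
     float payload (e.g. the radius of a curve control point). */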
  typedef Vec3fx Vec3ff;
}