// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "../sys/platform.h"
7#include "../sys/intrinsics.h"
8#include "constants.h"
9#include <cmath>
10
11#if defined(__ARM_NEON)
12#include "../simd/arm/emulation.h"
13#else
14#include <emmintrin.h>
15#include <xmmintrin.h>
16#include <immintrin.h>
17#endif
18
/* Visual Studio releases with _MSC_VER <= 1700 get float versions of
 * isinf/isnan/isfinite injected into namespace std, built on the CRT
 * helpers _finite and _isnan. */
#if defined(__WIN32__)
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
namespace std
{
  /* _finite(x) is also 0 for NaN, so NaN has to be excluded explicitly */
  __forceinline bool isinf ( const float x ) { return _finite(x) == 0 && _isnan(x) == 0; }
  __forceinline bool isnan ( const float x ) { return _isnan(x) != 0; }
  __forceinline bool isfinite (const float x) { return _finite(x) != 0; }
}
#endif
#endif
29
30namespace embree
31{
32 __forceinline bool isvalid ( const float& v ) {
33 return (v > -FLT_LARGE) & (v < +FLT_LARGE);
34 }
35
36 __forceinline int cast_f2i(float f) {
37 union { float f; int i; } v; v.f = f; return v.i;
38 }
39
40 __forceinline float cast_i2f(int i) {
41 union { float f; int i; } v; v.i = i; return v.f;
42 }
43
44 __forceinline int toInt (const float& a) { return int(a); }
45 __forceinline float toFloat(const int& a) { return float(a); }
46
#if defined(__WIN32__)
  /*! Windows only: true when the CRT helper _finite reports x as finite. */
  __forceinline bool finite ( const float x ) { return _finite(x) != 0; }
#endif
50
51 __forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
52 __forceinline float sqr ( const float x ) { return x*x; }
53
54 __forceinline float rcp ( const float x )
55 {
56 const __m128 a = _mm_set_ss(w: x);
57
58#if defined(__AVX512VL__)
59 const __m128 r = _mm_rcp14_ss(_mm_set_ss(0.0f),a);
60#else
61 const __m128 r = _mm_rcp_ss(a: a);
62#endif
63
64#if defined(__AVX2__)
65 return _mm_cvtss_f32(_mm_mul_ss(r,_mm_fnmadd_ss(r, a, _mm_set_ss(2.0f))));
66#else
67 return _mm_cvtss_f32(a: _mm_mul_ss(a: r,b: _mm_sub_ss(a: _mm_set_ss(w: 2.0f), b: _mm_mul_ss(a: r, b: a))));
68#endif
69 }
70
71 __forceinline float signmsk ( const float x ) {
72 return _mm_cvtss_f32(a: _mm_and_ps(a: _mm_set_ss(w: x),b: _mm_castsi128_ps(a: _mm_set1_epi32(i: 0x80000000))));
73 }
74 __forceinline float xorf( const float x, const float y ) {
75 return _mm_cvtss_f32(a: _mm_xor_ps(a: _mm_set_ss(w: x),b: _mm_set_ss(w: y)));
76 }
77 __forceinline float andf( const float x, const unsigned y ) {
78 return _mm_cvtss_f32(a: _mm_and_ps(a: _mm_set_ss(w: x),b: _mm_castsi128_ps(a: _mm_set1_epi32(i: y))));
79 }
80 __forceinline float rsqrt( const float x )
81 {
82 const __m128 a = _mm_set_ss(w: x);
83#if defined(__AVX512VL__)
84 __m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a);
85#else
86 __m128 r = _mm_rsqrt_ss(a: a);
87#endif
88 r = _mm_add_ss(a: _mm_mul_ss(a: _mm_set_ss(w: 1.5f), b: r), b: _mm_mul_ss(a: _mm_mul_ss(a: _mm_mul_ss(a: a, b: _mm_set_ss(w: -0.5f)), b: r), b: _mm_mul_ss(a: r, b: r)));
89#if defined(__ARM_NEON)
90 r = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
91#endif
92 return _mm_cvtss_f32(a: r);
93 }
94
#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
  /*! Fallbacks for Visual Studio releases with _MSC_VER <= 1700.
   *  NOTE(review): the float variant scales x by (1.1f+ulp) or (0.9f-ulp), i.e.
   *  it moves x by roughly 10% rather than by one representable step, and it
   *  returns zero for x == 0 - confirm callers only need a conservative bound. */
  __forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
  __forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
  /*! NOTE(review): returns int and truncates f+0.5f toward zero, so negative
   *  inputs are not rounded to nearest - confirm this is acceptable. */
  __forceinline int roundf(float f) { return (int)(f + 0.5f); }
#else
  /*! Forward to the C library nextafterf / nextafter. */
  __forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); }
  __forceinline double nextafter(double x, double y) { return ::nextafter(x, y); }
#endif
103
104 __forceinline float abs ( const float x ) { return ::fabsf(x: x); }
105 __forceinline float acos ( const float x ) { return ::acosf (x: x); }
106 __forceinline float asin ( const float x ) { return ::asinf (x: x); }
107 __forceinline float atan ( const float x ) { return ::atanf (x: x); }
108 __forceinline float atan2( const float y, const float x ) { return ::atan2f(y: y, x: x); }
109 __forceinline float cos ( const float x ) { return ::cosf (x: x); }
110 __forceinline float cosh ( const float x ) { return ::coshf (x: x); }
111 __forceinline float exp ( const float x ) { return ::expf (x: x); }
112 __forceinline float fmod ( const float x, const float y ) { return ::fmodf (x: x, y: y); }
113 __forceinline float log ( const float x ) { return ::logf (x: x); }
114 __forceinline float log10( const float x ) { return ::log10f(x: x); }
115 __forceinline float pow ( const float x, const float y ) { return ::powf (x: x, y: y); }
116 __forceinline float sin ( const float x ) { return ::sinf (x: x); }
117 __forceinline float sinh ( const float x ) { return ::sinhf (x: x); }
118 __forceinline float sqrt ( const float x ) { return ::sqrtf (x: x); }
119 __forceinline float tan ( const float x ) { return ::tanf (x: x); }
120 __forceinline float tanh ( const float x ) { return ::tanhf (x: x); }
121 __forceinline float floor( const float x ) { return ::floorf (x: x); }
122 __forceinline float ceil ( const float x ) { return ::ceilf (x: x); }
123 __forceinline float frac ( const float x ) { return x-floor(x); }
124
125 __forceinline double abs ( const double x ) { return ::fabs(x: x); }
126 __forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
127 __forceinline double acos ( const double x ) { return ::acos (x: x); }
128 __forceinline double asin ( const double x ) { return ::asin (x: x); }
129 __forceinline double atan ( const double x ) { return ::atan (x: x); }
130 __forceinline double atan2( const double y, const double x ) { return ::atan2(y: y, x: x); }
131 __forceinline double cos ( const double x ) { return ::cos (x: x); }
132 __forceinline double cosh ( const double x ) { return ::cosh (x: x); }
133 __forceinline double exp ( const double x ) { return ::exp (x: x); }
134 __forceinline double fmod ( const double x, const double y ) { return ::fmod (x: x, y: y); }
135 __forceinline double log ( const double x ) { return ::log (x: x); }
136 __forceinline double log10( const double x ) { return ::log10(x: x); }
137 __forceinline double pow ( const double x, const double y ) { return ::pow (x: x, y: y); }
138 __forceinline double rcp ( const double x ) { return 1.0/x; }
139 __forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x: x); }
140 __forceinline double sin ( const double x ) { return ::sin (x: x); }
141 __forceinline double sinh ( const double x ) { return ::sinh (x: x); }
142 __forceinline double sqr ( const double x ) { return x*x; }
143 __forceinline double sqrt ( const double x ) { return ::sqrt (x: x); }
144 __forceinline double tan ( const double x ) { return ::tan (x: x); }
145 __forceinline double tanh ( const double x ) { return ::tanh (x: x); }
146 __forceinline double floor( const double x ) { return ::floor (x: x); }
147 __forceinline double ceil ( const double x ) { return ::ceil (x: x); }
148
#if defined(__SSE4_1__)
  /*! Minimum of a and b chosen by comparing their bit patterns as signed
   *  32-bit integers (_mm_min_epi32) instead of as floats.
   *  NOTE(review): presumably callers only pass values for which the integer
   *  ordering matches the float ordering - verify against callers. */
  __forceinline float mini(float a, float b) {
    const __m128i aBits = _mm_castps_si128(_mm_set_ss(a));
    const __m128i bBits = _mm_castps_si128(_mm_set_ss(b));
    const __m128i lower = _mm_min_epi32(aBits,bBits);
    return _mm_cvtss_f32(_mm_castsi128_ps(lower));
  }

  /*! Maximum of a and b chosen by comparing their bit patterns as signed
   *  32-bit integers (_mm_max_epi32); see the note on mini. */
  __forceinline float maxi(float a, float b) {
    const __m128i aBits = _mm_castps_si128(_mm_set_ss(a));
    const __m128i bBits = _mm_castps_si128(_mm_set_ss(b));
    const __m128i upper = _mm_max_epi32(aBits,bBits);
    return _mm_cvtss_f32(_mm_castsi128_ps(upper));
  }
#endif
166
167 template<typename T>
168 __forceinline T twice(const T& a) { return a+a; }
169
170 __forceinline int min(int a, int b) { return a<b ? a:b; }
171 __forceinline unsigned min(unsigned a, unsigned b) { return a<b ? a:b; }
172 __forceinline int64_t min(int64_t a, int64_t b) { return a<b ? a:b; }
173 __forceinline float min(float a, float b) { return a<b ? a:b; }
174 __forceinline double min(double a, double b) { return a<b ? a:b; }
175#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
176 __forceinline size_t min(size_t a, size_t b) { return a<b ? a:b; }
177#endif
178#if defined(__EMSCRIPTEN__)
179 __forceinline long min(long a, long b) { return a<b ? a:b; }
180#endif
181
182 template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
183 template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
184 template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
185
186 template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
187 template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
188 template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
189
190 __forceinline int max(int a, int b) { return a<b ? b:a; }
191 __forceinline unsigned max(unsigned a, unsigned b) { return a<b ? b:a; }
192 __forceinline int64_t max(int64_t a, int64_t b) { return a<b ? b:a; }
193 __forceinline float max(float a, float b) { return a<b ? b:a; }
194 __forceinline double max(double a, double b) { return a<b ? b:a; }
195#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
196 __forceinline size_t max(size_t a, size_t b) { return a<b ? b:a; }
197#endif
198#if defined(__EMSCRIPTEN__)
199 __forceinline long max(long a, long b) { return a<b ? b:a; }
200#endif
201
202 template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
203 template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
204 template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
205
206 template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
207 template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
208 template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
209
#if defined(__MACOSX__)
  /*! macOS only: min/max overloads for ssize_t. */
  __forceinline ssize_t min(ssize_t a, ssize_t b) { return a<b ? a:b; }
  __forceinline ssize_t max(ssize_t a, ssize_t b) { return a<b ? b:a; }
#endif
214
#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
  /*! macOS (non-Intel compiler): forwards to __sincosf, which writes the sine
   *  of x to *s and the cosine to *c. */
  __forceinline void sincosf(float x, float *s, float *c) {
    __sincosf(x,s,c);
  }
#endif

#if defined(__WIN32__) || defined(__FreeBSD__)
  /*! Windows / FreeBSD: computes the sine and cosine of x with two separate
   *  calls and stores them in *s and *c. */
  __forceinline void sincosf(float x, float *s, float *c) {
    *s = sinf(x);
    *c = cosf(x);
  }
#endif
226
227 template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
228 template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
229
230 template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
231 template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
232 template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
233 template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
234
235#if defined(__AVX2__)
236 __forceinline float madd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
237 __forceinline float msub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
238 __forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
239 __forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
240#else
241 __forceinline float madd ( const float a, const float b, const float c) { return a*b+c; }
242 __forceinline float msub ( const float a, const float b, const float c) { return a*b-c; }
243 __forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c;}
244 __forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; }
245#endif
246
247 /*! random functions */
248 template<typename T> T random() { return T(0); }
249#if defined(_WIN32)
250 template<> __forceinline int random() { return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16); }
251 template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16); }
252#else
253 template<> __forceinline int random() { return int(rand()); }
254 template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
255#endif
256 template<> __forceinline float random() { return rand()/float(RAND_MAX); }
257 template<> __forceinline double random() { return rand()/double(RAND_MAX); }
258
#if defined(_WIN32)
  /*! Windows stand-in for drand48: rand() scaled by RAND_MAX, a value in [0,1]. */
  __forceinline double drand48() {
    return double(rand())/double(RAND_MAX);
  }

  /*! Windows stand-in for srand48: seeds rand() with the given seed,
   *  converted to unsigned. */
  __forceinline void srand48(long seed) {
    srand(static_cast<unsigned>(seed));
  }
#endif
268
269 /*! selects */
270 __forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
271 __forceinline int select(bool s, int t, int f) { return s ? t : f; }
272 __forceinline float select(bool s, float t, float f) { return s ? t : f; }
273
274 __forceinline bool all(bool s) { return s; }
275
276 __forceinline float lerp(const float v0, const float v1, const float t) {
277 return madd(a: 1.0f-t,b: v0,c: t*v1);
278 }
279
280 template<typename T>
281 __forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
282 return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
283 }
284
285 /*! exchange */
286 template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }
287
288 /* load/store */
289 template<typename Ty> struct mem;
290
291 template<> struct mem<float> {
292 static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
293 static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
294
295 static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
296 static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
297 };
298
299 /*! bit reverse operation */
300 template<class T>
301 __forceinline T bitReverse(const T& vin)
302 {
303 T v = vin;
304 v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
305 v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
306 v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
307 v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
308 v = ( v >> 16 ) | ( v << 16);
309 return v;
310 }
311
312 /*! bit interleave operation */
313 template<class T>
314 __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
315 {
316 T x = xin, y = yin, z = zin;
317 x = (x | (x << 16)) & 0x030000FF;
318 x = (x | (x << 8)) & 0x0300F00F;
319 x = (x | (x << 4)) & 0x030C30C3;
320 x = (x | (x << 2)) & 0x09249249;
321
322 y = (y | (y << 16)) & 0x030000FF;
323 y = (y | (y << 8)) & 0x0300F00F;
324 y = (y | (y << 4)) & 0x030C30C3;
325 y = (y | (y << 2)) & 0x09249249;
326
327 z = (z | (z << 16)) & 0x030000FF;
328 z = (z | (z << 8)) & 0x0300F00F;
329 z = (z | (z << 4)) & 0x030C30C3;
330 z = (z | (z << 2)) & 0x09249249;
331
332 return x | (y << 1) | (z << 2);
333 }
334
#if defined(__AVX2__)

  /*! AVX2 specialization for unsigned int: each input is deposited with pdep
   *  into every third bit, x starting at bit 0, y at bit 1 and z at bit 2.
   *  NOTE(review): the x and y masks cover 11 bit positions and the z mask 10,
   *  whereas the generic version keeps 10 bits per input - results differ once
   *  bit 10 of x or y is set; confirm this is intended. */
  template<>
    __forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
  {
    const unsigned int xx = pdep(xi,0x49249249 /* 0b01001001001001001001001001001001 */ );
    const unsigned int yy = pdep(yi,0x92492492 /* 0b10010010010010010010010010010010 */);
    const unsigned int zz = pdep(zi,0x24924924 /* 0b00100100100100100100100100100100 */);
    return xx | yy | zz;
  }

#endif
347
348 /*! bit interleave operation for 64bit data types*/
349 template<class T>
350 __forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
351 T x = xin & 0x1fffff;
352 T y = yin & 0x1fffff;
353 T z = zin & 0x1fffff;
354
355 x = (x | x << 32) & 0x1f00000000ffff;
356 x = (x | x << 16) & 0x1f0000ff0000ff;
357 x = (x | x << 8) & 0x100f00f00f00f00f;
358 x = (x | x << 4) & 0x10c30c30c30c30c3;
359 x = (x | x << 2) & 0x1249249249249249;
360
361 y = (y | y << 32) & 0x1f00000000ffff;
362 y = (y | y << 16) & 0x1f0000ff0000ff;
363 y = (y | y << 8) & 0x100f00f00f00f00f;
364 y = (y | y << 4) & 0x10c30c30c30c30c3;
365 y = (y | y << 2) & 0x1249249249249249;
366
367 z = (z | z << 32) & 0x1f00000000ffff;
368 z = (z | z << 16) & 0x1f0000ff0000ff;
369 z = (z | z << 8) & 0x100f00f00f00f00f;
370 z = (z | z << 4) & 0x10c30c30c30c30c3;
371 z = (z | z << 2) & 0x1249249249249249;
372
373 return x | (y << 1) | (z << 2);
374 }
375}
376

// Source: qtquick3d/src/3rdparty/embree/common/math/math.h