1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "../../common/sys/platform.h" |
7 | #include "../../common/sys/sysinfo.h" |
8 | #include "../../common/sys/thread.h" |
9 | #include "../../common/sys/alloc.h" |
10 | #include "../../common/sys/ref.h" |
11 | #include "../../common/sys/intrinsics.h" |
12 | #include "../../common/sys/atomic.h" |
13 | #include "../../common/sys/mutex.h" |
14 | #include "../../common/sys/vector.h" |
15 | #include "../../common/sys/array.h" |
16 | #include "../../common/sys/string.h" |
17 | #include "../../common/sys/regression.h" |
18 | #include "../../common/sys/vector.h" |
19 | |
20 | #include "../../common/math/math.h" |
21 | #include "../../common/math/transcendental.h" |
22 | #include "../../common/simd/simd.h" |
23 | #include "../../common/math/vec2.h" |
24 | #include "../../common/math/vec3.h" |
25 | #include "../../common/math/vec4.h" |
26 | #include "../../common/math/vec2fa.h" |
27 | #include "../../common/math/vec3fa.h" |
28 | #include "../../common/math/interval.h" |
29 | #include "../../common/math/bbox.h" |
30 | #include "../../common/math/obbox.h" |
31 | #include "../../common/math/lbbox.h" |
32 | #include "../../common/math/linearspace2.h" |
33 | #include "../../common/math/linearspace3.h" |
34 | #include "../../common/math/affinespace.h" |
35 | #include "../../common/math/range.h" |
36 | #include "../../common/lexers/tokenstream.h" |
37 | |
38 | #include "../../common/tasking/taskscheduler.h" |
39 | |
40 | #define COMMA , |
41 | |
42 | #include "../config.h" |
43 | #include "isa.h" |
44 | #include "stat.h" |
45 | #include "profile.h" |
46 | #include "rtcore.h" |
47 | #include "vector.h" |
48 | #include "state.h" |
49 | #include "instance_stack.h" |
50 | |
51 | #include <vector> |
52 | #include <map> |
53 | #include <algorithm> |
54 | #include <functional> |
55 | #include <utility> |
56 | #include <sstream> |
57 | |
58 | namespace embree |
59 | { |
60 | //////////////////////////////////////////////////////////////////////////////// |
61 | /// Vec2 shortcuts |
62 | //////////////////////////////////////////////////////////////////////////////// |
63 | |
64 | template<int N> using Vec2vf = Vec2<vfloat<N>>; |
65 | template<int N> using Vec2vd = Vec2<vdouble<N>>; |
66 | template<int N> using Vec2vr = Vec2<vreal<N>>; |
67 | template<int N> using Vec2vi = Vec2<vint<N>>; |
68 | template<int N> using Vec2vl = Vec2<vllong<N>>; |
69 | template<int N> using Vec2vb = Vec2<vbool<N>>; |
70 | template<int N> using Vec2vbf = Vec2<vboolf<N>>; |
71 | template<int N> using Vec2vbd = Vec2<vboold<N>>; |
72 | |
73 | typedef Vec2<vfloat4> Vec2vf4; |
74 | typedef Vec2<vdouble4> Vec2vd4; |
75 | typedef Vec2<vreal4> Vec2vr4; |
76 | typedef Vec2<vint4> Vec2vi4; |
77 | typedef Vec2<vllong4> Vec2vl4; |
78 | typedef Vec2<vbool4> Vec2vb4; |
79 | typedef Vec2<vboolf4> Vec2vbf4; |
80 | typedef Vec2<vboold4> Vec2vbd4; |
81 | |
82 | typedef Vec2<vfloat8> Vec2vf8; |
83 | typedef Vec2<vdouble8> Vec2vd8; |
84 | typedef Vec2<vreal8> Vec2vr8; |
85 | typedef Vec2<vint8> Vec2vi8; |
86 | typedef Vec2<vllong8> Vec2vl8; |
87 | typedef Vec2<vbool8> Vec2vb8; |
88 | typedef Vec2<vboolf8> Vec2vbf8; |
89 | typedef Vec2<vboold8> Vec2vbd8; |
90 | |
91 | typedef Vec2<vfloat16> Vec2vf16; |
92 | typedef Vec2<vdouble16> Vec2vd16; |
93 | typedef Vec2<vreal16> Vec2vr16; |
94 | typedef Vec2<vint16> Vec2vi16; |
95 | typedef Vec2<vllong16> Vec2vl16; |
96 | typedef Vec2<vbool16> Vec2vb16; |
97 | typedef Vec2<vboolf16> Vec2vbf16; |
98 | typedef Vec2<vboold16> Vec2vbd16; |
99 | |
100 | typedef Vec2<vfloatx> Vec2vfx; |
101 | typedef Vec2<vdoublex> Vec2vdx; |
102 | typedef Vec2<vrealx> Vec2vrx; |
103 | typedef Vec2<vintx> Vec2vix; |
104 | typedef Vec2<vllongx> Vec2vlx; |
105 | typedef Vec2<vboolx> Vec2vbx; |
106 | typedef Vec2<vboolfx> Vec2vbfx; |
107 | typedef Vec2<vbooldx> Vec2vbdx; |
108 | |
109 | //////////////////////////////////////////////////////////////////////////////// |
110 | /// Vec3 shortcuts |
111 | //////////////////////////////////////////////////////////////////////////////// |
112 | |
113 | template<int N> using Vec3vf = Vec3<vfloat<N>>; |
114 | template<int N> using Vec3vd = Vec3<vdouble<N>>; |
115 | template<int N> using Vec3vr = Vec3<vreal<N>>; |
116 | template<int N> using Vec3vi = Vec3<vint<N>>; |
117 | template<int N> using Vec3vl = Vec3<vllong<N>>; |
118 | template<int N> using Vec3vb = Vec3<vbool<N>>; |
119 | template<int N> using Vec3vbf = Vec3<vboolf<N>>; |
120 | template<int N> using Vec3vbd = Vec3<vboold<N>>; |
121 | |
122 | typedef Vec3<vfloat4> Vec3vf4; |
123 | typedef Vec3<vdouble4> Vec3vd4; |
124 | typedef Vec3<vreal4> Vec3vr4; |
125 | typedef Vec3<vint4> Vec3vi4; |
126 | typedef Vec3<vllong4> Vec3vl4; |
127 | typedef Vec3<vbool4> Vec3vb4; |
128 | typedef Vec3<vboolf4> Vec3vbf4; |
129 | typedef Vec3<vboold4> Vec3vbd4; |
130 | |
131 | typedef Vec3<vfloat8> Vec3vf8; |
132 | typedef Vec3<vdouble8> Vec3vd8; |
133 | typedef Vec3<vreal8> Vec3vr8; |
134 | typedef Vec3<vint8> Vec3vi8; |
135 | typedef Vec3<vllong8> Vec3vl8; |
136 | typedef Vec3<vbool8> Vec3vb8; |
137 | typedef Vec3<vboolf8> Vec3vbf8; |
138 | typedef Vec3<vboold8> Vec3vbd8; |
139 | |
140 | typedef Vec3<vfloat16> Vec3vf16; |
141 | typedef Vec3<vdouble16> Vec3vd16; |
142 | typedef Vec3<vreal16> Vec3vr16; |
143 | typedef Vec3<vint16> Vec3vi16; |
144 | typedef Vec3<vllong16> Vec3vl16; |
145 | typedef Vec3<vbool16> Vec3vb16; |
146 | typedef Vec3<vboolf16> Vec3vbf16; |
147 | typedef Vec3<vboold16> Vec3vbd16; |
148 | |
149 | typedef Vec3<vfloatx> Vec3vfx; |
150 | typedef Vec3<vdoublex> Vec3vdx; |
151 | typedef Vec3<vrealx> Vec3vrx; |
152 | typedef Vec3<vintx> Vec3vix; |
153 | typedef Vec3<vllongx> Vec3vlx; |
154 | typedef Vec3<vboolx> Vec3vbx; |
155 | typedef Vec3<vboolfx> Vec3vbfx; |
156 | typedef Vec3<vbooldx> Vec3vbdx; |
157 | |
158 | //////////////////////////////////////////////////////////////////////////////// |
159 | /// Vec4 shortcuts |
160 | //////////////////////////////////////////////////////////////////////////////// |
161 | |
162 | template<int N> using Vec4vf = Vec4<vfloat<N>>; |
163 | template<int N> using Vec4vd = Vec4<vdouble<N>>; |
164 | template<int N> using Vec4vr = Vec4<vreal<N>>; |
165 | template<int N> using Vec4vi = Vec4<vint<N>>; |
166 | template<int N> using Vec4vl = Vec4<vllong<N>>; |
167 | template<int N> using Vec4vb = Vec4<vbool<N>>; |
168 | template<int N> using Vec4vbf = Vec4<vboolf<N>>; |
169 | template<int N> using Vec4vbd = Vec4<vboold<N>>; |
170 | |
171 | typedef Vec4<vfloat4> Vec4vf4; |
172 | typedef Vec4<vdouble4> Vec4vd4; |
173 | typedef Vec4<vreal4> Vec4vr4; |
174 | typedef Vec4<vint4> Vec4vi4; |
175 | typedef Vec4<vllong4> Vec4vl4; |
176 | typedef Vec4<vbool4> Vec4vb4; |
177 | typedef Vec4<vboolf4> Vec4vbf4; |
178 | typedef Vec4<vboold4> Vec4vbd4; |
179 | |
180 | typedef Vec4<vfloat8> Vec4vf8; |
181 | typedef Vec4<vdouble8> Vec4vd8; |
182 | typedef Vec4<vreal8> Vec4vr8; |
183 | typedef Vec4<vint8> Vec4vi8; |
184 | typedef Vec4<vllong8> Vec4vl8; |
185 | typedef Vec4<vbool8> Vec4vb8; |
186 | typedef Vec4<vboolf8> Vec4vbf8; |
187 | typedef Vec4<vboold8> Vec4vbd8; |
188 | |
189 | typedef Vec4<vfloat16> Vec4vf16; |
190 | typedef Vec4<vdouble16> Vec4vd16; |
191 | typedef Vec4<vreal16> Vec4vr16; |
192 | typedef Vec4<vint16> Vec4vi16; |
193 | typedef Vec4<vllong16> Vec4vl16; |
194 | typedef Vec4<vbool16> Vec4vb16; |
195 | typedef Vec4<vboolf16> Vec4vbf16; |
196 | typedef Vec4<vboold16> Vec4vbd16; |
197 | |
198 | typedef Vec4<vfloatx> Vec4vfx; |
199 | typedef Vec4<vdoublex> Vec4vdx; |
200 | typedef Vec4<vrealx> Vec4vrx; |
201 | typedef Vec4<vintx> Vec4vix; |
202 | typedef Vec4<vllongx> Vec4vlx; |
203 | typedef Vec4<vboolx> Vec4vbx; |
204 | typedef Vec4<vboolfx> Vec4vbfx; |
205 | typedef Vec4<vbooldx> Vec4vbdx; |
206 | |
207 | //////////////////////////////////////////////////////////////////////////////// |
208 | /// Other shortcuts |
209 | //////////////////////////////////////////////////////////////////////////////// |
210 | |
211 | template<int N> using BBox3vf = BBox<Vec3vf<N>>; |
212 | typedef BBox<Vec3vf4> BBox3vf4; |
213 | typedef BBox<Vec3vf8> BBox3vf8; |
214 | typedef BBox<Vec3vf16> BBox3vf16; |
215 | |
216 | /* calculate time segment itime and fractional time ftime */ |
217 | __forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime) |
218 | { |
219 | const float timeScaled = time * numTimeSegments; |
220 | const float itimef = clamp(x: floorf(x: timeScaled), lower: 0.0f, upper: numTimeSegments-1.0f); |
221 | ftime = timeScaled - itimef; |
222 | return int(itimef); |
223 | } |
224 | |
225 | __forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime) |
226 | { |
227 | const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments; |
228 | const float itimef = clamp(x: floorf(x: timeScaled), lower: 0.0f, upper: numTimeSegments-1.0f); |
229 | ftime = timeScaled - itimef; |
230 | return int(itimef); |
231 | } |
232 | |
233 | template<int N> |
234 | __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime) |
235 | { |
236 | const vfloat<N> timeScaled = time * numTimeSegments; |
237 | const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f); |
238 | ftime = timeScaled - itimef; |
239 | return vint<N>(itimef); |
240 | } |
241 | |
242 | template<int N> |
243 | __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime) |
244 | { |
245 | const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments; |
246 | const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f); |
247 | ftime = timeScaled - itimef; |
248 | return vint<N>(itimef); |
249 | } |
250 | |
251 | /* calculate overlapping time segment range */ |
252 | __forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments) |
253 | { |
254 | const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step |
255 | const float round_down = 1.0f-2.0f*float(ulp); |
256 | const int itime_lower = (int)max(a: floor(x: round_up *time_range.lower*numTimeSegments), b: 0.0f); |
257 | const int itime_upper = (int)min(a: ceil (x: round_down*time_range.upper*numTimeSegments), b: numTimeSegments); |
258 | return make_range(begin: itime_lower, end: itime_upper); |
259 | } |
260 | |
261 | /* calculate overlapping time segment range */ |
262 | __forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments) |
263 | { |
264 | const float lower = (range.lower-time_range.lower)/time_range.size(); |
265 | const float upper = (range.upper-time_range.lower)/time_range.size(); |
266 | return getTimeSegmentRange(time_range: BBox1f(lower,upper),numTimeSegments); |
267 | } |
268 | } |
269 | |