1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "node_intersector.h" |
7 | |
8 | namespace embree |
9 | { |
10 | namespace isa |
11 | { |
12 | ////////////////////////////////////////////////////////////////////////////////////// |
13 | // Ray packet structure used in hybrid traversal |
14 | ////////////////////////////////////////////////////////////////////////////////////// |
15 | |
16 | template<int K, bool robust> |
17 | struct TravRayK; |
18 | |
19 | /* Fast variant */ |
20 | template<int K> |
21 | struct TravRayK<K, false> |
22 | { |
23 | __forceinline TravRayK() {} |
24 | |
25 | __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) |
26 | { |
27 | init(ray_org, ray_dir, N); |
28 | } |
29 | |
30 | __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) |
31 | { |
32 | init(ray_org, ray_dir, N); |
33 | tnear = ray_tnear; |
34 | tfar = ray_tfar; |
35 | } |
36 | |
37 | __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) |
38 | { |
39 | org = ray_org; |
40 | dir = ray_dir; |
41 | rdir = rcp_safe(ray_dir); |
42 | #if defined(__AVX2__) || defined(__ARM_NEON) |
43 | org_rdir = org * rdir; |
44 | #endif |
45 | |
46 | if (N) |
47 | { |
48 | const int size = sizeof(float)*N; |
49 | nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size)); |
50 | nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size)); |
51 | nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size)); |
52 | } |
53 | } |
54 | |
55 | Vec3vf<K> org; |
56 | Vec3vf<K> dir; |
57 | Vec3vf<K> rdir; |
58 | #if defined(__AVX2__) || defined(__ARM_NEON) |
59 | Vec3vf<K> org_rdir; |
60 | #endif |
61 | Vec3vi<K> nearXYZ; |
62 | vfloat<K> tnear; |
63 | vfloat<K> tfar; |
64 | }; |
65 | |
66 | template<int K> |
67 | using TravRayKFast = TravRayK<K, false>; |
68 | |
69 | /* Robust variant */ |
70 | template<int K> |
71 | struct TravRayK<K, true> |
72 | { |
73 | __forceinline TravRayK() {} |
74 | |
75 | __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) |
76 | { |
77 | init(ray_org, ray_dir, N); |
78 | } |
79 | |
80 | __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) |
81 | { |
82 | init(ray_org, ray_dir, N); |
83 | tnear = ray_tnear; |
84 | tfar = ray_tfar; |
85 | } |
86 | |
87 | __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) |
88 | { |
89 | org = ray_org; |
90 | dir = ray_dir; |
91 | rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir)); |
92 | |
93 | if (N) |
94 | { |
95 | const int size = sizeof(float)*N; |
96 | nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size)); |
97 | nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size)); |
98 | nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size)); |
99 | } |
100 | } |
101 | |
102 | Vec3vf<K> org; |
103 | Vec3vf<K> dir; |
104 | Vec3vf<K> rdir; |
105 | Vec3vi<K> nearXYZ; |
106 | vfloat<K> tnear; |
107 | vfloat<K> tfar; |
108 | }; |
109 | |
110 | template<int K> |
111 | using TravRayKRobust = TravRayK<K, true>; |
112 | |
113 | ////////////////////////////////////////////////////////////////////////////////////// |
114 | // Fast AABBNode intersection |
115 | ////////////////////////////////////////////////////////////////////////////////////// |
116 | |
117 | template<int N, int K> |
118 | __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i, |
119 | const TravRayKFast<K>& ray, vfloat<K>& dist) |
120 | |
121 | { |
122 | #if defined(__AVX2__) || defined(__ARM_NEON) |
123 | const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x); |
124 | const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y); |
125 | const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z); |
126 | const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x); |
127 | const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y); |
128 | const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z); |
129 | #else |
130 | const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; |
131 | const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; |
132 | const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; |
133 | const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; |
134 | const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; |
135 | const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; |
136 | #endif |
137 | |
138 | #if defined(__AVX512F__) // SKX |
139 | if (K == 16) |
140 | { |
141 | /* use mixed float/int min/max */ |
142 | const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
143 | const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
144 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
145 | dist = lnearP; |
146 | return lhit; |
147 | } |
148 | else |
149 | #endif |
150 | { |
151 | const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
152 | const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
153 | #if defined(__AVX512F__) // SKX |
154 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
155 | #else |
156 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
157 | #endif |
158 | dist = lnearP; |
159 | return lhit; |
160 | } |
161 | } |
162 | |
163 | ////////////////////////////////////////////////////////////////////////////////////// |
164 | // Robust AABBNode intersection |
165 | ////////////////////////////////////////////////////////////////////////////////////// |
166 | |
167 | template<int N, int K> |
168 | __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i, |
169 | const TravRayKRobust<K>& ray, vfloat<K>& dist) |
170 | { |
171 | // FIXME: use per instruction rounding for AVX512 |
172 | const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; |
173 | const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; |
174 | const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; |
175 | const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; |
176 | const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; |
177 | const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; |
178 | const float round_up = 1.0f+3.0f*float(ulp); |
179 | const float round_down = 1.0f-3.0f*float(ulp); |
180 | const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); |
181 | const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); |
182 | const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); |
183 | dist = lnearP; |
184 | return lhit; |
185 | } |
186 | |
187 | ////////////////////////////////////////////////////////////////////////////////////// |
188 | // Fast AABBNodeMB intersection |
189 | ////////////////////////////////////////////////////////////////////////////////////// |
190 | |
191 | template<int N, int K> |
192 | __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i, |
193 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist) |
194 | { |
195 | const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); |
196 | const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); |
197 | const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); |
198 | const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); |
199 | const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); |
200 | const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); |
201 | |
202 | #if defined(__AVX2__) || defined(__ARM_NEON) |
203 | const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); |
204 | const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); |
205 | const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); |
206 | const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); |
207 | const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); |
208 | const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); |
209 | #else |
210 | const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; |
211 | const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; |
212 | const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; |
213 | const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; |
214 | const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; |
215 | const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; |
216 | #endif |
217 | |
218 | #if defined(__AVX512F__) // SKX |
219 | if (K == 16) |
220 | { |
221 | /* use mixed float/int min/max */ |
222 | const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
223 | const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
224 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
225 | dist = lnearP; |
226 | return lhit; |
227 | } |
228 | else |
229 | #endif |
230 | { |
231 | const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
232 | const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
233 | #if defined(__AVX512F__) // SKX |
234 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
235 | #else |
236 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
237 | #endif |
238 | dist = lnearP; |
239 | return lhit; |
240 | } |
241 | } |
242 | |
243 | ////////////////////////////////////////////////////////////////////////////////////// |
244 | // Robust AABBNodeMB intersection |
245 | ////////////////////////////////////////////////////////////////////////////////////// |
246 | |
247 | template<int N, int K> |
248 | __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i, |
249 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist) |
250 | { |
251 | const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); |
252 | const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); |
253 | const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); |
254 | const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); |
255 | const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); |
256 | const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); |
257 | |
258 | const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; |
259 | const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; |
260 | const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; |
261 | const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; |
262 | const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; |
263 | const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; |
264 | |
265 | const float round_up = 1.0f+3.0f*float(ulp); |
266 | const float round_down = 1.0f-3.0f*float(ulp); |
267 | |
268 | #if defined(__AVX512F__) // SKX |
269 | if (K == 16) |
270 | { |
271 | const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
272 | const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
273 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
274 | dist = lnearP; |
275 | return lhit; |
276 | } |
277 | else |
278 | #endif |
279 | { |
280 | const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
281 | const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
282 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
283 | dist = lnearP; |
284 | return lhit; |
285 | } |
286 | } |
287 | |
288 | ////////////////////////////////////////////////////////////////////////////////////// |
289 | // Fast AABBNodeMB4D intersection |
290 | ////////////////////////////////////////////////////////////////////////////////////// |
291 | |
292 | template<int N, int K> |
293 | __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i, |
294 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist) |
295 | { |
296 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
297 | |
298 | const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); |
299 | const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); |
300 | const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); |
301 | const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); |
302 | const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); |
303 | const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); |
304 | |
305 | #if defined(__AVX2__) || defined(__ARM_NEON) |
306 | const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); |
307 | const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); |
308 | const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); |
309 | const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); |
310 | const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); |
311 | const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); |
312 | #else |
313 | const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; |
314 | const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; |
315 | const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; |
316 | const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; |
317 | const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; |
318 | const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; |
319 | #endif |
320 | |
321 | const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); |
322 | const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); |
323 | vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
324 | if (unlikely(ref.isAABBNodeMB4D())) { |
325 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
326 | lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i])); |
327 | } |
328 | dist = lnearP; |
329 | return lhit; |
330 | } |
331 | |
332 | ////////////////////////////////////////////////////////////////////////////////////// |
333 | // Robust AABBNodeMB4D intersection |
334 | ////////////////////////////////////////////////////////////////////////////////////// |
335 | |
336 | template<int N, int K> |
337 | __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i, |
338 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist) |
339 | { |
340 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
341 | |
342 | const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); |
343 | const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); |
344 | const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); |
345 | const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); |
346 | const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); |
347 | const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); |
348 | |
349 | const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; |
350 | const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; |
351 | const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; |
352 | const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; |
353 | const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; |
354 | const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; |
355 | |
356 | const float round_up = 1.0f+3.0f*float(ulp); |
357 | const float round_down = 1.0f-3.0f*float(ulp); |
358 | const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); |
359 | const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); |
360 | vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
361 | |
362 | if (unlikely(ref.isAABBNodeMB4D())) { |
363 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
364 | lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i])); |
365 | } |
366 | dist = lnearP; |
367 | return lhit; |
368 | } |
369 | |
370 | ////////////////////////////////////////////////////////////////////////////////////// |
371 | // Fast OBBNode intersection |
372 | ////////////////////////////////////////////////////////////////////////////////////// |
373 | |
374 | template<int N, int K, bool robust> |
375 | __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i, |
376 | const TravRayK<K,robust>& ray, vfloat<K>& dist) |
377 | { |
378 | const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]), |
379 | Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]), |
380 | Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]), |
381 | Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i])); |
382 | |
383 | const Vec3vf<K> dir = xfmVector(naabb, ray.dir); |
384 | const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1? |
385 | const Vec3vf<K> org = xfmPoint(naabb, ray.org); |
386 | |
387 | const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir; |
388 | const vfloat<K> lclipMinY = org.y * nrdir.y; |
389 | const vfloat<K> lclipMinZ = org.z * nrdir.z; |
390 | const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir; |
391 | const vfloat<K> lclipMaxY = lclipMinY - nrdir.y; |
392 | const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z; |
393 | |
394 | vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
395 | vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
396 | if (robust) { |
397 | lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp)); |
398 | lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp)); |
399 | } |
400 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
401 | dist = lnearP; |
402 | return lhit; |
403 | } |
404 | |
405 | ////////////////////////////////////////////////////////////////////////////////////// |
406 | // Fast OBBNodeMB intersection |
407 | ////////////////////////////////////////////////////////////////////////////////////// |
408 | |
409 | template<int N, int K, bool robust> |
410 | __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i, |
411 | const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist) |
412 | { |
413 | const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]), |
414 | Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]), |
415 | Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]), |
416 | Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i])); |
417 | |
418 | const Vec3vf<K> b0_lower = zero; |
419 | const Vec3vf<K> b0_upper = one; |
420 | const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]); |
421 | const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]); |
422 | const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time); |
423 | const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time); |
424 | |
425 | const Vec3vf<K> dir = xfmVector(xfm, ray.dir); |
426 | const Vec3vf<K> rdir = rcp_safe(dir); |
427 | const Vec3vf<K> org = xfmPoint(xfm, ray.org); |
428 | |
429 | const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x; |
430 | const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y; |
431 | const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z; |
432 | const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x; |
433 | const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y; |
434 | const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z; |
435 | |
436 | vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
437 | vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
438 | if (robust) { |
439 | lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp)); |
440 | lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp)); |
441 | } |
442 | |
443 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
444 | dist = lnearP; |
445 | return lhit; |
446 | } |
447 | |
448 | |
449 | |
450 | ////////////////////////////////////////////////////////////////////////////////////// |
451 | // QuantizedBaseNode intersection |
452 | ////////////////////////////////////////////////////////////////////////////////////// |
453 | |
454 | template<int N, int K> |
455 | __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i, |
456 | const TravRayK<K,false>& ray, vfloat<K>& dist) |
457 | |
458 | { |
459 | assert(movemask(node->validMask()) & ((size_t)1 << i)); |
460 | const vfloat<N> lower_x = node->dequantizeLowerX(); |
461 | const vfloat<N> upper_x = node->dequantizeUpperX(); |
462 | const vfloat<N> lower_y = node->dequantizeLowerY(); |
463 | const vfloat<N> upper_y = node->dequantizeUpperY(); |
464 | const vfloat<N> lower_z = node->dequantizeLowerZ(); |
465 | const vfloat<N> upper_z = node->dequantizeUpperZ(); |
466 | |
467 | #if defined(__AVX2__) || defined(__ARM_NEON) |
468 | const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x); |
469 | const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y); |
470 | const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z); |
471 | const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x); |
472 | const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y); |
473 | const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z); |
474 | #else |
475 | const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; |
476 | const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; |
477 | const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; |
478 | const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; |
479 | const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; |
480 | const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; |
481 | #endif |
482 | |
483 | #if defined(__AVX512F__) // SKX |
484 | if (K == 16) |
485 | { |
486 | /* use mixed float/int min/max */ |
487 | const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
488 | const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
489 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
490 | dist = lnearP; |
491 | return lhit; |
492 | } |
493 | else |
494 | #endif |
495 | { |
496 | const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
497 | const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
498 | #if defined(__AVX512F__) // SKX |
499 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
500 | #else |
501 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
502 | #endif |
503 | dist = lnearP; |
504 | return lhit; |
505 | } |
506 | } |
507 | |
508 | template<int N, int K> |
509 | __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i, |
510 | const TravRayK<K,true>& ray, vfloat<K>& dist) |
511 | |
512 | { |
513 | assert(movemask(node->validMask()) & ((size_t)1 << i)); |
514 | const vfloat<N> lower_x = node->dequantizeLowerX(); |
515 | const vfloat<N> upper_x = node->dequantizeUpperX(); |
516 | const vfloat<N> lower_y = node->dequantizeLowerY(); |
517 | const vfloat<N> upper_y = node->dequantizeUpperY(); |
518 | const vfloat<N> lower_z = node->dequantizeLowerZ(); |
519 | const vfloat<N> upper_z = node->dequantizeUpperZ(); |
520 | |
521 | const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; |
522 | const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; |
523 | const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; |
524 | const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; |
525 | const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; |
526 | const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; |
527 | |
528 | const float round_up = 1.0f+3.0f*float(ulp); |
529 | const float round_down = 1.0f-3.0f*float(ulp); |
530 | |
531 | const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
532 | const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
533 | const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); |
534 | dist = lnearP; |
535 | return lhit; |
536 | } |
537 | |
538 | template<int N, int K> |
539 | __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, |
540 | const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist) |
541 | |
542 | { |
543 | assert(movemask(node->validMask()) & ((size_t)1 << i)); |
544 | |
545 | const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time); |
546 | const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time); |
547 | const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time); |
548 | const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time); |
549 | const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time); |
550 | const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time); |
551 | |
552 | #if defined(__AVX2__) || defined(__ARM_NEON) |
553 | const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); |
554 | const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); |
555 | const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); |
556 | const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); |
557 | const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); |
558 | const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); |
559 | #else |
560 | const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; |
561 | const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; |
562 | const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; |
563 | const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; |
564 | const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; |
565 | const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; |
566 | #endif |
567 | const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
568 | const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
569 | const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); |
570 | dist = lnearP; |
571 | return lhit; |
572 | } |
573 | |
574 | |
575 | template<int N, int K> |
576 | __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, |
577 | const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist) |
578 | |
579 | { |
580 | assert(movemask(node->validMask()) & ((size_t)1 << i)); |
581 | |
582 | const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time); |
583 | const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time); |
584 | const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time); |
585 | const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time); |
586 | const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time); |
587 | const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time); |
588 | |
589 | const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; |
590 | const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; |
591 | const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; |
592 | const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; |
593 | const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; |
594 | const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; |
595 | |
596 | const float round_up = 1.0f+3.0f*float(ulp); |
597 | const float round_down = 1.0f-3.0f*float(ulp); |
598 | |
599 | const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
600 | const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
601 | const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); |
602 | dist = lnearP; |
603 | return lhit; |
604 | } |
605 | |
606 | |
607 | ////////////////////////////////////////////////////////////////////////////////////// |
608 | // Node intersectors used in hybrid traversal |
609 | ////////////////////////////////////////////////////////////////////////////////////// |
610 | |
611 | /*! Intersects N nodes with K rays */ |
612 | template<int N, int K, int types, bool robust> |
613 | struct BVHNNodeIntersectorK; |
614 | |
615 | template<int N, int K> |
616 | struct BVHNNodeIntersectorK<N, K, BVH_AN1, false> |
617 | { |
618 | /* vmask is both an input and an output parameter! Its initial value should be the parent node |
619 | hit mask, which is used for correctly computing the current hit mask. The parent hit mask |
620 | is actually required only for motion blur node intersections (because different rays may |
621 | have different times), so for regular nodes vmask is simply overwritten. */ |
622 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
623 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
624 | { |
625 | vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist); |
626 | return true; |
627 | } |
628 | }; |
629 | |
630 | template<int N, int K> |
631 | struct BVHNNodeIntersectorK<N, K, BVH_AN1, true> |
632 | { |
633 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
634 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
635 | { |
636 | vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist); |
637 | return true; |
638 | } |
639 | }; |
640 | |
641 | template<int N, int K> |
642 | struct BVHNNodeIntersectorK<N, K, BVH_AN2, false> |
643 | { |
644 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
645 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
646 | { |
647 | vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist); |
648 | return true; |
649 | } |
650 | }; |
651 | |
652 | template<int N, int K> |
653 | struct BVHNNodeIntersectorK<N, K, BVH_AN2, true> |
654 | { |
655 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
656 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
657 | { |
658 | vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist); |
659 | return true; |
660 | } |
661 | }; |
662 | |
663 | template<int N, int K> |
664 | struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false> |
665 | { |
666 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
667 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
668 | { |
669 | if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist); |
670 | else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist); |
671 | return true; |
672 | } |
673 | }; |
674 | |
675 | template<int N, int K> |
676 | struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true> |
677 | { |
678 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
679 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
680 | { |
681 | if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist); |
682 | else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist); |
683 | return true; |
684 | } |
685 | }; |
686 | |
687 | template<int N, int K> |
688 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false> |
689 | { |
690 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
691 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
692 | { |
693 | if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist); |
694 | else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); |
695 | return true; |
696 | } |
697 | }; |
698 | |
699 | template<int N, int K> |
700 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true> |
701 | { |
702 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
703 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
704 | { |
705 | if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist); |
706 | else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); |
707 | return true; |
708 | } |
709 | }; |
710 | |
711 | template<int N, int K> |
712 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false> |
713 | { |
714 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
715 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
716 | { |
717 | vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist); |
718 | return true; |
719 | } |
720 | }; |
721 | |
722 | template<int N, int K> |
723 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true> |
724 | { |
725 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
726 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
727 | { |
728 | vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist); |
729 | return true; |
730 | } |
731 | }; |
732 | |
733 | template<int N, int K> |
734 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false> |
735 | { |
736 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
737 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
738 | { |
739 | if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { |
740 | vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist); |
741 | } else /*if (unlikely(node.isOBBNodeMB()))*/ { |
742 | assert(node.isOBBNodeMB()); |
743 | vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); |
744 | } |
745 | return true; |
746 | } |
747 | }; |
748 | |
749 | template<int N, int K> |
750 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true> |
751 | { |
752 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
753 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
754 | { |
755 | if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { |
756 | vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist); |
757 | } else /*if (unlikely(node.isOBBNodeMB()))*/ { |
758 | assert(node.isOBBNodeMB()); |
759 | vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); |
760 | } |
761 | return true; |
762 | } |
763 | }; |
764 | |
765 | |
766 | /*! Intersects N nodes with K rays */ |
767 | template<int N, int K, bool robust> |
768 | struct BVHNQuantizedBaseNodeIntersectorK; |
769 | |
770 | template<int N, int K> |
771 | struct BVHNQuantizedBaseNodeIntersectorK<N, K, false> |
772 | { |
773 | static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i, |
774 | const TravRayK<K,false>& ray, vfloat<K>& dist) |
775 | { |
776 | return intersectQuantizedNodeK<N,K>(node,i,ray,dist); |
777 | } |
778 | |
779 | static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, |
780 | const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist) |
781 | { |
782 | return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist); |
783 | } |
784 | |
785 | }; |
786 | |
787 | template<int N, int K> |
788 | struct BVHNQuantizedBaseNodeIntersectorK<N, K, true> |
789 | { |
790 | static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i, |
791 | const TravRayK<K,true>& ray, vfloat<K>& dist) |
792 | { |
793 | return intersectQuantizedNodeK<N,K>(node,i,ray,dist); |
794 | } |
795 | |
796 | static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, |
797 | const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist) |
798 | { |
799 | return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist); |
800 | } |
801 | }; |
802 | |
803 | |
804 | } |
805 | } |
806 | |