1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "node_intersector.h"
7
8namespace embree
9{
10 namespace isa
11 {
12 //////////////////////////////////////////////////////////////////////////////////////
13 // Ray structure used in single-ray traversal
14 //////////////////////////////////////////////////////////////////////////////////////
15
16 template<int N, bool robust>
17 struct TravRayBase;
18
19 /* Base (without tnear and tfar) */
20 template<int N>
21 struct TravRayBase<N,false>
22 {
23 __forceinline TravRayBase() {}
24
25 __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
26 : org_xyz(ray_org), dir_xyz(ray_dir)
27 {
28 const Vec3fa ray_rdir = rcp_safe(a: ray_dir);
29 org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
30 dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
31 rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z);
32#if defined(__AVX2__) || defined(__ARM_NEON)
33 const Vec3fa ray_org_rdir = ray_org*ray_rdir;
34 org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
35#endif
36 nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
37 nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
38 nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
39 farX = nearX ^ sizeof(vfloat<N>);
40 farY = nearY ^ sizeof(vfloat<N>);
41 farZ = nearZ ^ sizeof(vfloat<N>);
42 }
43
44 template<int K>
45 __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
46 const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
47 size_t flip = sizeof(vfloat<N>))
48 {
49 org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
50 dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
51 rdir = Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
52#if defined(__AVX2__) || defined(__ARM_NEON)
53 org_rdir = org*rdir;
54#endif
55 nearX = nearXYZ.x[k];
56 nearY = nearXYZ.y[k];
57 nearZ = nearXYZ.z[k];
58 farX = nearX ^ flip;
59 farY = nearY ^ flip;
60 farZ = nearZ ^ flip;
61 }
62
63 Vec3fa org_xyz, dir_xyz;
64 Vec3vf<N> org, dir, rdir;
65#if defined(__AVX2__) || defined(__ARM_NEON)
66 Vec3vf<N> org_rdir;
67#endif
68 size_t nearX, nearY, nearZ;
69 size_t farX, farY, farZ;
70 };
71
72 /* Base (without tnear and tfar) */
73 template<int N>
74 struct TravRayBase<N,true>
75 {
76 __forceinline TravRayBase() {}
77
78 __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
79 : org_xyz(ray_org), dir_xyz(ray_dir)
80 {
81 const float round_down = 1.0f-3.0f*float(ulp);
82 const float round_up = 1.0f+3.0f*float(ulp);
83 const Vec3fa ray_rdir = 1.0f/zero_fix(a: ray_dir);
84 const Vec3fa ray_rdir_near = round_down*ray_rdir;
85 const Vec3fa ray_rdir_far = round_up *ray_rdir;
86 org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
87 dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
88 rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z);
89 rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z);
90
91 nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
92 nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
93 nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
94 farX = nearX ^ sizeof(vfloat<N>);
95 farY = nearY ^ sizeof(vfloat<N>);
96 farZ = nearZ ^ sizeof(vfloat<N>);
97 }
98
99 template<int K>
100 __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
101 const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
102 size_t flip = sizeof(vfloat<N>))
103 {
104 const vfloat<N> round_down = 1.0f-3.0f*float(ulp);
105 const vfloat<N> round_up = 1.0f+3.0f*float(ulp);
106 org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
107 dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
108 rdir_near = round_down*Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
109 rdir_far = round_up *Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
110
111 nearX = nearXYZ.x[k];
112 nearY = nearXYZ.y[k];
113 nearZ = nearXYZ.z[k];
114 farX = nearX ^ flip;
115 farY = nearY ^ flip;
116 farZ = nearZ ^ flip;
117 }
118
119 Vec3fa org_xyz, dir_xyz;
120 Vec3vf<N> org, dir, rdir_near, rdir_far;
121 size_t nearX, nearY, nearZ;
122 size_t farX, farY, farZ;
123 };
124
125 /* Full (with tnear and tfar) */
126 template<int N, bool robust>
127 struct TravRay : TravRayBase<N,robust>
128 {
129 __forceinline TravRay() {}
130
131 __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar)
132 : TravRayBase<N,robust>(ray_org, ray_dir),
133 tnear(ray_tnear), tfar(ray_tfar) {}
134
135 template<int K>
136 __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
137 const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
138 float ray_tnear, float ray_tfar,
139 size_t flip = sizeof(vfloat<N>))
140 {
141 TravRayBase<N,robust>::template init<K>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip);
142 tnear = ray_tnear; tfar = ray_tfar;
143 }
144
145 vfloat<N> tnear;
146 vfloat<N> tfar;
147 };
148
149 //////////////////////////////////////////////////////////////////////////////////////
150 // Point Query structure used in single-ray traversal
151 //////////////////////////////////////////////////////////////////////////////////////
152
153 template<int N>
154 struct TravPointQuery
155 {
156 __forceinline TravPointQuery() {}
157
158 __forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad)
159 {
160 org = Vec3vf<N>(query_org.x, query_org.y, query_org.z);
161 rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z);
162 }
163
164 __forceinline vfloat<N> const& tfar() const {
165 return rad.x;
166 }
167
168 Vec3vf<N> org, rad;
169 };
170
171 //////////////////////////////////////////////////////////////////////////////////////
172 // point query
173 //////////////////////////////////////////////////////////////////////////////////////
174
175 template<int N>
176 __forceinline size_t pointQuerySphereDistAndMask(
177 const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
178 vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
179 {
180 const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
181 const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
182 const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
183 dist = vX * vX + vY * vY + vZ * vZ;
184 const vbool<N> vmask = dist <= query.tfar()*query.tfar();
185 const vbool<N> valid = minX <= maxX;
186 return movemask(vmask) & movemask(valid);
187 }
188
189 template<int N>
190 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
191 {
192 const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
193 const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
194 const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
195 const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
196 const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
197 const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
198 return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
199 }
200
201 template<int N>
202 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
203 {
204 const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
205 const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
206 const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
207 const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
208 const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
209 const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
210 const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
211 const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
212 const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
213 const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
214 const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
215 const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
216 return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
217 }
218
219 template<int N>
220 __forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
221 {
222 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
223 size_t mask = pointQueryNodeSphere(node, query, time, dist);
224
225 if (unlikely(ref.isAABBNodeMB4D())) {
226 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
227 const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
228 mask &= movemask(vmask);
229 }
230
231 return mask;
232 }
233
234 template<int N>
235 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
236 {
237 const vfloat<N> start_x(node->start.x);
238 const vfloat<N> scale_x(node->scale.x);
239 const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
240 const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
241 const vfloat<N> start_y(node->start.y);
242 const vfloat<N> scale_y(node->scale.y);
243 const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
244 const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
245 const vfloat<N> start_z(node->start.z);
246 const vfloat<N> scale_z(node->scale.z);
247 const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
248 const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
249 return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
250 }
251
252 template<int N>
253 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
254 {
255 const vfloat<N> minX = node->dequantizeLowerX(time);
256 const vfloat<N> maxX = node->dequantizeUpperX(time);
257 const vfloat<N> minY = node->dequantizeLowerY(time);
258 const vfloat<N> maxY = node->dequantizeUpperY(time);
259 const vfloat<N> minZ = node->dequantizeLowerZ(time);
260 const vfloat<N> maxZ = node->dequantizeUpperZ(time);
261 return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
262 }
263
264 template<int N>
265 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
266 {
267 // TODO: point query - implement
268 const vbool<N> vmask = vbool<N>(true);
269 const size_t mask = movemask(vmask) & ((1<<N)-1);
270 dist = vfloat<N>(0.0f);
271 return mask;
272 }
273
274 template<int N>
275 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
276 {
277 // TODO: point query - implement
278 const vbool<N> vmask = vbool<N>(true);
279 const size_t mask = movemask(vmask) & ((1<<N)-1);
280 dist = vfloat<N>(0.0f);
281 return mask;
282 }
283
284 template<int N>
285 __forceinline size_t pointQueryAABBDistAndMask(
286 const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
287 vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
288 {
289 const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
290 const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
291 const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
292 dist = vX * vX + vY * vY + vZ * vZ;
293 const vbool<N> valid = minX <= maxX;
294 const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) |
295 (maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) |
296 (maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z));
297 return movemask(vmask) & movemask(valid);
298 }
299
300 template<int N>
301 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
302 {
303 const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
304 const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
305 const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
306 const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
307 const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
308 const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
309 return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
310 }
311
312 template<int N>
313 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
314 {
315 const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
316 const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
317 const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
318 const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
319 const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
320 const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
321 const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
322 const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
323 const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
324 const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
325 const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
326 const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
327 return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
328 }
329
330 template<int N>
331 __forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
332 {
333 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
334 size_t mask = pointQueryNodeAABB(node, query, time, dist);
335
336 if (unlikely(ref.isAABBNodeMB4D())) {
337 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
338 const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
339 mask &= movemask(vmask);
340 }
341
342 return mask;
343 }
344
345 template<int N>
346 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
347 {
348 const size_t mvalid = movemask(node->validMask());
349 const vfloat<N> start_x(node->start.x);
350 const vfloat<N> scale_x(node->scale.x);
351 const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
352 const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
353 const vfloat<N> start_y(node->start.y);
354 const vfloat<N> scale_y(node->scale.y);
355 const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
356 const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
357 const vfloat<N> start_z(node->start.z);
358 const vfloat<N> scale_z(node->scale.z);
359 const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
360 const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
361 return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
362 }
363
364 template<int N>
365 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
366 {
367 const size_t mvalid = movemask(node->validMask());
368 const vfloat<N> minX = node->dequantizeLowerX(time);
369 const vfloat<N> maxX = node->dequantizeUpperX(time);
370 const vfloat<N> minY = node->dequantizeLowerY(time);
371 const vfloat<N> maxY = node->dequantizeUpperY(time);
372 const vfloat<N> minZ = node->dequantizeLowerZ(time);
373 const vfloat<N> maxZ = node->dequantizeUpperZ(time);
374 return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
375 }
376
377 template<int N>
378 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
379 {
380 // TODO: point query - implement
381 const vbool<N> vmask = vbool<N>(true);
382 const size_t mask = movemask(vmask) & ((1<<N)-1);
383 dist = vfloat<N>(0.0f);
384 return mask;
385 }
386
387 template<int N>
388 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
389 {
390 // TODO: point query - implement
391 const vbool<N> vmask = vbool<N>(true);
392 const size_t mask = movemask(vmask) & ((1<<N)-1);
393 dist = vfloat<N>(0.0f);
394 return mask;
395 }
396
397 //////////////////////////////////////////////////////////////////////////////////////
398 // Fast AABBNode intersection
399 //////////////////////////////////////////////////////////////////////////////////////
400
401 template<int N, bool robust>
402 __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist);
403
404 template<>
405 __forceinline size_t intersectNode<4>(const typename BVH4::AABBNode* node, const TravRay<4,false>& ray, vfloat4& dist)
406 {
407#if defined(__AVX2__) || defined(__ARM_NEON)
408 const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
409 const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
410 const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
411 const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
412 const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
413 const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
414#else
415 const vfloat4 tNearX = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
416 const vfloat4 tNearY = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
417 const vfloat4 tNearZ = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
418 const vfloat4 tFarX = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
419 const vfloat4 tFarY = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
420 const vfloat4 tFarZ = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
421#endif
422
423#if defined(__aarch64__)
424 const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear);
425 const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar);
426 const vbool4 vmask = asInt(tNear) <= asInt(tFar);
427 const size_t mask = movemask(vmask);
428#elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
429 const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
430 const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
431 const vbool4 vmask = asInt(tNear) > asInt(tFar);
432 const size_t mask = movemask(vmask) ^ ((1<<4)-1);
433#elif defined(__AVX512F__) // SKX
434 const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
435 const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
436 const vbool4 vmask = asInt(tNear) <= asInt(tFar);
437 const size_t mask = movemask(vmask);
438#else
439 const vfloat4 tNear = max(a: tNearX,b: tNearY,c: tNearZ,d: ray.tnear);
440 const vfloat4 tFar = min(a: tFarX ,b: tFarY ,c: tFarZ ,d: ray.tfar);
441 const vbool4 vmask = tNear <= tFar;
442 const size_t mask = movemask(a: vmask);
443#endif
444 dist = tNear;
445 return mask;
446 }
447
448#if defined(__AVX__)
449
450 template<>
451 __forceinline size_t intersectNode<8>(const typename BVH8::AABBNode* node, const TravRay<8,false>& ray, vfloat8& dist)
452 {
453#if defined(__AVX2__) || defined(__ARM_NEON)
454 const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
455 const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
456 const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
457 const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
458 const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
459 const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
460#else
461 const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
462 const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
463 const vfloat8 tNearZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
464 const vfloat8 tFarX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
465 const vfloat8 tFarY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
466 const vfloat8 tFarZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
467#endif
468
469#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
470 const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
471 const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
472 const vbool8 vmask = asInt(tNear) > asInt(tFar);
473 const size_t mask = movemask(vmask) ^ ((1<<8)-1);
474#elif defined(__AVX512F__) // SKX
475 const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
476 const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
477 const vbool8 vmask = asInt(tNear) <= asInt(tFar);
478 const size_t mask = movemask(vmask);
479#else
480 const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
481 const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
482 const vbool8 vmask = tNear <= tFar;
483 const size_t mask = movemask(vmask);
484#endif
485 dist = tNear;
486 return mask;
487 }
488
489#endif
490
491 //////////////////////////////////////////////////////////////////////////////////////
492 // Robust AABBNode intersection
493 //////////////////////////////////////////////////////////////////////////////////////
494
495 template<int N>
496 __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
497 {
498 const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
499 const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
500 const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
501 const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x;
502 const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
503 const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
504 const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
505 const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
506 const vbool<N> vmask = tNear <= tFar;
507 const size_t mask = movemask(vmask);
508 dist = tNear;
509 return mask;
510 }
511
512 //////////////////////////////////////////////////////////////////////////////////////
513 // Fast AABBNodeMB intersection
514 //////////////////////////////////////////////////////////////////////////////////////
515
516 template<int N>
517 __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
518 {
519 const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
520 const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
521 const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
522 const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
523 const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
524 const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
525#if defined(__AVX2__) || defined(__ARM_NEON)
526 const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
527 const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
528 const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
529 const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
530 const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
531 const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
532#else
533 const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
534 const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
535 const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
536 const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
537 const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
538 const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
539#endif
540#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
541 const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
542 const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
543 const vbool<N> vmask = asInt(tNear) > asInt(tFar);
544 const size_t mask = movemask(vmask) ^ ((1<<N)-1);
545#elif defined(__AVX512F__) // SKX
546 const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
547 const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
548 const vbool<N> vmask = asInt(tNear) <= asInt(tFar);
549 const size_t mask = movemask(vmask);
550#else
551 const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
552 const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
553 const vbool<N> vmask = tNear <= tFar;
554 const size_t mask = movemask(vmask);
555#endif
556 dist = tNear;
557 return mask;
558 }
559
560 //////////////////////////////////////////////////////////////////////////////////////
561 // Robust AABBNodeMB intersection
562 //////////////////////////////////////////////////////////////////////////////////////
563
564 template<int N>
565 __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
566 {
567 const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
568 const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
569 const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
570 const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x;
571 const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
572 const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
573 const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
574 const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
575 const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
576 const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
577 const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x;
578 const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
579 const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
580 const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
581 const size_t mask = movemask(tNear <= tFar);
582 dist = tNear;
583 return mask;
584 }
585
586 //////////////////////////////////////////////////////////////////////////////////////
587 // Fast AABBNodeMB4D intersection
588 //////////////////////////////////////////////////////////////////////////////////////
589
590 template<int N>
591 __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
592 {
593 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
594
595 const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
596 const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
597 const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
598 const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
599 const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
600 const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
601#if defined (__AVX2__) || defined(__ARM_NEON)
602 const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
603 const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
604 const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
605 const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
606 const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
607 const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
608#else
609 const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
610 const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
611 const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
612 const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
613 const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
614 const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
615#endif
616#if defined(__AVX2__) && !defined(__AVX512F__)
617 const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear));
618 const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar ));
619#else
620 const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
621 const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
622#endif
623 vbool<N> vmask = tNear <= tFar;
624 if (unlikely(ref.isAABBNodeMB4D())) {
625 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
626 vmask &= (node1->lower_t <= time) & (time < node1->upper_t);
627 }
628 const size_t mask = movemask(vmask);
629 dist = tNear;
630 return mask;
631 }
632
633 //////////////////////////////////////////////////////////////////////////////////////
634 // Robust AABBNodeMB4D intersection
635 //////////////////////////////////////////////////////////////////////////////////////
636
637 template<int N>
638 __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
639 {
640 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
641
642 const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
643 const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
644 const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
645 const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x;
646 const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
647 const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
648 const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
649 const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
650 const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
651 const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
652 const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x;
653 const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
654 const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
655 const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
656 vbool<N> vmask = tNear <= tFar;
657 if (unlikely(ref.isAABBNodeMB4D())) {
658 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
659 vmask &= (node1->lower_t <= time) & (time < node1->upper_t);
660 }
661 const size_t mask = movemask(vmask);
662 dist = tNear;
663 return mask;
664 }
665
666 //////////////////////////////////////////////////////////////////////////////////////
667 // Fast QuantizedBaseNode intersection
668 //////////////////////////////////////////////////////////////////////////////////////
669
670 template<int N, bool robust>
671 __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist);
672
673 template<>
674 __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,false>& ray, vfloat4& dist)
675 {
676 const size_t mvalid = movemask(a: node->validMask());
677 const vfloat4 start_x(node->start.x);
678 const vfloat4 scale_x(node->scale.x);
679 const vfloat4 lower_x = madd(a: node->dequantize<4>(offset: ray.nearX >> 2),b: scale_x,c: start_x);
680 const vfloat4 upper_x = madd(a: node->dequantize<4>(offset: ray.farX >> 2),b: scale_x,c: start_x);
681 const vfloat4 start_y(node->start.y);
682 const vfloat4 scale_y(node->scale.y);
683 const vfloat4 lower_y = madd(a: node->dequantize<4>(offset: ray.nearY >> 2),b: scale_y,c: start_y);
684 const vfloat4 upper_y = madd(a: node->dequantize<4>(offset: ray.farY >> 2),b: scale_y,c: start_y);
685 const vfloat4 start_z(node->start.z);
686 const vfloat4 scale_z(node->scale.z);
687 const vfloat4 lower_z = madd(a: node->dequantize<4>(offset: ray.nearZ >> 2),b: scale_z,c: start_z);
688 const vfloat4 upper_z = madd(a: node->dequantize<4>(offset: ray.farZ >> 2),b: scale_z,c: start_z);
689
690#if defined(__AVX2__) || defined(__ARM_NEON)
691 const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
692 const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
693 const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
694 const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
695 const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
696 const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
697#else
698 const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
699 const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
700 const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
701 const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
702 const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
703 const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
704#endif
705
706#if defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
707 const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
708 const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
709 const vbool4 vmask = asInt(tNear) > asInt(tFar);
710 const size_t mask = movemask(vmask) ^ ((1<<4)-1);
711#elif defined(__AVX512F__) // SKX
712 const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
713 const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
714 const vbool4 vmask = asInt(tNear) <= asInt(tFar);
715 const size_t mask = movemask(vmask);
716#else
717 const vfloat4 tNear = max(a: tNearX,b: tNearY,c: tNearZ,d: ray.tnear);
718 const vfloat4 tFar = min(a: tFarX ,b: tFarY ,c: tFarZ ,d: ray.tfar);
719 const vbool4 vmask = tNear <= tFar;
720 const size_t mask = movemask(a: vmask);
721#endif
722 dist = tNear;
723 return mask & mvalid;
724 }
725
726 template<>
727 __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,true>& ray, vfloat4& dist)
728 {
729 const size_t mvalid = movemask(a: node->validMask());
730 const vfloat4 start_x(node->start.x);
731 const vfloat4 scale_x(node->scale.x);
732 const vfloat4 lower_x = madd(a: node->dequantize<4>(offset: ray.nearX >> 2),b: scale_x,c: start_x);
733 const vfloat4 upper_x = madd(a: node->dequantize<4>(offset: ray.farX >> 2),b: scale_x,c: start_x);
734 const vfloat4 start_y(node->start.y);
735 const vfloat4 scale_y(node->scale.y);
736 const vfloat4 lower_y = madd(a: node->dequantize<4>(offset: ray.nearY >> 2),b: scale_y,c: start_y);
737 const vfloat4 upper_y = madd(a: node->dequantize<4>(offset: ray.farY >> 2),b: scale_y,c: start_y);
738 const vfloat4 start_z(node->start.z);
739 const vfloat4 scale_z(node->scale.z);
740 const vfloat4 lower_z = madd(a: node->dequantize<4>(offset: ray.nearZ >> 2),b: scale_z,c: start_z);
741 const vfloat4 upper_z = madd(a: node->dequantize<4>(offset: ray.farZ >> 2),b: scale_z,c: start_z);
742
743 const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
744 const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
745 const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
746 const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
747 const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
748 const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
749
750 const vfloat4 tNear = max(a: tNearX,b: tNearY,c: tNearZ,d: ray.tnear);
751 const vfloat4 tFar = min(a: tFarX ,b: tFarY ,c: tFarZ ,d: ray.tfar);
752 const vbool4 vmask = tNear <= tFar;
753 const size_t mask = movemask(a: vmask);
754 dist = tNear;
755 return mask & mvalid;
756 }
757
758
759#if defined(__AVX__)
760
761 template<>
762 __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,false>& ray, vfloat8& dist)
763 {
764 const size_t mvalid = movemask(node->validMask());
765 const vfloat8 start_x(node->start.x);
766 const vfloat8 scale_x(node->scale.x);
767 const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
768 const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
769 const vfloat8 start_y(node->start.y);
770 const vfloat8 scale_y(node->scale.y);
771 const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
772 const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
773 const vfloat8 start_z(node->start.z);
774 const vfloat8 scale_z(node->scale.z);
775 const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
776 const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
777
778#if defined(__AVX2__) || defined(__ARM_NEON)
779 const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
780 const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
781 const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
782 const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
783 const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
784 const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
785#else
786 const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
787 const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
788 const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
789 const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
790 const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
791 const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
792#endif
793
794#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
795 const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
796 const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
797 const vbool8 vmask = asInt(tNear) > asInt(tFar);
798 const size_t mask = movemask(vmask) ^ ((1<<8)-1);
799#elif defined(__AVX512F__) // SKX
800 const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
801 const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
802 const vbool8 vmask = asInt(tNear) <= asInt(tFar);
803 const size_t mask = movemask(vmask);
804#else
805 const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
806 const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
807 const vbool8 vmask = tNear <= tFar;
808 const size_t mask = movemask(vmask);
809#endif
810 dist = tNear;
811 return mask & mvalid;
812 }
813
814 template<>
815 __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,true>& ray, vfloat8& dist)
816 {
817 const size_t mvalid = movemask(node->validMask());
818 const vfloat8 start_x(node->start.x);
819 const vfloat8 scale_x(node->scale.x);
820 const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
821 const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
822 const vfloat8 start_y(node->start.y);
823 const vfloat8 scale_y(node->scale.y);
824 const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
825 const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
826 const vfloat8 start_z(node->start.z);
827 const vfloat8 scale_z(node->scale.z);
828 const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
829 const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
830
831 const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
832 const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
833 const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
834 const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
835 const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
836 const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
837
838 const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
839 const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
840 const vbool8 vmask = tNear <= tFar;
841 const size_t mask = movemask(vmask);
842
843 dist = tNear;
844 return mask & mvalid;
845 }
846
847
848#endif
849
850 template<int N>
851 __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
852 {
853 const vboolf<N> mvalid = node->validMask();
854 const vfloat<N> lower_x = node->dequantizeLowerX(time);
855 const vfloat<N> upper_x = node->dequantizeUpperX(time);
856 const vfloat<N> lower_y = node->dequantizeLowerY(time);
857 const vfloat<N> upper_y = node->dequantizeUpperY(time);
858 const vfloat<N> lower_z = node->dequantizeLowerZ(time);
859 const vfloat<N> upper_z = node->dequantizeUpperZ(time);
860#if defined(__AVX2__) || defined(__ARM_NEON)
861 const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
862 const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
863 const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
864 const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
865 const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
866 const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
867#else
868 const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x;
869 const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y;
870 const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
871 const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x;
872 const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y;
873 const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
874#endif
875
876 const vfloat<N> tminX = mini(tNearX,tFarX);
877 const vfloat<N> tmaxX = maxi(tNearX,tFarX);
878 const vfloat<N> tminY = mini(tNearY,tFarY);
879 const vfloat<N> tmaxY = maxi(tNearY,tFarY);
880 const vfloat<N> tminZ = mini(tNearZ,tFarZ);
881 const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
882 const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
883 const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
884#if defined(__AVX512F__) // SKX
885 const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
886#else
887 const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
888#endif
889 const size_t mask = movemask(vmask);
890 dist = tNear;
891 return mask;
892 }
893
894 template<int N>
895 __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
896 {
897 const vboolf<N> mvalid = node->validMask();
898 const vfloat<N> lower_x = node->dequantizeLowerX(time);
899 const vfloat<N> upper_x = node->dequantizeUpperX(time);
900 const vfloat<N> lower_y = node->dequantizeLowerY(time);
901 const vfloat<N> upper_y = node->dequantizeUpperY(time);
902 const vfloat<N> lower_z = node->dequantizeLowerZ(time);
903 const vfloat<N> upper_z = node->dequantizeUpperZ(time);
904 const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
905 const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
906 const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
907 const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
908 const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
909 const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
910
911 const vfloat<N> tminX = mini(tNearX,tFarX);
912 const vfloat<N> tmaxX = maxi(tNearX,tFarX);
913 const vfloat<N> tminY = mini(tNearY,tFarY);
914 const vfloat<N> tmaxY = maxi(tNearY,tFarY);
915 const vfloat<N> tminZ = mini(tNearZ,tFarZ);
916 const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
917 const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
918 const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
919#if defined(__AVX512F__) // SKX
920 const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
921#else
922 const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
923#endif
924 const size_t mask = movemask(vmask);
925 dist = tNear;
926 return mask;
927 }
928
929 //////////////////////////////////////////////////////////////////////////////////////
930 // Fast OBBNode intersection
931 //////////////////////////////////////////////////////////////////////////////////////
932
933 template<int N, bool robust>
934 __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist)
935 {
936 const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir);
937 //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir;
938 const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir);
939 const Vec3vf<N> org = xfmPoint(node->naabb,ray.org);
940 const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir;
941 const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir;
942
943 const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
944 const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
945 const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
946 const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
947 const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
948 const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
949 vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
950 vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
951 if (robust) {
952 tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
953 tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
954 }
955 const vbool<N> vmask = tNear <= tFar;
956 dist = tNear;
957 return movemask(vmask);
958 }
959
960 //////////////////////////////////////////////////////////////////////////////////////
961 // Fast OBBNodeMB intersection
962 //////////////////////////////////////////////////////////////////////////////////////
963
964 template<int N, bool robust>
965 __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,robust>& ray, const float time, vfloat<N>& dist)
966 {
967 const AffineSpace3vf<N> xfm = node->space0;
968 const Vec3vf<N> b0_lower = zero;
969 const Vec3vf<N> b0_upper = one;
970 const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time));
971 const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time));
972
973 const BBox3vf<N> bounds(lower,upper);
974 const Vec3vf<N> dir = xfmVector(xfm,ray.dir);
975 const Vec3vf<N> rdir = rcp_safe(dir);
976 const Vec3vf<N> org = xfmPoint(xfm,ray.org);
977
978 const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir;
979 const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir;
980
981 const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
982 const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
983 const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
984 const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
985 const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
986 const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
987 vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
988 vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
989 if (robust) {
990 tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
991 tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
992 }
993 const vbool<N> vmask = tNear <= tFar;
994 dist = tNear;
995 return movemask(vmask);
996 }
997
998 //////////////////////////////////////////////////////////////////////////////////////
999 // Node intersectors used in point query raversal
1000 //////////////////////////////////////////////////////////////////////////////////////
1001
1002 /*! Computes traversal information for N nodes with 1 point query */
1003 template<int N, int types>
1004 struct BVHNNodePointQuerySphere1;
1005
1006 template<int N>
1007 struct BVHNNodePointQuerySphere1<N, BVH_AN1>
1008 {
1009 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1010 {
1011 if (unlikely(node.isLeaf())) return false;
1012 mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
1013 return true;
1014 }
1015 };
1016
1017 template<int N>
1018 struct BVHNNodePointQuerySphere1<N, BVH_AN2>
1019 {
1020 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1021 {
1022 if (unlikely(node.isLeaf())) return false;
1023 mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
1024 return true;
1025 }
1026 };
1027
1028 template<int N>
1029 struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D>
1030 {
1031 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1032 {
1033 if (unlikely(node.isLeaf())) return false;
1034 mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist);
1035 return true;
1036 }
1037 };
1038
1039 template<int N>
1040 struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1>
1041 {
1042 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1043 {
1044 if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
1045 else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist);
1046 else return false;
1047 return true;
1048 }
1049 };
1050
1051 template<int N>
1052 struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2>
1053 {
1054 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1055 {
1056 if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
1057 else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
1058 else return false;
1059 return true;
1060 }
1061 };
1062
1063 template<int N>
1064 struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2>
1065 {
1066 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1067 {
1068 if (unlikely(node.isLeaf())) return false;
1069 if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
1070 else mask = pointQueryNodeSphereMB4D(node, query, time, dist);
1071 return true;
1072 }
1073 };
1074
1075 template<int N>
1076 struct BVHNNodePointQuerySphere1<N, BVH_QN1>
1077 {
1078 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1079 {
1080 if (unlikely(node.isLeaf())) return false;
1081 mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
1082 return true;
1083 }
1084 };
1085
1086 template<int N>
1087 struct BVHNQuantizedBaseNodePointQuerySphere1
1088 {
1089 static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
1090 {
1091 return pointQueryNodeSphere(node,query,dist);
1092 }
1093
1094 static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
1095 {
1096 return pointQueryNodeSphere(node,query,time,dist);
1097 }
1098 };
1099
1100 /*! Computes traversal information for N nodes with 1 point query */
1101 template<int N, int types>
1102 struct BVHNNodePointQueryAABB1;
1103
1104 template<int N>
1105 struct BVHNNodePointQueryAABB1<N, BVH_AN1>
1106 {
1107 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1108 {
1109 if (unlikely(node.isLeaf())) return false;
1110 mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
1111 return true;
1112 }
1113 };
1114
1115 template<int N>
1116 struct BVHNNodePointQueryAABB1<N, BVH_AN2>
1117 {
1118 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1119 {
1120 if (unlikely(node.isLeaf())) return false;
1121 mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
1122 return true;
1123 }
1124 };
1125
1126 template<int N>
1127 struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D>
1128 {
1129 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1130 {
1131 if (unlikely(node.isLeaf())) return false;
1132 mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist);
1133 return true;
1134 }
1135 };
1136
1137 template<int N>
1138 struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1>
1139 {
1140 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1141 {
1142 if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
1143 else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist);
1144 else return false;
1145 return true;
1146 }
1147 };
1148
1149 template<int N>
1150 struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2>
1151 {
1152 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1153 {
1154 if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
1155 else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
1156 else return false;
1157 return true;
1158 }
1159 };
1160
1161 template<int N>
1162 struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2>
1163 {
1164 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1165 {
1166 if (unlikely(node.isLeaf())) return false;
1167 if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
1168 else mask = pointQueryNodeAABBMB4D(node, query, time, dist);
1169 return true;
1170 }
1171 };
1172
1173 template<int N>
1174 struct BVHNNodePointQueryAABB1<N, BVH_QN1>
1175 {
1176 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1177 {
1178 if (unlikely(node.isLeaf())) return false;
1179 mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
1180 return true;
1181 }
1182 };
1183
1184 template<int N>
1185 struct BVHNQuantizedBaseNodePointQueryAABB1
1186 {
1187 static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
1188 {
1189 return pointQueryNodeAABB(node,query,dist);
1190 }
1191
1192 static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
1193 {
1194 return pointQueryNodeAABB(node,query,time,dist);
1195 }
1196 };
1197
1198
1199 //////////////////////////////////////////////////////////////////////////////////////
1200 // Node intersectors used in ray traversal
1201 //////////////////////////////////////////////////////////////////////////////////////
1202
1203 /*! Intersects N nodes with 1 ray */
1204 template<int N, int types, bool robust>
1205 struct BVHNNodeIntersector1;
1206
1207 template<int N>
1208 struct BVHNNodeIntersector1<N, BVH_AN1, false>
1209 {
1210 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1211 {
1212 if (unlikely(node.isLeaf())) return false;
1213 mask = intersectNode(node.getAABBNode(), ray, dist);
1214 return true;
1215 }
1216 };
1217
1218 template<int N>
1219 struct BVHNNodeIntersector1<N, BVH_AN1, true>
1220 {
1221 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1222 {
1223 if (unlikely(node.isLeaf())) return false;
1224 mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
1225 return true;
1226 }
1227 };
1228
1229 template<int N>
1230 struct BVHNNodeIntersector1<N, BVH_AN2, false>
1231 {
1232 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1233 {
1234 if (unlikely(node.isLeaf())) return false;
1235 mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
1236 return true;
1237 }
1238 };
1239
1240 template<int N>
1241 struct BVHNNodeIntersector1<N, BVH_AN2, true>
1242 {
1243 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1244 {
1245 if (unlikely(node.isLeaf())) return false;
1246 mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
1247 return true;
1248 }
1249 };
1250
1251 template<int N>
1252 struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, false>
1253 {
1254 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1255 {
1256 if (unlikely(node.isLeaf())) return false;
1257 mask = intersectNodeMB4D<N>(node, ray, time, dist);
1258 return true;
1259 }
1260 };
1261
1262 template<int N>
1263 struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, true>
1264 {
1265 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1266 {
1267 if (unlikely(node.isLeaf())) return false;
1268 mask = intersectNodeMB4DRobust<N>(node, ray, time, dist);
1269 return true;
1270 }
1271 };
1272
1273 template<int N>
1274 struct BVHNNodeIntersector1<N, BVH_AN1_UN1, false>
1275 {
1276 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1277 {
1278 if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist);
1279 else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
1280 else return false;
1281 return true;
1282 }
1283 };
1284
1285 template<int N>
1286 struct BVHNNodeIntersector1<N, BVH_AN1_UN1, true>
1287 {
1288 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1289 {
1290 if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
1291 else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
1292 else return false;
1293 return true;
1294 }
1295 };
1296
1297 template<int N>
1298 struct BVHNNodeIntersector1<N, BVH_AN2_UN2, false>
1299 {
1300 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1301 {
1302 if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
1303 else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1304 else return false;
1305 return true;
1306 }
1307 };
1308
1309 template<int N>
1310 struct BVHNNodeIntersector1<N, BVH_AN2_UN2, true>
1311 {
1312 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1313 {
1314 if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
1315 else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1316 else return false;
1317 return true;
1318 }
1319 };
1320
1321 template<int N>
1322 struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, false>
1323 {
1324 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1325 {
1326 if (unlikely(node.isLeaf())) return false;
1327 if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1328 else mask = intersectNodeMB4D(node, ray, time, dist);
1329 return true;
1330 }
1331 };
1332
1333 template<int N>
1334 struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, true>
1335 {
1336 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1337 {
1338 if (unlikely(node.isLeaf())) return false;
1339 if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1340 else mask = intersectNodeMB4DRobust(node, ray, time, dist);
1341 return true;
1342 }
1343 };
1344
1345 template<int N>
1346 struct BVHNNodeIntersector1<N, BVH_QN1, false>
1347 {
1348 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1349 {
1350 if (unlikely(node.isLeaf())) return false;
1351 mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
1352 return true;
1353 }
1354 };
1355
1356 template<int N>
1357 struct BVHNNodeIntersector1<N, BVH_QN1, true>
1358 {
1359 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1360 {
1361 if (unlikely(node.isLeaf())) return false;
1362 mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
1363 return true;
1364 }
1365 };
1366
1367 /*! Intersects N nodes with K rays */
1368 template<int N, bool robust>
1369 struct BVHNQuantizedBaseNodeIntersector1;
1370
1371 template<int N>
1372 struct BVHNQuantizedBaseNodeIntersector1<N, false>
1373 {
1374 static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,false>& ray, vfloat<N>& dist)
1375 {
1376 return intersectNode(node,ray,dist);
1377 }
1378
1379 static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
1380 {
1381 return intersectNode(node,ray,time,dist);
1382 }
1383
1384 };
1385
1386 template<int N>
1387 struct BVHNQuantizedBaseNodeIntersector1<N, true>
1388 {
1389 static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
1390 {
1391 return intersectNode(node,ray,dist);
1392 }
1393
1394 static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
1395 {
1396 return intersectNode(node,ray,time,dist);
1397 }
1398
1399 };
1400
1401
1402 }
1403}
1404

source code of qtquick3d/src/3rdparty/embree/kernels/bvh/node_intersector1.h