1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "node_intersector.h" |
7 | |
8 | namespace embree |
9 | { |
10 | namespace isa |
11 | { |
12 | ////////////////////////////////////////////////////////////////////////////////////// |
13 | // Ray structure used in single-ray traversal |
14 | ////////////////////////////////////////////////////////////////////////////////////// |
15 | |
16 | template<int N, bool robust> |
17 | struct TravRayBase; |
18 | |
19 | /* Base (without tnear and tfar) */ |
20 | template<int N> |
21 | struct TravRayBase<N,false> |
22 | { |
23 | __forceinline TravRayBase() {} |
24 | |
25 | __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir) |
26 | : org_xyz(ray_org), dir_xyz(ray_dir) |
27 | { |
28 | const Vec3fa ray_rdir = rcp_safe(a: ray_dir); |
29 | org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z); |
30 | dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z); |
31 | rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z); |
32 | #if defined(__AVX2__) || defined(__ARM_NEON) |
33 | const Vec3fa ray_org_rdir = ray_org*ray_rdir; |
34 | org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); |
35 | #endif |
36 | nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>); |
37 | nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>); |
38 | nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>); |
39 | farX = nearX ^ sizeof(vfloat<N>); |
40 | farY = nearY ^ sizeof(vfloat<N>); |
41 | farZ = nearZ ^ sizeof(vfloat<N>); |
42 | } |
43 | |
44 | template<int K> |
45 | __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, |
46 | const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, |
47 | size_t flip = sizeof(vfloat<N>)) |
48 | { |
49 | org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); |
50 | dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); |
51 | rdir = Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); |
52 | #if defined(__AVX2__) || defined(__ARM_NEON) |
53 | org_rdir = org*rdir; |
54 | #endif |
55 | nearX = nearXYZ.x[k]; |
56 | nearY = nearXYZ.y[k]; |
57 | nearZ = nearXYZ.z[k]; |
58 | farX = nearX ^ flip; |
59 | farY = nearY ^ flip; |
60 | farZ = nearZ ^ flip; |
61 | } |
62 | |
63 | Vec3fa org_xyz, dir_xyz; |
64 | Vec3vf<N> org, dir, rdir; |
65 | #if defined(__AVX2__) || defined(__ARM_NEON) |
66 | Vec3vf<N> org_rdir; |
67 | #endif |
68 | size_t nearX, nearY, nearZ; |
69 | size_t farX, farY, farZ; |
70 | }; |
71 | |
72 | /* Base (without tnear and tfar) */ |
73 | template<int N> |
74 | struct TravRayBase<N,true> |
75 | { |
76 | __forceinline TravRayBase() {} |
77 | |
78 | __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir) |
79 | : org_xyz(ray_org), dir_xyz(ray_dir) |
80 | { |
81 | const float round_down = 1.0f-3.0f*float(ulp); |
82 | const float round_up = 1.0f+3.0f*float(ulp); |
83 | const Vec3fa ray_rdir = 1.0f/zero_fix(a: ray_dir); |
84 | const Vec3fa ray_rdir_near = round_down*ray_rdir; |
85 | const Vec3fa ray_rdir_far = round_up *ray_rdir; |
86 | org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z); |
87 | dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z); |
88 | rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z); |
89 | rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z); |
90 | |
91 | nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>); |
92 | nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>); |
93 | nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>); |
94 | farX = nearX ^ sizeof(vfloat<N>); |
95 | farY = nearY ^ sizeof(vfloat<N>); |
96 | farZ = nearZ ^ sizeof(vfloat<N>); |
97 | } |
98 | |
99 | template<int K> |
100 | __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, |
101 | const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, |
102 | size_t flip = sizeof(vfloat<N>)) |
103 | { |
104 | const vfloat<N> round_down = 1.0f-3.0f*float(ulp); |
105 | const vfloat<N> round_up = 1.0f+3.0f*float(ulp); |
106 | org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); |
107 | dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); |
108 | rdir_near = round_down*Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); |
109 | rdir_far = round_up *Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); |
110 | |
111 | nearX = nearXYZ.x[k]; |
112 | nearY = nearXYZ.y[k]; |
113 | nearZ = nearXYZ.z[k]; |
114 | farX = nearX ^ flip; |
115 | farY = nearY ^ flip; |
116 | farZ = nearZ ^ flip; |
117 | } |
118 | |
119 | Vec3fa org_xyz, dir_xyz; |
120 | Vec3vf<N> org, dir, rdir_near, rdir_far; |
121 | size_t nearX, nearY, nearZ; |
122 | size_t farX, farY, farZ; |
123 | }; |
124 | |
125 | /* Full (with tnear and tfar) */ |
126 | template<int N, bool robust> |
127 | struct TravRay : TravRayBase<N,robust> |
128 | { |
129 | __forceinline TravRay() {} |
130 | |
131 | __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar) |
132 | : TravRayBase<N,robust>(ray_org, ray_dir), |
133 | tnear(ray_tnear), tfar(ray_tfar) {} |
134 | |
135 | template<int K> |
136 | __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, |
137 | const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, |
138 | float ray_tnear, float ray_tfar, |
139 | size_t flip = sizeof(vfloat<N>)) |
140 | { |
141 | TravRayBase<N,robust>::template init<K>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip); |
142 | tnear = ray_tnear; tfar = ray_tfar; |
143 | } |
144 | |
145 | vfloat<N> tnear; |
146 | vfloat<N> tfar; |
147 | }; |
148 | |
149 | ////////////////////////////////////////////////////////////////////////////////////// |
150 | // Point Query structure used in single-ray traversal |
151 | ////////////////////////////////////////////////////////////////////////////////////// |
152 | |
153 | template<int N> |
154 | struct TravPointQuery |
155 | { |
156 | __forceinline TravPointQuery() {} |
157 | |
158 | __forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad) |
159 | { |
160 | org = Vec3vf<N>(query_org.x, query_org.y, query_org.z); |
161 | rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z); |
162 | } |
163 | |
164 | __forceinline vfloat<N> const& tfar() const { |
165 | return rad.x; |
166 | } |
167 | |
168 | Vec3vf<N> org, rad; |
169 | }; |
170 | |
171 | ////////////////////////////////////////////////////////////////////////////////////// |
172 | // point query |
173 | ////////////////////////////////////////////////////////////////////////////////////// |
174 | |
175 | template<int N> |
176 | __forceinline size_t pointQuerySphereDistAndMask( |
177 | const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX, |
178 | vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ) |
179 | { |
180 | const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x; |
181 | const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y; |
182 | const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z; |
183 | dist = vX * vX + vY * vY + vZ * vZ; |
184 | const vbool<N> vmask = dist <= query.tfar()*query.tfar(); |
185 | const vbool<N> valid = minX <= maxX; |
186 | return movemask(vmask) & movemask(valid); |
187 | } |
188 | |
189 | template<int N> |
190 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
191 | { |
192 | const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x)); |
193 | const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y)); |
194 | const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z)); |
195 | const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x)); |
196 | const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y)); |
197 | const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z)); |
198 | return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); |
199 | } |
200 | |
201 | template<int N> |
202 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
203 | { |
204 | const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x); |
205 | const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y); |
206 | const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z); |
207 | const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x); |
208 | const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y); |
209 | const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z); |
210 | const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0])); |
211 | const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0])); |
212 | const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0])); |
213 | const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0])); |
214 | const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0])); |
215 | const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0])); |
216 | return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); |
217 | } |
218 | |
219 | template<int N> |
220 | __forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
221 | { |
222 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
223 | size_t mask = pointQueryNodeSphere(node, query, time, dist); |
224 | |
225 | if (unlikely(ref.isAABBNodeMB4D())) { |
226 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
227 | const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t); |
228 | mask &= movemask(vmask); |
229 | } |
230 | |
231 | return mask; |
232 | } |
233 | |
234 | template<int N> |
235 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
236 | { |
237 | const vfloat<N> start_x(node->start.x); |
238 | const vfloat<N> scale_x(node->scale.x); |
239 | const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x); |
240 | const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x); |
241 | const vfloat<N> start_y(node->start.y); |
242 | const vfloat<N> scale_y(node->scale.y); |
243 | const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y); |
244 | const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y); |
245 | const vfloat<N> start_z(node->start.z); |
246 | const vfloat<N> scale_z(node->scale.z); |
247 | const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z); |
248 | const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z); |
249 | return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask()); |
250 | } |
251 | |
252 | template<int N> |
253 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
254 | { |
255 | const vfloat<N> minX = node->dequantizeLowerX(time); |
256 | const vfloat<N> maxX = node->dequantizeUpperX(time); |
257 | const vfloat<N> minY = node->dequantizeLowerY(time); |
258 | const vfloat<N> maxY = node->dequantizeUpperY(time); |
259 | const vfloat<N> minZ = node->dequantizeLowerZ(time); |
260 | const vfloat<N> maxZ = node->dequantizeUpperZ(time); |
261 | return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask()); |
262 | } |
263 | |
264 | template<int N> |
265 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
266 | { |
267 | // TODO: point query - implement |
268 | const vbool<N> vmask = vbool<N>(true); |
269 | const size_t mask = movemask(vmask) & ((1<<N)-1); |
270 | dist = vfloat<N>(0.0f); |
271 | return mask; |
272 | } |
273 | |
274 | template<int N> |
275 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
276 | { |
277 | // TODO: point query - implement |
278 | const vbool<N> vmask = vbool<N>(true); |
279 | const size_t mask = movemask(vmask) & ((1<<N)-1); |
280 | dist = vfloat<N>(0.0f); |
281 | return mask; |
282 | } |
283 | |
284 | template<int N> |
285 | __forceinline size_t pointQueryAABBDistAndMask( |
286 | const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX, |
287 | vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ) |
288 | { |
289 | const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x; |
290 | const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y; |
291 | const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z; |
292 | dist = vX * vX + vY * vY + vZ * vZ; |
293 | const vbool<N> valid = minX <= maxX; |
294 | const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) | |
295 | (maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) | |
296 | (maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z)); |
297 | return movemask(vmask) & movemask(valid); |
298 | } |
299 | |
300 | template<int N> |
301 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
302 | { |
303 | const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x)); |
304 | const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y)); |
305 | const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z)); |
306 | const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x)); |
307 | const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y)); |
308 | const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z)); |
309 | return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); |
310 | } |
311 | |
312 | template<int N> |
313 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
314 | { |
315 | const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x); |
316 | const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y); |
317 | const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z); |
318 | const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x); |
319 | const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y); |
320 | const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z); |
321 | const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0])); |
322 | const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0])); |
323 | const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0])); |
324 | const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0])); |
325 | const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0])); |
326 | const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0])); |
327 | return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); |
328 | } |
329 | |
330 | template<int N> |
331 | __forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
332 | { |
333 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
334 | size_t mask = pointQueryNodeAABB(node, query, time, dist); |
335 | |
336 | if (unlikely(ref.isAABBNodeMB4D())) { |
337 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
338 | const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t); |
339 | mask &= movemask(vmask); |
340 | } |
341 | |
342 | return mask; |
343 | } |
344 | |
345 | template<int N> |
346 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
347 | { |
348 | const size_t mvalid = movemask(node->validMask()); |
349 | const vfloat<N> start_x(node->start.x); |
350 | const vfloat<N> scale_x(node->scale.x); |
351 | const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x); |
352 | const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x); |
353 | const vfloat<N> start_y(node->start.y); |
354 | const vfloat<N> scale_y(node->scale.y); |
355 | const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y); |
356 | const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y); |
357 | const vfloat<N> start_z(node->start.z); |
358 | const vfloat<N> scale_z(node->scale.z); |
359 | const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z); |
360 | const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z); |
361 | return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid; |
362 | } |
363 | |
364 | template<int N> |
365 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
366 | { |
367 | const size_t mvalid = movemask(node->validMask()); |
368 | const vfloat<N> minX = node->dequantizeLowerX(time); |
369 | const vfloat<N> maxX = node->dequantizeUpperX(time); |
370 | const vfloat<N> minY = node->dequantizeLowerY(time); |
371 | const vfloat<N> maxY = node->dequantizeUpperY(time); |
372 | const vfloat<N> minZ = node->dequantizeLowerZ(time); |
373 | const vfloat<N> maxZ = node->dequantizeUpperZ(time); |
374 | return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid; |
375 | } |
376 | |
377 | template<int N> |
378 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
379 | { |
380 | // TODO: point query - implement |
381 | const vbool<N> vmask = vbool<N>(true); |
382 | const size_t mask = movemask(vmask) & ((1<<N)-1); |
383 | dist = vfloat<N>(0.0f); |
384 | return mask; |
385 | } |
386 | |
387 | template<int N> |
388 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
389 | { |
390 | // TODO: point query - implement |
391 | const vbool<N> vmask = vbool<N>(true); |
392 | const size_t mask = movemask(vmask) & ((1<<N)-1); |
393 | dist = vfloat<N>(0.0f); |
394 | return mask; |
395 | } |
396 | |
397 | ////////////////////////////////////////////////////////////////////////////////////// |
398 | // Fast AABBNode intersection |
399 | ////////////////////////////////////////////////////////////////////////////////////// |
400 | |
401 | template<int N, bool robust> |
402 | __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist); |
403 | |
404 | template<> |
405 | __forceinline size_t intersectNode<4>(const typename BVH4::AABBNode* node, const TravRay<4,false>& ray, vfloat4& dist) |
406 | { |
407 | #if defined(__AVX2__) || defined(__ARM_NEON) |
408 | const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); |
409 | const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); |
410 | const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); |
411 | const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); |
412 | const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); |
413 | const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); |
414 | #else |
415 | const vfloat4 tNearX = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x; |
416 | const vfloat4 tNearY = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y; |
417 | const vfloat4 tNearZ = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z; |
418 | const vfloat4 tFarX = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x; |
419 | const vfloat4 tFarY = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y; |
420 | const vfloat4 tFarZ = (vfloat4::load(a: (float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z; |
421 | #endif |
422 | |
423 | #if defined(__aarch64__) |
424 | const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear); |
425 | const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar); |
426 | const vbool4 vmask = asInt(tNear) <= asInt(tFar); |
427 | const size_t mask = movemask(vmask); |
428 | #elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW |
429 | const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
430 | const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
431 | const vbool4 vmask = asInt(tNear) > asInt(tFar); |
432 | const size_t mask = movemask(vmask) ^ ((1<<4)-1); |
433 | #elif defined(__AVX512F__) // SKX |
434 | const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
435 | const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
436 | const vbool4 vmask = asInt(tNear) <= asInt(tFar); |
437 | const size_t mask = movemask(vmask); |
438 | #else |
439 | const vfloat4 tNear = max(a: tNearX,b: tNearY,c: tNearZ,d: ray.tnear); |
440 | const vfloat4 tFar = min(a: tFarX ,b: tFarY ,c: tFarZ ,d: ray.tfar); |
441 | const vbool4 vmask = tNear <= tFar; |
442 | const size_t mask = movemask(a: vmask); |
443 | #endif |
444 | dist = tNear; |
445 | return mask; |
446 | } |
447 | |
448 | #if defined(__AVX__) |
449 | |
450 | template<> |
451 | __forceinline size_t intersectNode<8>(const typename BVH8::AABBNode* node, const TravRay<8,false>& ray, vfloat8& dist) |
452 | { |
453 | #if defined(__AVX2__) || defined(__ARM_NEON) |
454 | const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); |
455 | const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); |
456 | const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); |
457 | const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); |
458 | const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); |
459 | const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); |
460 | #else |
461 | const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x; |
462 | const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y; |
463 | const vfloat8 tNearZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z; |
464 | const vfloat8 tFarX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x; |
465 | const vfloat8 tFarY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y; |
466 | const vfloat8 tFarZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z; |
467 | #endif |
468 | |
469 | #if defined(__AVX2__) && !defined(__AVX512F__) // HSW |
470 | const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
471 | const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
472 | const vbool8 vmask = asInt(tNear) > asInt(tFar); |
473 | const size_t mask = movemask(vmask) ^ ((1<<8)-1); |
474 | #elif defined(__AVX512F__) // SKX |
475 | const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
476 | const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
477 | const vbool8 vmask = asInt(tNear) <= asInt(tFar); |
478 | const size_t mask = movemask(vmask); |
479 | #else |
480 | const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
481 | const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
482 | const vbool8 vmask = tNear <= tFar; |
483 | const size_t mask = movemask(vmask); |
484 | #endif |
485 | dist = tNear; |
486 | return mask; |
487 | } |
488 | |
489 | #endif |
490 | |
491 | ////////////////////////////////////////////////////////////////////////////////////// |
492 | // Robust AABBNode intersection |
493 | ////////////////////////////////////////////////////////////////////////////////////// |
494 | |
495 | template<int N> |
496 | __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist) |
497 | { |
498 | const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; |
499 | const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y; |
500 | const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z; |
501 | const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; |
502 | const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y; |
503 | const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z; |
504 | const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
505 | const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
506 | const vbool<N> vmask = tNear <= tFar; |
507 | const size_t mask = movemask(vmask); |
508 | dist = tNear; |
509 | return mask; |
510 | } |
511 | |
512 | ////////////////////////////////////////////////////////////////////////////////////// |
513 | // Fast AABBNodeMB intersection |
514 | ////////////////////////////////////////////////////////////////////////////////////// |
515 | |
516 | template<int N> |
517 | __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) |
518 | { |
519 | const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); |
520 | const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); |
521 | const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); |
522 | const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); |
523 | const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); |
524 | const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); |
525 | #if defined(__AVX2__) || defined(__ARM_NEON) |
526 | const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x); |
527 | const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y); |
528 | const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z); |
529 | const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x); |
530 | const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y); |
531 | const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z); |
532 | #else |
533 | const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x; |
534 | const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y; |
535 | const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z; |
536 | const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x; |
537 | const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y; |
538 | const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z; |
539 | #endif |
540 | #if defined(__AVX2__) && !defined(__AVX512F__) // HSW |
541 | const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
542 | const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
543 | const vbool<N> vmask = asInt(tNear) > asInt(tFar); |
544 | const size_t mask = movemask(vmask) ^ ((1<<N)-1); |
545 | #elif defined(__AVX512F__) // SKX |
546 | const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
547 | const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
548 | const vbool<N> vmask = asInt(tNear) <= asInt(tFar); |
549 | const size_t mask = movemask(vmask); |
550 | #else |
551 | const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); |
552 | const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); |
553 | const vbool<N> vmask = tNear <= tFar; |
554 | const size_t mask = movemask(vmask); |
555 | #endif |
556 | dist = tNear; |
557 | return mask; |
558 | } |
559 | |
560 | ////////////////////////////////////////////////////////////////////////////////////// |
561 | // Robust AABBNodeMB intersection |
562 | ////////////////////////////////////////////////////////////////////////////////////// |
563 | |
564 | template<int N> |
565 | __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) |
566 | { |
567 | const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); |
568 | const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); |
569 | const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); |
570 | const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x; |
571 | const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y; |
572 | const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z; |
573 | const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); |
574 | const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); |
575 | const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); |
576 | const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); |
577 | const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x; |
578 | const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y; |
579 | const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z; |
580 | const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ); |
581 | const size_t mask = movemask(tNear <= tFar); |
582 | dist = tNear; |
583 | return mask; |
584 | } |
585 | |
586 | ////////////////////////////////////////////////////////////////////////////////////// |
587 | // Fast AABBNodeMB4D intersection |
588 | ////////////////////////////////////////////////////////////////////////////////////// |
589 | |
590 | template<int N> |
591 | __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) |
592 | { |
593 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
594 | |
595 | const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); |
596 | const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); |
597 | const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); |
598 | const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); |
599 | const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); |
600 | const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); |
601 | #if defined (__AVX2__) || defined(__ARM_NEON) |
602 | const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x); |
603 | const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y); |
604 | const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z); |
605 | const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x); |
606 | const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y); |
607 | const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z); |
608 | #else |
609 | const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x; |
610 | const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y; |
611 | const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z; |
612 | const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x; |
613 | const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y; |
614 | const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z; |
615 | #endif |
616 | #if defined(__AVX2__) && !defined(__AVX512F__) |
617 | const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear)); |
618 | const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar )); |
619 | #else |
620 | const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); |
621 | const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); |
622 | #endif |
623 | vbool<N> vmask = tNear <= tFar; |
624 | if (unlikely(ref.isAABBNodeMB4D())) { |
625 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
626 | vmask &= (node1->lower_t <= time) & (time < node1->upper_t); |
627 | } |
628 | const size_t mask = movemask(vmask); |
629 | dist = tNear; |
630 | return mask; |
631 | } |
632 | |
633 | ////////////////////////////////////////////////////////////////////////////////////// |
634 | // Robust AABBNodeMB4D intersection |
635 | ////////////////////////////////////////////////////////////////////////////////////// |
636 | |
637 | template<int N> |
638 | __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) |
639 | { |
640 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
641 | |
642 | const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); |
643 | const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); |
644 | const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); |
645 | const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x; |
646 | const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y; |
647 | const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z; |
648 | const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); |
649 | const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); |
650 | const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); |
651 | const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); |
652 | const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x; |
653 | const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y; |
654 | const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z; |
655 | const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ); |
656 | vbool<N> vmask = tNear <= tFar; |
657 | if (unlikely(ref.isAABBNodeMB4D())) { |
658 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
659 | vmask &= (node1->lower_t <= time) & (time < node1->upper_t); |
660 | } |
661 | const size_t mask = movemask(vmask); |
662 | dist = tNear; |
663 | return mask; |
664 | } |
665 | |
666 | ////////////////////////////////////////////////////////////////////////////////////// |
667 | // Fast QuantizedBaseNode intersection |
668 | ////////////////////////////////////////////////////////////////////////////////////// |
669 | |
670 | template<int N, bool robust> |
671 | __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist); |
672 | |
673 | template<> |
674 | __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,false>& ray, vfloat4& dist) |
675 | { |
676 | const size_t mvalid = movemask(a: node->validMask()); |
677 | const vfloat4 start_x(node->start.x); |
678 | const vfloat4 scale_x(node->scale.x); |
679 | const vfloat4 lower_x = madd(a: node->dequantize<4>(offset: ray.nearX >> 2),b: scale_x,c: start_x); |
680 | const vfloat4 upper_x = madd(a: node->dequantize<4>(offset: ray.farX >> 2),b: scale_x,c: start_x); |
681 | const vfloat4 start_y(node->start.y); |
682 | const vfloat4 scale_y(node->scale.y); |
683 | const vfloat4 lower_y = madd(a: node->dequantize<4>(offset: ray.nearY >> 2),b: scale_y,c: start_y); |
684 | const vfloat4 upper_y = madd(a: node->dequantize<4>(offset: ray.farY >> 2),b: scale_y,c: start_y); |
685 | const vfloat4 start_z(node->start.z); |
686 | const vfloat4 scale_z(node->scale.z); |
687 | const vfloat4 lower_z = madd(a: node->dequantize<4>(offset: ray.nearZ >> 2),b: scale_z,c: start_z); |
688 | const vfloat4 upper_z = madd(a: node->dequantize<4>(offset: ray.farZ >> 2),b: scale_z,c: start_z); |
689 | |
690 | #if defined(__AVX2__) || defined(__ARM_NEON) |
691 | const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); |
692 | const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); |
693 | const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); |
694 | const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); |
695 | const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); |
696 | const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); |
697 | #else |
698 | const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x; |
699 | const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y; |
700 | const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z; |
701 | const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x; |
702 | const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y; |
703 | const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z; |
704 | #endif |
705 | |
706 | #if defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW |
707 | const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
708 | const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
709 | const vbool4 vmask = asInt(tNear) > asInt(tFar); |
710 | const size_t mask = movemask(vmask) ^ ((1<<4)-1); |
711 | #elif defined(__AVX512F__) // SKX |
712 | const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
713 | const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
714 | const vbool4 vmask = asInt(tNear) <= asInt(tFar); |
715 | const size_t mask = movemask(vmask); |
716 | #else |
717 | const vfloat4 tNear = max(a: tNearX,b: tNearY,c: tNearZ,d: ray.tnear); |
718 | const vfloat4 tFar = min(a: tFarX ,b: tFarY ,c: tFarZ ,d: ray.tfar); |
719 | const vbool4 vmask = tNear <= tFar; |
720 | const size_t mask = movemask(a: vmask); |
721 | #endif |
722 | dist = tNear; |
723 | return mask & mvalid; |
724 | } |
725 | |
726 | template<> |
727 | __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,true>& ray, vfloat4& dist) |
728 | { |
729 | const size_t mvalid = movemask(a: node->validMask()); |
730 | const vfloat4 start_x(node->start.x); |
731 | const vfloat4 scale_x(node->scale.x); |
732 | const vfloat4 lower_x = madd(a: node->dequantize<4>(offset: ray.nearX >> 2),b: scale_x,c: start_x); |
733 | const vfloat4 upper_x = madd(a: node->dequantize<4>(offset: ray.farX >> 2),b: scale_x,c: start_x); |
734 | const vfloat4 start_y(node->start.y); |
735 | const vfloat4 scale_y(node->scale.y); |
736 | const vfloat4 lower_y = madd(a: node->dequantize<4>(offset: ray.nearY >> 2),b: scale_y,c: start_y); |
737 | const vfloat4 upper_y = madd(a: node->dequantize<4>(offset: ray.farY >> 2),b: scale_y,c: start_y); |
738 | const vfloat4 start_z(node->start.z); |
739 | const vfloat4 scale_z(node->scale.z); |
740 | const vfloat4 lower_z = madd(a: node->dequantize<4>(offset: ray.nearZ >> 2),b: scale_z,c: start_z); |
741 | const vfloat4 upper_z = madd(a: node->dequantize<4>(offset: ray.farZ >> 2),b: scale_z,c: start_z); |
742 | |
743 | const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; |
744 | const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; |
745 | const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; |
746 | const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; |
747 | const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; |
748 | const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; |
749 | |
750 | const vfloat4 tNear = max(a: tNearX,b: tNearY,c: tNearZ,d: ray.tnear); |
751 | const vfloat4 tFar = min(a: tFarX ,b: tFarY ,c: tFarZ ,d: ray.tfar); |
752 | const vbool4 vmask = tNear <= tFar; |
753 | const size_t mask = movemask(a: vmask); |
754 | dist = tNear; |
755 | return mask & mvalid; |
756 | } |
757 | |
758 | |
759 | #if defined(__AVX__) |
760 | |
761 | template<> |
762 | __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,false>& ray, vfloat8& dist) |
763 | { |
764 | const size_t mvalid = movemask(node->validMask()); |
765 | const vfloat8 start_x(node->start.x); |
766 | const vfloat8 scale_x(node->scale.x); |
767 | const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x); |
768 | const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x); |
769 | const vfloat8 start_y(node->start.y); |
770 | const vfloat8 scale_y(node->scale.y); |
771 | const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y); |
772 | const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y); |
773 | const vfloat8 start_z(node->start.z); |
774 | const vfloat8 scale_z(node->scale.z); |
775 | const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z); |
776 | const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z); |
777 | |
778 | #if defined(__AVX2__) || defined(__ARM_NEON) |
779 | const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); |
780 | const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); |
781 | const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); |
782 | const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); |
783 | const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); |
784 | const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); |
785 | #else |
786 | const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x; |
787 | const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y; |
788 | const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z; |
789 | const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x; |
790 | const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y; |
791 | const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z; |
792 | #endif |
793 | |
794 | #if defined(__AVX2__) && !defined(__AVX512F__) // HSW |
795 | const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
796 | const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
797 | const vbool8 vmask = asInt(tNear) > asInt(tFar); |
798 | const size_t mask = movemask(vmask) ^ ((1<<8)-1); |
799 | #elif defined(__AVX512F__) // SKX |
800 | const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
801 | const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
802 | const vbool8 vmask = asInt(tNear) <= asInt(tFar); |
803 | const size_t mask = movemask(vmask); |
804 | #else |
805 | const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
806 | const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
807 | const vbool8 vmask = tNear <= tFar; |
808 | const size_t mask = movemask(vmask); |
809 | #endif |
810 | dist = tNear; |
811 | return mask & mvalid; |
812 | } |
813 | |
814 | template<> |
815 | __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,true>& ray, vfloat8& dist) |
816 | { |
817 | const size_t mvalid = movemask(node->validMask()); |
818 | const vfloat8 start_x(node->start.x); |
819 | const vfloat8 scale_x(node->scale.x); |
820 | const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x); |
821 | const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x); |
822 | const vfloat8 start_y(node->start.y); |
823 | const vfloat8 scale_y(node->scale.y); |
824 | const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y); |
825 | const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y); |
826 | const vfloat8 start_z(node->start.z); |
827 | const vfloat8 scale_z(node->scale.z); |
828 | const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z); |
829 | const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z); |
830 | |
831 | const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; |
832 | const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; |
833 | const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; |
834 | const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; |
835 | const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; |
836 | const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; |
837 | |
838 | const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
839 | const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
840 | const vbool8 vmask = tNear <= tFar; |
841 | const size_t mask = movemask(vmask); |
842 | |
843 | dist = tNear; |
844 | return mask & mvalid; |
845 | } |
846 | |
847 | |
848 | #endif |
849 | |
850 | template<int N> |
851 | __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) |
852 | { |
853 | const vboolf<N> mvalid = node->validMask(); |
854 | const vfloat<N> lower_x = node->dequantizeLowerX(time); |
855 | const vfloat<N> upper_x = node->dequantizeUpperX(time); |
856 | const vfloat<N> lower_y = node->dequantizeLowerY(time); |
857 | const vfloat<N> upper_y = node->dequantizeUpperY(time); |
858 | const vfloat<N> lower_z = node->dequantizeLowerZ(time); |
859 | const vfloat<N> upper_z = node->dequantizeUpperZ(time); |
860 | #if defined(__AVX2__) || defined(__ARM_NEON) |
861 | const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); |
862 | const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); |
863 | const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); |
864 | const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); |
865 | const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); |
866 | const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); |
867 | #else |
868 | const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x; |
869 | const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y; |
870 | const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z; |
871 | const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x; |
872 | const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y; |
873 | const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z; |
874 | #endif |
875 | |
876 | const vfloat<N> tminX = mini(tNearX,tFarX); |
877 | const vfloat<N> tmaxX = maxi(tNearX,tFarX); |
878 | const vfloat<N> tminY = mini(tNearY,tFarY); |
879 | const vfloat<N> tmaxY = maxi(tNearY,tFarY); |
880 | const vfloat<N> tminZ = mini(tNearZ,tFarZ); |
881 | const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ); |
882 | const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear); |
883 | const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar); |
884 | #if defined(__AVX512F__) // SKX |
885 | const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); |
886 | #else |
887 | const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; |
888 | #endif |
889 | const size_t mask = movemask(vmask); |
890 | dist = tNear; |
891 | return mask; |
892 | } |
893 | |
894 | template<int N> |
895 | __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) |
896 | { |
897 | const vboolf<N> mvalid = node->validMask(); |
898 | const vfloat<N> lower_x = node->dequantizeLowerX(time); |
899 | const vfloat<N> upper_x = node->dequantizeUpperX(time); |
900 | const vfloat<N> lower_y = node->dequantizeLowerY(time); |
901 | const vfloat<N> upper_y = node->dequantizeUpperY(time); |
902 | const vfloat<N> lower_z = node->dequantizeLowerZ(time); |
903 | const vfloat<N> upper_z = node->dequantizeUpperZ(time); |
904 | const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; |
905 | const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; |
906 | const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; |
907 | const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; |
908 | const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; |
909 | const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; |
910 | |
911 | const vfloat<N> tminX = mini(tNearX,tFarX); |
912 | const vfloat<N> tmaxX = maxi(tNearX,tFarX); |
913 | const vfloat<N> tminY = mini(tNearY,tFarY); |
914 | const vfloat<N> tmaxY = maxi(tNearY,tFarY); |
915 | const vfloat<N> tminZ = mini(tNearZ,tFarZ); |
916 | const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ); |
917 | const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear); |
918 | const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar); |
919 | #if defined(__AVX512F__) // SKX |
920 | const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); |
921 | #else |
922 | const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; |
923 | #endif |
924 | const size_t mask = movemask(vmask); |
925 | dist = tNear; |
926 | return mask; |
927 | } |
928 | |
929 | ////////////////////////////////////////////////////////////////////////////////////// |
930 | // Fast OBBNode intersection |
931 | ////////////////////////////////////////////////////////////////////////////////////// |
932 | |
933 | template<int N, bool robust> |
934 | __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist) |
935 | { |
936 | const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir); |
937 | //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir; |
938 | const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir); |
939 | const Vec3vf<N> org = xfmPoint(node->naabb,ray.org); |
940 | const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir; |
941 | const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir; |
942 | |
943 | const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x); |
944 | const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y); |
945 | const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z); |
946 | const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x); |
947 | const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y); |
948 | const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z); |
949 | vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ); |
950 | vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); |
951 | if (robust) { |
952 | tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp)); |
953 | tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp)); |
954 | } |
955 | const vbool<N> vmask = tNear <= tFar; |
956 | dist = tNear; |
957 | return movemask(vmask); |
958 | } |
959 | |
960 | ////////////////////////////////////////////////////////////////////////////////////// |
961 | // Fast OBBNodeMB intersection |
962 | ////////////////////////////////////////////////////////////////////////////////////// |
963 | |
964 | template<int N, bool robust> |
965 | __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,robust>& ray, const float time, vfloat<N>& dist) |
966 | { |
967 | const AffineSpace3vf<N> xfm = node->space0; |
968 | const Vec3vf<N> b0_lower = zero; |
969 | const Vec3vf<N> b0_upper = one; |
970 | const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time)); |
971 | const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time)); |
972 | |
973 | const BBox3vf<N> bounds(lower,upper); |
974 | const Vec3vf<N> dir = xfmVector(xfm,ray.dir); |
975 | const Vec3vf<N> rdir = rcp_safe(dir); |
976 | const Vec3vf<N> org = xfmPoint(xfm,ray.org); |
977 | |
978 | const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir; |
979 | const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir; |
980 | |
981 | const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x); |
982 | const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y); |
983 | const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z); |
984 | const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x); |
985 | const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y); |
986 | const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z); |
987 | vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ); |
988 | vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); |
989 | if (robust) { |
990 | tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp)); |
991 | tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp)); |
992 | } |
993 | const vbool<N> vmask = tNear <= tFar; |
994 | dist = tNear; |
995 | return movemask(vmask); |
996 | } |
997 | |
998 | ////////////////////////////////////////////////////////////////////////////////////// |
999 | // Node intersectors used in point query raversal |
1000 | ////////////////////////////////////////////////////////////////////////////////////// |
1001 | |
1002 | /*! Computes traversal information for N nodes with 1 point query */ |
1003 | template<int N, int types> |
1004 | struct BVHNNodePointQuerySphere1; |
1005 | |
1006 | template<int N> |
1007 | struct BVHNNodePointQuerySphere1<N, BVH_AN1> |
1008 | { |
1009 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1010 | { |
1011 | if (unlikely(node.isLeaf())) return false; |
1012 | mask = pointQueryNodeSphere(node.getAABBNode(), query, dist); |
1013 | return true; |
1014 | } |
1015 | }; |
1016 | |
1017 | template<int N> |
1018 | struct BVHNNodePointQuerySphere1<N, BVH_AN2> |
1019 | { |
1020 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1021 | { |
1022 | if (unlikely(node.isLeaf())) return false; |
1023 | mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist); |
1024 | return true; |
1025 | } |
1026 | }; |
1027 | |
1028 | template<int N> |
1029 | struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D> |
1030 | { |
1031 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1032 | { |
1033 | if (unlikely(node.isLeaf())) return false; |
1034 | mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist); |
1035 | return true; |
1036 | } |
1037 | }; |
1038 | |
1039 | template<int N> |
1040 | struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1> |
1041 | { |
1042 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1043 | { |
1044 | if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist); |
1045 | else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist); |
1046 | else return false; |
1047 | return true; |
1048 | } |
1049 | }; |
1050 | |
1051 | template<int N> |
1052 | struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2> |
1053 | { |
1054 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1055 | { |
1056 | if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist); |
1057 | else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist); |
1058 | else return false; |
1059 | return true; |
1060 | } |
1061 | }; |
1062 | |
1063 | template<int N> |
1064 | struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2> |
1065 | { |
1066 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1067 | { |
1068 | if (unlikely(node.isLeaf())) return false; |
1069 | if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist); |
1070 | else mask = pointQueryNodeSphereMB4D(node, query, time, dist); |
1071 | return true; |
1072 | } |
1073 | }; |
1074 | |
1075 | template<int N> |
1076 | struct BVHNNodePointQuerySphere1<N, BVH_QN1> |
1077 | { |
1078 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1079 | { |
1080 | if (unlikely(node.isLeaf())) return false; |
1081 | mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist); |
1082 | return true; |
1083 | } |
1084 | }; |
1085 | |
1086 | template<int N> |
1087 | struct BVHNQuantizedBaseNodePointQuerySphere1 |
1088 | { |
1089 | static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
1090 | { |
1091 | return pointQueryNodeSphere(node,query,dist); |
1092 | } |
1093 | |
1094 | static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
1095 | { |
1096 | return pointQueryNodeSphere(node,query,time,dist); |
1097 | } |
1098 | }; |
1099 | |
1100 | /*! Computes traversal information for N nodes with 1 point query */ |
1101 | template<int N, int types> |
1102 | struct BVHNNodePointQueryAABB1; |
1103 | |
1104 | template<int N> |
1105 | struct BVHNNodePointQueryAABB1<N, BVH_AN1> |
1106 | { |
1107 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1108 | { |
1109 | if (unlikely(node.isLeaf())) return false; |
1110 | mask = pointQueryNodeAABB(node.getAABBNode(), query, dist); |
1111 | return true; |
1112 | } |
1113 | }; |
1114 | |
1115 | template<int N> |
1116 | struct BVHNNodePointQueryAABB1<N, BVH_AN2> |
1117 | { |
1118 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1119 | { |
1120 | if (unlikely(node.isLeaf())) return false; |
1121 | mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist); |
1122 | return true; |
1123 | } |
1124 | }; |
1125 | |
1126 | template<int N> |
1127 | struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D> |
1128 | { |
1129 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1130 | { |
1131 | if (unlikely(node.isLeaf())) return false; |
1132 | mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist); |
1133 | return true; |
1134 | } |
1135 | }; |
1136 | |
1137 | template<int N> |
1138 | struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1> |
1139 | { |
1140 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1141 | { |
1142 | if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist); |
1143 | else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist); |
1144 | else return false; |
1145 | return true; |
1146 | } |
1147 | }; |
1148 | |
1149 | template<int N> |
1150 | struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2> |
1151 | { |
1152 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1153 | { |
1154 | if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist); |
1155 | else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist); |
1156 | else return false; |
1157 | return true; |
1158 | } |
1159 | }; |
1160 | |
1161 | template<int N> |
1162 | struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2> |
1163 | { |
1164 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1165 | { |
1166 | if (unlikely(node.isLeaf())) return false; |
1167 | if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist); |
1168 | else mask = pointQueryNodeAABBMB4D(node, query, time, dist); |
1169 | return true; |
1170 | } |
1171 | }; |
1172 | |
1173 | template<int N> |
1174 | struct BVHNNodePointQueryAABB1<N, BVH_QN1> |
1175 | { |
1176 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1177 | { |
1178 | if (unlikely(node.isLeaf())) return false; |
1179 | mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist); |
1180 | return true; |
1181 | } |
1182 | }; |
1183 | |
1184 | template<int N> |
1185 | struct BVHNQuantizedBaseNodePointQueryAABB1 |
1186 | { |
1187 | static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
1188 | { |
1189 | return pointQueryNodeAABB(node,query,dist); |
1190 | } |
1191 | |
1192 | static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
1193 | { |
1194 | return pointQueryNodeAABB(node,query,time,dist); |
1195 | } |
1196 | }; |
1197 | |
1198 | |
1199 | ////////////////////////////////////////////////////////////////////////////////////// |
1200 | // Node intersectors used in ray traversal |
1201 | ////////////////////////////////////////////////////////////////////////////////////// |
1202 | |
1203 | /*! Intersects N nodes with 1 ray */ |
1204 | template<int N, int types, bool robust> |
1205 | struct BVHNNodeIntersector1; |
1206 | |
1207 | template<int N> |
1208 | struct BVHNNodeIntersector1<N, BVH_AN1, false> |
1209 | { |
1210 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1211 | { |
1212 | if (unlikely(node.isLeaf())) return false; |
1213 | mask = intersectNode(node.getAABBNode(), ray, dist); |
1214 | return true; |
1215 | } |
1216 | }; |
1217 | |
1218 | template<int N> |
1219 | struct BVHNNodeIntersector1<N, BVH_AN1, true> |
1220 | { |
1221 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1222 | { |
1223 | if (unlikely(node.isLeaf())) return false; |
1224 | mask = intersectNodeRobust(node.getAABBNode(), ray, dist); |
1225 | return true; |
1226 | } |
1227 | }; |
1228 | |
1229 | template<int N> |
1230 | struct BVHNNodeIntersector1<N, BVH_AN2, false> |
1231 | { |
1232 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1233 | { |
1234 | if (unlikely(node.isLeaf())) return false; |
1235 | mask = intersectNode(node.getAABBNodeMB(), ray, time, dist); |
1236 | return true; |
1237 | } |
1238 | }; |
1239 | |
1240 | template<int N> |
1241 | struct BVHNNodeIntersector1<N, BVH_AN2, true> |
1242 | { |
1243 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1244 | { |
1245 | if (unlikely(node.isLeaf())) return false; |
1246 | mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist); |
1247 | return true; |
1248 | } |
1249 | }; |
1250 | |
1251 | template<int N> |
1252 | struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, false> |
1253 | { |
1254 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1255 | { |
1256 | if (unlikely(node.isLeaf())) return false; |
1257 | mask = intersectNodeMB4D<N>(node, ray, time, dist); |
1258 | return true; |
1259 | } |
1260 | }; |
1261 | |
1262 | template<int N> |
1263 | struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, true> |
1264 | { |
1265 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1266 | { |
1267 | if (unlikely(node.isLeaf())) return false; |
1268 | mask = intersectNodeMB4DRobust<N>(node, ray, time, dist); |
1269 | return true; |
1270 | } |
1271 | }; |
1272 | |
1273 | template<int N> |
1274 | struct BVHNNodeIntersector1<N, BVH_AN1_UN1, false> |
1275 | { |
1276 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1277 | { |
1278 | if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist); |
1279 | else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist); |
1280 | else return false; |
1281 | return true; |
1282 | } |
1283 | }; |
1284 | |
1285 | template<int N> |
1286 | struct BVHNNodeIntersector1<N, BVH_AN1_UN1, true> |
1287 | { |
1288 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1289 | { |
1290 | if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist); |
1291 | else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist); |
1292 | else return false; |
1293 | return true; |
1294 | } |
1295 | }; |
1296 | |
1297 | template<int N> |
1298 | struct BVHNNodeIntersector1<N, BVH_AN2_UN2, false> |
1299 | { |
1300 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1301 | { |
1302 | if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist); |
1303 | else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); |
1304 | else return false; |
1305 | return true; |
1306 | } |
1307 | }; |
1308 | |
1309 | template<int N> |
1310 | struct BVHNNodeIntersector1<N, BVH_AN2_UN2, true> |
1311 | { |
1312 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1313 | { |
1314 | if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist); |
1315 | else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); |
1316 | else return false; |
1317 | return true; |
1318 | } |
1319 | }; |
1320 | |
1321 | template<int N> |
1322 | struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, false> |
1323 | { |
1324 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1325 | { |
1326 | if (unlikely(node.isLeaf())) return false; |
1327 | if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); |
1328 | else mask = intersectNodeMB4D(node, ray, time, dist); |
1329 | return true; |
1330 | } |
1331 | }; |
1332 | |
1333 | template<int N> |
1334 | struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, true> |
1335 | { |
1336 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1337 | { |
1338 | if (unlikely(node.isLeaf())) return false; |
1339 | if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); |
1340 | else mask = intersectNodeMB4DRobust(node, ray, time, dist); |
1341 | return true; |
1342 | } |
1343 | }; |
1344 | |
1345 | template<int N> |
1346 | struct BVHNNodeIntersector1<N, BVH_QN1, false> |
1347 | { |
1348 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1349 | { |
1350 | if (unlikely(node.isLeaf())) return false; |
1351 | mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist); |
1352 | return true; |
1353 | } |
1354 | }; |
1355 | |
1356 | template<int N> |
1357 | struct BVHNNodeIntersector1<N, BVH_QN1, true> |
1358 | { |
1359 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1360 | { |
1361 | if (unlikely(node.isLeaf())) return false; |
1362 | mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist); |
1363 | return true; |
1364 | } |
1365 | }; |
1366 | |
1367 | /*! Intersects N nodes with K rays */ |
1368 | template<int N, bool robust> |
1369 | struct BVHNQuantizedBaseNodeIntersector1; |
1370 | |
1371 | template<int N> |
1372 | struct BVHNQuantizedBaseNodeIntersector1<N, false> |
1373 | { |
1374 | static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,false>& ray, vfloat<N>& dist) |
1375 | { |
1376 | return intersectNode(node,ray,dist); |
1377 | } |
1378 | |
1379 | static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) |
1380 | { |
1381 | return intersectNode(node,ray,time,dist); |
1382 | } |
1383 | |
1384 | }; |
1385 | |
1386 | template<int N> |
1387 | struct BVHNQuantizedBaseNodeIntersector1<N, true> |
1388 | { |
1389 | static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,true>& ray, vfloat<N>& dist) |
1390 | { |
1391 | return intersectNode(node,ray,dist); |
1392 | } |
1393 | |
1394 | static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) |
1395 | { |
1396 | return intersectNode(node,ray,time,dist); |
1397 | } |
1398 | |
1399 | }; |
1400 | |
1401 | |
1402 | } |
1403 | } |
1404 | |