| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "bvh.h" | 
| 7 | #include "node_intersector1.h" | 
| 8 | #include "../common/stack_item.h" | 
| 9 |  | 
| 10 | #define NEW_SORTING_CODE 1 | 
| 11 |  | 
| 12 | namespace embree | 
| 13 | { | 
| 14 |   namespace isa | 
| 15 |   { | 
| 16 |     /*! BVH regular node traversal for single rays. */ | 
| 17 |     template<int N, int types> | 
| 18 |     class BVHNNodeTraverser1Hit; | 
| 19 |  | 
| 20 | #if defined(__AVX512VL__) // SKX | 
| 21 |  | 
| 22 |     template<int N> | 
| 23 |     __forceinline void isort_update(vint<N> &dist, const vint<N> &d) | 
| 24 |     { | 
| 25 |       const vint<N> dist_shift = align_shift_right<N-1>(dist,dist); | 
| 26 |       const vboolf<N> m_geq = d >= dist; | 
| 27 |       const vboolf<N> m_geq_shift = m_geq << 1; | 
| 28 |       dist = select(m_geq,d,dist); | 
| 29 |       dist = select(m_geq_shift,dist_shift,dist); | 
| 30 |     } | 
| 31 |  | 
| 32 |     template<int N> | 
| 33 |     __forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) { | 
| 34 |       dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero))); | 
| 35 |     } | 
| 36 |  | 
| 37 |     __forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) { | 
| 38 |       return toScalar(permutex2var((__m256i)index,n0,n1)); | 
| 39 |     } | 
| 40 |  | 
| 41 |     __forceinline float permuteExtract(const vint8& index, const vfloat8& n) { | 
| 42 |       return toScalar(permute(n,index)); | 
| 43 |     } | 
| 44 |  | 
| 45 | #endif | 
| 46 |  | 
| 47 |     /* Specialization for BVH4. */ | 
| 48 |     template<int types> | 
| 49 |     class BVHNNodeTraverser1Hit<4, types> | 
| 50 |     { | 
| 51 |       typedef BVH4 BVH; | 
| 52 |       typedef BVH4::NodeRef NodeRef; | 
| 53 |       typedef BVH4::BaseNode BaseNode; | 
| 54 |  | 
| 55 |  | 
| 56 |     public: | 
| 57 |       /* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection). */ | 
| 58 |       static __forceinline void traverseClosestHit(NodeRef& cur, | 
| 59 |                                                    size_t mask, | 
| 60 |                                                    const vfloat4& tNear, | 
| 61 |                                                    StackItemT<NodeRef>*& stackPtr, | 
| 62 |                                                    StackItemT<NodeRef>* stackEnd) | 
| 63 |       { | 
| 64 |         assert(mask != 0); | 
| 65 |         const BaseNode* node = cur.baseNode(); | 
| 66 |  | 
| 67 |         /*! one child is hit, continue with that child */ | 
| 68 |         size_t r = bscf(v&: mask); | 
| 69 |         cur = node->child(i: r); | 
| 70 |         BVH::prefetch(ref: cur,types); | 
| 71 |         if (likely(mask == 0)) { | 
| 72 |           assert(cur != BVH::emptyNode); | 
| 73 |           return; | 
| 74 |         } | 
| 75 |  | 
| 76 |         /*! two children are hit, push far child, and continue with closer child */ | 
| 77 |         NodeRef c0 = cur; | 
| 78 |         const unsigned int d0 = ((unsigned int*)&tNear)[r]; | 
| 79 |         r = bscf(v&: mask); | 
| 80 |         NodeRef c1 = node->child(i: r); | 
| 81 |         BVH::prefetch(ref: c1,types); | 
| 82 |         const unsigned int d1 = ((unsigned int*)&tNear)[r]; | 
| 83 |         assert(c0 != BVH::emptyNode); | 
| 84 |         assert(c1 != BVH::emptyNode); | 
| 85 |         if (likely(mask == 0)) { | 
| 86 |           assert(stackPtr < stackEnd); | 
| 87 |           if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; } | 
| 88 |           else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; } | 
| 89 |         } | 
| 90 |  | 
| 91 | #if NEW_SORTING_CODE == 1 | 
| 92 |         vint4 s0((size_t)c0,(size_t)d0); | 
| 93 |         vint4 s1((size_t)c1,(size_t)d1); | 
| 94 |         r = bscf(v&: mask); | 
| 95 |         NodeRef c2 = node->child(i: r); BVH::prefetch(ref: c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];  | 
| 96 |         vint4 s2((size_t)c2,(size_t)d2); | 
| 97 |         /* 3 hits */ | 
| 98 |         if (likely(mask == 0)) { | 
| 99 |           StackItemT<NodeRef>::sort3(s1&: s0,s2&: s1,s3&: s2); | 
| 100 |           *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; | 
| 101 |           cur = toSizeT(v: s2); | 
| 102 |           stackPtr+=2; | 
| 103 |           return; | 
| 104 |         } | 
| 105 |         r = bscf(v&: mask); | 
| 106 |         NodeRef c3 = node->child(i: r); BVH::prefetch(ref: c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];  | 
| 107 |         vint4 s3((size_t)c3,(size_t)d3); | 
| 108 |         /* 4 hits */ | 
| 109 |         StackItemT<NodeRef>::sort4(s1&: s0,s2&: s1,s3&: s2,s4&: s3); | 
| 110 |         *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; | 
| 111 |         cur = toSizeT(v: s3); | 
| 112 |         stackPtr+=3; | 
| 113 | #else | 
| 114 |         /*! Here starts the slow path for 3 or 4 hit children. We push | 
| 115 |          *  all nodes onto the stack to sort them there. */ | 
| 116 |         assert(stackPtr < stackEnd); | 
| 117 |         stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; | 
| 118 |         assert(stackPtr < stackEnd); | 
| 119 |         stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; | 
| 120 |  | 
| 121 |         /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ | 
| 122 |         assert(stackPtr < stackEnd); | 
| 123 |         r = bscf(mask); | 
| 124 |         NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; | 
| 125 |         assert(c != BVH::emptyNode); | 
| 126 |         if (likely(mask == 0)) { | 
| 127 |           sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); | 
| 128 |           cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; | 
| 129 |           return; | 
| 130 |         } | 
| 131 |  | 
| 132 |         /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ | 
| 133 |         assert(stackPtr < stackEnd); | 
| 134 |         r = bscf(mask); | 
| 135 |         c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; | 
| 136 |         assert(c != BVH::emptyNode); | 
| 137 |         sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); | 
| 138 |         cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; | 
| 139 | #endif | 
| 140 |       } | 
| 141 |  | 
| 142 |       /* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion). */ | 
| 143 |       static __forceinline void traverseAnyHit(NodeRef& cur, | 
| 144 |                                                size_t mask, | 
| 145 |                                                const vfloat4& tNear, | 
| 146 |                                                NodeRef*& stackPtr, | 
| 147 |                                                NodeRef* stackEnd) | 
| 148 |       { | 
| 149 |         const BaseNode* node = cur.baseNode(); | 
| 150 |  | 
| 151 |         /*! one child is hit, continue with that child */ | 
| 152 |         size_t r = bscf(v&: mask); | 
| 153 |         cur = node->child(i: r);  | 
| 154 |         BVH::prefetch(ref: cur,types); | 
| 155 |  | 
| 156 |         /* simpler in sequence traversal order */ | 
| 157 |         assert(cur != BVH::emptyNode); | 
| 158 |         if (likely(mask == 0)) return; | 
| 159 |         assert(stackPtr < stackEnd); | 
| 160 |         *stackPtr = cur; stackPtr++; | 
| 161 |  | 
| 162 |         for (; ;) | 
| 163 |         { | 
| 164 |           r = bscf(v&: mask); | 
| 165 |           cur = node->child(i: r); BVH::prefetch(ref: cur,types); | 
| 166 |           assert(cur != BVH::emptyNode); | 
| 167 |           if (likely(mask == 0)) return; | 
| 168 |           assert(stackPtr < stackEnd); | 
| 169 |           *stackPtr = cur; stackPtr++; | 
| 170 |         } | 
| 171 |       } | 
| 172 |     }; | 
| 173 |  | 
| 174 |     /* Specialization for BVH8. */ | 
| 175 |     template<int types> | 
| 176 |     class BVHNNodeTraverser1Hit<8, types> | 
| 177 |     { | 
| 178 |       typedef BVH8 BVH; | 
| 179 |       typedef BVH8::NodeRef NodeRef; | 
| 180 |       typedef BVH8::BaseNode BaseNode; | 
| 181 |        | 
| 182 | #if defined(__AVX512VL__) | 
| 183 |       template<class NodeRef, class BaseNode> | 
| 184 |         static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur, | 
| 185 |                                                               size_t mask, | 
| 186 |                                                               const vfloat8& tNear, | 
| 187 |                                                               StackItemT<NodeRef>*& stackPtr, | 
| 188 |                                                               StackItemT<NodeRef>* stackEnd) | 
| 189 |       { | 
| 190 |         assert(mask != 0); | 
| 191 |         const BaseNode* node = cur.baseNode(); | 
| 192 |         const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]); | 
| 193 |         const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]); | 
| 194 |         vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step); | 
| 195 |         distance_i = vint8::compact((int)mask,distance_i,distance_i); | 
| 196 |         cur = permuteExtract(distance_i,n0,n1); | 
| 197 |         BVH::prefetch(cur,types); | 
| 198 |  | 
| 199 |         mask &= mask-1; | 
| 200 |         if (likely(mask == 0)) return; | 
| 201 |  | 
| 202 |         /* 2 hits: order A0 B0 */ | 
| 203 |         const vint8 d0(distance_i); | 
| 204 |         const vint8 d1(shuffle<1>(distance_i)); | 
| 205 |         cur = permuteExtract(d1,n0,n1); | 
| 206 |         BVH::prefetch(cur,types); | 
| 207 |  | 
| 208 |         const vint8 dist_A0 = min(d0, d1); | 
| 209 |         const vint8 dist_B0 = max(d0, d1); | 
| 210 |         assert(dist_A0[0] < dist_B0[0]); | 
| 211 |  | 
| 212 |         mask &= mask-1; | 
| 213 |         if (likely(mask == 0)) { | 
| 214 |           cur                        = permuteExtract(dist_A0,n0,n1); | 
| 215 |           stackPtr[0].ptr            = permuteExtract(dist_B0,n0,n1); | 
| 216 |           *(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear); | 
| 217 |           stackPtr++; | 
| 218 |           return; | 
| 219 |         } | 
| 220 |  | 
| 221 |         /* 3 hits: order A1 B1 C1 */ | 
| 222 |  | 
| 223 |         const vint8 d2(shuffle<2>(distance_i)); | 
| 224 |         cur = permuteExtract(d2,n0,n1); | 
| 225 |         BVH::prefetch(cur,types); | 
| 226 |  | 
| 227 |         const vint8 dist_A1     = min(dist_A0,d2); | 
| 228 |         const vint8 dist_tmp_B1 = max(dist_A0,d2); | 
| 229 |         const vint8 dist_B1     = min(dist_B0,dist_tmp_B1); | 
| 230 |         const vint8 dist_C1     = max(dist_B0,dist_tmp_B1); | 
| 231 |         assert(dist_A1[0] < dist_B1[0]); | 
| 232 |         assert(dist_B1[0] < dist_C1[0]); | 
| 233 |  | 
| 234 |         mask &= mask-1; | 
| 235 |         if (likely(mask == 0)) { | 
| 236 |           cur                        = permuteExtract(dist_A1,n0,n1); | 
| 237 |           stackPtr[0].ptr            = permuteExtract(dist_C1,n0,n1); | 
| 238 |           *(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear); | 
| 239 |           stackPtr[1].ptr            = permuteExtract(dist_B1,n0,n1); | 
| 240 |           *(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear); | 
| 241 |           stackPtr+=2; | 
| 242 |           return; | 
| 243 |         } | 
| 244 |  | 
| 245 |         /* 4 hits: order A2 B2 C2 D2 */ | 
| 246 |  | 
| 247 |         const vint8 d3(shuffle<3>(distance_i)); | 
| 248 |         cur = permuteExtract(d3,n0,n1); | 
| 249 |         BVH::prefetch(cur,types); | 
| 250 |  | 
| 251 |         const vint8 dist_A2     = min(dist_A1,d3); | 
| 252 |         const vint8 dist_tmp_B2 = max(dist_A1,d3); | 
| 253 |         const vint8 dist_B2     = min(dist_B1,dist_tmp_B2); | 
| 254 |         const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2); | 
| 255 |         const vint8 dist_C2     = min(dist_C1,dist_tmp_C2); | 
| 256 |         const vint8 dist_D2     = max(dist_C1,dist_tmp_C2); | 
| 257 |         assert(dist_A2[0] < dist_B2[0]); | 
| 258 |         assert(dist_B2[0] < dist_C2[0]); | 
| 259 |         assert(dist_C2[0] < dist_D2[0]); | 
| 260 |          | 
| 261 |         mask &= mask-1; | 
| 262 |         if (likely(mask == 0)) { | 
| 263 |           cur                        = permuteExtract(dist_A2,n0,n1); | 
| 264 |           stackPtr[0].ptr            = permuteExtract(dist_D2,n0,n1); | 
| 265 |           *(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear); | 
| 266 |           stackPtr[1].ptr            = permuteExtract(dist_C2,n0,n1); | 
| 267 |           *(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear); | 
| 268 |           stackPtr[2].ptr            = permuteExtract(dist_B2,n0,n1); | 
| 269 |           *(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear); | 
| 270 |           stackPtr+=3; | 
| 271 |           return; | 
| 272 |         } | 
| 273 |  | 
| 274 |         /* >=5 hits: reverse to descending order for writing to stack */ | 
| 275 |  | 
| 276 |         distance_i = align_shift_right<3>(distance_i,distance_i); | 
| 277 |         const size_t hits = 4 + popcnt(mask); | 
| 278 |         vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert | 
| 279 | 	 | 
| 280 |         isort_quick_update<8>(dist,dist_A2); | 
| 281 |         isort_quick_update<8>(dist,dist_B2); | 
| 282 |         isort_quick_update<8>(dist,dist_C2); | 
| 283 |         isort_quick_update<8>(dist,dist_D2); | 
| 284 |  | 
| 285 |         do { | 
| 286 |  | 
| 287 |           distance_i = align_shift_right<1>(distance_i,distance_i); | 
| 288 |           cur = permuteExtract(distance_i,n0,n1); | 
| 289 |           BVH::prefetch(cur,types); | 
| 290 |           const vint8 new_dist(permute(distance_i,vint8(zero))); | 
| 291 |           mask &= mask-1; | 
| 292 |           isort_update<8>(dist,new_dist); | 
| 293 |  | 
| 294 |         } while(mask); | 
| 295 |  | 
| 296 |         for (size_t i=0; i<7; i++) | 
| 297 |           assert(dist[i+0]>=dist[i+1]); | 
| 298 |  | 
| 299 |         for (size_t i=0;i<hits-1;i++) | 
| 300 |         { | 
| 301 |           stackPtr->ptr            = permuteExtract(dist,n0,n1); | 
| 302 |           *(float*)&stackPtr->dist = permuteExtract(dist,tNear); | 
| 303 |           dist = align_shift_right<1>(dist,dist); | 
| 304 |           stackPtr++; | 
| 305 |         } | 
| 306 |         cur = permuteExtract(dist,n0,n1); | 
| 307 |       } | 
| 308 | #endif | 
| 309 |  | 
| 310 |     public: | 
| 311 |       static __forceinline void traverseClosestHit(NodeRef& cur, | 
| 312 |                                                    size_t mask, | 
| 313 |                                                    const vfloat8& tNear, | 
| 314 |                                                    StackItemT<NodeRef>*& stackPtr, | 
| 315 |                                                    StackItemT<NodeRef>* stackEnd) | 
| 316 |       { | 
| 317 |         assert(mask != 0); | 
| 318 | #if defined(__AVX512VL__) | 
| 319 |         traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd); | 
| 320 | #else | 
| 321 |  | 
| 322 |         const BaseNode* node = cur.baseNode(); | 
| 323 |  | 
| 324 |         /*! one child is hit, continue with that child */ | 
| 325 |         size_t r = bscf(v&: mask); | 
| 326 |         cur = node->child(i: r); | 
| 327 |         BVH::prefetch(ref: cur,types); | 
| 328 |         if (likely(mask == 0)) { | 
| 329 |           assert(cur != BVH::emptyNode); | 
| 330 |           return; | 
| 331 |         } | 
| 332 |  | 
| 333 |         /*! two children are hit, push far child, and continue with closer child */ | 
| 334 |         NodeRef c0 = cur; | 
| 335 |         const unsigned int d0 = ((unsigned int*)&tNear)[r]; | 
| 336 |         r = bscf(v&: mask); | 
| 337 |         NodeRef c1 = node->child(i: r); | 
| 338 |         BVH::prefetch(ref: c1,types); | 
| 339 |         const unsigned int d1 = ((unsigned int*)&tNear)[r]; | 
| 340 |  | 
| 341 |         assert(c0 != BVH::emptyNode); | 
| 342 |         assert(c1 != BVH::emptyNode); | 
| 343 |         if (likely(mask == 0)) { | 
| 344 |           assert(stackPtr < stackEnd); | 
| 345 |           if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; } | 
| 346 |           else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; } | 
| 347 |         } | 
| 348 | #if NEW_SORTING_CODE == 1 | 
| 349 |         vint4 s0((size_t)c0,(size_t)d0); | 
| 350 |         vint4 s1((size_t)c1,(size_t)d1); | 
| 351 |  | 
| 352 |         r = bscf(v&: mask); | 
| 353 |         NodeRef c2 = node->child(i: r); BVH::prefetch(ref: c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];  | 
| 354 |         vint4 s2((size_t)c2,(size_t)d2); | 
| 355 |         /* 3 hits */ | 
| 356 |         if (likely(mask == 0)) { | 
| 357 |           StackItemT<NodeRef>::sort3(s1&: s0,s2&: s1,s3&: s2); | 
| 358 |           *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; | 
| 359 |           cur = toSizeT(v: s2); | 
| 360 |           stackPtr+=2; | 
| 361 |           return; | 
| 362 |         } | 
| 363 |         r = bscf(v&: mask); | 
| 364 |         NodeRef c3 = node->child(i: r); BVH::prefetch(ref: c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];  | 
| 365 |         vint4 s3((size_t)c3,(size_t)d3); | 
| 366 |         /* 4 hits */ | 
| 367 |         if (likely(mask == 0)) { | 
| 368 |           StackItemT<NodeRef>::sort4(s1&: s0,s2&: s1,s3&: s2,s4&: s3); | 
| 369 |           *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; | 
| 370 |           cur = toSizeT(v: s3); | 
| 371 |           stackPtr+=3; | 
| 372 |           return; | 
| 373 |         } | 
| 374 |         *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3; | 
| 375 |         /*! fallback case if more than 4 children are hit */ | 
| 376 |         StackItemT<NodeRef>* stackFirst = stackPtr; | 
| 377 |         stackPtr+=4;       | 
| 378 |         while (1) | 
| 379 |         { | 
| 380 |           assert(stackPtr < stackEnd); | 
| 381 |           r = bscf(v&: mask); | 
| 382 |           NodeRef c = node->child(i: r); BVH::prefetch(ref: c,types); unsigned int d = *(unsigned int*)&tNear[r];  | 
| 383 |           const vint4 s((size_t)c,(size_t)d); | 
| 384 |           *(vint4*)stackPtr++ = s; | 
| 385 |           assert(c != BVH::emptyNode); | 
| 386 |           if (unlikely(mask == 0)) break; | 
| 387 |         } | 
| 388 |         sort(begin: stackFirst,end: stackPtr); | 
| 389 |         cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; | 
| 390 | #else | 
| 391 |         /*! Here starts the slow path for 3 or 4 hit children. We push | 
| 392 |          *  all nodes onto the stack to sort them there. */ | 
| 393 |         assert(stackPtr < stackEnd); | 
| 394 |         stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; | 
| 395 |         assert(stackPtr < stackEnd); | 
| 396 |         stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; | 
| 397 |  | 
| 398 |         /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ | 
| 399 |         assert(stackPtr < stackEnd); | 
| 400 |         r = bscf(mask); | 
| 401 |         NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; | 
| 402 |         assert(c != BVH::emptyNode); | 
| 403 |         if (likely(mask == 0)) { | 
| 404 |           sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); | 
| 405 |           cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; | 
| 406 |           return; | 
| 407 |         } | 
| 408 |  | 
| 409 |         /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ | 
| 410 |         assert(stackPtr < stackEnd); | 
| 411 |         r = bscf(mask); | 
| 412 |         c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; | 
| 413 |         assert(c != BVH::emptyNode); | 
| 414 |         if (likely(mask == 0)) { | 
| 415 |           sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); | 
| 416 |           cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; | 
| 417 |           return; | 
| 418 |         } | 
| 419 |         /*! fallback case if more than 4 children are hit */ | 
| 420 |         StackItemT<NodeRef>* stackFirst = stackPtr-4; | 
| 421 |         while (1) | 
| 422 |         { | 
| 423 |           assert(stackPtr < stackEnd); | 
| 424 |           r = bscf(mask); | 
| 425 |           c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; | 
| 426 |           assert(c != BVH::emptyNode); | 
| 427 |           if (unlikely(mask == 0)) break; | 
| 428 |         } | 
| 429 |         sort(stackFirst,stackPtr); | 
| 430 |         cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; | 
| 431 | #endif | 
| 432 | #endif | 
| 433 |       } | 
| 434 |  | 
| 435 |       static __forceinline void traverseAnyHit(NodeRef& cur, | 
| 436 |                                                size_t mask, | 
| 437 |                                                const vfloat8& tNear, | 
| 438 |                                                NodeRef*& stackPtr, | 
| 439 |                                                NodeRef* stackEnd) | 
| 440 |       { | 
| 441 |         const BaseNode* node = cur.baseNode(); | 
| 442 |  | 
| 443 |         /*! one child is hit, continue with that child */ | 
| 444 |         size_t r = bscf(v&: mask); | 
| 445 |         cur = node->child(i: r); | 
| 446 |         BVH::prefetch(ref: cur,types); | 
| 447 |  | 
| 448 |         /* simpler in sequence traversal order */ | 
| 449 |         assert(cur != BVH::emptyNode); | 
| 450 |         if (likely(mask == 0)) return; | 
| 451 |         assert(stackPtr < stackEnd); | 
| 452 |         *stackPtr = cur; stackPtr++; | 
| 453 |  | 
| 454 |         for (; ;) | 
| 455 |         { | 
| 456 |           r = bscf(v&: mask); | 
| 457 |           cur = node->child(i: r); BVH::prefetch(ref: cur,types); | 
| 458 |           assert(cur != BVH::emptyNode); | 
| 459 |           if (likely(mask == 0)) return; | 
| 460 |           assert(stackPtr < stackEnd); | 
| 461 |           *stackPtr = cur; stackPtr++; | 
| 462 |         } | 
| 463 |       } | 
| 464 |     }; | 
| 465 |   } | 
| 466 | } | 
| 467 |  |