| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "node_intersector.h" | 
| 7 |  | 
| 8 | namespace embree | 
| 9 | { | 
| 10 |   namespace isa | 
| 11 |   { | 
| 12 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 13 |     // Ray packet structure used in hybrid traversal | 
| 14 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 15 |  | 
| 16 |     template<int K, bool robust> | 
| 17 |     struct TravRayK; | 
| 18 |  | 
| 19 |     /* Fast variant */ | 
| 20 |     template<int K> | 
| 21 |     struct TravRayK<K, false> | 
| 22 |     { | 
| 23 |       __forceinline TravRayK() {} | 
| 24 |  | 
| 25 |       __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) | 
| 26 |       { | 
| 27 |         init(ray_org, ray_dir, N); | 
| 28 |       } | 
| 29 |  | 
| 30 |       __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) | 
| 31 |       { | 
| 32 |         init(ray_org, ray_dir, N); | 
| 33 |         tnear = ray_tnear; | 
| 34 |         tfar = ray_tfar; | 
| 35 |       } | 
| 36 |  | 
| 37 |       __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) | 
| 38 |       { | 
| 39 |         org = ray_org; | 
| 40 |         dir = ray_dir; | 
| 41 |         rdir = rcp_safe(ray_dir); | 
| 42 | #if defined(__AVX2__) || defined(__ARM_NEON) | 
| 43 |         org_rdir = org * rdir; | 
| 44 | #endif | 
| 45 |  | 
| 46 |         if (N) | 
| 47 |         { | 
| 48 |           const int size = sizeof(float)*N; | 
| 49 |           nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size)); | 
| 50 |           nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size)); | 
| 51 |           nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size)); | 
| 52 |         } | 
| 53 |       } | 
| 54 |  | 
| 55 |       Vec3vf<K> org; | 
| 56 |       Vec3vf<K> dir; | 
| 57 |       Vec3vf<K> rdir; | 
| 58 | #if defined(__AVX2__) || defined(__ARM_NEON) | 
| 59 |       Vec3vf<K> org_rdir; | 
| 60 | #endif | 
| 61 |       Vec3vi<K> nearXYZ; | 
| 62 |       vfloat<K> tnear; | 
| 63 |       vfloat<K> tfar; | 
| 64 |     }; | 
| 65 |  | 
| 66 |     template<int K> | 
| 67 |     using TravRayKFast = TravRayK<K, false>; | 
| 68 |  | 
| 69 |     /* Robust variant */ | 
| 70 |     template<int K> | 
| 71 |     struct TravRayK<K, true> | 
| 72 |     { | 
| 73 |       __forceinline TravRayK() {} | 
| 74 |  | 
| 75 |       __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) | 
| 76 |       { | 
| 77 |         init(ray_org, ray_dir, N); | 
| 78 |       } | 
| 79 |  | 
| 80 |       __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) | 
| 81 |       { | 
| 82 |         init(ray_org, ray_dir, N); | 
| 83 |         tnear = ray_tnear; | 
| 84 |         tfar = ray_tfar; | 
| 85 |       } | 
| 86 |  | 
| 87 |       __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) | 
| 88 |       { | 
| 89 |         org = ray_org; | 
| 90 |         dir = ray_dir; | 
| 91 |         rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir)); | 
| 92 |  | 
| 93 |         if (N) | 
| 94 |         { | 
| 95 |           const int size = sizeof(float)*N; | 
| 96 |           nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size)); | 
| 97 |           nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size)); | 
| 98 |           nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size)); | 
| 99 |         } | 
| 100 |       } | 
| 101 |  | 
| 102 |       Vec3vf<K> org; | 
| 103 |       Vec3vf<K> dir; | 
| 104 |       Vec3vf<K> rdir; | 
| 105 |       Vec3vi<K> nearXYZ; | 
| 106 |       vfloat<K> tnear; | 
| 107 |       vfloat<K> tfar; | 
| 108 |     }; | 
| 109 |  | 
| 110 |     template<int K> | 
| 111 |     using TravRayKRobust = TravRayK<K, true>; | 
| 112 |  | 
| 113 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 114 |     // Fast AABBNode intersection | 
| 115 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 116 |  | 
| 117 |     template<int N, int K> | 
| 118 |     __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i, | 
| 119 |                                          const TravRayKFast<K>& ray, vfloat<K>& dist) | 
| 120 |  | 
| 121 |     { | 
| 122 |   #if defined(__AVX2__) || defined(__ARM_NEON) | 
| 123 |       const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x); | 
| 124 |       const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y); | 
| 125 |       const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z); | 
| 126 |       const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x); | 
| 127 |       const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y); | 
| 128 |       const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z); | 
| 129 |   #else | 
| 130 |       const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; | 
| 131 |       const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; | 
| 132 |       const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; | 
| 133 |       const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; | 
| 134 |       const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; | 
| 135 |       const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; | 
| 136 |   #endif | 
| 137 |  | 
| 138 |   #if defined(__AVX512F__) // SKX | 
| 139 |       if (K == 16) | 
| 140 |       { | 
| 141 |         /* use mixed float/int min/max */ | 
| 142 |         const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); | 
| 143 |         const vfloat<K> lfarP  = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); | 
| 144 |         const vbool<K> lhit    = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); | 
| 145 |         dist = lnearP; | 
| 146 |         return lhit; | 
| 147 |       } | 
| 148 |       else | 
| 149 |   #endif | 
| 150 |       { | 
| 151 |         const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); | 
| 152 |         const vfloat<K> lfarP  = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); | 
| 153 |   #if defined(__AVX512F__) // SKX | 
| 154 |         const vbool<K> lhit    = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); | 
| 155 |   #else | 
| 156 |         const vbool<K> lhit    = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 157 |   #endif | 
| 158 |         dist = lnearP; | 
| 159 |         return lhit; | 
| 160 |       } | 
| 161 |     } | 
| 162 |  | 
| 163 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 164 |     // Robust AABBNode intersection | 
| 165 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 166 |  | 
| 167 |     template<int N, int K> | 
| 168 |     __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i, | 
| 169 |                                                const TravRayKRobust<K>& ray, vfloat<K>& dist) | 
| 170 |     { | 
| 171 |       // FIXME: use per instruction rounding for AVX512 | 
| 172 |       const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; | 
| 173 |       const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; | 
| 174 |       const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; | 
| 175 |       const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; | 
| 176 |       const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; | 
| 177 |       const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; | 
| 178 |       const float round_up   = 1.0f+3.0f*float(ulp); | 
| 179 |       const float round_down = 1.0f-3.0f*float(ulp); | 
| 180 |       const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); | 
| 181 |       const vfloat<K> lfarP  = round_up  *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); | 
| 182 |       const vbool<K> lhit   = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); | 
| 183 |       dist = lnearP; | 
| 184 |       return lhit; | 
| 185 |     } | 
| 186 |  | 
| 187 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 188 |     // Fast AABBNodeMB intersection | 
| 189 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 190 |  | 
| 191 |     template<int N, int K> | 
| 192 |     __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i, | 
| 193 |                                          const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 194 |     { | 
| 195 |       const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); | 
| 196 |       const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); | 
| 197 |       const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); | 
| 198 |       const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); | 
| 199 |       const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); | 
| 200 |       const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); | 
| 201 |  | 
| 202 | #if defined(__AVX2__) || defined(__ARM_NEON) | 
| 203 |       const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); | 
| 204 |       const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); | 
| 205 |       const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); | 
| 206 |       const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); | 
| 207 |       const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); | 
| 208 |       const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); | 
| 209 | #else | 
| 210 |       const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; | 
| 211 |       const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; | 
| 212 |       const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; | 
| 213 |       const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; | 
| 214 |       const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; | 
| 215 |       const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; | 
| 216 | #endif | 
| 217 |  | 
| 218 | #if defined(__AVX512F__) // SKX | 
| 219 |       if (K == 16) | 
| 220 |       { | 
| 221 |         /* use mixed float/int min/max */ | 
| 222 |         const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); | 
| 223 |         const vfloat<K> lfarP  = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); | 
| 224 |         const vbool<K> lhit    = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); | 
| 225 |         dist = lnearP; | 
| 226 |         return lhit; | 
| 227 |       } | 
| 228 |       else | 
| 229 | #endif | 
| 230 |       { | 
| 231 |         const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); | 
| 232 |         const vfloat<K> lfarP  = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); | 
| 233 | #if defined(__AVX512F__) // SKX | 
| 234 |         const vbool<K> lhit    = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); | 
| 235 | #else | 
| 236 |         const vbool<K> lhit    = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 237 | #endif | 
| 238 |         dist = lnearP; | 
| 239 |         return lhit; | 
| 240 |       } | 
| 241 |     } | 
| 242 |  | 
| 243 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 244 |     // Robust AABBNodeMB intersection | 
| 245 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 246 |  | 
| 247 |     template<int N, int K> | 
| 248 |     __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i, | 
| 249 |                                                const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 250 |     { | 
| 251 |       const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); | 
| 252 |       const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); | 
| 253 |       const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); | 
| 254 |       const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); | 
| 255 |       const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); | 
| 256 |       const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); | 
| 257 |  | 
| 258 |       const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; | 
| 259 |       const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; | 
| 260 |       const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; | 
| 261 |       const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; | 
| 262 |       const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; | 
| 263 |       const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; | 
| 264 |  | 
| 265 |       const float round_up   = 1.0f+3.0f*float(ulp); | 
| 266 |       const float round_down = 1.0f-3.0f*float(ulp); | 
| 267 |  | 
| 268 | #if defined(__AVX512F__) // SKX | 
| 269 |       if (K == 16) | 
| 270 |       { | 
| 271 |         const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); | 
| 272 |         const vfloat<K> lfarP  = round_up  *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); | 
| 273 |         const vbool<K>  lhit   = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 274 |         dist = lnearP; | 
| 275 |         return lhit; | 
| 276 |       } | 
| 277 |       else | 
| 278 | #endif | 
| 279 |       { | 
| 280 |         const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); | 
| 281 |         const vfloat<K> lfarP  = round_up  *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); | 
| 282 |         const vbool<K>  lhit   = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 283 |         dist = lnearP; | 
| 284 |         return lhit; | 
| 285 |       } | 
| 286 |     } | 
| 287 |  | 
| 288 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 289 |     // Fast AABBNodeMB4D intersection | 
| 290 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 291 |  | 
| 292 |     template<int N, int K> | 
| 293 |     __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i, | 
| 294 |                                              const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 295 |     { | 
| 296 |       const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); | 
| 297 |  | 
| 298 |       const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); | 
| 299 |       const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); | 
| 300 |       const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); | 
| 301 |       const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); | 
| 302 |       const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); | 
| 303 |       const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); | 
| 304 |  | 
| 305 | #if defined(__AVX2__) || defined(__ARM_NEON) | 
| 306 |       const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); | 
| 307 |       const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); | 
| 308 |       const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); | 
| 309 |       const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); | 
| 310 |       const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); | 
| 311 |       const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); | 
| 312 | #else | 
| 313 |       const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; | 
| 314 |       const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; | 
| 315 |       const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; | 
| 316 |       const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; | 
| 317 |       const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; | 
| 318 |       const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; | 
| 319 | #endif | 
| 320 |  | 
| 321 |       const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); | 
| 322 |       const vfloat<K> lfarP  = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); | 
| 323 |       vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 324 |       if (unlikely(ref.isAABBNodeMB4D())) { | 
| 325 |         const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; | 
| 326 |         lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i])); | 
| 327 |       } | 
| 328 |       dist = lnearP; | 
| 329 |       return lhit; | 
| 330 |     } | 
| 331 |  | 
| 332 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 333 |     // Robust AABBNodeMB4D intersection | 
| 334 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 335 |  | 
| 336 |     template<int N, int K> | 
| 337 |     __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i, | 
| 338 |                                                     const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 339 |     { | 
| 340 |       const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); | 
| 341 |  | 
| 342 |       const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); | 
| 343 |       const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); | 
| 344 |       const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); | 
| 345 |       const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); | 
| 346 |       const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); | 
| 347 |       const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); | 
| 348 |  | 
| 349 |       const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; | 
| 350 |       const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; | 
| 351 |       const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; | 
| 352 |       const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; | 
| 353 |       const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; | 
| 354 |       const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; | 
| 355 |  | 
| 356 |       const float round_up   = 1.0f+3.0f*float(ulp); | 
| 357 |       const float round_down = 1.0f-3.0f*float(ulp); | 
| 358 |       const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); | 
| 359 |       const vfloat<K> lfarP  = round_up  *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); | 
| 360 |       vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 361 |  | 
| 362 |       if (unlikely(ref.isAABBNodeMB4D())) { | 
| 363 |         const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; | 
| 364 |         lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i])); | 
| 365 |       } | 
| 366 |       dist = lnearP; | 
| 367 |       return lhit; | 
| 368 |     } | 
| 369 |  | 
| 370 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 371 |     // Fast OBBNode intersection | 
| 372 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 373 |  | 
| 374 |     template<int N, int K, bool robust> | 
| 375 |     __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i, | 
| 376 |                                           const TravRayK<K,robust>& ray, vfloat<K>& dist) | 
| 377 |     { | 
| 378 |       const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]), | 
| 379 |                                     Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]), | 
| 380 |                                     Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]), | 
| 381 |                                     Vec3f(node->naabb.p   .x[i], node->naabb.p   .y[i], node->naabb.p   .z[i])); | 
| 382 |  | 
| 383 |       const Vec3vf<K> dir = xfmVector(naabb, ray.dir); | 
| 384 |       const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1? | 
| 385 |       const Vec3vf<K> org = xfmPoint(naabb, ray.org); | 
| 386 |  | 
| 387 |       const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir; | 
| 388 |       const vfloat<K> lclipMinY = org.y * nrdir.y; | 
| 389 |       const vfloat<K> lclipMinZ = org.z * nrdir.z; | 
| 390 |       const vfloat<K> lclipMaxX  = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir; | 
| 391 |       const vfloat<K> lclipMaxY  = lclipMinY - nrdir.y; | 
| 392 |       const vfloat<K> lclipMaxZ  = lclipMinZ - nrdir.z; | 
| 393 |  | 
| 394 |       vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); | 
| 395 |       vfloat<K> lfarP  = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); | 
| 396 |       if (robust) { | 
| 397 |         lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp)); | 
| 398 |         lfarP  = lfarP *vfloat<K>(1.0f+3.0f*float(ulp)); | 
| 399 |       } | 
| 400 |       const vbool<K> lhit    = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 401 |       dist = lnearP; | 
| 402 |       return lhit; | 
| 403 |     } | 
| 404 |  | 
| 405 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 406 |     // Fast OBBNodeMB intersection | 
| 407 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 408 |  | 
| 409 |     template<int N, int K, bool robust> | 
| 410 |     __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i, | 
| 411 |                                           const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 412 |     { | 
| 413 |       const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]), | 
| 414 |                                   Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]), | 
| 415 |                                   Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]), | 
| 416 |                                   Vec3f(node->space0.p   .x[i], node->space0.p   .y[i], node->space0.p   .z[i])); | 
| 417 |  | 
| 418 |       const Vec3vf<K> b0_lower = zero; | 
| 419 |       const Vec3vf<K> b0_upper = one; | 
| 420 |       const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]); | 
| 421 |       const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]); | 
| 422 |       const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time); | 
| 423 |       const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time); | 
| 424 |  | 
| 425 |       const Vec3vf<K> dir = xfmVector(xfm, ray.dir); | 
| 426 |       const Vec3vf<K> rdir = rcp_safe(dir); | 
| 427 |       const Vec3vf<K> org = xfmPoint(xfm, ray.org); | 
| 428 |  | 
| 429 |       const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x; | 
| 430 |       const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y; | 
| 431 |       const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z; | 
| 432 |       const vfloat<K> lclipMaxX  = (upper.x - org.x) * rdir.x; | 
| 433 |       const vfloat<K> lclipMaxY  = (upper.y - org.y) * rdir.y; | 
| 434 |       const vfloat<K> lclipMaxZ  = (upper.z - org.z) * rdir.z; | 
| 435 |  | 
| 436 |       vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); | 
| 437 |       vfloat<K> lfarP  = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); | 
| 438 |       if (robust) { | 
| 439 |         lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp)); | 
| 440 |         lfarP  = lfarP *vfloat<K>(1.0f+3.0f*float(ulp)); | 
| 441 |       } | 
| 442 |          | 
| 443 |       const vbool<K> lhit    = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 444 |       dist = lnearP; | 
| 445 |       return lhit; | 
| 446 |     } | 
| 447 |  | 
| 448 |  | 
| 449 |  | 
| 450 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 451 |     // QuantizedBaseNode intersection | 
| 452 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 453 |  | 
| 454 |     template<int N, int K> | 
| 455 |     __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i, | 
| 456 |                                                    const TravRayK<K,false>& ray, vfloat<K>& dist) | 
| 457 |  | 
| 458 |     { | 
| 459 |       assert(movemask(node->validMask()) & ((size_t)1 << i)); | 
| 460 |       const vfloat<N> lower_x = node->dequantizeLowerX(); | 
| 461 |       const vfloat<N> upper_x = node->dequantizeUpperX(); | 
| 462 |       const vfloat<N> lower_y = node->dequantizeLowerY(); | 
| 463 |       const vfloat<N> upper_y = node->dequantizeUpperY(); | 
| 464 |       const vfloat<N> lower_z = node->dequantizeLowerZ(); | 
| 465 |       const vfloat<N> upper_z = node->dequantizeUpperZ(); | 
| 466 |  | 
| 467 |   #if defined(__AVX2__) || defined(__ARM_NEON) | 
| 468 |       const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x); | 
| 469 |       const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y); | 
| 470 |       const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z); | 
| 471 |       const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x); | 
| 472 |       const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y); | 
| 473 |       const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z); | 
| 474 |   #else | 
| 475 |       const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; | 
| 476 |       const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; | 
| 477 |       const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; | 
| 478 |       const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; | 
| 479 |       const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; | 
| 480 |       const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; | 
| 481 |   #endif | 
| 482 |  | 
| 483 |   #if defined(__AVX512F__) // SKX | 
| 484 |       if (K == 16) | 
| 485 |       { | 
| 486 |         /* use mixed float/int min/max */ | 
| 487 |         const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); | 
| 488 |         const vfloat<K> lfarP  = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); | 
| 489 |         const vbool<K> lhit    = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); | 
| 490 |         dist = lnearP; | 
| 491 |         return lhit; | 
| 492 |       } | 
| 493 |       else | 
| 494 |   #endif | 
| 495 |       { | 
| 496 |         const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); | 
| 497 |         const vfloat<K> lfarP  = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); | 
| 498 |   #if defined(__AVX512F__) // SKX | 
| 499 |         const vbool<K> lhit    = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); | 
| 500 |   #else | 
| 501 |         const vbool<K> lhit    = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); | 
| 502 |   #endif | 
| 503 |         dist = lnearP; | 
| 504 |         return lhit; | 
| 505 |       } | 
| 506 |     } | 
| 507 |  | 
| 508 |     template<int N, int K> | 
| 509 |     __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i, | 
| 510 |           const TravRayK<K,true>& ray, vfloat<K>& dist) | 
| 511 |  | 
| 512 |     { | 
| 513 |       assert(movemask(node->validMask()) & ((size_t)1 << i)); | 
| 514 |       const vfloat<N> lower_x = node->dequantizeLowerX(); | 
| 515 |       const vfloat<N> upper_x = node->dequantizeUpperX(); | 
| 516 |       const vfloat<N> lower_y = node->dequantizeLowerY(); | 
| 517 |       const vfloat<N> upper_y = node->dequantizeUpperY(); | 
| 518 |       const vfloat<N> lower_z = node->dequantizeLowerZ(); | 
| 519 |       const vfloat<N> upper_z = node->dequantizeUpperZ(); | 
| 520 |  | 
| 521 |       const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; | 
| 522 |       const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; | 
| 523 |       const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; | 
| 524 |       const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; | 
| 525 |       const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; | 
| 526 |       const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; | 
| 527 |  | 
| 528 |       const float round_up   = 1.0f+3.0f*float(ulp); | 
| 529 |       const float round_down = 1.0f-3.0f*float(ulp); | 
| 530 |  | 
| 531 |       const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); | 
| 532 |       const vfloat<K> lfarP  = round_up  *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); | 
| 533 |       const vbool<K> lhit    = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); | 
| 534 |       dist = lnearP; | 
| 535 |       return lhit; | 
| 536 |       } | 
| 537 |  | 
| 538 |     template<int N, int K> | 
| 539 |       __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, | 
| 540 |           const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 541 |  | 
| 542 |     { | 
| 543 |         assert(movemask(node->validMask()) & ((size_t)1 << i)); | 
| 544 |  | 
| 545 |         const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time); | 
| 546 |         const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time); | 
| 547 |         const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time); | 
| 548 |         const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time); | 
| 549 |         const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time); | 
| 550 |         const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time); | 
| 551 |          | 
| 552 | #if defined(__AVX2__) || defined(__ARM_NEON) | 
| 553 |         const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); | 
| 554 |         const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); | 
| 555 |         const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); | 
| 556 |         const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); | 
| 557 |         const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); | 
| 558 |         const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); | 
| 559 | #else | 
| 560 |         const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; | 
| 561 |         const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; | 
| 562 |         const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; | 
| 563 |         const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; | 
| 564 |         const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; | 
| 565 |         const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; | 
| 566 |   #endif | 
| 567 |         const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); | 
| 568 |         const vfloat<K> lfarP  = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); | 
| 569 |         const vbool<K> lhit    = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); | 
| 570 |         dist = lnearP; | 
| 571 |         return lhit; | 
| 572 |       } | 
| 573 |  | 
| 574 |  | 
| 575 |     template<int N, int K> | 
| 576 |       __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, | 
| 577 |           const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 578 |  | 
| 579 |     { | 
| 580 |         assert(movemask(node->validMask()) & ((size_t)1 << i)); | 
| 581 |  | 
| 582 |         const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time); | 
| 583 |         const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time); | 
| 584 |         const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time); | 
| 585 |         const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time); | 
| 586 |         const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time); | 
| 587 |         const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time); | 
| 588 |  | 
| 589 |         const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; | 
| 590 |         const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; | 
| 591 |         const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; | 
| 592 |         const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; | 
| 593 |         const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; | 
| 594 |         const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; | 
| 595 |  | 
| 596 |         const float round_up   = 1.0f+3.0f*float(ulp); | 
| 597 |         const float round_down = 1.0f-3.0f*float(ulp); | 
| 598 |  | 
| 599 |         const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); | 
| 600 |         const vfloat<K> lfarP  = round_up  *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); | 
| 601 |         const vbool<K> lhit    = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); | 
| 602 |         dist = lnearP; | 
| 603 |         return lhit; | 
| 604 |       } | 
| 605 |  | 
| 606 |  | 
| 607 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 608 |     // Node intersectors used in hybrid traversal | 
| 609 |     ////////////////////////////////////////////////////////////////////////////////////// | 
| 610 |  | 
| 611 |     /*! Intersects N nodes with K rays */ | 
| 612 |     template<int N, int K, int types, bool robust> | 
| 613 |     struct BVHNNodeIntersectorK; | 
| 614 |  | 
| 615 |     template<int N, int K> | 
| 616 |     struct BVHNNodeIntersectorK<N, K, BVH_AN1, false> | 
| 617 |     { | 
| 618 |       /* vmask is both an input and an output parameter! Its initial value should be the parent node | 
| 619 |          hit mask, which is used for correctly computing the current hit mask. The parent hit mask | 
| 620 |          is actually required only for motion blur node intersections (because different rays may | 
| 621 |          have different times), so for regular nodes vmask is simply overwritten. */ | 
| 622 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 623 |                                           const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 624 |       { | 
| 625 |         vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist); | 
| 626 |         return true; | 
| 627 |       } | 
| 628 |     }; | 
| 629 |  | 
| 630 |     template<int N, int K> | 
| 631 |     struct BVHNNodeIntersectorK<N, K, BVH_AN1, true> | 
| 632 |     { | 
| 633 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 634 |                                           const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 635 |       { | 
| 636 |         vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist); | 
| 637 |         return true; | 
| 638 |       } | 
| 639 |     }; | 
| 640 |  | 
| 641 |     template<int N, int K> | 
| 642 |     struct BVHNNodeIntersectorK<N, K, BVH_AN2, false> | 
| 643 |     { | 
| 644 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 645 |                                           const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 646 |       { | 
| 647 |         vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist); | 
| 648 |         return true; | 
| 649 |       } | 
| 650 |     }; | 
| 651 |  | 
| 652 |     template<int N, int K> | 
| 653 |     struct BVHNNodeIntersectorK<N, K, BVH_AN2, true> | 
| 654 |     { | 
| 655 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 656 |                                           const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 657 |       { | 
| 658 |         vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist); | 
| 659 |         return true; | 
| 660 |       } | 
| 661 |     }; | 
| 662 |  | 
| 663 |     template<int N, int K> | 
| 664 |     struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false> | 
| 665 |     { | 
| 666 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 667 |                                           const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 668 |       { | 
| 669 |         if (likely(node.isAABBNode()))              vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist); | 
| 670 |         else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist); | 
| 671 |         return true; | 
| 672 |       } | 
| 673 |     }; | 
| 674 |  | 
| 675 |     template<int N, int K> | 
| 676 |     struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true> | 
| 677 |     { | 
| 678 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 679 |                                           const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 680 |       { | 
| 681 |         if (likely(node.isAABBNode()))              vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist); | 
| 682 |         else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist); | 
| 683 |         return true; | 
| 684 |       } | 
| 685 |     }; | 
| 686 |  | 
| 687 |     template<int N, int K> | 
| 688 |     struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false> | 
| 689 |     { | 
| 690 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 691 |                                           const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 692 |       { | 
| 693 |         if (likely(node.isAABBNodeMB()))              vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist); | 
| 694 |         else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); | 
| 695 |         return true; | 
| 696 |       } | 
| 697 |     }; | 
| 698 |  | 
| 699 |     template<int N, int K> | 
| 700 |     struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true> | 
| 701 |     { | 
| 702 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 703 |                                           const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 704 |       { | 
| 705 |         if (likely(node.isAABBNodeMB()))              vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist); | 
| 706 |         else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); | 
| 707 |         return true; | 
| 708 |       } | 
| 709 |     }; | 
| 710 |  | 
| 711 |     template<int N, int K> | 
| 712 |     struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false> | 
| 713 |     { | 
| 714 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 715 |                                           const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 716 |       { | 
| 717 |         vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist); | 
| 718 |         return true; | 
| 719 |       } | 
| 720 |     }; | 
| 721 |  | 
| 722 |     template<int N, int K> | 
| 723 |     struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true> | 
| 724 |     { | 
| 725 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 726 |                                           const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 727 |       { | 
| 728 |         vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist); | 
| 729 |         return true; | 
| 730 |       } | 
| 731 |     }; | 
| 732 |  | 
| 733 |     template<int N, int K> | 
| 734 |     struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false> | 
| 735 |     { | 
| 736 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 737 |                                           const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 738 |       { | 
| 739 |         if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { | 
| 740 |           vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist); | 
| 741 |         } else /*if (unlikely(node.isOBBNodeMB()))*/ { | 
| 742 |           assert(node.isOBBNodeMB()); | 
| 743 |           vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); | 
| 744 |         } | 
| 745 |         return true; | 
| 746 |       } | 
| 747 |     }; | 
| 748 |  | 
| 749 |     template<int N, int K> | 
| 750 |     struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true> | 
| 751 |     { | 
| 752 |       static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, | 
| 753 |                                           const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) | 
| 754 |       { | 
| 755 |         if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { | 
| 756 |           vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist); | 
| 757 |         } else /*if (unlikely(node.isOBBNodeMB()))*/ { | 
| 758 |           assert(node.isOBBNodeMB()); | 
| 759 |           vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); | 
| 760 |         } | 
| 761 |         return true; | 
| 762 |       } | 
| 763 |     }; | 
| 764 |  | 
| 765 |  | 
| 766 |     /*! Intersects N nodes with K rays */ | 
| 767 |     template<int N, int K, bool robust> | 
| 768 |     struct BVHNQuantizedBaseNodeIntersectorK; | 
| 769 |  | 
| 770 |     template<int N, int K> | 
| 771 |     struct BVHNQuantizedBaseNodeIntersectorK<N, K, false> | 
| 772 |     { | 
| 773 |       static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i, | 
| 774 |                                               const TravRayK<K,false>& ray, vfloat<K>& dist) | 
| 775 |       { | 
| 776 |         return intersectQuantizedNodeK<N,K>(node,i,ray,dist); | 
| 777 |       } | 
| 778 |  | 
| 779 |       static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, | 
| 780 |                                                const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 781 |       { | 
| 782 |         return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist); | 
| 783 |       } | 
| 784 |  | 
| 785 |     }; | 
| 786 |  | 
| 787 |     template<int N, int K> | 
| 788 |     struct BVHNQuantizedBaseNodeIntersectorK<N, K, true> | 
| 789 |     { | 
| 790 |       static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i, | 
| 791 |                                                const TravRayK<K,true>& ray, vfloat<K>& dist) | 
| 792 |       { | 
| 793 |         return intersectQuantizedNodeK<N,K>(node,i,ray,dist); | 
| 794 |       } | 
| 795 |  | 
| 796 |       static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, | 
| 797 |           const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist) | 
| 798 |       { | 
| 799 |         return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist); | 
| 800 |       } | 
| 801 |     }; | 
| 802 |  | 
| 803 |  | 
| 804 |   } | 
| 805 | } | 
| 806 |  |