| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "grid_soa.h" | 
| 7 | #include "../common/ray.h" | 
| 8 | #include "triangle_intersector_pluecker.h" | 
| 9 |  | 
| 10 | namespace embree | 
| 11 | { | 
| 12 |   namespace isa | 
| 13 |   { | 
| 14 |     template<int K> | 
| 15 |       struct MapUV0 | 
| 16 |     { | 
| 17 |       const float* const grid_uv; | 
| 18 |       size_t ofs00, ofs01, ofs10, ofs11; | 
| 19 |        | 
| 20 |       __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) | 
| 21 |         : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} | 
| 22 |        | 
| 23 |       __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const { | 
| 24 |         const vfloat<K> uv00(grid_uv[ofs00]); | 
| 25 |         const vfloat<K> uv01(grid_uv[ofs01]); | 
| 26 |         const vfloat<K> uv10(grid_uv[ofs10]); | 
| 27 |         const vfloat<K> uv11(grid_uv[ofs11]); | 
| 28 |         const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00); | 
| 29 |         const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01); | 
| 30 |         const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10); | 
| 31 |         const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0)); | 
| 32 |         u = uv[0]; v = uv[1]; | 
| 33 |       } | 
| 34 |     }; | 
| 35 |      | 
| 36 |     template<int K> | 
| 37 |       struct MapUV1 | 
| 38 |     { | 
| 39 |       const float* const grid_uv; | 
| 40 |       size_t ofs00, ofs01, ofs10, ofs11; | 
| 41 |        | 
| 42 |       __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) | 
| 43 |         : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} | 
| 44 |        | 
| 45 |       __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const { | 
| 46 |         const vfloat<K> uv00(grid_uv[ofs00]); | 
| 47 |         const vfloat<K> uv01(grid_uv[ofs01]); | 
| 48 |         const vfloat<K> uv10(grid_uv[ofs10]); | 
| 49 |         const vfloat<K> uv11(grid_uv[ofs11]); | 
| 50 |         const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10); | 
| 51 |         const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01); | 
| 52 |         const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11); | 
| 53 |         const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0)); | 
| 54 |         u = uv[0]; v = uv[1]; | 
| 55 |       } | 
| 56 |     }; | 
| 57 |      | 
| 58 |     template<int K> | 
| 59 |       class GridSOAIntersectorK | 
| 60 |     { | 
| 61 |     public: | 
| 62 |       typedef void Primitive; | 
| 63 |  | 
| 64 |       class Precalculations | 
| 65 |       { | 
| 66 | #if defined(__AVX__) | 
| 67 |         static const int M = 8; | 
| 68 | #else | 
| 69 |         static const int M = 4; | 
| 70 | #endif | 
| 71 |  | 
| 72 |       public: | 
| 73 |         __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) | 
| 74 |           : grid(nullptr), intersector(valid,ray) {} | 
| 75 |  | 
| 76 |       public: | 
| 77 |         GridSOA* grid; | 
| 78 |         PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector | 
| 79 |       }; | 
| 80 |  | 
| 81 |       /*! Intersect a ray with the primitive. */ | 
| 82 |       static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 83 |       { | 
| 84 |         const size_t dim_offset    = pre.grid->dim_offset; | 
| 85 |         const size_t line_offset   = pre.grid->width; | 
| 86 |         const float* const grid_x  = pre.grid->decodeLeaf(0,prim); | 
| 87 |         const float* const grid_y  = grid_x + 1 * dim_offset; | 
| 88 |         const float* const grid_z  = grid_x + 2 * dim_offset; | 
| 89 |         const float* const grid_uv = grid_x + 3 * dim_offset; | 
| 90 |  | 
| 91 |         const size_t max_x = pre.grid->width  == 2 ? 1 : 2; | 
| 92 |         const size_t max_y = pre.grid->height == 2 ? 1 : 2; | 
| 93 |         for (size_t y=0; y<max_y; y++)         | 
| 94 |         { | 
| 95 |           for (size_t x=0; x<max_x; x++) | 
| 96 |           { | 
| 97 |             const size_t ofs00 = (y+0)*line_offset+(x+0); | 
| 98 |             const size_t ofs01 = (y+0)*line_offset+(x+1); | 
| 99 |             const size_t ofs10 = (y+1)*line_offset+(x+0); | 
| 100 |             const size_t ofs11 = (y+1)*line_offset+(x+1); | 
| 101 |             const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); | 
| 102 |             const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); | 
| 103 |             const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); | 
| 104 |             const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); | 
| 105 |  | 
| 106 |             pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); | 
| 107 |             pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); | 
| 108 |           } | 
| 109 |         } | 
| 110 |       } | 
| 111 |  | 
| 112 |       /*! Test if the ray is occluded by the primitive */ | 
| 113 |       static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 114 |       { | 
| 115 |         const size_t dim_offset    = pre.grid->dim_offset; | 
| 116 |         const size_t line_offset   = pre.grid->width; | 
| 117 |         const float* const grid_x  = pre.grid->decodeLeaf(0,prim); | 
| 118 |         const float* const grid_y  = grid_x + 1 * dim_offset; | 
| 119 |         const float* const grid_z  = grid_x + 2 * dim_offset; | 
| 120 |         const float* const grid_uv = grid_x + 3 * dim_offset; | 
| 121 |  | 
| 122 |         vbool<K> valid = valid_i; | 
| 123 |         const size_t max_x = pre.grid->width  == 2 ? 1 : 2; | 
| 124 |         const size_t max_y = pre.grid->height == 2 ? 1 : 2; | 
| 125 |         for (size_t y=0; y<max_y; y++)         | 
| 126 |         { | 
| 127 |           for (size_t x=0; x<max_x; x++) | 
| 128 |           { | 
| 129 |             const size_t ofs00 = (y+0)*line_offset+(x+0); | 
| 130 |             const size_t ofs01 = (y+0)*line_offset+(x+1); | 
| 131 |             const size_t ofs10 = (y+1)*line_offset+(x+0); | 
| 132 |             const size_t ofs11 = (y+1)*line_offset+(x+1); | 
| 133 |             const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); | 
| 134 |             const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); | 
| 135 |             const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); | 
| 136 |             const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); | 
| 137 |  | 
| 138 |             pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); | 
| 139 |             if (none(valid)) break; | 
| 140 |             pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); | 
| 141 |             if (none(valid)) break; | 
| 142 |           } | 
| 143 |         } | 
| 144 |         return !valid; | 
| 145 |       } | 
| 146 |  | 
| 147 |       template<typename Loader> | 
| 148 |         static __forceinline void intersect(RayHitK<K>& ray, size_t k, | 
| 149 |                                             IntersectContext* context, | 
| 150 |                                             const float* const grid_x, | 
| 151 |                                             const size_t line_offset, | 
| 152 |                                             const size_t lines, | 
| 153 |                                             Precalculations& pre) | 
| 154 |       { | 
| 155 |         typedef typename Loader::vfloat vfloat; | 
| 156 |         const size_t dim_offset    = pre.grid->dim_offset; | 
| 157 |         const float* const grid_y  = grid_x + 1 * dim_offset; | 
| 158 |         const float* const grid_z  = grid_x + 2 * dim_offset; | 
| 159 |         const float* const grid_uv = grid_x + 3 * dim_offset; | 
| 160 |         Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); | 
| 161 |         pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); | 
| 162 |       }; | 
| 163 |  | 
| 164 |       template<typename Loader> | 
| 165 |         static __forceinline bool occluded(RayK<K>& ray, size_t k, | 
| 166 |                                            IntersectContext* context, | 
| 167 |                                            const float* const grid_x, | 
| 168 |                                            const size_t line_offset, | 
| 169 |                                            const size_t lines, | 
| 170 |                                            Precalculations& pre) | 
| 171 |       { | 
| 172 |         typedef typename Loader::vfloat vfloat; | 
| 173 |         const size_t dim_offset    = pre.grid->dim_offset; | 
| 174 |         const float* const grid_y  = grid_x + 1 * dim_offset; | 
| 175 |         const float* const grid_z  = grid_x + 2 * dim_offset; | 
| 176 |         const float* const grid_uv = grid_x + 3 * dim_offset; | 
| 177 |         Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); | 
| 178 |         return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); | 
| 179 |       } | 
| 180 |  | 
| 181 |       /*! Intersect a ray with the primitive. */ | 
| 182 |       static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 183 |       { | 
| 184 |         const size_t line_offset   = pre.grid->width; | 
| 185 |         const size_t lines         = pre.grid->height; | 
| 186 |         const float* const grid_x  = pre.grid->decodeLeaf(0,prim); | 
| 187 | #if defined(__AVX__) | 
| 188 |         intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre); | 
| 189 | #else | 
| 190 |         intersect<GridSOA::Gather2x3>(ray, k, context, grid_x            , line_offset, lines, pre); | 
| 191 |         if (likely(lines > 2)) | 
| 192 |           intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre); | 
| 193 | #endif | 
| 194 |       } | 
| 195 |  | 
| 196 |       /*! Test if the ray is occluded by the primitive */ | 
| 197 |       static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 198 |       { | 
| 199 |         const size_t line_offset   = pre.grid->width; | 
| 200 |         const size_t lines         = pre.grid->height; | 
| 201 |         const float* const grid_x  = pre.grid->decodeLeaf(0,prim); | 
| 202 |  | 
| 203 | #if defined(__AVX__) | 
| 204 |         return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre); | 
| 205 | #else | 
| 206 |         if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x            , line_offset, lines, pre)) return true; | 
| 207 |         if (likely(lines > 2)) | 
| 208 |           if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true; | 
| 209 | #endif | 
| 210 |         return false; | 
| 211 |       } | 
| 212 |     }; | 
| 213 |  | 
| 214 |     template<int K> | 
| 215 |     class GridSOAMBIntersectorK | 
| 216 |     { | 
| 217 |     public: | 
| 218 |       typedef void Primitive; | 
| 219 |       typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations; | 
| 220 |  | 
| 221 |       /*! Intersect a ray with the primitive. */ | 
| 222 |       static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 223 |       { | 
| 224 |         vfloat<K> vftime; | 
| 225 |         vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); | 
| 226 |  | 
| 227 |         vbool<K> valid1 = valid_i; | 
| 228 |         while (any(valid1)) { | 
| 229 |           const size_t j = bsf(movemask(valid1)); | 
| 230 |           const int itime = vitime[j]; | 
| 231 |           const vbool<K> valid2 = valid1 & (itime == vitime); | 
| 232 |           valid1 = valid1 & !valid2; | 
| 233 |           intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node); | 
| 234 |         } | 
| 235 |       } | 
| 236 |  | 
| 237 |       /*! Intersect a ray with the primitive. */ | 
| 238 |       static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 239 |       { | 
| 240 |         const size_t grid_offset   = pre.grid->gridBytes >> 2; | 
| 241 |         const size_t dim_offset    = pre.grid->dim_offset; | 
| 242 |         const size_t line_offset   = pre.grid->width; | 
| 243 |         const float* const grid_x  = pre.grid->decodeLeaf(itime,prim); | 
| 244 |         const float* const grid_y  = grid_x + 1 * dim_offset; | 
| 245 |         const float* const grid_z  = grid_x + 2 * dim_offset; | 
| 246 |         const float* const grid_uv = grid_x + 3 * dim_offset; | 
| 247 |  | 
| 248 |         const size_t max_x = pre.grid->width  == 2 ? 1 : 2; | 
| 249 |         const size_t max_y = pre.grid->height == 2 ? 1 : 2; | 
| 250 |         for (size_t y=0; y<max_y; y++)         | 
| 251 |         { | 
| 252 |           for (size_t x=0; x<max_x; x++) | 
| 253 |           { | 
| 254 |             size_t ofs00 = (y+0)*line_offset+(x+0); | 
| 255 |             size_t ofs01 = (y+0)*line_offset+(x+1); | 
| 256 |             size_t ofs10 = (y+1)*line_offset+(x+0); | 
| 257 |             size_t ofs11 = (y+1)*line_offset+(x+1); | 
| 258 |             const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); | 
| 259 |             const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); | 
| 260 |             const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); | 
| 261 |             const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); | 
| 262 |             ofs00 += grid_offset; | 
| 263 |             ofs01 += grid_offset; | 
| 264 |             ofs10 += grid_offset; | 
| 265 |             ofs11 += grid_offset; | 
| 266 |             const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); | 
| 267 |             const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); | 
| 268 |             const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); | 
| 269 |             const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); | 
| 270 |             const Vec3vf<K> p00 = lerp(a00,b00,ftime); | 
| 271 |             const Vec3vf<K> p01 = lerp(a01,b01,ftime); | 
| 272 |             const Vec3vf<K> p10 = lerp(a10,b10,ftime); | 
| 273 |             const Vec3vf<K> p11 = lerp(a11,b11,ftime); | 
| 274 |  | 
| 275 |             pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); | 
| 276 |             pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); | 
| 277 |           } | 
| 278 |         } | 
| 279 |       } | 
| 280 |  | 
| 281 |       /*! Test if the ray is occluded by the primitive */ | 
| 282 |       static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 283 |       { | 
| 284 |         vfloat<K> vftime; | 
| 285 |         vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); | 
| 286 |  | 
| 287 |         vbool<K> valid_o = valid_i; | 
| 288 |         vbool<K> valid1 = valid_i; | 
| 289 |         while (any(valid1)) { | 
| 290 |           const int j = int(bsf(movemask(valid1))); | 
| 291 |           const int itime = vitime[j]; | 
| 292 |           const vbool<K> valid2 = valid1 & (itime == vitime); | 
| 293 |           valid1 = valid1 & !valid2; | 
| 294 |           valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node); | 
| 295 |         } | 
| 296 |         return !valid_o; | 
| 297 |       } | 
| 298 |  | 
| 299 |       /*! Test if the ray is occluded by the primitive */ | 
| 300 |       static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 301 |       { | 
| 302 |         const size_t grid_offset   = pre.grid->gridBytes >> 2; | 
| 303 |         const size_t dim_offset    = pre.grid->dim_offset; | 
| 304 |         const size_t line_offset   = pre.grid->width; | 
| 305 |         const float* const grid_x  = pre.grid->decodeLeaf(itime,prim); | 
| 306 |         const float* const grid_y  = grid_x + 1 * dim_offset; | 
| 307 |         const float* const grid_z  = grid_x + 2 * dim_offset; | 
| 308 |         const float* const grid_uv = grid_x + 3 * dim_offset; | 
| 309 |  | 
| 310 |         vbool<K> valid = valid_i; | 
| 311 |         const size_t max_x = pre.grid->width  == 2 ? 1 : 2; | 
| 312 |         const size_t max_y = pre.grid->height == 2 ? 1 : 2; | 
| 313 |         for (size_t y=0; y<max_y; y++)         | 
| 314 |         { | 
| 315 |           for (size_t x=0; x<max_x; x++) | 
| 316 |           { | 
| 317 |             size_t ofs00 = (y+0)*line_offset+(x+0); | 
| 318 |             size_t ofs01 = (y+0)*line_offset+(x+1); | 
| 319 |             size_t ofs10 = (y+1)*line_offset+(x+0); | 
| 320 |             size_t ofs11 = (y+1)*line_offset+(x+1); | 
| 321 |             const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); | 
| 322 |             const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); | 
| 323 |             const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); | 
| 324 |             const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); | 
| 325 |             ofs00 += grid_offset; | 
| 326 |             ofs01 += grid_offset; | 
| 327 |             ofs10 += grid_offset; | 
| 328 |             ofs11 += grid_offset; | 
| 329 |             const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); | 
| 330 |             const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); | 
| 331 |             const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); | 
| 332 |             const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); | 
| 333 |             const Vec3vf<K> p00 = lerp(a00,b00,ftime); | 
| 334 |             const Vec3vf<K> p01 = lerp(a01,b01,ftime); | 
| 335 |             const Vec3vf<K> p10 = lerp(a10,b10,ftime); | 
| 336 |             const Vec3vf<K> p11 = lerp(a11,b11,ftime); | 
| 337 |  | 
| 338 |             pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); | 
| 339 |             if (none(valid)) break; | 
| 340 |             pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); | 
| 341 |             if (none(valid)) break; | 
| 342 |           } | 
| 343 |         } | 
| 344 |         return valid; | 
| 345 |       } | 
| 346 |  | 
| 347 |       template<typename Loader> | 
| 348 |         static __forceinline void intersect(RayHitK<K>& ray, size_t k, | 
| 349 |                                             const float ftime, | 
| 350 |                                             IntersectContext* context, | 
| 351 |                                             const float* const grid_x, | 
| 352 |                                             const size_t line_offset, | 
| 353 |                                             const size_t lines, | 
| 354 |                                             Precalculations& pre) | 
| 355 |       { | 
| 356 |         typedef typename Loader::vfloat vfloat; | 
| 357 |         const size_t grid_offset   = pre.grid->gridBytes >> 2; | 
| 358 |         const size_t dim_offset    = pre.grid->dim_offset; | 
| 359 |         const float* const grid_y  = grid_x + 1 * dim_offset; | 
| 360 |         const float* const grid_z  = grid_x + 2 * dim_offset; | 
| 361 |         const float* const grid_uv = grid_x + 3 * dim_offset; | 
| 362 |  | 
| 363 |         Vec3<vfloat> a0, a1, a2; | 
| 364 |         Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); | 
| 365 |  | 
| 366 |         Vec3<vfloat> b0, b1, b2; | 
| 367 |         Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); | 
| 368 |  | 
| 369 |         Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); | 
| 370 |         Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); | 
| 371 |         Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); | 
| 372 |  | 
| 373 |         pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); | 
| 374 |       }; | 
| 375 |  | 
| 376 |       template<typename Loader> | 
| 377 |         static __forceinline bool occluded(RayK<K>& ray, size_t k, | 
| 378 |                                            const float ftime, | 
| 379 |                                            IntersectContext* context, | 
| 380 |                                            const float* const grid_x, | 
| 381 |                                            const size_t line_offset, | 
| 382 |                                            const size_t lines, | 
| 383 |                                            Precalculations& pre) | 
| 384 |       { | 
| 385 |         typedef typename Loader::vfloat vfloat; | 
| 386 |         const size_t grid_offset   = pre.grid->gridBytes >> 2; | 
| 387 |         const size_t dim_offset    = pre.grid->dim_offset; | 
| 388 |         const float* const grid_y  = grid_x + 1 * dim_offset; | 
| 389 |         const float* const grid_z  = grid_x + 2 * dim_offset; | 
| 390 |         const float* const grid_uv = grid_x + 3 * dim_offset; | 
| 391 |  | 
| 392 |         Vec3<vfloat> a0, a1, a2; | 
| 393 |         Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); | 
| 394 |  | 
| 395 |         Vec3<vfloat> b0, b1, b2; | 
| 396 |         Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); | 
| 397 |  | 
| 398 |         Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); | 
| 399 |         Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); | 
| 400 |         Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); | 
| 401 |  | 
| 402 |         return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); | 
| 403 |       } | 
| 404 |  | 
| 405 |       /*! Intersect a ray with the primitive. */ | 
| 406 |       static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 407 |       {  | 
| 408 |         float ftime; | 
| 409 |         int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime); | 
| 410 |  | 
| 411 |         const size_t line_offset   = pre.grid->width; | 
| 412 |         const size_t lines         = pre.grid->height; | 
| 413 |         const float* const grid_x  = pre.grid->decodeLeaf(itime,prim); | 
| 414 |  | 
| 415 | #if defined(__AVX__) | 
| 416 |         intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre); | 
| 417 | #else | 
| 418 |         intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre); | 
| 419 |         if (likely(lines > 2)) | 
| 420 |           intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre); | 
| 421 | #endif | 
| 422 |       } | 
| 423 |  | 
| 424 |       /*! Test if the ray is occluded by the primitive */ | 
| 425 |       static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) | 
| 426 |       { | 
| 427 |         float ftime; | 
| 428 |         int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime); | 
| 429 |  | 
| 430 |         const size_t line_offset   = pre.grid->width; | 
| 431 |         const size_t lines         = pre.grid->height; | 
| 432 |         const float* const grid_x  = pre.grid->decodeLeaf(itime,prim); | 
| 433 |  | 
| 434 | #if defined(__AVX__) | 
| 435 |         return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre); | 
| 436 | #else | 
| 437 |         if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true; | 
| 438 |         if (likely(lines > 2)) | 
| 439 |           if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true; | 
| 440 | #endif | 
| 441 |         return false; | 
| 442 |       } | 
| 443 |     }; | 
| 444 |   } | 
| 445 | } | 
| 446 |  |