| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "../common/ray.h" | 
| 7 | #include "../common/scene_subdiv_mesh.h" | 
| 8 | #include "../bvh/bvh.h" | 
| 9 | #include "../subdiv/tessellation.h" | 
| 10 | #include "../subdiv/tessellation_cache.h" | 
| 11 | #include "subdivpatch1.h" | 
| 12 |  | 
| 13 | namespace embree | 
| 14 | { | 
| 15 |   namespace isa | 
| 16 |   { | 
| 17 |     class GridSOA | 
| 18 |     { | 
| 19 |     public: | 
| 20 |  | 
| 21 |       /*! GridSOA constructor */ | 
| 22 |       GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps, | 
| 23 |               const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight, | 
| 24 |               const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr); | 
| 25 |  | 
| 26 |       /*! Subgrid creation */ | 
| 27 |       template<typename Allocator> | 
| 28 |         static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps, | 
| 29 |                                unsigned x0, unsigned x1, unsigned y0, unsigned y1,  | 
| 30 |                                const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr) | 
| 31 |       { | 
| 32 |         const unsigned width = x1-x0+1;   | 
| 33 |         const unsigned height = y1-y0+1;  | 
| 34 |         const GridRange range(0,width-1,0,height-1); | 
| 35 |         size_t bvhBytes = 0; | 
| 36 |         if (time_steps == 1)  | 
| 37 |           bvhBytes = getBVHBytes(range,nodeBytes: sizeof(BVH4::AABBNode),leafBytes: 0); | 
| 38 |         else { | 
| 39 |           bvhBytes = (time_steps-1)*getBVHBytes(range,nodeBytes: sizeof(BVH4::AABBNodeMB),leafBytes: 0); | 
| 40 |           bvhBytes += getTemporalBVHBytes(time_range: make_range(begin: 0,end: int(time_steps-1)),nodeBytes: sizeof(BVH4::AABBNodeMB4D)); | 
| 41 |         } | 
| 42 |         const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float);   | 
| 43 |         size_t rootBytes = time_steps*sizeof(BVH4::NodeRef); | 
| 44 | #if !defined(__64BIT__) | 
| 45 |         rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding. | 
| 46 | #endif | 
| 47 |         void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes); | 
| 48 |         assert(data); | 
| 49 |         return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(i: patches->geomID()),bvhBytes,gridBytes,bounds_o); | 
| 50 |       } | 
| 51 |  | 
| 52 |       /*! Grid creation */ | 
| 53 |       template<typename Allocator> | 
| 54 |         static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps, | 
| 55 |                                const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)  | 
| 56 |       { | 
| 57 |         return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o); | 
| 58 |       } | 
| 59 |  | 
| 60 |        /*! returns reference to root */ | 
| 61 |       __forceinline       BVH4::NodeRef& root(size_t t = 0)       { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } | 
| 62 |       __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } | 
| 63 |  | 
| 64 |       /*! returns pointer to BVH array */ | 
| 65 |       __forceinline       char* bvhData()       { return &data[0]; } | 
| 66 |       __forceinline const char* bvhData() const { return &data[0]; } | 
| 67 |  | 
| 68 |       /*! returns pointer to Grid array */ | 
| 69 |       __forceinline       float* gridData(size_t t = 0)       { return (float*) &data[gridOffset + t*gridBytes]; } | 
| 70 |       __forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; } | 
| 71 |        | 
| 72 |       __forceinline void* encodeLeaf(size_t u, size_t v) { | 
| 73 |         return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf | 
| 74 |       } | 
| 75 |       __forceinline float* decodeLeaf(size_t t, const void* ptr) { | 
| 76 |         return gridData(t) + (((size_t) (ptr) >> 4) - 1); | 
| 77 |       } | 
| 78 |  | 
| 79 |       /*! returns the size of the BVH over the grid in bytes */ | 
| 80 |       static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes); | 
| 81 |  | 
| 82 |       /*! returns the size of the temporal BVH over the time range BVHs */ | 
| 83 |       static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes); | 
| 84 |  | 
| 85 |       /*! calculates bounding box of grid range */ | 
| 86 |       __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const | 
| 87 |       { | 
| 88 |         const float* const grid_array = gridData(t: time); | 
| 89 |         const float* const grid_x_array = grid_array + 0 * dim_offset; | 
| 90 |         const float* const grid_y_array = grid_array + 1 * dim_offset; | 
| 91 |         const float* const grid_z_array = grid_array + 2 * dim_offset; | 
| 92 |          | 
| 93 |         /* compute the bounds just for the range! */ | 
| 94 |         BBox3fa bounds( empty ); | 
| 95 |         for (unsigned v = range.v_start; v<=range.v_end; v++)  | 
| 96 |         { | 
| 97 |           for (unsigned u = range.u_start; u<=range.u_end; u++) | 
| 98 |           { | 
| 99 |             const float x = grid_x_array[ v * width + u]; | 
| 100 |             const float y = grid_y_array[ v * width + u]; | 
| 101 |             const float z = grid_z_array[ v * width + u]; | 
| 102 |             bounds.extend( other: Vec3fa(x,y,z) ); | 
| 103 |           } | 
| 104 |         } | 
| 105 |         assert(is_finite(bounds)); | 
| 106 |         return bounds; | 
| 107 |       } | 
| 108 |  | 
| 109 |       /*! Evaluates grid over patch and builds BVH4 tree over the grid. */ | 
| 110 |       std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o); | 
| 111 |        | 
| 112 |       /*! Create BVH4 tree over grid. */ | 
| 113 |       std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator); | 
| 114 |  | 
| 115 |       /*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */ | 
| 116 |       std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o); | 
| 117 |        | 
| 118 |       /*! Create MBlur BVH4 tree over grid. */ | 
| 119 |       std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator); | 
| 120 |  | 
| 121 |       /*! Create MSMBlur BVH4 tree over grid. */ | 
| 122 |       std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o); | 
| 123 |  | 
| 124 |       template<typename Loader> | 
| 125 |         struct MapUV | 
| 126 |       { | 
| 127 |         typedef typename Loader::vfloat vfloat; | 
| 128 |         const float* const grid_uv; | 
| 129 |         size_t line_offset; | 
| 130 |         size_t lines; | 
| 131 |  | 
| 132 |         __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines) | 
| 133 |           : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {} | 
| 134 |  | 
| 135 |         __forceinline void operator() (vfloat& u, vfloat& v, Vec3<vfloat>& Ng) const { | 
| 136 |           const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);	 | 
| 137 |           const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]); | 
| 138 |           const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]); | 
| 139 |           const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]);         | 
| 140 |           const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0;         | 
| 141 |           u = uv[0];v = uv[1];  | 
| 142 |         } | 
| 143 |       }; | 
| 144 |  | 
| 145 |       struct Gather2x3 | 
| 146 |       { | 
| 147 |         enum { M = 4 }; | 
| 148 |         typedef vbool4 vbool; | 
| 149 |         typedef vint4 vint; | 
| 150 |         typedef vfloat4 vfloat; | 
| 151 |          | 
| 152 |         static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines) | 
| 153 |         { | 
| 154 |           vfloat4 r0 = vfloat4::loadu(a: grid + 0*line_offset); | 
| 155 |           vfloat4 r1 = vfloat4::loadu(a: grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid | 
| 156 |           if (unlikely(line_offset == 2)) | 
| 157 |           { | 
| 158 |             r0 = shuffle<0,1,1,1>(v: r0); | 
| 159 |             r1 = shuffle<0,1,1,1>(v: r1); | 
| 160 |           } | 
| 161 |           return Vec3vf4(unpacklo(a: r0,b: r1),       // r00, r10, r01, r11 | 
| 162 |                          shuffle<1,1,2,2>(v: r0),  // r01, r01, r02, r02 | 
| 163 |                          shuffle<0,1,1,2>(v: r1)); // r10, r11, r11, r12 | 
| 164 |         } | 
| 165 |  | 
| 166 |         static __forceinline void gather(const float* const grid_x,  | 
| 167 |                                          const float* const grid_y,  | 
| 168 |                                          const float* const grid_z,  | 
| 169 |                                          const size_t line_offset, | 
| 170 |                                          const size_t lines, | 
| 171 |                                          Vec3vf4& v0_o, | 
| 172 |                                          Vec3vf4& v1_o, | 
| 173 |                                          Vec3vf4& v2_o) | 
| 174 |         { | 
| 175 |           const Vec3vf4 tri_v012_x = gather(grid: grid_x,line_offset,lines); | 
| 176 |           const Vec3vf4 tri_v012_y = gather(grid: grid_y,line_offset,lines); | 
| 177 |           const Vec3vf4 tri_v012_z = gather(grid: grid_z,line_offset,lines); | 
| 178 |           v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); | 
| 179 |           v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]); | 
| 180 |           v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]); | 
| 181 |         } | 
| 182 |       }; | 
| 183 |        | 
| 184 | #if defined (__AVX__) | 
| 185 |       struct Gather3x3 | 
| 186 |       { | 
| 187 |         enum { M = 8 }; | 
| 188 |         typedef vbool8 vbool; | 
| 189 |         typedef vint8 vint; | 
| 190 |         typedef vfloat8 vfloat; | 
| 191 |          | 
| 192 |         static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines) | 
| 193 |         { | 
| 194 |           vfloat4 ra = vfloat4::loadu(grid + 0*line_offset); | 
| 195 |           vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid | 
| 196 |           vfloat4 rc; | 
| 197 |           if (likely(lines > 2))  | 
| 198 |             rc = vfloat4::loadu(grid + 2*line_offset); | 
| 199 |           else                    | 
| 200 |             rc = rb; | 
| 201 |  | 
| 202 |           if (unlikely(line_offset == 2)) | 
| 203 |           { | 
| 204 |             ra = shuffle<0,1,1,1>(ra); | 
| 205 |             rb = shuffle<0,1,1,1>(rb); | 
| 206 |             rc = shuffle<0,1,1,1>(rc); | 
| 207 |           } | 
| 208 |            | 
| 209 |           const vfloat8 r0 = vfloat8(ra,rb); | 
| 210 |           const vfloat8 r1 = vfloat8(rb,rc); | 
| 211 |           return Vec3vf8(unpacklo(r0,r1),         // r00, r10, r01, r11, r10, r20, r11, r21 | 
| 212 |                          shuffle<1,1,2,2>(r0),    // r01, r01, r02, r02, r11, r11, r12, r12 | 
| 213 |                          shuffle<0,1,1,2>(r1));   // r10, r11, r11, r12, r20, r21, r21, r22 | 
| 214 |         } | 
| 215 |  | 
| 216 |         static __forceinline void gather(const float* const grid_x,  | 
| 217 |                                          const float* const grid_y,  | 
| 218 |                                          const float* const grid_z,  | 
| 219 |                                          const size_t line_offset, | 
| 220 |                                          const size_t lines, | 
| 221 |                                          Vec3vf8& v0_o, | 
| 222 |                                          Vec3vf8& v1_o, | 
| 223 |                                          Vec3vf8& v2_o) | 
| 224 |         { | 
| 225 |           const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines); | 
| 226 |           const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines); | 
| 227 |           const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines); | 
| 228 |           v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); | 
| 229 |           v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]); | 
| 230 |           v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]); | 
| 231 |         } | 
| 232 |       }; | 
| 233 | #endif | 
| 234 |  | 
| 235 |       template<typename vfloat> | 
| 236 |       static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv) | 
| 237 |       { | 
| 238 |         typedef typename vfloat::Int vint; | 
| 239 |         const vint iu  = asInt(uv) & 0xffff; | 
| 240 |         const vint iv  = srl(asInt(uv),16); | 
| 241 | 	const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000); | 
| 242 | 	const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000); | 
| 243 | 	return Vec2<vfloat>(u,v); | 
| 244 |       } | 
| 245 |        | 
| 246 |       __forceinline unsigned int geomID() const  { | 
| 247 |         return _geomID; | 
| 248 |       }  | 
| 249 |        | 
| 250 |       __forceinline unsigned int primID() const  { | 
| 251 |         return _primID; | 
| 252 |       }  | 
| 253 |  | 
| 254 |     public: | 
| 255 |       BVH4::NodeRef troot; | 
| 256 | #if !defined(__64BIT__) | 
| 257 |       unsigned align1; | 
| 258 | #endif | 
| 259 |       unsigned time_steps; | 
| 260 |       unsigned width; | 
| 261 |  | 
| 262 |       unsigned height; | 
| 263 |       unsigned dim_offset; | 
| 264 |       unsigned _geomID; | 
| 265 |       unsigned _primID; | 
| 266 |  | 
| 267 |       unsigned align2; | 
| 268 |       unsigned gridOffset; | 
| 269 |       unsigned gridBytes; | 
| 270 |       unsigned rootOffset; | 
| 271 |  | 
| 272 |       char data[1];        //!< after the struct we first store the BVH, then the grid, and finally the roots | 
| 273 |     }; | 
| 274 |   } | 
| 275 | } | 
| 276 |  |