| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "../common/scene.h" | 
| 7 | #include "priminfo.h" | 
| 8 |  | 
| 9 | namespace embree | 
| 10 | { | 
| 11 |   static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5; | 
| 12 |  | 
| 13 |   namespace isa | 
| 14 |   { | 
| 15 |  | 
| 16 |     /*! mapping into bins */ | 
| 17 |     template<size_t BINS> | 
| 18 |       struct SpatialBinMapping | 
| 19 |       { | 
| 20 |       public: | 
| 21 |         __forceinline SpatialBinMapping() {} | 
| 22 |          | 
| 23 |         /*! calculates the mapping */ | 
| 24 |         __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo) | 
| 25 |         { | 
| 26 |           const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower; | 
| 27 |           const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper; | 
| 28 |           const vfloat4 eps = 128.0f*vfloat4(ulp)*max(a: abs(a: lower),b: abs(a: upper)); | 
| 29 |           const vfloat4 diag = max(a: eps,b: (vfloat4) pinfo.geomBounds.size()); | 
| 30 |           scale = select(m: upper-lower <= eps,t: vfloat4(0.0f),f: vfloat4(BINS)/diag); | 
| 31 |           ofs  = (vfloat4) pinfo.geomBounds.lower; | 
| 32 |           inv_scale = 1.0f / scale;  | 
| 33 |         } | 
| 34 |  | 
| 35 |         /*! slower but safe binning */ | 
| 36 |         __forceinline vint4 bin(const Vec3fa& p) const | 
| 37 |         { | 
| 38 |           const vint4 i = floori(a: (vfloat4(p)-ofs)*scale); | 
| 39 |           return clamp(x: i,lower: vint4(0),upper: vint4(BINS-1)); | 
| 40 |         } | 
| 41 |  | 
| 42 |         __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const | 
| 43 |         { | 
| 44 | #if defined(__AVX__) | 
| 45 |           const vfloat8 ofs8(ofs); | 
| 46 |           const vfloat8 scale8(scale); | 
| 47 |           const vint8 lu   = floori((vfloat8::loadu(&b)-ofs8)*scale8); | 
| 48 |           const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1)); | 
| 49 |           return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu)); | 
| 50 | #else | 
| 51 |           const vint4 lower = floori(a: (vfloat4(b.lower)-ofs)*scale); | 
| 52 |           const vint4 upper = floori(a: (vfloat4(b.upper)-ofs)*scale); | 
| 53 |           const vint4 c_lower = clamp(x: lower,lower: vint4(0),upper: vint4(BINS-1)); | 
| 54 |           const vint4 c_upper = clamp(x: upper,lower: vint4(0),upper: vint4(BINS-1)); | 
| 55 |           return std::pair<vint4,vint4>(c_lower,c_upper); | 
| 56 | #endif | 
| 57 |         } | 
| 58 |  | 
| 59 |          | 
| 60 |         /*! calculates left spatial position of bin */ | 
| 61 |         __forceinline float pos(const size_t bin, const size_t dim) const { | 
| 62 |           return madd(a: float(bin),b: inv_scale[dim],c: ofs[dim]); | 
| 63 |         } | 
| 64 |  | 
| 65 |         /*! calculates left spatial position of bin */ | 
| 66 |         template<size_t N> | 
| 67 |         __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const { | 
| 68 |           return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim])); | 
| 69 |         } | 
| 70 |          | 
| 71 |         /*! returns true if the mapping is invalid in some dimension */ | 
| 72 |         __forceinline bool invalid(const size_t dim) const { | 
| 73 |           return scale[dim] == 0.0f; | 
| 74 |         } | 
| 75 |          | 
| 76 |       public: | 
| 77 |         vfloat4 ofs,scale,inv_scale;  //!< linear function that maps to bin ID | 
| 78 |       }; | 
| 79 |  | 
| 80 |     /*! stores all information required to perform some split */ | 
| 81 |     template<size_t BINS> | 
| 82 |       struct SpatialBinSplit | 
| 83 |       { | 
| 84 |         /*! construct an invalid split by default */ | 
| 85 |         __forceinline SpatialBinSplit()  | 
| 86 |           : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {} | 
| 87 |          | 
| 88 |         /*! constructs specified split */ | 
| 89 |         __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping) | 
| 90 |           : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {} | 
| 91 |  | 
| 92 |         /*! constructs specified split */ | 
| 93 |         __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping) | 
| 94 |           : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {} | 
| 95 |          | 
| 96 |         /*! tests if this split is valid */ | 
| 97 |         __forceinline bool valid() const { return dim != -1; } | 
| 98 |          | 
| 99 |         /*! calculates surface area heuristic for performing the split */ | 
| 100 |         __forceinline float splitSAH() const { return sah; } | 
| 101 |          | 
| 102 |         /*! stream output */ | 
| 103 |         friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) { | 
| 104 |           return cout << "SpatialBinSplit { sah = "  << split.sah << ", dim = "  << split.dim << ", pos = "  << split.pos << ", left = "  << split.left << ", right = "  << split.right << ", factor = "  << split.factor << "}" ; | 
| 105 |         } | 
| 106 |          | 
| 107 |       public: | 
| 108 |         float sah;                 //!< SAH cost of the split | 
| 109 |         int   dim;                 //!< split dimension | 
| 110 |         int   pos;                 //!< split position | 
| 111 |         int   left;                //!< number of elements on the left side | 
| 112 |         int   right;               //!< number of elements on the right side | 
| 113 |         float factor;              //!< factor splitting the extended range | 
| 114 |         SpatialBinMapping<BINS> mapping; //!< mapping into bins | 
| 115 |       };     | 
| 116 |      | 
| 117 |     /*! stores all binning information */ | 
| 118 |     template<size_t BINS, typename PrimRef> | 
| 119 |       struct __aligned(64) SpatialBinInfo | 
| 120 |     { | 
| 121 |       SpatialBinInfo() { | 
| 122 |       } | 
| 123 |  | 
| 124 |       __forceinline SpatialBinInfo(EmptyTy) { | 
| 125 | 	clear(); | 
| 126 |       } | 
| 127 |  | 
| 128 |       /*! clears the bin info */ | 
| 129 |       __forceinline void clear()  | 
| 130 |       { | 
| 131 |         for (size_t i=0; i<BINS; i++) {  | 
| 132 |           bounds[i][0] = bounds[i][1] = bounds[i][2] = empty; | 
| 133 |           numBegin[i] = numEnd[i] = 0; | 
| 134 |         } | 
| 135 |       } | 
| 136 |        | 
| 137 |       /*! adds binning data */ | 
| 138 |       __forceinline void add(const size_t dim, | 
| 139 |                              const size_t beginID,  | 
| 140 |                              const size_t endID,  | 
| 141 |                              const size_t binID,  | 
| 142 |                              const BBox3fa &b, | 
| 143 |                              const size_t n = 1)  | 
| 144 |       { | 
| 145 |         assert(beginID < BINS); | 
| 146 |         assert(endID < BINS); | 
| 147 |         assert(binID < BINS); | 
| 148 |  | 
| 149 |         numBegin[beginID][dim]+=(unsigned int)n; | 
| 150 |         numEnd  [endID][dim]+=(unsigned int)n; | 
| 151 |         bounds  [binID][dim].extend(b);         | 
| 152 |       } | 
| 153 |  | 
| 154 |       /*! extends binning bounds */ | 
| 155 |       __forceinline void extend(const size_t dim, | 
| 156 |                                 const size_t binID,  | 
| 157 |                                 const BBox3fa &b)  | 
| 158 |       { | 
| 159 |         assert(binID < BINS); | 
| 160 |         bounds  [binID][dim].extend(b);         | 
| 161 |       } | 
| 162 |  | 
| 163 |       /*! bins an array of primitives */ | 
| 164 |       template<typename PrimitiveSplitterFactory> | 
| 165 |         __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) | 
| 166 |       { | 
| 167 |         for (size_t i=begin; i<end; i++) | 
| 168 |         { | 
| 169 |           const PrimRef& prim = source[i]; | 
| 170 |           unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); | 
| 171 |            | 
| 172 |           if (unlikely(splits <= 1)) | 
| 173 |           { | 
| 174 |             const vint4 bin = mapping.bin(center(prim.bounds())); | 
| 175 |             for (size_t dim=0; dim<3; dim++)  | 
| 176 |             { | 
| 177 |               assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS); | 
| 178 |               add(dim,beginID: bin[dim],endID: bin[dim],binID: bin[dim],b: prim.bounds()); | 
| 179 |             } | 
| 180 |           } | 
| 181 |           else | 
| 182 |           { | 
| 183 |             const vint4 bin0 = mapping.bin(prim.bounds().lower); | 
| 184 |             const vint4 bin1 = mapping.bin(prim.bounds().upper); | 
| 185 |              | 
| 186 |             for (size_t dim=0; dim<3; dim++)  | 
| 187 |             { | 
| 188 |               if (unlikely(mapping.invalid(dim)))  | 
| 189 |                 continue; | 
| 190 |                | 
| 191 |               size_t bin; | 
| 192 |               size_t l = bin0[dim]; | 
| 193 |               size_t r = bin1[dim]; | 
| 194 |                | 
| 195 |               // same bin optimization | 
| 196 |               if (likely(l == r))  | 
| 197 |               { | 
| 198 |                 add(dim,beginID: l,endID: l,binID: l,b: prim.bounds()); | 
| 199 |                 continue; | 
| 200 |               } | 
| 201 |               size_t bin_start = bin0[dim]; | 
| 202 |               size_t bin_end   = bin1[dim]; | 
| 203 |               BBox3fa rest = prim.bounds(); | 
| 204 |                | 
| 205 |               /* assure that split position always overlaps the primitive bounds */ | 
| 206 |               while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++; | 
| 207 |               while (bin_start < bin_end && mapping.pos(bin_end    ,dim) >= rest.upper[dim]) bin_end--; | 
| 208 |                | 
| 209 |               const auto splitter = splitterFactory(prim); | 
| 210 |               for (bin=bin_start; bin<bin_end; bin++)  | 
| 211 |               { | 
| 212 |                 const float pos = mapping.pos(bin+1,dim); | 
| 213 |                 BBox3fa left,right; | 
| 214 |                 splitter(rest,dim,pos,left,right); | 
| 215 |                  | 
| 216 |                 if (unlikely(left.empty())) l++;                 | 
| 217 |                 extend(dim,binID: bin,b: left); | 
| 218 |                 rest = right; | 
| 219 |               } | 
| 220 |               if (unlikely(rest.empty())) r--; | 
| 221 |               add(dim,beginID: l,endID: r,binID: bin,b: rest); | 
| 222 |             } | 
| 223 |           }               | 
| 224 |         } | 
| 225 |       } | 
| 226 |  | 
| 227 |  | 
| 228 |       /*! bins an array of primitives */ | 
| 229 |       __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) | 
| 230 |       { | 
| 231 |         for (size_t i=begin; i<end; i++) | 
| 232 |         { | 
| 233 |           const PrimRef &prim = source[i]; | 
| 234 |           const vint4 bin0 = mapping.bin(prim.bounds().lower); | 
| 235 |           const vint4 bin1 = mapping.bin(prim.bounds().upper); | 
| 236 |            | 
| 237 |           for (size_t dim=0; dim<3; dim++)  | 
| 238 |           { | 
| 239 |             if (unlikely(mapping.invalid(dim)))  | 
| 240 |               continue; | 
| 241 |              | 
| 242 |             const size_t l = bin0[dim]; | 
| 243 |             const size_t r = bin1[dim]; | 
| 244 |  | 
| 245 |             const unsigned int n  = prim.primID(); | 
| 246 |              | 
| 247 |             // same bin optimization | 
| 248 |             if (likely(l == r))  | 
| 249 |             { | 
| 250 |               add(dim,beginID: l,endID: l,binID: l,b: prim.bounds(),n); | 
| 251 |               continue; | 
| 252 |             } | 
| 253 |             const size_t bin_start = bin0[dim]; | 
| 254 |             const size_t bin_end   = bin1[dim]; | 
| 255 |             for (size_t bin=bin_start; bin<bin_end; bin++)  | 
| 256 |               add(dim,beginID: l,endID: r,binID: bin,b: prim.bounds(),n); | 
| 257 |           } | 
| 258 |         }               | 
| 259 |       } | 
| 260 |        | 
| 261 |       /*! merges in other binning information */ | 
| 262 |       void merge (const SpatialBinInfo& other) | 
| 263 |       { | 
| 264 |         for (size_t i=0; i<BINS; i++)  | 
| 265 |         { | 
| 266 |           numBegin[i] += other.numBegin[i]; | 
| 267 |           numEnd  [i] += other.numEnd  [i]; | 
| 268 |           bounds[i][0].extend(other.bounds[i][0]); | 
| 269 |           bounds[i][1].extend(other.bounds[i][1]); | 
| 270 |           bounds[i][2].extend(other.bounds[i][2]); | 
| 271 |         } | 
| 272 |       } | 
| 273 |  | 
| 274 |       /*! merges in other binning information */ | 
| 275 |       static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b) | 
| 276 |       { | 
| 277 |         SpatialBinInfo c(empty); | 
| 278 |         for (size_t i=0; i<BINS; i++)  | 
| 279 |         { | 
| 280 |           c.numBegin[i] += a.numBegin[i]+b.numBegin[i]; | 
| 281 |           c.numEnd  [i] += a.numEnd  [i]+b.numEnd  [i]; | 
| 282 |           c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]); | 
| 283 |           c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]); | 
| 284 |           c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]); | 
| 285 |         } | 
| 286 |         return c; | 
| 287 |       } | 
| 288 |        | 
| 289 |       /*! finds the best split by scanning binning information */ | 
| 290 |       SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const  | 
| 291 |       { | 
| 292 |         /* sweep from right to left and compute parallel prefix of merged bounds */ | 
| 293 |         vfloat4 rAreas[BINS]; | 
| 294 |         vuint4 rCounts[BINS]; | 
| 295 |         vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty; | 
| 296 |         for (size_t i=BINS-1; i>0; i--) | 
| 297 |         { | 
| 298 |           count += numEnd[i]; | 
| 299 |           rCounts[i] = count; | 
| 300 |           bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(b: bx); | 
| 301 |           by.extend(bounds[i][1]); rAreas[i][1] = halfArea(b: by); | 
| 302 |           bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(b: bz); | 
| 303 |           rAreas[i][3] = 0.0f; | 
| 304 |         } | 
| 305 |          | 
| 306 |         /* sweep from left to right and compute SAH */ | 
| 307 |         vuint4 blocks_add = (1 << blocks_shift)-1; | 
| 308 |         vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0; | 
| 309 |         count = 0; bx = empty; by = empty; bz = empty; | 
| 310 |         for (size_t i=1; i<BINS; i++, ii+=1) | 
| 311 |         { | 
| 312 |           count += numBegin[i-1]; | 
| 313 |           bx.extend(bounds[i-1][0]); float Ax = halfArea(b: bx); | 
| 314 |           by.extend(bounds[i-1][1]); float Ay = halfArea(b: by); | 
| 315 |           bz.extend(bounds[i-1][2]); float Az = halfArea(b: bz); | 
| 316 |           const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); | 
| 317 |           const vfloat4 rArea = rAreas[i]; | 
| 318 |           const vuint4 lCount = (count     +blocks_add) >> (unsigned int)(blocks_shift); | 
| 319 |           const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); | 
| 320 |           const vfloat4 sah = madd(a: lArea,b: vfloat4(lCount),c: rArea*vfloat4(rCount)); | 
| 321 |           // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); | 
| 322 |           const vbool4 mask = sah < vbestSAH; | 
| 323 |           vbestPos      = select(m: mask,t: ii ,f: vbestPos); | 
| 324 |           vbestSAH      = select(m: mask,t: sah,f: vbestSAH); | 
| 325 |           vbestlCount   = select(m: mask,t: count,f: vbestlCount); | 
| 326 |           vbestrCount   = select(mask,rCounts[i],vbestrCount); | 
| 327 |         } | 
| 328 |          | 
| 329 |         /* find best dimension */ | 
| 330 |         float bestSAH = inf; | 
| 331 |         int   bestDim = -1; | 
| 332 |         int   bestPos = 0; | 
| 333 |         unsigned int   bestlCount = 0; | 
| 334 |         unsigned int   bestrCount = 0; | 
| 335 |         for (int dim=0; dim<3; dim++)  | 
| 336 |         { | 
| 337 |           /* ignore zero sized dimensions */ | 
| 338 |           if (unlikely(mapping.invalid(dim))) | 
| 339 |             continue; | 
| 340 |            | 
| 341 |           /* test if this is a better dimension */ | 
| 342 |           if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { | 
| 343 |             bestDim = dim; | 
| 344 |             bestPos = vbestPos[dim]; | 
| 345 |             bestSAH = vbestSAH[dim]; | 
| 346 |             bestlCount = vbestlCount[dim]; | 
| 347 |             bestrCount = vbestrCount[dim]; | 
| 348 |           } | 
| 349 |         } | 
| 350 |         assert(bestSAH >= 0.0f); | 
| 351 |          | 
| 352 |         /* return invalid split if no split found */ | 
| 353 |         if (bestDim == -1)  | 
| 354 |           return SpatialBinSplit<BINS>(inf,-1,0,mapping); | 
| 355 |          | 
| 356 |         /* return best found split */ | 
| 357 |         return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping); | 
| 358 |       } | 
| 359 |        | 
| 360 |     private: | 
| 361 |       BBox3fa bounds[BINS][3];  //!< geometry bounds for each bin in each dimension | 
| 362 |       vuint4    numBegin[BINS];   //!< number of primitives starting in bin | 
| 363 |       vuint4    numEnd[BINS];     //!< number of primitives ending in bin | 
| 364 |     }; | 
| 365 |   } | 
| 366 | } | 
| 367 |  | 
| 368 |  |