| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "priminfo.h" | 
| 7 | #include "../../common/algorithms/parallel_reduce.h" | 
| 8 | #include "../../common/algorithms/parallel_partition.h" | 
| 9 |  | 
| 10 | namespace embree | 
| 11 | { | 
| 12 |   namespace isa | 
| 13 |   {  | 
| 14 |     /*! mapping into bins */ | 
| 15 |     template<size_t BINS> | 
| 16 |       struct BinMapping | 
| 17 |       { | 
| 18 |       public: | 
| 19 |         __forceinline BinMapping() {} | 
| 20 |          | 
| 21 |         /*! calculates the mapping */ | 
| 22 |         __forceinline BinMapping(size_t N, const BBox3fa& centBounds)  | 
| 23 |         { | 
| 24 |           num = min(a: BINS,b: size_t(4.0f + 0.05f*N)); | 
| 25 |           assert(num >= 1); | 
| 26 |           const vfloat4 eps = 1E-34f; | 
| 27 |           const vfloat4 diag = max(a: eps, b: (vfloat4) centBounds.size()); | 
| 28 |           scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f)); | 
| 29 |           ofs  = (vfloat4) centBounds.lower; | 
| 30 |         } | 
| 31 |  | 
| 32 |         /*! calculates the mapping */ | 
| 33 |         __forceinline BinMapping(const BBox3fa& centBounds)  | 
| 34 |         { | 
| 35 |           num = BINS; | 
| 36 |           const vfloat4 eps = 1E-34f; | 
| 37 |           const vfloat4 diag = max(a: eps, b: (vfloat4) centBounds.size()); | 
| 38 |           scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f)); | 
| 39 |           ofs  = (vfloat4) centBounds.lower; | 
| 40 |         } | 
| 41 |  | 
| 42 |         /*! calculates the mapping */ | 
| 43 |         template<typename PrimInfo> | 
| 44 |         __forceinline BinMapping(const PrimInfo& pinfo)  | 
| 45 |         { | 
| 46 |           const vfloat4 eps = 1E-34f; | 
| 47 |           num = min(a: BINS,b: size_t(4.0f + 0.05f*pinfo.size())); | 
| 48 |           const vfloat4 diag = max(a: eps,b: (vfloat4) pinfo.centBounds.size()); | 
| 49 |           scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f)); | 
| 50 |           ofs  = (vfloat4) pinfo.centBounds.lower; | 
| 51 |         } | 
| 52 |  | 
| 53 |         /*! returns number of bins */ | 
| 54 |         __forceinline size_t size() const { return num; } | 
| 55 |          | 
| 56 |         /*! slower but safe binning */ | 
| 57 |         __forceinline Vec3ia bin(const Vec3fa& p) const  | 
| 58 |         { | 
| 59 |           const vint4 i = floori(a: (vfloat4(p)-ofs)*scale); | 
| 60 | #if 1 | 
| 61 |           assert(i[0] >= 0 && (size_t)i[0] < num);  | 
| 62 |           assert(i[1] >= 0 && (size_t)i[1] < num); | 
| 63 |           assert(i[2] >= 0 && (size_t)i[2] < num); | 
| 64 |           return Vec3ia(i); | 
| 65 | #else | 
| 66 |           return Vec3ia(clamp(i,vint4(0),vint4(num-1))); | 
| 67 | #endif | 
| 68 |         } | 
| 69 |  | 
| 70 |         /*! faster but unsafe binning */ | 
| 71 |         __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const { | 
| 72 |           return Vec3ia(floori(a: (vfloat4(p)-ofs)*scale)); | 
| 73 |         } | 
| 74 |  | 
| 75 |         /*! faster but unsafe binning */ | 
| 76 |         template<typename PrimRef> | 
| 77 |         __forceinline Vec3ia bin_unsafe(const PrimRef& p) const { | 
| 78 |           return bin_unsafe(p.binCenter()); | 
| 79 |         } | 
| 80 |  | 
| 81 |         /*! faster but unsafe binning */ | 
| 82 |         template<typename PrimRef, typename BinBoundsAndCenter> | 
| 83 |         __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const { | 
| 84 |           return bin_unsafe(binBoundsAndCenter.binCenter(p)); | 
| 85 |         } | 
| 86 |  | 
| 87 |         template<typename PrimRef> | 
| 88 |         __forceinline bool bin_unsafe(const PrimRef& ref, | 
| 89 |                                       const vint4&   vSplitPos, | 
| 90 |                                       const vbool4&  splitDimMask) const // FIXME: rename to isLeft | 
| 91 |         { | 
| 92 |           return any(b: ((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask); | 
| 93 |         } | 
| 94 |         /*! calculates left spatial position of bin */ | 
| 95 |         __forceinline float pos(const size_t bin, const size_t dim) const { | 
| 96 |           return madd(a: float(bin),b: 1.0f / scale[dim],c: ofs[dim]); | 
| 97 |         } | 
| 98 |  | 
| 99 |         /*! returns true if the mapping is invalid in some dimension */ | 
| 100 |         __forceinline bool invalid(const size_t dim) const { | 
| 101 |           return scale[dim] == 0.0f; | 
| 102 |         } | 
| 103 |          | 
| 104 |         /*! stream output */ | 
| 105 |         friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) { | 
| 106 |           return cout << "BinMapping { num = "  << mapping.num << ", ofs = "  << mapping.ofs << ", scale = "  << mapping.scale << "}" ; | 
| 107 |         } | 
| 108 |          | 
| 109 |       public: | 
| 110 |         size_t num; | 
| 111 |         vfloat4 ofs,scale;        //!< linear function that maps to bin ID | 
| 112 |       }; | 
| 113 |      | 
| 114 |     /*! stores all information to perform some split */ | 
| 115 |     template<size_t BINS> | 
| 116 |       struct BinSplit | 
| 117 |       { | 
| 118 |         enum | 
| 119 |         { | 
| 120 |           SPLIT_OBJECT   = 0, | 
| 121 |           SPLIT_FALLBACK = 1, | 
| 122 |           SPLIT_ENFORCE  = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already | 
| 123 |           SPLIT_TEMPORAL = 2, | 
| 124 |           SPLIT_GEOMID   = 3, | 
| 125 |         }; | 
| 126 |  | 
| 127 |         /*! construct an invalid split by default */ | 
| 128 |         __forceinline BinSplit() | 
| 129 |           : sah(inf), dim(-1), pos(0), data(0) {} | 
| 130 |  | 
| 131 |         __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0) | 
| 132 |           : sah(sah), dim(dim), fpos(fpos), data(data) {} | 
| 133 |          | 
| 134 |         /*! constructs specified split */ | 
| 135 |         __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping) | 
| 136 |           : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {} | 
| 137 |          | 
| 138 |         /*! tests if this split is valid */ | 
| 139 |         __forceinline bool valid() const { return dim != -1; } | 
| 140 |          | 
| 141 |         /*! calculates surface area heuristic for performing the split */ | 
| 142 |         __forceinline float splitSAH() const { return sah; } | 
| 143 |          | 
| 144 |         /*! stream output */ | 
| 145 |         friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) { | 
| 146 |           return cout << "BinSplit { sah = "  << split.sah << ", dim = "  << split.dim << ", pos = "  << split.pos << "}" ; | 
| 147 |         } | 
| 148 |          | 
| 149 |       public: | 
| 150 |         float sah;                //!< SAH cost of the split | 
| 151 |         int dim;                  //!< split dimension | 
| 152 |         union { int pos; float fpos; };                  //!< bin index for splitting | 
| 153 |         unsigned int data;        //!< extra optional split data | 
| 154 |         BinMapping<BINS> mapping; //!< mapping into bins | 
| 155 |       }; | 
| 156 |      | 
| 157 |     /*! stores extended information about the split */ | 
| 158 |     template<typename BBox> | 
| 159 |       struct SplitInfoT | 
| 160 |     { | 
| 161 |  | 
| 162 |       __forceinline SplitInfoT () {} | 
| 163 |        | 
| 164 |       __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds) | 
| 165 | 	: leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {} | 
| 166 |        | 
| 167 |     public: | 
| 168 |       size_t leftCount,rightCount; | 
| 169 |       BBox leftBounds,rightBounds; | 
| 170 |     }; | 
| 171 |  | 
| 172 |     typedef SplitInfoT<BBox3fa> SplitInfo; | 
| 173 |     typedef SplitInfoT<LBBox3fa> SplitInfo2; | 
| 174 |      | 
| 175 |     /*! stores all binning information */ | 
| 176 |     template<size_t BINS, typename PrimRef, typename BBox> | 
| 177 |       struct __aligned(64) BinInfoT | 
| 178 |     {		   | 
| 179 |       typedef BinSplit<BINS> Split; | 
| 180 |       typedef vbool4 vbool; | 
| 181 |       typedef vint4 vint; | 
| 182 |       typedef vfloat4 vfloat; | 
| 183 |        | 
| 184 |       __forceinline BinInfoT() { | 
| 185 |       } | 
| 186 |        | 
| 187 |       __forceinline BinInfoT(EmptyTy) { | 
| 188 | 	clear(); | 
| 189 |       } | 
| 190 |  | 
| 191 |       /*! bin access function */ | 
| 192 |       __forceinline BBox &bounds(const size_t binID, const size_t dimID)             { return _bounds[binID][dimID]; } | 
| 193 |       __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; } | 
| 194 |  | 
| 195 |       __forceinline unsigned int &counts(const size_t binID, const size_t dimID)             { return _counts[binID][dimID]; } | 
| 196 |       __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; } | 
| 197 |  | 
| 198 |       __forceinline vuint4 &counts(const size_t binID)             { return _counts[binID]; } | 
| 199 |       __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; } | 
| 200 |  | 
| 201 |       /*! clears the bin info */ | 
| 202 |       __forceinline void clear()  | 
| 203 |       { | 
| 204 | 	for (size_t i=0; i<BINS; i++) { | 
| 205 | 	  bounds(i,0) = bounds(i,1) = bounds(i,2) = empty; | 
| 206 | 	  counts(i) = vuint4(zero); | 
| 207 | 	} | 
| 208 |       } | 
| 209 |        | 
| 210 |       /*! bins an array of primitives */ | 
| 211 |       __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping) | 
| 212 |       { | 
| 213 | 	if (unlikely(N == 0)) return; | 
| 214 | 	size_t i;  | 
| 215 | 	for (i=0; i<N-1; i+=2) | 
| 216 |         { | 
| 217 |           /*! map even and odd primitive to bin */ | 
| 218 |           BBox prim0; Vec3fa center0; | 
| 219 |           prims[i+0].binBoundsAndCenter(prim0,center0);  | 
| 220 |           const vint4 bin0 = (vint4)mapping.bin(center0);  | 
| 221 |            | 
| 222 |           BBox prim1; Vec3fa center1; | 
| 223 |           prims[i+1].binBoundsAndCenter(prim1,center1);  | 
| 224 |           const vint4 bin1 = (vint4)mapping.bin(center1);  | 
| 225 |            | 
| 226 |           /*! increase bounds for bins for even primitive */ | 
| 227 |           const unsigned int b00 = extract<0>(b: bin0); bounds(b00,0).extend(prim0);  | 
| 228 |           const unsigned int b01 = extract<1>(b: bin0); bounds(b01,1).extend(prim0);  | 
| 229 |           const unsigned int b02 = extract<2>(b: bin0); bounds(b02,2).extend(prim0);  | 
| 230 |           const unsigned int s0 = (unsigned int)prims[i+0].size(); | 
| 231 |           counts(b00,0)+=s0; | 
| 232 |           counts(b01,1)+=s0; | 
| 233 |           counts(b02,2)+=s0; | 
| 234 |  | 
| 235 |           /*! increase bounds of bins for odd primitive */ | 
| 236 |           const unsigned int b10 = extract<0>(b: bin1);  bounds(b10,0).extend(prim1);  | 
| 237 |           const unsigned int b11 = extract<1>(b: bin1);  bounds(b11,1).extend(prim1);  | 
| 238 |           const unsigned int b12 = extract<2>(b: bin1);  bounds(b12,2).extend(prim1);  | 
| 239 |           const unsigned int s1 = (unsigned int)prims[i+1].size(); | 
| 240 |           counts(b10,0)+=s1; | 
| 241 |           counts(b11,1)+=s1; | 
| 242 |           counts(b12,2)+=s1; | 
| 243 |         } | 
| 244 | 	/*! for uneven number of primitives */ | 
| 245 | 	if (i < N) | 
| 246 |         { | 
| 247 |           /*! map primitive to bin */ | 
| 248 |           BBox prim0; Vec3fa center0; | 
| 249 |           prims[i].binBoundsAndCenter(prim0,center0);  | 
| 250 |           const vint4 bin0 = (vint4)mapping.bin(center0);  | 
| 251 |            | 
| 252 |           /*! increase bounds of bins */ | 
| 253 |           const unsigned int s0 = (unsigned int)prims[i].size(); | 
| 254 |           const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); | 
| 255 |           const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); | 
| 256 |           const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); | 
| 257 |         } | 
| 258 |       } | 
| 259 |  | 
| 260 |       /*! bins an array of primitives */ | 
| 261 |       template<typename BinBoundsAndCenter> | 
| 262 |         __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) | 
| 263 |       { | 
| 264 | 	if (N == 0) return; | 
| 265 |          | 
| 266 | 	size_t i;  | 
| 267 | 	for (i=0; i<N-1; i+=2) | 
| 268 |         { | 
| 269 |           /*! map even and odd primitive to bin */ | 
| 270 |           BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);  | 
| 271 |           const vint4 bin0 = (vint4)mapping.bin(center0);  | 
| 272 |           BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);  | 
| 273 |           const vint4 bin1 = (vint4)mapping.bin(center1);  | 
| 274 |            | 
| 275 |           /*! increase bounds for bins for even primitive */ | 
| 276 |           const unsigned int s0 = prims[i+0].size(); | 
| 277 |           const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); | 
| 278 |           const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); | 
| 279 |           const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); | 
| 280 |            | 
| 281 |           /*! increase bounds of bins for odd primitive */ | 
| 282 |           const unsigned int s1 = prims[i+1].size(); | 
| 283 |           const int b10 = extract<0>(b: bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1); | 
| 284 |           const int b11 = extract<1>(b: bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1); | 
| 285 |           const int b12 = extract<2>(b: bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1); | 
| 286 |         } | 
| 287 | 	 | 
| 288 | 	/*! for uneven number of primitives */ | 
| 289 | 	if (i < N) | 
| 290 |         { | 
| 291 |           /*! map primitive to bin */ | 
| 292 |           BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);  | 
| 293 |           const vint4 bin0 = (vint4)mapping.bin(center0);  | 
| 294 |            | 
| 295 |           /*! increase bounds of bins */ | 
| 296 |           const unsigned int s0 = prims[i+0].size(); | 
| 297 |           const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); | 
| 298 |           const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); | 
| 299 |           const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); | 
| 300 |         } | 
| 301 |       } | 
| 302 |        | 
| 303 |       __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) { | 
| 304 | 	bin(prims+begin,end-begin,mapping); | 
| 305 |       } | 
| 306 |  | 
| 307 |       template<typename BinBoundsAndCenter> | 
| 308 |         __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { | 
| 309 | 	bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter); | 
| 310 |       } | 
| 311 |  | 
| 312 |       /*! merges in other binning information */ | 
| 313 |       __forceinline void merge (const BinInfoT& other, size_t numBins) | 
| 314 |       { | 
| 315 | 		 | 
| 316 | 	for (size_t i=0; i<numBins; i++)  | 
| 317 |         { | 
| 318 |           counts(i) += other.counts(i); | 
| 319 |           bounds(i,0).extend(other.bounds(i,0)); | 
| 320 |           bounds(i,1).extend(other.bounds(i,1)); | 
| 321 |           bounds(i,2).extend(other.bounds(i,2)); | 
| 322 |         } | 
| 323 |       } | 
| 324 |  | 
| 325 |       /*! reduces binning information */ | 
| 326 |       static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS) | 
| 327 |       { | 
| 328 |         BinInfoT c; | 
| 329 | 	for (size_t i=0; i<numBins; i++)  | 
| 330 |         { | 
| 331 |           c.counts(i) = a.counts(i)+b.counts(i); | 
| 332 |           c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0)); | 
| 333 |           c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1)); | 
| 334 |           c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2)); | 
| 335 |         } | 
| 336 |         return c; | 
| 337 |       } | 
| 338 |        | 
| 339 |       /*! finds the best split by scanning binning information */ | 
| 340 |       __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const | 
| 341 |       { | 
| 342 | 	/* sweep from right to left and compute parallel prefix of merged bounds */ | 
| 343 | 	vfloat4 rAreas[BINS]; | 
| 344 | 	vuint4 rCounts[BINS]; | 
| 345 | 	vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty; | 
| 346 | 	for (size_t i=mapping.size()-1; i>0; i--) | 
| 347 |         { | 
| 348 |           count += counts(i); | 
| 349 |           rCounts[i] = count; | 
| 350 |           bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx); | 
| 351 |           by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by); | 
| 352 |           bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz); | 
| 353 |           rAreas[i][3] = 0.0f; | 
| 354 |         } | 
| 355 | 	/* sweep from left to right and compute SAH */ | 
| 356 | 	vuint4 blocks_add = (1 << blocks_shift)-1; | 
| 357 | 	vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;  | 
| 358 | 	count = 0; bx = empty; by = empty; bz = empty; | 
| 359 | 	for (size_t i=1; i<mapping.size(); i++, ii+=1) | 
| 360 |         { | 
| 361 |           count += counts(i-1); | 
| 362 |           bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx); | 
| 363 |           by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by); | 
| 364 |           bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz); | 
| 365 |           const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); | 
| 366 |           const vfloat4 rArea = rAreas[i]; | 
| 367 |           const vuint4 lCount = (count     +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions. | 
| 368 |           const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); | 
| 369 |           const vfloat4 sah = madd(a: lArea,b: vfloat4(lCount),c: rArea*vfloat4(rCount)); | 
| 370 |           //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); | 
| 371 |  | 
| 372 |           vbestPos = select(m: sah < vbestSAH,t: ii ,f: vbestPos); | 
| 373 |           vbestSAH = select(m: sah < vbestSAH,t: sah,f: vbestSAH); | 
| 374 |         } | 
| 375 | 	 | 
| 376 | 	/* find best dimension */ | 
| 377 | 	float bestSAH = inf; | 
| 378 | 	int   bestDim = -1; | 
| 379 | 	int   bestPos = 0; | 
| 380 | 	for (int dim=0; dim<3; dim++)  | 
| 381 |         { | 
| 382 |           /* ignore zero sized dimensions */ | 
| 383 |           if (unlikely(mapping.invalid(dim))) | 
| 384 |             continue; | 
| 385 |            | 
| 386 |           /* test if this is a better dimension */ | 
| 387 |           if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { | 
| 388 |             bestDim = dim; | 
| 389 |             bestPos = vbestPos[dim]; | 
| 390 |             bestSAH = vbestSAH[dim]; | 
| 391 |           } | 
| 392 |         } | 
| 393 | 	return Split(bestSAH,bestDim,bestPos,mapping); | 
| 394 |       } | 
| 395 |        | 
| 396 |       /*! calculates extended split information */ | 
| 397 |       __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const  | 
| 398 |       { | 
| 399 | 	if (split.dim == -1) { | 
| 400 | 	  new (&info) SplitInfoT<BBox>(0,empty,0,empty); | 
| 401 | 	  return; | 
| 402 | 	} | 
| 403 | 	 | 
| 404 | 	size_t leftCount = 0; | 
| 405 | 	BBox leftBounds = empty; | 
| 406 | 	for (size_t i=0; i<(size_t)split.pos; i++) { | 
| 407 | 	  leftCount += counts(i,split.dim); | 
| 408 | 	  leftBounds.extend(bounds(i,split.dim)); | 
| 409 | 	} | 
| 410 | 	size_t rightCount = 0; | 
| 411 | 	BBox rightBounds = empty; | 
| 412 | 	for (size_t i=split.pos; i<mapping.size(); i++) { | 
| 413 | 	  rightCount += counts(i,split.dim); | 
| 414 | 	  rightBounds.extend(bounds(i,split.dim)); | 
| 415 | 	} | 
| 416 | 	new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds); | 
| 417 |       } | 
| 418 |  | 
| 419 |       /*! gets the number of primitives left of the split */ | 
| 420 |       __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const | 
| 421 |       { | 
| 422 |         if (unlikely(split.dim == -1)) return -1; | 
| 423 |  | 
| 424 |         size_t leftCount = 0; | 
| 425 |         for (size_t i = 0; i < (size_t)split.pos; i++) { | 
| 426 |           leftCount += counts(i, split.dim); | 
| 427 |         } | 
| 428 |         return leftCount; | 
| 429 |       } | 
| 430 |  | 
| 431 |       /*! gets the number of primitives right of the split */ | 
| 432 |       __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const | 
| 433 |       { | 
| 434 |         if (unlikely(split.dim == -1)) return -1; | 
| 435 |  | 
| 436 |         size_t rightCount = 0; | 
| 437 |         for (size_t i = (size_t)split.pos; i<mapping.size(); i++) { | 
| 438 |           rightCount += counts(i, split.dim); | 
| 439 |         } | 
| 440 |         return rightCount; | 
| 441 |       } | 
| 442 |  | 
| 443 |     private: | 
| 444 |       BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension | 
| 445 |       vuint4   _counts[BINS];    //!< counts number of primitives that map into the bins | 
| 446 |     }; | 
| 447 |   } | 
| 448 |  | 
| 449 |   template<typename BinInfoT, typename BinMapping, typename PrimRef> | 
| 450 |   __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping) | 
| 451 |   { | 
| 452 |     if (likely(end-begin < parallelThreshold)) { | 
| 453 |       binner.bin(prims,begin,end,mapping); | 
| 454 |     } else { | 
| 455 |       binner = parallel_reduce(begin,end,blockSize,binner, | 
| 456 |                               [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, | 
| 457 |                               [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); | 
| 458 |     } | 
| 459 |   } | 
| 460 |  | 
| 461 |   template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> | 
| 462 |   __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) | 
| 463 |   { | 
| 464 |     if (likely(end-begin < parallelThreshold)) { | 
| 465 |       binner.bin(prims,begin,end,mapping,binBoundsAndCenter); | 
| 466 |     } else { | 
| 467 |       binner = parallel_reduce(begin,end,blockSize,binner, | 
| 468 |                               [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, | 
| 469 |                               [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); | 
| 470 |     } | 
| 471 |   } | 
| 472 |  | 
| 473 |   template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef> | 
| 474 |   __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping) | 
| 475 |   { | 
| 476 |     if (!parallel) { | 
| 477 |       binner.bin(prims,begin,end,mapping); | 
| 478 |     } else { | 
| 479 |       binner = parallel_reduce(begin,end,blockSize,binner, | 
| 480 |                               [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, | 
| 481 |                               [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); | 
| 482 |     } | 
| 483 |   } | 
| 484 |  | 
| 485 |   template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> | 
| 486 |   __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) | 
| 487 |   { | 
| 488 |     if (!parallel) { | 
| 489 |       binner.bin(prims,begin,end,mapping,binBoundsAndCenter); | 
| 490 |     } else { | 
| 491 |       binner = parallel_reduce(begin,end,blockSize,binner, | 
| 492 |                               [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, | 
| 493 |                               [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); | 
| 494 |     } | 
| 495 |   } | 
| 496 | } | 
| 497 |  |