| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "heuristic_binning.h" | 
| 7 | #include "heuristic_spatial.h" | 
| 8 |  | 
| 9 | namespace embree | 
| 10 | { | 
| 11 |   namespace isa | 
| 12 |   {  | 
| 13 | #if 0 | 
| 14 | #define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f | 
| 15 | #define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f | 
| 16 | #define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f | 
| 17 | #else | 
| 18 | #define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f | 
| 19 | #define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f | 
| 20 | #define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f | 
| 21 | #endif | 
| 22 |  | 
| 23 |     struct  : public CentGeomBBox3fa, public extended_range<size_t> | 
| 24 |     { | 
| 25 |       __forceinline () { | 
| 26 |       } | 
| 27 |  | 
| 28 |       __forceinline (EmptyTy) | 
| 29 |         : CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {} | 
| 30 |  | 
| 31 |       __forceinline (size_t begin, size_t end, size_t ext_end, const CentGeomBBox3fa& centGeomBounds)  | 
| 32 |         : CentGeomBBox3fa(centGeomBounds), extended_range<size_t>(begin,end,ext_end) {} | 
| 33 |        | 
| 34 |       __forceinline float () const {  | 
| 35 | 	return expectedApproxHalfArea(box: geomBounds)*float(size());  | 
| 36 |       } | 
| 37 |        | 
| 38 |       __forceinline float (size_t block_shift) const {  | 
| 39 | 	return expectedApproxHalfArea(box: geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift); | 
| 40 |       } | 
| 41 |     }; | 
| 42 |  | 
| 43 |     template<typename ObjectSplit, typename SpatialSplit> | 
| 44 |       struct Split2 | 
| 45 |       { | 
| 46 |         __forceinline Split2 () {} | 
| 47 |          | 
| 48 |         __forceinline Split2 (const Split2& other)  | 
| 49 |         { | 
| 50 |           spatial = other.spatial; | 
| 51 |           sah = other.sah; | 
| 52 |           if (spatial) spatialSplit() = other.spatialSplit(); | 
| 53 |           else         objectSplit()  = other.objectSplit(); | 
| 54 |         } | 
| 55 |          | 
| 56 |         __forceinline Split2& operator= (const Split2& other)  | 
| 57 |         { | 
| 58 |           spatial = other.spatial; | 
| 59 |           sah = other.sah; | 
| 60 |           if (spatial) spatialSplit() = other.spatialSplit(); | 
| 61 |           else         objectSplit()  = other.objectSplit(); | 
| 62 |           return *this; | 
| 63 |         } | 
| 64 |            | 
| 65 |           __forceinline     ObjectSplit&  objectSplit()        { return *(      ObjectSplit*)data; } | 
| 66 |         __forceinline const ObjectSplit&  objectSplit() const  { return *(const ObjectSplit*)data; } | 
| 67 |          | 
| 68 |         __forceinline       SpatialSplit& spatialSplit()       { return *(      SpatialSplit*)data; } | 
| 69 |         __forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; } | 
| 70 |          | 
| 71 |         __forceinline Split2 (const ObjectSplit& objectSplit, float sah) | 
| 72 |           : spatial(false), sah(sah)  | 
| 73 |         { | 
| 74 |           new (data) ObjectSplit(objectSplit); | 
| 75 |         } | 
| 76 |          | 
| 77 |         __forceinline Split2 (const SpatialSplit& spatialSplit, float sah) | 
| 78 |           : spatial(true), sah(sah)  | 
| 79 |         { | 
| 80 |           new (data) SpatialSplit(spatialSplit); | 
| 81 |         } | 
| 82 |          | 
| 83 |         __forceinline float splitSAH() const {  | 
| 84 |           return sah;  | 
| 85 |         } | 
| 86 |          | 
| 87 |         __forceinline bool valid() const { | 
| 88 |           return sah < float(inf); | 
| 89 |         } | 
| 90 |          | 
| 91 |       public: | 
| 92 |         __aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)]; | 
| 93 |         bool spatial; | 
| 94 |         float sah; | 
| 95 |       }; | 
| 96 |      | 
| 97 |     /*! Performs standard object binning */ | 
| 98 |     template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS, size_t SPATIAL_BINS> | 
| 99 |       struct HeuristicArraySpatialSAH | 
| 100 |       { | 
| 101 |         typedef BinSplit<OBJECT_BINS> ObjectSplit; | 
| 102 |         typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner; | 
| 103 |  | 
| 104 |         typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit; | 
| 105 |         typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner; | 
| 106 |  | 
| 107 |         //typedef extended_range<size_t> Set; | 
| 108 |         typedef Split2<ObjectSplit,SpatialSplit> Split; | 
| 109 |          | 
| 110 |         static const size_t PARALLEL_THRESHOLD = 3*1024; | 
| 111 |         static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; | 
| 112 |         static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; | 
| 113 |  | 
| 114 |         static const size_t MOVE_STEP_SIZE = 64; | 
| 115 |         static const size_t CREATE_SPLITS_STEP_SIZE = 64; | 
| 116 |  | 
| 117 |         __forceinline HeuristicArraySpatialSAH () | 
| 118 |           : prims0(nullptr) {} | 
| 119 |          | 
| 120 |         /*! remember prim array */ | 
| 121 |         __forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info) | 
| 122 |           : prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {} | 
| 123 |  | 
| 124 |  | 
| 125 |         /*! compute extended ranges */ | 
| 126 |         __noinline void (const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight) | 
| 127 |         { | 
| 128 |           assert(set.ext_range_size() > 0); | 
| 129 |           const float left_factor           = (float)lweight / (lweight + rweight); | 
| 130 |           const size_t ext_range_size       = set.ext_range_size(); | 
| 131 |           const size_t left_ext_range_size  = min(a: (size_t)(floorf(x: left_factor * ext_range_size)),b: ext_range_size); | 
| 132 |           const size_t right_ext_range_size = ext_range_size - left_ext_range_size; | 
| 133 |           lset.set_ext_range(lset.end() + left_ext_range_size); | 
| 134 |           rset.set_ext_range(rset.end() + right_ext_range_size); | 
| 135 |         } | 
| 136 |  | 
| 137 |         /*! move ranges */ | 
| 138 |         __noinline void (const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset) | 
| 139 |         { | 
| 140 |           const size_t left_ext_range_size = lset.ext_range_size(); | 
| 141 |           const size_t right_size = rset.size(); | 
| 142 |  | 
| 143 |           /* has the left child an extended range? */ | 
| 144 |           if (left_ext_range_size > 0) | 
| 145 |           { | 
| 146 |             /* left extended range smaller than right range ? */ | 
| 147 |             if (left_ext_range_size < right_size) | 
| 148 |             { | 
| 149 |               /* only move a small part of the beginning of the right range to the end */ | 
| 150 |               parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {                   | 
| 151 |                   for (size_t i=r.begin(); i<r.end(); i++) | 
| 152 |                     prims0[i+right_size] = prims0[i]; | 
| 153 |                 }); | 
| 154 |             } | 
| 155 |             else | 
| 156 |             { | 
| 157 |               /* no overlap, move entire right range to new location, can be made fully parallel */ | 
| 158 |               parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE,  [&](const range<size_t>& r) { | 
| 159 |                   for (size_t i=r.begin(); i<r.end(); i++) | 
| 160 |                     prims0[i+left_ext_range_size] = prims0[i]; | 
| 161 |                 }); | 
| 162 |             } | 
| 163 |             /* update right range */ | 
| 164 |             assert(rset.ext_end() + left_ext_range_size == set.ext_end()); | 
| 165 |             rset.move_right(plus: left_ext_range_size); | 
| 166 |           } | 
| 167 |         } | 
| 168 |  | 
| 169 |         /*! finds the best split */ | 
| 170 |         const Split (const PrimInfoExtRange& set, const size_t logBlockSize) | 
| 171 |         { | 
| 172 |           SplitInfo oinfo; | 
| 173 |           const ObjectSplit object_split = object_find(set,logBlockSize,info&: oinfo); | 
| 174 |           const float object_split_sah = object_split.splitSAH(); | 
| 175 |  | 
| 176 |           if (unlikely(set.has_ext_range())) | 
| 177 |           { | 
| 178 |             const BBox3fa overlap = intersect(a: oinfo.leftBounds, b: oinfo.rightBounds); | 
| 179 |              | 
| 180 |             /* do only spatial splits if the child bounds overlap */ | 
| 181 |             if (safeArea(b: overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(b: root_info.geomBounds) && | 
| 182 |                 safeArea(b: overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(b: set.geomBounds)) | 
| 183 |             {               | 
| 184 |               const SpatialSplit spatial_split = spatial_find(set, logBlockSize); | 
| 185 |               const float spatial_split_sah = spatial_split.splitSAH(); | 
| 186 |  | 
| 187 |               /* valid spatial split, better SAH and number of splits do not exceed extended range */ | 
| 188 |               if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah && | 
| 189 |                   spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size()) | 
| 190 |               {           | 
| 191 |                 return Split(spatial_split,spatial_split_sah); | 
| 192 |               } | 
| 193 |             } | 
| 194 |           } | 
| 195 |  | 
| 196 |           return Split(object_split,object_split_sah); | 
| 197 |         } | 
| 198 |  | 
| 199 |         /*! finds the best object split */ | 
| 200 |         __forceinline const ObjectSplit (const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info) | 
| 201 |         { | 
| 202 |           if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize,info); | 
| 203 |           else                                 return parallel_object_find  (set,logBlockSize,info); | 
| 204 |         } | 
| 205 |  | 
| 206 |         /*! finds the best object split */ | 
| 207 |         __noinline const ObjectSplit (const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info) | 
| 208 |         { | 
| 209 |           ObjectBinner binner(empty);  | 
| 210 |           const BinMapping<OBJECT_BINS> mapping(set); | 
| 211 |           binner.bin(prims0,set.begin(),set.end(),mapping); | 
| 212 |           ObjectSplit s = binner.best(mapping,logBlockSize); | 
| 213 |           binner.getSplitInfo(mapping, s, info); | 
| 214 |           return s; | 
| 215 |         } | 
| 216 |  | 
| 217 |         /*! finds the best split */ | 
| 218 |         __noinline const ObjectSplit (const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info) | 
| 219 |         { | 
| 220 |           ObjectBinner binner(empty); | 
| 221 |           const BinMapping<OBJECT_BINS> mapping(set); | 
| 222 |           const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround | 
| 223 |           binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner, | 
| 224 |                                    [&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; }, | 
| 225 |                                    [&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; }); | 
| 226 |           ObjectSplit s = binner.best(mapping,logBlockSize); | 
| 227 |           binner.getSplitInfo(mapping, s, info); | 
| 228 |           return s; | 
| 229 |         } | 
| 230 |  | 
| 231 |         /*! finds the best spatial split */ | 
| 232 |         __forceinline const SpatialSplit (const PrimInfoExtRange& set, const size_t logBlockSize) | 
| 233 |         { | 
| 234 |           if (set.size() < PARALLEL_THRESHOLD) return sequential_spatial_find(set, logBlockSize); | 
| 235 |           else                                 return parallel_spatial_find  (set, logBlockSize); | 
| 236 |         } | 
| 237 |  | 
| 238 |         /*! finds the best spatial split */ | 
| 239 |         __noinline const SpatialSplit (const PrimInfoExtRange& set, const size_t logBlockSize) | 
| 240 |         { | 
| 241 |           SpatialBinner binner(empty);  | 
| 242 |           const SpatialBinMapping<SPATIAL_BINS> mapping(set); | 
| 243 |           binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping); | 
| 244 |           /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/ | 
| 245 |           return binner.best(mapping,logBlockSize); //,set.ext_size()); | 
| 246 |         } | 
| 247 |  | 
| 248 |         __noinline const SpatialSplit (const PrimInfoExtRange& set, const size_t logBlockSize) | 
| 249 |         { | 
| 250 |           SpatialBinner binner(empty); | 
| 251 |           const SpatialBinMapping<SPATIAL_BINS> mapping(set); | 
| 252 |           const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround | 
| 253 |           binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner, | 
| 254 |                                    [&] (const range<size_t>& r) -> SpatialBinner {  | 
| 255 |                                      SpatialBinner binner(empty);  | 
| 256 |                                      binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping); | 
| 257 |                                      return binner; }, | 
| 258 |                                    [&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); }); | 
| 259 |           /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/ | 
| 260 |           return binner.best(mapping,logBlockSize); //,set.ext_size()); | 
| 261 |         } | 
| 262 |  | 
| 263 |  | 
| 264 |         /*! subdivides primitives based on a spatial split */ | 
| 265 |         __noinline void (PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping) | 
| 266 |         { | 
| 267 |           assert(set.has_ext_range()); | 
| 268 |           const size_t max_ext_range_size = set.ext_range_size(); | 
| 269 |           const size_t ext_range_start = set.end(); | 
| 270 |  | 
| 271 |           /* atomic counter for number of primref splits */ | 
| 272 |           std::atomic<size_t> ext_elements; | 
| 273 |           ext_elements.store(i: 0); | 
| 274 |            | 
| 275 |           const float fpos = split.mapping.pos(split.pos,split.dim); | 
| 276 |          | 
| 277 |           const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; | 
| 278 |  | 
| 279 |           parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) { | 
| 280 |               for (size_t i=r.begin();i<r.end();i++) | 
| 281 |               { | 
| 282 |                 const unsigned int splits = prims0[i].geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); | 
| 283 |  | 
| 284 |                 if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */ | 
| 285 |  | 
| 286 |                 //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim]; | 
| 287 |                 //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim]; | 
| 288 |                 //if (unlikely(bin0 < split.pos && bin1 >= split.pos)) | 
| 289 |  | 
| 290 |                 if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos)) | 
| 291 |                 { | 
| 292 |                   assert(splits > 1); | 
| 293 |  | 
| 294 |                   PrimRef left,right; | 
| 295 |                   const auto splitter = splitterFactory(prims0[i]); | 
| 296 |                   splitter(prims0[i],split.dim,fpos,left,right); | 
| 297 |                  | 
| 298 |                   // no empty splits | 
| 299 |                   if (unlikely(left.bounds().empty() || right.bounds().empty())) continue; | 
| 300 |                  | 
| 301 |                   left.lower.u  = (left.lower.u  & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); | 
| 302 |                   right.lower.u = (right.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); | 
| 303 |  | 
| 304 |                   const size_t ID = ext_elements.fetch_add(i: 1); | 
| 305 |  | 
| 306 |                   /* break if the number of subdivided elements are greater than the maximum allowed size */ | 
| 307 |                   if (unlikely(ID >= max_ext_range_size))  | 
| 308 |                     break; | 
| 309 |  | 
| 310 |                   /* only write within the correct bounds */ | 
| 311 |                   assert(ID < max_ext_range_size); | 
| 312 |                   prims0[i] = left; | 
| 313 |                   prims0[ext_range_start+ID] = right;      | 
| 314 |                 } | 
| 315 |               } | 
| 316 |             }); | 
| 317 |  | 
| 318 |           const size_t numExtElements = min(a: max_ext_range_size,b: ext_elements.load());           | 
| 319 |           assert(set.end()+numExtElements<=set.ext_end()); | 
| 320 |           set._end += numExtElements; | 
| 321 |         } | 
| 322 |          | 
| 323 |         /*! array partitioning */ | 
| 324 |         void (const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)  | 
| 325 |         { | 
| 326 |           PrimInfoExtRange set = set_i; | 
| 327 |            | 
| 328 |           /* valid split */ | 
| 329 |           if (unlikely(!split.valid())) { | 
| 330 |             deterministic_order(set); | 
| 331 |             return splitFallback(set,lset,rset); | 
| 332 |           } | 
| 333 |  | 
| 334 |           std::pair<size_t,size_t> ext_weights(0,0); | 
| 335 |  | 
| 336 |           if (unlikely(split.spatial)) | 
| 337 |           { | 
| 338 |             create_spatial_splits(set,split: split.spatialSplit(), mapping: split.spatialSplit().mapping);  | 
| 339 |  | 
| 340 |             /* spatial split */ | 
| 341 |             if (likely(set.size() < PARALLEL_THRESHOLD))  | 
| 342 |               ext_weights = sequential_spatial_split(split: split.spatialSplit(),set,lset,rset); | 
| 343 |             else | 
| 344 |               ext_weights = parallel_spatial_split(split: split.spatialSplit(),set,lset,rset); | 
| 345 |           } | 
| 346 |           else | 
| 347 |           { | 
| 348 |             /* object split */ | 
| 349 |             if (likely(set.size() < PARALLEL_THRESHOLD))  | 
| 350 |               ext_weights = sequential_object_split(split: split.objectSplit(),set,lset,rset); | 
| 351 |             else | 
| 352 |               ext_weights = parallel_object_split(split: split.objectSplit(),set,lset,rset); | 
| 353 |           } | 
| 354 |  | 
| 355 |           /* if we have an extended range, set extended child ranges and move right split range */ | 
| 356 |           if (unlikely(set.has_ext_range()))  | 
| 357 |           { | 
| 358 |             setExtentedRanges(set,lset,rset,lweight: ext_weights.first,rweight: ext_weights.second); | 
| 359 |             moveExtentedRange(set,lset,rset); | 
| 360 |           } | 
| 361 |         } | 
| 362 |  | 
| 363 |         /*! array partitioning */ | 
| 364 |         std::pair<size_t,size_t> (const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)  | 
| 365 |         { | 
| 366 |           const size_t begin = set.begin(); | 
| 367 |           const size_t end   = set.end(); | 
| 368 |           PrimInfo local_left(empty); | 
| 369 |           PrimInfo local_right(empty); | 
| 370 |           const unsigned int splitPos = split.pos; | 
| 371 |           const unsigned int splitDim = split.dim; | 
| 372 |           const unsigned int splitDimMask = (unsigned int)1 << splitDim;  | 
| 373 |  | 
| 374 |           const typename ObjectBinner::vint vSplitPos(splitPos); | 
| 375 |           const typename ObjectBinner::vbool vSplitMask(splitDimMask); | 
| 376 |           size_t center = serial_partitioning(prims0, | 
| 377 |                                               begin,end,local_left,local_right, | 
| 378 |                                               [&] (const PrimRef& ref) {  | 
| 379 |                                                 return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); | 
| 380 |                                               }, | 
| 381 |                                               [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });           | 
| 382 |           const size_t left_weight  = local_left.end; | 
| 383 |           const size_t right_weight = local_right.end; | 
| 384 |  | 
| 385 |           new (&lset) PrimInfoExtRange(begin,center,center,local_left); | 
| 386 |           new (&rset) PrimInfoExtRange(center,end,end,local_right); | 
| 387 |  | 
| 388 |           assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f); | 
| 389 |           assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f); | 
| 390 |           return std::pair<size_t,size_t>(left_weight,right_weight); | 
| 391 |         } | 
| 392 |  | 
| 393 |  | 
| 394 |         /*! array partitioning */ | 
| 395 |         __noinline std::pair<size_t,size_t> (const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)  | 
| 396 |         { | 
| 397 |           const size_t begin = set.begin(); | 
| 398 |           const size_t end   = set.end(); | 
| 399 |           PrimInfo local_left(empty); | 
| 400 |           PrimInfo local_right(empty); | 
| 401 |           const unsigned int splitPos = split.pos; | 
| 402 |           const unsigned int splitDim = split.dim; | 
| 403 |           const unsigned int splitDimMask = (unsigned int)1 << splitDim;  | 
| 404 |  | 
| 405 |           /* init spatial mapping */ | 
| 406 |           const SpatialBinMapping<SPATIAL_BINS> &mapping = split.mapping; | 
| 407 |           const vint4 vSplitPos(splitPos); | 
| 408 |           const vbool4 vSplitMask( (int)splitDimMask ); | 
| 409 |  | 
| 410 |           size_t center = serial_partitioning(prims0, | 
| 411 |                                               begin,end,local_left,local_right, | 
| 412 |                                               [&] (const PrimRef& ref) { | 
| 413 |                                                 const Vec3fa c = ref.bounds().center(); | 
| 414 |                                                 return any(b: ((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); | 
| 415 |                                               }, | 
| 416 |                                               [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); }); | 
| 417 |  | 
| 418 |           const size_t left_weight  = local_left.end; | 
| 419 |           const size_t right_weight = local_right.end; | 
| 420 |            | 
| 421 |           new (&lset) PrimInfoExtRange(begin,center,center,local_left); | 
| 422 |           new (&rset) PrimInfoExtRange(center,end,end,local_right); | 
| 423 |           assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f); | 
| 424 |           assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f); | 
| 425 |           return std::pair<size_t,size_t>(left_weight,right_weight); | 
| 426 |         } | 
| 427 |  | 
| 428 |  | 
| 429 |          | 
| 430 |         /*! array partitioning */ | 
| 431 |         __noinline std::pair<size_t,size_t> (const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) | 
| 432 |         { | 
| 433 |           const size_t begin = set.begin(); | 
| 434 |           const size_t end   = set.end(); | 
| 435 |           PrimInfo left(empty); | 
| 436 |           PrimInfo right(empty); | 
| 437 |           const unsigned int splitPos = split.pos; | 
| 438 |           const unsigned int splitDim = split.dim; | 
| 439 |           const unsigned int splitDimMask = (unsigned int)1 << splitDim; | 
| 440 |  | 
| 441 |           const typename ObjectBinner::vint vSplitPos(splitPos); | 
| 442 |           const typename ObjectBinner::vbool vSplitMask(splitDimMask); | 
| 443 |           auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }; | 
| 444 |  | 
| 445 |           const size_t center = parallel_partitioning( | 
| 446 |             prims0,begin,end,EmptyTy(),left,right,isLeft, | 
| 447 |             [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); }, | 
| 448 |             [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(other: pinfo1); }, | 
| 449 |             PARALLEL_PARTITION_BLOCK_SIZE); | 
| 450 |  | 
| 451 |           const size_t left_weight  = left.end; | 
| 452 |           const size_t right_weight = right.end; | 
| 453 |            | 
| 454 |           left.begin  = begin;  left.end  = center;  | 
| 455 |           right.begin = center; right.end = end; | 
| 456 |            | 
| 457 |           new (&lset) PrimInfoExtRange(begin,center,center,left); | 
| 458 |           new (&rset) PrimInfoExtRange(center,end,end,right); | 
| 459 |  | 
| 460 |           assert(area(left.geomBounds) >= 0.0f); | 
| 461 |           assert(area(right.geomBounds) >= 0.0f); | 
| 462 |           return std::pair<size_t,size_t>(left_weight,right_weight); | 
| 463 |         } | 
| 464 |  | 
| 465 |         /*! array partitioning */ | 
| 466 |         __noinline std::pair<size_t,size_t> (const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) | 
| 467 |         { | 
| 468 |           const size_t begin = set.begin(); | 
| 469 |           const size_t end   = set.end(); | 
| 470 |           PrimInfo left(empty); | 
| 471 |           PrimInfo right(empty); | 
| 472 |           const unsigned int splitPos = split.pos; | 
| 473 |           const unsigned int splitDim = split.dim; | 
| 474 |           const unsigned int splitDimMask = (unsigned int)1 << splitDim; | 
| 475 |  | 
| 476 |           /* init spatial mapping */ | 
| 477 |           const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping; | 
| 478 |           const vint4 vSplitPos(splitPos); | 
| 479 |           const vbool4 vSplitMask( (int)splitDimMask ); | 
| 480 |  | 
| 481 |           auto isLeft = [&] (const PrimRef &ref) {  | 
| 482 |             const Vec3fa c = ref.bounds().center(); | 
| 483 |             return any(b: ((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); }; | 
| 484 |  | 
| 485 |           const size_t center = parallel_partitioning( | 
| 486 |             prims0,begin,end,EmptyTy(),left,right,isLeft, | 
| 487 |             [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); }, | 
| 488 |             [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(other: pinfo1); }, | 
| 489 |             PARALLEL_PARTITION_BLOCK_SIZE); | 
| 490 |  | 
| 491 |           const size_t left_weight  = left.end; | 
| 492 |           const size_t right_weight = right.end; | 
| 493 |            | 
| 494 |           left.begin  = begin;  left.end  = center;  | 
| 495 |           right.begin = center; right.end = end; | 
| 496 |            | 
| 497 |           new (&lset) PrimInfoExtRange(begin,center,center,left); | 
| 498 |           new (&rset) PrimInfoExtRange(center,end,end,right); | 
| 499 |  | 
| 500 |           assert(area(left.geomBounds) >= 0.0f); | 
| 501 |           assert(area(right.geomBounds) >= 0.0f); | 
| 502 |           return std::pair<size_t,size_t>(left_weight,right_weight); | 
| 503 |         } | 
| 504 |  | 
| 505 |         void (const PrimInfoExtRange& set)  | 
| 506 |         { | 
| 507 |           /* required as parallel partition destroys original primitive order */ | 
| 508 |           std::sort(&prims0[set.begin()],&prims0[set.end()]); | 
| 509 |         } | 
| 510 |  | 
| 511 |         void (const PrimInfoExtRange& set,  | 
| 512 |                            PrimInfoExtRange& lset,  | 
| 513 |                            PrimInfoExtRange& rset) | 
| 514 |         { | 
| 515 |           const size_t begin = set.begin(); | 
| 516 |           const size_t end   = set.end(); | 
| 517 |           const size_t center = (begin + end)/2; | 
| 518 |  | 
| 519 |           PrimInfo left(empty); | 
| 520 |           for (size_t i=begin; i<center; i++) { | 
| 521 |             left.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); | 
| 522 |           } | 
| 523 |           const size_t lweight = left.end; | 
| 524 |            | 
| 525 |           PrimInfo right(empty); | 
| 526 |           for (size_t i=center; i<end; i++) { | 
| 527 |             right.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));	 | 
| 528 |           } | 
| 529 |           const size_t rweight = right.end; | 
| 530 |  | 
| 531 |           new (&lset) PrimInfoExtRange(begin,center,center,left); | 
| 532 |           new (&rset) PrimInfoExtRange(center,end,end,right); | 
| 533 |  | 
| 534 |           /* if we have an extended range */ | 
| 535 |           if (set.has_ext_range()) { | 
| 536 |             setExtentedRanges(set,lset,rset,lweight,rweight); | 
| 537 |             moveExtentedRange(set,lset,rset); | 
| 538 |           } | 
| 539 |         } | 
| 540 |          | 
| 541 |       private: | 
| 542 |         PrimRef* const prims0; | 
| 543 |         const PrimitiveSplitterFactory& splitterFactory; | 
| 544 |         const CentGeomBBox3fa& root_info; | 
| 545 |       }; | 
| 546 |   } | 
| 547 | } | 
| 548 |  |