| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #define MBLUR_NUM_TEMPORAL_BINS 2 | 
| 7 | #define MBLUR_NUM_OBJECT_BINS   32 | 
| 8 |  | 
| 9 | #include "../bvh/bvh.h" | 
| 10 | #include "../common/primref_mb.h" | 
| 11 | #include "heuristic_binning_array_aligned.h" | 
| 12 | #include "heuristic_timesplit_array.h" | 
| 13 |  | 
| 14 | namespace embree | 
| 15 | { | 
| 16 |   namespace isa | 
| 17 |   { | 
| 18 |     template<typename T> | 
| 19 |       struct SharedVector | 
| 20 |       { | 
| 21 |         __forceinline SharedVector() {} | 
| 22 |  | 
| 23 |         __forceinline SharedVector(T* ptr, size_t refCount = 1) | 
| 24 |           : prims(ptr), refCount(refCount) {} | 
| 25 |  | 
| 26 |         __forceinline void incRef() { | 
| 27 |           refCount++; | 
| 28 |         } | 
| 29 |  | 
| 30 |         __forceinline void decRef() | 
| 31 |         { | 
| 32 |           if (--refCount == 0) | 
| 33 |             delete prims; | 
| 34 |         } | 
| 35 |  | 
| 36 |         T* prims; | 
| 37 |         size_t refCount; | 
| 38 |       }; | 
| 39 |  | 
| 40 |     template<typename BuildRecord, int MAX_BRANCHING_FACTOR> | 
| 41 |       struct LocalChildListT | 
| 42 |       { | 
| 43 |         typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector; | 
| 44 |  | 
| 45 |         __forceinline LocalChildListT (const BuildRecord& record) | 
| 46 |           : numChildren(1), numSharedPrimVecs(1) | 
| 47 |         { | 
| 48 |           /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */ | 
| 49 |           children[0] = record; | 
| 50 |           primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2); | 
| 51 |         } | 
| 52 |  | 
| 53 |         __forceinline ~LocalChildListT() | 
| 54 |         { | 
| 55 |           for (size_t i = 0; i < numChildren; i++) | 
| 56 |             primvecs[i]->decRef(); | 
| 57 |         } | 
| 58 |  | 
| 59 |         __forceinline BuildRecord& operator[] ( const size_t i ) { | 
| 60 |           return children[i]; | 
| 61 |         } | 
| 62 |  | 
| 63 |         __forceinline size_t size() const { | 
| 64 |           return numChildren; | 
| 65 |         } | 
| 66 |  | 
| 67 |         __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector) | 
| 68 |         { | 
| 69 |           SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild]; | 
| 70 |           if (lrecord.prims.prims == bsharedPrimVec->prims) { | 
| 71 |             primvecs[bestChild] = bsharedPrimVec; | 
| 72 |             bsharedPrimVec->incRef(); | 
| 73 |           } | 
| 74 |           else { | 
| 75 |             primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims); | 
| 76 |           } | 
| 77 |  | 
| 78 |           if (rrecord.prims.prims == bsharedPrimVec->prims) { | 
| 79 |             primvecs[numChildren] = bsharedPrimVec; | 
| 80 |             bsharedPrimVec->incRef(); | 
| 81 |           } | 
| 82 |           else { | 
| 83 |             primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims); | 
| 84 |           } | 
| 85 |           bsharedPrimVec->decRef(); | 
| 86 |           new_vector.release(); | 
| 87 |  | 
| 88 |           children[bestChild] = lrecord; | 
| 89 |           children[numChildren] = rrecord; | 
| 90 |           numChildren++; | 
| 91 |         } | 
| 92 |  | 
| 93 |       public: | 
| 94 |         array_t<BuildRecord,MAX_BRANCHING_FACTOR> children; | 
| 95 |         array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs; | 
| 96 |         size_t numChildren; | 
| 97 |  | 
| 98 |         array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs; | 
| 99 |         size_t numSharedPrimVecs; | 
| 100 |       }; | 
| 101 |  | 
| 102 |     template<typename Mesh> | 
| 103 |       struct RecalculatePrimRef | 
| 104 |       { | 
| 105 |         Scene* scene; | 
| 106 |  | 
| 107 |         __forceinline RecalculatePrimRef (Scene* scene) | 
| 108 |           : scene(scene) {} | 
| 109 |  | 
| 110 |         __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const | 
| 111 |         { | 
| 112 |           const unsigned geomID = prim.geomID(); | 
| 113 |           const unsigned primID = prim.primID(); | 
| 114 |           const Mesh* mesh = scene->get<Mesh>(geomID); | 
| 115 |           const LBBox3fa lbounds = mesh->linearBounds(primID, time_range); | 
| 116 |           const range<int> tbounds = mesh->timeSegmentRange(time_range); | 
| 117 |           return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID); | 
| 118 |         } | 
| 119 |  | 
| 120 |         // __noinline is workaround for ICC16 bug under MacOSX | 
| 121 |         __noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const | 
| 122 |         { | 
| 123 |           const unsigned geomID = prim.geomID(); | 
| 124 |           const unsigned primID = prim.primID(); | 
| 125 |           const Mesh* mesh = scene->get<Mesh>(geomID); | 
| 126 |           const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range); | 
| 127 |           const range<int> tbounds = mesh->timeSegmentRange(time_range); | 
| 128 |           return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID); | 
| 129 |         } | 
| 130 |  | 
| 131 |         __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const { | 
| 132 |           return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range); | 
| 133 |         } | 
| 134 |  | 
| 135 |         // __noinline is workaround for ICC16 bug under MacOSX | 
| 136 |         __noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const { | 
| 137 |           return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range); | 
| 138 |         } | 
| 139 |       }; | 
| 140 |  | 
| 141 |     struct VirtualRecalculatePrimRef | 
| 142 |     { | 
| 143 |       Scene* scene; | 
| 144 |        | 
| 145 |       __forceinline VirtualRecalculatePrimRef (Scene* scene) | 
| 146 |         : scene(scene) {} | 
| 147 |        | 
| 148 |       __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const | 
| 149 |       { | 
| 150 |         const unsigned geomID = prim.geomID(); | 
| 151 |         const unsigned primID = prim.primID(); | 
| 152 |         const Geometry* mesh = scene->get(i: geomID); | 
| 153 |         const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range); | 
| 154 |         const range<int> tbounds = mesh->timeSegmentRange(range: time_range); | 
| 155 |         return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID); | 
| 156 |       } | 
| 157 |        | 
| 158 |       __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const | 
| 159 |       { | 
| 160 |         const unsigned geomID = prim.geomID(); | 
| 161 |         const unsigned primID = prim.primID(); | 
| 162 |         const Geometry* mesh = scene->get(i: geomID); | 
| 163 |         const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range); | 
| 164 |         const range<int> tbounds = mesh->timeSegmentRange(range: time_range); | 
| 165 |         return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID); | 
| 166 |       } | 
| 167 |        | 
| 168 |       __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const { | 
| 169 |         return scene->get(i: prim.geomID())->vlinearBounds(primID: prim.primID(), time_range); | 
| 170 |       } | 
| 171 |        | 
| 172 |       __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const { | 
| 173 |         return scene->get(i: prim.geomID())->vlinearBounds(space, primID: prim.primID(), time_range); | 
| 174 |       } | 
| 175 |     }; | 
| 176 |  | 
| 177 |     struct BVHBuilderMSMBlur | 
| 178 |     { | 
| 179 |       /*! settings for msmblur builder */ | 
| 180 |       struct Settings | 
| 181 |       { | 
| 182 |         /*! default settings */ | 
| 183 |         Settings () | 
| 184 |         : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8), | 
| 185 |           travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false), | 
| 186 |           singleThreadThreshold(1024) {} | 
| 187 |  | 
| 188 |  | 
| 189 |         Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold) | 
| 190 |         : branchingFactor(2), maxDepth(32), logBlockSize(bsr(v: sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), | 
| 191 |           travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold) | 
| 192 |         { | 
| 193 |           minLeafSize = min(a: minLeafSize,b: maxLeafSize); | 
| 194 |         } | 
| 195 |  | 
| 196 |       public: | 
| 197 |         size_t branchingFactor;  //!< branching factor of BVH to build | 
| 198 |         size_t maxDepth;         //!< maximum depth of BVH to build | 
| 199 |         size_t logBlockSize;     //!< log2 of blocksize for SAH heuristic | 
| 200 |         size_t minLeafSize;      //!< minimum size of a leaf | 
| 201 |         size_t maxLeafSize;      //!< maximum size of a leaf | 
| 202 |         float travCost;          //!< estimated cost of one traversal step | 
| 203 |         float intCost;           //!< estimated cost of one primitive intersection | 
| 204 |         bool singleLeafTimeSegment; //!< split time to single time range | 
| 205 |         size_t singleThreadThreshold; //!< threshold when we switch to single threaded build | 
| 206 |       }; | 
| 207 |  | 
| 208 |       struct BuildRecord | 
| 209 |       { | 
| 210 |       public: | 
| 211 | 	__forceinline BuildRecord () {} | 
| 212 |  | 
| 213 |         __forceinline BuildRecord (size_t depth) | 
| 214 |           : depth(depth) {} | 
| 215 |  | 
| 216 |         __forceinline BuildRecord (const SetMB& prims, size_t depth) | 
| 217 |           : depth(depth), prims(prims) {} | 
| 218 |  | 
| 219 |         __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) { | 
| 220 |           return a.prims.size() < b.prims.size(); | 
| 221 |         } | 
| 222 |  | 
| 223 |         __forceinline size_t size() const { | 
| 224 |           return prims.size(); | 
| 225 |         } | 
| 226 |  | 
| 227 |       public: | 
| 228 | 	size_t depth;                     //!< Depth of the root of this subtree. | 
| 229 | 	SetMB prims;                      //!< The list of primitives. | 
| 230 |       }; | 
| 231 |  | 
| 232 |       struct BuildRecordSplit : public BuildRecord | 
| 233 |       { | 
| 234 |         __forceinline BuildRecordSplit () {} | 
| 235 |  | 
| 236 |         __forceinline BuildRecordSplit (size_t depth)  | 
| 237 |           : BuildRecord(depth) {} | 
| 238 |  | 
| 239 |         __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split) | 
| 240 |           : BuildRecord(record), split(split) {} | 
| 241 |          | 
| 242 |         BinSplit<MBLUR_NUM_OBJECT_BINS> split; | 
| 243 |       }; | 
| 244 |  | 
| 245 |       template< | 
| 246 |         typename NodeRef, | 
| 247 |         typename RecalculatePrimRef, | 
| 248 |         typename Allocator, | 
| 249 |         typename CreateAllocFunc, | 
| 250 |         typename CreateNodeFunc, | 
| 251 |         typename SetNodeFunc, | 
| 252 |         typename CreateLeafFunc, | 
| 253 |         typename ProgressMonitor> | 
| 254 |  | 
| 255 |         class BuilderT | 
| 256 |         { | 
| 257 |           ALIGNED_CLASS_(16); | 
| 258 |           static const size_t MAX_BRANCHING_FACTOR = 16;       //!< maximum supported BVH branching factor	   | 
| 259 |           static const size_t MIN_LARGE_LEAF_LEVELS = 8;        //!< create balanced tree if we are that many levels before the maximum tree depth | 
| 260 |  | 
| 261 |           typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D; | 
| 262 |           typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split; | 
| 263 |           typedef mvector<PrimRefMB>* PrimRefVector; | 
| 264 |           typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector; | 
| 265 |           typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList; | 
| 266 |           typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit; | 
| 267 |  | 
| 268 |         public: | 
| 269 |  | 
| 270 |           BuilderT (MemoryMonitorInterface* device, | 
| 271 |                     const RecalculatePrimRef recalculatePrimRef, | 
| 272 |                     const CreateAllocFunc createAlloc, | 
| 273 |                     const CreateNodeFunc createNode, | 
| 274 |                     const SetNodeFunc setNode, | 
| 275 |                     const CreateLeafFunc createLeaf, | 
| 276 |                     const ProgressMonitor progressMonitor, | 
| 277 |                     const Settings& settings) | 
| 278 |             : cfg(settings), | 
| 279 |             heuristicObjectSplit(), | 
| 280 |             heuristicTemporalSplit(device, recalculatePrimRef), | 
| 281 |             recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf), | 
| 282 |             progressMonitor(progressMonitor) | 
| 283 |           { | 
| 284 |             if (cfg.branchingFactor > MAX_BRANCHING_FACTOR) | 
| 285 |               throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large" ); | 
| 286 |           } | 
| 287 |  | 
| 288 |           /*! finds the best split */ | 
| 289 |           const Split find(const SetMB& set) | 
| 290 |           { | 
| 291 |             /* first try standard object split */ | 
| 292 |             const Split object_split = heuristicObjectSplit.find(set,logBlockSize: cfg.logBlockSize); | 
| 293 |             const float object_split_sah = object_split.splitSAH(); | 
| 294 |  | 
| 295 |             /* test temporal splits only when object split was bad */ | 
| 296 |             const float leaf_sah = set.leafSAH(block_shift: cfg.logBlockSize); | 
| 297 |             if (object_split_sah < 0.50f*leaf_sah) | 
| 298 |               return object_split; | 
| 299 |  | 
| 300 |             /* do temporal splits only if the time range is big enough */ | 
| 301 |             if (set.time_range.size() > 1.01f/float(set.max_num_time_segments)) | 
| 302 |             { | 
| 303 |               const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize); | 
| 304 |               const float temporal_split_sah = temporal_split.splitSAH(); | 
| 305 |  | 
| 306 |               /* take temporal split if it improved SAH */ | 
| 307 |               if (temporal_split_sah < object_split_sah) | 
| 308 |                 return temporal_split; | 
| 309 |             } | 
| 310 |  | 
| 311 |             return object_split; | 
| 312 |           } | 
| 313 |  | 
| 314 |           /*! array partitioning */ | 
| 315 |           __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset) | 
| 316 |           { | 
| 317 |             /* perform object split */ | 
| 318 |             if (likely(split.data == Split::SPLIT_OBJECT)) { | 
| 319 |               heuristicObjectSplit.split(split,set,lset,rset); | 
| 320 |             } | 
| 321 |             /* perform temporal split */ | 
| 322 |             else if (likely(split.data == Split::SPLIT_TEMPORAL)) { | 
| 323 |               return heuristicTemporalSplit.split(split,set,lset,rset); | 
| 324 |             } | 
| 325 |             /* perform fallback split */ | 
| 326 |             else if (unlikely(split.data == Split::SPLIT_FALLBACK)) { | 
| 327 |               set.deterministic_order(); | 
| 328 |               splitFallback(set,lset,rset); | 
| 329 |             } | 
| 330 |             /* split by geometry */ | 
| 331 |             else if (unlikely(split.data == Split::SPLIT_GEOMID)) { | 
| 332 |               set.deterministic_order(); | 
| 333 |               splitByGeometry(set,lset,rset); | 
| 334 |             } | 
| 335 |             else | 
| 336 |               assert(false); | 
| 337 |  | 
| 338 |             return std::unique_ptr<mvector<PrimRefMB>>(); | 
| 339 |           } | 
| 340 |  | 
| 341 |           /*! finds the best fallback split */ | 
| 342 |           __noinline Split findFallback(const SetMB& set) | 
| 343 |           { | 
| 344 |             /* split if primitives are not from same geometry */ | 
| 345 |             if (!sameGeometry(set)) | 
| 346 |               return Split(0.0f,Split::SPLIT_GEOMID); | 
| 347 |              | 
| 348 |             /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */ | 
| 349 |             if (cfg.singleLeafTimeSegment) | 
| 350 |             { | 
| 351 |               /* test if one primitive has more than one time segment in time range, if so split time */ | 
| 352 |               for (size_t i=set.begin(); i<set.end(); i++) | 
| 353 |               { | 
| 354 |                 const PrimRefMB& prim = (*set.prims)[i]; | 
| 355 |                 const range<int> itime_range = prim.timeSegmentRange(range: set.time_range); | 
| 356 |                 const int localTimeSegments = itime_range.size(); | 
| 357 |                 assert(localTimeSegments > 0); | 
| 358 |                 if (localTimeSegments > 1) { | 
| 359 |                   const int icenter = (itime_range.begin() + itime_range.end())/2; | 
| 360 |                   const float splitTime = prim.timeStep(i: icenter); | 
| 361 |                   return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime); | 
| 362 |                 } | 
| 363 |               } | 
| 364 |             }         | 
| 365 |  | 
| 366 |             /* otherwise return fallback split */ | 
| 367 |             return Split(0.0f,Split::SPLIT_FALLBACK); | 
| 368 |           } | 
| 369 |  | 
| 370 |           /*! performs fallback split */ | 
| 371 |           void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset) | 
| 372 |           { | 
| 373 |             mvector<PrimRefMB>& prims = *set.prims; | 
| 374 |  | 
| 375 |             const size_t begin = set.begin(); | 
| 376 |             const size_t end   = set.end(); | 
| 377 |             const size_t center = (begin + end + 1) / 2; | 
| 378 |  | 
| 379 |             PrimInfoMB linfo = empty; | 
| 380 |             for (size_t i=begin; i<center; i++) | 
| 381 |               linfo.add_primref(prim: prims[i]); | 
| 382 |  | 
| 383 |             PrimInfoMB rinfo = empty; | 
| 384 |             for (size_t i=center; i<end; i++) | 
| 385 |               rinfo.add_primref(prim: prims[i]); | 
| 386 |  | 
| 387 |             new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range); | 
| 388 |             new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end  ),set.time_range); | 
| 389 |           } | 
| 390 |  | 
| 391 |           /*! checks if all primitives are from the same geometry */ | 
| 392 |           __forceinline bool sameGeometry(const SetMB& set) | 
| 393 |           { | 
| 394 |             if (set.size() == 0) return true; | 
| 395 |             mvector<PrimRefMB>& prims = *set.prims; | 
| 396 |             const size_t begin = set.begin(); | 
| 397 |             const size_t end   = set.end(); | 
| 398 |             unsigned int firstGeomID = prims[begin].geomID(); | 
| 399 |             for (size_t i=begin+1; i<end; i++) { | 
| 400 |               if (prims[i].geomID() != firstGeomID){ | 
| 401 |                 return false; | 
| 402 |               } | 
| 403 |             } | 
| 404 |             return true; | 
| 405 |           } | 
| 406 |  | 
| 407 |           /* split by geometry ID */ | 
| 408 |           void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset) | 
| 409 |           { | 
| 410 |             assert(set.size() > 1); | 
| 411 |  | 
| 412 |             mvector<PrimRefMB>& prims = *set.prims; | 
| 413 |             const size_t begin = set.begin(); | 
| 414 |             const size_t end   = set.end(); | 
| 415 |              | 
| 416 |             PrimInfoMB left(empty); | 
| 417 |             PrimInfoMB right(empty); | 
| 418 |             unsigned int geomID = prims[begin].geomID(); | 
| 419 |             size_t center = serial_partitioning(prims.data(),begin,end,left,right, | 
| 420 |                                                 [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; }, | 
| 421 |                                                 [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); }); | 
| 422 |              | 
| 423 |             new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range); | 
| 424 |             new (&rset) SetMB(right,set.prims,range<size_t>(center,end  ),set.time_range); | 
| 425 |           } | 
| 426 |  | 
| 427 |           const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc) | 
| 428 |           { | 
| 429 |             /* this should never occur but is a fatal error */ | 
| 430 |             if (in.depth > cfg.maxDepth) | 
| 431 |               throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached" ); | 
| 432 |  | 
| 433 |             /* replace already found split by fallback split */ | 
| 434 |             const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(set: in.prims)); | 
| 435 |  | 
| 436 |             /* special case when directly creating leaf without any splits that could shrink time_range */ | 
| 437 |             bool force_split = false; | 
| 438 |             if (current.depth == 1 && current.size() > 0) | 
| 439 |             { | 
| 440 |               BBox1f c = empty; | 
| 441 |               BBox1f p = current.prims.time_range; | 
| 442 |               for (size_t i=current.prims.begin(); i<current.prims.end(); i++) { | 
| 443 |                 mvector<PrimRefMB>& prims = *current.prims.prims; | 
| 444 |                 c.extend(other: prims[i].time_range); | 
| 445 |               } | 
| 446 |                | 
| 447 |               force_split = c.lower > p.lower || c.upper < p.upper; | 
| 448 |             } | 
| 449 | 	     | 
| 450 |             /* create leaf for few primitives */ | 
| 451 |             if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split) | 
| 452 |               return createLeaf(current,alloc); | 
| 453 | 	   | 
| 454 |             /* fill all children by always splitting the largest one */ | 
| 455 |             bool hasTimeSplits = false; | 
| 456 |             NodeRecordMB4D values[MAX_BRANCHING_FACTOR]; | 
| 457 |             LocalChildListSplit children(current); | 
| 458 |  | 
| 459 |             do { | 
| 460 |               /* find best child with largest bounding box area */ | 
| 461 |               size_t bestChild = -1; | 
| 462 |               size_t bestSize = 0; | 
| 463 |               for (size_t i=0; i<children.size(); i++) | 
| 464 |               { | 
| 465 |                 /* ignore leaves as they cannot get split */ | 
| 466 |                 if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split) | 
| 467 |                   continue; | 
| 468 |  | 
| 469 |                 force_split = false; | 
| 470 |                  | 
| 471 |                 /* remember child with largest size */ | 
| 472 |                 if (children[i].size() > bestSize) { | 
| 473 |                   bestSize = children[i].size(); | 
| 474 |                   bestChild = i; | 
| 475 |                 } | 
| 476 |               } | 
| 477 |               if (bestChild == -1) break; | 
| 478 |  | 
| 479 |               /* perform best found split */ | 
| 480 |               BuildRecordSplit& brecord = children[bestChild]; | 
| 481 |               BuildRecordSplit lrecord(current.depth+1); | 
| 482 |               BuildRecordSplit rrecord(current.depth+1); | 
| 483 |               std::unique_ptr<mvector<PrimRefMB>> new_vector = split(split: brecord.split,set: brecord.prims,lset&: lrecord.prims,rset&: rrecord.prims); | 
| 484 |               hasTimeSplits |= new_vector != nullptr; | 
| 485 |  | 
| 486 |               /* find new splits */ | 
| 487 |               lrecord.split = findFallback(set: lrecord.prims); | 
| 488 |               rrecord.split = findFallback(set: rrecord.prims); | 
| 489 |               children.split(bestChild,lrecord,rrecord,new_vector: std::move(new_vector)); | 
| 490 |  | 
| 491 |             } while (children.size() < cfg.branchingFactor); | 
| 492 |  | 
| 493 |             /* detect time_ranges that have shrunken */ | 
| 494 |             for (size_t i=0; i<children.size(); i++) { | 
| 495 |               const BBox1f c = children[i].prims.time_range; | 
| 496 |               const BBox1f p = in.prims.time_range; | 
| 497 |               hasTimeSplits |= c.lower > p.lower || c.upper < p.upper; | 
| 498 |             } | 
| 499 |  | 
| 500 |             /* create node */ | 
| 501 |             auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits); | 
| 502 |  | 
| 503 |             /* recurse into each child and perform reduction */ | 
| 504 |             LBBox3fa gbounds = empty; | 
| 505 |             for (size_t i=0; i<children.size(); i++) { | 
| 506 |               values[i] = createLargeLeaf(in: children[i],alloc); | 
| 507 |               gbounds.extend(other: values[i].lbounds); | 
| 508 |             } | 
| 509 |  | 
| 510 |             setNode(current,children.children.data(),node,values,children.numChildren); | 
| 511 |  | 
| 512 |             /* calculate geometry bounds of this node */ | 
| 513 |             if (hasTimeSplits) | 
| 514 |               return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range); | 
| 515 |             else | 
| 516 |               return NodeRecordMB4D(node,gbounds,current.prims.time_range); | 
| 517 |           } | 
| 518 |  | 
| 519 |           const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel) | 
| 520 |           { | 
| 521 |             /* get thread local allocator */ | 
| 522 |             if (!alloc) | 
| 523 |               alloc = createAlloc(); | 
| 524 |  | 
| 525 |             /* call memory monitor function to signal progress */ | 
| 526 |             if (toplevel && current.size() <= cfg.singleThreadThreshold) | 
| 527 |               progressMonitor(current.size()); | 
| 528 |  | 
| 529 |             /*! find best split */ | 
| 530 |             const Split csplit = find(set: current.prims); | 
| 531 |  | 
| 532 |             /*! compute leaf and split cost */ | 
| 533 |             const float leafSAH  = cfg.intCost*current.prims.leafSAH(block_shift: cfg.logBlockSize); | 
| 534 |             const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH(); | 
| 535 |             assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0))); | 
| 536 |  | 
| 537 |             /*! create a leaf node when threshold reached or SAH tells us to stop */ | 
| 538 |             if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) { | 
| 539 |               current.prims.deterministic_order(); | 
| 540 |               return createLargeLeaf(in: current,alloc); | 
| 541 |             } | 
| 542 |  | 
| 543 |             /*! perform initial split */ | 
| 544 |             SetMB lprims,rprims; | 
| 545 |             std::unique_ptr<mvector<PrimRefMB>> new_vector = split(split: csplit,set: current.prims,lset&: lprims,rset&: rprims); | 
| 546 |             bool hasTimeSplits = new_vector != nullptr; | 
| 547 |             NodeRecordMB4D values[MAX_BRANCHING_FACTOR]; | 
| 548 |             LocalChildList children(current); | 
| 549 |             { | 
| 550 |               BuildRecord lrecord(lprims,current.depth+1); | 
| 551 |               BuildRecord rrecord(rprims,current.depth+1); | 
| 552 |               children.split(bestChild: 0,lrecord,rrecord,new_vector: std::move(new_vector)); | 
| 553 |             } | 
| 554 |  | 
| 555 |             /*! split until node is full or SAH tells us to stop */ | 
| 556 |             while (children.size() < cfg.branchingFactor)  | 
| 557 |             { | 
| 558 |               /*! find best child to split */ | 
| 559 |               float bestArea = neg_inf; | 
| 560 |               ssize_t bestChild = -1; | 
| 561 |               for (size_t i=0; i<children.size(); i++) | 
| 562 |               { | 
| 563 |                 if (children[i].size() <= cfg.minLeafSize) continue; | 
| 564 |                 if (expectedApproxHalfArea(box: children[i].prims.geomBounds) > bestArea) { | 
| 565 |                   bestChild = i; bestArea = expectedApproxHalfArea(box: children[i].prims.geomBounds); | 
| 566 |                 } | 
| 567 |               } | 
| 568 |               if (bestChild == -1) break; | 
| 569 |  | 
| 570 |               /* perform split */ | 
| 571 |               BuildRecord& brecord = children[bestChild]; | 
| 572 |               BuildRecord lrecord(current.depth+1); | 
| 573 |               BuildRecord rrecord(current.depth+1); | 
| 574 |               Split csplit = find(set: brecord.prims); | 
| 575 |               std::unique_ptr<mvector<PrimRefMB>> new_vector = split(split: csplit,set: brecord.prims,lset&: lrecord.prims,rset&: rrecord.prims); | 
| 576 |               hasTimeSplits |= new_vector != nullptr; | 
| 577 |               children.split(bestChild,lrecord,rrecord,new_vector: std::move(new_vector)); | 
| 578 |             } | 
| 579 |  | 
| 580 |             /* detect time_ranges that have shrunken */ | 
| 581 |             for (size_t i=0; i<children.size(); i++) { | 
| 582 |               const BBox1f c = children[i].prims.time_range; | 
| 583 |               const BBox1f p = current.prims.time_range; | 
| 584 |               hasTimeSplits |= c.lower > p.lower || c.upper < p.upper; | 
| 585 |             } | 
| 586 |  | 
| 587 |             /* sort buildrecords for simpler shadow ray traversal */ | 
| 588 |             //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (need to verified) !! | 
| 589 |  | 
| 590 |             /*! create an inner node */ | 
| 591 |             auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits); | 
| 592 |             LBBox3fa gbounds = empty; | 
| 593 |  | 
| 594 |             /* spawn tasks */ | 
| 595 |             if (unlikely(current.size() > cfg.singleThreadThreshold)) | 
| 596 |             { | 
| 597 |               /*! parallel_for is faster than spawing sub-tasks */ | 
| 598 |               parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) { | 
| 599 |                   for (size_t i=r.begin(); i<r.end(); i++) { | 
| 600 |                     values[i] = recurse(current: children[i],alloc: nullptr,toplevel: true); | 
| 601 |                     _mm_mfence(); // to allow non-temporal stores during build | 
| 602 |                   } | 
| 603 |                 }); | 
| 604 |  | 
| 605 |               /*! merge bounding boxes */ | 
| 606 |               for (size_t i=0; i<children.size(); i++) | 
| 607 |                 gbounds.extend(other: values[i].lbounds); | 
| 608 |             } | 
| 609 |             /* recurse into each child */ | 
| 610 |             else | 
| 611 |             { | 
| 612 |               //for (size_t i=0; i<children.size(); i++) | 
| 613 |               for (ssize_t i=children.size()-1; i>=0; i--) { | 
| 614 |                 values[i] = recurse(current: children[i],alloc,toplevel: false); | 
| 615 |                 gbounds.extend(other: values[i].lbounds); | 
| 616 |               } | 
| 617 |             } | 
| 618 |  | 
| 619 |             setNode(current,children.children.data(),node,values,children.numChildren); | 
| 620 |  | 
| 621 |             /* calculate geometry bounds of this node */ | 
| 622 |             if (unlikely(hasTimeSplits)) | 
| 623 |               return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range); | 
| 624 |             else | 
| 625 |               return NodeRecordMB4D(node,gbounds,current.prims.time_range); | 
| 626 |           } | 
| 627 |  | 
| 628 |           /*! builder entry function */ | 
| 629 |           __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo) | 
| 630 |           { | 
| 631 |             const SetMB set(pinfo,&prims); | 
| 632 |             auto ret = recurse(current: BuildRecord(set,1),alloc: nullptr,toplevel: true); | 
| 633 |             _mm_mfence(); // to allow non-temporal stores during build | 
| 634 |             return ret; | 
| 635 |           } | 
| 636 |  | 
| 637 |         private: | 
| 638 |           Settings cfg; | 
| 639 |           HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit; | 
| 640 |           HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit; | 
| 641 |           const RecalculatePrimRef recalculatePrimRef; | 
| 642 |           const CreateAllocFunc createAlloc; | 
| 643 |           const CreateNodeFunc createNode; | 
| 644 |           const SetNodeFunc setNode; | 
| 645 |           const CreateLeafFunc createLeaf; | 
| 646 |           const ProgressMonitor progressMonitor; | 
| 647 |         }; | 
| 648 |  | 
| 649 |       template<typename NodeRef, | 
| 650 |         typename RecalculatePrimRef, | 
| 651 |         typename CreateAllocFunc, | 
| 652 |         typename CreateNodeFunc, | 
| 653 |         typename SetNodeFunc, | 
| 654 |         typename CreateLeafFunc, | 
| 655 |         typename ProgressMonitorFunc> | 
| 656 |  | 
| 657 |         static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims, | 
| 658 |                                                       const PrimInfoMB& pinfo, | 
| 659 |                                                       MemoryMonitorInterface* device, | 
| 660 |                                                       const RecalculatePrimRef recalculatePrimRef, | 
| 661 |                                                       const CreateAllocFunc createAlloc, | 
| 662 |                                                       const CreateNodeFunc createNode, | 
| 663 |                                                       const SetNodeFunc setNode, | 
| 664 |                                                       const CreateLeafFunc createLeaf, | 
| 665 |                                                       const ProgressMonitorFunc progressMonitor, | 
| 666 |                                                       const Settings& settings) | 
| 667 |       { | 
| 668 |           typedef BuilderT< | 
| 669 |             NodeRef, | 
| 670 |             RecalculatePrimRef, | 
| 671 |             decltype(createAlloc()), | 
| 672 |             CreateAllocFunc, | 
| 673 |             CreateNodeFunc, | 
| 674 |             SetNodeFunc, | 
| 675 |             CreateLeafFunc, | 
| 676 |             ProgressMonitorFunc> Builder; | 
| 677 |  | 
| 678 |           Builder builder(device, | 
| 679 |                           recalculatePrimRef, | 
| 680 |                           createAlloc, | 
| 681 |                           createNode, | 
| 682 |                           setNode, | 
| 683 |                           createLeaf, | 
| 684 |                           progressMonitor, | 
| 685 |                           settings); | 
| 686 |  | 
| 687 |  | 
| 688 |           return builder(prims,pinfo); | 
| 689 |         } | 
| 690 |     }; | 
| 691 |   } | 
| 692 | } | 
| 693 |  |