| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "heuristic_binning_array_aligned.h" | 
| 7 | #include "heuristic_spatial_array.h" | 
| 8 | #include "heuristic_openmerge_array.h" | 
| 9 |  | 
| 10 | #if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL | 
| 11 | #  define NUM_OBJECT_BINS 16 | 
| 12 | #  define NUM_SPATIAL_BINS 16 | 
| 13 | #else | 
| 14 | #  define NUM_OBJECT_BINS 32 | 
| 15 | #  define NUM_SPATIAL_BINS 16 | 
| 16 | #endif | 
| 17 |  | 
| 18 | namespace embree | 
| 19 | { | 
| 20 |   namespace isa | 
| 21 |   { | 
| 22 |     MAYBE_UNUSED static const float travCost = 1.0f; | 
| 23 |     MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024; | 
| 24 |  | 
| 25 |     struct GeneralBVHBuilder | 
| 26 |     { | 
| 27 |       static const size_t MAX_BRANCHING_FACTOR = 16;       //!< maximum supported BVH branching factor       | 
| 28 |       static const size_t MIN_LARGE_LEAF_LEVELS = 8;       //!< create balanced tree of we are that many levels before the maximum tree depth | 
| 29 |        | 
| 30 |  | 
| 31 |       /*! settings for SAH builder */ | 
| 32 |       struct Settings | 
| 33 |       { | 
| 34 |         /*! default settings */ | 
| 35 |         Settings () | 
| 36 |         : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), | 
| 37 |           travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {} | 
| 38 |  | 
| 39 |         /*! initialize settings from API settings */ | 
| 40 |         Settings (const RTCBuildArguments& settings) | 
| 41 |         : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), | 
| 42 |           travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) | 
| 43 |         { | 
| 44 |           if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor; | 
| 45 |           if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth          )) maxDepth        = settings.maxDepth; | 
| 46 |           if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize      )) logBlockSize    = bsr(v: settings.sahBlockSize); | 
| 47 |           if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize       )) minLeafSize     = settings.minLeafSize; | 
| 48 |           if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize       )) maxLeafSize     = settings.maxLeafSize; | 
| 49 |           if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost     )) travCost        = settings.traversalCost; | 
| 50 |           if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost  )) intCost         = settings.intersectionCost; | 
| 51 |  | 
| 52 |           minLeafSize = min(a: minLeafSize,b: maxLeafSize); | 
| 53 |         } | 
| 54 |  | 
| 55 |         Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf) | 
| 56 |         : branchingFactor(2), maxDepth(32), logBlockSize(bsr(v: sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), | 
| 57 |           travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc) | 
| 58 |         { | 
| 59 |           minLeafSize = min(a: minLeafSize,b: maxLeafSize); | 
| 60 |         } | 
| 61 |  | 
| 62 |       public: | 
| 63 |         size_t branchingFactor;  //!< branching factor of BVH to build | 
| 64 |         size_t maxDepth;         //!< maximum depth of BVH to build | 
| 65 |         size_t logBlockSize;     //!< log2 of blocksize for SAH heuristic | 
| 66 |         size_t minLeafSize;      //!< minimum size of a leaf | 
| 67 |         size_t maxLeafSize;      //!< maximum size of a leaf | 
| 68 |         float travCost;          //!< estimated cost of one traversal step | 
| 69 |         float intCost;           //!< estimated cost of one primitive intersection | 
| 70 |         size_t singleThreadThreshold; //!< threshold when we switch to single threaded build | 
| 71 |         size_t primrefarrayalloc;  //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished | 
| 72 |       }; | 
| 73 |  | 
| 74 |       /*! recursive state of builder */ | 
| 75 |       template<typename Set, typename Split> | 
| 76 |         struct BuildRecordT | 
| 77 |         { | 
| 78 |         public: | 
| 79 |           __forceinline BuildRecordT () {} | 
| 80 |  | 
| 81 |           __forceinline BuildRecordT (size_t depth) | 
| 82 |             : depth(depth), alloc_barrier(false), prims(empty) {} | 
| 83 |  | 
| 84 |           __forceinline BuildRecordT (size_t depth, const Set& prims) | 
| 85 |             : depth(depth), alloc_barrier(false), prims(prims) {} | 
| 86 |  | 
| 87 |           __forceinline BBox3fa bounds() const { return prims.geomBounds; } | 
| 88 |  | 
| 89 |           __forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); } | 
| 90 |           __forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size();  } | 
| 91 |  | 
| 92 |           __forceinline size_t size() const { return prims.size(); } | 
| 93 |  | 
| 94 |         public: | 
| 95 |           size_t depth;       //!< Depth of the root of this subtree. | 
| 96 |           bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes | 
| 97 |           Set prims;          //!< The list of primitives. | 
| 98 |         }; | 
| 99 |  | 
| 100 |       template<typename PrimRef, typename Set> | 
| 101 |       struct DefaultCanCreateLeafFunc | 
| 102 |       { | 
| 103 |         __forceinline bool operator()(const PrimRef*, const Set&) const { return true; } | 
| 104 |       }; | 
| 105 |  | 
| 106 |       template<typename PrimRef, typename Set> | 
| 107 |       struct DefaultCanCreateLeafSplitFunc | 
| 108 |       { | 
| 109 |         __forceinline void operator()(PrimRef*, const Set&, Set&, Set&) const { } | 
| 110 |       }; | 
| 111 |  | 
| 112 |       template<typename BuildRecord, | 
| 113 |         typename Heuristic, | 
| 114 |         typename Set, | 
| 115 |         typename PrimRef, | 
| 116 |         typename ReductionTy, | 
| 117 |         typename Allocator, | 
| 118 |         typename CreateAllocFunc, | 
| 119 |         typename CreateNodeFunc, | 
| 120 |         typename UpdateNodeFunc, | 
| 121 |         typename CreateLeafFunc, | 
| 122 |         typename CanCreateLeafFunc, | 
| 123 |         typename CanCreateLeafSplitFunc, | 
| 124 |         typename ProgressMonitor> | 
| 125 |  | 
| 126 |         class BuilderT | 
| 127 |         { | 
| 128 |           friend struct GeneralBVHBuilder; | 
| 129 |  | 
| 130 |           BuilderT (PrimRef* prims, | 
| 131 |                     Heuristic& heuristic, | 
| 132 |                     const CreateAllocFunc& createAlloc, | 
| 133 |                     const CreateNodeFunc& createNode, | 
| 134 |                     const UpdateNodeFunc& updateNode, | 
| 135 |                     const CreateLeafFunc& createLeaf, | 
| 136 |                     const CanCreateLeafFunc& canCreateLeaf, | 
| 137 |                     const CanCreateLeafSplitFunc& canCreateLeafSplit, | 
| 138 |                     const ProgressMonitor& progressMonitor, | 
| 139 |                     const Settings& settings) : | 
| 140 |                     cfg(settings), | 
| 141 |                     prims(prims), | 
| 142 |                     heuristic(heuristic), | 
| 143 |                     createAlloc(createAlloc), | 
| 144 |                     createNode(createNode), | 
| 145 |                     updateNode(updateNode), | 
| 146 |                     createLeaf(createLeaf), | 
| 147 |                     canCreateLeaf(canCreateLeaf), | 
| 148 |                     canCreateLeafSplit(canCreateLeafSplit), | 
| 149 |                     progressMonitor(progressMonitor) | 
| 150 |           { | 
| 151 |             if (cfg.branchingFactor > MAX_BRANCHING_FACTOR) | 
| 152 |               throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large" ); | 
| 153 |           } | 
| 154 |  | 
| 155 |           const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc) | 
| 156 |           { | 
| 157 |             /* this should never occur but is a fatal error */ | 
| 158 |             if (current.depth > cfg.maxDepth) | 
| 159 |               throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached" ); | 
| 160 |  | 
| 161 |             /* create leaf for few primitives */ | 
| 162 |             if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims)) | 
| 163 |               return createLeaf(prims,current.prims,alloc); | 
| 164 |  | 
| 165 |             /* fill all children by always splitting the largest one */ | 
| 166 |             ReductionTy values[MAX_BRANCHING_FACTOR]; | 
| 167 |             BuildRecord children[MAX_BRANCHING_FACTOR]; | 
| 168 |             size_t numChildren = 1; | 
| 169 |             children[0] = current; | 
| 170 |             do { | 
| 171 |  | 
| 172 |               /* find best child with largest bounding box area */ | 
| 173 |               size_t bestChild = -1; | 
| 174 |               size_t bestSize = 0; | 
| 175 |               for (size_t i=0; i<numChildren; i++) | 
| 176 |               { | 
| 177 |                 /* ignore leaves as they cannot get split */ | 
| 178 |                 if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims)) | 
| 179 |                   continue; | 
| 180 |  | 
| 181 |                 /* remember child with largest size */ | 
| 182 |                 if (children[i].prims.size() > bestSize) { | 
| 183 |                   bestSize = children[i].prims.size(); | 
| 184 |                   bestChild = i; | 
| 185 |                 } | 
| 186 |               } | 
| 187 |               if (bestChild == (size_t)-1) break; | 
| 188 |  | 
| 189 |               /*! split best child into left and right child */ | 
| 190 |               BuildRecord left(current.depth+1); | 
| 191 |               BuildRecord right(current.depth+1); | 
| 192 |               if (!canCreateLeaf(prims,children[bestChild].prims)) { | 
| 193 |                 canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims); | 
| 194 |               } else { | 
| 195 |                 heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims); | 
| 196 |               } | 
| 197 |  | 
| 198 |               /* add new children left and right */ | 
| 199 |               children[bestChild] = children[numChildren-1]; | 
| 200 |               children[numChildren-1] = left; | 
| 201 |               children[numChildren+0] = right; | 
| 202 |               numChildren++; | 
| 203 |  | 
| 204 |             } while (numChildren < cfg.branchingFactor); | 
| 205 |  | 
| 206 |             /* set barrier for primrefarrayalloc */ | 
| 207 |             if (unlikely(current.size() > cfg.primrefarrayalloc)) | 
| 208 |               for (size_t i=0; i<numChildren; i++) | 
| 209 |                 children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc; | 
| 210 |  | 
| 211 |             /* create node */ | 
| 212 |             auto node = createNode(children,numChildren,alloc); | 
| 213 |  | 
| 214 |             /* recurse into each child  and perform reduction */ | 
| 215 |             for (size_t i=0; i<numChildren; i++) | 
| 216 |               values[i] = createLargeLeaf(current: children[i],alloc); | 
| 217 |  | 
| 218 |             /* perform reduction */ | 
| 219 |             return updateNode(current,children,node,values,numChildren); | 
| 220 |           } | 
| 221 |  | 
| 222 |           const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel) | 
| 223 |           { | 
| 224 |             /* get thread local allocator */ | 
| 225 |             if (!alloc) | 
| 226 |               alloc = createAlloc(); | 
| 227 |  | 
| 228 |             /* call memory monitor function to signal progress */ | 
| 229 |             if (toplevel && current.size() <= cfg.singleThreadThreshold) | 
| 230 |               progressMonitor(current.size()); | 
| 231 |  | 
| 232 |             /*! find best split */ | 
| 233 |             auto split = heuristic.find(current.prims,cfg.logBlockSize); | 
| 234 |  | 
| 235 |             /*! compute leaf and split cost */ | 
| 236 |             const float leafSAH  = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize); | 
| 237 |             const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH(); | 
| 238 |             assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0))); | 
| 239 |  | 
| 240 |             /*! create a leaf node when threshold reached or SAH tells us to stop */ | 
| 241 |             if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) { | 
| 242 |               heuristic.deterministic_order(current.prims); | 
| 243 |               return createLargeLeaf(current,alloc); | 
| 244 |             } | 
| 245 |  | 
| 246 |             /*! perform initial split */ | 
| 247 |             Set lprims,rprims; | 
| 248 |             heuristic.split(split,current.prims,lprims,rprims); | 
| 249 | 	     | 
| 250 |             /*! initialize child list with initial split */ | 
| 251 |             ReductionTy values[MAX_BRANCHING_FACTOR]; | 
| 252 |             BuildRecord children[MAX_BRANCHING_FACTOR]; | 
| 253 |             children[0] = BuildRecord(current.depth+1,lprims); | 
| 254 |             children[1] = BuildRecord(current.depth+1,rprims); | 
| 255 |             size_t numChildren = 2; | 
| 256 |  | 
| 257 |             /*! split until node is full or SAH tells us to stop */ | 
| 258 |             while (numChildren < cfg.branchingFactor) | 
| 259 |             { | 
| 260 |               /*! find best child to split */ | 
| 261 |               float bestArea = neg_inf; | 
| 262 |               ssize_t bestChild = -1; | 
| 263 |               for (size_t i=0; i<numChildren; i++) | 
| 264 |               { | 
| 265 |                 /* ignore leaves as they cannot get split */ | 
| 266 |                 if (children[i].prims.size() <= cfg.minLeafSize) continue; | 
| 267 |  | 
| 268 |                 /* find child with largest surface area */ | 
| 269 |                 if (halfArea(children[i].prims.geomBounds) > bestArea) { | 
| 270 |                   bestChild = i; | 
| 271 |                   bestArea = halfArea(children[i].prims.geomBounds); | 
| 272 |                 } | 
| 273 |               } | 
| 274 |               if (bestChild == -1) break; | 
| 275 |  | 
| 276 |               /* perform best found split */ | 
| 277 |               BuildRecord& brecord = children[bestChild]; | 
| 278 |               BuildRecord lrecord(current.depth+1); | 
| 279 |               BuildRecord rrecord(current.depth+1); | 
| 280 |               auto split = heuristic.find(brecord.prims,cfg.logBlockSize); | 
| 281 |               heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims); | 
| 282 |               children[bestChild  ] = lrecord; | 
| 283 |               children[numChildren] = rrecord; | 
| 284 |               numChildren++; | 
| 285 |             } | 
| 286 |  | 
| 287 |             /* set barrier for primrefarrayalloc */ | 
| 288 |             if (unlikely(current.size() > cfg.primrefarrayalloc)) | 
| 289 |               for (size_t i=0; i<numChildren; i++) | 
| 290 |                 children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc; | 
| 291 |  | 
| 292 |             /* sort buildrecords for faster shadow ray traversal */ | 
| 293 |             std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>()); | 
| 294 |  | 
| 295 |             /*! create an inner node */ | 
| 296 |             auto node = createNode(children,numChildren,alloc); | 
| 297 |  | 
| 298 |             /* spawn tasks */ | 
| 299 |             if (current.size() > cfg.singleThreadThreshold) | 
| 300 |             { | 
| 301 |               /*! parallel_for is faster than spawing sub-tasks */ | 
| 302 |               parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here | 
| 303 |                   for (size_t i=r.begin(); i<r.end(); i++) { | 
| 304 |                     values[i] = recurse(current&: children[i],alloc: nullptr,toplevel: true); | 
| 305 |                     _mm_mfence(); // to allow non-temporal stores during build | 
| 306 |                   } | 
| 307 |                 }); | 
| 308 |  | 
| 309 |               return updateNode(current,children,node,values,numChildren); | 
| 310 |             } | 
| 311 |             /* recurse into each child */ | 
| 312 |             else | 
| 313 |             { | 
| 314 |               for (size_t i=0; i<numChildren; i++) | 
| 315 |                 values[i] = recurse(current&: children[i],alloc,toplevel: false); | 
| 316 |  | 
| 317 |               return updateNode(current,children,node,values,numChildren); | 
| 318 |             } | 
| 319 |           } | 
| 320 |  | 
| 321 |         private: | 
| 322 |           Settings cfg; | 
| 323 |           PrimRef* prims; | 
| 324 |           Heuristic& heuristic; | 
| 325 |           const CreateAllocFunc& createAlloc; | 
| 326 |           const CreateNodeFunc& createNode; | 
| 327 |           const UpdateNodeFunc& updateNode; | 
| 328 |           const CreateLeafFunc& createLeaf; | 
| 329 |           const CanCreateLeafFunc& canCreateLeaf; | 
| 330 |           const CanCreateLeafSplitFunc& canCreateLeafSplit; | 
| 331 |           const ProgressMonitor& progressMonitor; | 
| 332 |         }; | 
| 333 |  | 
| 334 |       template< | 
| 335 |       typename ReductionTy, | 
| 336 |         typename Heuristic, | 
| 337 |         typename Set, | 
| 338 |         typename PrimRef, | 
| 339 |         typename CreateAllocFunc, | 
| 340 |         typename CreateNodeFunc, | 
| 341 |         typename UpdateNodeFunc, | 
| 342 |         typename CreateLeafFunc, | 
| 343 |         typename ProgressMonitor> | 
| 344 |  | 
| 345 |         __noinline static ReductionTy build(Heuristic& heuristic, | 
| 346 |                                             PrimRef* prims, | 
| 347 |                                             const Set& set, | 
| 348 |                                             CreateAllocFunc createAlloc, | 
| 349 |                                             CreateNodeFunc createNode, UpdateNodeFunc updateNode, | 
| 350 |                                             const CreateLeafFunc& createLeaf, | 
| 351 |                                             const ProgressMonitor& progressMonitor, | 
| 352 |                                             const Settings& settings) | 
| 353 |       { | 
| 354 |         typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord; | 
| 355 |  | 
| 356 |         typedef BuilderT< | 
| 357 |           BuildRecord, | 
| 358 |           Heuristic, | 
| 359 |           Set, | 
| 360 |           PrimRef, | 
| 361 |           ReductionTy, | 
| 362 |           decltype(createAlloc()), | 
| 363 |           CreateAllocFunc, | 
| 364 |           CreateNodeFunc, | 
| 365 |           UpdateNodeFunc, | 
| 366 |           CreateLeafFunc, | 
| 367 |           DefaultCanCreateLeafFunc<PrimRef, Set>, | 
| 368 |           DefaultCanCreateLeafSplitFunc<PrimRef, Set>, | 
| 369 |           ProgressMonitor> Builder; | 
| 370 |  | 
| 371 |         /* instantiate builder */ | 
| 372 |         Builder builder(prims, | 
| 373 |                         heuristic, | 
| 374 |                         createAlloc, | 
| 375 |                         createNode, | 
| 376 |                         updateNode, | 
| 377 |                         createLeaf, | 
| 378 |                         DefaultCanCreateLeafFunc<PrimRef, Set>(), | 
| 379 |                         DefaultCanCreateLeafSplitFunc<PrimRef, Set>(), | 
| 380 |                         progressMonitor, | 
| 381 |                         settings); | 
| 382 |  | 
| 383 |         /* build hierarchy */ | 
| 384 |         BuildRecord record(1,set); | 
| 385 |         const ReductionTy root = builder.recurse(record,nullptr,true); | 
| 386 |         _mm_mfence(); // to allow non-temporal stores during build | 
| 387 |         return root; | 
| 388 |       } | 
| 389 |  | 
| 390 |       template< | 
| 391 |       typename ReductionTy, | 
| 392 |         typename Heuristic, | 
| 393 |         typename Set, | 
| 394 |         typename PrimRef, | 
| 395 |         typename CreateAllocFunc, | 
| 396 |         typename CreateNodeFunc, | 
| 397 |         typename UpdateNodeFunc, | 
| 398 |         typename CreateLeafFunc, | 
| 399 |         typename CanCreateLeafFunc, | 
| 400 |         typename CanCreateLeafSplitFunc, | 
| 401 |         typename ProgressMonitor> | 
| 402 |  | 
| 403 |         __noinline static ReductionTy build(Heuristic& heuristic, | 
| 404 |                                             PrimRef* prims, | 
| 405 |                                             const Set& set, | 
| 406 |                                             CreateAllocFunc createAlloc, | 
| 407 |                                             CreateNodeFunc createNode, UpdateNodeFunc updateNode, | 
| 408 |                                             const CreateLeafFunc& createLeaf, | 
| 409 |                                             const CanCreateLeafFunc& canCreateLeaf, | 
| 410 |                                             const CanCreateLeafSplitFunc& canCreateLeafSplit, | 
| 411 |                                             const ProgressMonitor& progressMonitor, | 
| 412 |                                             const Settings& settings) | 
| 413 |       { | 
| 414 |         typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord; | 
| 415 |  | 
| 416 |         typedef BuilderT< | 
| 417 |           BuildRecord, | 
| 418 |           Heuristic, | 
| 419 |           Set, | 
| 420 |           PrimRef, | 
| 421 |           ReductionTy, | 
| 422 |           decltype(createAlloc()), | 
| 423 |           CreateAllocFunc, | 
| 424 |           CreateNodeFunc, | 
| 425 |           UpdateNodeFunc, | 
| 426 |           CreateLeafFunc, | 
| 427 |           CanCreateLeafFunc, | 
| 428 |           CanCreateLeafSplitFunc, | 
| 429 |           ProgressMonitor> Builder; | 
| 430 |  | 
| 431 |         /* instantiate builder */ | 
| 432 |         Builder builder(prims, | 
| 433 |                         heuristic, | 
| 434 |                         createAlloc, | 
| 435 |                         createNode, | 
| 436 |                         updateNode, | 
| 437 |                         createLeaf, | 
| 438 |                         canCreateLeaf, | 
| 439 |                         canCreateLeafSplit, | 
| 440 |                         progressMonitor, | 
| 441 |                         settings); | 
| 442 |  | 
| 443 |         /* build hierarchy */ | 
| 444 |         BuildRecord record(1,set); | 
| 445 |         const ReductionTy root = builder.recurse(record,nullptr,true); | 
| 446 |         _mm_mfence(); // to allow non-temporal stores during build | 
| 447 |         return root; | 
| 448 |       } | 
| 449 |     }; | 
| 450 |  | 
| 451 |     /* SAH builder that operates on an array of BuildRecords */ | 
| 452 |     struct BVHBuilderBinnedSAH | 
| 453 |     { | 
| 454 |       typedef PrimInfoRange Set; | 
| 455 |       typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic; | 
| 456 |       typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord; | 
| 457 |       typedef GeneralBVHBuilder::Settings Settings; | 
| 458 |  | 
| 459 |       /*! special builder that propagates reduction over the tree */ | 
| 460 |       template< | 
| 461 |       typename ReductionTy, | 
| 462 |         typename CreateAllocFunc, | 
| 463 |         typename CreateNodeFunc, | 
| 464 |         typename UpdateNodeFunc, | 
| 465 |         typename CreateLeafFunc, | 
| 466 |         typename ProgressMonitor> | 
| 467 |  | 
| 468 |         static ReductionTy build(CreateAllocFunc createAlloc, | 
| 469 |                                  CreateNodeFunc createNode, UpdateNodeFunc updateNode, | 
| 470 |                                  const CreateLeafFunc& createLeaf, | 
| 471 |                                  const ProgressMonitor& progressMonitor, | 
| 472 |                                  PrimRef* prims, const PrimInfo& pinfo, | 
| 473 |                                  const Settings& settings) | 
| 474 |       { | 
| 475 |         Heuristic heuristic(prims); | 
| 476 |         return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( | 
| 477 |           heuristic, | 
| 478 |           prims, | 
| 479 |           PrimInfoRange(0,pinfo.size(),pinfo), | 
| 480 |           createAlloc, | 
| 481 |           createNode, | 
| 482 |           updateNode, | 
| 483 |           createLeaf, | 
| 484 |           progressMonitor, | 
| 485 |           settings); | 
| 486 |       } | 
| 487 |  | 
| 488 |       /*! special builder that propagates reduction over the tree */ | 
| 489 |       template< | 
| 490 |       typename ReductionTy, | 
| 491 |         typename CreateAllocFunc, | 
| 492 |         typename CreateNodeFunc, | 
| 493 |         typename UpdateNodeFunc, | 
| 494 |         typename CreateLeafFunc, | 
| 495 |         typename CanCreateLeafFunc, | 
| 496 |         typename CanCreateLeafSplitFunc, | 
| 497 |         typename ProgressMonitor> | 
| 498 |  | 
| 499 |         static ReductionTy build(CreateAllocFunc createAlloc, | 
| 500 |                                  CreateNodeFunc createNode, UpdateNodeFunc updateNode, | 
| 501 |                                  const CreateLeafFunc& createLeaf, | 
| 502 |                                  const CanCreateLeafFunc& canCreateLeaf, | 
| 503 |                                  const CanCreateLeafSplitFunc& canCreateLeafSplit, | 
| 504 |                                  const ProgressMonitor& progressMonitor, | 
| 505 |                                  PrimRef* prims, const PrimInfo& pinfo, | 
| 506 |                                  const Settings& settings) | 
| 507 |       { | 
| 508 |         Heuristic heuristic(prims); | 
| 509 |         return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( | 
| 510 |           heuristic, | 
| 511 |           prims, | 
| 512 |           PrimInfoRange(0,pinfo.size(),pinfo), | 
| 513 |           createAlloc, | 
| 514 |           createNode, | 
| 515 |           updateNode, | 
| 516 |           createLeaf, | 
| 517 |           canCreateLeaf, | 
| 518 |           canCreateLeafSplit, | 
| 519 |           progressMonitor, | 
| 520 |           settings); | 
| 521 |       } | 
| 522 |     }; | 
| 523 |  | 
| 524 |     /* Spatial SAH builder that operates on an double-buffered array of BuildRecords */ | 
| 525 |     struct BVHBuilderBinnedFastSpatialSAH | 
| 526 |     { | 
| 527 |       typedef PrimInfoExtRange Set; | 
| 528 |       typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split; | 
| 529 |       typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord; | 
| 530 |       typedef GeneralBVHBuilder::Settings Settings; | 
| 531 |  | 
| 532 |       static const unsigned int GEOMID_MASK = 0xFFFFFFFF >>     RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; | 
| 533 |       static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); | 
| 534 |  | 
| 535 |       template<typename ReductionTy, typename UserCreateLeaf> | 
| 536 |       struct CreateLeafExt | 
| 537 |       { | 
| 538 |         __forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf) | 
| 539 |           : userCreateLeaf(userCreateLeaf) {} | 
| 540 |  | 
| 541 |         // __noinline is workaround for ICC2016 compiler bug | 
| 542 |         template<typename Allocator> | 
| 543 |         __noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const | 
| 544 |         { | 
| 545 |           for (size_t i=range.begin(); i<range.end(); i++) | 
| 546 |             prims[i].lower.u &= GEOMID_MASK; | 
| 547 |  | 
| 548 |           return userCreateLeaf(prims,range,alloc); | 
| 549 |         } | 
| 550 |  | 
| 551 |         const UserCreateLeaf userCreateLeaf; | 
| 552 |       }; | 
| 553 |  | 
| 554 |       /*! special builder that propagates reduction over the tree */ | 
| 555 |       template< | 
| 556 |       typename ReductionTy, | 
| 557 |         typename CreateAllocFunc, | 
| 558 |         typename CreateNodeFunc, | 
| 559 |         typename UpdateNodeFunc, | 
| 560 |         typename CreateLeafFunc, | 
| 561 |         typename SplitPrimitiveFunc, | 
| 562 |         typename ProgressMonitor> | 
| 563 |  | 
| 564 |         static ReductionTy build(CreateAllocFunc createAlloc, | 
| 565 |                                  CreateNodeFunc createNode, | 
| 566 |                                  UpdateNodeFunc updateNode, | 
| 567 |                                  const CreateLeafFunc& createLeaf, | 
| 568 |                                  SplitPrimitiveFunc splitPrimitive, | 
| 569 |                                  ProgressMonitor progressMonitor, | 
| 570 |                                  PrimRef* prims, | 
| 571 |                                  const size_t extSize, | 
| 572 |                                  const PrimInfo& pinfo, | 
| 573 |                                  const Settings& settings) | 
| 574 |         { | 
| 575 |           typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic; | 
| 576 |           Heuristic heuristic(splitPrimitive,prims,pinfo); | 
| 577 |  | 
| 578 |           /* calculate total surface area */ // FIXME: this sum is not deterministic | 
| 579 |           const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double { | 
| 580 |  | 
| 581 |               double A = 0.0f; | 
| 582 |               for (size_t i=r.begin(); i<r.end(); i++) | 
| 583 |               { | 
| 584 |                 PrimRef& prim = prims[i]; | 
| 585 |                 A += area(b: prim.bounds()); | 
| 586 |               } | 
| 587 |               return A; | 
| 588 |             },std::plus<double>()); | 
| 589 |  | 
| 590 |  | 
| 591 |           /* calculate maximum number of spatial splits per primitive */ | 
| 592 |           const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1; | 
| 593 |           const float f = 10.0f; | 
| 594 |  | 
| 595 |           const float invA = 1.0f / A; | 
| 596 |           parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) { | 
| 597 |  | 
| 598 |               for (size_t i=r.begin(); i<r.end(); i++) | 
| 599 |               { | 
| 600 |                 PrimRef& prim = prims[i]; | 
| 601 |                 assert((prim.geomID() & SPLITS_MASK) == 0); | 
| 602 |                 // FIXME: is there a better general heuristic ? | 
| 603 |                 const float nf = ceilf(x: f*pinfo.size()*area(b: prim.bounds()) * invA); | 
| 604 |                 unsigned int n = 4+min(a: (int)maxSplits-4, b: max(a: 1, b: (int)(nf))); | 
| 605 |                 prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); | 
| 606 |               } | 
| 607 |             }); | 
| 608 |  | 
| 609 |           return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( | 
| 610 |             heuristic, | 
| 611 |             prims, | 
| 612 |             PrimInfoExtRange(0,pinfo.size(),extSize,pinfo), | 
| 613 |             createAlloc, | 
| 614 |             createNode, | 
| 615 |             updateNode, | 
| 616 |             CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf), | 
| 617 |             progressMonitor, | 
| 618 |             settings); | 
| 619 |         } | 
| 620 |     }; | 
| 621 |  | 
| 622 |     /* Open/Merge SAH builder that operates on an array of BuildRecords */ | 
| 623 |     struct BVHBuilderBinnedOpenMergeSAH | 
| 624 |     { | 
| 625 |       static const size_t NUM_OBJECT_BINS_HQ = 32; | 
| 626 |       typedef PrimInfoExtRange Set; | 
| 627 |       typedef BinSplit<NUM_OBJECT_BINS_HQ> Split; | 
| 628 |       typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord; | 
| 629 |       typedef GeneralBVHBuilder::Settings Settings; | 
| 630 |        | 
| 631 |       /*! special builder that propagates reduction over the tree */ | 
| 632 |       template< | 
| 633 |         typename ReductionTy,  | 
| 634 |         typename BuildRef, | 
| 635 |         typename CreateAllocFunc,  | 
| 636 |         typename CreateNodeFunc,  | 
| 637 |         typename UpdateNodeFunc,  | 
| 638 |         typename CreateLeafFunc,  | 
| 639 |         typename NodeOpenerFunc,  | 
| 640 |         typename ProgressMonitor> | 
| 641 |          | 
| 642 |         static ReductionTy build(CreateAllocFunc createAlloc,  | 
| 643 |                                  CreateNodeFunc createNode,  | 
| 644 |                                  UpdateNodeFunc updateNode,  | 
| 645 |                                  const CreateLeafFunc& createLeaf,  | 
| 646 |                                  NodeOpenerFunc nodeOpenerFunc, | 
| 647 |                                  ProgressMonitor progressMonitor, | 
| 648 |                                  BuildRef* prims,  | 
| 649 |                                  const size_t extSize, | 
| 650 |                                  const PrimInfo& pinfo,  | 
| 651 |                                  const Settings& settings) | 
| 652 |       { | 
| 653 |         typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic; | 
| 654 |         Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor); | 
| 655 |  | 
| 656 |         return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>( | 
| 657 |           heuristic, | 
| 658 |           prims, | 
| 659 |           PrimInfoExtRange(0,pinfo.size(),extSize,pinfo), | 
| 660 |           createAlloc, | 
| 661 |           createNode, | 
| 662 |           updateNode, | 
| 663 |           createLeaf, | 
| 664 |           progressMonitor, | 
| 665 |           settings); | 
| 666 |       } | 
| 667 |     }; | 
| 668 |   } | 
| 669 | } | 
| 670 |  |