1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "heuristic_binning_array_aligned.h" |
7 | #include "heuristic_spatial_array.h" |
8 | #include "heuristic_openmerge_array.h" |
9 | |
10 | #if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL |
11 | # define NUM_OBJECT_BINS 16 |
12 | # define NUM_SPATIAL_BINS 16 |
13 | #else |
14 | # define NUM_OBJECT_BINS 32 |
15 | # define NUM_SPATIAL_BINS 16 |
16 | #endif |
17 | |
18 | namespace embree |
19 | { |
20 | namespace isa |
21 | { |
22 | MAYBE_UNUSED static const float travCost = 1.0f; |
23 | MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024; |
24 | |
25 | struct GeneralBVHBuilder |
26 | { |
27 | static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor |
28 | static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth |
29 | |
30 | |
31 | /*! settings for SAH builder */ |
32 | struct Settings |
33 | { |
34 | /*! default settings */ |
35 | Settings () |
36 | : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), |
37 | travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {} |
38 | |
39 | /*! initialize settings from API settings */ |
40 | Settings (const RTCBuildArguments& settings) |
41 | : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), |
42 | travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) |
43 | { |
44 | if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor; |
45 | if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth; |
46 | if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(v: settings.sahBlockSize); |
47 | if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize; |
48 | if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize; |
49 | if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost; |
50 | if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost )) intCost = settings.intersectionCost; |
51 | |
52 | minLeafSize = min(a: minLeafSize,b: maxLeafSize); |
53 | } |
54 | |
55 | Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf) |
56 | : branchingFactor(2), maxDepth(32), logBlockSize(bsr(v: sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), |
57 | travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc) |
58 | { |
59 | minLeafSize = min(a: minLeafSize,b: maxLeafSize); |
60 | } |
61 | |
62 | public: |
63 | size_t branchingFactor; //!< branching factor of BVH to build |
64 | size_t maxDepth; //!< maximum depth of BVH to build |
65 | size_t logBlockSize; //!< log2 of blocksize for SAH heuristic |
66 | size_t minLeafSize; //!< minimum size of a leaf |
67 | size_t maxLeafSize; //!< maximum size of a leaf |
68 | float travCost; //!< estimated cost of one traversal step |
69 | float intCost; //!< estimated cost of one primitive intersection |
70 | size_t singleThreadThreshold; //!< threshold when we switch to single threaded build |
71 | size_t primrefarrayalloc; //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished |
72 | }; |
73 | |
74 | /*! recursive state of builder */ |
75 | template<typename Set, typename Split> |
76 | struct BuildRecordT |
77 | { |
78 | public: |
79 | __forceinline BuildRecordT () {} |
80 | |
81 | __forceinline BuildRecordT (size_t depth) |
82 | : depth(depth), alloc_barrier(false), prims(empty) {} |
83 | |
84 | __forceinline BuildRecordT (size_t depth, const Set& prims) |
85 | : depth(depth), alloc_barrier(false), prims(prims) {} |
86 | |
87 | __forceinline BBox3fa bounds() const { return prims.geomBounds; } |
88 | |
89 | __forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); } |
90 | __forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size(); } |
91 | |
92 | __forceinline size_t size() const { return prims.size(); } |
93 | |
94 | public: |
95 | size_t depth; //!< Depth of the root of this subtree. |
96 | bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes |
97 | Set prims; //!< The list of primitives. |
98 | }; |
99 | |
100 | template<typename PrimRef, typename Set> |
101 | struct DefaultCanCreateLeafFunc |
102 | { |
103 | __forceinline bool operator()(const PrimRef*, const Set&) const { return true; } |
104 | }; |
105 | |
106 | template<typename PrimRef, typename Set> |
107 | struct DefaultCanCreateLeafSplitFunc |
108 | { |
109 | __forceinline void operator()(PrimRef*, const Set&, Set&, Set&) const { } |
110 | }; |
111 | |
112 | template<typename BuildRecord, |
113 | typename Heuristic, |
114 | typename Set, |
115 | typename PrimRef, |
116 | typename ReductionTy, |
117 | typename Allocator, |
118 | typename CreateAllocFunc, |
119 | typename CreateNodeFunc, |
120 | typename UpdateNodeFunc, |
121 | typename CreateLeafFunc, |
122 | typename CanCreateLeafFunc, |
123 | typename CanCreateLeafSplitFunc, |
124 | typename ProgressMonitor> |
125 | |
126 | class BuilderT |
127 | { |
128 | friend struct GeneralBVHBuilder; |
129 | |
130 | BuilderT (PrimRef* prims, |
131 | Heuristic& heuristic, |
132 | const CreateAllocFunc& createAlloc, |
133 | const CreateNodeFunc& createNode, |
134 | const UpdateNodeFunc& updateNode, |
135 | const CreateLeafFunc& createLeaf, |
136 | const CanCreateLeafFunc& canCreateLeaf, |
137 | const CanCreateLeafSplitFunc& canCreateLeafSplit, |
138 | const ProgressMonitor& progressMonitor, |
139 | const Settings& settings) : |
140 | cfg(settings), |
141 | prims(prims), |
142 | heuristic(heuristic), |
143 | createAlloc(createAlloc), |
144 | createNode(createNode), |
145 | updateNode(updateNode), |
146 | createLeaf(createLeaf), |
147 | canCreateLeaf(canCreateLeaf), |
148 | canCreateLeafSplit(canCreateLeafSplit), |
149 | progressMonitor(progressMonitor) |
150 | { |
151 | if (cfg.branchingFactor > MAX_BRANCHING_FACTOR) |
152 | throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large" ); |
153 | } |
154 | |
155 | const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc) |
156 | { |
157 | /* this should never occur but is a fatal error */ |
158 | if (current.depth > cfg.maxDepth) |
159 | throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached" ); |
160 | |
161 | /* create leaf for few primitives */ |
162 | if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims)) |
163 | return createLeaf(prims,current.prims,alloc); |
164 | |
165 | /* fill all children by always splitting the largest one */ |
166 | ReductionTy values[MAX_BRANCHING_FACTOR]; |
167 | BuildRecord children[MAX_BRANCHING_FACTOR]; |
168 | size_t numChildren = 1; |
169 | children[0] = current; |
170 | do { |
171 | |
172 | /* find best child with largest bounding box area */ |
173 | size_t bestChild = -1; |
174 | size_t bestSize = 0; |
175 | for (size_t i=0; i<numChildren; i++) |
176 | { |
177 | /* ignore leaves as they cannot get split */ |
178 | if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims)) |
179 | continue; |
180 | |
181 | /* remember child with largest size */ |
182 | if (children[i].prims.size() > bestSize) { |
183 | bestSize = children[i].prims.size(); |
184 | bestChild = i; |
185 | } |
186 | } |
187 | if (bestChild == (size_t)-1) break; |
188 | |
189 | /*! split best child into left and right child */ |
190 | BuildRecord left(current.depth+1); |
191 | BuildRecord right(current.depth+1); |
192 | if (!canCreateLeaf(prims,children[bestChild].prims)) { |
193 | canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims); |
194 | } else { |
195 | heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims); |
196 | } |
197 | |
198 | /* add new children left and right */ |
199 | children[bestChild] = children[numChildren-1]; |
200 | children[numChildren-1] = left; |
201 | children[numChildren+0] = right; |
202 | numChildren++; |
203 | |
204 | } while (numChildren < cfg.branchingFactor); |
205 | |
206 | /* set barrier for primrefarrayalloc */ |
207 | if (unlikely(current.size() > cfg.primrefarrayalloc)) |
208 | for (size_t i=0; i<numChildren; i++) |
209 | children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc; |
210 | |
211 | /* create node */ |
212 | auto node = createNode(children,numChildren,alloc); |
213 | |
214 | /* recurse into each child and perform reduction */ |
215 | for (size_t i=0; i<numChildren; i++) |
216 | values[i] = createLargeLeaf(current: children[i],alloc); |
217 | |
218 | /* perform reduction */ |
219 | return updateNode(current,children,node,values,numChildren); |
220 | } |
221 | |
222 | const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel) |
223 | { |
224 | /* get thread local allocator */ |
225 | if (!alloc) |
226 | alloc = createAlloc(); |
227 | |
228 | /* call memory monitor function to signal progress */ |
229 | if (toplevel && current.size() <= cfg.singleThreadThreshold) |
230 | progressMonitor(current.size()); |
231 | |
232 | /*! find best split */ |
233 | auto split = heuristic.find(current.prims,cfg.logBlockSize); |
234 | |
235 | /*! compute leaf and split cost */ |
236 | const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize); |
237 | const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH(); |
238 | assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0))); |
239 | |
240 | /*! create a leaf node when threshold reached or SAH tells us to stop */ |
241 | if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) { |
242 | heuristic.deterministic_order(current.prims); |
243 | return createLargeLeaf(current,alloc); |
244 | } |
245 | |
246 | /*! perform initial split */ |
247 | Set lprims,rprims; |
248 | heuristic.split(split,current.prims,lprims,rprims); |
249 | |
250 | /*! initialize child list with initial split */ |
251 | ReductionTy values[MAX_BRANCHING_FACTOR]; |
252 | BuildRecord children[MAX_BRANCHING_FACTOR]; |
253 | children[0] = BuildRecord(current.depth+1,lprims); |
254 | children[1] = BuildRecord(current.depth+1,rprims); |
255 | size_t numChildren = 2; |
256 | |
257 | /*! split until node is full or SAH tells us to stop */ |
258 | while (numChildren < cfg.branchingFactor) |
259 | { |
260 | /*! find best child to split */ |
261 | float bestArea = neg_inf; |
262 | ssize_t bestChild = -1; |
263 | for (size_t i=0; i<numChildren; i++) |
264 | { |
265 | /* ignore leaves as they cannot get split */ |
266 | if (children[i].prims.size() <= cfg.minLeafSize) continue; |
267 | |
268 | /* find child with largest surface area */ |
269 | if (halfArea(children[i].prims.geomBounds) > bestArea) { |
270 | bestChild = i; |
271 | bestArea = halfArea(children[i].prims.geomBounds); |
272 | } |
273 | } |
274 | if (bestChild == -1) break; |
275 | |
276 | /* perform best found split */ |
277 | BuildRecord& brecord = children[bestChild]; |
278 | BuildRecord lrecord(current.depth+1); |
279 | BuildRecord rrecord(current.depth+1); |
280 | auto split = heuristic.find(brecord.prims,cfg.logBlockSize); |
281 | heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims); |
282 | children[bestChild ] = lrecord; |
283 | children[numChildren] = rrecord; |
284 | numChildren++; |
285 | } |
286 | |
287 | /* set barrier for primrefarrayalloc */ |
288 | if (unlikely(current.size() > cfg.primrefarrayalloc)) |
289 | for (size_t i=0; i<numChildren; i++) |
290 | children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc; |
291 | |
292 | /* sort buildrecords for faster shadow ray traversal */ |
293 | std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>()); |
294 | |
295 | /*! create an inner node */ |
296 | auto node = createNode(children,numChildren,alloc); |
297 | |
298 | /* spawn tasks */ |
299 | if (current.size() > cfg.singleThreadThreshold) |
300 | { |
301 | /*! parallel_for is faster than spawing sub-tasks */ |
302 | parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here |
303 | for (size_t i=r.begin(); i<r.end(); i++) { |
304 | values[i] = recurse(current&: children[i],alloc: nullptr,toplevel: true); |
305 | _mm_mfence(); // to allow non-temporal stores during build |
306 | } |
307 | }); |
308 | |
309 | return updateNode(current,children,node,values,numChildren); |
310 | } |
311 | /* recurse into each child */ |
312 | else |
313 | { |
314 | for (size_t i=0; i<numChildren; i++) |
315 | values[i] = recurse(current&: children[i],alloc,toplevel: false); |
316 | |
317 | return updateNode(current,children,node,values,numChildren); |
318 | } |
319 | } |
320 | |
321 | private: |
322 | Settings cfg; |
323 | PrimRef* prims; |
324 | Heuristic& heuristic; |
325 | const CreateAllocFunc& createAlloc; |
326 | const CreateNodeFunc& createNode; |
327 | const UpdateNodeFunc& updateNode; |
328 | const CreateLeafFunc& createLeaf; |
329 | const CanCreateLeafFunc& canCreateLeaf; |
330 | const CanCreateLeafSplitFunc& canCreateLeafSplit; |
331 | const ProgressMonitor& progressMonitor; |
332 | }; |
333 | |
334 | template< |
335 | typename ReductionTy, |
336 | typename Heuristic, |
337 | typename Set, |
338 | typename PrimRef, |
339 | typename CreateAllocFunc, |
340 | typename CreateNodeFunc, |
341 | typename UpdateNodeFunc, |
342 | typename CreateLeafFunc, |
343 | typename ProgressMonitor> |
344 | |
345 | __noinline static ReductionTy build(Heuristic& heuristic, |
346 | PrimRef* prims, |
347 | const Set& set, |
348 | CreateAllocFunc createAlloc, |
349 | CreateNodeFunc createNode, UpdateNodeFunc updateNode, |
350 | const CreateLeafFunc& createLeaf, |
351 | const ProgressMonitor& progressMonitor, |
352 | const Settings& settings) |
353 | { |
354 | typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord; |
355 | |
356 | typedef BuilderT< |
357 | BuildRecord, |
358 | Heuristic, |
359 | Set, |
360 | PrimRef, |
361 | ReductionTy, |
362 | decltype(createAlloc()), |
363 | CreateAllocFunc, |
364 | CreateNodeFunc, |
365 | UpdateNodeFunc, |
366 | CreateLeafFunc, |
367 | DefaultCanCreateLeafFunc<PrimRef, Set>, |
368 | DefaultCanCreateLeafSplitFunc<PrimRef, Set>, |
369 | ProgressMonitor> Builder; |
370 | |
371 | /* instantiate builder */ |
372 | Builder builder(prims, |
373 | heuristic, |
374 | createAlloc, |
375 | createNode, |
376 | updateNode, |
377 | createLeaf, |
378 | DefaultCanCreateLeafFunc<PrimRef, Set>(), |
379 | DefaultCanCreateLeafSplitFunc<PrimRef, Set>(), |
380 | progressMonitor, |
381 | settings); |
382 | |
383 | /* build hierarchy */ |
384 | BuildRecord record(1,set); |
385 | const ReductionTy root = builder.recurse(record,nullptr,true); |
386 | _mm_mfence(); // to allow non-temporal stores during build |
387 | return root; |
388 | } |
389 | |
390 | template< |
391 | typename ReductionTy, |
392 | typename Heuristic, |
393 | typename Set, |
394 | typename PrimRef, |
395 | typename CreateAllocFunc, |
396 | typename CreateNodeFunc, |
397 | typename UpdateNodeFunc, |
398 | typename CreateLeafFunc, |
399 | typename CanCreateLeafFunc, |
400 | typename CanCreateLeafSplitFunc, |
401 | typename ProgressMonitor> |
402 | |
403 | __noinline static ReductionTy build(Heuristic& heuristic, |
404 | PrimRef* prims, |
405 | const Set& set, |
406 | CreateAllocFunc createAlloc, |
407 | CreateNodeFunc createNode, UpdateNodeFunc updateNode, |
408 | const CreateLeafFunc& createLeaf, |
409 | const CanCreateLeafFunc& canCreateLeaf, |
410 | const CanCreateLeafSplitFunc& canCreateLeafSplit, |
411 | const ProgressMonitor& progressMonitor, |
412 | const Settings& settings) |
413 | { |
414 | typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord; |
415 | |
416 | typedef BuilderT< |
417 | BuildRecord, |
418 | Heuristic, |
419 | Set, |
420 | PrimRef, |
421 | ReductionTy, |
422 | decltype(createAlloc()), |
423 | CreateAllocFunc, |
424 | CreateNodeFunc, |
425 | UpdateNodeFunc, |
426 | CreateLeafFunc, |
427 | CanCreateLeafFunc, |
428 | CanCreateLeafSplitFunc, |
429 | ProgressMonitor> Builder; |
430 | |
431 | /* instantiate builder */ |
432 | Builder builder(prims, |
433 | heuristic, |
434 | createAlloc, |
435 | createNode, |
436 | updateNode, |
437 | createLeaf, |
438 | canCreateLeaf, |
439 | canCreateLeafSplit, |
440 | progressMonitor, |
441 | settings); |
442 | |
443 | /* build hierarchy */ |
444 | BuildRecord record(1,set); |
445 | const ReductionTy root = builder.recurse(record,nullptr,true); |
446 | _mm_mfence(); // to allow non-temporal stores during build |
447 | return root; |
448 | } |
449 | }; |
450 | |
451 | /* SAH builder that operates on an array of BuildRecords */ |
452 | struct BVHBuilderBinnedSAH |
453 | { |
454 | typedef PrimInfoRange Set; |
455 | typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic; |
456 | typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord; |
457 | typedef GeneralBVHBuilder::Settings Settings; |
458 | |
459 | /*! special builder that propagates reduction over the tree */ |
460 | template< |
461 | typename ReductionTy, |
462 | typename CreateAllocFunc, |
463 | typename CreateNodeFunc, |
464 | typename UpdateNodeFunc, |
465 | typename CreateLeafFunc, |
466 | typename ProgressMonitor> |
467 | |
468 | static ReductionTy build(CreateAllocFunc createAlloc, |
469 | CreateNodeFunc createNode, UpdateNodeFunc updateNode, |
470 | const CreateLeafFunc& createLeaf, |
471 | const ProgressMonitor& progressMonitor, |
472 | PrimRef* prims, const PrimInfo& pinfo, |
473 | const Settings& settings) |
474 | { |
475 | Heuristic heuristic(prims); |
476 | return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( |
477 | heuristic, |
478 | prims, |
479 | PrimInfoRange(0,pinfo.size(),pinfo), |
480 | createAlloc, |
481 | createNode, |
482 | updateNode, |
483 | createLeaf, |
484 | progressMonitor, |
485 | settings); |
486 | } |
487 | |
488 | /*! special builder that propagates reduction over the tree */ |
489 | template< |
490 | typename ReductionTy, |
491 | typename CreateAllocFunc, |
492 | typename CreateNodeFunc, |
493 | typename UpdateNodeFunc, |
494 | typename CreateLeafFunc, |
495 | typename CanCreateLeafFunc, |
496 | typename CanCreateLeafSplitFunc, |
497 | typename ProgressMonitor> |
498 | |
499 | static ReductionTy build(CreateAllocFunc createAlloc, |
500 | CreateNodeFunc createNode, UpdateNodeFunc updateNode, |
501 | const CreateLeafFunc& createLeaf, |
502 | const CanCreateLeafFunc& canCreateLeaf, |
503 | const CanCreateLeafSplitFunc& canCreateLeafSplit, |
504 | const ProgressMonitor& progressMonitor, |
505 | PrimRef* prims, const PrimInfo& pinfo, |
506 | const Settings& settings) |
507 | { |
508 | Heuristic heuristic(prims); |
509 | return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( |
510 | heuristic, |
511 | prims, |
512 | PrimInfoRange(0,pinfo.size(),pinfo), |
513 | createAlloc, |
514 | createNode, |
515 | updateNode, |
516 | createLeaf, |
517 | canCreateLeaf, |
518 | canCreateLeafSplit, |
519 | progressMonitor, |
520 | settings); |
521 | } |
522 | }; |
523 | |
524 | /* Spatial SAH builder that operates on an double-buffered array of BuildRecords */ |
525 | struct BVHBuilderBinnedFastSpatialSAH |
526 | { |
527 | typedef PrimInfoExtRange Set; |
528 | typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split; |
529 | typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord; |
530 | typedef GeneralBVHBuilder::Settings Settings; |
531 | |
532 | static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; |
533 | static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); |
534 | |
535 | template<typename ReductionTy, typename UserCreateLeaf> |
536 | struct CreateLeafExt |
537 | { |
538 | __forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf) |
539 | : userCreateLeaf(userCreateLeaf) {} |
540 | |
541 | // __noinline is workaround for ICC2016 compiler bug |
542 | template<typename Allocator> |
543 | __noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const |
544 | { |
545 | for (size_t i=range.begin(); i<range.end(); i++) |
546 | prims[i].lower.u &= GEOMID_MASK; |
547 | |
548 | return userCreateLeaf(prims,range,alloc); |
549 | } |
550 | |
551 | const UserCreateLeaf userCreateLeaf; |
552 | }; |
553 | |
554 | /*! special builder that propagates reduction over the tree */ |
555 | template< |
556 | typename ReductionTy, |
557 | typename CreateAllocFunc, |
558 | typename CreateNodeFunc, |
559 | typename UpdateNodeFunc, |
560 | typename CreateLeafFunc, |
561 | typename SplitPrimitiveFunc, |
562 | typename ProgressMonitor> |
563 | |
564 | static ReductionTy build(CreateAllocFunc createAlloc, |
565 | CreateNodeFunc createNode, |
566 | UpdateNodeFunc updateNode, |
567 | const CreateLeafFunc& createLeaf, |
568 | SplitPrimitiveFunc splitPrimitive, |
569 | ProgressMonitor progressMonitor, |
570 | PrimRef* prims, |
571 | const size_t extSize, |
572 | const PrimInfo& pinfo, |
573 | const Settings& settings) |
574 | { |
575 | typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic; |
576 | Heuristic heuristic(splitPrimitive,prims,pinfo); |
577 | |
578 | /* calculate total surface area */ // FIXME: this sum is not deterministic |
579 | const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double { |
580 | |
581 | double A = 0.0f; |
582 | for (size_t i=r.begin(); i<r.end(); i++) |
583 | { |
584 | PrimRef& prim = prims[i]; |
585 | A += area(b: prim.bounds()); |
586 | } |
587 | return A; |
588 | },std::plus<double>()); |
589 | |
590 | |
591 | /* calculate maximum number of spatial splits per primitive */ |
592 | const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1; |
593 | const float f = 10.0f; |
594 | |
595 | const float invA = 1.0f / A; |
596 | parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) { |
597 | |
598 | for (size_t i=r.begin(); i<r.end(); i++) |
599 | { |
600 | PrimRef& prim = prims[i]; |
601 | assert((prim.geomID() & SPLITS_MASK) == 0); |
602 | // FIXME: is there a better general heuristic ? |
603 | const float nf = ceilf(x: f*pinfo.size()*area(b: prim.bounds()) * invA); |
604 | unsigned int n = 4+min(a: (int)maxSplits-4, b: max(a: 1, b: (int)(nf))); |
605 | prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); |
606 | } |
607 | }); |
608 | |
609 | return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( |
610 | heuristic, |
611 | prims, |
612 | PrimInfoExtRange(0,pinfo.size(),extSize,pinfo), |
613 | createAlloc, |
614 | createNode, |
615 | updateNode, |
616 | CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf), |
617 | progressMonitor, |
618 | settings); |
619 | } |
620 | }; |
621 | |
622 | /* Open/Merge SAH builder that operates on an array of BuildRecords */ |
623 | struct BVHBuilderBinnedOpenMergeSAH |
624 | { |
625 | static const size_t NUM_OBJECT_BINS_HQ = 32; |
626 | typedef PrimInfoExtRange Set; |
627 | typedef BinSplit<NUM_OBJECT_BINS_HQ> Split; |
628 | typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord; |
629 | typedef GeneralBVHBuilder::Settings Settings; |
630 | |
631 | /*! special builder that propagates reduction over the tree */ |
632 | template< |
633 | typename ReductionTy, |
634 | typename BuildRef, |
635 | typename CreateAllocFunc, |
636 | typename CreateNodeFunc, |
637 | typename UpdateNodeFunc, |
638 | typename CreateLeafFunc, |
639 | typename NodeOpenerFunc, |
640 | typename ProgressMonitor> |
641 | |
642 | static ReductionTy build(CreateAllocFunc createAlloc, |
643 | CreateNodeFunc createNode, |
644 | UpdateNodeFunc updateNode, |
645 | const CreateLeafFunc& createLeaf, |
646 | NodeOpenerFunc nodeOpenerFunc, |
647 | ProgressMonitor progressMonitor, |
648 | BuildRef* prims, |
649 | const size_t extSize, |
650 | const PrimInfo& pinfo, |
651 | const Settings& settings) |
652 | { |
653 | typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic; |
654 | Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor); |
655 | |
656 | return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>( |
657 | heuristic, |
658 | prims, |
659 | PrimInfoExtRange(0,pinfo.size(),extSize,pinfo), |
660 | createAlloc, |
661 | createNode, |
662 | updateNode, |
663 | createLeaf, |
664 | progressMonitor, |
665 | settings); |
666 | } |
667 | }; |
668 | } |
669 | } |
670 | |