// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#define MBLUR_NUM_TEMPORAL_BINS 2
#define MBLUR_NUM_OBJECT_BINS 32

#include "../bvh/bvh.h"
#include "../common/primref_mb.h"
#include "heuristic_binning_array_aligned.h"
#include "heuristic_timesplit_array.h"

namespace embree
{
  namespace isa
  {
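    /*! Reference-counted wrapper around a primitive vector. Temporal splits allocate
     *  additional PrimRefMB vectors that may be shared by several build records; the
     *  wrapped vector is deleted once the last reference is released via decRef(). */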
    template<typename T>
    struct SharedVector
    {
      __forceinline SharedVector() {}

      __forceinline SharedVector(T* ptr, size_t refCount = 1)
        : prims(ptr), refCount(refCount) {}

      __forceinline void incRef() {
        refCount++;
      }

      __forceinline void decRef()
      {
        if (--refCount == 0)
          delete prims;
      }

      T* prims;
      size_t refCount;
    };

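    /*! Tracks the child build records created while splitting one node, together with
     *  the (possibly shared) primitive vectors they reference, so that vectors allocated
     *  by temporal splits are freed exactly once when no child uses them anymore. */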
    template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
    struct LocalChildListT
    {
      typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;

      __forceinline LocalChildListT (const BuildRecord& record)
        : numChildren(1), numSharedPrimVecs(1)
      {
        /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
        children[0] = record;
        primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
      }

      __forceinline ~LocalChildListT()
      {
        for (size_t i = 0; i < numChildren; i++)
          primvecs[i]->decRef();
      }

      __forceinline BuildRecord& operator[] ( const size_t i ) {
        return children[i];
      }

      __forceinline size_t size() const {
        return numChildren;
      }

      __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
      {
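        /* Replace the best child by the left record and append the right record. Each record
           either keeps referencing the best child's shared primitive vector (incRef) or gets a
           fresh wrapper around its own vector; ownership of a vector newly allocated by a
           temporal split is transferred to that wrapper, hence the release() below. */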
        SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
        if (lrecord.prims.prims == bsharedPrimVec->prims) {
          primvecs[bestChild] = bsharedPrimVec;
          bsharedPrimVec->incRef();
        }
        else {
          primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
        }

        if (rrecord.prims.prims == bsharedPrimVec->prims) {
          primvecs[numChildren] = bsharedPrimVec;
          bsharedPrimVec->incRef();
        }
        else {
          primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
        }
        bsharedPrimVec->decRef();
        new_vector.release();

        children[bestChild] = lrecord;
        children[numChildren] = rrecord;
        numChildren++;
      }

    public:
      array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;
      array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs;
      size_t numChildren;

      array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs;
      size_t numSharedPrimVecs;
    };

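    /*! Recomputes a PrimRefMB for a reduced time range using the linear bounds of the
     *  statically typed Mesh; used whenever temporal splits shrink the time range. */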
    template<typename Mesh>
    struct RecalculatePrimRef
    {
      Scene* scene;

      __forceinline RecalculatePrimRef (Scene* scene)
        : scene(scene) {}

      __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
      {
        const unsigned geomID = prim.geomID();
        const unsigned primID = prim.primID();
        const Mesh* mesh = scene->get<Mesh>(geomID);
        const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
        const range<int> tbounds = mesh->timeSegmentRange(time_range);
        return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
      }

      // __noinline is workaround for ICC16 bug under MacOSX
      __noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
      {
        const unsigned geomID = prim.geomID();
        const unsigned primID = prim.primID();
        const Mesh* mesh = scene->get<Mesh>(geomID);
        const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
        const range<int> tbounds = mesh->timeSegmentRange(time_range);
        return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
      }

      __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
        return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
      }

      // __noinline is workaround for ICC16 bug under MacOSX
      __noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
        return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
      }
    };

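    /*! Variant of RecalculatePrimRef that works on any Geometry through the virtual
     *  vlinearBounds() interface instead of a concrete Mesh type. */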
    struct VirtualRecalculatePrimRef
    {
      Scene* scene;

      __forceinline VirtualRecalculatePrimRef (Scene* scene)
        : scene(scene) {}

      __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
      {
        const unsigned geomID = prim.geomID();
        const unsigned primID = prim.primID();
        const Geometry* mesh = scene->get(geomID);
        const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
        const range<int> tbounds = mesh->timeSegmentRange(time_range);
        return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
      }

      __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
      {
        const unsigned geomID = prim.geomID();
        const unsigned primID = prim.primID();
        const Geometry* mesh = scene->get(geomID);
        const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
        const range<int> tbounds = mesh->timeSegmentRange(time_range);
        return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
      }

      __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
        return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
      }

      __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
        return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
      }
    };

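    /*! Builder for multi-segment motion blur BVHs. It combines binned SAH object splits
     *  with temporal splits of the time range; node and leaf layout are provided through
     *  user-supplied callbacks. */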
    struct BVHBuilderMSMBlur
    {
      /*! settings for msmblur builder */
      struct Settings
      {
        /*! default settings */
        Settings ()
          : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
            travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
            singleThreadThreshold(1024) {}

        Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
          : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
            travCost(travCost), intCost(intCost), singleLeafTimeSegment(false), singleThreadThreshold(singleThreadThreshold)
        {
          this->minLeafSize = min(minLeafSize,maxLeafSize);
        }

      public:
        size_t branchingFactor;        //!< branching factor of BVH to build
        size_t maxDepth;               //!< maximum depth of BVH to build
        size_t logBlockSize;           //!< log2 of blocksize for SAH heuristic
        size_t minLeafSize;            //!< minimum size of a leaf
        size_t maxLeafSize;            //!< maximum size of a leaf
        float travCost;                //!< estimated cost of one traversal step
        float intCost;                 //!< estimated cost of one primitive intersection
        bool singleLeafTimeSegment;    //!< split time range until each leaf spans a single time segment
        size_t singleThreadThreshold;  //!< threshold when we switch to single threaded build
      };

      struct BuildRecord
      {
      public:
        __forceinline BuildRecord () {}

        __forceinline BuildRecord (size_t depth)
          : depth(depth) {}

        __forceinline BuildRecord (const SetMB& prims, size_t depth)
          : depth(depth), prims(prims) {}

        __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
          return a.prims.size() < b.prims.size();
        }

        __forceinline size_t size() const {
          return prims.size();
        }

      public:
        size_t depth;  //!< Depth of the root of this subtree.
        SetMB prims;   //!< The list of primitives.
      };

      struct BuildRecordSplit : public BuildRecord
      {
        __forceinline BuildRecordSplit () {}

        __forceinline BuildRecordSplit (size_t depth)
          : BuildRecord(depth) {}

        __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
          : BuildRecord(record), split(split) {}

        BinSplit<MBLUR_NUM_OBJECT_BINS> split;
      };

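      /*! Builder implementation, parameterized over the node reference type, the
       *  primitive recalculation functor, and the allocation/node/leaf callbacks. */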
      template<
        typename NodeRef,
        typename RecalculatePrimRef,
        typename Allocator,
        typename CreateAllocFunc,
        typename CreateNodeFunc,
        typename SetNodeFunc,
        typename CreateLeafFunc,
        typename ProgressMonitor>

      class BuilderT
      {
        ALIGNED_CLASS_(16);
        static const size_t MAX_BRANCHING_FACTOR = 16;  //!< maximum supported BVH branching factor
        static const size_t MIN_LARGE_LEAF_LEVELS = 8;  //!< create balanced tree if we are that many levels before the maximum tree depth

        typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
        typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
        typedef mvector<PrimRefMB>* PrimRefVector;
        typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
        typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
        typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;

      public:

        BuilderT (MemoryMonitorInterface* device,
                  const RecalculatePrimRef recalculatePrimRef,
                  const CreateAllocFunc createAlloc,
                  const CreateNodeFunc createNode,
                  const SetNodeFunc setNode,
                  const CreateLeafFunc createLeaf,
                  const ProgressMonitor progressMonitor,
                  const Settings& settings)
          : cfg(settings),
            heuristicObjectSplit(),
            heuristicTemporalSplit(device, recalculatePrimRef),
            recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
            progressMonitor(progressMonitor)
        {
          if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
            throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
        }

        /*! finds the best split */
        const Split find(const SetMB& set)
        {
          /* first try standard object split */
          const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
          const float object_split_sah = object_split.splitSAH();

          /* test temporal splits only when object split was bad */
          const float leaf_sah = set.leafSAH(cfg.logBlockSize);
          if (object_split_sah < 0.50f*leaf_sah)
            return object_split;

          /* do temporal splits only if the time range is big enough */
          if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
          {
            const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
            const float temporal_split_sah = temporal_split.splitSAH();

            /* take temporal split if it improved SAH */
            if (temporal_split_sah < object_split_sah)
              return temporal_split;
          }

          return object_split;
        }

        /*! array partitioning */
        __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
        {
          /* perform object split */
          if (likely(split.data == Split::SPLIT_OBJECT)) {
            heuristicObjectSplit.split(split,set,lset,rset);
          }
          /* perform temporal split */
          else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
            return heuristicTemporalSplit.split(split,set,lset,rset);
          }
          /* perform fallback split */
          else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
            set.deterministic_order();
            splitFallback(set,lset,rset);
          }
          /* split by geometry */
          else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
            set.deterministic_order();
            splitByGeometry(set,lset,rset);
          }
          else
            assert(false);

          return std::unique_ptr<mvector<PrimRefMB>>();
        }

        /*! finds the best fallback split */
        __noinline Split findFallback(const SetMB& set)
        {
          /* split if primitives are not from same geometry */
          if (!sameGeometry(set))
            return Split(0.0f,Split::SPLIT_GEOMID);

          /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
          if (cfg.singleLeafTimeSegment)
          {
            /* test if one primitive has more than one time segment in time range, if so split time */
            for (size_t i=set.begin(); i<set.end(); i++)
            {
              const PrimRefMB& prim = (*set.prims)[i];
              const range<int> itime_range = prim.timeSegmentRange(set.time_range);
              const int localTimeSegments = itime_range.size();
              assert(localTimeSegments > 0);
              if (localTimeSegments > 1) {
                const int icenter = (itime_range.begin() + itime_range.end())/2;
                const float splitTime = prim.timeStep(icenter);
                return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
              }
            }
          }

          /* otherwise return fallback split */
          return Split(0.0f,Split::SPLIT_FALLBACK);
        }

        /*! performs fallback split */
        void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
        {
          mvector<PrimRefMB>& prims = *set.prims;

          const size_t begin = set.begin();
          const size_t end = set.end();
          const size_t center = (begin + end + 1) / 2;

          PrimInfoMB linfo = empty;
          for (size_t i=begin; i<center; i++)
            linfo.add_primref(prims[i]);

          PrimInfoMB rinfo = empty;
          for (size_t i=center; i<end; i++)
            rinfo.add_primref(prims[i]);

          new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
          new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end  ),set.time_range);
        }

        /*! checks if all primitives are from the same geometry */
        __forceinline bool sameGeometry(const SetMB& set)
        {
          if (set.size() == 0) return true;
          mvector<PrimRefMB>& prims = *set.prims;
          const size_t begin = set.begin();
          const size_t end = set.end();
          unsigned int firstGeomID = prims[begin].geomID();
          for (size_t i=begin+1; i<end; i++) {
            if (prims[i].geomID() != firstGeomID) {
              return false;
            }
          }
          return true;
        }

        /* split by geometry ID */
        void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
        {
          assert(set.size() > 1);

          mvector<PrimRefMB>& prims = *set.prims;
          const size_t begin = set.begin();
          const size_t end = set.end();

          PrimInfoMB left(empty);
          PrimInfoMB right(empty);
          unsigned int geomID = prims[begin].geomID();
          size_t center = serial_partitioning(prims.data(),begin,end,left,right,
                                              [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
                                              [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });

          new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
          new (&rset) SetMB(right,set.prims,range<size_t>(center,end  ),set.time_range);
        }

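        /*! Creates a leaf, or a small subtree of leaves when too many primitives remain:
         *  children are generated with fallback (and, if required, temporal) splits until
         *  each of them fits into a leaf or the branching factor is reached. */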
        const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
        {
          /* this should never occur but is a fatal error */
          if (in.depth > cfg.maxDepth)
            throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

          /* replace already found split by fallback split */
          const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));

          /* special case when directly creating leaf without any splits that could shrink time_range */
          bool force_split = false;
          if (current.depth == 1 && current.size() > 0)
          {
            BBox1f c = empty;
            BBox1f p = current.prims.time_range;
            for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
              mvector<PrimRefMB>& prims = *current.prims.prims;
              c.extend(prims[i].time_range);
            }

            force_split = c.lower > p.lower || c.upper < p.upper;
          }

          /* create leaf for few primitives */
          if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
            return createLeaf(current,alloc);

          /* fill all children by always splitting the largest one */
          bool hasTimeSplits = false;
          NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
          LocalChildListSplit children(current);

          do {
            /* find best child with largest size */
            size_t bestChild = -1;
            size_t bestSize = 0;
            for (size_t i=0; i<children.size(); i++)
            {
              /* ignore leaves as they cannot get split */
              if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
                continue;

              force_split = false;

              /* remember child with largest size */
              if (children[i].size() > bestSize) {
                bestSize = children[i].size();
                bestChild = i;
              }
            }
            if (bestChild == -1) break;

            /* perform best found split */
            BuildRecordSplit& brecord = children[bestChild];
            BuildRecordSplit lrecord(current.depth+1);
            BuildRecordSplit rrecord(current.depth+1);
            std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
            hasTimeSplits |= new_vector != nullptr;

            /* find new splits */
            lrecord.split = findFallback(lrecord.prims);
            rrecord.split = findFallback(rrecord.prims);
            children.split(bestChild,lrecord,rrecord,std::move(new_vector));

          } while (children.size() < cfg.branchingFactor);

          /* detect time_ranges that have shrunk */
          for (size_t i=0; i<children.size(); i++) {
            const BBox1f c = children[i].prims.time_range;
            const BBox1f p = in.prims.time_range;
            hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
          }

          /* create node */
          auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);

          /* recurse into each child and perform reduction */
          LBBox3fa gbounds = empty;
          for (size_t i=0; i<children.size(); i++) {
            values[i] = createLargeLeaf(children[i],alloc);
            gbounds.extend(values[i].lbounds);
          }

          setNode(current,children.children.data(),node,values,children.numChildren);

          /* calculate geometry bounds of this node */
          if (hasTimeSplits)
            return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
          else
            return NodeRecordMB4D(node,gbounds,current.prims.time_range);
        }

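        /*! Main recursion: chooses the best object/temporal split, partitions the primitives
         *  into up to branchingFactor children (always splitting the child with the largest
         *  expected half area), and recurses, in parallel for sufficiently large records. */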
        const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
        {
          /* get thread local allocator */
          if (!alloc)
            alloc = createAlloc();

          /* call memory monitor function to signal progress */
          if (toplevel && current.size() <= cfg.singleThreadThreshold)
            progressMonitor(current.size());

          /*! find best split */
          const Split csplit = find(current.prims);

          /*! compute leaf and split cost */
          const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
          const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
          assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));

          /*! create a leaf node when threshold reached or SAH tells us to stop */
          if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
            current.prims.deterministic_order();
            return createLargeLeaf(current,alloc);
          }

          /*! perform initial split */
          SetMB lprims,rprims;
          std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
          bool hasTimeSplits = new_vector != nullptr;
          NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
          LocalChildList children(current);
          {
            BuildRecord lrecord(lprims,current.depth+1);
            BuildRecord rrecord(rprims,current.depth+1);
            children.split(0,lrecord,rrecord,std::move(new_vector));
          }

          /*! split until node is full or SAH tells us to stop */
          while (children.size() < cfg.branchingFactor)
          {
            /*! find best child to split */
            float bestArea = neg_inf;
            ssize_t bestChild = -1;
            for (size_t i=0; i<children.size(); i++)
            {
              if (children[i].size() <= cfg.minLeafSize) continue;
              if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
                bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
              }
            }
            if (bestChild == -1) break;

            /* perform split */
            BuildRecord& brecord = children[bestChild];
            BuildRecord lrecord(current.depth+1);
            BuildRecord rrecord(current.depth+1);
            Split csplit = find(brecord.prims);
            std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
            hasTimeSplits |= new_vector != nullptr;
            children.split(bestChild,lrecord,rrecord,std::move(new_vector));
          }

          /* detect time_ranges that have shrunk */
          for (size_t i=0; i<children.size(); i++) {
            const BBox1f c = children[i].prims.time_range;
            const BBox1f p = current.prims.time_range;
            hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
          }

          /* sort buildrecords for simpler shadow ray traversal */
          //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (needs to be verified) !!

          /*! create an inner node */
          auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
          LBBox3fa gbounds = empty;

          /* spawn tasks */
          if (unlikely(current.size() > cfg.singleThreadThreshold))
          {
            /*! parallel_for is faster than spawning sub-tasks */
            parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
              for (size_t i=r.begin(); i<r.end(); i++) {
                values[i] = recurse(children[i],nullptr,true);
                _mm_mfence(); // to allow non-temporal stores during build
              }
            });

            /*! merge bounding boxes */
            for (size_t i=0; i<children.size(); i++)
              gbounds.extend(values[i].lbounds);
          }
          /* recurse into each child */
          else
          {
            //for (size_t i=0; i<children.size(); i++)
            for (ssize_t i=children.size()-1; i>=0; i--) {
              values[i] = recurse(children[i],alloc,false);
              gbounds.extend(values[i].lbounds);
            }
          }

          setNode(current,children.children.data(),node,values,children.numChildren);

          /* calculate geometry bounds of this node */
          if (unlikely(hasTimeSplits))
            return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
          else
            return NodeRecordMB4D(node,gbounds,current.prims.time_range);
        }

        /*! builder entry function */
        __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
        {
          const SetMB set(pinfo,&prims);
          auto ret = recurse(BuildRecord(set,1),nullptr,true);
          _mm_mfence(); // to allow non-temporal stores during build
          return ret;
        }

      private:
        Settings cfg;
        HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
        HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
        const RecalculatePrimRef recalculatePrimRef;
        const CreateAllocFunc createAlloc;
        const CreateNodeFunc createNode;
        const SetNodeFunc setNode;
        const CreateLeafFunc createLeaf;
        const ProgressMonitor progressMonitor;
      };

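      /*! convenience entry point that instantiates BuilderT with the allocator type
       *  deduced from createAlloc() and builds the BVH over the given primitives */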
      template<typename NodeRef,
               typename RecalculatePrimRef,
               typename CreateAllocFunc,
               typename CreateNodeFunc,
               typename SetNodeFunc,
               typename CreateLeafFunc,
               typename ProgressMonitorFunc>

      static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
                                                    const PrimInfoMB& pinfo,
                                                    MemoryMonitorInterface* device,
                                                    const RecalculatePrimRef recalculatePrimRef,
                                                    const CreateAllocFunc createAlloc,
                                                    const CreateNodeFunc createNode,
                                                    const SetNodeFunc setNode,
                                                    const CreateLeafFunc createLeaf,
                                                    const ProgressMonitorFunc progressMonitor,
                                                    const Settings& settings)
      {
        typedef BuilderT<
          NodeRef,
          RecalculatePrimRef,
          decltype(createAlloc()),
          CreateAllocFunc,
          CreateNodeFunc,
          SetNodeFunc,
          CreateLeafFunc,
          ProgressMonitorFunc> Builder;

        Builder builder(device,
                        recalculatePrimRef,
                        createAlloc,
                        createNode,
                        setNode,
                        createLeaf,
                        progressMonitor,
                        settings);

        return builder(prims,pinfo);
      }
    };
  }
}