// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

// TODO:
//   - adjust parallel build thresholds
//   - openNodesBasedOnExtend should consider max extended size

#pragma once

#include "heuristic_binning.h"
#include "heuristic_spatial.h"

/* stop opening if all bref.geomIDs are the same */
#define EQUAL_GEOMID_STOP_CRITERIA 1

/* 10% spatial extend threshold */
#define MAX_EXTEND_THRESHOLD 0.1f

/* maximum is 8 children */
#define MAX_OPENED_CHILD_NODES 8

/* open until all build refs are below threshold size in one step */
#define USE_LOOP_OPENING 0

namespace embree
{
  namespace isa
  {
    /*! Performs standard object binning */
    template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
    struct HeuristicArrayOpenMergeSAH
    {
      typedef BinSplit<OBJECT_BINS> Split;
      typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;

      static const size_t PARALLEL_THRESHOLD = 1024;
      static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
      static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;

      static const size_t MOVE_STEP_SIZE = 64;
      static const size_t CREATE_SPLITS_STEP_SIZE = 128;

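      /*! default constructor */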
      __forceinline HeuristicArrayOpenMergeSAH ()
        : prims0(nullptr) {}

      /*! remember prim array */
      __forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
        : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
      {
        assert(max_open_size <= MAX_OPENED_CHILD_NODES);
      }

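      /*! decides whether a build ref should get opened: its node must not be a leaf and its bounds must exceed MAX_EXTEND_THRESHOLD of the set's largest geometry extent */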
      struct OpenHeuristic
      {
        __forceinline OpenHeuristic ( const PrimInfoExtRange& pinfo )
        {
          const Vec3fa diag = pinfo.geomBounds.size();
          dim = maxDim(diag);
          assert(diag[dim] > 0.0f);
          inv_max_extend = 1.0f / diag[dim];
        }

        __forceinline bool operator () ( PrimRef& prim ) const {
          return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
        }

      private:
        size_t dim;
        float inv_max_extend;
      };

      /*! compute extended ranges */
      __forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
      {
        assert(set.ext_range_size() > 0);
        const float left_factor           = (float)lweight / (lweight + rweight);
        const size_t ext_range_size       = set.ext_range_size();
        const size_t left_ext_range_size  = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
        const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
        lset.set_ext_range(lset.end() + left_ext_range_size);
        rset.set_ext_range(rset.end() + right_ext_range_size);
      }

      /*! move ranges */
      __forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
      {
        const size_t left_ext_range_size = lset.ext_range_size();
        const size_t right_size = rset.size();

        /* has the left child an extended range? */
        if (left_ext_range_size > 0)
        {
          /* left extended range smaller than right range ? */
          if (left_ext_range_size < right_size)
          {
            /* only move a small part of the beginning of the right range to the end */
            parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++)
                  prims0[i+right_size] = prims0[i];
              });
          }
          else
          {
            /* no overlap, move entire right range to new location, can be made fully parallel */
            parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++)
                  prims0[i+left_ext_range_size] = prims0[i];
              });
          }
          /* update right range */
          assert(rset.ext_end() + left_ext_range_size == set.ext_end());
          rset.move_right(left_ext_range_size);
        }
      }

      /* estimates the extra space required when opening, and checks if all primitives are from the same geometry */
      __noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
      {
        const OpenHeuristic heuristic(set);
        const unsigned int geomID = prims0[set.begin()].geomID();

        auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {
          bool commonGeomID = true;
          size_t opens = 0;
          for (size_t i=r.begin(); i<r.end(); i++) {
            commonGeomID &= prims0[i].geomID() == geomID;
            if (heuristic(prims0[i]))
              opens += prims0[i].node.getN()-1; // coarse approximation
          }
          return std::pair<size_t,bool>(opens,commonGeomID);
        };
        auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {
          return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);
        };
        return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
      }

      // FIXME: should consider maximum available extended size
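      /*! opens all build refs selected by the heuristic in a single pass and appends the newly created children to the extended range of the set */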
      __noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
      {
        const OpenHeuristic heuristic(set);
        const size_t ext_range_start = set.end();

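        /* note: the sequential path below is disabled ('false &&'), so the parallel path is always taken */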
        if (false && set.size() < PARALLEL_THRESHOLD)
        {
          size_t extra_elements = 0;
          for (size_t i=set.begin(); i<set.end(); i++)
          {
            if (heuristic(prims0[i]))
            {
              PrimRef tmp[MAX_OPENED_CHILD_NODES];
              const size_t n = nodeOpenerFunc(prims0[i],tmp);
              assert(extra_elements + n-1 <= set.ext_range_size());
              for (size_t j=0; j<n; j++)
                set.extend_center2(tmp[j]);

              prims0[i] = tmp[0];
              for (size_t j=1; j<n; j++)
                prims0[ext_range_start+extra_elements+j-1] = tmp[j];
              extra_elements += n-1;
            }
          }
          set._end += extra_elements;
        }
        else
        {
          std::atomic<size_t> ext_elements;
          ext_elements.store(0);
          PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
              PrimInfo info(empty);
              for (size_t i=r.begin(); i<r.end(); i++)
                if (heuristic(prims0[i]))
                {
                  PrimRef tmp[MAX_OPENED_CHILD_NODES];
                  const size_t n = nodeOpenerFunc(prims0[i],tmp);
                  const size_t ID = ext_elements.fetch_add(n-1);
                  assert(ID + n-1 <= set.ext_range_size());

                  for (size_t j=0; j<n; j++)
                    info.extend_center2(tmp[j]);

                  prims0[i] = tmp[0];
                  for (size_t j=1; j<n; j++)
                    prims0[ext_range_start+ID+j-1] = tmp[j];
                }
              return info;
            }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
          set.centBounds.extend(info.centBounds);
          assert(ext_elements.load() <= set.ext_range_size());
          set._end += ext_elements.load();
        }
      }

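      /*! iteratively opens build refs selected by the heuristic until no more refs qualify or the estimated output of the next iteration would no longer fit into the extended range */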
      __noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
      {
        const OpenHeuristic heuristic(set);
        size_t next_iteration_extra_elements = est_new_elements;

        while (next_iteration_extra_elements <= set.ext_range_size())
        {
          next_iteration_extra_elements = 0;
          size_t extra_elements = 0;
          const size_t ext_range_start = set.end();

          for (size_t i=set.begin(); i<set.end(); i++)
          {
            if (heuristic(prims0[i]))
            {
              PrimRef tmp[MAX_OPENED_CHILD_NODES];
              const size_t n = nodeOpenerFunc(prims0[i],tmp);
              assert(extra_elements + n-1 <= set.ext_range_size());
              for (size_t j=0; j<n; j++)
                set.extend_center2(tmp[j]);

              prims0[i] = tmp[0];
              for (size_t j=1; j<n; j++)
                prims0[ext_range_start+extra_elements+j-1] = tmp[j];
              extra_elements += n-1;

              for (size_t j=0; j<n; j++)
                if (heuristic(tmp[j]))
                  next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
            }
          }
          assert( extra_elements <= set.ext_range_size());
          set._end += extra_elements;

          for (size_t i=set.begin(); i<set.end(); i++)
            assert(prims0[i].numPrimitives() > 0);

          if (unlikely(next_iteration_extra_elements == 0)) break;
        }
      }

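      /*! decides whether nodes get opened and finds the best object split; opening is disabled
          when the build refs do not overlap, when they all stem from the same geometry
          (EQUAL_GEOMID_STOP_CRITERIA), or when the remaining extended range is too small */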
      __noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
      {
        /* single element */
        if (set.size() <= 1)
          return Split();

        /* disable opening if there is no overlap */
        const size_t D = 4;
        if (unlikely(set.has_ext_range() && set.size() <= D))
        {
          bool disjoint = true;
          for (size_t j=set.begin(); j<set.end()-1; j++) {
            for (size_t i=set.begin()+1; i<set.end(); i++) {
              if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
                disjoint = false; break;
              }
            }
          }
          if (disjoint) set.set_ext_range(set.end()); /* disables opening */
        }

        std::pair<size_t,bool> p(0,false);

        /* disable opening when all primitives are from the same geometry */
        if (unlikely(set.has_ext_range()))
        {
          p = getProperties(set);
#if EQUAL_GEOMID_STOP_CRITERIA == 1
          if (p.second) set.set_ext_range(set.end()); /* disable opening */
#endif
        }

        /* open nodes when we have sufficient space available */
        if (unlikely(set.has_ext_range()))
        {
#if USE_LOOP_OPENING == 1
          openNodesBasedOnExtendLoop(set,p.first);
#else
          if (p.first <= set.ext_range_size())
            openNodesBasedOnExtend(set);
#endif

          /* disable opening when insufficient space for opening a node is available */
          if (set.ext_range_size() < max_open_size-1)
            set.set_ext_range(set.end()); /* disable opening */
        }

        /* find best split */
        return object_find(set,logBlockSize);
      }


      /*! finds the best object split */
      __forceinline const Split object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
      {
        if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);
        else                                 return parallel_object_find  (set,logBlockSize);
      }

      /*! finds the best object split sequentially */
      __noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
      {
        Binner binner(empty);
        const BinMapping<OBJECT_BINS> mapping(set.centBounds);
        binner.bin(prims0,set.begin(),set.end(),mapping);
        return binner.best(mapping,logBlockSize);
      }

      /*! finds the best object split in parallel */
      __noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
      {
        Binner binner(empty);
        const BinMapping<OBJECT_BINS> mapping(set.centBounds);
        const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
        auto body = [&] (const range<size_t>& r) -> Binner {
          Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;
        };
        auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {
          Binner r = b0; r.merge(b1,_mapping.size()); return r;
        };
        binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
        return binner.best(mapping,logBlockSize);
      }

      /*! array partitioning */
      __noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
      {
        PrimInfoExtRange set = set_i;

        /* valid split */
        if (unlikely(!split.valid())) {
          deterministic_order(set);
          splitFallback(set,lset,rset);
          return;
        }

        std::pair<size_t,size_t> ext_weights(0,0);

        /* object split */
        if (likely(set.size() < PARALLEL_THRESHOLD))
          ext_weights = sequential_object_split(split,set,lset,rset);
        else
          ext_weights = parallel_object_split(split,set,lset,rset);

        /* if we have an extended range, set extended child ranges and move right split range */
        if (unlikely(set.has_ext_range()))
        {
          setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
          moveExtentedRange(set,lset,rset);
        }
      }

      /*! sequential array partitioning */
      std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
      {
        const size_t begin = set.begin();
        const size_t end   = set.end();
        PrimInfo local_left(empty);
        PrimInfo local_right(empty);
        const unsigned int splitPos = split.pos;
        const unsigned int splitDim = split.dim;
        const unsigned int splitDimMask = (unsigned int)1 << splitDim;

        const vint4 vSplitPos(splitPos);
        const vbool4 vSplitMask( (int)splitDimMask );

        size_t center = serial_partitioning(prims0,
                                            begin,end,local_left,local_right,
                                            [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
                                            [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });

        new (&lset) PrimInfoExtRange(begin,center,center,local_left);
        new (&rset) PrimInfoExtRange(center,end,end,local_right);
        assert(area(lset.geomBounds) >= 0.0f);
        assert(area(rset.geomBounds) >= 0.0f);
        return std::pair<size_t,size_t>(local_left.size(),local_right.size());
      }

      /*! parallel array partitioning */
      __noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
      {
        const size_t begin = set.begin();
        const size_t end   = set.end();
        PrimInfo left(empty);
        PrimInfo right(empty);
        const unsigned int splitPos = split.pos;
        const unsigned int splitDim = split.dim;
        const unsigned int splitDimMask = (unsigned int)1 << splitDim;

        const vint4 vSplitPos(splitPos);
        const vbool4 vSplitMask( (int)splitDimMask );
        auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };

        const size_t center = parallel_partitioning(
          prims0,begin,end,EmptyTy(),left,right,isLeft,
          [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
          [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
          PARALLEL_PARTITION_BLOCK_SIZE);

        new (&lset) PrimInfoExtRange(begin,center,center,left);
        new (&rset) PrimInfoExtRange(center,end,end,right);
        assert(area(lset.geomBounds) >= 0.0f);
        assert(area(rset.geomBounds) >= 0.0f);

        return std::pair<size_t,size_t>(left.size(),right.size());
      }

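      /*! restores a deterministic primitive order by sorting the range */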
      void deterministic_order(const extended_range<size_t>& set)
      {
        /* required as parallel partition destroys original primitive order */
        std::sort(&prims0[set.begin()],&prims0[set.end()]);
      }

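      /*! fallback split that simply divides the range in the middle */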
      __forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
      {
        const size_t begin = set.begin();
        const size_t end   = set.end();
        const size_t center = (begin + end)/2;

        PrimInfo left(empty);
        for (size_t i=begin; i<center; i++)
          left.add_center2(prims0[i]);

        const size_t lweight = left.end;

        PrimInfo right(empty);
        for (size_t i=center; i<end; i++)
          right.add_center2(prims0[i]);

        const size_t rweight = right.end;
        new (&lset) PrimInfoExtRange(begin,center,center,left);
        new (&rset) PrimInfoExtRange(center,end,end,right);

        /* if we have an extended range */
        if (set.has_ext_range())
        {
          setExtentedRanges(set,lset,rset,lweight,rweight);
          moveExtentedRange(set,lset,rset);
        }
      }

    private:
      PrimRef* const prims0;
      const NodeOpenerFunc& nodeOpenerFunc;
      size_t max_open_size;
    };
  }
}