1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "priminfo.h"
7#include "../../common/algorithms/parallel_reduce.h"
8#include "../../common/algorithms/parallel_partition.h"
9
10namespace embree
11{
12 namespace isa
13 {
14 /*! mapping into bins */
15 template<size_t BINS>
16 struct BinMapping
17 {
18 public:
19 __forceinline BinMapping() {}
20
21 /*! calculates the mapping */
22 __forceinline BinMapping(size_t N, const BBox3fa& centBounds)
23 {
24 num = min(a: BINS,b: size_t(4.0f + 0.05f*N));
25 assert(num >= 1);
26 const vfloat4 eps = 1E-34f;
27 const vfloat4 diag = max(a: eps, b: (vfloat4) centBounds.size());
28 scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f));
29 ofs = (vfloat4) centBounds.lower;
30 }
31
32 /*! calculates the mapping */
33 __forceinline BinMapping(const BBox3fa& centBounds)
34 {
35 num = BINS;
36 const vfloat4 eps = 1E-34f;
37 const vfloat4 diag = max(a: eps, b: (vfloat4) centBounds.size());
38 scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f));
39 ofs = (vfloat4) centBounds.lower;
40 }
41
42 /*! calculates the mapping */
43 template<typename PrimInfo>
44 __forceinline BinMapping(const PrimInfo& pinfo)
45 {
46 const vfloat4 eps = 1E-34f;
47 num = min(a: BINS,b: size_t(4.0f + 0.05f*pinfo.size()));
48 const vfloat4 diag = max(a: eps,b: (vfloat4) pinfo.centBounds.size());
49 scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f));
50 ofs = (vfloat4) pinfo.centBounds.lower;
51 }
52
53 /*! returns number of bins */
54 __forceinline size_t size() const { return num; }
55
56 /*! slower but safe binning */
57 __forceinline Vec3ia bin(const Vec3fa& p) const
58 {
59 const vint4 i = floori(a: (vfloat4(p)-ofs)*scale);
60#if 1
61 assert(i[0] >= 0 && (size_t)i[0] < num);
62 assert(i[1] >= 0 && (size_t)i[1] < num);
63 assert(i[2] >= 0 && (size_t)i[2] < num);
64 return Vec3ia(i);
65#else
66 return Vec3ia(clamp(i,vint4(0),vint4(num-1)));
67#endif
68 }
69
70 /*! faster but unsafe binning */
71 __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const {
72 return Vec3ia(floori(a: (vfloat4(p)-ofs)*scale));
73 }
74
75 /*! faster but unsafe binning */
76 template<typename PrimRef>
77 __forceinline Vec3ia bin_unsafe(const PrimRef& p) const {
78 return bin_unsafe(p.binCenter());
79 }
80
81 /*! faster but unsafe binning */
82 template<typename PrimRef, typename BinBoundsAndCenter>
83 __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const {
84 return bin_unsafe(binBoundsAndCenter.binCenter(p));
85 }
86
87 template<typename PrimRef>
88 __forceinline bool bin_unsafe(const PrimRef& ref,
89 const vint4& vSplitPos,
90 const vbool4& splitDimMask) const // FIXME: rename to isLeft
91 {
92 return any(b: ((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask);
93 }
94 /*! calculates left spatial position of bin */
95 __forceinline float pos(const size_t bin, const size_t dim) const {
96 return madd(a: float(bin),b: 1.0f / scale[dim],c: ofs[dim]);
97 }
98
99 /*! returns true if the mapping is invalid in some dimension */
100 __forceinline bool invalid(const size_t dim) const {
101 return scale[dim] == 0.0f;
102 }
103
104 /*! stream output */
105 friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) {
106 return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}";
107 }
108
109 public:
110 size_t num;
111 vfloat4 ofs,scale; //!< linear function that maps to bin ID
112 };
113
114 /*! stores all information to perform some split */
115 template<size_t BINS>
116 struct BinSplit
117 {
118 enum
119 {
120 SPLIT_OBJECT = 0,
121 SPLIT_FALLBACK = 1,
122 SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already
123 SPLIT_TEMPORAL = 2,
124 SPLIT_GEOMID = 3,
125 };
126
127 /*! construct an invalid split by default */
128 __forceinline BinSplit()
129 : sah(inf), dim(-1), pos(0), data(0) {}
130
131 __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0)
132 : sah(sah), dim(dim), fpos(fpos), data(data) {}
133
134 /*! constructs specified split */
135 __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping)
136 : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {}
137
138 /*! tests if this split is valid */
139 __forceinline bool valid() const { return dim != -1; }
140
141 /*! calculates surface area heuristic for performing the split */
142 __forceinline float splitSAH() const { return sah; }
143
144 /*! stream output */
145 friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) {
146 return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}";
147 }
148
149 public:
150 float sah; //!< SAH cost of the split
151 int dim; //!< split dimension
152 union { int pos; float fpos; }; //!< bin index for splitting
153 unsigned int data; //!< extra optional split data
154 BinMapping<BINS> mapping; //!< mapping into bins
155 };
156
157 /*! stores extended information about the split */
158 template<typename BBox>
159 struct SplitInfoT
160 {
161
162 __forceinline SplitInfoT () {}
163
164 __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds)
165 : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {}
166
167 public:
168 size_t leftCount,rightCount;
169 BBox leftBounds,rightBounds;
170 };
171
172 typedef SplitInfoT<BBox3fa> SplitInfo;
173 typedef SplitInfoT<LBBox3fa> SplitInfo2;
174
175 /*! stores all binning information */
176 template<size_t BINS, typename PrimRef, typename BBox>
177 struct __aligned(64) BinInfoT
178 {
179 typedef BinSplit<BINS> Split;
180 typedef vbool4 vbool;
181 typedef vint4 vint;
182 typedef vfloat4 vfloat;
183
184 __forceinline BinInfoT() {
185 }
186
187 __forceinline BinInfoT(EmptyTy) {
188 clear();
189 }
190
191 /*! bin access function */
192 __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; }
193 __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; }
194
195 __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; }
196 __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; }
197
198 __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; }
199 __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; }
200
201 /*! clears the bin info */
202 __forceinline void clear()
203 {
204 for (size_t i=0; i<BINS; i++) {
205 bounds(i,0) = bounds(i,1) = bounds(i,2) = empty;
206 counts(i) = vuint4(zero);
207 }
208 }
209
210 /*! bins an array of primitives */
211 __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping)
212 {
213 if (unlikely(N == 0)) return;
214 size_t i;
215 for (i=0; i<N-1; i+=2)
216 {
217 /*! map even and odd primitive to bin */
218 BBox prim0; Vec3fa center0;
219 prims[i+0].binBoundsAndCenter(prim0,center0);
220 const vint4 bin0 = (vint4)mapping.bin(center0);
221
222 BBox prim1; Vec3fa center1;
223 prims[i+1].binBoundsAndCenter(prim1,center1);
224 const vint4 bin1 = (vint4)mapping.bin(center1);
225
226 /*! increase bounds for bins for even primitive */
227 const unsigned int b00 = extract<0>(b: bin0); bounds(b00,0).extend(prim0);
228 const unsigned int b01 = extract<1>(b: bin0); bounds(b01,1).extend(prim0);
229 const unsigned int b02 = extract<2>(b: bin0); bounds(b02,2).extend(prim0);
230 const unsigned int s0 = (unsigned int)prims[i+0].size();
231 counts(b00,0)+=s0;
232 counts(b01,1)+=s0;
233 counts(b02,2)+=s0;
234
235 /*! increase bounds of bins for odd primitive */
236 const unsigned int b10 = extract<0>(b: bin1); bounds(b10,0).extend(prim1);
237 const unsigned int b11 = extract<1>(b: bin1); bounds(b11,1).extend(prim1);
238 const unsigned int b12 = extract<2>(b: bin1); bounds(b12,2).extend(prim1);
239 const unsigned int s1 = (unsigned int)prims[i+1].size();
240 counts(b10,0)+=s1;
241 counts(b11,1)+=s1;
242 counts(b12,2)+=s1;
243 }
244 /*! for uneven number of primitives */
245 if (i < N)
246 {
247 /*! map primitive to bin */
248 BBox prim0; Vec3fa center0;
249 prims[i].binBoundsAndCenter(prim0,center0);
250 const vint4 bin0 = (vint4)mapping.bin(center0);
251
252 /*! increase bounds of bins */
253 const unsigned int s0 = (unsigned int)prims[i].size();
254 const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
255 const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
256 const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
257 }
258 }
259
260 /*! bins an array of primitives */
261 template<typename BinBoundsAndCenter>
262 __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
263 {
264 if (N == 0) return;
265
266 size_t i;
267 for (i=0; i<N-1; i+=2)
268 {
269 /*! map even and odd primitive to bin */
270 BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
271 const vint4 bin0 = (vint4)mapping.bin(center0);
272 BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);
273 const vint4 bin1 = (vint4)mapping.bin(center1);
274
275 /*! increase bounds for bins for even primitive */
276 const unsigned int s0 = prims[i+0].size();
277 const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
278 const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
279 const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
280
281 /*! increase bounds of bins for odd primitive */
282 const unsigned int s1 = prims[i+1].size();
283 const int b10 = extract<0>(b: bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1);
284 const int b11 = extract<1>(b: bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1);
285 const int b12 = extract<2>(b: bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1);
286 }
287
288 /*! for uneven number of primitives */
289 if (i < N)
290 {
291 /*! map primitive to bin */
292 BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
293 const vint4 bin0 = (vint4)mapping.bin(center0);
294
295 /*! increase bounds of bins */
296 const unsigned int s0 = prims[i+0].size();
297 const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
298 const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
299 const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
300 }
301 }
302
303 __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) {
304 bin(prims+begin,end-begin,mapping);
305 }
306
307 template<typename BinBoundsAndCenter>
308 __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) {
309 bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter);
310 }
311
312 /*! merges in other binning information */
313 __forceinline void merge (const BinInfoT& other, size_t numBins)
314 {
315
316 for (size_t i=0; i<numBins; i++)
317 {
318 counts(i) += other.counts(i);
319 bounds(i,0).extend(other.bounds(i,0));
320 bounds(i,1).extend(other.bounds(i,1));
321 bounds(i,2).extend(other.bounds(i,2));
322 }
323 }
324
325 /*! reduces binning information */
326 static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS)
327 {
328 BinInfoT c;
329 for (size_t i=0; i<numBins; i++)
330 {
331 c.counts(i) = a.counts(i)+b.counts(i);
332 c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0));
333 c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1));
334 c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2));
335 }
336 return c;
337 }
338
339 /*! finds the best split by scanning binning information */
340 __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const
341 {
342 /* sweep from right to left and compute parallel prefix of merged bounds */
343 vfloat4 rAreas[BINS];
344 vuint4 rCounts[BINS];
345 vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
346 for (size_t i=mapping.size()-1; i>0; i--)
347 {
348 count += counts(i);
349 rCounts[i] = count;
350 bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
351 by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
352 bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
353 rAreas[i][3] = 0.0f;
354 }
355 /* sweep from left to right and compute SAH */
356 vuint4 blocks_add = (1 << blocks_shift)-1;
357 vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
358 count = 0; bx = empty; by = empty; bz = empty;
359 for (size_t i=1; i<mapping.size(); i++, ii+=1)
360 {
361 count += counts(i-1);
362 bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
363 by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
364 bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
365 const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
366 const vfloat4 rArea = rAreas[i];
367 const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions.
368 const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
369 const vfloat4 sah = madd(a: lArea,b: vfloat4(lCount),c: rArea*vfloat4(rCount));
370 //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
371
372 vbestPos = select(m: sah < vbestSAH,t: ii ,f: vbestPos);
373 vbestSAH = select(m: sah < vbestSAH,t: sah,f: vbestSAH);
374 }
375
376 /* find best dimension */
377 float bestSAH = inf;
378 int bestDim = -1;
379 int bestPos = 0;
380 for (int dim=0; dim<3; dim++)
381 {
382 /* ignore zero sized dimensions */
383 if (unlikely(mapping.invalid(dim)))
384 continue;
385
386 /* test if this is a better dimension */
387 if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
388 bestDim = dim;
389 bestPos = vbestPos[dim];
390 bestSAH = vbestSAH[dim];
391 }
392 }
393 return Split(bestSAH,bestDim,bestPos,mapping);
394 }
395
396 /*! calculates extended split information */
397 __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
398 {
399 if (split.dim == -1) {
400 new (&info) SplitInfoT<BBox>(0,empty,0,empty);
401 return;
402 }
403
404 size_t leftCount = 0;
405 BBox leftBounds = empty;
406 for (size_t i=0; i<(size_t)split.pos; i++) {
407 leftCount += counts(i,split.dim);
408 leftBounds.extend(bounds(i,split.dim));
409 }
410 size_t rightCount = 0;
411 BBox rightBounds = empty;
412 for (size_t i=split.pos; i<mapping.size(); i++) {
413 rightCount += counts(i,split.dim);
414 rightBounds.extend(bounds(i,split.dim));
415 }
416 new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds);
417 }
418
419 /*! gets the number of primitives left of the split */
420 __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const
421 {
422 if (unlikely(split.dim == -1)) return -1;
423
424 size_t leftCount = 0;
425 for (size_t i = 0; i < (size_t)split.pos; i++) {
426 leftCount += counts(i, split.dim);
427 }
428 return leftCount;
429 }
430
431 /*! gets the number of primitives right of the split */
432 __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const
433 {
434 if (unlikely(split.dim == -1)) return -1;
435
436 size_t rightCount = 0;
437 for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {
438 rightCount += counts(i, split.dim);
439 }
440 return rightCount;
441 }
442
443 private:
444 BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
445 vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins
446 };
447 }
448
449 template<typename BinInfoT, typename BinMapping, typename PrimRef>
450 __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping)
451 {
452 if (likely(end-begin < parallelThreshold)) {
453 binner.bin(prims,begin,end,mapping);
454 } else {
455 binner = parallel_reduce(begin,end,blockSize,binner,
456 [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
457 [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
458 }
459 }
460
461 template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
462 __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
463 {
464 if (likely(end-begin < parallelThreshold)) {
465 binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
466 } else {
467 binner = parallel_reduce(begin,end,blockSize,binner,
468 [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
469 [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
470 }
471 }
472
473 template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef>
474 __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping)
475 {
476 if (!parallel) {
477 binner.bin(prims,begin,end,mapping);
478 } else {
479 binner = parallel_reduce(begin,end,blockSize,binner,
480 [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
481 [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
482 }
483 }
484
485 template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
486 __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
487 {
488 if (!parallel) {
489 binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
490 } else {
491 binner = parallel_reduce(begin,end,blockSize,binner,
492 [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
493 [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
494 }
495 }
496}
497

source code of qtquick3d/src/3rdparty/embree/kernels/builders/heuristic_binning.h