1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "priminfo.h" |
7 | #include "../../common/algorithms/parallel_reduce.h" |
8 | #include "../../common/algorithms/parallel_partition.h" |
9 | |
10 | namespace embree |
11 | { |
12 | namespace isa |
13 | { |
14 | /*! mapping into bins */ |
15 | template<size_t BINS> |
16 | struct BinMapping |
17 | { |
18 | public: |
19 | __forceinline BinMapping() {} |
20 | |
21 | /*! calculates the mapping */ |
22 | __forceinline BinMapping(size_t N, const BBox3fa& centBounds) |
23 | { |
24 | num = min(a: BINS,b: size_t(4.0f + 0.05f*N)); |
25 | assert(num >= 1); |
26 | const vfloat4 eps = 1E-34f; |
27 | const vfloat4 diag = max(a: eps, b: (vfloat4) centBounds.size()); |
28 | scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f)); |
29 | ofs = (vfloat4) centBounds.lower; |
30 | } |
31 | |
32 | /*! calculates the mapping */ |
33 | __forceinline BinMapping(const BBox3fa& centBounds) |
34 | { |
35 | num = BINS; |
36 | const vfloat4 eps = 1E-34f; |
37 | const vfloat4 diag = max(a: eps, b: (vfloat4) centBounds.size()); |
38 | scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f)); |
39 | ofs = (vfloat4) centBounds.lower; |
40 | } |
41 | |
42 | /*! calculates the mapping */ |
43 | template<typename PrimInfo> |
44 | __forceinline BinMapping(const PrimInfo& pinfo) |
45 | { |
46 | const vfloat4 eps = 1E-34f; |
47 | num = min(a: BINS,b: size_t(4.0f + 0.05f*pinfo.size())); |
48 | const vfloat4 diag = max(a: eps,b: (vfloat4) pinfo.centBounds.size()); |
49 | scale = select(m: diag > eps,t: vfloat4(0.99f*num)/diag,f: vfloat4(0.0f)); |
50 | ofs = (vfloat4) pinfo.centBounds.lower; |
51 | } |
52 | |
53 | /*! returns number of bins */ |
54 | __forceinline size_t size() const { return num; } |
55 | |
56 | /*! slower but safe binning */ |
57 | __forceinline Vec3ia bin(const Vec3fa& p) const |
58 | { |
59 | const vint4 i = floori(a: (vfloat4(p)-ofs)*scale); |
60 | #if 1 |
61 | assert(i[0] >= 0 && (size_t)i[0] < num); |
62 | assert(i[1] >= 0 && (size_t)i[1] < num); |
63 | assert(i[2] >= 0 && (size_t)i[2] < num); |
64 | return Vec3ia(i); |
65 | #else |
66 | return Vec3ia(clamp(i,vint4(0),vint4(num-1))); |
67 | #endif |
68 | } |
69 | |
70 | /*! faster but unsafe binning */ |
71 | __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const { |
72 | return Vec3ia(floori(a: (vfloat4(p)-ofs)*scale)); |
73 | } |
74 | |
75 | /*! faster but unsafe binning */ |
76 | template<typename PrimRef> |
77 | __forceinline Vec3ia bin_unsafe(const PrimRef& p) const { |
78 | return bin_unsafe(p.binCenter()); |
79 | } |
80 | |
81 | /*! faster but unsafe binning */ |
82 | template<typename PrimRef, typename BinBoundsAndCenter> |
83 | __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const { |
84 | return bin_unsafe(binBoundsAndCenter.binCenter(p)); |
85 | } |
86 | |
87 | template<typename PrimRef> |
88 | __forceinline bool bin_unsafe(const PrimRef& ref, |
89 | const vint4& vSplitPos, |
90 | const vbool4& splitDimMask) const // FIXME: rename to isLeft |
91 | { |
92 | return any(b: ((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask); |
93 | } |
94 | /*! calculates left spatial position of bin */ |
95 | __forceinline float pos(const size_t bin, const size_t dim) const { |
96 | return madd(a: float(bin),b: 1.0f / scale[dim],c: ofs[dim]); |
97 | } |
98 | |
99 | /*! returns true if the mapping is invalid in some dimension */ |
100 | __forceinline bool invalid(const size_t dim) const { |
101 | return scale[dim] == 0.0f; |
102 | } |
103 | |
104 | /*! stream output */ |
105 | friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) { |
106 | return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}" ; |
107 | } |
108 | |
109 | public: |
110 | size_t num; |
111 | vfloat4 ofs,scale; //!< linear function that maps to bin ID |
112 | }; |
113 | |
114 | /*! stores all information to perform some split */ |
115 | template<size_t BINS> |
116 | struct BinSplit |
117 | { |
118 | enum |
119 | { |
120 | SPLIT_OBJECT = 0, |
121 | SPLIT_FALLBACK = 1, |
122 | SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already |
123 | SPLIT_TEMPORAL = 2, |
124 | SPLIT_GEOMID = 3, |
125 | }; |
126 | |
127 | /*! construct an invalid split by default */ |
128 | __forceinline BinSplit() |
129 | : sah(inf), dim(-1), pos(0), data(0) {} |
130 | |
131 | __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0) |
132 | : sah(sah), dim(dim), fpos(fpos), data(data) {} |
133 | |
134 | /*! constructs specified split */ |
135 | __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping) |
136 | : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {} |
137 | |
138 | /*! tests if this split is valid */ |
139 | __forceinline bool valid() const { return dim != -1; } |
140 | |
141 | /*! calculates surface area heuristic for performing the split */ |
142 | __forceinline float splitSAH() const { return sah; } |
143 | |
144 | /*! stream output */ |
145 | friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) { |
146 | return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}" ; |
147 | } |
148 | |
149 | public: |
150 | float sah; //!< SAH cost of the split |
151 | int dim; //!< split dimension |
152 | union { int pos; float fpos; }; //!< bin index for splitting |
153 | unsigned int data; //!< extra optional split data |
154 | BinMapping<BINS> mapping; //!< mapping into bins |
155 | }; |
156 | |
157 | /*! stores extended information about the split */ |
158 | template<typename BBox> |
159 | struct SplitInfoT |
160 | { |
161 | |
162 | __forceinline SplitInfoT () {} |
163 | |
164 | __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds) |
165 | : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {} |
166 | |
167 | public: |
168 | size_t leftCount,rightCount; |
169 | BBox leftBounds,rightBounds; |
170 | }; |
171 | |
172 | typedef SplitInfoT<BBox3fa> SplitInfo; |
173 | typedef SplitInfoT<LBBox3fa> SplitInfo2; |
174 | |
175 | /*! stores all binning information */ |
176 | template<size_t BINS, typename PrimRef, typename BBox> |
177 | struct __aligned(64) BinInfoT |
178 | { |
179 | typedef BinSplit<BINS> Split; |
180 | typedef vbool4 vbool; |
181 | typedef vint4 vint; |
182 | typedef vfloat4 vfloat; |
183 | |
184 | __forceinline BinInfoT() { |
185 | } |
186 | |
187 | __forceinline BinInfoT(EmptyTy) { |
188 | clear(); |
189 | } |
190 | |
191 | /*! bin access function */ |
192 | __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; } |
193 | __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; } |
194 | |
195 | __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; } |
196 | __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; } |
197 | |
198 | __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; } |
199 | __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; } |
200 | |
201 | /*! clears the bin info */ |
202 | __forceinline void clear() |
203 | { |
204 | for (size_t i=0; i<BINS; i++) { |
205 | bounds(i,0) = bounds(i,1) = bounds(i,2) = empty; |
206 | counts(i) = vuint4(zero); |
207 | } |
208 | } |
209 | |
210 | /*! bins an array of primitives */ |
211 | __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping) |
212 | { |
213 | if (unlikely(N == 0)) return; |
214 | size_t i; |
215 | for (i=0; i<N-1; i+=2) |
216 | { |
217 | /*! map even and odd primitive to bin */ |
218 | BBox prim0; Vec3fa center0; |
219 | prims[i+0].binBoundsAndCenter(prim0,center0); |
220 | const vint4 bin0 = (vint4)mapping.bin(center0); |
221 | |
222 | BBox prim1; Vec3fa center1; |
223 | prims[i+1].binBoundsAndCenter(prim1,center1); |
224 | const vint4 bin1 = (vint4)mapping.bin(center1); |
225 | |
226 | /*! increase bounds for bins for even primitive */ |
227 | const unsigned int b00 = extract<0>(b: bin0); bounds(b00,0).extend(prim0); |
228 | const unsigned int b01 = extract<1>(b: bin0); bounds(b01,1).extend(prim0); |
229 | const unsigned int b02 = extract<2>(b: bin0); bounds(b02,2).extend(prim0); |
230 | const unsigned int s0 = (unsigned int)prims[i+0].size(); |
231 | counts(b00,0)+=s0; |
232 | counts(b01,1)+=s0; |
233 | counts(b02,2)+=s0; |
234 | |
235 | /*! increase bounds of bins for odd primitive */ |
236 | const unsigned int b10 = extract<0>(b: bin1); bounds(b10,0).extend(prim1); |
237 | const unsigned int b11 = extract<1>(b: bin1); bounds(b11,1).extend(prim1); |
238 | const unsigned int b12 = extract<2>(b: bin1); bounds(b12,2).extend(prim1); |
239 | const unsigned int s1 = (unsigned int)prims[i+1].size(); |
240 | counts(b10,0)+=s1; |
241 | counts(b11,1)+=s1; |
242 | counts(b12,2)+=s1; |
243 | } |
244 | /*! for uneven number of primitives */ |
245 | if (i < N) |
246 | { |
247 | /*! map primitive to bin */ |
248 | BBox prim0; Vec3fa center0; |
249 | prims[i].binBoundsAndCenter(prim0,center0); |
250 | const vint4 bin0 = (vint4)mapping.bin(center0); |
251 | |
252 | /*! increase bounds of bins */ |
253 | const unsigned int s0 = (unsigned int)prims[i].size(); |
254 | const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
255 | const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
256 | const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
257 | } |
258 | } |
259 | |
260 | /*! bins an array of primitives */ |
261 | template<typename BinBoundsAndCenter> |
262 | __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
263 | { |
264 | if (N == 0) return; |
265 | |
266 | size_t i; |
267 | for (i=0; i<N-1; i+=2) |
268 | { |
269 | /*! map even and odd primitive to bin */ |
270 | BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); |
271 | const vint4 bin0 = (vint4)mapping.bin(center0); |
272 | BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1); |
273 | const vint4 bin1 = (vint4)mapping.bin(center1); |
274 | |
275 | /*! increase bounds for bins for even primitive */ |
276 | const unsigned int s0 = prims[i+0].size(); |
277 | const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
278 | const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
279 | const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
280 | |
281 | /*! increase bounds of bins for odd primitive */ |
282 | const unsigned int s1 = prims[i+1].size(); |
283 | const int b10 = extract<0>(b: bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1); |
284 | const int b11 = extract<1>(b: bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1); |
285 | const int b12 = extract<2>(b: bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1); |
286 | } |
287 | |
288 | /*! for uneven number of primitives */ |
289 | if (i < N) |
290 | { |
291 | /*! map primitive to bin */ |
292 | BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); |
293 | const vint4 bin0 = (vint4)mapping.bin(center0); |
294 | |
295 | /*! increase bounds of bins */ |
296 | const unsigned int s0 = prims[i+0].size(); |
297 | const int b00 = extract<0>(b: bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
298 | const int b01 = extract<1>(b: bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
299 | const int b02 = extract<2>(b: bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
300 | } |
301 | } |
302 | |
303 | __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) { |
304 | bin(prims+begin,end-begin,mapping); |
305 | } |
306 | |
307 | template<typename BinBoundsAndCenter> |
308 | __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { |
309 | bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter); |
310 | } |
311 | |
312 | /*! merges in other binning information */ |
313 | __forceinline void merge (const BinInfoT& other, size_t numBins) |
314 | { |
315 | |
316 | for (size_t i=0; i<numBins; i++) |
317 | { |
318 | counts(i) += other.counts(i); |
319 | bounds(i,0).extend(other.bounds(i,0)); |
320 | bounds(i,1).extend(other.bounds(i,1)); |
321 | bounds(i,2).extend(other.bounds(i,2)); |
322 | } |
323 | } |
324 | |
325 | /*! reduces binning information */ |
326 | static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS) |
327 | { |
328 | BinInfoT c; |
329 | for (size_t i=0; i<numBins; i++) |
330 | { |
331 | c.counts(i) = a.counts(i)+b.counts(i); |
332 | c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0)); |
333 | c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1)); |
334 | c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2)); |
335 | } |
336 | return c; |
337 | } |
338 | |
339 | /*! finds the best split by scanning binning information */ |
340 | __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const |
341 | { |
342 | /* sweep from right to left and compute parallel prefix of merged bounds */ |
343 | vfloat4 rAreas[BINS]; |
344 | vuint4 rCounts[BINS]; |
345 | vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty; |
346 | for (size_t i=mapping.size()-1; i>0; i--) |
347 | { |
348 | count += counts(i); |
349 | rCounts[i] = count; |
350 | bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx); |
351 | by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by); |
352 | bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz); |
353 | rAreas[i][3] = 0.0f; |
354 | } |
355 | /* sweep from left to right and compute SAH */ |
356 | vuint4 blocks_add = (1 << blocks_shift)-1; |
357 | vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; |
358 | count = 0; bx = empty; by = empty; bz = empty; |
359 | for (size_t i=1; i<mapping.size(); i++, ii+=1) |
360 | { |
361 | count += counts(i-1); |
362 | bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx); |
363 | by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by); |
364 | bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz); |
365 | const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); |
366 | const vfloat4 rArea = rAreas[i]; |
367 | const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions. |
368 | const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); |
369 | const vfloat4 sah = madd(a: lArea,b: vfloat4(lCount),c: rArea*vfloat4(rCount)); |
370 | //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); |
371 | |
372 | vbestPos = select(m: sah < vbestSAH,t: ii ,f: vbestPos); |
373 | vbestSAH = select(m: sah < vbestSAH,t: sah,f: vbestSAH); |
374 | } |
375 | |
376 | /* find best dimension */ |
377 | float bestSAH = inf; |
378 | int bestDim = -1; |
379 | int bestPos = 0; |
380 | for (int dim=0; dim<3; dim++) |
381 | { |
382 | /* ignore zero sized dimensions */ |
383 | if (unlikely(mapping.invalid(dim))) |
384 | continue; |
385 | |
386 | /* test if this is a better dimension */ |
387 | if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { |
388 | bestDim = dim; |
389 | bestPos = vbestPos[dim]; |
390 | bestSAH = vbestSAH[dim]; |
391 | } |
392 | } |
393 | return Split(bestSAH,bestDim,bestPos,mapping); |
394 | } |
395 | |
396 | /*! calculates extended split information */ |
397 | __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const |
398 | { |
399 | if (split.dim == -1) { |
400 | new (&info) SplitInfoT<BBox>(0,empty,0,empty); |
401 | return; |
402 | } |
403 | |
404 | size_t leftCount = 0; |
405 | BBox leftBounds = empty; |
406 | for (size_t i=0; i<(size_t)split.pos; i++) { |
407 | leftCount += counts(i,split.dim); |
408 | leftBounds.extend(bounds(i,split.dim)); |
409 | } |
410 | size_t rightCount = 0; |
411 | BBox rightBounds = empty; |
412 | for (size_t i=split.pos; i<mapping.size(); i++) { |
413 | rightCount += counts(i,split.dim); |
414 | rightBounds.extend(bounds(i,split.dim)); |
415 | } |
416 | new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds); |
417 | } |
418 | |
419 | /*! gets the number of primitives left of the split */ |
420 | __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const |
421 | { |
422 | if (unlikely(split.dim == -1)) return -1; |
423 | |
424 | size_t leftCount = 0; |
425 | for (size_t i = 0; i < (size_t)split.pos; i++) { |
426 | leftCount += counts(i, split.dim); |
427 | } |
428 | return leftCount; |
429 | } |
430 | |
431 | /*! gets the number of primitives right of the split */ |
432 | __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const |
433 | { |
434 | if (unlikely(split.dim == -1)) return -1; |
435 | |
436 | size_t rightCount = 0; |
437 | for (size_t i = (size_t)split.pos; i<mapping.size(); i++) { |
438 | rightCount += counts(i, split.dim); |
439 | } |
440 | return rightCount; |
441 | } |
442 | |
443 | private: |
444 | BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension |
445 | vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins |
446 | }; |
447 | } |
448 | |
449 | template<typename BinInfoT, typename BinMapping, typename PrimRef> |
450 | __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping) |
451 | { |
452 | if (likely(end-begin < parallelThreshold)) { |
453 | binner.bin(prims,begin,end,mapping); |
454 | } else { |
455 | binner = parallel_reduce(begin,end,blockSize,binner, |
456 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, |
457 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
458 | } |
459 | } |
460 | |
461 | template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> |
462 | __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
463 | { |
464 | if (likely(end-begin < parallelThreshold)) { |
465 | binner.bin(prims,begin,end,mapping,binBoundsAndCenter); |
466 | } else { |
467 | binner = parallel_reduce(begin,end,blockSize,binner, |
468 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, |
469 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
470 | } |
471 | } |
472 | |
473 | template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef> |
474 | __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping) |
475 | { |
476 | if (!parallel) { |
477 | binner.bin(prims,begin,end,mapping); |
478 | } else { |
479 | binner = parallel_reduce(begin,end,blockSize,binner, |
480 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, |
481 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
482 | } |
483 | } |
484 | |
485 | template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> |
486 | __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
487 | { |
488 | if (!parallel) { |
489 | binner.bin(prims,begin,end,mapping,binBoundsAndCenter); |
490 | } else { |
491 | binner = parallel_reduce(begin,end,blockSize,binner, |
492 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, |
493 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
494 | } |
495 | } |
496 | } |
497 | |