| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "heuristic_binning.h" | 
| 7 |  | 
| 8 | namespace embree | 
| 9 | { | 
| 10 |   namespace isa | 
| 11 |   { | 
| 12 |     struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t> | 
| 13 |     { | 
| 14 |       __forceinline PrimInfoRange () { | 
| 15 |       } | 
| 16 |  | 
| 17 |       __forceinline PrimInfoRange(const PrimInfo& pinfo) | 
| 18 |         : CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {} | 
| 19 |  | 
| 20 |       __forceinline PrimInfoRange(EmptyTy) | 
| 21 |         : CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {} | 
| 22 |  | 
| 23 |       __forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds) | 
| 24 |         : CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {} | 
| 25 |        | 
| 26 |       __forceinline float leafSAH() const {  | 
| 27 | 	return expectedApproxHalfArea(box: geomBounds)*float(size());  | 
| 28 |       } | 
| 29 |        | 
| 30 |       __forceinline float leafSAH(size_t block_shift) const {  | 
| 31 | 	return expectedApproxHalfArea(box: geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift); | 
| 32 |       } | 
| 33 |     }; | 
| 34 |      | 
| 35 |     /*! Performs standard object binning */ | 
| 36 |     template<typename PrimRef, size_t BINS> | 
| 37 |       struct HeuristicArrayBinningSAH | 
| 38 |       { | 
| 39 |         typedef BinSplit<BINS> Split; | 
| 40 |         typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner; | 
| 41 |         typedef range<size_t> Set; | 
| 42 |  | 
| 43 |         static const size_t PARALLEL_THRESHOLD = 3 * 1024; | 
| 44 |         static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; | 
| 45 |         static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; | 
| 46 |  | 
| 47 |         __forceinline HeuristicArrayBinningSAH () | 
| 48 |           : prims(nullptr) {} | 
| 49 |  | 
| 50 |         /*! remember prim array */ | 
| 51 |         __forceinline HeuristicArrayBinningSAH (PrimRef* prims) | 
| 52 |           : prims(prims) {} | 
| 53 |  | 
| 54 |         /*! finds the best split */ | 
| 55 |         __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize) | 
| 56 |         { | 
| 57 |           if (likely(pinfo.size() < PARALLEL_THRESHOLD)) | 
| 58 |             return find_template<false>(pinfo,logBlockSize); | 
| 59 |           else | 
| 60 |             return find_template<true>(pinfo,logBlockSize); | 
| 61 |         } | 
| 62 |  | 
| 63 |         template<bool parallel> | 
| 64 |         __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize) | 
| 65 |         { | 
| 66 |           Binner binner(empty); | 
| 67 |           const BinMapping<BINS> mapping(pinfo); | 
| 68 |           bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping); | 
| 69 |           return binner.best(mapping,logBlockSize); | 
| 70 |         } | 
| 71 |  | 
| 72 |         /*! array partitioning */ | 
| 73 |         __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) | 
| 74 |         { | 
| 75 |           if (likely(pinfo.size() < PARALLEL_THRESHOLD)) | 
| 76 |             split_template<false>(split,pinfo,linfo,rinfo); | 
| 77 |           else | 
| 78 |             split_template<true>(split,pinfo,linfo,rinfo); | 
| 79 |         } | 
| 80 |  | 
| 81 |         template<bool parallel> | 
| 82 |         __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset) | 
| 83 |         { | 
| 84 |           if (!split.valid()) { | 
| 85 |             deterministic_order(pinfo: set); | 
| 86 |             return splitFallback(pinfo: set,linfo&: lset,rinfo&: rset); | 
| 87 |           } | 
| 88 |            | 
| 89 |           const size_t begin = set.begin(); | 
| 90 |           const size_t end   = set.end(); | 
| 91 |           CentGeomBBox3fa local_left(empty); | 
| 92 |           CentGeomBBox3fa local_right(empty); | 
| 93 |           const unsigned int splitPos = split.pos; | 
| 94 |           const unsigned int splitDim = split.dim; | 
| 95 |           const unsigned int splitDimMask = (unsigned int)1 << splitDim; | 
| 96 |  | 
| 97 |           const typename Binner::vint vSplitPos(splitPos); | 
| 98 |           const typename Binner::vbool vSplitMask(splitDimMask); | 
| 99 |           auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }; | 
| 100 |  | 
| 101 |           size_t center = 0; | 
| 102 |           if (!parallel) | 
| 103 |             center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft, | 
| 104 |                                          [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }); | 
| 105 |           else | 
| 106 |             center = parallel_partitioning( | 
| 107 |               prims,begin,end,EmptyTy(),local_left,local_right,isLeft, | 
| 108 |               [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }, | 
| 109 |               [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(other: pinfo1); }, | 
| 110 |               PARALLEL_PARTITION_BLOCK_SIZE); | 
| 111 |            | 
| 112 |           new (&lset) PrimInfoRange(begin,center,local_left); | 
| 113 |           new (&rset) PrimInfoRange(center,end,local_right); | 
| 114 |           assert(area(lset.geomBounds) >= 0.0f); | 
| 115 |           assert(area(rset.geomBounds) >= 0.0f); | 
| 116 |         } | 
| 117 |  | 
| 118 |         void deterministic_order(const PrimInfoRange& pinfo) | 
| 119 |         { | 
| 120 |           /* required as parallel partition destroys original primitive order */ | 
| 121 |           std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]); | 
| 122 |         } | 
| 123 |  | 
| 124 |         void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) | 
| 125 |         { | 
| 126 |           const size_t begin = pinfo.begin(); | 
| 127 |           const size_t end   = pinfo.end(); | 
| 128 |           const size_t center = (begin + end)/2; | 
| 129 |  | 
| 130 |           CentGeomBBox3fa left(empty); | 
| 131 |           for (size_t i=begin; i<center; i++) | 
| 132 |             left.extend_center2(prims[i]); | 
| 133 |           new (&linfo) PrimInfoRange(begin,center,left); | 
| 134 |  | 
| 135 |           CentGeomBBox3fa right(empty); | 
| 136 |           for (size_t i=center; i<end; i++) | 
| 137 |             right.extend_center2(prims[i]); | 
| 138 |           new (&rinfo) PrimInfoRange(center,end,right); | 
| 139 |         } | 
| 140 |  | 
| 141 |         void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo) | 
| 142 |         { | 
| 143 |           assert(range.size() > 1); | 
| 144 |           CentGeomBBox3fa left(empty); | 
| 145 |           CentGeomBBox3fa right(empty); | 
| 146 |           unsigned int geomID = prims[range.begin()].geomID(); | 
| 147 |           size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right, | 
| 148 |                                               [&] ( const PrimRef& prim ) { return prim.geomID() == geomID; }, | 
| 149 |                                               [ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); }); | 
| 150 |  | 
| 151 |           new (&linfo) PrimInfoRange(range.begin(),center,left); | 
| 152 |           new (&rinfo) PrimInfoRange(center,range.end(),right); | 
| 153 |         } | 
| 154 |  | 
| 155 |       private: | 
| 156 |         PrimRef* const prims; | 
| 157 |       }; | 
| 158 |  | 
| 159 |     /*! Performs standard object binning */ | 
| 160 |     template<typename PrimRefMB, size_t BINS> | 
| 161 |       struct HeuristicArrayBinningMB | 
| 162 |       { | 
| 163 |         typedef BinSplit<BINS> Split; | 
| 164 |         typedef typename PrimRefMB::BBox BBox; | 
| 165 |         typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner; | 
| 166 |         static const size_t PARALLEL_THRESHOLD = 3 * 1024; | 
| 167 |         static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; | 
| 168 |         static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; | 
| 169 |  | 
| 170 |         /*! finds the best split */ | 
| 171 |         const Split find(const SetMB& set, const size_t logBlockSize) | 
| 172 |         { | 
| 173 |           ObjectBinner binner(empty); | 
| 174 |           const BinMapping<BINS> mapping(set.size(),set.centBounds); | 
| 175 |           bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping); | 
| 176 |           Split osplit = binner.best(mapping,logBlockSize); | 
| 177 |           osplit.sah *= set.time_range.size(); | 
| 178 |           if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split | 
| 179 |           return osplit; | 
| 180 |         } | 
| 181 |          | 
| 182 |         /*! array partitioning */ | 
| 183 |         __forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset) | 
| 184 |         { | 
| 185 |           const size_t begin = set.begin(); | 
| 186 |           const size_t end   = set.end(); | 
| 187 |           PrimInfoMB left = empty; | 
| 188 |           PrimInfoMB right = empty; | 
| 189 |           const vint4 vSplitPos(split.pos); | 
| 190 |           const vbool4 vSplitMask(1 << split.dim); | 
| 191 |           auto isLeft = [&] (const PrimRefMB &ref) { return any(b: ((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); }; | 
| 192 |           auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); }; | 
| 193 |           auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(other: pinfo1); }; | 
| 194 |           size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD); | 
| 195 |           new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range); | 
| 196 |           new (&rset) SetMB(right,set.prims,range<size_t>(center,end  ),set.time_range); | 
| 197 |         } | 
| 198 |       }; | 
| 199 |   } | 
| 200 | } | 
| 201 |  |