| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "../common/primref_mb.h" | 
| 7 | #include "../../common/algorithms/parallel_filter.h" | 
| 8 |  | 
| 9 | #define MBLUR_TIME_SPLIT_THRESHOLD 1.25f | 
| 10 |  | 
| 11 | namespace embree | 
| 12 | { | 
| 13 |   namespace isa | 
| 14 |   {  | 
| 15 |     /*! Performs standard object binning */ | 
| 16 |     template<typename PrimRefMB, typename RecalculatePrimRef, size_t BINS> | 
| 17 |       struct HeuristicMBlurTemporalSplit | 
| 18 |       { | 
| 19 |         typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split; | 
| 20 |         typedef mvector<PrimRefMB>* PrimRefVector; | 
| 21 |         typedef typename PrimRefMB::BBox BBox;  | 
| 22 |  | 
| 23 |         static const size_t PARALLEL_THRESHOLD = 3 * 1024; | 
| 24 |         static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; | 
| 25 |         static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; | 
| 26 |  | 
| 27 |         HeuristicMBlurTemporalSplit (MemoryMonitorInterface* device, const RecalculatePrimRef& recalculatePrimRef) | 
| 28 |           : device(device), recalculatePrimRef(recalculatePrimRef) {} | 
| 29 |  | 
| 30 |         struct TemporalBinInfo | 
| 31 |         { | 
| 32 |           __forceinline TemporalBinInfo () { | 
| 33 |           } | 
| 34 |  | 
| 35 |           __forceinline TemporalBinInfo (EmptyTy) | 
| 36 |           { | 
| 37 |             for (size_t i=0; i<BINS-1; i++) | 
| 38 |             { | 
| 39 |               count0[i] = count1[i] = 0; | 
| 40 |               bounds0[i] = bounds1[i] = empty; | 
| 41 |             } | 
| 42 |           } | 
| 43 |            | 
| 44 |           void bin(const PrimRefMB* prims, size_t begin, size_t end, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef) | 
| 45 |           { | 
| 46 |             for (int b=0; b<BINS-1; b++) | 
| 47 |             { | 
| 48 |               const float t = float(b+1)/float(BINS); | 
| 49 |               const float ct = lerp(v0: time_range.lower,v1: time_range.upper,t); | 
| 50 |               const float center_time = set.align_time(ct); | 
| 51 |               if (center_time <= time_range.lower) continue; | 
| 52 |               if (center_time >= time_range.upper) continue; | 
| 53 |               const BBox1f dt0(time_range.lower,center_time); | 
| 54 |               const BBox1f dt1(center_time,time_range.upper); | 
| 55 |                | 
| 56 |               /* find linear bounds for both time segments */ | 
| 57 |               for (size_t i=begin; i<end; i++)  | 
| 58 |               { | 
| 59 |                 if (prims[i].time_range_overlap(dt0)) | 
| 60 |                 { | 
| 61 |                   const LBBox3fa bn0 = recalculatePrimRef.linearBounds(prims[i],dt0); | 
| 62 | #if MBLUR_BIN_LBBOX | 
| 63 |                   bounds0[b].extend(bn0); | 
| 64 | #else | 
| 65 |                   bounds0[b].extend(bn0.interpolate(0.5f)); | 
| 66 | #endif | 
| 67 |                   count0[b] += prims[i].timeSegmentRange(dt0).size(); | 
| 68 |                 } | 
| 69 |  | 
| 70 |                 if (prims[i].time_range_overlap(dt1)) | 
| 71 |                 { | 
| 72 |                   const LBBox3fa bn1 = recalculatePrimRef.linearBounds(prims[i],dt1); | 
| 73 | #if MBLUR_BIN_LBBOX | 
| 74 |                   bounds1[b].extend(bn1); | 
| 75 | #else | 
| 76 |                   bounds1[b].extend(bn1.interpolate(0.5f)); | 
| 77 | #endif | 
| 78 |                   count1[b] += prims[i].timeSegmentRange(dt1).size(); | 
| 79 |                 } | 
| 80 |               } | 
| 81 |             } | 
| 82 |           } | 
| 83 |  | 
| 84 |           __forceinline void bin_parallel(const PrimRefMB* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)  | 
| 85 |           { | 
| 86 |             if (likely(end-begin < parallelThreshold)) { | 
| 87 |               bin(prims,begin,end,time_range,set,recalculatePrimRef); | 
| 88 |             }  | 
| 89 |             else  | 
| 90 |             { | 
| 91 |               auto bin = [&](const range<size_t>& r) -> TemporalBinInfo {  | 
| 92 |                 TemporalBinInfo binner(empty); binner.bin(prims, r.begin(), r.end(), time_range, set, recalculatePrimRef); return binner;  | 
| 93 |               }; | 
| 94 |               *this = parallel_reduce(begin,end,blockSize,TemporalBinInfo(empty),bin,merge2); | 
| 95 |             } | 
| 96 |           } | 
| 97 |            | 
| 98 |           /*! merges in other binning information */ | 
| 99 |           __forceinline void merge (const TemporalBinInfo& other) | 
| 100 |           { | 
| 101 |             for (size_t i=0; i<BINS-1; i++)  | 
| 102 |             { | 
| 103 |               count0[i] += other.count0[i]; | 
| 104 |               count1[i] += other.count1[i]; | 
| 105 |               bounds0[i].extend(other.bounds0[i]); | 
| 106 |               bounds1[i].extend(other.bounds1[i]); | 
| 107 |             } | 
| 108 |           } | 
| 109 |  | 
| 110 |           static __forceinline const TemporalBinInfo merge2(const TemporalBinInfo& a, const TemporalBinInfo& b) { | 
| 111 |             TemporalBinInfo r = a; r.merge(b); return r; | 
| 112 |           } | 
| 113 |                      | 
| 114 |           Split best(int logBlockSize, BBox1f time_range, const SetMB& set) | 
| 115 |           { | 
| 116 |             float bestSAH = inf; | 
| 117 |             float bestPos = 0.0f; | 
| 118 |             for (int b=0; b<BINS-1; b++) | 
| 119 |             { | 
| 120 |               float t = float(b+1)/float(BINS); | 
| 121 |               float ct = lerp(v0: time_range.lower,v1: time_range.upper,t); | 
| 122 |               const float center_time = set.align_time(ct); | 
| 123 |               if (center_time <= time_range.lower) continue; | 
| 124 |               if (center_time >= time_range.upper) continue; | 
| 125 |               const BBox1f dt0(time_range.lower,center_time); | 
| 126 |               const BBox1f dt1(center_time,time_range.upper); | 
| 127 |                | 
| 128 |               /* calculate sah */ | 
| 129 |               const size_t lCount = (count0[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize); | 
| 130 |               const size_t rCount = (count1[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize); | 
| 131 |               float sah0 = expectedApproxHalfArea(bounds0[b])*float(lCount)*dt0.size(); | 
| 132 |               float sah1 = expectedApproxHalfArea(bounds1[b])*float(rCount)*dt1.size(); | 
| 133 |               if (unlikely(lCount == 0)) sah0 = 0.0f; // happens for initial splits when objects not alive over entire shutter time | 
| 134 |               if (unlikely(rCount == 0)) sah1 = 0.0f; | 
| 135 |               const float sah = sah0+sah1; | 
| 136 |               if (sah < bestSAH) { | 
| 137 |                 bestSAH = sah; | 
| 138 |                 bestPos = center_time; | 
| 139 |               } | 
| 140 |             } | 
| 141 |             return Split(bestSAH*MBLUR_TIME_SPLIT_THRESHOLD,(unsigned)Split::SPLIT_TEMPORAL,0,bestPos); | 
| 142 |           } | 
| 143 |            | 
| 144 |         public: | 
| 145 |           size_t count0[BINS-1]; | 
| 146 |           size_t count1[BINS-1]; | 
| 147 |           BBox bounds0[BINS-1]; | 
| 148 |           BBox bounds1[BINS-1]; | 
| 149 |         }; | 
| 150 |          | 
| 151 |         /*! finds the best split */ | 
| 152 |         const Split find(const SetMB& set, const size_t logBlockSize) | 
| 153 |         { | 
| 154 |           assert(set.size() > 0); | 
| 155 |           TemporalBinInfo binner(empty); | 
| 156 |           binner.bin_parallel(set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,set.time_range,set,recalculatePrimRef); | 
| 157 |           Split tsplit = binner.best((int)logBlockSize,set.time_range,set); | 
| 158 |           if (!tsplit.valid()) tsplit.data = Split::SPLIT_FALLBACK; // use fallback split | 
| 159 |           return tsplit; | 
| 160 |         } | 
| 161 |  | 
| 162 |         __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& tsplit, const SetMB& set, SetMB& lset, SetMB& rset) | 
| 163 |         { | 
| 164 |           assert(tsplit.sah != float(inf)); | 
| 165 |           assert(tsplit.fpos > set.time_range.lower); | 
| 166 |           assert(tsplit.fpos < set.time_range.upper); | 
| 167 |  | 
| 168 |           float center_time = tsplit.fpos; | 
| 169 |           const BBox1f time_range0(set.time_range.lower,center_time); | 
| 170 |           const BBox1f time_range1(center_time,set.time_range.upper); | 
| 171 |           mvector<PrimRefMB>& prims = *set.prims; | 
| 172 |            | 
| 173 |           /* calculate primrefs for first time range */ | 
| 174 |           std::unique_ptr<mvector<PrimRefMB>> new_vector(new mvector<PrimRefMB>(device, set.size())); | 
| 175 |           PrimRefVector lprims = new_vector.get(); | 
| 176 |            | 
| 177 |           auto reduction_func0 = [&] (const range<size_t>& r) { | 
| 178 |             PrimInfoMB pinfo = empty; | 
| 179 |             for (size_t i=r.begin(); i<r.end(); i++)  | 
| 180 |             { | 
| 181 |               if (likely(prims[i].time_range_overlap(time_range0))) | 
| 182 |               { | 
| 183 |                 const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range0); | 
| 184 |                 (*lprims)[i-set.begin()] = prim; | 
| 185 |                 pinfo.add_primref(prim); | 
| 186 |               } | 
| 187 |               else | 
| 188 |               { | 
| 189 |                 (*lprims)[i-set.begin()] = prims[i]; | 
| 190 |               } | 
| 191 |             } | 
| 192 |             return pinfo; | 
| 193 |           };         | 
| 194 |           PrimInfoMB linfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func0,PrimInfoMB::merge2); | 
| 195 |  | 
| 196 |           /* primrefs for first time range are in lprims[0 .. set.size()) */ | 
| 197 |           /* some primitives may need to be filtered out */ | 
| 198 |           if (linfo.size() != set.size()) | 
| 199 |             linfo.object_range._end = parallel_filter(lprims->data(), size_t(0), set.size(), size_t(1024), | 
| 200 |                                                       [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range0); }); | 
| 201 |                        | 
| 202 |           lset = SetMB(linfo,lprims,time_range0); | 
| 203 |  | 
| 204 |           /* calculate primrefs for second time range */ | 
| 205 |           auto reduction_func1 = [&] (const range<size_t>& r) { | 
| 206 |             PrimInfoMB pinfo = empty; | 
| 207 |             for (size_t i=r.begin(); i<r.end(); i++)  | 
| 208 |             { | 
| 209 |               if (likely(prims[i].time_range_overlap(time_range1))) | 
| 210 |               { | 
| 211 |                 const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range1); | 
| 212 |                 prims[i] = prim; | 
| 213 |                 pinfo.add_primref(prim); | 
| 214 |               } | 
| 215 |             } | 
| 216 |             return pinfo; | 
| 217 |           };         | 
| 218 |           PrimInfoMB rinfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func1,PrimInfoMB::merge2); | 
| 219 |           rinfo.object_range = range<size_t>(set.begin(), set.begin() + rinfo.size()); | 
| 220 |  | 
| 221 |           /* primrefs for second time range are in prims[set.begin() .. set.end()) */ | 
| 222 |           /* some primitives may need to be filtered out */ | 
| 223 |           if (rinfo.size() != set.size()) | 
| 224 |             rinfo.object_range._end = parallel_filter(prims.data(), set.begin(), set.end(), size_t(1024), | 
| 225 |                                                       [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range1); }); | 
| 226 |          | 
| 227 |           rset = SetMB(rinfo,&prims,time_range1); | 
| 228 |  | 
| 229 |           return new_vector; | 
| 230 |         } | 
| 231 |  | 
| 232 |       private: | 
| 233 |         MemoryMonitorInterface* device;              // device to report memory usage to | 
| 234 |         const RecalculatePrimRef recalculatePrimRef; | 
| 235 |       }; | 
| 236 |   } | 
| 237 | } | 
| 238 |  |