1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "primitive.h" |
7 | |
8 | namespace embree |
9 | { |
10 | template<int M> |
11 | struct LineMi |
12 | { |
13 | /* Virtual interface to query information about the line segment type */ |
14 | struct Type : public PrimitiveType |
15 | { |
16 | const char* name() const; |
17 | size_t sizeActive(const char* This) const; |
18 | size_t sizeTotal(const char* This) const; |
19 | size_t getBytes(const char* This) const; |
20 | }; |
21 | static Type type; |
22 | |
23 | public: |
24 | |
25 | /* primitive supports multiple time segments */ |
26 | static const bool singleTimeSegment = false; |
27 | |
28 | /* Returns maximum number of stored line segments */ |
29 | static __forceinline size_t max_size() { return M; } |
30 | |
31 | /* Returns required number of primitive blocks for N line segments */ |
32 | static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } |
33 | |
34 | /* Returns required number of bytes for N line segments */ |
35 | static __forceinline size_t bytes(size_t N) { return blocks(N)*sizeof(LineMi); } |
36 | |
37 | public: |
38 | |
39 | /* Default constructor */ |
40 | __forceinline LineMi() { } |
41 | |
42 | /* Construction from vertices and IDs */ |
43 | __forceinline LineMi(const vuint<M>& v0, unsigned short leftExists, unsigned short rightExists, const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype) |
44 | : gtype((unsigned char)gtype), m((unsigned char)popcnt(vuint<M>(primIDs) != vuint<M>(-1))), sharedGeomID(geomIDs[0]), leftExists (leftExists), rightExists(rightExists), v0(v0), primIDs(primIDs) |
45 | { |
46 | assert(all(vuint<M>(geomID()) == geomIDs)); |
47 | } |
48 | |
49 | /* Returns a mask that tells which line segments are valid */ |
50 | __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); } |
51 | |
52 | /* Returns if the specified line segment is valid */ |
53 | __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; } |
54 | |
55 | /* Returns the number of stored line segments */ |
56 | __forceinline size_t size() const { return bsf(~movemask(valid())); } |
57 | |
58 | /* Returns the geometry IDs */ |
59 | //template<class T> |
60 | //static __forceinline T unmask(T &index) { return index & 0x3fffffff; } |
61 | |
62 | __forceinline unsigned int geomID(unsigned int i = 0) const { return sharedGeomID; } |
63 | //__forceinline vuint<M> geomID() { return unmask(geomIDs); } |
64 | //__forceinline const vuint<M> geomID() const { return unmask(geomIDs); } |
65 | //__forceinline unsigned int geomID(const size_t i) const { assert(i<M); return unmask(geomIDs[i]); } |
66 | |
67 | /* Returns the primitive IDs */ |
68 | __forceinline vuint<M>& primID() { return primIDs; } |
69 | __forceinline const vuint<M>& primID() const { return primIDs; } |
70 | __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } |
71 | |
72 | /* gather the line segments */ |
73 | __forceinline void gather(Vec4vf<M>& p0, |
74 | Vec4vf<M>& p1, |
75 | const LineSegments* geom) const; |
76 | |
77 | __forceinline void gatheri(Vec4vf<M>& p0, |
78 | Vec4vf<M>& p1, |
79 | const LineSegments* geom, |
80 | const int itime) const; |
81 | |
82 | __forceinline void gather(Vec4vf<M>& p0, |
83 | Vec4vf<M>& p1, |
84 | const LineSegments* geom, |
85 | float time) const; |
86 | |
87 | /* gather the line segments with lateral info */ |
88 | __forceinline void gather(Vec4vf<M>& p0, |
89 | Vec4vf<M>& p1, |
90 | Vec4vf<M>& pL, |
91 | Vec4vf<M>& pR, |
92 | const LineSegments* geom) const; |
93 | |
94 | __forceinline void gatheri(Vec4vf<M>& p0, |
95 | Vec4vf<M>& p1, |
96 | Vec4vf<M>& pL, |
97 | Vec4vf<M>& pR, |
98 | const LineSegments* geom, |
99 | const int itime) const; |
100 | |
101 | __forceinline void gather(Vec4vf<M>& p0, |
102 | Vec4vf<M>& p1, |
103 | Vec4vf<M>& pL, |
104 | Vec4vf<M>& pR, |
105 | const LineSegments* geom, |
106 | float time) const; |
107 | |
108 | __forceinline void gather(Vec4vf<M>& p0, |
109 | Vec4vf<M>& p1, |
110 | vbool<M>& cL, |
111 | vbool<M>& cR, |
112 | const LineSegments* geom) const; |
113 | |
114 | __forceinline void gatheri(Vec4vf<M>& p0, |
115 | Vec4vf<M>& p1, |
116 | vbool<M>& cL, |
117 | vbool<M>& cR, |
118 | const LineSegments* geom, |
119 | const int itime) const; |
120 | |
121 | __forceinline void gather(Vec4vf<M>& p0, |
122 | Vec4vf<M>& p1, |
123 | vbool<M>& cL, |
124 | vbool<M>& cR, |
125 | const LineSegments* geom, |
126 | float time) const; |
127 | |
128 | /* Calculate the bounds of the line segments */ |
129 | __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const |
130 | { |
131 | BBox3fa bounds = empty; |
132 | for (size_t i=0; i<M && valid(i); i++) |
133 | { |
134 | const LineSegments* geom = scene->get<LineSegments>(geomID(i)); |
135 | const Vec3ff& p0 = geom->vertex(v0[i]+0,itime); |
136 | const Vec3ff& p1 = geom->vertex(v0[i]+1,itime); |
137 | BBox3fa b = merge(a: BBox3fa(p0),b: BBox3fa(p1)); |
138 | b = enlarge(a: b,b: Vec3fa(max(a: p0.w,b: p1.w))); |
139 | bounds.extend(other: b); |
140 | } |
141 | return bounds; |
142 | } |
143 | |
144 | /* Calculate the linear bounds of the primitive */ |
145 | __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) { |
146 | return LBBox3fa(bounds(scene,itime: itime+0), bounds(scene,itime: itime+1)); |
147 | } |
148 | |
149 | __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) { |
150 | LBBox3fa allBounds = empty; |
151 | for (size_t i=0; i<M && valid(i); i++) |
152 | { |
153 | const LineSegments* geom = scene->get<LineSegments>(geomID(i)); |
154 | allBounds.extend(other: geom->linearBounds(primID(i), itime, numTimeSteps)); |
155 | } |
156 | return allBounds; |
157 | } |
158 | |
159 | __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) |
160 | { |
161 | LBBox3fa allBounds = empty; |
162 | for (size_t i=0; i<M && valid(i); i++) |
163 | { |
164 | const LineSegments* geom = scene->get<LineSegments>(geomID(i: (unsigned int)i)); |
165 | allBounds.extend(other: geom->linearBounds(primID(i), time_range)); |
166 | } |
167 | return allBounds; |
168 | } |
169 | |
170 | /* Fill line segment from line segment list */ |
171 | template<typename PrimRefT> |
172 | __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) |
173 | { |
174 | Geometry::GType gty = scene->get(prims[begin].geomID())->getType(); |
175 | vuint<M> geomID, primID; |
176 | vuint<M> v0; |
177 | unsigned short leftExists = 0; |
178 | unsigned short rightExists = 0; |
179 | const PrimRefT* prim = &prims[begin]; |
180 | |
181 | for (size_t i=0; i<M; i++) |
182 | { |
183 | const LineSegments* geom = scene->get<LineSegments>(prim->geomID()); |
184 | if (begin<end) { |
185 | geomID[i] = prim->geomID(); |
186 | primID[i] = prim->primID(); |
187 | v0[i] = geom->segment(i: prim->primID()); |
188 | leftExists |= geom->segmentLeftExists(i: primID[i]) << i; |
189 | rightExists |= geom->segmentRightExists(i: primID[i]) << i; |
190 | begin++; |
191 | } else { |
192 | assert(i); |
193 | if (i>0) { |
194 | geomID[i] = geomID[i-1]; |
195 | primID[i] = -1; |
196 | v0[i] = v0[i-1]; |
197 | } |
198 | } |
199 | if (begin<end) prim = &prims[begin]; // FIXME: remove this line |
200 | } |
201 | new (this) LineMi(v0,leftExists,rightExists,geomID,primID,gty); // FIXME: use non temporal store |
202 | } |
203 | |
204 | template<typename BVH, typename Allocator> |
205 | __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) |
206 | { |
207 | size_t start = set.begin(); |
208 | size_t items = LineMi::blocks(N: set.size()); |
209 | size_t numbytes = LineMi::bytes(N: set.size()); |
210 | LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float)); |
211 | for (size_t i=0; i<items; i++) { |
212 | accel[i].fill(prims,start,set.end(),bvh->scene); |
213 | } |
214 | return bvh->encodeLeaf((char*)accel,items); |
215 | }; |
216 | |
217 | __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) |
218 | { |
219 | fill(prims,begin,end,scene); |
220 | return linearBounds(scene,itime); |
221 | } |
222 | |
223 | __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) |
224 | { |
225 | fill(prims,begin,end,scene); |
226 | return linearBounds(scene,time_range); |
227 | } |
228 | |
229 | template<typename BVH, typename SetMB, typename Allocator> |
230 | __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc) |
231 | { |
232 | size_t start = prims.begin(); |
233 | size_t end = prims.end(); |
234 | size_t items = LineMi::blocks(N: prims.size()); |
235 | size_t numbytes = LineMi::bytes(N: prims.size()); |
236 | LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float)); |
237 | const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,items); |
238 | |
239 | LBBox3fa bounds = empty; |
240 | for (size_t i=0; i<items; i++) |
241 | bounds.extend(other: accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range)); |
242 | |
243 | return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range); |
244 | }; |
245 | |
246 | /* Updates the primitive */ |
247 | __forceinline BBox3fa update(LineSegments* geom) |
248 | { |
249 | BBox3fa bounds = empty; |
250 | for (size_t i=0; i<M && valid(i); i++) |
251 | { |
252 | const Vec3ff& p0 = geom->vertex(v0[i]+0); |
253 | const Vec3ff& p1 = geom->vertex(v0[i]+1); |
254 | BBox3fa b = merge(a: BBox3fa(p0),b: BBox3fa(p1)); |
255 | b = enlarge(a: b,b: Vec3fa(max(a: p0.w,b: p1.w))); |
256 | bounds.extend(other: b); |
257 | } |
258 | return bounds; |
259 | } |
260 | |
261 | /*! output operator */ |
262 | friend __forceinline embree_ostream operator<<(embree_ostream cout, const LineMi& line) { |
263 | return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}" ; |
264 | } |
265 | |
266 | public: |
267 | unsigned char gtype; |
268 | unsigned char m; |
269 | unsigned int sharedGeomID; |
270 | unsigned short leftExists, rightExists; |
271 | vuint<M> v0; // index of start vertex |
272 | private: |
273 | vuint<M> primIDs; // primitive ID |
274 | }; |
275 | |
276 | template<> |
277 | __forceinline void LineMi<4>::gather(Vec4vf4& p0, |
278 | Vec4vf4& p1, |
279 | const LineSegments* geom) const |
280 | { |
281 | const vfloat4 a0 = vfloat4::loadu(a: geom->vertexPtr(i: v0[0])); |
282 | const vfloat4 a1 = vfloat4::loadu(a: geom->vertexPtr(i: v0[1])); |
283 | const vfloat4 a2 = vfloat4::loadu(a: geom->vertexPtr(i: v0[2])); |
284 | const vfloat4 a3 = vfloat4::loadu(a: geom->vertexPtr(i: v0[3])); |
285 | transpose(r0: a0,r1: a1,r2: a2,r3: a3,c0&: p0.x,c1&: p0.y,c2&: p0.z,c3&: p0.w); |
286 | |
287 | const vfloat4 b0 = vfloat4::loadu(a: geom->vertexPtr(i: v0[0]+1)); |
288 | const vfloat4 b1 = vfloat4::loadu(a: geom->vertexPtr(i: v0[1]+1)); |
289 | const vfloat4 b2 = vfloat4::loadu(a: geom->vertexPtr(i: v0[2]+1)); |
290 | const vfloat4 b3 = vfloat4::loadu(a: geom->vertexPtr(i: v0[3]+1)); |
291 | transpose(r0: b0,r1: b1,r2: b2,r3: b3,c0&: p1.x,c1&: p1.y,c2&: p1.z,c3&: p1.w); |
292 | } |
293 | |
294 | template<> |
295 | __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, |
296 | Vec4vf4& p1, |
297 | const LineSegments* geom, |
298 | const int itime) const |
299 | { |
300 | const vfloat4 a0 = vfloat4::loadu(a: geom->vertexPtr(i: v0[0],itime)); |
301 | const vfloat4 a1 = vfloat4::loadu(a: geom->vertexPtr(i: v0[1],itime)); |
302 | const vfloat4 a2 = vfloat4::loadu(a: geom->vertexPtr(i: v0[2],itime)); |
303 | const vfloat4 a3 = vfloat4::loadu(a: geom->vertexPtr(i: v0[3],itime)); |
304 | transpose(r0: a0,r1: a1,r2: a2,r3: a3,c0&: p0.x,c1&: p0.y,c2&: p0.z,c3&: p0.w); |
305 | |
306 | const vfloat4 b0 = vfloat4::loadu(a: geom->vertexPtr(i: v0[0]+1,itime)); |
307 | const vfloat4 b1 = vfloat4::loadu(a: geom->vertexPtr(i: v0[1]+1,itime)); |
308 | const vfloat4 b2 = vfloat4::loadu(a: geom->vertexPtr(i: v0[2]+1,itime)); |
309 | const vfloat4 b3 = vfloat4::loadu(a: geom->vertexPtr(i: v0[3]+1,itime)); |
310 | transpose(r0: b0,r1: b1,r2: b2,r3: b3,c0&: p1.x,c1&: p1.y,c2&: p1.z,c3&: p1.w); |
311 | } |
312 | |
313 | template<> |
314 | __forceinline void LineMi<4>::gather(Vec4vf4& p0, |
315 | Vec4vf4& p1, |
316 | const LineSegments* geom, |
317 | float time) const |
318 | { |
319 | float ftime; |
320 | const int itime = geom->timeSegment(time, ftime); |
321 | |
322 | Vec4vf4 a0,a1; |
323 | gatheri(p0&: a0,p1&: a1,geom,itime); |
324 | Vec4vf4 b0,b1; |
325 | gatheri(p0&: b0,p1&: b1,geom,itime: itime+1); |
326 | p0 = lerp(v0: a0,v1: b0,t: vfloat4(ftime)); |
327 | p1 = lerp(v0: a1,v1: b1,t: vfloat4(ftime)); |
328 | } |
329 | |
330 | template<> |
331 | __forceinline void LineMi<4>::gather(Vec4vf4& p0, |
332 | Vec4vf4& p1, |
333 | vbool4& cL, |
334 | vbool4& cR, |
335 | const LineSegments* geom) const |
336 | { |
337 | gather(p0,p1,geom); |
338 | cL = !vbool4(leftExists); |
339 | cR = !vbool4(rightExists); |
340 | } |
341 | |
342 | template<> |
343 | __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, |
344 | Vec4vf4& p1, |
345 | vbool4& cL, |
346 | vbool4& cR, |
347 | const LineSegments* geom, |
348 | const int itime) const |
349 | { |
350 | gatheri(p0,p1,geom,itime); |
351 | cL = !vbool4(leftExists); |
352 | cR = !vbool4(rightExists); |
353 | } |
354 | |
355 | template<> |
356 | __forceinline void LineMi<4>::gather(Vec4vf4& p0, |
357 | Vec4vf4& p1, |
358 | vbool4& cL, |
359 | vbool4& cR, |
360 | const LineSegments* geom, |
361 | float time) const |
362 | { |
363 | float ftime; |
364 | const int itime = geom->timeSegment(time, ftime); |
365 | |
366 | Vec4vf4 a0,a1; |
367 | gatheri(p0&: a0,p1&: a1,geom,itime); |
368 | Vec4vf4 b0,b1; |
369 | gatheri(p0&: b0,p1&: b1,geom,itime: itime+1); |
370 | p0 = lerp(v0: a0,v1: b0,t: vfloat4(ftime)); |
371 | p1 = lerp(v0: a1,v1: b1,t: vfloat4(ftime)); |
372 | cL = !vbool4(leftExists); |
373 | cR = !vbool4(rightExists); |
374 | } |
375 | |
376 | template<> |
377 | __forceinline void LineMi<4>::gather(Vec4vf4& p0, |
378 | Vec4vf4& p1, |
379 | Vec4vf4& pL, |
380 | Vec4vf4& pR, |
381 | const LineSegments* geom) const |
382 | { |
383 | const vfloat4 a0 = vfloat4::loadu(a: geom->vertexPtr(i: v0[0])); |
384 | const vfloat4 a1 = vfloat4::loadu(a: geom->vertexPtr(i: v0[1])); |
385 | const vfloat4 a2 = vfloat4::loadu(a: geom->vertexPtr(i: v0[2])); |
386 | const vfloat4 a3 = vfloat4::loadu(a: geom->vertexPtr(i: v0[3])); |
387 | transpose(r0: a0,r1: a1,r2: a2,r3: a3,c0&: p0.x,c1&: p0.y,c2&: p0.z,c3&: p0.w); |
388 | |
389 | const vfloat4 b0 = vfloat4::loadu(a: geom->vertexPtr(i: v0[0]+1)); |
390 | const vfloat4 b1 = vfloat4::loadu(a: geom->vertexPtr(i: v0[1]+1)); |
391 | const vfloat4 b2 = vfloat4::loadu(a: geom->vertexPtr(i: v0[2]+1)); |
392 | const vfloat4 b3 = vfloat4::loadu(a: geom->vertexPtr(i: v0[3]+1)); |
393 | transpose(r0: b0,r1: b1,r2: b2,r3: b3,c0&: p1.x,c1&: p1.y,c2&: p1.z,c3&: p1.w); |
394 | |
395 | const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[0]-1)) : vfloat4(inf); |
396 | const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[1]-1)) : vfloat4(inf); |
397 | const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[2]-1)) : vfloat4(inf); |
398 | const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[3]-1)) : vfloat4(inf); |
399 | transpose(r0: l0,r1: l1,r2: l2,r3: l3,c0&: pL.x,c1&: pL.y,c2&: pL.z,c3&: pL.w); |
400 | |
401 | const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[0]+2)) : vfloat4(inf); |
402 | const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[1]+2)) : vfloat4(inf); |
403 | const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[2]+2)) : vfloat4(inf); |
404 | const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[3]+2)) : vfloat4(inf); |
405 | transpose(r0,r1,r2,r3,c0&: pR.x,c1&: pR.y,c2&: pR.z,c3&: pR.w); |
406 | } |
407 | |
408 | template<> |
409 | __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, |
410 | Vec4vf4& p1, |
411 | Vec4vf4& pL, |
412 | Vec4vf4& pR, |
413 | const LineSegments* geom, |
414 | const int itime) const |
415 | { |
416 | const vfloat4 a0 = vfloat4::loadu(a: geom->vertexPtr(i: v0[0],itime)); |
417 | const vfloat4 a1 = vfloat4::loadu(a: geom->vertexPtr(i: v0[1],itime)); |
418 | const vfloat4 a2 = vfloat4::loadu(a: geom->vertexPtr(i: v0[2],itime)); |
419 | const vfloat4 a3 = vfloat4::loadu(a: geom->vertexPtr(i: v0[3],itime)); |
420 | transpose(r0: a0,r1: a1,r2: a2,r3: a3,c0&: p0.x,c1&: p0.y,c2&: p0.z,c3&: p0.w); |
421 | |
422 | const vfloat4 b0 = vfloat4::loadu(a: geom->vertexPtr(i: v0[0]+1,itime)); |
423 | const vfloat4 b1 = vfloat4::loadu(a: geom->vertexPtr(i: v0[1]+1,itime)); |
424 | const vfloat4 b2 = vfloat4::loadu(a: geom->vertexPtr(i: v0[2]+1,itime)); |
425 | const vfloat4 b3 = vfloat4::loadu(a: geom->vertexPtr(i: v0[3]+1,itime)); |
426 | transpose(r0: b0,r1: b1,r2: b2,r3: b3,c0&: p1.x,c1&: p1.y,c2&: p1.z,c3&: p1.w); |
427 | |
428 | const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[0]-1,itime)) : vfloat4(inf); |
429 | const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[1]-1,itime)) : vfloat4(inf); |
430 | const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[2]-1,itime)) : vfloat4(inf); |
431 | const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[3]-1,itime)) : vfloat4(inf); |
432 | transpose(r0: l0,r1: l1,r2: l2,r3: l3,c0&: pL.x,c1&: pL.y,c2&: pL.z,c3&: pL.w); |
433 | |
434 | const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[0]+2,itime)) : vfloat4(inf); |
435 | const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[1]+2,itime)) : vfloat4(inf); |
436 | const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[2]+2,itime)) : vfloat4(inf); |
437 | const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(a: geom->vertexPtr(i: v0[3]+2,itime)) : vfloat4(inf); |
438 | transpose(r0,r1,r2,r3,c0&: pR.x,c1&: pR.y,c2&: pR.z,c3&: pR.w); |
439 | } |
440 | |
441 | template<> |
442 | __forceinline void LineMi<4>::gather(Vec4vf4& p0, |
443 | Vec4vf4& p1, |
444 | Vec4vf4& pL, |
445 | Vec4vf4& pR, |
446 | const LineSegments* geom, |
447 | float time) const |
448 | { |
449 | float ftime; |
450 | const int itime = geom->timeSegment(time, ftime); |
451 | |
452 | Vec4vf4 a0,a1,aL,aR; |
453 | gatheri(p0&: a0,p1&: a1,pL&: aL,pR&: aR,geom,itime); |
454 | Vec4vf4 b0,b1,bL,bR; |
455 | gatheri(p0&: b0,p1&: b1,pL&: bL,pR&: bR,geom,itime: itime+1); |
456 | p0 = lerp(v0: a0,v1: b0,t: vfloat4(ftime)); |
457 | p1 = lerp(v0: a1,v1: b1,t: vfloat4(ftime)); |
458 | pL = lerp(v0: aL,v1: bL,t: vfloat4(ftime)); |
459 | pR = lerp(v0: aR,v1: bR,t: vfloat4(ftime)); |
460 | } |
461 | |
462 | #if defined(__AVX__) |
463 | |
464 | template<> |
465 | __forceinline void LineMi<8>::gather(Vec4vf8& p0, |
466 | Vec4vf8& p1, |
467 | const LineSegments* geom) const |
468 | { |
469 | const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); |
470 | const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); |
471 | const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); |
472 | const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); |
473 | const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4])); |
474 | const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5])); |
475 | const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6])); |
476 | const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7])); |
477 | transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); |
478 | |
479 | const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); |
480 | const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1)); |
481 | const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); |
482 | const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); |
483 | const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1)); |
484 | const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1)); |
485 | const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1)); |
486 | const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1)); |
487 | transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); |
488 | } |
489 | |
490 | template<> |
491 | __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, |
492 | Vec4vf8& p1, |
493 | const LineSegments* geom, |
494 | const int itime) const |
495 | { |
496 | const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); |
497 | const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); |
498 | const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); |
499 | const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); |
500 | const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime)); |
501 | const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime)); |
502 | const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime)); |
503 | const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime)); |
504 | transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); |
505 | |
506 | const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); |
507 | const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); |
508 | const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); |
509 | const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); |
510 | const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime)); |
511 | const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime)); |
512 | const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime)); |
513 | const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime)); |
514 | transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); |
515 | } |
516 | |
517 | template<> |
518 | __forceinline void LineMi<8>::gather(Vec4vf8& p0, |
519 | Vec4vf8& p1, |
520 | const LineSegments* geom, |
521 | float time) const |
522 | { |
523 | float ftime; |
524 | const int itime = geom->timeSegment(time, ftime); |
525 | |
526 | Vec4vf8 a0,a1; |
527 | gatheri(a0,a1,geom,itime); |
528 | Vec4vf8 b0,b1; |
529 | gatheri(b0,b1,geom,itime+1); |
530 | p0 = lerp(a0,b0,vfloat8(ftime)); |
531 | p1 = lerp(a1,b1,vfloat8(ftime)); |
532 | } |
533 | |
534 | template<> |
535 | __forceinline void LineMi<8>::gather(Vec4vf8& p0, |
536 | Vec4vf8& p1, |
537 | Vec4vf8& pL, |
538 | Vec4vf8& pR, |
539 | const LineSegments* geom) const |
540 | { |
541 | const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); |
542 | const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); |
543 | const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); |
544 | const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); |
545 | const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4])); |
546 | const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5])); |
547 | const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6])); |
548 | const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7])); |
549 | transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); |
550 | |
551 | const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); |
552 | const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1)); |
553 | const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); |
554 | const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); |
555 | const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1)); |
556 | const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1)); |
557 | const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1)); |
558 | const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1)); |
559 | transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); |
560 | |
561 | const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf); |
562 | const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf); |
563 | const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf); |
564 | const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf); |
565 | const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1)) : vfloat4(inf); |
566 | const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1)) : vfloat4(inf); |
567 | const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1)) : vfloat4(inf); |
568 | const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1)) : vfloat4(inf); |
569 | transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w); |
570 | |
571 | const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf); |
572 | const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf); |
573 | const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf); |
574 | const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf); |
575 | const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2)) : vfloat4(inf); |
576 | const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2)) : vfloat4(inf); |
577 | const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2)) : vfloat4(inf); |
578 | const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2)) : vfloat4(inf); |
579 | transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w); |
580 | } |
581 | |
582 | template<> |
583 | __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, |
584 | Vec4vf8& p1, |
585 | Vec4vf8& pL, |
586 | Vec4vf8& pR, |
587 | const LineSegments* geom, |
588 | const int itime) const |
589 | { |
590 | const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); |
591 | const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); |
592 | const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); |
593 | const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); |
594 | const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime)); |
595 | const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime)); |
596 | const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime)); |
597 | const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime)); |
598 | transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); |
599 | |
600 | const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); |
601 | const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); |
602 | const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); |
603 | const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); |
604 | const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime)); |
605 | const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime)); |
606 | const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime)); |
607 | const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime)); |
608 | transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); |
609 | |
610 | const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf); |
611 | const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf); |
612 | const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf); |
613 | const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf); |
614 | const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1,itime)) : vfloat4(inf); |
615 | const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1,itime)) : vfloat4(inf); |
616 | const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1,itime)) : vfloat4(inf); |
617 | const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1,itime)) : vfloat4(inf); |
618 | transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w); |
619 | |
620 | const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf); |
621 | const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf); |
622 | const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf); |
623 | const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf); |
624 | const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2,itime)) : vfloat4(inf); |
625 | const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2,itime)) : vfloat4(inf); |
626 | const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2,itime)) : vfloat4(inf); |
627 | const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2,itime)) : vfloat4(inf); |
628 | transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w); |
629 | } |
630 | |
631 | template<> |
632 | __forceinline void LineMi<8>::gather(Vec4vf8& p0, |
633 | Vec4vf8& p1, |
634 | Vec4vf8& pL, |
635 | Vec4vf8& pR, |
636 | const LineSegments* geom, |
637 | float time) const |
638 | { |
639 | float ftime; |
640 | const int itime = geom->timeSegment(time, ftime); |
641 | |
642 | Vec4vf8 a0,a1,aL,aR; |
643 | gatheri(a0,a1,aL,aR,geom,itime); |
644 | Vec4vf8 b0,b1,bL,bR; |
645 | gatheri(b0,b1,bL,bR,geom,itime+1); |
646 | p0 = lerp(a0,b0,vfloat8(ftime)); |
647 | p1 = lerp(a1,b1,vfloat8(ftime)); |
648 | pL = lerp(aL,bL,vfloat8(ftime)); |
649 | pR = lerp(aR,bR,vfloat8(ftime)); |
650 | } |
651 | |
652 | template<> |
653 | __forceinline void LineMi<8>::gather(Vec4vf8& p0, |
654 | Vec4vf8& p1, |
655 | vbool8& cL, |
656 | vbool8& cR, |
657 | const LineSegments* geom) const |
658 | { |
659 | gather(p0,p1,geom); |
660 | cL = !vbool8(leftExists); |
661 | cR = !vbool8(rightExists); |
662 | } |
663 | |
664 | template<> |
665 | __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, |
666 | Vec4vf8& p1, |
667 | vbool8& cL, |
668 | vbool8& cR, |
669 | const LineSegments* geom, |
670 | const int itime) const |
671 | { |
672 | gatheri(p0,p1,geom,itime); |
673 | cL = !vbool8(leftExists); |
674 | cR = !vbool8(rightExists); |
675 | } |
676 | |
677 | template<> |
678 | __forceinline void LineMi<8>::gather(Vec4vf8& p0, |
679 | Vec4vf8& p1, |
680 | vbool8& cL, |
681 | vbool8& cR, |
682 | const LineSegments* geom, |
683 | float time) const |
684 | { |
685 | float ftime; |
686 | const int itime = geom->timeSegment(time, ftime); |
687 | |
688 | Vec4vf8 a0,a1; |
689 | gatheri(a0,a1,geom,itime); |
690 | Vec4vf8 b0,b1; |
691 | gatheri(b0,b1,geom,itime+1); |
692 | p0 = lerp(a0,b0,vfloat8(ftime)); |
693 | p1 = lerp(a1,b1,vfloat8(ftime)); |
694 | cL = !vbool8(leftExists); |
695 | cR = !vbool8(rightExists); |
696 | } |
697 | |
698 | #endif |
699 | |
700 | template<int M> |
701 | typename LineMi<M>::Type LineMi<M>::type; |
702 | |
703 | typedef LineMi<4> Line4i; |
704 | typedef LineMi<8> Line8i; |
705 | } |
706 | |