1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "quad_intersector_moeller.h" |
7 | |
8 | /*! Modified Pluecker ray/triangle intersector. The test first shifts |
9 | * the ray origin into the origin of the coordinate system and then |
10 | * uses Pluecker coordinates for the intersection. Due to the shift, |
11 | * the Pluecker coordinate calculation simplifies and the tests get |
12 | * numerically stable. The edge equations are watertight along the |
13 | * edge for neighboring triangles. */ |
14 | |
15 | namespace embree |
16 | { |
17 | namespace isa |
18 | { |
19 | template<int M> |
20 | struct QuadHitPlueckerM |
21 | { |
22 | __forceinline QuadHitPlueckerM() {} |
23 | |
24 | __forceinline QuadHitPlueckerM(const vbool<M>& valid, |
25 | const vfloat<M>& U, |
26 | const vfloat<M>& V, |
27 | const vfloat<M>& UVW, |
28 | const vfloat<M>& t, |
29 | const Vec3vf<M>& Ng, |
30 | const vbool<M>& flags) |
31 | : U(U), V(V), UVW(UVW), tri_Ng(Ng), valid(valid), vt(t), flags(flags) {} |
32 | |
33 | __forceinline void finalize() |
34 | { |
35 | const vbool<M> invalid = abs(UVW) < min_rcp_input; |
36 | const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); |
37 | const vfloat<M> u = min(U * rcpUVW,1.0f); |
38 | const vfloat<M> v = min(V * rcpUVW,1.0f); |
39 | const vfloat<M> u1 = vfloat<M>(1.0f) - u; |
40 | const vfloat<M> v1 = vfloat<M>(1.0f) - v; |
41 | #if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING) |
42 | vu = select(flags,u1,u); |
43 | vv = select(flags,v1,v); |
44 | vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z); |
45 | #else |
46 | const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f)); |
47 | vv = select(flags,u1,v); |
48 | vu = select(flags,v1,u); |
49 | vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z); |
50 | #endif |
51 | } |
52 | |
53 | __forceinline Vec2f uv(const size_t i) |
54 | { |
55 | const float u = vu[i]; |
56 | const float v = vv[i]; |
57 | return Vec2f(u,v); |
58 | } |
59 | |
60 | __forceinline float t(const size_t i) { return vt[i]; } |
61 | __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } |
62 | |
63 | private: |
64 | vfloat<M> U; |
65 | vfloat<M> V; |
66 | vfloat<M> UVW; |
67 | Vec3vf<M> tri_Ng; |
68 | |
69 | public: |
70 | vbool<M> valid; |
71 | vfloat<M> vu; |
72 | vfloat<M> vv; |
73 | vfloat<M> vt; |
74 | Vec3vf<M> vNg; |
75 | |
76 | public: |
77 | const vbool<M> flags; |
78 | }; |
79 | |
80 | template<int K> |
81 | struct QuadHitPlueckerK |
82 | { |
83 | __forceinline QuadHitPlueckerK(const vfloat<K>& U, |
84 | const vfloat<K>& V, |
85 | const vfloat<K>& UVW, |
86 | const vfloat<K>& t, |
87 | const Vec3vf<K>& Ng, |
88 | const vbool<K>& flags) |
89 | : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng) {} |
90 | |
91 | __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const |
92 | { |
93 | const vbool<K> invalid = abs(UVW) < min_rcp_input; |
94 | const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); |
95 | const vfloat<K> u0 = min(U * rcpUVW,1.0f); |
96 | const vfloat<K> v0 = min(V * rcpUVW,1.0f); |
97 | const vfloat<K> u1 = vfloat<K>(1.0f) - u0; |
98 | const vfloat<K> v1 = vfloat<K>(1.0f) - v0; |
99 | const vfloat<K> u = select(flags,u1,u0); |
100 | const vfloat<K> v = select(flags,v1,v0); |
101 | const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); |
102 | return std::make_tuple(u,v,t,Ng); |
103 | } |
104 | |
105 | private: |
106 | const vfloat<K> U; |
107 | const vfloat<K> V; |
108 | const vfloat<K> UVW; |
109 | const vfloat<K> t; |
110 | const vbool<K> flags; |
111 | const Vec3vf<K> tri_Ng; |
112 | }; |
113 | |
114 | struct PlueckerIntersectorTriangle1 |
115 | { |
116 | template<int M, typename Epilog> |
117 | static __forceinline bool intersect(Ray& ray, |
118 | const Vec3vf<M>& tri_v0, |
119 | const Vec3vf<M>& tri_v1, |
120 | const Vec3vf<M>& tri_v2, |
121 | const vbool<M>& flags, |
122 | const Epilog& epilog) |
123 | { |
124 | /* calculate vertices relative to ray origin */ |
125 | const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); |
126 | const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); |
127 | const Vec3vf<M> v0 = tri_v0-O; |
128 | const Vec3vf<M> v1 = tri_v1-O; |
129 | const Vec3vf<M> v2 = tri_v2-O; |
130 | |
131 | /* calculate triangle edges */ |
132 | const Vec3vf<M> e0 = v2-v0; |
133 | const Vec3vf<M> e1 = v0-v1; |
134 | const Vec3vf<M> e2 = v1-v2; |
135 | |
136 | /* perform edge tests */ |
137 | const vfloat<M> U = dot(cross(e0,v2+v0),D); |
138 | const vfloat<M> V = dot(cross(e1,v0+v1),D); |
139 | const vfloat<M> W = dot(cross(e2,v1+v2),D); |
140 | const vfloat<M> UVW = U+V+W; |
141 | const vfloat<M> eps = float(ulp)*abs(UVW); |
142 | #if defined(EMBREE_BACKFACE_CULLING) |
143 | vbool<M> valid = max(U,V,W) <= eps; |
144 | #else |
145 | vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); |
146 | #endif |
147 | if (unlikely(none(valid))) return false; |
148 | |
149 | /* calculate geometry normal and denominator */ |
150 | const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); |
151 | const vfloat<M> den = twice(dot(Ng,D)); |
152 | |
153 | /* perform depth test */ |
154 | const vfloat<M> T = twice(dot(v0,Ng)); |
155 | const vfloat<M> t = rcp(den)*T; |
156 | valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar); |
157 | valid &= den != vfloat<M>(zero); |
158 | if (unlikely(none(valid))) return false; |
159 | |
160 | /* update hit information */ |
161 | QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags); |
162 | return epilog(valid,hit); |
163 | } |
164 | }; |
165 | |
166 | /*! Intersects M quads with 1 ray */ |
167 | template<int M, bool filter> |
168 | struct QuadMIntersector1Pluecker |
169 | { |
170 | __forceinline QuadMIntersector1Pluecker() {} |
171 | |
172 | __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} |
173 | |
174 | __forceinline void intersect(RayHit& ray, IntersectContext* context, |
175 | const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, |
176 | const vuint<M>& geomID, const vuint<M>& primID) const |
177 | { |
178 | Intersect1EpilogM<M,filter> epilog(ray,context,geomID,primID); |
179 | PlueckerIntersectorTriangle1::intersect<M>(ray,v0,v1,v3,vbool<M>(false),epilog); |
180 | PlueckerIntersectorTriangle1::intersect<M>(ray,v2,v3,v1,vbool<M>(true),epilog); |
181 | } |
182 | |
183 | __forceinline bool occluded(Ray& ray, IntersectContext* context, |
184 | const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, |
185 | const vuint<M>& geomID, const vuint<M>& primID) const |
186 | { |
187 | Occluded1EpilogM<M,filter> epilog(ray,context,geomID,primID); |
188 | if (PlueckerIntersectorTriangle1::intersect<M>(ray,v0,v1,v3,vbool<M>(false),epilog)) return true; |
189 | if (PlueckerIntersectorTriangle1::intersect<M>(ray,v2,v3,v1,vbool<M>(true ),epilog)) return true; |
190 | return false; |
191 | } |
192 | }; |
193 | |
194 | #if defined(__AVX__) |
195 | |
196 | /*! Intersects 4 quads with 1 ray using AVX */ |
197 | template<bool filter> |
198 | struct QuadMIntersector1Pluecker<4,filter> |
199 | { |
200 | __forceinline QuadMIntersector1Pluecker() {} |
201 | |
202 | __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} |
203 | |
204 | template<typename Epilog> |
205 | __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const |
206 | { |
207 | const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); |
208 | #if !defined(EMBREE_BACKFACE_CULLING) |
209 | const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); |
210 | const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); |
211 | #else |
212 | const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); |
213 | const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); |
214 | #endif |
215 | const vbool8 flags(0,0,0,0,1,1,1,1); |
216 | return PlueckerIntersectorTriangle1::intersect<8>(ray,vtx0,vtx1,vtx2,flags,epilog); |
217 | } |
218 | |
219 | __forceinline bool intersect(RayHit& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, |
220 | const vuint4& geomID, const vuint4& primID) const |
221 | { |
222 | return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID))); |
223 | } |
224 | |
225 | __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, |
226 | const vuint4& geomID, const vuint4& primID) const |
227 | { |
228 | return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID))); |
229 | } |
230 | }; |
231 | |
232 | #endif |
233 | |
234 | |
235 | /* ----------------------------- */ |
236 | /* -- ray packet intersectors -- */ |
237 | /* ----------------------------- */ |
238 | |
239 | struct PlueckerIntersector1KTriangleM |
240 | { |
241 | /*! Intersect k'th ray from ray packet of size K with M triangles. */ |
242 | template<int M, int K, typename Epilog> |
243 | static __forceinline bool intersect1(RayK<K>& ray, |
244 | size_t k, |
245 | const Vec3vf<M>& tri_v0, |
246 | const Vec3vf<M>& tri_v1, |
247 | const Vec3vf<M>& tri_v2, |
248 | const vbool<M>& flags, |
249 | const Epilog& epilog) |
250 | { |
251 | /* calculate vertices relative to ray origin */ |
252 | const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); |
253 | const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); |
254 | const Vec3vf<M> v0 = tri_v0-O; |
255 | const Vec3vf<M> v1 = tri_v1-O; |
256 | const Vec3vf<M> v2 = tri_v2-O; |
257 | |
258 | /* calculate triangle edges */ |
259 | const Vec3vf<M> e0 = v2-v0; |
260 | const Vec3vf<M> e1 = v0-v1; |
261 | const Vec3vf<M> e2 = v1-v2; |
262 | |
263 | /* perform edge tests */ |
264 | const vfloat<M> U = dot(cross(e0,v2+v0),D); |
265 | const vfloat<M> V = dot(cross(e1,v0+v1),D); |
266 | const vfloat<M> W = dot(cross(e2,v1+v2),D); |
267 | |
268 | const vfloat<M> UVW = U+V+W; |
269 | const vfloat<M> eps = float(ulp)*abs(UVW); |
270 | #if defined(EMBREE_BACKFACE_CULLING) |
271 | vbool<M> valid = max(U,V,W) <= eps; |
272 | #else |
273 | vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); |
274 | #endif |
275 | if (unlikely(none(valid))) return false; |
276 | |
277 | /* calculate geometry normal and denominator */ |
278 | const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); |
279 | const vfloat<M> den = twice(dot(Ng,D)); |
280 | |
281 | /* perform depth test */ |
282 | const vfloat<M> T = twice(dot(v0,Ng)); |
283 | const vfloat<M> t = rcp(den)*T; |
284 | valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]); |
285 | if (unlikely(none(valid))) return false; |
286 | |
287 | /* avoid division by 0 */ |
288 | valid &= den != vfloat<M>(zero); |
289 | if (unlikely(none(valid))) return false; |
290 | |
291 | /* update hit information */ |
292 | QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags); |
293 | return epilog(valid,hit); |
294 | } |
295 | }; |
296 | |
297 | template<int M, int K, bool filter> |
298 | struct QuadMIntersectorKPlueckerBase |
299 | { |
300 | __forceinline QuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {} |
301 | |
302 | /*! Intersects K rays with one of M triangles. */ |
303 | template<typename Epilog> |
304 | __forceinline vbool<K> intersectK(const vbool<K>& valid0, |
305 | RayK<K>& ray, |
306 | const Vec3vf<K>& tri_v0, |
307 | const Vec3vf<K>& tri_v1, |
308 | const Vec3vf<K>& tri_v2, |
309 | const vbool<K>& flags, |
310 | const Epilog& epilog) const |
311 | { |
312 | /* calculate vertices relative to ray origin */ |
313 | vbool<K> valid = valid0; |
314 | const Vec3vf<K> O = ray.org; |
315 | const Vec3vf<K> D = ray.dir; |
316 | const Vec3vf<K> v0 = tri_v0-O; |
317 | const Vec3vf<K> v1 = tri_v1-O; |
318 | const Vec3vf<K> v2 = tri_v2-O; |
319 | |
320 | /* calculate triangle edges */ |
321 | const Vec3vf<K> e0 = v2-v0; |
322 | const Vec3vf<K> e1 = v0-v1; |
323 | const Vec3vf<K> e2 = v1-v2; |
324 | |
325 | /* perform edge tests */ |
326 | const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D); |
327 | const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D); |
328 | const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D); |
329 | const vfloat<K> UVW = U+V+W; |
330 | const vfloat<K> eps = float(ulp)*abs(UVW); |
331 | #if defined(EMBREE_BACKFACE_CULLING) |
332 | valid &= max(U,V,W) <= eps; |
333 | #else |
334 | valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); |
335 | #endif |
336 | if (unlikely(none(valid))) return false; |
337 | |
338 | /* calculate geometry normal and denominator */ |
339 | const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2); |
340 | const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D)); |
341 | |
342 | /* perform depth test */ |
343 | const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng))); |
344 | const vfloat<K> t = rcp(den)*T; |
345 | valid &= ray.tnear() <= t & t <= ray.tfar; |
346 | valid &= den != vfloat<K>(zero); |
347 | if (unlikely(none(valid))) return false; |
348 | |
349 | /* calculate hit information */ |
350 | QuadHitPlueckerK<K> hit(U,V,UVW,t,Ng,flags); |
351 | return epilog(valid,hit); |
352 | } |
353 | |
354 | /*! Intersects K rays with one of M quads. */ |
355 | template<typename Epilog> |
356 | __forceinline bool intersectK(const vbool<K>& valid0, |
357 | RayK<K>& ray, |
358 | const Vec3vf<K>& v0, |
359 | const Vec3vf<K>& v1, |
360 | const Vec3vf<K>& v2, |
361 | const Vec3vf<K>& v3, |
362 | const Epilog& epilog) const |
363 | { |
364 | intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog); |
365 | if (none(valid0)) return true; |
366 | intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog); |
367 | return none(valid0); |
368 | } |
369 | }; |
370 | |
371 | template<int M, int K, bool filter> |
372 | struct QuadMIntersectorKPluecker : public QuadMIntersectorKPlueckerBase<M,K,filter> |
373 | { |
374 | __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) |
375 | : QuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {} |
376 | |
377 | __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, |
378 | const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, |
379 | const vuint<M>& geomID, const vuint<M>& primID) const |
380 | { |
381 | Intersect1KEpilogM<M,K,filter> epilog(ray,k,context,geomID,primID); |
382 | PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),epilog); |
383 | PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),epilog); |
384 | } |
385 | |
386 | __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, |
387 | const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, |
388 | const vuint<M>& geomID, const vuint<M>& primID) const |
389 | { |
390 | Occluded1KEpilogM<M,K,filter> epilog(ray,k,context,geomID,primID); |
391 | if (PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; |
392 | if (PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; |
393 | return false; |
394 | } |
395 | }; |
396 | |
397 | #if defined(__AVX__) |
398 | |
399 | /*! Intersects 4 quads with 1 ray using AVX */ |
400 | template<int K, bool filter> |
401 | struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter> |
402 | { |
403 | __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) |
404 | : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {} |
405 | |
406 | template<typename Epilog> |
407 | __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const |
408 | { |
409 | const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); |
410 | const vbool8 flags(0,0,0,0,1,1,1,1); |
411 | #if !defined(EMBREE_BACKFACE_CULLING) |
412 | const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); |
413 | const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); |
414 | #else |
415 | const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); |
416 | const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); |
417 | #endif |
418 | return PlueckerIntersector1KTriangleM::intersect1<8,K>(ray,k,vtx0,vtx1,vtx2,flags,epilog); |
419 | } |
420 | |
421 | __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, |
422 | const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, |
423 | const vuint4& geomID, const vuint4& primID) const |
424 | { |
425 | return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); |
426 | } |
427 | |
428 | __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, |
429 | const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, |
430 | const vuint4& geomID, const vuint4& primID) const |
431 | { |
432 | return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); |
433 | } |
434 | }; |
435 | |
436 | #endif |
437 | } |
438 | } |
439 | |