1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "../common/ray.h" |
7 | #include "cylinder.h" |
8 | #include "plane.h" |
9 | #include "line_intersector.h" |
10 | #include "curve_intersector_precalculations.h" |
11 | |
12 | namespace embree |
13 | { |
14 | namespace isa |
15 | { |
16 | static const size_t numJacobianIterations = 5; |
17 | #if defined(__AVX__) |
18 | static const size_t numBezierSubdivisions = 2; |
19 | #else |
20 | static const size_t numBezierSubdivisions = 3; |
21 | #endif |
22 | |
23 | struct BezierCurveHit |
24 | { |
25 | __forceinline BezierCurveHit() {} |
26 | |
27 | __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng) |
28 | : t(t), u(u), v(0.0f), Ng(Ng) {} |
29 | |
30 | __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng) |
31 | : t(t), u(u), v(v), Ng(Ng) {} |
32 | |
33 | __forceinline void finalize() {} |
34 | |
35 | public: |
36 | float t; |
37 | float u; |
38 | float v; |
39 | Vec3fa Ng; |
40 | }; |
41 | |
42 | template<typename NativeCurve3ff, typename Ray, typename Epilog> |
43 | __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i, |
44 | const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1, |
45 | const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du, |
46 | const Epilog& epilog) |
47 | { |
48 | if (tp.lower[i]+dt > ray.tfar) return false; |
49 | Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]); |
50 | if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]); |
51 | if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]); |
52 | BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o); |
53 | return epilog(hit); |
54 | } |
55 | |
56 | template<typename NativeCurve3ff, typename Ray, typename Epilog> |
57 | __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog) |
58 | { |
59 | const Vec3fa org = zero; |
60 | const Vec3fa dir = ray.dir; |
61 | const float length_ray_dir = length(a: dir); |
62 | |
63 | /* error of curve evaluations is propertional to largest coordinate */ |
64 | const BBox3ff box = curve.bounds(); |
65 | const float P_err = 16.0f*float(ulp)*reduce_max(v: max(a: abs(a: box.lower),b: abs(a: box.upper))); |
66 | |
67 | for (size_t i=0; i<numJacobianIterations; i++) |
68 | { |
69 | const Vec3fa Q = madd(a: Vec3fa(t),b: dir,c: org); |
70 | //const Vec3fa dQdu = zero; |
71 | const Vec3fa dQdt = dir; |
72 | const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here |
73 | |
74 | Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu); |
75 | //const Vec3fa dPdt = zero; |
76 | |
77 | const Vec3fa R = Q-P; |
78 | const float len_R = length(a: R); //reduce_max(abs(R)); |
79 | const float R_err = max(a: Q_err,b: P_err); |
80 | const Vec3fa dRdu = /*dQdu*/-dPdu; |
81 | const Vec3fa dRdt = dQdt;//-dPdt; |
82 | |
83 | const Vec3fa T = normalize(a: dPdu); |
84 | const Vec3fa dTdu = dnormalize(p: dPdu,dp: ddPdu); |
85 | //const Vec3fa dTdt = zero; |
86 | const float cos_err = P_err/length(a: dPdu); |
87 | |
88 | /* Error estimate for dot(R,T): |
89 | |
90 | dot(R,T) = cos(R,T) |R| |T| |
91 | = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err) |
92 | = cos(R,T)*|R|*|T| |
93 | +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err) |
94 | +- cos_error*(|R| + |T|) |
95 | +- lower order terms |
96 | with cos(R,T) being in [0,1] and |T| = 1 we get: |
97 | dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1) |
98 | */ |
99 | |
100 | const float f = dot(a: R,b: T); |
101 | const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R); |
102 | const float dfdu = dot(a: dRdu,b: T) + dot(a: R,b: dTdu); |
103 | const float dfdt = dot(a: dRdt,b: T);// + dot(R,dTdt); |
104 | |
105 | const float K = dot(a: R,b: R)-sqr(x: f); |
106 | const float dKdu = /*2.0f*/(dot(a: R,b: dRdu)-f*dfdu); |
107 | const float dKdt = /*2.0f*/(dot(a: R,b: dRdt)-f*dfdt); |
108 | const float rsqrt_K = rsqrt(x: K); |
109 | |
110 | const float g = sqrt(x: K)-P.w; |
111 | const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w; |
112 | const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w; |
113 | const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w; |
114 | |
115 | const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt); |
116 | const Vec2f dut = rcp(a: J)*Vec2f(f,g); |
117 | const Vec2f ut = Vec2f(u,t) - dut; |
118 | u = ut.x; t = ut.y; |
119 | |
120 | if (abs(x: f) < f_err && abs(x: g) < g_err) |
121 | { |
122 | t+=dt; |
123 | if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs |
124 | if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs |
125 | const Vec3fa R = normalize(a: Q-P); |
126 | const Vec3fa U = madd(a: Vec3fa(dPdu.w),b: R,c: dPdu); |
127 | const Vec3fa V = cross(a: dPdu,b: R); |
128 | BezierCurveHit hit(t,u,cross(a: V,b: U)); |
129 | return epilog(hit); |
130 | } |
131 | } |
132 | return false; |
133 | } |
134 | |
135 | template<typename NativeCurve3ff, typename Ray, typename Epilog> |
136 | bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, |
137 | float u0, float u1, unsigned int depth, const Epilog& epilog) |
138 | { |
139 | #if defined(__AVX__) |
140 | enum { VSIZEX_ = 8 }; |
141 | typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues |
142 | typedef vint8 vintx; |
143 | typedef vfloat8 vfloatx; |
144 | #else |
145 | enum { VSIZEX_ = 4 }; |
146 | typedef vbool4 vboolx; |
147 | typedef vint4 vintx; |
148 | typedef vfloat4 vfloatx; |
149 | #endif |
150 | typedef Vec3<vfloatx> Vec3vfx; |
151 | typedef Vec4<vfloatx> Vec4vfx; |
152 | |
153 | unsigned int maxDepth = numBezierSubdivisions; |
154 | bool found = false; |
155 | const Vec3fa org = zero; |
156 | const Vec3fa dir = ray.dir; |
157 | |
158 | unsigned int sptr = 0; |
159 | const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below |
160 | struct StackEntry { |
161 | vboolx valid; |
162 | vfloatx tlower; |
163 | float u0; |
164 | float u1; |
165 | unsigned int depth; |
166 | }; |
167 | StackEntry stack[stack_size]; |
168 | goto entry; |
169 | |
170 | /* terminate if stack is empty */ |
171 | while (sptr) |
172 | { |
173 | /* pop from stack */ |
174 | { |
175 | sptr--; |
176 | vboolx valid = stack[sptr].valid; |
177 | const vfloatx tlower = stack[sptr].tlower; |
178 | valid &= tlower+dt <= ray.tfar; |
179 | if (none(b: valid)) continue; |
180 | u0 = stack[sptr].u0; |
181 | u1 = stack[sptr].u1; |
182 | depth = stack[sptr].depth; |
183 | const size_t i = select_min(valid,v: tlower); clear(a&: valid,index: i); |
184 | stack[sptr].valid = valid; |
185 | if (any(b: valid)) sptr++; // there are still items on the stack |
186 | |
187 | /* process next segment */ |
188 | const vfloatx vu0 = lerp(a: u0,b: u1,t: vfloatx(step)*(1.0f/(vfloatx::size-1))); |
189 | u0 = vu0[i+0]; |
190 | u1 = vu0[i+1]; |
191 | } |
192 | entry: |
193 | |
194 | /* subdivide curve */ |
195 | const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1))); |
196 | const vfloatx vu0 = lerp(a: u0,b: u1,t: vfloatx(step)*(1.0f/(vfloatx::size-1))); |
197 | Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale); |
198 | const Vec4vfx P3 = shift_right_1(a: P0); |
199 | const Vec4vfx dP3du = shift_right_1(a: dP0du); |
200 | const Vec4vfx P1 = P0 + dP0du; |
201 | const Vec4vfx P2 = P3 - dP3du; |
202 | |
203 | /* calculate bounding cylinders */ |
204 | const vfloatx rr1 = sqr_point_to_line_distance(PmQ0: Vec3vfx(dP0du),Q1mQ0: Vec3vfx(P3-P0)); |
205 | const vfloatx rr2 = sqr_point_to_line_distance(PmQ0: Vec3vfx(dP3du),Q1mQ0: Vec3vfx(P3-P0)); |
206 | const vfloatx maxr12 = sqrt(a: max(a: rr1,b: rr2)); |
207 | const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp); |
208 | const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp); |
209 | vfloatx r_outer = max(a: P0.w,b: P1.w,c: P2.w,d: P3.w)+maxr12; |
210 | vfloatx r_inner = min(a: P0.w,b: P1.w,c: P2.w,d: P3.w)-maxr12; |
211 | r_outer = one_plus_ulp*r_outer; |
212 | r_inner = max(a: 0.0f,b: one_minus_ulp*r_inner); |
213 | const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer); |
214 | const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner); |
215 | vboolx valid = true; clear(a&: valid,index: vfloatx::size-1); |
216 | |
217 | /* intersect with outer cylinder */ |
218 | BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1; |
219 | valid &= cylinder_outer.intersect(org,dir,t_o&: tc_outer,u0_o&: u_outer0,Ng0_o&: Ng_outer0,u1_o&: u_outer1,Ng1_o&: Ng_outer1); |
220 | if (none(b: valid)) continue; |
221 | |
222 | /* intersect with cap-planes */ |
223 | BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt); |
224 | tp = embree::intersect(a: tp,b: tc_outer); |
225 | BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(ray_org: org,ray_dir: dir); |
226 | tp = embree::intersect(a: tp,b: h0); |
227 | BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(ray_org: org,ray_dir: dir); |
228 | tp = embree::intersect(a: tp,b: h1); |
229 | valid &= tp.lower <= tp.upper; |
230 | if (none(b: valid)) continue; |
231 | |
232 | /* clamp and correct u parameter */ |
233 | u_outer0 = clamp(x: u_outer0,lower: vfloatx(0.0f),upper: vfloatx(1.0f)); |
234 | u_outer1 = clamp(x: u_outer1,lower: vfloatx(0.0f),upper: vfloatx(1.0f)); |
235 | u_outer0 = lerp(a: u0,b: u1,t: (vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size))); |
236 | u_outer1 = lerp(a: u0,b: u1,t: (vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size))); |
237 | |
238 | /* intersect with inner cylinder */ |
239 | BBox<vfloatx> tc_inner; |
240 | vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero; |
241 | const vboolx valid_inner = cylinder_inner.intersect(org,dir,t_o&: tc_inner,u0_o&: u_inner0,Ng0_o&: Ng_inner0,u1_o&: u_inner1,Ng1_o&: Ng_inner1); |
242 | |
243 | /* at the unstable area we subdivide deeper */ |
244 | const vboolx unstable0 = (!valid_inner) | (abs(a: dot(a: Vec3vfx(Vec3fa(ray.dir)),b: Ng_inner0)) < 0.3f); |
245 | const vboolx unstable1 = (!valid_inner) | (abs(a: dot(a: Vec3vfx(Vec3fa(ray.dir)),b: Ng_inner1)) < 0.3f); |
246 | |
247 | /* subtract the inner interval from the current hit interval */ |
248 | BBox<vfloatx> tp0, tp1; |
249 | subtract(a: tp,b: tc_inner,c&: tp0,d&: tp1); |
250 | vboolx valid0 = valid & (tp0.lower <= tp0.upper); |
251 | vboolx valid1 = valid & (tp1.lower <= tp1.upper); |
252 | if (none(b: valid0 | valid1)) continue; |
253 | |
254 | /* iterate over all first hits front to back */ |
255 | const vintx termDepth0 = select(m: unstable0,t: vintx(maxDepth+1),f: vintx(maxDepth)); |
256 | vboolx recursion_valid0 = valid0 & (depth < termDepth0); |
257 | valid0 &= depth >= termDepth0; |
258 | |
259 | while (any(b: valid0)) |
260 | { |
261 | const size_t i = select_min(valid: valid0,v: tp0.lower); clear(a&: valid0,index: i); |
262 | found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog); |
263 | //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog); |
264 | valid0 &= tp0.lower+dt <= ray.tfar; |
265 | } |
266 | valid1 &= tp1.lower+dt <= ray.tfar; |
267 | |
268 | /* iterate over all second hits front to back */ |
269 | const vintx termDepth1 = select(m: unstable1,t: vintx(maxDepth+1),f: vintx(maxDepth)); |
270 | vboolx recursion_valid1 = valid1 & (depth < termDepth1); |
271 | valid1 &= depth >= termDepth1; |
272 | while (any(b: valid1)) |
273 | { |
274 | const size_t i = select_min(valid: valid1,v: tp1.lower); clear(a&: valid1,index: i); |
275 | found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog); |
276 | //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog); |
277 | valid1 &= tp1.lower+dt <= ray.tfar; |
278 | } |
279 | |
280 | /* push valid segments to stack */ |
281 | recursion_valid0 &= tp0.lower+dt <= ray.tfar; |
282 | recursion_valid1 &= tp1.lower+dt <= ray.tfar; |
283 | const vboolx recursion_valid = recursion_valid0 | recursion_valid1; |
284 | if (any(b: recursion_valid)) |
285 | { |
286 | assert(sptr < stack_size); |
287 | stack[sptr].valid = recursion_valid; |
288 | stack[sptr].tlower = select(m: recursion_valid0,t: tp0.lower,f: tp1.lower); |
289 | stack[sptr].u0 = u0; |
290 | stack[sptr].u1 = u1; |
291 | stack[sptr].depth = depth+1; |
292 | sptr++; |
293 | } |
294 | } |
295 | return found; |
296 | } |
297 | |
298 | template<template<typename Ty> class NativeCurve> |
299 | struct SweepCurve1Intersector1 |
300 | { |
301 | typedef NativeCurve<Vec3ff> NativeCurve3ff; |
302 | |
303 | template<typename Epilog> |
304 | __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, |
305 | IntersectContext* context, |
306 | const CurveGeometry* geom, const unsigned int primID, |
307 | const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, |
308 | const Epilog& epilog) |
309 | { |
310 | STAT3(normal.trav_prims,1,1,1); |
311 | |
312 | /* move ray closer to make intersection stable */ |
313 | NativeCurve3ff curve0(v0,v1,v2,v3); |
314 | curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); |
315 | const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(x: dot(a: ray.dir,b: ray.dir)); |
316 | const Vec3ff ref(madd(a: Vec3fa(dt),b: ray.dir,c: ray.org),0.0f); |
317 | const NativeCurve3ff curve1 = curve0-ref; |
318 | return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); |
319 | } |
320 | }; |
321 | |
322 | template<template<typename Ty> class NativeCurve, int K> |
323 | struct SweepCurve1IntersectorK |
324 | { |
325 | typedef NativeCurve<Vec3ff> NativeCurve3ff; |
326 | |
327 | struct Ray1 |
328 | { |
329 | __forceinline Ray1(RayK<K>& ray, size_t k) |
330 | : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {} |
331 | |
332 | Vec3fa org; |
333 | Vec3fa dir; |
334 | float _tnear; |
335 | float& tfar; |
336 | |
337 | __forceinline float& tnear() { return _tnear; } |
338 | //__forceinline float& tfar() { return _tfar; } |
339 | __forceinline const float& tnear() const { return _tnear; } |
340 | //__forceinline const float& tfar() const { return _tfar; } |
341 | |
342 | }; |
343 | |
344 | template<typename Epilog> |
345 | __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, |
346 | IntersectContext* context, |
347 | const CurveGeometry* geom, const unsigned int primID, |
348 | const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, |
349 | const Epilog& epilog) |
350 | { |
351 | STAT3(normal.trav_prims,1,1,1); |
352 | Ray1 ray(vray,k); |
353 | |
354 | /* move ray closer to make intersection stable */ |
355 | NativeCurve3ff curve0(v0,v1,v2,v3); |
356 | curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); |
357 | const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); |
358 | const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); |
359 | const NativeCurve3ff curve1 = curve0-ref; |
360 | return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); |
361 | } |
362 | }; |
363 | } |
364 | } |
365 | |