1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "../common/ray.h"
7#include "cylinder.h"
8#include "plane.h"
9#include "line_intersector.h"
10#include "curve_intersector_precalculations.h"
11
12namespace embree
13{
14 namespace isa
15 {
16 static const size_t numJacobianIterations = 5;
17#if defined(__AVX__)
18 static const size_t numBezierSubdivisions = 2;
19#else
20 static const size_t numBezierSubdivisions = 3;
21#endif
22
23 struct BezierCurveHit
24 {
25 __forceinline BezierCurveHit() {}
26
27 __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
28 : t(t), u(u), v(0.0f), Ng(Ng) {}
29
30 __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
31 : t(t), u(u), v(v), Ng(Ng) {}
32
33 __forceinline void finalize() {}
34
35 public:
36 float t;
37 float u;
38 float v;
39 Vec3fa Ng;
40 };
41
42 template<typename NativeCurve3ff, typename Ray, typename Epilog>
43 __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,
44 const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
45 const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
46 const Epilog& epilog)
47 {
48 if (tp.lower[i]+dt > ray.tfar) return false;
49 Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);
50 if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]);
51 if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]);
52 BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o);
53 return epilog(hit);
54 }
55
56 template<typename NativeCurve3ff, typename Ray, typename Epilog>
57 __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
58 {
59 const Vec3fa org = zero;
60 const Vec3fa dir = ray.dir;
61 const float length_ray_dir = length(a: dir);
62
63 /* error of curve evaluations is propertional to largest coordinate */
64 const BBox3ff box = curve.bounds();
65 const float P_err = 16.0f*float(ulp)*reduce_max(v: max(a: abs(a: box.lower),b: abs(a: box.upper)));
66
67 for (size_t i=0; i<numJacobianIterations; i++)
68 {
69 const Vec3fa Q = madd(a: Vec3fa(t),b: dir,c: org);
70 //const Vec3fa dQdu = zero;
71 const Vec3fa dQdt = dir;
72 const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here
73
74 Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
75 //const Vec3fa dPdt = zero;
76
77 const Vec3fa R = Q-P;
78 const float len_R = length(a: R); //reduce_max(abs(R));
79 const float R_err = max(a: Q_err,b: P_err);
80 const Vec3fa dRdu = /*dQdu*/-dPdu;
81 const Vec3fa dRdt = dQdt;//-dPdt;
82
83 const Vec3fa T = normalize(a: dPdu);
84 const Vec3fa dTdu = dnormalize(p: dPdu,dp: ddPdu);
85 //const Vec3fa dTdt = zero;
86 const float cos_err = P_err/length(a: dPdu);
87
88 /* Error estimate for dot(R,T):
89
90 dot(R,T) = cos(R,T) |R| |T|
91 = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err)
92 = cos(R,T)*|R|*|T|
93 +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err)
94 +- cos_error*(|R| + |T|)
95 +- lower order terms
96 with cos(R,T) being in [0,1] and |T| = 1 we get:
97 dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1)
98 */
99
100 const float f = dot(a: R,b: T);
101 const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R);
102 const float dfdu = dot(a: dRdu,b: T) + dot(a: R,b: dTdu);
103 const float dfdt = dot(a: dRdt,b: T);// + dot(R,dTdt);
104
105 const float K = dot(a: R,b: R)-sqr(x: f);
106 const float dKdu = /*2.0f*/(dot(a: R,b: dRdu)-f*dfdu);
107 const float dKdt = /*2.0f*/(dot(a: R,b: dRdt)-f*dfdt);
108 const float rsqrt_K = rsqrt(x: K);
109
110 const float g = sqrt(x: K)-P.w;
111 const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w;
112 const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;
113 const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;
114
115 const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt);
116 const Vec2f dut = rcp(a: J)*Vec2f(f,g);
117 const Vec2f ut = Vec2f(u,t) - dut;
118 u = ut.x; t = ut.y;
119
120 if (abs(x: f) < f_err && abs(x: g) < g_err)
121 {
122 t+=dt;
123 if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
124 if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs
125 const Vec3fa R = normalize(a: Q-P);
126 const Vec3fa U = madd(a: Vec3fa(dPdu.w),b: R,c: dPdu);
127 const Vec3fa V = cross(a: dPdu,b: R);
128 BezierCurveHit hit(t,u,cross(a: V,b: U));
129 return epilog(hit);
130 }
131 }
132 return false;
133 }
134
135 template<typename NativeCurve3ff, typename Ray, typename Epilog>
136 bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve,
137 float u0, float u1, unsigned int depth, const Epilog& epilog)
138 {
139#if defined(__AVX__)
140 enum { VSIZEX_ = 8 };
141 typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
142 typedef vint8 vintx;
143 typedef vfloat8 vfloatx;
144#else
145 enum { VSIZEX_ = 4 };
146 typedef vbool4 vboolx;
147 typedef vint4 vintx;
148 typedef vfloat4 vfloatx;
149#endif
150 typedef Vec3<vfloatx> Vec3vfx;
151 typedef Vec4<vfloatx> Vec4vfx;
152
153 unsigned int maxDepth = numBezierSubdivisions;
154 bool found = false;
155 const Vec3fa org = zero;
156 const Vec3fa dir = ray.dir;
157
158 unsigned int sptr = 0;
159 const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below
160 struct StackEntry {
161 vboolx valid;
162 vfloatx tlower;
163 float u0;
164 float u1;
165 unsigned int depth;
166 };
167 StackEntry stack[stack_size];
168 goto entry;
169
170 /* terminate if stack is empty */
171 while (sptr)
172 {
173 /* pop from stack */
174 {
175 sptr--;
176 vboolx valid = stack[sptr].valid;
177 const vfloatx tlower = stack[sptr].tlower;
178 valid &= tlower+dt <= ray.tfar;
179 if (none(b: valid)) continue;
180 u0 = stack[sptr].u0;
181 u1 = stack[sptr].u1;
182 depth = stack[sptr].depth;
183 const size_t i = select_min(valid,v: tlower); clear(a&: valid,index: i);
184 stack[sptr].valid = valid;
185 if (any(b: valid)) sptr++; // there are still items on the stack
186
187 /* process next segment */
188 const vfloatx vu0 = lerp(a: u0,b: u1,t: vfloatx(step)*(1.0f/(vfloatx::size-1)));
189 u0 = vu0[i+0];
190 u1 = vu0[i+1];
191 }
192 entry:
193
194 /* subdivide curve */
195 const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1)));
196 const vfloatx vu0 = lerp(a: u0,b: u1,t: vfloatx(step)*(1.0f/(vfloatx::size-1)));
197 Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);
198 const Vec4vfx P3 = shift_right_1(a: P0);
199 const Vec4vfx dP3du = shift_right_1(a: dP0du);
200 const Vec4vfx P1 = P0 + dP0du;
201 const Vec4vfx P2 = P3 - dP3du;
202
203 /* calculate bounding cylinders */
204 const vfloatx rr1 = sqr_point_to_line_distance(PmQ0: Vec3vfx(dP0du),Q1mQ0: Vec3vfx(P3-P0));
205 const vfloatx rr2 = sqr_point_to_line_distance(PmQ0: Vec3vfx(dP3du),Q1mQ0: Vec3vfx(P3-P0));
206 const vfloatx maxr12 = sqrt(a: max(a: rr1,b: rr2));
207 const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp);
208 const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp);
209 vfloatx r_outer = max(a: P0.w,b: P1.w,c: P2.w,d: P3.w)+maxr12;
210 vfloatx r_inner = min(a: P0.w,b: P1.w,c: P2.w,d: P3.w)-maxr12;
211 r_outer = one_plus_ulp*r_outer;
212 r_inner = max(a: 0.0f,b: one_minus_ulp*r_inner);
213 const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
214 const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
215 vboolx valid = true; clear(a&: valid,index: vfloatx::size-1);
216
217 /* intersect with outer cylinder */
218 BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
219 valid &= cylinder_outer.intersect(org,dir,t_o&: tc_outer,u0_o&: u_outer0,Ng0_o&: Ng_outer0,u1_o&: u_outer1,Ng1_o&: Ng_outer1);
220 if (none(b: valid)) continue;
221
222 /* intersect with cap-planes */
223 BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);
224 tp = embree::intersect(a: tp,b: tc_outer);
225 BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(ray_org: org,ray_dir: dir);
226 tp = embree::intersect(a: tp,b: h0);
227 BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(ray_org: org,ray_dir: dir);
228 tp = embree::intersect(a: tp,b: h1);
229 valid &= tp.lower <= tp.upper;
230 if (none(b: valid)) continue;
231
232 /* clamp and correct u parameter */
233 u_outer0 = clamp(x: u_outer0,lower: vfloatx(0.0f),upper: vfloatx(1.0f));
234 u_outer1 = clamp(x: u_outer1,lower: vfloatx(0.0f),upper: vfloatx(1.0f));
235 u_outer0 = lerp(a: u0,b: u1,t: (vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size)));
236 u_outer1 = lerp(a: u0,b: u1,t: (vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size)));
237
238 /* intersect with inner cylinder */
239 BBox<vfloatx> tc_inner;
240 vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
241 const vboolx valid_inner = cylinder_inner.intersect(org,dir,t_o&: tc_inner,u0_o&: u_inner0,Ng0_o&: Ng_inner0,u1_o&: u_inner1,Ng1_o&: Ng_inner1);
242
243 /* at the unstable area we subdivide deeper */
244 const vboolx unstable0 = (!valid_inner) | (abs(a: dot(a: Vec3vfx(Vec3fa(ray.dir)),b: Ng_inner0)) < 0.3f);
245 const vboolx unstable1 = (!valid_inner) | (abs(a: dot(a: Vec3vfx(Vec3fa(ray.dir)),b: Ng_inner1)) < 0.3f);
246
247 /* subtract the inner interval from the current hit interval */
248 BBox<vfloatx> tp0, tp1;
249 subtract(a: tp,b: tc_inner,c&: tp0,d&: tp1);
250 vboolx valid0 = valid & (tp0.lower <= tp0.upper);
251 vboolx valid1 = valid & (tp1.lower <= tp1.upper);
252 if (none(b: valid0 | valid1)) continue;
253
254 /* iterate over all first hits front to back */
255 const vintx termDepth0 = select(m: unstable0,t: vintx(maxDepth+1),f: vintx(maxDepth));
256 vboolx recursion_valid0 = valid0 & (depth < termDepth0);
257 valid0 &= depth >= termDepth0;
258
259 while (any(b: valid0))
260 {
261 const size_t i = select_min(valid: valid0,v: tp0.lower); clear(a&: valid0,index: i);
262 found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);
263 //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
264 valid0 &= tp0.lower+dt <= ray.tfar;
265 }
266 valid1 &= tp1.lower+dt <= ray.tfar;
267
268 /* iterate over all second hits front to back */
269 const vintx termDepth1 = select(m: unstable1,t: vintx(maxDepth+1),f: vintx(maxDepth));
270 vboolx recursion_valid1 = valid1 & (depth < termDepth1);
271 valid1 &= depth >= termDepth1;
272 while (any(b: valid1))
273 {
274 const size_t i = select_min(valid: valid1,v: tp1.lower); clear(a&: valid1,index: i);
275 found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog);
276 //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
277 valid1 &= tp1.lower+dt <= ray.tfar;
278 }
279
280 /* push valid segments to stack */
281 recursion_valid0 &= tp0.lower+dt <= ray.tfar;
282 recursion_valid1 &= tp1.lower+dt <= ray.tfar;
283 const vboolx recursion_valid = recursion_valid0 | recursion_valid1;
284 if (any(b: recursion_valid))
285 {
286 assert(sptr < stack_size);
287 stack[sptr].valid = recursion_valid;
288 stack[sptr].tlower = select(m: recursion_valid0,t: tp0.lower,f: tp1.lower);
289 stack[sptr].u0 = u0;
290 stack[sptr].u1 = u1;
291 stack[sptr].depth = depth+1;
292 sptr++;
293 }
294 }
295 return found;
296 }
297
298 template<template<typename Ty> class NativeCurve>
299 struct SweepCurve1Intersector1
300 {
301 typedef NativeCurve<Vec3ff> NativeCurve3ff;
302
303 template<typename Epilog>
304 __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
305 IntersectContext* context,
306 const CurveGeometry* geom, const unsigned int primID,
307 const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
308 const Epilog& epilog)
309 {
310 STAT3(normal.trav_prims,1,1,1);
311
312 /* move ray closer to make intersection stable */
313 NativeCurve3ff curve0(v0,v1,v2,v3);
314 curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
315 const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(x: dot(a: ray.dir,b: ray.dir));
316 const Vec3ff ref(madd(a: Vec3fa(dt),b: ray.dir,c: ray.org),0.0f);
317 const NativeCurve3ff curve1 = curve0-ref;
318 return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog);
319 }
320 };
321
322 template<template<typename Ty> class NativeCurve, int K>
323 struct SweepCurve1IntersectorK
324 {
325 typedef NativeCurve<Vec3ff> NativeCurve3ff;
326
327 struct Ray1
328 {
329 __forceinline Ray1(RayK<K>& ray, size_t k)
330 : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
331
332 Vec3fa org;
333 Vec3fa dir;
334 float _tnear;
335 float& tfar;
336
337 __forceinline float& tnear() { return _tnear; }
338 //__forceinline float& tfar() { return _tfar; }
339 __forceinline const float& tnear() const { return _tnear; }
340 //__forceinline const float& tfar() const { return _tfar; }
341
342 };
343
344 template<typename Epilog>
345 __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
346 IntersectContext* context,
347 const CurveGeometry* geom, const unsigned int primID,
348 const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
349 const Epilog& epilog)
350 {
351 STAT3(normal.trav_prims,1,1,1);
352 Ray1 ray(vray,k);
353
354 /* move ray closer to make intersection stable */
355 NativeCurve3ff curve0(v0,v1,v2,v3);
356 curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
357 const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
358 const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
359 const NativeCurve3ff curve1 = curve0-ref;
360 return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog);
361 }
362 };
363 }
364}
365

source code of qtquick3d/src/3rdparty/embree/kernels/geometry/curve_intersector_sweep.h