curve_intersector_sweep.h source code [qtquick3d/src/3rdparty/embree/kernels/geometry/curve_intersector_sweep.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "../common/ray.h"
7	#include "cylinder.h"
8	#include "plane.h"
9	#include "line_intersector.h"
10	#include "curve_intersector_precalculations.h"
11
12	namespace embree
13	{
14	namespace isa
15	{
16	static const size_t numJacobianIterations = `5`;
17	#if defined(__AVX__)
18	static const size_t numBezierSubdivisions = `2`;
19	#else
20	static const size_t numBezierSubdivisions = `3`;
21	#endif
22
23	struct BezierCurveHit
24	{
25	__forceinline BezierCurveHit() {}
26
27	__forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
28	: t(t), u(u), v(`0.0f`), Ng (Ng) {}
29
30	__forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
31	: t(t), u(u), v(v), Ng (Ng) {}
32
33	__forceinline void finalize() {}
34
35	public:
36	float t;
37	float u;
38	float v;
39	Vec3fa Ng;
40	};
41
42	template<typename NativeCurve3ff, typename Ray, typename Epilog>
43	__forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,
44	const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
45	const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
46	const Epilog& epilog)
47	{
48	if (tp.lower [i]+dt > ray.tfar) return false;
49	Vec3fa Ng_o = Vec3fa (Ng.x [i],Ng.y [i],Ng.z [i]);
50	if (h0.lower [i] == tp.lower [i]) Ng_o = -Vec3fa (dP0du.x [i],dP0du.y [i],dP0du.z [i]);
51	if (h1.lower [i] == tp.lower [i]) Ng_o = +Vec3fa (dP3du.x [i],dP3du.y [i],dP3du.z [i]);
52	BezierCurveHit hit(tp.lower [i]+dt,u [i],Ng_o);
53	return epilog(hit);
54	}
55
56	template<typename NativeCurve3ff, typename Ray, typename Epilog>
57	__forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
58	{
59	const Vec3fa org = zero;
60	const Vec3fa dir = ray.dir;
61	const float length_ray_dir = length(a: dir);
62
63	/ error of curve evaluations is propertional to largest coordinate /
64	const BBox3ff box = curve.bounds();
65	const float P_err = `16.0f`*float(ulp)*reduce_max(v: max(a: abs(a: box.lower),b: abs(a: box.upper)));
66
67	for (size_t i=`0`; i<numJacobianIterations; i++)
68	{
69	const Vec3fa Q = madd(a: Vec3fa (t),b: dir,c: org);
70	//const Vec3fa dQdu = zero;
71	const Vec3fa dQdt = dir;
72	const float Q_err = `16.0f`*float(ulp)length_ray_dirt; // works as org=zero here
73
74	Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
75	//const Vec3fa dPdt = zero;
76
77	const Vec3fa R = Q -P;
78	const float len_R = length(a: R); //reduce_max(abs(R));
79	const float R_err = max(a: Q_err,b: P_err);
80	const Vec3fa dRdu = /dQdu/-dPdu;
81	const Vec3fa dRdt = dQdt;//-dPdt;
82
83	const Vec3fa T = normalize(a: dPdu);
84	const Vec3fa dTdu = dnormalize(p: dPdu,dp: ddPdu);
85	//const Vec3fa dTdt = zero;
86	const float cos_err = P_err/length(a: dPdu);
87
88	/ Error estimate for dot(R,T):*
89
90	dot(R,T) = cos(R,T) \|R\| \|T\|
91	= (cos(R,T) +- cos_error) (\|R\| +- \|R\|_err) * (\|T\| +- \|T\|_err)*
92	= cos(R,T)\|R\|\|T\|
93	+- cos(R,T)(\|R\|\|T\|_err + \|T\|\|R\|_err)*
94	+- cos_error(\|R\| + \|T\|)*
95	+- lower order terms
96	with cos(R,T) being in [0,1] and \|T\| = 1 we get:
97	dot(R,T)_err = \|R\|\|T\|_err + \|R\|_err = cos_error(\|R\|+1)
98	*/
99
100	const float f = dot(a: R,b: T);
101	const float f_err = len_RP_err + R_err + cos_err(`1.0f`+len_R);
102	const float dfdu = dot(a: dRdu,b: T) + dot(a: R,b: dTdu);
103	const float dfdt = dot(a: dRdt,b: T);// + dot(R,dTdt);
104
105	const float K = dot(a: R,b: R)-sqr(x: f);
106	const float dKdu = /2.0f/(dot(a: R,b: dRdu)-f*dfdu);
107	const float dKdt = /2.0f/(dot(a: R,b: dRdt)-f*dfdt);
108	const float rsqrt_K = rsqrt(x: K);
109
110	const float g = sqrt(x: K)-P.w;
111	const float g_err = R_err + f_err + `16.0f`*float(ulp)*box.upper.w;
112	const float dgdu = /0.5f/dKdu*rsqrt_K-dPdu.w;
113	const float dgdt = /0.5f/dKdtrsqrt_K;//-dPdt.w;*
114
115	const LinearSpace2f J = LinearSpace2f (dfdu,dfdt,dgdu,dgdt);
116	const Vec2f dut = rcp(a: J)*Vec2f (f,g);
117	const Vec2f ut = Vec2f (u,t) - dut;
118	u = ut.x; t = ut.y;
119
120	if (abs(x: f) < f_err && abs(x: g) < g_err)
121	{
122	t+=dt;
123	if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
124	if (!(u >= `0.0f` && u <= `1.0f`)) return false; // rejects NaNs
125	const Vec3fa R = normalize(a: Q -P);
126	const Vec3fa U = madd(a: Vec3fa (dPdu.w),b: R,c: dPdu);
127	const Vec3fa V = cross(a: dPdu,b: R);
128	BezierCurveHit hit(t,u,cross(a: V,b: U));
129	return epilog(hit);
130	}
131	}
132	return false;
133	}
134
135	template<typename NativeCurve3ff, typename Ray, typename Epilog>
136	bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve,
137	float u0, float u1, unsigned int depth, const Epilog& epilog)
138	{
139	#if defined(__AVX__)
140	enum { VSIZEX_ = `8` };
141	typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
142	typedef vint8 vintx;
143	typedef vfloat8 vfloatx;
144	#else
145	enum { VSIZEX_ = `4` };
146	typedef vbool4 vboolx;
147	typedef vint4 vintx;
148	typedef vfloat4 vfloatx;
149	#endif
150	typedef Vec3<vfloatx> Vec3vfx;
151	typedef Vec4<vfloatx> Vec4vfx;
152
153	unsigned int maxDepth = numBezierSubdivisions;
154	bool found = false;
155	const Vec3fa org = zero;
156	const Vec3fa dir = ray.dir;
157
158	unsigned int sptr = `0`;
159	const unsigned int stack_size = numBezierSubdivisions+`1`; // +1 because of unstable workaround below
160	struct StackEntry {
161	vboolx valid;
162	vfloatx tlower;
163	float u0;
164	float u1;
165	unsigned int depth;
166	};
167	StackEntry stack[stack_size];
168	goto entry;
169
170	/ terminate if stack is empty /
171	while (sptr)
172	{
173	/ pop from stack /
174	{
175	sptr--;
176	vboolx valid = stack[sptr].valid;
177	const vfloatx tlower = stack[sptr].tlower;
178	valid &= tlower +dt <= ray.tfar;
179	if (none(b: valid)) continue;
180	u0 = stack[sptr].u0;
181	u1 = stack[sptr].u1;
182	depth = stack[sptr].depth;
183	const size_t i = select_min(valid,v: tlower); clear(a&: valid,index: i);
184	stack[sptr].valid = valid;
185	if (any(b: valid)) sptr++; // there are still items on the stack
186
187	/ process next segment /
188	const vfloatx vu0 = lerp(a: u0,b: u1,t: vfloatx (step)*(`1.0f`/(vfloatx::size-`1`)));
189	u0 = vu0 [i+`0`];
190	u1 = vu0 [i+`1`];
191	}
192	entry:
193
194	/ subdivide curve /
195	const float dscale = (u1-u0)(`1.0f`/(`3.0f`(vfloatx::size-`1`)));
196	const vfloatx vu0 = lerp(a: u0,b: u1,t: vfloatx (step)*(`1.0f`/(vfloatx::size-`1`)));
197	Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx (dscale);
198	const Vec4vfx P3 = shift_right_1(a: P0);
199	const Vec4vfx dP3du = shift_right_1(a: dP0du);
200	const Vec4vfx P1 = P0 + dP0du;
201	const Vec4vfx P2 = P3 - dP3du;
202
203	/ calculate bounding cylinders /
204	const vfloatx rr1 = sqr_point_to_line_distance(PmQ0: Vec3vfx(dP0du),Q1mQ0: Vec3vfx(P3 -P0));
205	const vfloatx rr2 = sqr_point_to_line_distance(PmQ0: Vec3vfx(dP3du),Q1mQ0: Vec3vfx(P3 -P0));
206	const vfloatx maxr12 = sqrt(a: max(a: rr1,b: rr2));
207	const vfloatx one_plus_ulp = `1.0f`+`2.0f`*float(ulp);
208	const vfloatx one_minus_ulp = `1.0f`-`2.0f`*float(ulp);
209	vfloatx r_outer = max(a: P0.w,b: P1.w,c: P2.w,d: P3.w)+maxr12;
210	vfloatx r_inner = min(a: P0.w,b: P1.w,c: P2.w,d: P3.w)-maxr12;
211	r_outer = one_plus_ulp *r_outer;
212	r_inner = max(a: `0.0f`,b: one_minus_ulp *r_inner);
213	const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
214	const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
215	vboolx valid = true; clear(a&: valid,index: vfloatx::size-`1`);
216
217	/ intersect with outer cylinder /
218	BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
219	valid &= cylinder_outer.intersect(org,dir,t_o&: tc_outer,u0_o&: u_outer0,Ng0_o&: Ng_outer0,u1_o&: u_outer1,Ng1_o&: Ng_outer1);
220	if (none(b: valid)) continue;
221
222	/ intersect with cap-planes /
223	BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);
224	tp = embree::intersect(a: tp,b: tc_outer);
225	BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(ray_org: org,ray_dir: dir);
226	tp = embree::intersect(a: tp,b: h0);
227	BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(ray_org: org,ray_dir: dir);
228	tp = embree::intersect(a: tp,b: h1);
229	valid &= tp.lower <= tp.upper;
230	if (none(b: valid)) continue;
231
232	/ clamp and correct u parameter /
233	u_outer0 = clamp(x: u_outer0,lower: vfloatx (`0.0f`),upper: vfloatx (`1.0f`));
234	u_outer1 = clamp(x: u_outer1,lower: vfloatx (`0.0f`),upper: vfloatx (`1.0f`));
235	u_outer0 = lerp(a: u0,b: u1,t: (vfloatx (step)+u_outer0)(`1.0f`/float*(vfloatx::size)));
236	u_outer1 = lerp(a: u0,b: u1,t: (vfloatx (step)+u_outer1)(`1.0f`/float*(vfloatx::size)));
237
238	/ intersect with inner cylinder /
239	BBox<vfloatx> tc_inner;
240	vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
241	const vboolx valid_inner = cylinder_inner.intersect(org,dir,t_o&: tc_inner,u0_o&: u_inner0,Ng0_o&: Ng_inner0,u1_o&: u_inner1,Ng1_o&: Ng_inner1);
242
243	/ at the unstable area we subdivide deeper /
244	const vboolx unstable0 = (!valid_inner) \| (abs(a: dot(a: Vec3vfx(Vec3fa(ray.dir)),b: Ng_inner0)) < `0.3f`);
245	const vboolx unstable1 = (!valid_inner) \| (abs(a: dot(a: Vec3vfx(Vec3fa(ray.dir)),b: Ng_inner1)) < `0.3f`);
246
247	/ subtract the inner interval from the current hit interval /
248	BBox<vfloatx> tp0, tp1;
249	subtract(a: tp,b: tc_inner,c&: tp0,d&: tp1);
250	vboolx valid0 = valid & (tp0.lower <= tp0.upper);
251	vboolx valid1 = valid & (tp1.lower <= tp1.upper);
252	if (none(b: valid0 \| valid1)) continue;
253
254	/ iterate over all first hits front to back /
255	const vintx termDepth0 = select(m: unstable0,t: vintx (maxDepth+`1`),f: vintx (maxDepth));
256	vboolx recursion_valid0 = valid0 & (depth < termDepth0);
257	valid0 &= depth >= termDepth0;
258
259	while (any(b: valid0))
260	{
261	const size_t i = select_min(valid: valid0,v: tp0.lower); clear(a&: valid0,index: i);
262	found = found \| intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0 [i],tp0.lower [i],epilog);
263	//found = found \| intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
264	valid0 &= tp0.lower +dt <= ray.tfar;
265	}
266	valid1 &= tp1.lower +dt <= ray.tfar;
267
268	/ iterate over all second hits front to back /
269	const vintx termDepth1 = select(m: unstable1,t: vintx (maxDepth+`1`),f: vintx (maxDepth));
270	vboolx recursion_valid1 = valid1 & (depth < termDepth1);
271	valid1 &= depth >= termDepth1;
272	while (any(b: valid1))
273	{
274	const size_t i = select_min(valid: valid1,v: tp1.lower); clear(a&: valid1,index: i);
275	found = found \| intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1 [i],tp1.upper [i],epilog);
276	//found = found \| intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
277	valid1 &= tp1.lower +dt <= ray.tfar;
278	}
279
280	/ push valid segments to stack /
281	recursion_valid0 &= tp0.lower +dt <= ray.tfar;
282	recursion_valid1 &= tp1.lower +dt <= ray.tfar;
283	const vboolx recursion_valid = recursion_valid0 \| recursion_valid1;
284	if (any(b: recursion_valid))
285	{
286	assert(sptr < stack_size);
287	stack[sptr].valid = recursion_valid;
288	stack[sptr].tlower = select(m: recursion_valid0,t: tp0.lower,f: tp1.lower);
289	stack[sptr].u0 = u0;
290	stack[sptr].u1 = u1;
291	stack[sptr].depth = depth+`1`;
292	sptr++;
293	}
294	}
295	return found;
296	}
297
298	template<template<typename Ty> class NativeCurve>
299	struct SweepCurve1Intersector1
300	{
301	typedef NativeCurve<Vec3ff> NativeCurve3ff;
302
303	template<typename Epilog>
304	__noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
305	IntersectContext* context,
306	const CurveGeometry* geom, const unsigned int primID,
307	const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
308	const Epilog& epilog)
309	{
310	STAT3(normal.trav_prims,`1`,`1`,`1`);
311
312	/ move ray closer to make intersection stable /
313	NativeCurve3ff curve0(v0,v1,v2,v3);
314	curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
315	const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(x: dot(a: ray.dir,b: ray.dir));
316	const Vec3ff ref(madd(a: Vec3fa (dt),b: ray.dir,c: ray.org),`0.0f`);
317	const NativeCurve3ff curve1 = curve0-ref;
318	return intersect_bezier_recursive_jacobian(ray,dt,curve1,`0.0f`,`1.0f`,`1`,epilog);
319	}
320	};
321
322	template<template<typename Ty> class NativeCurve, int K>
323	struct SweepCurve1IntersectorK
324	{
325	typedef NativeCurve<Vec3ff> NativeCurve3ff;
326
327	struct Ray1
328	{
329	__forceinline Ray1(RayK<K>& ray, size_t k)
330	: org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
331
332	Vec3fa org;
333	Vec3fa dir;
334	float _tnear;
335	float& tfar;
336
337	__forceinline float& tnear() { return _tnear; }
338	//__forceinline float& tfar() { return _tfar; }
339	__forceinline const float& tnear() const { return _tnear; }
340	//__forceinline const float& tfar() const { return _tfar; }
341
342	};
343
344	template<typename Epilog>
345	__forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
346	IntersectContext* context,
347	const CurveGeometry* geom, const unsigned int primID,
348	const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
349	const Epilog& epilog)
350	{
351	STAT3(normal.trav_prims,`1`,`1`,`1`);
352	Ray1 ray(vray,k);
353
354	/ move ray closer to make intersection stable /
355	NativeCurve3ff curve0(v0,v1,v2,v3);
356	curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
357	const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
358	const Vec3ff ref(madd(Vec3fa (dt),ray.dir,ray.org),`0.0f`);
359	const NativeCurve3ff curve1 = curve0-ref;
360	return intersect_bezier_recursive_jacobian(ray,dt,curve1,`0.0f`,`1.0f`,`1`,epilog);
361	}
362	};
363	}
364	}
365

Provided by KDAB

Definitions

numJacobianIterations
numBezierSubdivisions
BezierCurveHit
BezierCurveHit
BezierCurveHit
BezierCurveHit
finalize
intersect_bezier_iterative_debug
intersect_bezier_iterative_jacobian
intersect_bezier_recursive_jacobian
SweepCurve1Intersector1
intersect
SweepCurve1IntersectorK
Ray1
Ray1
tnear
tnear

Start learning QML with our Intro Training

Find out more

Definitions

source code of qtquick3d/src/3rdparty/embree/kernels/geometry/curve_intersector_sweep.h