1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "../common/ray.h" |
7 | #include "curve_intersector_precalculations.h" |
8 | #include "curve_intersector_sweep.h" |
9 | #include "../subdiv/linear_bezier_patch.h" |
10 | |
11 | #define DBG(x) |
12 | |
13 | namespace embree |
14 | { |
15 | namespace isa |
16 | { |
17 | template<typename Ray, typename Epilog> |
18 | struct TensorLinearCubicBezierSurfaceIntersector |
19 | { |
20 | const LinearSpace3fa& ray_space; |
21 | Ray& ray; |
22 | TensorLinearCubicBezierSurface3fa curve3d; |
23 | TensorLinearCubicBezierSurface2fa curve2d; |
24 | float eps; |
25 | const Epilog& epilog; |
26 | bool isHit; |
27 | |
28 | __forceinline TensorLinearCubicBezierSurfaceIntersector (const LinearSpace3fa& ray_space, Ray& ray, const TensorLinearCubicBezierSurface3fa& curve3d, const Epilog& epilog) |
29 | : ray_space(ray_space), ray(ray), curve3d(curve3d), epilog(epilog), isHit(false) |
30 | { |
31 | const TensorLinearCubicBezierSurface3fa curve3dray = curve3d.xfm(ray_space,ray.org); |
32 | curve2d = TensorLinearCubicBezierSurface2fa(CubicBezierCurve2fa(curve3dray.L),CubicBezierCurve2fa(curve3dray.R)); |
33 | const BBox2fa b2 = curve2d.bounds(); |
34 | eps = 8.0f*float(ulp)*reduce_max(v: max(a: abs(a: b2.lower),b: abs(a: b2.upper))); |
35 | } |
36 | |
37 | __forceinline Interval1f solve_linear(const float u0, const float u1, const float& p0, const float& p1) |
38 | { |
39 | if (p1 == p0) { |
40 | if (p0 == 0.0f) return Interval1f(u0,u1); |
41 | else return Interval1f(empty); |
42 | } |
43 | const float t = -p0/(p1-p0); |
44 | const float tt = lerp(v0: u0,v1: u1,t); |
45 | return Interval1f(tt); |
46 | } |
47 | |
48 | __forceinline void solve_linear(const float u0, const float u1, const Interval1f& p0, const Interval1f& p1, Interval1f& u) |
49 | { |
50 | if (sign(x: p0.lower) != sign(x: p0.upper)) u.extend(other: u0); |
51 | if (sign(x: p0.lower) != sign(x: p1.lower)) u.extend(solve_linear(u0,u1,p0.lower,p1.lower)); |
52 | if (sign(x: p0.upper) != sign(x: p1.upper)) u.extend(solve_linear(u0,u1,p0.upper,p1.upper)); |
53 | if (sign(x: p1.lower) != sign(x: p1.upper)) u.extend(other: u1); |
54 | } |
55 | |
56 | __forceinline Interval1f bezier_clipping(const CubicBezierCurve<Interval1f>& curve) |
57 | { |
58 | Interval1f u = empty; |
59 | solve_linear(0.0f/3.0f,1.0f/3.0f,curve.v0,curve.v1,u); |
60 | solve_linear(0.0f/3.0f,2.0f/3.0f,curve.v0,curve.v2,u); |
61 | solve_linear(0.0f/3.0f,3.0f/3.0f,curve.v0,curve.v3,u); |
62 | solve_linear(1.0f/3.0f,2.0f/3.0f,curve.v1,curve.v2,u); |
63 | solve_linear(1.0f/3.0f,3.0f/3.0f,curve.v1,curve.v3,u); |
64 | solve_linear(2.0f/3.0f,3.0f/3.0f,curve.v2,curve.v3,u); |
65 | return intersect(a: u,b: Interval1f(0.0f,1.0f)); |
66 | } |
67 | |
68 | __forceinline Interval1f bezier_clipping(const LinearBezierCurve<Interval1f>& curve) |
69 | { |
70 | Interval1f v = empty; |
71 | solve_linear(0.0f,1.0f,curve.v0,curve.v1,v); |
72 | return intersect(a: v,b: Interval1f(0.0f,1.0f)); |
73 | } |
74 | |
75 | __forceinline void solve_bezier_clipping(BBox1f cu, BBox1f cv, const TensorLinearCubicBezierSurface2fa& curve2) |
76 | { |
77 | BBox2fa bounds = curve2.bounds(); |
78 | if (bounds.upper.x < 0.0f) return; |
79 | if (bounds.upper.y < 0.0f) return; |
80 | if (bounds.lower.x > 0.0f) return; |
81 | if (bounds.lower.y > 0.0f) return; |
82 | |
83 | if (max(a: cu.size(),b: cv.size()) < 1E-4f) |
84 | { |
85 | const float u = cu.center(); |
86 | const float v = cv.center(); |
87 | TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org); |
88 | const float t = curve_z.eval(u,v); |
89 | if (ray.tnear() <= t && t <= ray.tfar) { |
90 | const Vec3fa Ng = cross(a: curve3d.eval_du(u,v),b: curve3d.eval_dv(u,v)); |
91 | BezierCurveHit hit(t,u,v,Ng); |
92 | isHit |= epilog(hit); |
93 | } |
94 | return; |
95 | } |
96 | |
97 | const Vec2fa dv = curve2.axis_v(); |
98 | const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dx: dv); |
99 | LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); |
100 | if (!curve0v.hasRoot()) return; |
101 | |
102 | const Interval1f v = bezier_clipping(curve0v); |
103 | if (isEmpty(v)) return; |
104 | TensorLinearCubicBezierSurface2fa curve2a = curve2.clip_v(v); |
105 | cv = BBox1f(lerp(v0: cv.lower,v1: cv.upper,t: v.lower),lerp(v0: cv.lower,v1: cv.upper,t: v.upper)); |
106 | |
107 | const Vec2fa du = curve2.axis_u(); |
108 | const TensorLinearCubicBezierSurface1f curve1u = curve2a.xfm(dx: du); |
109 | CubicBezierCurve<Interval1f> curve0u = curve1u.reduce_v(); |
110 | int roots = curve0u.maxRoots(); |
111 | if (roots == 0) return; |
112 | |
113 | if (roots == 1) |
114 | { |
115 | const Interval1f u = bezier_clipping(curve0u); |
116 | if (isEmpty(v: u)) return; |
117 | TensorLinearCubicBezierSurface2fa curve2b = curve2a.clip_u(u); |
118 | cu = BBox1f(lerp(v0: cu.lower,v1: cu.upper,t: u.lower),lerp(v0: cu.lower,v1: cu.upper,t: u.upper)); |
119 | solve_bezier_clipping(cu,cv,curve2b); |
120 | return; |
121 | } |
122 | |
123 | TensorLinearCubicBezierSurface2fa curve2l, curve2r; |
124 | curve2a.split_u(left&: curve2l,right&: curve2r); |
125 | solve_bezier_clipping(BBox1f(cu.lower,cu.center()),cv,curve2l); |
126 | solve_bezier_clipping(BBox1f(cu.center(),cu.upper),cv,curve2r); |
127 | } |
128 | |
129 | __forceinline bool solve_bezier_clipping() |
130 | { |
131 | solve_bezier_clipping(BBox1f(0.0f,1.0f),BBox1f(0.0f,1.0f),curve2d); |
132 | return isHit; |
133 | } |
134 | |
135 | __forceinline void solve_newton_raphson(BBox1f cu, BBox1f cv) |
136 | { |
137 | Vec2fa uv(cu.center(),cv.center()); |
138 | const Vec2fa dfdu = curve2d.eval_du(u: uv.x,v: uv.y); |
139 | const Vec2fa dfdv = curve2d.eval_dv(u: uv.x,v: uv.y); |
140 | const LinearSpace2fa rcp_J = rcp(a: LinearSpace2fa(dfdu,dfdv)); |
141 | solve_newton_raphson_loop(cu,cv,uv_in: uv,dfdu,dfdv,rcp_J); |
142 | } |
143 | |
144 | __forceinline void solve_newton_raphson_loop(BBox1f cu, BBox1f cv, const Vec2fa& uv_in, const Vec2fa& dfdu, const Vec2fa& dfdv, const LinearSpace2fa& rcp_J) |
145 | { |
146 | Vec2fa uv = uv_in; |
147 | |
148 | for (size_t i=0; i<200; i++) |
149 | { |
150 | const Vec2fa f = curve2d.eval(u: uv.x,v: uv.y); |
151 | const Vec2fa duv = rcp_J*f; |
152 | uv -= duv; |
153 | |
154 | if (max(a: abs(x: f.x),b: abs(x: f.y)) < eps) |
155 | { |
156 | const float u = uv.x; |
157 | const float v = uv.y; |
158 | if (!(u >= 0.0f && u <= 1.0f)) return; // rejects NaNs |
159 | if (!(v >= 0.0f && v <= 1.0f)) return; // rejects NaNs |
160 | const TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org); |
161 | const float t = curve_z.eval(u,v); |
162 | if (!(ray.tnear() <= t && t <= ray.tfar)) return; // rejects NaNs |
163 | const Vec3fa Ng = cross(a: curve3d.eval_du(u,v),b: curve3d.eval_dv(u,v)); |
164 | BezierCurveHit hit(t,u,v,Ng); |
165 | isHit |= epilog(hit); |
166 | return; |
167 | } |
168 | } |
169 | } |
170 | |
171 | __forceinline bool clip_v(BBox1f& cu, BBox1f& cv) |
172 | { |
173 | const Vec2fa dv = curve2d.eval_dv(u: cu.lower,v: cv.lower); |
174 | const TensorLinearCubicBezierSurface1f curve1v = curve2d.xfm(dx: dv).clip(u: cu,v: cv); |
175 | LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); |
176 | if (!curve0v.hasRoot()) return false; |
177 | Interval1f v = bezier_clipping(curve0v); |
178 | if (isEmpty(v)) return false; |
179 | v = intersect(a: v + Interval1f(-0.1f,+0.1f),b: Interval1f(0.0f,1.0f)); |
180 | cv = BBox1f(lerp(v0: cv.lower,v1: cv.upper,t: v.lower),lerp(v0: cv.lower,v1: cv.upper,t: v.upper)); |
181 | return true; |
182 | } |
183 | |
184 | __forceinline bool solve_krawczyk(bool very_small, BBox1f& cu, BBox1f& cv) |
185 | { |
186 | /* perform bezier clipping in v-direction to get tight v-bounds */ |
187 | TensorLinearCubicBezierSurface2fa curve2 = curve2d.clip(u: cu,v: cv); |
188 | const Vec2fa dv = curve2.axis_v(); |
189 | const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dx: dv); |
190 | LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); |
191 | if (unlikely(!curve0v.hasRoot())) return true; |
192 | Interval1f v = bezier_clipping(curve0v); |
193 | if (unlikely(isEmpty(v))) return true; |
194 | v = intersect(a: v + Interval1f(-0.1f,+0.1f),b: Interval1f(0.0f,1.0f)); |
195 | curve2 = curve2.clip_v(v); |
196 | cv = BBox1f(lerp(v0: cv.lower,v1: cv.upper,t: v.lower),lerp(v0: cv.lower,v1: cv.upper,t: v.upper)); |
197 | |
198 | /* perform one newton raphson iteration */ |
199 | Vec2fa c(cu.center(),cv.center()); |
200 | Vec2fa f,dfdu,dfdv; curve2d.eval(u: c.x,v: c.y,p&: f,dpdu&: dfdu,dpdv&: dfdv); |
201 | const LinearSpace2fa rcp_J = rcp(a: LinearSpace2fa(dfdu,dfdv)); |
202 | const Vec2fa c1 = c - rcp_J*f; |
203 | |
204 | /* calculate bounds of derivatives */ |
205 | const BBox2fa bounds_du = (1.0f/cu.size())*curve2.derivative_u().bounds(); |
206 | const BBox2fa bounds_dv = (1.0f/cv.size())*curve2.derivative_v().bounds(); |
207 | |
208 | /* calculate krawczyk test */ |
209 | LinearSpace2<Vec2<Interval1f>> I(Interval1f(1.0f), Interval1f(0.0f), |
210 | Interval1f(0.0f), Interval1f(1.0f)); |
211 | |
212 | LinearSpace2<Vec2<Interval1f>> G(Interval1f(bounds_du.lower.x,bounds_du.upper.x), Interval1f(bounds_dv.lower.x,bounds_dv.upper.x), |
213 | Interval1f(bounds_du.lower.y,bounds_du.upper.y), Interval1f(bounds_dv.lower.y,bounds_dv.upper.y)); |
214 | |
215 | const LinearSpace2<Vec2f> rcp_J2(rcp_J); |
216 | const LinearSpace2<Vec2<Interval1f>> rcp_Ji(rcp_J2); |
217 | |
218 | const Vec2<Interval1f> x(cu,cv); |
219 | const Vec2<Interval1f> K = Vec2<Interval1f>(Vec2f(c1)) + (I - rcp_Ji*G)*(x-Vec2<Interval1f>(Vec2f(c))); |
220 | |
221 | /* test if there is no solution */ |
222 | const Vec2<Interval1f> KK = intersect(a: K,b: x); |
223 | if (unlikely(isEmpty(KK.x) || isEmpty(KK.y))) return true; |
224 | |
225 | /* exit if convergence cannot get proven, but terminate if we are very small */ |
226 | if (unlikely(!subset(K,x) && !very_small)) return false; |
227 | |
228 | /* solve using newton raphson iteration of convergence is guarenteed */ |
229 | solve_newton_raphson_loop(cu,cv,uv_in: c1,dfdu,dfdv,rcp_J); |
230 | return true; |
231 | } |
232 | |
233 | __forceinline void solve_newton_raphson_no_recursion(BBox1f cu, BBox1f cv) |
234 | { |
235 | if (!clip_v(cu,cv)) return; |
236 | return solve_newton_raphson(cu,cv); |
237 | } |
238 | |
239 | __forceinline void solve_newton_raphson_recursion(BBox1f cu, BBox1f cv) |
240 | { |
241 | unsigned int sptr = 0; |
242 | const unsigned int stack_size = 4; |
243 | unsigned int mask_stack[stack_size]; |
244 | BBox1f cu_stack[stack_size]; |
245 | BBox1f cv_stack[stack_size]; |
246 | goto entry; |
247 | |
248 | /* terminate if stack is empty */ |
249 | while (sptr) |
250 | { |
251 | /* pop from stack */ |
252 | { |
253 | sptr--; |
254 | size_t mask = mask_stack[sptr]; |
255 | cu = cu_stack[sptr]; |
256 | cv = cv_stack[sptr]; |
257 | const size_t i = bscf(v&: mask); |
258 | mask_stack[sptr] = mask; |
259 | if (mask) sptr++; // there are still items on the stack |
260 | |
261 | /* process next element recurse into each hit curve segment */ |
262 | const float u0 = float(i+0)*(1.0f/(VSIZEX-1)); |
263 | const float u1 = float(i+1)*(1.0f/(VSIZEX-1)); |
264 | const BBox1f cui(lerp(v0: cu.lower,v1: cu.upper,t: u0),lerp(v0: cu.lower,v1: cu.upper,t: u1)); |
265 | cu = cui; |
266 | } |
267 | |
268 | #if 0 |
269 | solve_newton_raphson_no_recursion(cu,cv); |
270 | continue; |
271 | |
272 | #else |
273 | /* we assume convergence for small u ranges and verify using krawczyk */ |
274 | if (cu.size() < 1.0f/6.0f) { |
275 | const bool very_small = cu.size() < 0.001f || sptr >= stack_size; |
276 | if (solve_krawczyk(very_small,cu,cv)) { |
277 | continue; |
278 | } |
279 | } |
280 | #endif |
281 | |
282 | entry: |
283 | |
284 | /* split the curve into VSIZEX-1 segments in u-direction */ |
285 | vboolx valid = true; |
286 | TensorLinearCubicBezierSurface<Vec2vfx> subcurves = curve2d.clip_v(v: cv).vsplit_u(valid,u: cu); |
287 | |
288 | /* slabs test in u-direction */ |
289 | Vec2vfx ndv = cross(a: subcurves.axis_v()); |
290 | BBox<vfloatx> boundsv = subcurves.vxfm(dx: ndv).bounds(); |
291 | valid &= boundsv.lower <= eps; |
292 | valid &= boundsv.upper >= -eps; |
293 | if (none(b: valid)) continue; |
294 | |
295 | /* slabs test in v-direction */ |
296 | Vec2vfx ndu = cross(a: subcurves.axis_u()); |
297 | BBox<vfloatx> boundsu = subcurves.vxfm(dx: ndu).bounds(); |
298 | valid &= boundsu.lower <= eps; |
299 | valid &= boundsu.upper >= -eps; |
300 | if (none(b: valid)) continue; |
301 | |
302 | /* push valid segments to stack */ |
303 | assert(sptr < stack_size); |
304 | mask_stack [sptr] = movemask(a: valid); |
305 | cu_stack [sptr] = cu; |
306 | cv_stack [sptr] = cv; |
307 | sptr++; |
308 | } |
309 | } |
310 | |
311 | __forceinline bool solve_newton_raphson_main() |
312 | { |
313 | BBox1f vu(0.0f,1.0f); |
314 | BBox1f vv(0.0f,1.0f); |
315 | solve_newton_raphson_recursion(cu: vu,cv: vv); |
316 | return isHit; |
317 | } |
318 | }; |
319 | |
320 | |
321 | template<template<typename Ty> class SourceCurve> |
322 | struct OrientedCurve1Intersector1 |
323 | { |
324 | //template<typename Ty> using Curve = SourceCurve<Ty>; |
325 | typedef SourceCurve<Vec3ff> SourceCurve3ff; |
326 | typedef SourceCurve<Vec3fa> SourceCurve3fa; |
327 | |
328 | __forceinline OrientedCurve1Intersector1() {} |
329 | |
330 | __forceinline OrientedCurve1Intersector1(const Ray& ray, const void* ptr) {} |
331 | |
332 | template<typename Epilog> |
333 | __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, |
334 | IntersectContext* context, |
335 | const CurveGeometry* geom, const unsigned int primID, |
336 | const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i, |
337 | const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i, |
338 | const Epilog& epilog) const |
339 | { |
340 | STAT3(normal.trav_prims,1,1,1); |
341 | |
342 | SourceCurve3ff ccurve(v0i,v1i,v2i,v3i); |
343 | SourceCurve3fa ncurve(n0i,n1i,n2i,n3i); |
344 | ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve); |
345 | TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); |
346 | //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping(); |
347 | return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main(); |
348 | } |
349 | |
350 | template<typename Epilog> |
351 | __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, |
352 | IntersectContext* context, |
353 | const CurveGeometry* geom, const unsigned int primID, |
354 | const TensorLinearCubicBezierSurface3fa& curve, const Epilog& epilog) const |
355 | { |
356 | STAT3(normal.trav_prims,1,1,1); |
357 | //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping(); |
358 | return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main(); |
359 | } |
360 | }; |
361 | |
362 | template<template<typename Ty> class SourceCurve, int K> |
363 | struct OrientedCurve1IntersectorK |
364 | { |
365 | //template<typename Ty> using Curve = SourceCurve<Ty>; |
366 | typedef SourceCurve<Vec3ff> SourceCurve3ff; |
367 | typedef SourceCurve<Vec3fa> SourceCurve3fa; |
368 | |
369 | struct Ray1 |
370 | { |
371 | __forceinline Ray1(RayK<K>& ray, size_t k) |
372 | : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {} |
373 | |
374 | Vec3fa org; |
375 | Vec3fa dir; |
376 | float _tnear; |
377 | float& tfar; |
378 | |
379 | __forceinline float& tnear() { return _tnear; } |
380 | //__forceinline float& tfar() { return _tfar; } |
381 | __forceinline const float& tnear() const { return _tnear; } |
382 | //__forceinline const float& tfar() const { return _tfar; } |
383 | }; |
384 | |
385 | template<typename Epilog> |
386 | __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, |
387 | IntersectContext* context, |
388 | const CurveGeometry* geom, const unsigned int primID, |
389 | const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i, |
390 | const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i, |
391 | const Epilog& epilog) |
392 | { |
393 | STAT3(normal.trav_prims,1,1,1); |
394 | Ray1 ray(vray,k); |
395 | SourceCurve3ff ccurve(v0i,v1i,v2i,v3i); |
396 | SourceCurve3fa ncurve(n0i,n1i,n2i,n3i); |
397 | ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve); |
398 | TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); |
399 | //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping(); |
400 | return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main(); |
401 | } |
402 | |
403 | template<typename Epilog> |
404 | __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, |
405 | IntersectContext* context, |
406 | const CurveGeometry* geom, const unsigned int primID, |
407 | const TensorLinearCubicBezierSurface3fa& curve, |
408 | const Epilog& epilog) |
409 | { |
410 | STAT3(normal.trav_prims,1,1,1); |
411 | Ray1 ray(vray,k); |
412 | //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping(); |
413 | return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main(); |
414 | } |
415 | }; |
416 | } |
417 | } |
418 | |