1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "../common/default.h" |
7 | #include "../common/scene_curves.h" |
8 | |
9 | /* |
10 | |
11 | Implements Catmul Rom curves with control points p0, p1, p2, p3. At |
12 | t=0 the curve goes through p1, with tangent (p2-p0)/3, and for t=1 |
13 | the curve goes through p2 with tangent (p3-p2)/2. |
14 | |
15 | */ |
16 | |
17 | namespace embree |
18 | { |
19 | class CatmullRomBasis |
20 | { |
21 | public: |
22 | |
23 | template<typename T> |
24 | static __forceinline Vec4<T> eval(const T& u) |
25 | { |
26 | const T t = u; |
27 | const T s = T(1.0f) - u; |
28 | const T n0 = - t * s * s; |
29 | const T n1 = 2.0f + t * t * (3.0f * t - 5.0f); |
30 | const T n2 = 2.0f + s * s * (3.0f * s - 5.0f); |
31 | const T n3 = - s * t * t; |
32 | return T(0.5f) * Vec4<T>(n0, n1, n2, n3); |
33 | } |
34 | |
35 | template<typename T> |
36 | static __forceinline Vec4<T> derivative(const T& u) |
37 | { |
38 | const T t = u; |
39 | const T s = 1.0f - u; |
40 | const T n0 = - s * s + 2.0f * s * t; |
41 | const T n1 = 2.0f * t * (3.0f * t - 5.0f) + 3.0f * t * t; |
42 | const T n2 = 2.0f * s * (3.0f * t + 2.0f) - 3.0f * s * s; |
43 | const T n3 = -2.0f * s * t + t * t; |
44 | return T(0.5f) * Vec4<T>(n0, n1, n2, n3); |
45 | } |
46 | |
47 | template<typename T> |
48 | static __forceinline Vec4<T> derivative2(const T& u) |
49 | { |
50 | const T t = u; |
51 | const T n0 = -3.0f * t + 2.0f; |
52 | const T n1 = 9.0f * t - 5.0f; |
53 | const T n2 = -9.0f * t + 4.0f; |
54 | const T n3 = 3.0f * t - 1.0f; |
55 | return Vec4<T>(n0, n1, n2, n3); |
56 | } |
57 | }; |
58 | |
59 | struct PrecomputedCatmullRomBasis |
60 | { |
61 | enum { N = 16 }; |
62 | public: |
63 | PrecomputedCatmullRomBasis() {} |
64 | PrecomputedCatmullRomBasis(int shift); |
65 | |
66 | /* basis for bspline evaluation */ |
67 | public: |
68 | float c0[N+1][N+1]; |
69 | float c1[N+1][N+1]; |
70 | float c2[N+1][N+1]; |
71 | float c3[N+1][N+1]; |
72 | |
73 | /* basis for bspline derivative evaluation */ |
74 | public: |
75 | float d0[N+1][N+1]; |
76 | float d1[N+1][N+1]; |
77 | float d2[N+1][N+1]; |
78 | float d3[N+1][N+1]; |
79 | }; |
80 | extern PrecomputedCatmullRomBasis catmullrom_basis0; |
81 | extern PrecomputedCatmullRomBasis catmullrom_basis1; |
82 | |
83 | template<typename Vertex> |
84 | struct CatmullRomCurveT |
85 | { |
86 | Vertex v0,v1,v2,v3; |
87 | |
88 | __forceinline CatmullRomCurveT() {} |
89 | |
90 | __forceinline CatmullRomCurveT(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3) |
91 | : v0(v0), v1(v1), v2(v2), v3(v3) {} |
92 | |
93 | __forceinline Vertex begin() const { |
94 | return madd(1.0f/6.0f,v0,madd(2.0f/3.0f,v1,1.0f/6.0f*v2)); |
95 | } |
96 | |
97 | __forceinline Vertex end() const { |
98 | return madd(1.0f/6.0f,v1,madd(2.0f/3.0f,v2,1.0f/6.0f*v3)); |
99 | } |
100 | |
101 | __forceinline Vertex center() const { |
102 | return 0.25f*(v0+v1+v2+v3); |
103 | } |
104 | |
105 | __forceinline BBox<Vertex> bounds() const { |
106 | return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3)); |
107 | } |
108 | |
109 | __forceinline friend CatmullRomCurveT operator -( const CatmullRomCurveT& a, const Vertex& b ) { |
110 | return CatmullRomCurveT(a.v0-b,a.v1-b,a.v2-b,a.v3-b); |
111 | } |
112 | |
113 | __forceinline CatmullRomCurveT<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const |
114 | { |
115 | const Vec3ff q0(xfmVector(space,v0-p), v0.w); |
116 | const Vec3ff q1(xfmVector(space,v1-p), v1.w); |
117 | const Vec3ff q2(xfmVector(space,v2-p), v2.w); |
118 | const Vec3ff q3(xfmVector(space,v3-p), v3.w); |
119 | return CatmullRomCurveT<Vec3ff>(q0,q1,q2,q3); |
120 | } |
121 | |
122 | __forceinline Vertex eval(const float t) const |
123 | { |
124 | const Vec4<float> b = CatmullRomBasis::eval(u: t); |
125 | return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3))); |
126 | } |
127 | |
128 | __forceinline Vertex eval_du(const float t) const |
129 | { |
130 | const Vec4<float> b = CatmullRomBasis::derivative(u: t); |
131 | return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3))); |
132 | } |
133 | |
134 | __forceinline Vertex eval_dudu(const float t) const |
135 | { |
136 | const Vec4<float> b = CatmullRomBasis::derivative2(u: t); |
137 | return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3))); |
138 | } |
139 | |
140 | __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const |
141 | { |
142 | p = eval(t); |
143 | dp = eval_du(t); |
144 | ddp = eval_dudu(t); |
145 | } |
146 | |
147 | template<int M> |
148 | __forceinline Vec4vf<M> veval(const vfloat<M>& t) const |
149 | { |
150 | const Vec4vf<M> b = CatmullRomBasis::eval(t); |
151 | return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3)))); |
152 | } |
153 | |
154 | template<int M> |
155 | __forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const |
156 | { |
157 | const Vec4vf<M> b = CatmullRomBasis::derivative(t); |
158 | return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3)))); |
159 | } |
160 | |
161 | template<int M> |
162 | __forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const |
163 | { |
164 | const Vec4vf<M> b = CatmullRomBasis::derivative2(t); |
165 | return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3)))); |
166 | } |
167 | |
168 | template<int M> |
169 | __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const |
170 | { |
171 | p = veval<M>(t); |
172 | dp = veval_du<M>(t); |
173 | } |
174 | |
175 | template<int M> |
176 | __forceinline Vec4vf<M> eval0(const int ofs, const int size) const |
177 | { |
178 | assert(size <= PrecomputedCatmullRomBasis::N); |
179 | assert(ofs <= size); |
180 | return madd(vfloat<M>::loadu(&catmullrom_basis0.c0[size][ofs]), Vec4vf<M>(v0), |
181 | madd(vfloat<M>::loadu(&catmullrom_basis0.c1[size][ofs]), Vec4vf<M>(v1), |
182 | madd(vfloat<M>::loadu(&catmullrom_basis0.c2[size][ofs]), Vec4vf<M>(v2), |
183 | vfloat<M>::loadu(&catmullrom_basis0.c3[size][ofs]) * Vec4vf<M>(v3)))); |
184 | } |
185 | |
186 | template<int M> |
187 | __forceinline Vec4vf<M> eval1(const int ofs, const int size) const |
188 | { |
189 | assert(size <= PrecomputedCatmullRomBasis::N); |
190 | assert(ofs <= size); |
191 | return madd(vfloat<M>::loadu(&catmullrom_basis1.c0[size][ofs]), Vec4vf<M>(v0), |
192 | madd(vfloat<M>::loadu(&catmullrom_basis1.c1[size][ofs]), Vec4vf<M>(v1), |
193 | madd(vfloat<M>::loadu(&catmullrom_basis1.c2[size][ofs]), Vec4vf<M>(v2), |
194 | vfloat<M>::loadu(&catmullrom_basis1.c3[size][ofs]) * Vec4vf<M>(v3)))); |
195 | } |
196 | |
197 | template<int M> |
198 | __forceinline Vec4vf<M> derivative0(const int ofs, const int size) const |
199 | { |
200 | assert(size <= PrecomputedCatmullRomBasis::N); |
201 | assert(ofs <= size); |
202 | return madd(vfloat<M>::loadu(&catmullrom_basis0.d0[size][ofs]), Vec4vf<M>(v0), |
203 | madd(vfloat<M>::loadu(&catmullrom_basis0.d1[size][ofs]), Vec4vf<M>(v1), |
204 | madd(vfloat<M>::loadu(&catmullrom_basis0.d2[size][ofs]), Vec4vf<M>(v2), |
205 | vfloat<M>::loadu(&catmullrom_basis0.d3[size][ofs]) * Vec4vf<M>(v3)))); |
206 | } |
207 | |
208 | template<int M> |
209 | __forceinline Vec4vf<M> derivative1(const int ofs, const int size) const |
210 | { |
211 | assert(size <= PrecomputedCatmullRomBasis::N); |
212 | assert(ofs <= size); |
213 | return madd(vfloat<M>::loadu(&catmullrom_basis1.d0[size][ofs]), Vec4vf<M>(v0), |
214 | madd(vfloat<M>::loadu(&catmullrom_basis1.d1[size][ofs]), Vec4vf<M>(v1), |
215 | madd(vfloat<M>::loadu(&catmullrom_basis1.d2[size][ofs]), Vec4vf<M>(v2), |
216 | vfloat<M>::loadu(&catmullrom_basis1.d3[size][ofs]) * Vec4vf<M>(v3)))); |
217 | } |
218 | |
219 | /* calculates bounds of catmull-rom curve geometry */ |
220 | __forceinline BBox3fa accurateRoundBounds() const |
221 | { |
222 | const int N = 7; |
223 | const float scale = 1.0f/(3.0f*(N-1)); |
224 | Vec4vfx pl(pos_inf), pu(neg_inf); |
225 | for (int i=0; i<=N; i+=VSIZEX) |
226 | { |
227 | vintx vi = vintx(i)+vintx(step); |
228 | vboolx valid = vi <= vintx(N); |
229 | const Vec4vfx p = eval0<VSIZEX>(i,N); |
230 | const Vec4vfx dp = derivative0<VSIZEX>(i,N); |
231 | const Vec4vfx pm = p-Vec4vfx(scale)*select(s: vi!=vintx(0),t: dp,f: Vec4vfx(zero)); |
232 | const Vec4vfx pp = p+Vec4vfx(scale)*select(s: vi!=vintx(N),t: dp,f: Vec4vfx(zero)); |
233 | pl = select(s: valid,t: min(a: pl,b: p,c: pm,d: pp),f: pl); // FIXME: use masked min |
234 | pu = select(s: valid,t: max(a: pu,b: p,c: pm,d: pp),f: pu); // FIXME: use masked min |
235 | } |
236 | const Vec3fa lower(reduce_min(v: pl.x),reduce_min(v: pl.y),reduce_min(v: pl.z)); |
237 | const Vec3fa upper(reduce_max(v: pu.x),reduce_max(v: pu.y),reduce_max(v: pu.z)); |
238 | const float r_min = reduce_min(v: pl.w); |
239 | const float r_max = reduce_max(v: pu.w); |
240 | const Vec3fa upper_r = Vec3fa(max(a: abs(x: r_min),b: abs(x: r_max))); |
241 | return enlarge(a: BBox3fa(lower,upper),b: upper_r); |
242 | } |
243 | |
244 | /* calculates bounds when tessellated into N line segments */ |
245 | __forceinline BBox3fa accurateFlatBounds(int N) const |
246 | { |
247 | if (likely(N == 4)) |
248 | { |
249 | const Vec4vf4 pi = eval0<4>(0,4); |
250 | const Vec3fa lower(reduce_min(v: pi.x),reduce_min(v: pi.y),reduce_min(v: pi.z)); |
251 | const Vec3fa upper(reduce_max(v: pi.x),reduce_max(v: pi.y),reduce_max(v: pi.z)); |
252 | const Vec3fa upper_r = Vec3fa(reduce_max(v: abs(a: pi.w))); |
253 | const Vec3ff pe = end(); |
254 | return enlarge(a: BBox3fa(min(a: lower,b: pe),max(a: upper,b: pe)),b: max(a: upper_r,b: Vec3fa(abs(x: pe.w)))); |
255 | } |
256 | else |
257 | { |
258 | Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f); |
259 | for (int i=0; i<=N; i+=VSIZEX) |
260 | { |
261 | vboolx valid = vintx(i)+vintx(step) <= vintx(N); |
262 | const Vec4vfx pi = eval0<VSIZEX>(i,N); |
263 | |
264 | pl.x = select(m: valid,t: min(a: pl.x,b: pi.x),f: pl.x); // FIXME: use masked min |
265 | pl.y = select(m: valid,t: min(a: pl.y,b: pi.y),f: pl.y); |
266 | pl.z = select(m: valid,t: min(a: pl.z,b: pi.z),f: pl.z); |
267 | |
268 | pu.x = select(m: valid,t: max(a: pu.x,b: pi.x),f: pu.x); // FIXME: use masked min |
269 | pu.y = select(m: valid,t: max(a: pu.y,b: pi.y),f: pu.y); |
270 | pu.z = select(m: valid,t: max(a: pu.z,b: pi.z),f: pu.z); |
271 | |
272 | ru = select(m: valid,t: max(a: ru,b: abs(a: pi.w)),f: ru); |
273 | } |
274 | const Vec3fa lower(reduce_min(v: pl.x),reduce_min(v: pl.y),reduce_min(v: pl.z)); |
275 | const Vec3fa upper(reduce_max(v: pu.x),reduce_max(v: pu.y),reduce_max(v: pu.z)); |
276 | const Vec3fa upper_r(reduce_max(v: ru)); |
277 | return enlarge(a: BBox3fa(lower,upper),b: upper_r); |
278 | } |
279 | } |
280 | |
281 | friend __forceinline embree_ostream operator<<(embree_ostream cout, const CatmullRomCurveT& curve) { |
282 | return cout << "CatmullRomCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }" ; |
283 | } |
284 | }; |
285 | |
286 | template<typename CurveGeometry> |
287 | __forceinline CatmullRomCurveT<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CatmullRomCurveT<Vec3ff>& curve) |
288 | { |
289 | return CatmullRomCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0), |
290 | enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1), |
291 | enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2), |
292 | enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3)); |
293 | } |
294 | |
295 | typedef CatmullRomCurveT<Vec3fa> CatmullRomCurve3fa; |
296 | } |
297 | |
298 | |