1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "bezier_curve.h" |
7 | |
8 | namespace embree |
9 | { |
10 | namespace isa |
11 | { |
12 | template<typename V> |
13 | struct TensorLinearQuadraticBezierSurface |
14 | { |
15 | QuadraticBezierCurve<V> L; |
16 | QuadraticBezierCurve<V> R; |
17 | |
18 | __forceinline TensorLinearQuadraticBezierSurface() {} |
19 | |
20 | __forceinline TensorLinearQuadraticBezierSurface(const TensorLinearQuadraticBezierSurface<V>& curve) |
21 | : L(curve.L), R(curve.R) {} |
22 | |
23 | __forceinline TensorLinearQuadraticBezierSurface& operator= (const TensorLinearQuadraticBezierSurface& other) { |
24 | L = other.L; R = other.R; return *this; |
25 | } |
26 | |
27 | __forceinline TensorLinearQuadraticBezierSurface(const QuadraticBezierCurve<V>& L, const QuadraticBezierCurve<V>& R) |
28 | : L(L), R(R) {} |
29 | |
30 | __forceinline BBox<V> bounds() const { |
31 | return merge(L.bounds(),R.bounds()); |
32 | } |
33 | }; |
34 | |
35 | template<> |
36 | struct TensorLinearQuadraticBezierSurface<Vec2fa> |
37 | { |
38 | QuadraticBezierCurve<vfloat4> LR; |
39 | |
40 | __forceinline TensorLinearQuadraticBezierSurface() {} |
41 | |
42 | __forceinline TensorLinearQuadraticBezierSurface(const TensorLinearQuadraticBezierSurface<Vec2fa>& curve) |
43 | : LR(curve.LR) {} |
44 | |
45 | __forceinline TensorLinearQuadraticBezierSurface& operator= (const TensorLinearQuadraticBezierSurface& other) { |
46 | LR = other.LR; return *this; |
47 | } |
48 | |
49 | __forceinline TensorLinearQuadraticBezierSurface(const QuadraticBezierCurve<vfloat4>& LR) |
50 | : LR(LR) {} |
51 | |
52 | __forceinline BBox<Vec2fa> bounds() const |
53 | { |
54 | const BBox<vfloat4> b = LR.bounds(); |
55 | const BBox<Vec2fa> bl(Vec2fa(b.lower),Vec2fa(b.upper)); |
56 | const BBox<Vec2fa> br(Vec2fa(shuffle<2,3,2,3>(v: b.lower)),Vec2fa(shuffle<2,3,2,3>(v: b.upper))); |
57 | return merge(a: bl,b: br); |
58 | } |
59 | }; |
60 | |
61 | template<typename V> |
62 | struct TensorLinearCubicBezierSurface |
63 | { |
64 | CubicBezierCurve<V> L; |
65 | CubicBezierCurve<V> R; |
66 | |
67 | __forceinline TensorLinearCubicBezierSurface() {} |
68 | |
69 | __forceinline TensorLinearCubicBezierSurface(const TensorLinearCubicBezierSurface& curve) |
70 | : L(curve.L), R(curve.R) {} |
71 | |
72 | __forceinline TensorLinearCubicBezierSurface& operator= (const TensorLinearCubicBezierSurface& other) { |
73 | L = other.L; R = other.R; return *this; |
74 | } |
75 | |
76 | __forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<V>& L, const CubicBezierCurve<V>& R) |
77 | : L(L), R(R) {} |
78 | |
79 | template<template<typename T> class SourceCurve> |
80 | __forceinline static TensorLinearCubicBezierSurface fromCenterAndNormalCurve(const SourceCurve<Vec3ff>& center, const SourceCurve<Vec3fa>& normal) |
81 | { |
82 | SourceCurve<Vec3ff> vcurve = center; |
83 | SourceCurve<Vec3fa> ncurve = normal; |
84 | |
85 | /* here we construct a patch which follows the curve l(t) = |
86 | * p(t) +/- r(t)*normalize(cross(n(t),dp(t))) */ |
87 | |
88 | const Vec3ff p0 = vcurve.eval(0.0f); |
89 | const Vec3ff dp0 = vcurve.eval_du(0.0f); |
90 | //const Vec3ff ddp0 = vcurve.eval_dudu(0.0f); // ddp0 is assumed to be 0 |
91 | |
92 | const Vec3fa n0 = ncurve.eval(0.0f); |
93 | const Vec3fa dn0 = ncurve.eval_du(0.0f); |
94 | |
95 | const Vec3ff p1 = vcurve.eval(1.0f); |
96 | const Vec3ff dp1 = vcurve.eval_du(1.0f); |
97 | //const Vec3ff ddp1 = vcurve.eval_dudu(1.0f); // ddp1 is assumed to be 0 |
98 | |
99 | const Vec3fa n1 = ncurve.eval(1.0f); |
100 | const Vec3fa dn1 = ncurve.eval_du(1.0f); |
101 | |
102 | const Vec3fa bt0 = cross(a: n0,b: dp0); |
103 | const Vec3fa dbt0 = cross(a: dn0,b: dp0);// + cross(n0,ddp0); |
104 | |
105 | const Vec3fa bt1 = cross(a: n1,b: dp1); |
106 | const Vec3fa dbt1 = cross(a: dn1,b: dp1);// + cross(n1,ddp1); |
107 | |
108 | const Vec3fa k0 = normalize(a: bt0); |
109 | const Vec3fa dk0 = dnormalize(p: bt0,dp: dbt0); |
110 | |
111 | const Vec3fa k1 = normalize(a: bt1); |
112 | const Vec3fa dk1 = dnormalize(p: bt1,dp: dbt1); |
113 | |
114 | const Vec3fa l0 = p0 - p0.w*k0; |
115 | const Vec3fa dl0 = dp0 - (dp0.w*k0 + p0.w*dk0); |
116 | |
117 | const Vec3fa r0 = p0 + p0.w*k0; |
118 | const Vec3fa dr0 = dp0 + (dp0.w*k0 + p0.w*dk0); |
119 | |
120 | const Vec3fa l1 = p1 - p1.w*k1; |
121 | const Vec3fa dl1 = dp1 - (dp1.w*k1 + p1.w*dk1); |
122 | |
123 | const Vec3fa r1 = p1 + p1.w*k1; |
124 | const Vec3fa dr1 = dp1 + (dp1.w*k1 + p1.w*dk1); |
125 | |
126 | const float scale = 1.0f/3.0f; |
127 | CubicBezierCurve<V> L(l0,l0+scale*dl0,l1-scale*dl1,l1); |
128 | CubicBezierCurve<V> R(r0,r0+scale*dr0,r1-scale*dr1,r1); |
129 | return TensorLinearCubicBezierSurface(L,R); |
130 | } |
131 | |
132 | __forceinline BBox<V> bounds() const { |
133 | return merge(L.bounds(),R.bounds()); |
134 | } |
135 | |
136 | __forceinline BBox3fa accurateBounds() const { |
137 | return merge(L.accurateBounds(),R.accurateBounds()); |
138 | } |
139 | |
140 | __forceinline CubicBezierCurve<Interval1f> reduce_v() const { |
141 | return merge(CubicBezierCurve<Interval<V>>(L),CubicBezierCurve<Interval<V>>(R)); |
142 | } |
143 | |
144 | __forceinline LinearBezierCurve<Interval1f> reduce_u() const { |
145 | return LinearBezierCurve<Interval1f>(L.bounds(),R.bounds()); |
146 | } |
147 | |
148 | __forceinline TensorLinearCubicBezierSurface<float> xfm(const V& dx) const { |
149 | return TensorLinearCubicBezierSurface<float>(L.xfm(dx),R.xfm(dx)); |
150 | } |
151 | |
152 | __forceinline TensorLinearCubicBezierSurface<vfloatx> vxfm(const V& dx) const { |
153 | return TensorLinearCubicBezierSurface<vfloatx>(L.vxfm(dx),R.vxfm(dx)); |
154 | } |
155 | |
156 | __forceinline TensorLinearCubicBezierSurface<float> xfm(const V& dx, const V& p) const { |
157 | return TensorLinearCubicBezierSurface<float>(L.xfm(dx,p),R.xfm(dx,p)); |
158 | } |
159 | |
160 | __forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space) const { |
161 | return TensorLinearCubicBezierSurface(L.xfm(space),R.xfm(space)); |
162 | } |
163 | |
164 | __forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p) const { |
165 | return TensorLinearCubicBezierSurface(L.xfm(space,p),R.xfm(space,p)); |
166 | } |
167 | |
168 | __forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p, const float s) const { |
169 | return TensorLinearCubicBezierSurface(L.xfm(space,p,s),R.xfm(space,p,s)); |
170 | } |
171 | |
172 | __forceinline TensorLinearCubicBezierSurface clip_u(const Interval1f& u) const { |
173 | return TensorLinearCubicBezierSurface(L.clip(u),R.clip(u)); |
174 | } |
175 | |
176 | __forceinline TensorLinearCubicBezierSurface clip_v(const Interval1f& v) const { |
177 | return TensorLinearCubicBezierSurface(clerp(L,R,V(v.lower)),clerp(L,R,V(v.upper))); |
178 | } |
179 | |
180 | __forceinline TensorLinearCubicBezierSurface clip(const Interval1f& u, const Interval1f& v) const { |
181 | return clip_v(v).clip_u(u); |
182 | } |
183 | |
184 | __forceinline void split_u(TensorLinearCubicBezierSurface& left, TensorLinearCubicBezierSurface& right, const float u = 0.5f) const |
185 | { |
186 | CubicBezierCurve<V> L0,L1; L.split(L0,L1,u); |
187 | CubicBezierCurve<V> R0,R1; R.split(R0,R1,u); |
188 | new (&left ) TensorLinearCubicBezierSurface(L0,R0); |
189 | new (&right) TensorLinearCubicBezierSurface(L1,R1); |
190 | } |
191 | |
192 | __forceinline TensorLinearCubicBezierSurface<Vec2vfx> vsplit_u(vboolx& valid, const BBox1f& u) const { |
193 | valid = true; clear(a&: valid,index: VSIZEX-1); |
194 | return TensorLinearCubicBezierSurface<Vec2vfx>(L.split(u),R.split(u)); |
195 | } |
196 | |
197 | __forceinline V eval(const float u, const float v) const { |
198 | return clerp(L,R,V(v)).eval(u); |
199 | } |
200 | |
201 | __forceinline V eval_du(const float u, const float v) const { |
202 | return clerp(L,R,V(v)).eval_dt(u); |
203 | } |
204 | |
205 | __forceinline V eval_dv(const float u, const float v) const { |
206 | return (R-L).eval(u); |
207 | } |
208 | |
209 | __forceinline void eval(const float u, const float v, V& p, V& dpdu, V& dpdv) const |
210 | { |
211 | V p0, dp0du; L.eval(u,p0,dp0du); |
212 | V p1, dp1du; R.eval(u,p1,dp1du); |
213 | p = lerp(p0,p1,v); |
214 | dpdu = lerp(dp0du,dp1du,v); |
215 | dpdv = p1-p0; |
216 | } |
217 | |
218 | __forceinline TensorLinearQuadraticBezierSurface<V> derivative_u() const { |
219 | return TensorLinearQuadraticBezierSurface<V>(L.derivative(),R.derivative()); |
220 | } |
221 | |
222 | __forceinline CubicBezierCurve<V> derivative_v() const { |
223 | return R-L; |
224 | } |
225 | |
226 | __forceinline V axis_u() const { |
227 | return (L.end()-L.begin())+(R.end()-R.begin()); |
228 | } |
229 | |
230 | __forceinline V axis_v() const { |
231 | return (R.begin()-L.begin())+(R.end()-L.end()); |
232 | } |
233 | |
234 | friend embree_ostream operator<<(embree_ostream cout, const TensorLinearCubicBezierSurface& a) |
235 | { |
236 | return cout << "TensorLinearCubicBezierSurface" << embree_endl |
237 | << "{" << embree_endl |
238 | << " L = " << a.L << ", " << embree_endl |
239 | << " R = " << a.R << embree_endl |
240 | << "}" ; |
241 | } |
242 | |
243 | friend __forceinline TensorLinearCubicBezierSurface clerp(const TensorLinearCubicBezierSurface& a, const TensorLinearCubicBezierSurface& b, const float t) { |
244 | return TensorLinearCubicBezierSurface(clerp(a.L,b.L,V(t)), clerp(a.R,b.R,V(t))); |
245 | } |
246 | }; |
247 | |
248 | template<> |
249 | struct TensorLinearCubicBezierSurface<Vec2fa> |
250 | { |
251 | CubicBezierCurve<vfloat4> LR; |
252 | |
253 | __forceinline TensorLinearCubicBezierSurface() {} |
254 | |
255 | __forceinline TensorLinearCubicBezierSurface(const TensorLinearCubicBezierSurface& curve) |
256 | : LR(curve.LR) {} |
257 | |
258 | __forceinline TensorLinearCubicBezierSurface& operator= (const TensorLinearCubicBezierSurface& other) { |
259 | LR = other.LR; return *this; |
260 | } |
261 | |
262 | __forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<vfloat4>& LR) |
263 | : LR(LR) {} |
264 | |
265 | __forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<Vec2fa>& L, const CubicBezierCurve<Vec2fa>& R) |
266 | : LR(shuffle<0,1,0,1>(a: vfloat4(L.v0),b: vfloat4(R.v0)),shuffle<0,1,0,1>(a: vfloat4(L.v1),b: vfloat4(R.v1)),shuffle<0,1,0,1>(a: vfloat4(L.v2),b: vfloat4(R.v2)),shuffle<0,1,0,1>(a: vfloat4(L.v3),b: vfloat4(R.v3))) {} |
267 | |
268 | __forceinline CubicBezierCurve<Vec2fa> getL() const { |
269 | return CubicBezierCurve<Vec2fa>(Vec2fa(LR.v0),Vec2fa(LR.v1),Vec2fa(LR.v2),Vec2fa(LR.v3)); |
270 | } |
271 | |
272 | __forceinline CubicBezierCurve<Vec2fa> getR() const { |
273 | return CubicBezierCurve<Vec2fa>(Vec2fa(shuffle<2,3,2,3>(v: LR.v0)),Vec2fa(shuffle<2,3,2,3>(v: LR.v1)),Vec2fa(shuffle<2,3,2,3>(v: LR.v2)),Vec2fa(shuffle<2,3,2,3>(v: LR.v3))); |
274 | } |
275 | |
276 | __forceinline BBox<Vec2fa> bounds() const |
277 | { |
278 | const BBox<vfloat4> b = LR.bounds(); |
279 | const BBox<Vec2fa> bl(Vec2fa(b.lower),Vec2fa(b.upper)); |
280 | const BBox<Vec2fa> br(Vec2fa(shuffle<2,3,2,3>(v: b.lower)),Vec2fa(shuffle<2,3,2,3>(v: b.upper))); |
281 | return merge(a: bl,b: br); |
282 | } |
283 | |
284 | __forceinline BBox1f bounds(const Vec2fa& axis) const |
285 | { |
286 | const CubicBezierCurve<vfloat4> LRx = LR; |
287 | const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(v: LR.v0),shuffle<1,0,3,2>(v: LR.v1),shuffle<1,0,3,2>(v: LR.v2),shuffle<1,0,3,2>(v: LR.v3)); |
288 | const CubicBezierCurve<vfloat4> LRa = cmadd(a: shuffle<0>(v: vfloat4(axis)),b: LRx,c: shuffle<1>(v: vfloat4(axis))*LRy); |
289 | const BBox<vfloat4> Lb = LRa.bounds(); |
290 | const BBox<vfloat4> Rb(shuffle<3>(v: Lb.lower),shuffle<3>(v: Lb.upper)); |
291 | const BBox<vfloat4> b = merge(a: Lb,b: Rb); |
292 | return BBox1f(b.lower[0],b.upper[0]); |
293 | } |
294 | |
295 | __forceinline TensorLinearCubicBezierSurface<float> xfm(const Vec2fa& dx) const |
296 | { |
297 | const CubicBezierCurve<vfloat4> LRx = LR; |
298 | const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(v: LR.v0),shuffle<1,0,3,2>(v: LR.v1),shuffle<1,0,3,2>(v: LR.v2),shuffle<1,0,3,2>(v: LR.v3)); |
299 | const CubicBezierCurve<vfloat4> LRa = cmadd(a: shuffle<0>(v: vfloat4(dx)),b: LRx,c: shuffle<1>(v: vfloat4(dx))*LRy); |
300 | return TensorLinearCubicBezierSurface<float>(CubicBezierCurve<float>(LRa.v0[0],LRa.v1[0],LRa.v2[0],LRa.v3[0]), |
301 | CubicBezierCurve<float>(LRa.v0[2],LRa.v1[2],LRa.v2[2],LRa.v3[2])); |
302 | } |
303 | |
304 | __forceinline TensorLinearCubicBezierSurface<float> xfm(const Vec2fa& dx, const Vec2fa& p) const |
305 | { |
306 | const vfloat4 pxyxy = shuffle<0,1,0,1>(v: vfloat4(p)); |
307 | const CubicBezierCurve<vfloat4> LRx = LR-pxyxy; |
308 | const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(v: LR.v0),shuffle<1,0,3,2>(v: LR.v1),shuffle<1,0,3,2>(v: LR.v2),shuffle<1,0,3,2>(v: LR.v3)); |
309 | const CubicBezierCurve<vfloat4> LRa = cmadd(a: shuffle<0>(v: vfloat4(dx)),b: LRx,c: shuffle<1>(v: vfloat4(dx))*LRy); |
310 | return TensorLinearCubicBezierSurface<float>(CubicBezierCurve<float>(LRa.v0[0],LRa.v1[0],LRa.v2[0],LRa.v3[0]), |
311 | CubicBezierCurve<float>(LRa.v0[2],LRa.v1[2],LRa.v2[2],LRa.v3[2])); |
312 | } |
313 | |
314 | __forceinline TensorLinearCubicBezierSurface clip_u(const Interval1f& u) const { |
315 | return TensorLinearCubicBezierSurface(LR.clip(u1: u)); |
316 | } |
317 | |
318 | __forceinline TensorLinearCubicBezierSurface clip_v(const Interval1f& v) const |
319 | { |
320 | const CubicBezierCurve<vfloat4> LL(shuffle<0,1,0,1>(v: LR.v0),shuffle<0,1,0,1>(v: LR.v1),shuffle<0,1,0,1>(v: LR.v2),shuffle<0,1,0,1>(v: LR.v3)); |
321 | const CubicBezierCurve<vfloat4> RR(shuffle<2,3,2,3>(v: LR.v0),shuffle<2,3,2,3>(v: LR.v1),shuffle<2,3,2,3>(v: LR.v2),shuffle<2,3,2,3>(v: LR.v3)); |
322 | return TensorLinearCubicBezierSurface(clerp(a: LL,b: RR,t: vfloat4(v.lower,v.lower,v.upper,v.upper))); |
323 | } |
324 | |
325 | __forceinline TensorLinearCubicBezierSurface clip(const Interval1f& u, const Interval1f& v) const { |
326 | return clip_v(v).clip_u(u); |
327 | } |
328 | |
329 | __forceinline void split_u(TensorLinearCubicBezierSurface& left, TensorLinearCubicBezierSurface& right, const float u = 0.5f) const |
330 | { |
331 | CubicBezierCurve<vfloat4> LR0,LR1; LR.split(left&: LR0,right&: LR1,t: u); |
332 | new (&left ) TensorLinearCubicBezierSurface(LR0); |
333 | new (&right) TensorLinearCubicBezierSurface(LR1); |
334 | } |
335 | |
336 | __forceinline TensorLinearCubicBezierSurface<Vec2vfx> vsplit_u(vboolx& valid, const BBox1f& u) const { |
337 | valid = true; clear(a&: valid,index: VSIZEX-1); |
338 | return TensorLinearCubicBezierSurface<Vec2vfx>(getL().split(u),getR().split(u)); |
339 | } |
340 | |
341 | __forceinline Vec2fa eval(const float u, const float v) const |
342 | { |
343 | const vfloat4 p = LR.eval(t: u); |
344 | return Vec2fa(lerp(a: shuffle<0,1,0,1>(v: p),b: shuffle<2,3,2,3>(v: p),t: v)); |
345 | } |
346 | |
347 | __forceinline Vec2fa eval_du(const float u, const float v) const |
348 | { |
349 | const vfloat4 dpdu = LR.eval_dt(t: u); |
350 | return Vec2fa(lerp(a: shuffle<0,1,0,1>(v: dpdu),b: shuffle<2,3,2,3>(v: dpdu),t: v)); |
351 | } |
352 | |
353 | __forceinline Vec2fa eval_dv(const float u, const float v) const |
354 | { |
355 | const vfloat4 p = LR.eval(t: u); |
356 | return Vec2fa(shuffle<2,3,2,3>(v: p)-shuffle<0,1,0,1>(v: p)); |
357 | } |
358 | |
359 | __forceinline void eval(const float u, const float v, Vec2fa& p, Vec2fa& dpdu, Vec2fa& dpdv) const |
360 | { |
361 | vfloat4 p0, dp0du; LR.eval(t: u,p&: p0,dp&: dp0du); |
362 | p = Vec2fa(lerp(a: shuffle<0,1,0,1>(v: p0),b: shuffle<2,3,2,3>(v: p0),t: v)); |
363 | dpdu = Vec2fa(lerp(a: shuffle<0,1,0,1>(v: dp0du),b: shuffle<2,3,2,3>(v: dp0du),t: v)); |
364 | dpdv = Vec2fa(shuffle<2,3,2,3>(v: p0)-shuffle<0,1,0,1>(v: p0)); |
365 | } |
366 | |
367 | __forceinline TensorLinearQuadraticBezierSurface<Vec2fa> derivative_u() const { |
368 | return TensorLinearQuadraticBezierSurface<Vec2fa>(LR.derivative()); |
369 | } |
370 | |
371 | __forceinline CubicBezierCurve<Vec2fa> derivative_v() const { |
372 | return getR()-getL(); |
373 | } |
374 | |
375 | __forceinline Vec2fa axis_u() const |
376 | { |
377 | const CubicBezierCurve<Vec2fa> L = getL(); |
378 | const CubicBezierCurve<Vec2fa> R = getR(); |
379 | return (L.end()-L.begin())+(R.end()-R.begin()); |
380 | } |
381 | |
382 | __forceinline Vec2fa axis_v() const |
383 | { |
384 | const CubicBezierCurve<Vec2fa> L = getL(); |
385 | const CubicBezierCurve<Vec2fa> R = getR(); |
386 | return (R.begin()-L.begin())+(R.end()-L.end()); |
387 | } |
388 | |
389 | friend embree_ostream operator<<(embree_ostream cout, const TensorLinearCubicBezierSurface& a) |
390 | { |
391 | return cout << "TensorLinearCubicBezierSurface" << embree_endl |
392 | << "{" << embree_endl |
393 | << " L = " << a.getL() << ", " << embree_endl |
394 | << " R = " << a.getR() << embree_endl |
395 | << "}" ; |
396 | } |
397 | }; |
398 | |
399 | typedef TensorLinearCubicBezierSurface<float> TensorLinearCubicBezierSurface1f; |
400 | typedef TensorLinearCubicBezierSurface<Vec2fa> TensorLinearCubicBezierSurface2fa; |
401 | typedef TensorLinearCubicBezierSurface<Vec3fa> TensorLinearCubicBezierSurface3fa; |
402 | } |
403 | } |
404 | |