1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "catmullclark_patch.h" |
7 | #include "bezier_patch.h" |
8 | #include "bezier_curve.h" |
9 | #include "catmullclark_coefficients.h" |
10 | |
11 | namespace embree |
12 | { |
13 | template<typename Vertex, typename Vertex_t = Vertex> |
14 | class __aligned(64) GregoryPatchT |
15 | { |
16 | typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch; |
17 | typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch; |
18 | typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring; |
19 | typedef BezierCurveT<Vertex> BezierCurve; |
20 | |
21 | public: |
22 | Vertex v[4][4]; |
23 | Vertex f[2][2]; |
24 | |
25 | __forceinline GregoryPatchT() {} |
26 | |
27 | __forceinline GregoryPatchT(const CatmullClarkPatch& patch) { |
28 | init(patch); |
29 | } |
30 | |
31 | __forceinline GregoryPatchT(const CatmullClarkPatch& patch, |
32 | const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) |
33 | { |
34 | init_crackfix(patch,border0,border1,border2,border3); |
35 | } |
36 | |
37 | __forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride) { |
38 | init(CatmullClarkPatch(edge,vertices,stride)); |
39 | } |
40 | |
41 | __forceinline Vertex& p0() { return v[0][0]; } |
42 | __forceinline Vertex& p1() { return v[0][3]; } |
43 | __forceinline Vertex& p2() { return v[3][3]; } |
44 | __forceinline Vertex& p3() { return v[3][0]; } |
45 | |
46 | __forceinline Vertex& e0_p() { return v[0][1]; } |
47 | __forceinline Vertex& e0_m() { return v[1][0]; } |
48 | __forceinline Vertex& e1_p() { return v[1][3]; } |
49 | __forceinline Vertex& e1_m() { return v[0][2]; } |
50 | __forceinline Vertex& e2_p() { return v[3][2]; } |
51 | __forceinline Vertex& e2_m() { return v[2][3]; } |
52 | __forceinline Vertex& e3_p() { return v[2][0]; } |
53 | __forceinline Vertex& e3_m() { return v[3][1]; } |
54 | |
55 | __forceinline Vertex& f0_p() { return v[1][1]; } |
56 | __forceinline Vertex& f1_p() { return v[1][2]; } |
57 | __forceinline Vertex& f2_p() { return v[2][2]; } |
58 | __forceinline Vertex& f3_p() { return v[2][1]; } |
59 | __forceinline Vertex& f0_m() { return f[0][0]; } |
60 | __forceinline Vertex& f1_m() { return f[0][1]; } |
61 | __forceinline Vertex& f2_m() { return f[1][1]; } |
62 | __forceinline Vertex& f3_m() { return f[1][0]; } |
63 | |
64 | __forceinline const Vertex& p0() const { return v[0][0]; } |
65 | __forceinline const Vertex& p1() const { return v[0][3]; } |
66 | __forceinline const Vertex& p2() const { return v[3][3]; } |
67 | __forceinline const Vertex& p3() const { return v[3][0]; } |
68 | |
69 | __forceinline const Vertex& e0_p() const { return v[0][1]; } |
70 | __forceinline const Vertex& e0_m() const { return v[1][0]; } |
71 | __forceinline const Vertex& e1_p() const { return v[1][3]; } |
72 | __forceinline const Vertex& e1_m() const { return v[0][2]; } |
73 | __forceinline const Vertex& e2_p() const { return v[3][2]; } |
74 | __forceinline const Vertex& e2_m() const { return v[2][3]; } |
75 | __forceinline const Vertex& e3_p() const { return v[2][0]; } |
76 | __forceinline const Vertex& e3_m() const { return v[3][1]; } |
77 | |
78 | __forceinline const Vertex& f0_p() const { return v[1][1]; } |
79 | __forceinline const Vertex& f1_p() const { return v[1][2]; } |
80 | __forceinline const Vertex& f2_p() const { return v[2][2]; } |
81 | __forceinline const Vertex& f3_p() const { return v[2][1]; } |
82 | __forceinline const Vertex& f0_m() const { return f[0][0]; } |
83 | __forceinline const Vertex& f1_m() const { return f[0][1]; } |
84 | __forceinline const Vertex& f2_m() const { return f[1][1]; } |
85 | __forceinline const Vertex& f3_m() const { return f[1][0]; } |
86 | |
87 | __forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index) { |
88 | return irreg_patch.ring[index].getLimitVertex(); |
89 | } |
90 | |
91 | __forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) { |
92 | return madd(1.0f/3.0f,irreg_patch.ring[index].getLimitTangent(),p_vtx); |
93 | } |
94 | |
95 | __forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) { |
96 | return madd(1.0f/3.0f,irreg_patch.ring[index].getSecondLimitTangent(),p_vtx); |
97 | } |
98 | |
99 | __forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) |
100 | { |
101 | CatmullClark1Ring3fa r0,r1,r2; |
102 | irreg_patch.ring[index].subdivide(r0); |
103 | r0.subdivide(dest&: r1); |
104 | r1.subdivide(dest&: r2); |
105 | return madd(8.0f/3.0f,r2.getLimitTangent(),p_vtx); |
106 | } |
107 | |
108 | __forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) |
109 | { |
110 | CatmullClark1Ring3fa r0,r1,r2; |
111 | irreg_patch.ring[index].subdivide(r0); |
112 | r0.subdivide(dest&: r1); |
113 | r1.subdivide(dest&: r2); |
114 | return madd(8.0f/3.0f,r2.getSecondLimitTangent(),p_vtx); |
115 | } |
116 | |
117 | void initFaceVertex(const CatmullClarkPatch& irreg_patch, |
118 | const size_t index, |
119 | const Vertex& p_vtx, |
120 | const Vertex& e0_p_vtx, |
121 | const Vertex& e1_m_vtx, |
122 | const unsigned int face_valence_p1, |
123 | const Vertex& e0_m_vtx, |
124 | const Vertex& e3_p_vtx, |
125 | const unsigned int face_valence_p3, |
126 | Vertex& f_p_vtx, |
127 | Vertex& f_m_vtx) |
128 | { |
129 | const unsigned int face_valence = irreg_patch.ring[index].face_valence; |
130 | const unsigned int edge_valence = irreg_patch.ring[index].edge_valence; |
131 | const unsigned int border_index = irreg_patch.ring[index].border_index; |
132 | |
133 | const Vertex& vtx = irreg_patch.ring[index].vtx; |
134 | const Vertex e_i = irreg_patch.ring[index].getEdgeCenter(0); |
135 | const Vertex c_i_m_1 = irreg_patch.ring[index].getQuadCenter(0); |
136 | const Vertex e_i_m_1 = irreg_patch.ring[index].getEdgeCenter(1); |
137 | |
138 | Vertex c_i, e_i_p_1; |
139 | const bool hasHardEdge0 = |
140 | std::isinf(irreg_patch.ring[index].vertex_crease_weight) && |
141 | std::isinf(irreg_patch.ring[index].crease_weight[0]); |
142 | |
143 | if (unlikely((border_index == edge_valence-2) || hasHardEdge0)) |
144 | { |
145 | /* mirror quad center and edge mid-point */ |
146 | c_i = madd(2.0f, e_i - c_i_m_1, c_i_m_1); |
147 | e_i_p_1 = madd(2.0f, vtx - e_i_m_1, e_i_m_1); |
148 | } |
149 | else |
150 | { |
151 | c_i = irreg_patch.ring[index].getQuadCenter( face_valence-1 ); |
152 | e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-1 ); |
153 | } |
154 | |
155 | Vertex c_i_m_2, e_i_m_2; |
156 | const bool hasHardEdge1 = |
157 | std::isinf(irreg_patch.ring[index].vertex_crease_weight) && |
158 | std::isinf(irreg_patch.ring[index].crease_weight[1]); |
159 | |
160 | if (unlikely(border_index == 2 || hasHardEdge1)) |
161 | { |
162 | /* mirror quad center and edge mid-point */ |
163 | c_i_m_2 = madd(2.0f, e_i_m_1 - c_i_m_1, c_i_m_1); |
164 | e_i_m_2 = madd(2.0f, vtx - e_i, + e_i); |
165 | } |
166 | else |
167 | { |
168 | c_i_m_2 = irreg_patch.ring[index].getQuadCenter( 1 ); |
169 | e_i_m_2 = irreg_patch.ring[index].getEdgeCenter( 2 ); |
170 | } |
171 | |
172 | const float d = 3.0f; |
173 | //const float c = cosf(2.0f*M_PI/(float)face_valence); |
174 | //const float c_e_p = cosf(2.0f*M_PI/(float)face_valence_p1); |
175 | //const float c_e_m = cosf(2.0f*M_PI/(float)face_valence_p3); |
176 | |
177 | const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(n: face_valence); |
178 | const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(n: face_valence_p1); |
179 | const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(n: face_valence_p3); |
180 | |
181 | const Vertex r_e_p = 1.0f/3.0f * (e_i_m_1 - e_i_p_1) + 2.0f/3.0f * (c_i_m_1 - c_i); |
182 | const Vertex r_e_m = 1.0f/3.0f * (e_i - e_i_m_2) + 2.0f/3.0f * (c_i_m_1 - c_i_m_2); |
183 | |
184 | f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); |
185 | f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); |
186 | } |
187 | |
188 | __noinline void init(const CatmullClarkPatch& patch) |
189 | { |
190 | assert( patch.ring[0].hasValidPositions() ); |
191 | assert( patch.ring[1].hasValidPositions() ); |
192 | assert( patch.ring[2].hasValidPositions() ); |
193 | assert( patch.ring[3].hasValidPositions() ); |
194 | |
195 | p0() = initCornerVertex(irreg_patch: patch,index: 0); |
196 | p1() = initCornerVertex(irreg_patch: patch,index: 1); |
197 | p2() = initCornerVertex(irreg_patch: patch,index: 2); |
198 | p3() = initCornerVertex(irreg_patch: patch,index: 3); |
199 | |
200 | e0_p() = initPositiveEdgeVertex(irreg_patch: patch,index: 0, p_vtx: p0()); |
201 | e1_p() = initPositiveEdgeVertex(irreg_patch: patch,index: 1, p_vtx: p1()); |
202 | e2_p() = initPositiveEdgeVertex(irreg_patch: patch,index: 2, p_vtx: p2()); |
203 | e3_p() = initPositiveEdgeVertex(irreg_patch: patch,index: 3, p_vtx: p3()); |
204 | |
205 | e0_m() = initNegativeEdgeVertex(irreg_patch: patch,index: 0, p_vtx: p0()); |
206 | e1_m() = initNegativeEdgeVertex(irreg_patch: patch,index: 1, p_vtx: p1()); |
207 | e2_m() = initNegativeEdgeVertex(irreg_patch: patch,index: 2, p_vtx: p2()); |
208 | e3_m() = initNegativeEdgeVertex(irreg_patch: patch,index: 3, p_vtx: p3()); |
209 | |
210 | const unsigned int face_valence_p0 = patch.ring[0].face_valence; |
211 | const unsigned int face_valence_p1 = patch.ring[1].face_valence; |
212 | const unsigned int face_valence_p2 = patch.ring[2].face_valence; |
213 | const unsigned int face_valence_p3 = patch.ring[3].face_valence; |
214 | |
215 | initFaceVertex(irreg_patch: patch,index: 0,p_vtx: p0(),e0_p_vtx: e0_p(),e1_m_vtx: e1_m(),face_valence_p1,e0_m_vtx: e0_m(),e3_p_vtx: e3_p(),face_valence_p3,f_p_vtx&: f0_p(),f_m_vtx&: f0_m() ); |
216 | initFaceVertex(irreg_patch: patch,index: 1,p_vtx: p1(),e0_p_vtx: e1_p(),e1_m_vtx: e2_m(),face_valence_p1: face_valence_p2,e0_m_vtx: e1_m(),e3_p_vtx: e0_p(),face_valence_p3: face_valence_p0,f_p_vtx&: f1_p(),f_m_vtx&: f1_m() ); |
217 | initFaceVertex(irreg_patch: patch,index: 2,p_vtx: p2(),e0_p_vtx: e2_p(),e1_m_vtx: e3_m(),face_valence_p1: face_valence_p3,e0_m_vtx: e2_m(),e3_p_vtx: e1_p(),face_valence_p3: face_valence_p1,f_p_vtx&: f2_p(),f_m_vtx&: f2_m() ); |
218 | initFaceVertex(irreg_patch: patch,index: 3,p_vtx: p3(),e0_p_vtx: e3_p(),e1_m_vtx: e0_m(),face_valence_p1: face_valence_p0,e0_m_vtx: e3_m(),e3_p_vtx: e2_p(),face_valence_p3,f_p_vtx&: f3_p(),f_m_vtx&: f3_m() ); |
219 | |
220 | } |
221 | |
222 | __noinline void init_crackfix(const CatmullClarkPatch& patch, |
223 | const BezierCurve* border0, |
224 | const BezierCurve* border1, |
225 | const BezierCurve* border2, |
226 | const BezierCurve* border3) |
227 | { |
228 | assert( patch.ring[0].hasValidPositions() ); |
229 | assert( patch.ring[1].hasValidPositions() ); |
230 | assert( patch.ring[2].hasValidPositions() ); |
231 | assert( patch.ring[3].hasValidPositions() ); |
232 | |
233 | p0() = initCornerVertex(irreg_patch: patch,index: 0); |
234 | p1() = initCornerVertex(irreg_patch: patch,index: 1); |
235 | p2() = initCornerVertex(irreg_patch: patch,index: 2); |
236 | p3() = initCornerVertex(irreg_patch: patch,index: 3); |
237 | |
238 | e0_p() = initPositiveEdgeVertex(irreg_patch: patch,index: 0, p_vtx: p0()); |
239 | e1_p() = initPositiveEdgeVertex(irreg_patch: patch,index: 1, p_vtx: p1()); |
240 | e2_p() = initPositiveEdgeVertex(irreg_patch: patch,index: 2, p_vtx: p2()); |
241 | e3_p() = initPositiveEdgeVertex(irreg_patch: patch,index: 3, p_vtx: p3()); |
242 | |
243 | e0_m() = initNegativeEdgeVertex(irreg_patch: patch,index: 0, p_vtx: p0()); |
244 | e1_m() = initNegativeEdgeVertex(irreg_patch: patch,index: 1, p_vtx: p1()); |
245 | e2_m() = initNegativeEdgeVertex(irreg_patch: patch,index: 2, p_vtx: p2()); |
246 | e3_m() = initNegativeEdgeVertex(irreg_patch: patch,index: 3, p_vtx: p3()); |
247 | |
248 | if (unlikely(border0 != nullptr)) |
249 | { |
250 | p0() = border0->v0; |
251 | e0_p() = border0->v1; |
252 | e1_m() = border0->v2; |
253 | p1() = border0->v3; |
254 | } |
255 | |
256 | if (unlikely(border1 != nullptr)) |
257 | { |
258 | p1() = border1->v0; |
259 | e1_p() = border1->v1; |
260 | e2_m() = border1->v2; |
261 | p2() = border1->v3; |
262 | } |
263 | |
264 | if (unlikely(border2 != nullptr)) |
265 | { |
266 | p2() = border2->v0; |
267 | e2_p() = border2->v1; |
268 | e3_m() = border2->v2; |
269 | p3() = border2->v3; |
270 | } |
271 | |
272 | if (unlikely(border3 != nullptr)) |
273 | { |
274 | p3() = border3->v0; |
275 | e3_p() = border3->v1; |
276 | e0_m() = border3->v2; |
277 | p0() = border3->v3; |
278 | } |
279 | |
280 | const unsigned int face_valence_p0 = patch.ring[0].face_valence; |
281 | const unsigned int face_valence_p1 = patch.ring[1].face_valence; |
282 | const unsigned int face_valence_p2 = patch.ring[2].face_valence; |
283 | const unsigned int face_valence_p3 = patch.ring[3].face_valence; |
284 | |
285 | initFaceVertex(irreg_patch: patch,index: 0,p_vtx: p0(),e0_p_vtx: e0_p(),e1_m_vtx: e1_m(),face_valence_p1,e0_m_vtx: e0_m(),e3_p_vtx: e3_p(),face_valence_p3,f_p_vtx&: f0_p(),f_m_vtx&: f0_m() ); |
286 | initFaceVertex(irreg_patch: patch,index: 1,p_vtx: p1(),e0_p_vtx: e1_p(),e1_m_vtx: e2_m(),face_valence_p1: face_valence_p2,e0_m_vtx: e1_m(),e3_p_vtx: e0_p(),face_valence_p3: face_valence_p0,f_p_vtx&: f1_p(),f_m_vtx&: f1_m() ); |
287 | initFaceVertex(irreg_patch: patch,index: 2,p_vtx: p2(),e0_p_vtx: e2_p(),e1_m_vtx: e3_m(),face_valence_p1: face_valence_p3,e0_m_vtx: e2_m(),e3_p_vtx: e1_p(),face_valence_p3: face_valence_p1,f_p_vtx&: f2_p(),f_m_vtx&: f2_m() ); |
288 | initFaceVertex(irreg_patch: patch,index: 3,p_vtx: p3(),e0_p_vtx: e3_p(),e1_m_vtx: e0_m(),face_valence_p1: face_valence_p0,e0_m_vtx: e3_m(),e3_p_vtx: e2_p(),face_valence_p3,f_p_vtx&: f3_p(),f_m_vtx&: f3_m() ); |
289 | } |
290 | |
291 | |
292 | void computeGregoryPatchFacePoints(const unsigned int face_valence, |
293 | const Vertex& r_e_p, |
294 | const Vertex& r_e_m, |
295 | const Vertex& p_vtx, |
296 | const Vertex& e0_p_vtx, |
297 | const Vertex& e1_m_vtx, |
298 | const unsigned int face_valence_p1, |
299 | const Vertex& e0_m_vtx, |
300 | const Vertex& e3_p_vtx, |
301 | const unsigned int face_valence_p3, |
302 | Vertex& f_p_vtx, |
303 | Vertex& f_m_vtx, |
304 | const float d = 3.0f) |
305 | { |
306 | //const float c = cosf(2.0*M_PI/(float)face_valence); |
307 | //const float c_e_p = cosf(2.0*M_PI/(float)face_valence_p1); |
308 | //const float c_e_m = cosf(2.0*M_PI/(float)face_valence_p3); |
309 | |
310 | const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(n: face_valence); |
311 | const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(n: face_valence_p1); |
312 | const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(n: face_valence_p3); |
313 | |
314 | |
315 | f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); |
316 | f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); |
317 | f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); |
318 | f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); |
319 | } |
320 | |
321 | __noinline void init(const GeneralCatmullClarkPatch& patch) |
322 | { |
323 | assert(patch.size() == 4); |
324 | #if 0 |
325 | CatmullClarkPatch qpatch; patch.init(qpatch); |
326 | init(qpatch); |
327 | #else |
328 | const float face_valence_p0 = patch.ring[0].face_valence; |
329 | const float face_valence_p1 = patch.ring[1].face_valence; |
330 | const float face_valence_p2 = patch.ring[2].face_valence; |
331 | const float face_valence_p3 = patch.ring[3].face_valence; |
332 | |
333 | Vertex p0_r_p, p0_r_m; |
334 | patch.ring[0].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m ); |
335 | |
336 | Vertex p1_r_p, p1_r_m; |
337 | patch.ring[1].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m ); |
338 | |
339 | Vertex p2_r_p, p2_r_m; |
340 | patch.ring[2].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m ); |
341 | |
342 | Vertex p3_r_p, p3_r_m; |
343 | patch.ring[3].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m ); |
344 | |
345 | computeGregoryPatchFacePoints(face_valence: face_valence_p0, r_e_p: p0_r_p, r_e_m: p0_r_m, p_vtx: p0(), e0_p_vtx: e0_p(), e1_m_vtx: e1_m(), face_valence_p1, e0_m_vtx: e0_m(), e3_p_vtx: e3_p(), face_valence_p3, f_p_vtx&: f0_p(), f_m_vtx&: f0_m() ); |
346 | computeGregoryPatchFacePoints(face_valence: face_valence_p1, r_e_p: p1_r_p, r_e_m: p1_r_m, p_vtx: p1(), e0_p_vtx: e1_p(), e1_m_vtx: e2_m(), face_valence_p1: face_valence_p2, e0_m_vtx: e1_m(), e3_p_vtx: e0_p(), face_valence_p3: face_valence_p0, f_p_vtx&: f1_p(), f_m_vtx&: f1_m() ); |
347 | computeGregoryPatchFacePoints(face_valence: face_valence_p2, r_e_p: p2_r_p, r_e_m: p2_r_m, p_vtx: p2(), e0_p_vtx: e2_p(), e1_m_vtx: e3_m(), face_valence_p1: face_valence_p3, e0_m_vtx: e2_m(), e3_p_vtx: e1_p(), face_valence_p3: face_valence_p1, f_p_vtx&: f2_p(), f_m_vtx&: f2_m() ); |
348 | computeGregoryPatchFacePoints(face_valence: face_valence_p3, r_e_p: p3_r_p, r_e_m: p3_r_m, p_vtx: p3(), e0_p_vtx: e3_p(), e1_m_vtx: e0_m(), face_valence_p1: face_valence_p0, e0_m_vtx: e3_m(), e3_p_vtx: e2_p(), face_valence_p3, f_p_vtx&: f3_p(), f_m_vtx&: f3_m() ); |
349 | |
350 | #endif |
351 | } |
352 | |
353 | |
354 | __forceinline void convert_to_bezier() |
355 | { |
356 | f0_p() = (f0_p() + f0_m()) * 0.5f; |
357 | f1_p() = (f1_p() + f1_m()) * 0.5f; |
358 | f2_p() = (f2_p() + f2_m()) * 0.5f; |
359 | f3_p() = (f3_p() + f3_m()) * 0.5f; |
360 | f0_m() = Vertex( zero ); |
361 | f1_m() = Vertex( zero ); |
362 | f2_m() = Vertex( zero ); |
363 | f3_m() = Vertex( zero ); |
364 | } |
365 | |
366 | static __forceinline void computeInnerVertices(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv, |
367 | Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21) |
368 | { |
369 | if (unlikely(uu == 0.0f || uu == 1.0f || vv == 0.0f || vv == 1.0f)) |
370 | { |
371 | matrix_11 = matrix[1][1]; |
372 | matrix_12 = matrix[1][2]; |
373 | matrix_22 = matrix[2][2]; |
374 | matrix_21 = matrix[2][1]; |
375 | } |
376 | else |
377 | { |
378 | const Vertex_t f0_p = matrix[1][1]; |
379 | const Vertex_t f1_p = matrix[1][2]; |
380 | const Vertex_t f2_p = matrix[2][2]; |
381 | const Vertex_t f3_p = matrix[2][1]; |
382 | |
383 | const Vertex_t f0_m = f_m[0][0]; |
384 | const Vertex_t f1_m = f_m[0][1]; |
385 | const Vertex_t f2_m = f_m[1][1]; |
386 | const Vertex_t f3_m = f_m[1][0]; |
387 | |
388 | matrix_11 = ( uu * f0_p + vv * f0_m)*rcp(x: uu+vv); |
389 | matrix_12 = ((1.0f-uu) * f1_m + vv * f1_p)*rcp(x: 1.0f-uu+vv); |
390 | matrix_22 = ((1.0f-uu) * f2_p + (1.0f-vv) * f2_m)*rcp(x: 2.0f-uu-vv); |
391 | matrix_21 = ( uu * f3_m + (1.0f-vv) * f3_p)*rcp(x: 1.0f+uu-vv); |
392 | } |
393 | } |
394 | |
395 | template<typename vfloat> |
396 | static __forceinline void computeInnerVertices(const Vertex v[4][4], const Vertex f[2][2], |
397 | size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21) |
398 | { |
399 | const auto m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); |
400 | |
401 | const vfloat f0_p = v[1][1][i]; |
402 | const vfloat f1_p = v[1][2][i]; |
403 | const vfloat f2_p = v[2][2][i]; |
404 | const vfloat f3_p = v[2][1][i]; |
405 | |
406 | const vfloat f0_m = f[0][0][i]; |
407 | const vfloat f1_m = f[0][1][i]; |
408 | const vfloat f2_m = f[1][1][i]; |
409 | const vfloat f3_m = f[1][0][i]; |
410 | |
411 | const vfloat one_minus_uu = vfloat(1.0f) - uu; |
412 | const vfloat one_minus_vv = vfloat(1.0f) - vv; |
413 | |
414 | const vfloat f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); |
415 | const vfloat f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); |
416 | const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); |
417 | const vfloat f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); |
418 | |
419 | matrix_11 = select(m_border,f0_p,f0_i); |
420 | matrix_12 = select(m_border,f1_p,f1_i); |
421 | matrix_22 = select(m_border,f2_p,f2_i); |
422 | matrix_21 = select(m_border,f3_p,f3_i); |
423 | } |
424 | |
425 | static __forceinline Vertex eval(const Vertex matrix[4][4], const Vertex f[2][2], const float& uu, const float& vv) |
426 | { |
427 | Vertex_t v_11, v_12, v_22, v_21; |
428 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
429 | |
430 | const Vec4<float> Bu = BezierBasis::eval(u: uu); |
431 | const Vec4<float> Bv = BezierBasis::eval(u: vv); |
432 | |
433 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
434 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
435 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
436 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
437 | } |
438 | |
439 | static __forceinline Vertex eval_du(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
440 | { |
441 | Vertex_t v_11, v_12, v_22, v_21; |
442 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
443 | |
444 | const Vec4<float> Bu = BezierBasis::derivative(u: uu); |
445 | const Vec4<float> Bv = BezierBasis::eval(u: vv); |
446 | |
447 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
448 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
449 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
450 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
451 | } |
452 | |
453 | static __forceinline Vertex eval_dv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
454 | { |
455 | Vertex_t v_11, v_12, v_22, v_21; |
456 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
457 | |
458 | const Vec4<float> Bu = BezierBasis::eval(u: uu); |
459 | const Vec4<float> Bv = BezierBasis::derivative(u: vv); |
460 | |
461 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
462 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
463 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
464 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
465 | } |
466 | |
467 | static __forceinline Vertex eval_dudu(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
468 | { |
469 | Vertex_t v_11, v_12, v_22, v_21; |
470 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
471 | |
472 | const Vec4<float> Bu = BezierBasis::derivative2(u: uu); |
473 | const Vec4<float> Bv = BezierBasis::eval(u: vv); |
474 | |
475 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
476 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
477 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
478 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
479 | } |
480 | |
481 | static __forceinline Vertex eval_dvdv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
482 | { |
483 | Vertex_t v_11, v_12, v_22, v_21; |
484 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
485 | |
486 | const Vec4<float> Bu = BezierBasis::eval(u: uu); |
487 | const Vec4<float> Bv = BezierBasis::derivative2(u: vv); |
488 | |
489 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
490 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
491 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
492 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
493 | } |
494 | |
495 | static __forceinline Vertex eval_dudv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
496 | { |
497 | Vertex_t v_11, v_12, v_22, v_21; |
498 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
499 | |
500 | const Vec4<float> Bu = BezierBasis::derivative(u: uu); |
501 | const Vec4<float> Bv = BezierBasis::derivative(u: vv); |
502 | |
503 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
504 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
505 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
506 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
507 | } |
508 | |
509 | __forceinline Vertex eval(const float uu, const float vv) const { |
510 | return eval(v,f,uu,vv); |
511 | } |
512 | |
513 | __forceinline Vertex eval_du( const float uu, const float vv) const { |
514 | return eval_du(v,f,uu,vv); |
515 | } |
516 | |
517 | __forceinline Vertex eval_dv( const float uu, const float vv) const { |
518 | return eval_dv(v,f,uu,vv); |
519 | } |
520 | |
521 | __forceinline Vertex eval_dudu( const float uu, const float vv) const { |
522 | return eval_dudu(v,f,uu,vv); |
523 | } |
524 | |
525 | __forceinline Vertex eval_dvdv( const float uu, const float vv) const { |
526 | return eval_dvdv(v,f,uu,vv); |
527 | } |
528 | |
529 | __forceinline Vertex eval_dudv( const float uu, const float vv) const { |
530 | return eval_dudv(v,f,uu,vv); |
531 | } |
532 | |
533 | static __forceinline Vertex normal(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv) // FIXME: why not using basis functions |
534 | { |
535 | /* interpolate inner vertices */ |
536 | Vertex_t matrix_11, matrix_12, matrix_22, matrix_21; |
537 | computeInnerVertices(matrix,f_m,uu,vv,matrix_11, matrix_12, matrix_22, matrix_21); |
538 | |
539 | /* tangentU */ |
540 | const Vertex_t col0 = deCasteljau(vv, (Vertex_t)matrix[0][0], (Vertex_t)matrix[1][0], (Vertex_t)matrix[2][0], (Vertex_t)matrix[3][0]); |
541 | const Vertex_t col1 = deCasteljau(vv, (Vertex_t)matrix[0][1], (Vertex_t)matrix_11 , (Vertex_t)matrix_21 , (Vertex_t)matrix[3][1]); |
542 | const Vertex_t col2 = deCasteljau(vv, (Vertex_t)matrix[0][2], (Vertex_t)matrix_12 , (Vertex_t)matrix_22 , (Vertex_t)matrix[3][2]); |
543 | const Vertex_t col3 = deCasteljau(vv, (Vertex_t)matrix[0][3], (Vertex_t)matrix[1][3], (Vertex_t)matrix[2][3], (Vertex_t)matrix[3][3]); |
544 | |
545 | const Vertex_t tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3); |
546 | |
547 | /* tangentV */ |
548 | const Vertex_t row0 = deCasteljau(uu, (Vertex_t)matrix[0][0], (Vertex_t)matrix[0][1], (Vertex_t)matrix[0][2], (Vertex_t)matrix[0][3]); |
549 | const Vertex_t row1 = deCasteljau(uu, (Vertex_t)matrix[1][0], (Vertex_t)matrix_11 , (Vertex_t)matrix_12 , (Vertex_t)matrix[1][3]); |
550 | const Vertex_t row2 = deCasteljau(uu, (Vertex_t)matrix[2][0], (Vertex_t)matrix_21 , (Vertex_t)matrix_22 , (Vertex_t)matrix[2][3]); |
551 | const Vertex_t row3 = deCasteljau(uu, (Vertex_t)matrix[3][0], (Vertex_t)matrix[3][1], (Vertex_t)matrix[3][2], (Vertex_t)matrix[3][3]); |
552 | |
553 | const Vertex_t tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3); |
554 | |
555 | /* normal = tangentU x tangentV */ |
556 | const Vertex_t n = cross(tangentU,tangentV); |
557 | |
558 | return n; |
559 | } |
560 | |
561 | __forceinline Vertex normal( const float uu, const float vv) const { |
562 | return normal(v,f,uu,vv); |
563 | } |
564 | |
565 | __forceinline void eval(const float u, const float v, |
566 | Vertex* P, Vertex* dPdu, Vertex* dPdv, |
567 | Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv, |
568 | const float dscale = 1.0f) const |
569 | { |
570 | if (P) { |
571 | *P = eval(u,v); |
572 | } |
573 | if (dPdu) { |
574 | assert(dPdu); *dPdu = eval_du(u,v)*dscale; |
575 | assert(dPdv); *dPdv = eval_dv(u,v)*dscale; |
576 | } |
577 | if (ddPdudu) { |
578 | assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(x: dscale); |
579 | assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(x: dscale); |
580 | assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(x: dscale); |
581 | } |
582 | } |
583 | |
584 | template<class vfloat> |
585 | static __forceinline vfloat eval(const Vertex v[4][4], const Vertex f[2][2], |
586 | const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n, |
587 | vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21) |
588 | { |
589 | const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i])))); |
590 | const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(matrix_11 ),madd(v_n[2],vfloat(matrix_21 ),v_n[3] * vfloat(v[3][1][i])))); |
591 | const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(matrix_12 ),madd(v_n[2],vfloat(matrix_22 ),v_n[3] * vfloat(v[3][2][i])))); |
592 | const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i])))); |
593 | return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x))); |
594 | } |
595 | |
596 | template<typename vbool, typename vfloat> |
597 | static __forceinline void eval(const Vertex v[4][4], const Vertex f[2][2], |
598 | const vbool& valid, const vfloat& uu, const vfloat& vv, |
599 | float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, |
600 | const float dscale, const size_t dstride, const size_t N) |
601 | { |
602 | if (P) { |
603 | const Vec4<vfloat> u_n = BezierBasis::eval(uu); |
604 | const Vec4<vfloat> v_n = BezierBasis::eval(vv); |
605 | for (size_t i=0; i<N; i++) { |
606 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
607 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
608 | vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)); |
609 | } |
610 | } |
611 | if (dPdu) |
612 | { |
613 | { |
614 | assert(dPdu); |
615 | const Vec4<vfloat> u_n = BezierBasis::derivative(uu); |
616 | const Vec4<vfloat> v_n = BezierBasis::eval(vv); |
617 | for (size_t i=0; i<N; i++) { |
618 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
619 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
620 | vfloat::store(valid,dPdu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale); |
621 | } |
622 | } |
623 | { |
624 | assert(dPdv); |
625 | const Vec4<vfloat> u_n = BezierBasis::eval(uu); |
626 | const Vec4<vfloat> v_n = BezierBasis::derivative(vv); |
627 | for (size_t i=0; i<N; i++) { |
628 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
629 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
630 | vfloat::store(valid,dPdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale); |
631 | } |
632 | } |
633 | } |
634 | if (ddPdudu) |
635 | { |
636 | { |
637 | assert(ddPdudu); |
638 | const Vec4<vfloat> u_n = BezierBasis::derivative2(uu); |
639 | const Vec4<vfloat> v_n = BezierBasis::eval(vv); |
640 | for (size_t i=0; i<N; i++) { |
641 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
642 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
643 | vfloat::store(valid,ddPdudu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(x: dscale)); |
644 | } |
645 | } |
646 | { |
647 | assert(ddPdvdv); |
648 | const Vec4<vfloat> u_n = BezierBasis::eval(uu); |
649 | const Vec4<vfloat> v_n = BezierBasis::derivative2(vv); |
650 | for (size_t i=0; i<N; i++) { |
651 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
652 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
653 | vfloat::store(valid,ddPdvdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(x: dscale)); |
654 | } |
655 | } |
656 | { |
657 | assert(ddPdudv); |
658 | const Vec4<vfloat> u_n = BezierBasis::derivative(uu); |
659 | const Vec4<vfloat> v_n = BezierBasis::derivative(vv); |
660 | for (size_t i=0; i<N; i++) { |
661 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
662 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
663 | vfloat::store(valid,ddPdudv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(x: dscale)); |
664 | } |
665 | } |
666 | } |
667 | } |
668 | |
669 | template<typename vbool, typename vfloat> |
670 | __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, |
671 | float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, |
672 | const float dscale, const size_t dstride, const size_t N) const { |
673 | eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N); |
674 | } |
675 | |
676 | template<class T> |
677 | static __forceinline Vec3<T> eval_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv) |
678 | { |
679 | typedef typename T::Bool M; |
680 | const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); |
681 | |
682 | const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z); |
683 | const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z); |
684 | const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z); |
685 | const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z); |
686 | |
687 | const Vec3<T> f0_m = f[0][0]; |
688 | const Vec3<T> f1_m = f[0][1]; |
689 | const Vec3<T> f2_m = f[1][1]; |
690 | const Vec3<T> f3_m = f[1][0]; |
691 | |
692 | const T one_minus_uu = T(1.0f) - uu; |
693 | const T one_minus_vv = T(1.0f) - vv; |
694 | |
695 | const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); |
696 | const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); |
697 | const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); |
698 | const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); |
699 | |
700 | const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) ); |
701 | const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) ); |
702 | const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) ); |
703 | const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) ); |
704 | |
705 | const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu; |
706 | const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv; |
707 | const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu); |
708 | const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv); |
709 | const T B2_u = 3.0f * (uu * one_minus_uu * uu); |
710 | const T B2_v = 3.0f * (vv * one_minus_vv * vv); |
711 | const T B3_u = uu * uu * uu; |
712 | const T B3_v = vv * vv * vv; |
713 | |
714 | const T x = madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u * matrix[0][3].x))), |
715 | madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,F0.x ,madd(B2_u,F1.x ,B3_u * matrix[1][3].x))), |
716 | madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,F3.x ,madd(B2_u,F2.x ,B3_u * matrix[2][3].x))), |
717 | B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u * matrix[3][3].x)))))); |
718 | |
719 | const T y = madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u * matrix[0][3].y))), |
720 | madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,F0.y ,madd(B2_u,F1.y ,B3_u * matrix[1][3].y))), |
721 | madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,F3.y ,madd(B2_u,F2.y ,B3_u * matrix[2][3].y))), |
722 | B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u * matrix[3][3].y)))))); |
723 | |
724 | const T z = madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u * matrix[0][3].z))), |
725 | madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,F0.z ,madd(B2_u,F1.z ,B3_u * matrix[1][3].z))), |
726 | madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,F3.z ,madd(B2_u,F2.z ,B3_u * matrix[2][3].z))), |
727 | B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u * matrix[3][3].z)))))); |
728 | |
729 | return Vec3<T>(x,y,z); |
730 | } |
731 | |
732 | template<class T> |
733 | __forceinline Vec3<T> eval(const T& uu, const T& vv) const |
734 | { |
735 | Vec3<T> ff[2][2]; |
736 | ff[0][0] = Vec3<T>(f[0][0]); |
737 | ff[0][1] = Vec3<T>(f[0][1]); |
738 | ff[1][1] = Vec3<T>(f[1][1]); |
739 | ff[1][0] = Vec3<T>(f[1][0]); |
740 | return eval_t(v,ff,uu,vv); |
741 | } |
742 | |
743 | template<class T> |
744 | static __forceinline Vec3<T> normal_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv) |
745 | { |
746 | typedef typename T::Bool M; |
747 | |
748 | const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z); |
749 | const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z); |
750 | const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z); |
751 | const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z); |
752 | |
753 | const Vec3<T> f0_m = f[0][0]; |
754 | const Vec3<T> f1_m = f[0][1]; |
755 | const Vec3<T> f2_m = f[1][1]; |
756 | const Vec3<T> f3_m = f[1][0]; |
757 | |
758 | const T one_minus_uu = T(1.0f) - uu; |
759 | const T one_minus_vv = T(1.0f) - vv; |
760 | |
761 | const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); |
762 | const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); |
763 | const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); |
764 | const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); |
765 | |
766 | #if 1 |
767 | const M m_corner0 = (uu == 0.0f) & (vv == 0.0f); |
768 | const M m_corner1 = (uu == 1.0f) & (vv == 0.0f); |
769 | const M m_corner2 = (uu == 1.0f) & (vv == 1.0f); |
770 | const M m_corner3 = (uu == 0.0f) & (vv == 1.0f); |
771 | const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) ); |
772 | const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) ); |
773 | const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) ); |
774 | const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) ); |
775 | #else |
776 | const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); |
777 | const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) ); |
778 | const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) ); |
779 | const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) ); |
780 | const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) ); |
781 | #endif |
782 | |
783 | const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z); |
784 | const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z); |
785 | const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z); |
786 | const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z); |
787 | |
788 | const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z); |
789 | const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z); |
790 | const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z); |
791 | |
792 | const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z); |
793 | const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z); |
794 | const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z); |
795 | |
796 | const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z); |
797 | const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z); |
798 | |
799 | /* tangentU */ |
800 | const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30); |
801 | const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31); |
802 | const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32); |
803 | const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33); |
804 | |
805 | const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3); |
806 | |
807 | /* tangentV */ |
808 | const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03); |
809 | const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13); |
810 | const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23); |
811 | const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33); |
812 | |
813 | const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3); |
814 | |
815 | /* normal = tangentU x tangentV */ |
816 | const Vec3<T> n = cross(tangentU,tangentV); |
817 | return n; |
818 | } |
819 | |
820 | template<class T> |
821 | __forceinline Vec3<T> normal(const T& uu, const T& vv) const |
822 | { |
823 | Vec3<T> ff[2][2]; |
824 | ff[0][0] = Vec3<T>(f[0][0]); |
825 | ff[0][1] = Vec3<T>(f[0][1]); |
826 | ff[1][1] = Vec3<T>(f[1][1]); |
827 | ff[1][0] = Vec3<T>(f[1][0]); |
828 | return normal_t(v,ff,uu,vv); |
829 | } |
830 | |
831 | __forceinline BBox<Vertex> bounds() const |
832 | { |
833 | const Vertex *const cv = &v[0][0]; |
834 | BBox<Vertex> bounds (cv[0]); |
835 | for (size_t i=1; i<16; i++) |
836 | bounds.extend( cv[i] ); |
837 | bounds.extend(f[0][0]); |
838 | bounds.extend(f[1][0]); |
839 | bounds.extend(f[1][1]); |
840 | bounds.extend(f[1][1]); |
841 | return bounds; |
842 | } |
843 | |
844 | friend embree_ostream operator<<(embree_ostream o, const GregoryPatchT& p) |
845 | { |
846 | for (size_t y=0; y<4; y++) |
847 | for (size_t x=0; x<4; x++) |
848 | o << "v[" << y << "][" << x << "] " << p.v[y][x] << embree_endl; |
849 | |
850 | for (size_t y=0; y<2; y++) |
851 | for (size_t x=0; x<2; x++) |
852 | o << "f[" << y << "][" << x << "] " << p.f[y][x] << embree_endl; |
853 | return o; |
854 | } |
855 | }; |
856 | |
857 | typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa; |
858 | |
859 | template<typename Vertex, typename Vertex_t> |
860 | __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride) |
861 | { |
862 | CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride); |
863 | GregoryPatchT<Vertex,Vertex_t> gpatch(patch); |
864 | gpatch.convert_to_bezier(); |
865 | for (size_t y=0; y<4; y++) |
866 | for (size_t x=0; x<4; x++) |
867 | matrix[y][x] = (Vertex_t)gpatch.v[y][x]; |
868 | } |
869 | |
870 | template<typename Vertex, typename Vertex_t> |
871 | __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch) |
872 | { |
873 | GregoryPatchT<Vertex,Vertex_t> gpatch(patch); |
874 | gpatch.convert_to_bezier(); |
875 | for (size_t y=0; y<4; y++) |
876 | for (size_t x=0; x<4; x++) |
877 | matrix[y][x] = (Vertex_t)gpatch.v[y][x]; |
878 | } |
879 | |
880 | template<typename Vertex, typename Vertex_t> |
881 | __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch, |
882 | const BezierCurveT<Vertex>* border0, |
883 | const BezierCurveT<Vertex>* border1, |
884 | const BezierCurveT<Vertex>* border2, |
885 | const BezierCurveT<Vertex>* border3) |
886 | { |
887 | GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3); |
888 | gpatch.convert_to_bezier(); |
889 | for (size_t y=0; y<4; y++) |
890 | for (size_t x=0; x<4; x++) |
891 | matrix[y][x] = (Vertex_t)gpatch.v[y][x]; |
892 | } |
893 | } |
894 | |