1 | /* graphene-simd4x4f.h: 4x4 float vector operations |
2 | * |
3 | * SPDX-License-Identifier: MIT |
4 | * |
5 | * Copyright 2014 Emmanuele Bassi |
6 | * |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | * of this software and associated documentation files (the "Software"), to deal |
9 | * in the Software without restriction, including without limitation the rights |
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | * copies of the Software, and to permit persons to whom the Software is |
12 | * furnished to do so, subject to the following conditions: |
13 | * |
14 | * The above copyright notice and this permission notice shall be included in |
15 | * all copies or substantial portions of the Software. |
16 | * |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
23 | * THE SOFTWARE. |
24 | */ |
25 | |
26 | #pragma once |
27 | |
28 | #include "graphene-simd4f.h" |
29 | |
30 | #include <math.h> |
31 | #include <float.h> |
32 | |
33 | GRAPHENE_BEGIN_DECLS |
34 | |
35 | /** |
36 | * graphene_simd4x4f_t: |
37 | * |
38 | * A SIMD-based matrix type that uses four #graphene_simd4f_t vectors. |
39 | * |
40 | * The matrix is treated as row-major, i.e. the x, y, z, and w vectors |
41 | * are rows, and elements of each vector are a column: |
42 | * |
43 | * |[<!-- language="C" --> |
44 | * graphene_simd4x4f_t = { |
45 | * x.x, x.y, x.z, x.w, |
46 | * y.x, y.y, y.z, y.w, |
47 | * z.x, z.y, z.z, z.w, |
48 | * w.x, w.y, w.z, w.w |
49 | * } |
50 | * ]| |
51 | * |
52 | * The contents of the #graphene_simd4x4f_t type are private and |
53 | * cannot be accessed directly; use the provided API instead. |
54 | * |
55 | * Since: 1.0 |
56 | */ |
57 | |
58 | /** |
59 | * graphene_simd4x4f_init: |
60 | * @x: a #graphene_simd4f_t for the first row |
61 | * @y: a #graphene_simd4f_t for the second row |
62 | * @z: a #graphene_simd4f_t for the third row |
63 | * @w: a #graphene_simd4f_t for the fourth row |
64 | * |
65 | * Creates a new #graphene_simd4x4f_t using the given row vectors |
66 | * to initialize it. |
67 | * |
68 | * Returns: the newly created #graphene_simd4x4f_t |
69 | * |
70 | * Since: 1.0 |
71 | */ |
72 | static inline graphene_simd4x4f_t GRAPHENE_VECTORCALL |
73 | graphene_simd4x4f_init (graphene_simd4f_t x, |
74 | graphene_simd4f_t y, |
75 | graphene_simd4f_t z, |
76 | graphene_simd4f_t w) |
77 | { |
78 | graphene_simd4x4f_t s; |
79 | |
80 | s.x = x; |
81 | s.y = y; |
82 | s.z = z; |
83 | s.w = w; |
84 | |
85 | return s; |
86 | } |
87 | |
88 | /** |
89 | * graphene_simd4x4f_init_identity: |
90 | * @m: a #graphene_simd4x4f_t |
91 | * |
92 | * Initializes @m to be the identity matrix. |
93 | * |
94 | * Since: 1.0 |
95 | */ |
96 | static inline void |
97 | graphene_simd4x4f_init_identity (graphene_simd4x4f_t *m) |
98 | { |
99 | *m = graphene_simd4x4f_init (graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f), |
100 | graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f), |
101 | graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f), |
102 | graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f)); |
103 | } |
104 | |
105 | /** |
106 | * graphene_simd4x4f_init_from_float: |
107 | * @m: a #graphene_simd4x4f_t |
108 | * @f: (array fixed-size=16): an array of 16 floating point values |
109 | * |
110 | * Initializes a #graphene_simd4x4f_t with the given array |
111 | * of floating point values. |
112 | * |
113 | * Since: 1.0 |
114 | */ |
115 | static inline void |
116 | graphene_simd4x4f_init_from_float (graphene_simd4x4f_t *m, |
117 | const float *f) |
118 | { |
119 | m->x = graphene_simd4f_init_4f (f + 0); |
120 | m->y = graphene_simd4f_init_4f (f + 4); |
121 | m->z = graphene_simd4f_init_4f (f + 8); |
122 | m->w = graphene_simd4f_init_4f (f + 12); |
123 | } |
124 | |
125 | /** |
126 | * graphene_simd4x4f_to_float: |
127 | * @m: a #graphene_sidm4x4f_t |
128 | * @v: (out caller-allocates) (array fixed-size=16): a floating |
129 | * point values vector capable of holding at least 16 values |
130 | * |
131 | * Copies the content of @m in a float array. |
132 | * |
133 | * Since: 1.0 |
134 | */ |
135 | static inline void |
136 | graphene_simd4x4f_to_float (const graphene_simd4x4f_t *m, |
137 | float *v) |
138 | { |
139 | graphene_simd4f_dup_4f (m->x, v + 0); |
140 | graphene_simd4f_dup_4f (m->y, v + 4); |
141 | graphene_simd4f_dup_4f (m->z, v + 8); |
142 | graphene_simd4f_dup_4f (m->w, v + 12); |
143 | } |
144 | |
145 | GRAPHENE_AVAILABLE_IN_1_0 |
146 | void graphene_simd4x4f_transpose_in_place (graphene_simd4x4f_t *s); |
147 | |
148 | #if defined(GRAPHENE_USE_SSE) |
149 | |
150 | #ifdef __GNUC__ |
151 | #define graphene_simd4x4f_transpose_in_place(s) \ |
152 | (__extension__ ({ \ |
153 | _MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w); \ |
154 | })) |
155 | #elif defined (_MSC_VER) |
156 | #define graphene_simd4x4f_transpose_in_place(s) \ |
157 | _MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w) |
158 | #endif |
159 | |
160 | #elif defined(GRAPHENE_USE_GCC) |
161 | |
162 | #define graphene_simd4x4f_transpose_in_place(s) \ |
163 | (__extension__ ({ \ |
164 | const graphene_simd4f_t sx = (s)->x; \ |
165 | const graphene_simd4f_t sy = (s)->y; \ |
166 | const graphene_simd4f_t sz = (s)->z; \ |
167 | const graphene_simd4f_t sw = (s)->w; \ |
168 | (s)->x = graphene_simd4f_init (sx[0], sy[0], sz[0], sw[0]); \ |
169 | (s)->y = graphene_simd4f_init (sx[1], sy[1], sz[1], sw[1]); \ |
170 | (s)->z = graphene_simd4f_init (sx[2], sy[2], sz[2], sw[2]); \ |
171 | (s)->w = graphene_simd4f_init (sx[3], sy[3], sz[3], sw[3]); \ |
172 | })) |
173 | |
174 | #elif defined(GRAPHENE_USE_ARM_NEON) |
175 | |
176 | # ifdef __GNUC__ |
177 | |
178 | #define graphene_simd4x4f_transpose_in_place(s) \ |
179 | (__extension__ ({ \ |
180 | const graphene_simd4f_union_t sx = { (s)->x }; \ |
181 | const graphene_simd4f_union_t sy = { (s)->y }; \ |
182 | const graphene_simd4f_union_t sz = { (s)->z }; \ |
183 | const graphene_simd4f_union_t sw = { (s)->w }; \ |
184 | (s)->x = graphene_simd4f_init (sx.f[0], sy.f[0], sz.f[0], sw.f[0]); \ |
185 | (s)->y = graphene_simd4f_init (sx.f[1], sy.f[1], sz.f[1], sw.f[1]); \ |
186 | (s)->z = graphene_simd4f_init (sx.f[2], sy.f[2], sz.f[2], sw.f[2]); \ |
187 | (s)->w = graphene_simd4f_init (sx.f[3], sy.f[3], sz.f[3], sw.f[3]); \ |
188 | })) |
189 | |
190 | # elif defined (_MSC_VER) |
191 | |
192 | #define graphene_simd4x4f_transpose_in_place(s) _simd4x4f_transpose_in_place(s) |
193 | static inline void |
194 | _simd4x4f_transpose_in_place (graphene_simd4x4f_t *s) |
195 | { |
196 | const graphene_simd4f_union_t sx = { (s)->x }; |
197 | const graphene_simd4f_union_t sy = { (s)->y }; |
198 | const graphene_simd4f_union_t sz = { (s)->z }; |
199 | const graphene_simd4f_union_t sw = { (s)->w }; |
200 | (s)->x = graphene_simd4f_init (sx.f[0], sy.f[0], sz.f[0], sw.f[0]); |
201 | (s)->y = graphene_simd4f_init (sx.f[1], sy.f[1], sz.f[1], sw.f[1]); |
202 | (s)->z = graphene_simd4f_init (sx.f[2], sy.f[2], sz.f[2], sw.f[2]); |
203 | (s)->w = graphene_simd4f_init (sx.f[3], sy.f[3], sz.f[3], sw.f[3]); |
204 | } |
205 | |
206 | # endif |
207 | |
208 | #elif defined(GRAPHENE_USE_SCALAR) |
209 | |
210 | #define graphene_simd4x4f_transpose_in_place(s) \ |
211 | (graphene_simd4x4f_transpose_in_place ((graphene_simd4x4f_t *) (s))) |
212 | |
213 | #else |
214 | # error "No implementation for graphene_simd4x4f_t defined." |
215 | #endif |
216 | |
217 | /** |
218 | * graphene_simd4x4f_sum: |
219 | * @a: a #graphene_simd4f_t |
220 | * @res: (out): return location for the sum vector |
221 | * |
222 | * Adds all the row vectors of @a. |
223 | * |
224 | * Since: 1.0 |
225 | */ |
226 | static inline void |
227 | graphene_simd4x4f_sum (const graphene_simd4x4f_t *a, |
228 | graphene_simd4f_t *res) |
229 | { |
230 | graphene_simd4f_t s = graphene_simd4f_add (a->x, a->y); |
231 | s = graphene_simd4f_add (s, a->z); |
232 | s = graphene_simd4f_add (s, a->w); |
233 | *res = s; |
234 | } |
235 | |
236 | /** |
237 | * graphene_simd4x4f_vec4_mul: |
238 | * @a: a #graphene_simd4x4f_t |
239 | * @b: a #graphene_simd4f_t |
240 | * @res: (out): return location for a #graphene_simd4f_t |
241 | * |
242 | * Left multiplies the given #graphene_simd4x4f_t with the given |
243 | * #graphene_simd4f_t row vector using a dot product: |
244 | * |
245 | * |[<!-- language="plain" --> |
246 | * res = b × A |
247 | * |
248 | * = ⎡x⎤ ⎛ x.x x.y x.z x.w ⎞ |
249 | * ⎜y⎟ ⎜ y.x y.y y.z y.w ⎟ |
250 | * ⎜z⎟ ⎜ z.x z.y z.z z.w ⎟ |
251 | * ⎣w⎦ ⎝ w.x w.y w.z w.w ⎠ |
252 | * |
253 | * = [ x.x × x x.y × x x.z × x x.w × x ] |
254 | * + + + + |
255 | * [ y.x × y y.y × y y.z × y y.w × y ] |
256 | * + + + + |
257 | * [ z.x × z z.y × z z.z × z z.w × z ] |
258 | * + + + + |
259 | * [ w.x × w w.y × w w.z × w w.w × w ] |
260 | * |
261 | * = ⎡ x.x × x + y.x × y + z.x × z + w.x × w ⎤ |
262 | * ⎜ x.y × x + y.y × y + z.y × z + w.y × w ⎟ |
263 | * ⎜ x.z × x + y.z × y + z.z × z + w.z × w ⎟ |
264 | * ⎣ x.w × x + y.w × y + z.w × z + w.w × w ⎦ |
265 | * ]| |
266 | * |
267 | * Since: 1.0 |
268 | */ |
269 | static inline void |
270 | graphene_simd4x4f_vec4_mul (const graphene_simd4x4f_t *a, |
271 | const graphene_simd4f_t *b, |
272 | graphene_simd4f_t *res) |
273 | { |
274 | const graphene_simd4f_t v = *b; |
275 | const graphene_simd4f_t v_x = graphene_simd4f_splat_x (v); |
276 | const graphene_simd4f_t v_y = graphene_simd4f_splat_y (v); |
277 | const graphene_simd4f_t v_z = graphene_simd4f_splat_z (v); |
278 | const graphene_simd4f_t v_w = graphene_simd4f_splat_w (v); |
279 | |
280 | *res = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (a->x, v_x), |
281 | graphene_simd4f_mul (a->y, v_y)), |
282 | graphene_simd4f_add (graphene_simd4f_mul (a->z, v_z), |
283 | graphene_simd4f_mul (a->w, v_w))); |
284 | } |
285 | |
286 | /** |
287 | * graphene_simd4x4f_vec3_mul: |
288 | * @m: a #graphene_simd4x4f_t |
289 | * @v: a #graphene_simd4f_t |
290 | * @res: (out): return location for a #graphene_simd4f_t |
291 | * |
292 | * Left multiplies the given #graphene_simd4x4f_t with the given |
293 | * #graphene_simd4f_t, using only the first three row vectors |
294 | * of the matrix, and the first three components of the vector; |
295 | * the W components of the matrix and vector are ignored: |
296 | * |
297 | * |[<!-- language="plain" --> |
298 | * res = b × A |
299 | * |
300 | * = ⎡x⎤ ⎛ x.x x.y x.z ⎞ |
301 | * ⎜y⎟ ⎜ y.x y.y y.z ⎟ |
302 | * ⎣z⎦ ⎝ z.x z.y z.z ⎠ |
303 | * |
304 | * = [ x.x × x x.y × x x.z × x ] |
305 | * + + + |
306 | * [ y.x × y y.y × y y.z × y ] |
307 | * + + + |
308 | * [ z.x × z z.y × z z.z × z ] |
309 | * |
310 | * = ⎡ x.x × x + y.x × y + z.x × z ⎤ |
311 | * ⎜ x.y × x + y.y × y + z.y × z ⎟ |
312 | * ⎜ x.z × x + y.z × y + z.z × z ⎟ |
313 | * ⎣ 0 ⎦ |
314 | * ]| |
315 | * |
316 | * See also: graphene_simd4x4f_vec4_mul(), graphene_simd4x4f_point3_mul() |
317 | * |
318 | * Since: 1.0 |
319 | */ |
320 | static inline void |
321 | graphene_simd4x4f_vec3_mul (const graphene_simd4x4f_t *m, |
322 | const graphene_simd4f_t *v, |
323 | graphene_simd4f_t *res) |
324 | { |
325 | const graphene_simd4f_t v_x = graphene_simd4f_splat_x (*v); |
326 | const graphene_simd4f_t v_y = graphene_simd4f_splat_y (*v); |
327 | const graphene_simd4f_t v_z = graphene_simd4f_splat_z (*v); |
328 | graphene_simd4f_t r; |
329 | |
330 | r = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (m->x, v_x), |
331 | graphene_simd4f_mul (m->y, v_y)), |
332 | graphene_simd4f_mul (m->z, v_z)); |
333 | *res = graphene_simd4f_zero_w (r); |
334 | } |
335 | |
336 | /** |
337 | * graphene_simd4x4f_point3_mul: |
338 | * @m: a #graphene_simd4x4f_t |
339 | * @p: a #graphene_simd4f_t |
340 | * @res: (out): return location for a #graphene_simd4f_t |
341 | * |
342 | * Multiplies the given #graphene_simd4x4f_t with the given |
343 | * #graphene_simd4f_t. |
344 | * |
345 | * Unlike graphene_simd4x4f_vec3_mul(), this function will |
346 | * use the W components of the matrix: |
347 | * |
348 | * |[<!-- language="plain" --> |
349 | * res = b × A |
350 | * |
351 | * = ⎡x⎤ ⎛ x.x x.y x.z x.w ⎞ |
352 | * ⎜y⎟ ⎜ y.x y.y y.z y.w ⎟ |
353 | * ⎜z⎟ ⎜ z.x z.y z.z z.w ⎟ |
354 | * ⎣w⎦ ⎝ w.x w.y w.z w.w ⎠ |
355 | * |
356 | * = [ x.x × x x.y × x x.z × x x.w × x ] |
357 | * + + + + |
358 | * [ y.x × y y.y × y y.z × y y.w × y ] |
359 | * + + + + |
360 | * [ z.x × z z.y × z z.z × z z.w × z ] |
361 | * + + + + |
362 | * [ w.x w.y w.z w.w ] |
363 | * |
364 | * = ⎡ x.x × x + y.x × y + z.x × z + w.x ⎤ |
365 | * ⎜ x.y × x + y.y × y + z.y × z + w.y ⎟ |
366 | * ⎜ x.z × x + y.z × y + z.z × z + w.z ⎟ |
367 | * ⎣ x.w × x + y.w × y + z.w × z + w.w ⎦ |
368 | * ]| |
369 | * |
370 | * Since: 1.0 |
371 | */ |
372 | static inline void |
373 | graphene_simd4x4f_point3_mul (const graphene_simd4x4f_t *m, |
374 | const graphene_simd4f_t *p, |
375 | graphene_simd4f_t *res) |
376 | { |
377 | const graphene_simd4f_t v = *p; |
378 | const graphene_simd4f_t v_x = graphene_simd4f_splat_x (v); |
379 | const graphene_simd4f_t v_y = graphene_simd4f_splat_y (v); |
380 | const graphene_simd4f_t v_z = graphene_simd4f_splat_z (v); |
381 | |
382 | *res = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (m->x, v_x), |
383 | graphene_simd4f_mul (m->y, v_y)), |
384 | graphene_simd4f_add (graphene_simd4f_mul (m->z, v_z), |
385 | m->w)); |
386 | } |
387 | |
388 | /** |
389 | * graphene_simd4x4f_transpose: |
390 | * @s: a #graphene_simd4x4f_t |
391 | * @res: (out): return location for the transposed matrix |
392 | * |
393 | * Transposes the given #graphene_simd4x4f_t. |
394 | * |
395 | * Since: 1.0 |
396 | */ |
397 | static inline void |
398 | graphene_simd4x4f_transpose (const graphene_simd4x4f_t *s, |
399 | graphene_simd4x4f_t *res) |
400 | { |
401 | *res = *s; |
402 | graphene_simd4x4f_transpose_in_place (res); |
403 | } |
404 | |
405 | /** |
406 | * graphene_simd4x4f_inv_ortho_vec3_mul: |
407 | * @a: a #graphene_simd4x4f_t |
408 | * @b: a #graphene_simd4f_t |
409 | * @res: (out): return location for the transformed vector |
410 | * |
411 | * Performs the inverse orthographic transformation of the first |
412 | * three components in the given vector, using the first three |
413 | * row vectors of the given SIMD matrix. |
414 | * |
415 | * Since: 1.0 |
416 | */ |
417 | static inline void |
418 | graphene_simd4x4f_inv_ortho_vec3_mul (const graphene_simd4x4f_t *a, |
419 | const graphene_simd4f_t *b, |
420 | graphene_simd4f_t *res) |
421 | { |
422 | graphene_simd4x4f_t transpose = *a; |
423 | graphene_simd4f_t translation = *b; |
424 | |
425 | transpose.w = graphene_simd4f_init (0.f, 0.f, 0.f, 0.f); |
426 | graphene_simd4x4f_transpose_in_place (&transpose); |
427 | |
428 | graphene_simd4x4f_vec3_mul (m: &transpose, v: &translation, res); |
429 | } |
430 | |
431 | /** |
432 | * graphene_simd4x4f_inv_ortho_point3_mul: |
433 | * @a: a #graphene_simd4x4f_t |
434 | * @b: a #graphene_simd4x4f_t |
435 | * @res: (out): return location for the result vector |
436 | * |
437 | * Performs the inverse orthographic transformation of the first |
438 | * three components in the given vector, using the given SIMD |
439 | * matrix. |
440 | * |
441 | * Unlike graphene_simd4x4f_inv_ortho_vec3_mul(), this function |
442 | * will also use the fourth row vector of the SIMD matrix. |
443 | * |
444 | * Since: 1.0 |
445 | */ |
446 | static inline void |
447 | graphene_simd4x4f_inv_ortho_point3_mul (const graphene_simd4x4f_t *a, |
448 | const graphene_simd4f_t *b, |
449 | graphene_simd4f_t *res) |
450 | { |
451 | graphene_simd4f_t translation = graphene_simd4f_sub (*b, a->w); |
452 | graphene_simd4x4f_t transpose = *a; |
453 | |
454 | transpose.w = graphene_simd4f_init (0.f, 0.f, 0.f, 0.f); |
455 | graphene_simd4x4f_transpose_in_place (&transpose); |
456 | |
457 | graphene_simd4x4f_point3_mul (m: &transpose, p: &translation, res); |
458 | } |
459 | |
460 | /** |
461 | * graphene_simd4x4f_matrix_mul: |
462 | * @a: a #graphene_simd4x4f_t |
463 | * @b: a #graphene_simd4x4f_t |
464 | * @res: (out): return location for the result |
465 | * |
466 | * Multiplies the two matrices, following the convention: |
467 | * |
468 | * |[<!-- language="plain" --> |
469 | * res = A × B |
470 | * |
471 | * = ⎡ A.x × B ⎤ |
472 | * ⎜ A.y × B ⎟ |
473 | * ⎜ A.z × B ⎟ |
474 | * ⎣ A.w × B ⎦ |
475 | * |
476 | * = ⎡ res.x ⎤ |
477 | * ⎜ res.y ⎟ |
478 | * ⎜ res.z ⎟ |
479 | * ⎣ res.w ⎦ |
480 | * ]| |
481 | * |
482 | * See also: graphene_simd4x4f_vec4_mul() |
483 | * |
484 | * Since: 1.0 |
485 | */ |
486 | static inline void |
487 | graphene_simd4x4f_matrix_mul (const graphene_simd4x4f_t *a, |
488 | const graphene_simd4x4f_t *b, |
489 | graphene_simd4x4f_t *res) |
490 | { |
491 | #if 0 |
492 | /* this is the classic naive A*B implementation of the row * column |
493 | * matrix product. using a SIMD scalar implementation, it's fairly |
494 | * slow at 329ns per multiplication; the SSE implementation makes it |
495 | * about 10x faster, at 32ns; the GCC vector implementation is only |
496 | * 5x faster, at 66ns. the biggest culprits are the transpose operation |
497 | * and the multiple, one lane reads to compute the scalar sum. |
498 | */ |
499 | graphene_simd4x4f_t t; |
500 | |
501 | graphene_simd4x4f_transpose (b, &t); |
502 | |
503 | res->x = |
504 | graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.x)), |
505 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.y)), |
506 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.z)), |
507 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.w))); |
508 | |
509 | res->y = |
510 | graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.x)), |
511 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.y)), |
512 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.z)), |
513 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.w))); |
514 | |
515 | res->z = |
516 | graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.x)), |
517 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.y)), |
518 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.z)), |
519 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.w))); |
520 | |
521 | res->w = |
522 | graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.x)), |
523 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.y)), |
524 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.z)), |
525 | graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.w))); |
526 | #else |
527 | /* this is an optimized version of the matrix multiplication, using |
528 | * four dot products for each row vector. this yields drastically |
529 | * better numbers while retaining the same correct results as above: |
530 | * the scalar implementation now clocks at 91ns; the GCC vector |
531 | * implementation is 19ns; and the SSE implementation is 16ns. |
532 | * |
533 | * the order is correct if we want to multiply A with B; remember |
534 | * that matrix multiplication is non-commutative. |
535 | */ |
536 | graphene_simd4f_t x, y, z, w; |
537 | |
538 | graphene_simd4x4f_vec4_mul (a: b, b: &a->x, res: &x); |
539 | graphene_simd4x4f_vec4_mul (a: b, b: &a->y, res: &y); |
540 | graphene_simd4x4f_vec4_mul (a: b, b: &a->z, res: &z); |
541 | graphene_simd4x4f_vec4_mul (a: b, b: &a->w, res: &w); |
542 | |
543 | *res = graphene_simd4x4f_init (x, y, z, w); |
544 | #endif |
545 | } |
546 | |
547 | /** |
548 | * graphene_simd4x4f_init_perspective: |
549 | * @m: a #graphene_simd4x4f_t |
550 | * @fovy_rad: the angle of the field of vision, in radians |
551 | * @aspect: the aspect value |
552 | * @z_near: the depth of the near clipping plane |
553 | * @z_far: the depth of the far clipping plane |
554 | * |
555 | * Initializes a #graphene_simd4x4f_t with a perspective projection. |
556 | * |
557 | * Since: 1.0 |
558 | */ |
559 | static inline void |
560 | graphene_simd4x4f_init_perspective (graphene_simd4x4f_t *m, |
561 | float fovy_rad, |
562 | float aspect, |
563 | float z_near, |
564 | float z_far) |
565 | { |
566 | float delta_z = z_far - z_near; |
567 | float cotangent = tanf (GRAPHENE_PI_2 - fovy_rad * 0.5f); |
568 | |
569 | float a = cotangent / aspect; |
570 | float b = cotangent; |
571 | float c = -(z_far + z_near) / delta_z; |
572 | float d = -2 * z_near * z_far / delta_z; |
573 | |
574 | m->x = graphene_simd4f_init ( a, 0.0f, 0.0f, 0.0f); |
575 | m->y = graphene_simd4f_init (0.0f, b, 0.0f, 0.0f); |
576 | m->z = graphene_simd4f_init (0.0f, 0.0f, c, -1.0f); |
577 | m->w = graphene_simd4f_init (0.0f, 0.0f, d, 0.0f); |
578 | } |
579 | |
580 | /** |
581 | * graphene_simd4x4f_init_ortho: |
582 | * @m: a #graphene_simd4x4f_t |
583 | * @left: edge of the left clipping plane |
584 | * @right: edge of the right clipping plane |
585 | * @bottom: edge of the bottom clipping plane |
586 | * @top: edge of the top clipping plane |
587 | * @z_near: depth of the near clipping plane |
588 | * @z_far: depth of the far clipping plane |
589 | * |
590 | * Initializes the given SIMD matrix with an orthographic projection. |
591 | * |
592 | * Since: 1.0 |
593 | */ |
594 | static inline void |
595 | graphene_simd4x4f_init_ortho (graphene_simd4x4f_t *m, |
596 | float left, |
597 | float right, |
598 | float bottom, |
599 | float top, |
600 | float z_near, |
601 | float z_far) |
602 | { |
603 | float delta_x = right - left; |
604 | float delta_y = top - bottom; |
605 | float delta_z = z_far - z_near; |
606 | |
607 | float a = 2.0f / delta_x; |
608 | float b = -(right + left) / delta_x; |
609 | float c = 2.0f / delta_y; |
610 | float d = -(top + bottom) / delta_y; |
611 | float e = -2.0f / delta_z; |
612 | float f = -(z_far + z_near) / delta_z; |
613 | |
614 | m->x = graphene_simd4f_init ( a, 0.0f, 0.0f, 0.0f); |
615 | m->y = graphene_simd4f_init (0.0f, c, 0.0f, 0.0f); |
616 | m->z = graphene_simd4f_init (0.0f, 0.0f, e, 0.0f); |
617 | m->w = graphene_simd4f_init ( b, d, f, 1.0f); |
618 | } |
619 | |
620 | /** |
621 | * graphene_simd4x4f_init_look_at: |
622 | * @m: a #graphene_simd4x4f_t |
623 | * @eye: vector for the camera coordinates |
624 | * @center: vector for the object coordinates |
625 | * @up: vector for the upwards direction |
626 | * |
627 | * Initializes a SIMD matrix with the projection necessary for |
628 | * the camera at the @eye coordinates to look at the object at |
629 | * the @center coordinates. The top of the camera is aligned to |
630 | * the @up vector. |
631 | * |
632 | * Since: 1.0 |
633 | */ |
634 | static inline void |
635 | graphene_simd4x4f_init_look_at (graphene_simd4x4f_t *m, |
636 | graphene_simd4f_t eye, |
637 | graphene_simd4f_t center, |
638 | graphene_simd4f_t up) |
639 | { |
640 | const graphene_simd4f_t direction = graphene_simd4f_sub (center, eye); |
641 | graphene_simd4f_t cross; |
642 | graphene_simd4f_t z_axis; |
643 | graphene_simd4f_t x_axis; |
644 | graphene_simd4f_t y_axis; |
645 | float eye_v[4]; |
646 | |
647 | if (graphene_simd4f_get_x (graphene_simd4f_dot3 (direction, direction)) < FLT_EPSILON) |
648 | /* eye and center are in the same position */ |
649 | z_axis = graphene_simd4f_init (0, 0, 1, 0); |
650 | else |
651 | z_axis = graphene_simd4f_normalize3 (v: direction); |
652 | |
653 | cross = graphene_simd4f_cross3 (z_axis, up); |
654 | if (graphene_simd4f_get_x (graphene_simd4f_dot3 (cross, cross)) < FLT_EPSILON) |
655 | { |
656 | graphene_simd4f_t tweak_z; |
657 | |
658 | /* up and z_axis are parallel */ |
659 | if (fabs (graphene_simd4f_get_z (up) - 1.0) < FLT_EPSILON) |
660 | tweak_z = graphene_simd4f_init (0.0001f, 0, 0, 0); |
661 | else |
662 | tweak_z = graphene_simd4f_init (0, 0, 0.0001f, 0); |
663 | |
664 | z_axis = graphene_simd4f_add (z_axis, tweak_z); |
665 | z_axis = graphene_simd4f_normalize3 (v: z_axis); |
666 | cross = graphene_simd4f_cross3 (z_axis, up); |
667 | } |
668 | |
669 | x_axis = graphene_simd4f_normalize3 (v: cross); |
670 | y_axis = graphene_simd4f_cross3 (x_axis, z_axis); |
671 | |
672 | graphene_simd4f_dup_4f (eye, eye_v); |
673 | |
674 | m->x = x_axis; |
675 | m->y = y_axis; |
676 | m->z = graphene_simd4f_neg (z_axis); |
677 | m->w = graphene_simd4f_init (-eye_v[0], -eye_v[1], -eye_v[2], 1.f); |
678 | } |
679 | |
680 | /** |
681 | * graphene_simd4x4f_init_frustum: |
682 | * @m: a #graphene_simd4x4f_t |
683 | * @left: distance of the left clipping plane |
684 | * @right: distance of the right clipping plane |
685 | * @bottom: distance of the bottom clipping plane |
686 | * @top: distance of the top clipping plane |
687 | * @z_near: distance of the near clipping plane |
688 | * @z_far: distance of the far clipping plane |
689 | * |
690 | * Initializes a SIMD matrix with a frustum described by the distances |
691 | * of six clipping planes. |
692 | * |
693 | * Since: 1.2 |
694 | */ |
695 | static inline void |
696 | graphene_simd4x4f_init_frustum (graphene_simd4x4f_t *m, |
697 | float left, |
698 | float right, |
699 | float bottom, |
700 | float top, |
701 | float z_near, |
702 | float z_far) |
703 | { |
704 | float x = 2.f * z_near / (right - left); |
705 | float y = 2.f * z_near / (top - bottom); |
706 | |
707 | float a = (right + left) / (right - left); |
708 | float b = (top + bottom) / (top - bottom); |
709 | float c = -1.f * (z_far + z_near) / (z_far - z_near); |
710 | float d = -2.f * z_far * z_near / (z_far - z_near); |
711 | |
712 | m->x = graphene_simd4f_init ( x, 0.f, 0.f, 0.f); |
713 | m->y = graphene_simd4f_init (0.f, y, 0.f, 0.f); |
714 | m->z = graphene_simd4f_init ( a, b, c, -1.f); |
715 | m->w = graphene_simd4f_init (0.f, 0.f, d, 0.f); |
716 | } |
717 | |
718 | /** |
719 | * graphene_simd4x4f_perspective: |
720 | * @m: a #graphene_simd4x4f_t |
721 | * @depth: depth of the perspective |
722 | * |
723 | * Adds a perspective transformation for the given @depth. |
724 | * |
725 | * Since: 1.0 |
726 | */ |
727 | static inline void |
728 | graphene_simd4x4f_perspective (graphene_simd4x4f_t *m, |
729 | float depth) |
730 | { |
731 | #if 1 |
732 | const float m_xw = graphene_simd4f_get_w (m->x); |
733 | const float m_yw = graphene_simd4f_get_w (m->y); |
734 | const float m_zw = graphene_simd4f_get_w (m->z); |
735 | const float m_ww = graphene_simd4f_get_w (m->w); |
736 | |
737 | const float p0 = graphene_simd4f_get_z (m->x) + -1.0f / depth * m_xw; |
738 | const float p1 = graphene_simd4f_get_z (m->y) + -1.0f / depth * m_yw; |
739 | const float p2 = graphene_simd4f_get_z (m->z) + -1.0f / depth * m_zw; |
740 | const float p3 = graphene_simd4f_get_z (m->w) + -1.0f / depth * m_ww; |
741 | |
742 | const graphene_simd4f_t p_x = graphene_simd4f_merge_w (m->x, m_xw + p0); |
743 | const graphene_simd4f_t p_y = graphene_simd4f_merge_w (m->y, m_yw + p1); |
744 | const graphene_simd4f_t p_z = graphene_simd4f_merge_w (m->z, m_zw + p2); |
745 | const graphene_simd4f_t p_w = graphene_simd4f_merge_w (m->w, m_ww + p3); |
746 | #else |
747 | /* this is equivalent to the operations above, but trying to inline |
748 | * them into SIMD registers as much as possible by transposing the |
749 | * original matrix and operating on the resulting column vectors. it |
750 | * should warrant a micro benchmark, because while the above code is |
751 | * dominated by single channel reads, the code below has a transpose |
752 | * operation. |
753 | */ |
754 | graphene_simd4x4f_t t; |
755 | const graphene_simd4f_t f, p; |
756 | const graphene_simd4f_t p_x, p_y, p_z, p_w; |
757 | |
758 | graphene_simd4x4f_transpose (m, &t); |
759 | |
760 | f = graphene_simd4f_neg (graphene_simd4f_reciprocal (graphene_simd4f_splat (depth))); |
761 | p = graphene_simd4f_sum (t.w, graphene_simd4f_sum (t.z, graphene_simd4f_mul (f, t.w))); |
762 | p_x = graphene_simd4f_merge_w (m->x, graphene_simd4f_get_x (p)); |
763 | p_y = graphene_simd4f_merge_w (m->y, graphene_simd4f_get_y (p)); |
764 | p_z = graphene_simd4f_merge_w (m->z, graphene_simd4f_get_z (p)); |
765 | p_w = graphene_simd4f_merge_w (m->w, graphene_simd4f_get_w (p)); |
766 | #endif |
767 | |
768 | *m = graphene_simd4x4f_init (x: p_x, y: p_y, z: p_z, w: p_w); |
769 | } |
770 | |
771 | /** |
772 | * graphene_simd4x4f_translation: |
773 | * @m: a #graphene_simd4x4f_t |
774 | * @x: coordinate of the X translation |
775 | * @y: coordinate of the Y translation |
776 | * @z: coordinate of the Z translation |
777 | * |
778 | * Initializes @m to contain a translation to the given coordinates. |
779 | * |
780 | * Since: 1.0 |
781 | */ |
782 | static inline void |
783 | graphene_simd4x4f_translation (graphene_simd4x4f_t *m, |
784 | float x, |
785 | float y, |
786 | float z) |
787 | { |
788 | *m = graphene_simd4x4f_init (graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f), |
789 | graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f), |
790 | graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f), |
791 | graphene_simd4f_init ( x, y, z, 1.0f)); |
792 | } |
793 | |
794 | /** |
795 | * graphene_simd4x4f_scale: |
796 | * @m: a #graphene_simd4x4f_t |
797 | * @x: scaling factor on the X axis |
798 | * @y: scaling factor on the Y axis |
799 | * @z: scaling factor on the Z axis |
800 | * |
801 | * Initializes @m to contain a scaling transformation with the |
802 | * given factors. |
803 | * |
804 | * Since: 1.0 |
805 | */ |
806 | static inline void |
807 | graphene_simd4x4f_scale (graphene_simd4x4f_t *m, |
808 | float x, |
809 | float y, |
810 | float z) |
811 | { |
812 | *m = graphene_simd4x4f_init (graphene_simd4f_init ( x, 0.0f, 0.0f, 0.0f), |
813 | graphene_simd4f_init (0.0f, y, 0.0f, 0.0f), |
814 | graphene_simd4f_init (0.0f, 0.0f, z, 0.0f), |
815 | graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f)); |
816 | |
817 | } |
818 | |
819 | /** |
820 | * graphene_simd4x4f_rotation: |
821 | * @m: a #graphene_simd4x4f_t |
822 | * @rad: the rotation, in radians |
823 | * @axis: the vector of the axis of rotation |
824 | * |
825 | * Initializes @m to contain a rotation of the given angle |
826 | * along the given axis. |
827 | * |
828 | * Since: 1.0 |
829 | */ |
830 | static inline void |
831 | graphene_simd4x4f_rotation (graphene_simd4x4f_t *m, |
832 | float rad, |
833 | graphene_simd4f_t axis) |
834 | { |
835 | float sine, cosine; |
836 | float x, y, z; |
837 | float ab, bc, ca; |
838 | float tx, ty, tz; |
839 | graphene_simd4f_t i, j, k; |
840 | |
841 | rad = -rad; |
842 | axis = graphene_simd4f_normalize3 (v: axis); |
843 | |
844 | /* We cannot use graphene_sincos() because it's a private function, whereas |
845 | * graphene-simd4x4f.h is a public header |
846 | */ |
847 | sine = sinf (x: rad); |
848 | cosine = cosf (x: rad); |
849 | |
850 | x = graphene_simd4f_get_x (axis); |
851 | y = graphene_simd4f_get_y (axis); |
852 | z = graphene_simd4f_get_z (axis); |
853 | |
854 | ab = x * y * (1.0f - cosine); |
855 | bc = y * z * (1.0f - cosine); |
856 | ca = z * x * (1.0f - cosine); |
857 | |
858 | tx = x * x; |
859 | ty = y * y; |
860 | tz = z * z; |
861 | |
862 | i = graphene_simd4f_init (tx + cosine * (1.0f - tx), ab - z * sine, ca + y * sine, 0.f); |
863 | j = graphene_simd4f_init (ab + z * sine, ty + cosine * (1.0f - ty), bc - x * sine, 0.f); |
864 | k = graphene_simd4f_init (ca - y * sine, bc + x * sine, tz + cosine * (1.0f - tz), 0.f); |
865 | |
866 | *m = graphene_simd4x4f_init (x: i, y: j, z: k, graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f)); |
867 | } |
868 | |
869 | /** |
870 | * graphene_simd4x4f_add: |
871 | * @a: a #graphene_simd4x4f_t |
872 | * @b: a #graphene_simd4x4f_t |
873 | * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t |
874 | * |
875 | * Adds each row vector of @a and @b and places the results in @res. |
876 | * |
877 | * Since: 1.0 |
878 | */ |
879 | static inline void |
880 | graphene_simd4x4f_add (const graphene_simd4x4f_t *a, |
881 | const graphene_simd4x4f_t *b, |
882 | graphene_simd4x4f_t *res) |
883 | { |
884 | res->x = graphene_simd4f_add (a->x, b->x); |
885 | res->y = graphene_simd4f_add (a->y, b->y); |
886 | res->z = graphene_simd4f_add (a->z, b->z); |
887 | res->w = graphene_simd4f_add (a->w, b->w); |
888 | } |
889 | |
890 | /** |
891 | * graphene_simd4x4f_sub: |
892 | * @a: a #graphene_simd4x4f_t |
893 | * @b: a #graphene_simd4x4f_t |
894 | * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t |
895 | * |
896 | * Subtracts each row vector of @a and @b and places the results in @res. |
897 | * |
898 | * Since: 1.0 |
899 | */ |
900 | static inline void |
901 | graphene_simd4x4f_sub (const graphene_simd4x4f_t *a, |
902 | const graphene_simd4x4f_t *b, |
903 | graphene_simd4x4f_t *res) |
904 | { |
905 | res->x = graphene_simd4f_sub (a->x, b->x); |
906 | res->y = graphene_simd4f_sub (a->y, b->y); |
907 | res->z = graphene_simd4f_sub (a->z, b->z); |
908 | res->w = graphene_simd4f_sub (a->w, b->w); |
909 | } |
910 | |
911 | /** |
912 | * graphene_simd4x4f_mul: |
913 | * @a: a #graphene_simd4x4f_t |
914 | * @b: a #graphene_simd4x4f_t |
915 | * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t |
916 | * |
917 | * Multiplies each row vector of @a and @b and places the results in @res. |
918 | * |
919 | * You most likely want graphene_simd4x4f_matrix_mul() instead. |
920 | * |
921 | * Since: 1.0 |
922 | */ |
923 | static inline void |
924 | graphene_simd4x4f_mul (const graphene_simd4x4f_t *a, |
925 | const graphene_simd4x4f_t *b, |
926 | graphene_simd4x4f_t *res) |
927 | { |
928 | res->x = graphene_simd4f_mul (a->x, b->x); |
929 | res->y = graphene_simd4f_mul (a->y, b->y); |
930 | res->z = graphene_simd4f_mul (a->z, b->z); |
931 | res->w = graphene_simd4f_mul (a->w, b->w); |
932 | } |
933 | |
934 | /** |
935 | * graphene_simd4x4f_div: |
936 | * @a: a #graphene_simd4x4f_t |
937 | * @b: a #graphene_simd4x4f_t |
938 | * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t |
939 | * |
940 | * Divides each row vector of @a and @b and places the results in @res. |
941 | * |
942 | * Since: 1.0 |
943 | */ |
944 | static inline void |
945 | graphene_simd4x4f_div (const graphene_simd4x4f_t *a, |
946 | const graphene_simd4x4f_t *b, |
947 | graphene_simd4x4f_t *res) |
948 | { |
949 | res->x = graphene_simd4f_div (a->x, b->x); |
950 | res->y = graphene_simd4f_div (a->y, b->y); |
951 | res->z = graphene_simd4f_div (a->z, b->z); |
952 | res->w = graphene_simd4f_div (a->w, b->w); |
953 | } |
954 | |
955 | /** |
956 | * graphene_simd4x4f_inverse: |
957 | * @m: a #graphene_simd4x4f_t |
958 | * @res: (out): return location for the inverse matrix |
959 | * |
960 | * Inverts the given #graphene_simd4x4f_t. |
961 | * |
962 | * Returns: `true` if the matrix was invertible |
963 | * |
964 | * Since: 1.0 |
965 | */ |
966 | static inline bool |
967 | graphene_simd4x4f_inverse (const graphene_simd4x4f_t *m, |
968 | graphene_simd4x4f_t *res) |
969 | { |
970 | /* split rows */ |
971 | const graphene_simd4f_t r0 = m->x; |
972 | const graphene_simd4f_t r1 = m->y; |
973 | const graphene_simd4f_t r2 = m->z; |
974 | const graphene_simd4f_t r3 = m->w; |
975 | |
976 | /* cofactors */ |
977 | const graphene_simd4f_t r0_wxyz = graphene_simd4f_shuffle_wxyz (r0); |
978 | const graphene_simd4f_t r0_zwxy = graphene_simd4f_shuffle_zwxy (r0); |
979 | const graphene_simd4f_t r0_yzwx = graphene_simd4f_shuffle_yzwx (r0); |
980 | |
981 | const graphene_simd4f_t r1_wxyz = graphene_simd4f_shuffle_wxyz (r1); |
982 | const graphene_simd4f_t r1_zwxy = graphene_simd4f_shuffle_zwxy (r1); |
983 | const graphene_simd4f_t r1_yzwx = graphene_simd4f_shuffle_yzwx (r1); |
984 | |
985 | const graphene_simd4f_t r2_wxyz = graphene_simd4f_shuffle_wxyz (r2); |
986 | const graphene_simd4f_t r2_zwxy = graphene_simd4f_shuffle_zwxy (r2); |
987 | const graphene_simd4f_t r2_yzwx = graphene_simd4f_shuffle_yzwx (r2); |
988 | |
989 | const graphene_simd4f_t r3_wxyz = graphene_simd4f_shuffle_wxyz (r3); |
990 | const graphene_simd4f_t r3_zwxy = graphene_simd4f_shuffle_zwxy (r3); |
991 | const graphene_simd4f_t r3_yzwx = graphene_simd4f_shuffle_yzwx (r3); |
992 | |
993 | const graphene_simd4f_t r0_wxyz_x_r1 = graphene_simd4f_mul (r0_wxyz, r1); |
994 | const graphene_simd4f_t r0_wxyz_x_r1_yzwx = graphene_simd4f_mul (r0_wxyz, r1_yzwx); |
995 | const graphene_simd4f_t r0_wxyz_x_r1_zwxy = graphene_simd4f_mul (r0_wxyz, r1_zwxy); |
996 | |
997 | const graphene_simd4f_t r2_wxyz_x_r3 = graphene_simd4f_mul (r2_wxyz, r3); |
998 | const graphene_simd4f_t r2_wxyz_x_r3_yzwx = graphene_simd4f_mul (r2_wxyz, r3_yzwx); |
999 | const graphene_simd4f_t r2_wxyz_x_r3_zwxy = graphene_simd4f_mul (r2_wxyz, r3_zwxy); |
1000 | |
1001 | const graphene_simd4f_t ar1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3_zwxy), |
1002 | graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3)); |
1003 | const graphene_simd4f_t ar2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3_yzwx), |
1004 | r2_wxyz_x_r3_yzwx); |
1005 | const graphene_simd4f_t ar3 = graphene_simd4f_sub (r2_wxyz_x_r3_zwxy, |
1006 | graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3)); |
1007 | |
1008 | const graphene_simd4f_t br1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r0_wxyz_x_r1_zwxy), |
1009 | graphene_simd4f_shuffle_zwxy (r0_wxyz_x_r1)); |
1010 | const graphene_simd4f_t br2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r0_wxyz_x_r1_yzwx), |
1011 | r0_wxyz_x_r1_yzwx); |
1012 | const graphene_simd4f_t br3 = graphene_simd4f_sub (r0_wxyz_x_r1_zwxy, |
1013 | graphene_simd4f_shuffle_wxyz (r0_wxyz_x_r1)); |
1014 | |
1015 | const graphene_simd4f_t r0_sum = |
1016 | graphene_simd4f_madd (m1: r0_yzwx, m2: ar3, |
1017 | a: graphene_simd4f_madd (m1: r0_zwxy, m2: ar2, |
1018 | graphene_simd4f_mul (r0_wxyz, ar1))); |
1019 | const graphene_simd4f_t r1_sum = |
1020 | graphene_simd4f_madd (m1: r1_wxyz, m2: ar1, |
1021 | a: graphene_simd4f_madd (m1: r1_zwxy, m2: ar2, |
1022 | graphene_simd4f_mul (r1_yzwx, ar3))); |
1023 | const graphene_simd4f_t r2_sum = |
1024 | graphene_simd4f_madd (m1: r2_yzwx, m2: br3, |
1025 | a: graphene_simd4f_madd (m1: r2_zwxy, m2: br2, |
1026 | graphene_simd4f_mul (r2_wxyz, br1))); |
1027 | const graphene_simd4f_t r3_sum = |
1028 | graphene_simd4f_madd (m1: r3_yzwx, m2: br3, |
1029 | a: graphene_simd4f_madd (m1: r3_zwxy, m2: br2, |
1030 | graphene_simd4f_mul (r3_wxyz, br1))); |
1031 | |
1032 | /* determinant and its inverse */ |
1033 | const graphene_simd4f_t d0 = graphene_simd4f_mul (r1_sum, r0); |
1034 | const graphene_simd4f_t d1 = graphene_simd4f_add (d0, graphene_simd4f_merge_high (d0, d0)); |
1035 | const graphene_simd4f_t det = graphene_simd4f_sub (d1, graphene_simd4f_splat_y (d1)); |
1036 | if (fabsf (graphene_simd4f_get_x (det)) >= FLT_EPSILON) |
1037 | { |
1038 | const graphene_simd4f_t invdet = graphene_simd4f_splat_x (graphene_simd4f_div (graphene_simd4f_splat (1.0f), det)); |
1039 | |
1040 | const graphene_simd4f_t o0 = graphene_simd4f_mul (graphene_simd4f_flip_sign_0101 (r1_sum), invdet); |
1041 | const graphene_simd4f_t o1 = graphene_simd4f_mul (graphene_simd4f_flip_sign_1010 (r0_sum), invdet); |
1042 | const graphene_simd4f_t o2 = graphene_simd4f_mul (graphene_simd4f_flip_sign_0101 (r3_sum), invdet); |
1043 | const graphene_simd4f_t o3 = graphene_simd4f_mul (graphene_simd4f_flip_sign_1010 (r2_sum), invdet); |
1044 | |
1045 | graphene_simd4x4f_t mt = graphene_simd4x4f_init (x: o0, y: o1, z: o2, w: o3); |
1046 | |
1047 | /* transpose the resulting matrix */ |
1048 | graphene_simd4x4f_transpose (s: &mt, res); |
1049 | |
1050 | return true; |
1051 | } |
1052 | |
1053 | return false; |
1054 | } |
1055 | |
1056 | /** |
1057 | * graphene_simd4x4f_determinant: |
1058 | * @m: a #graphene_simd4x4f_t |
1059 | * @det_r: (out): return location for the matrix determinant |
1060 | * @invdet_r: (out): return location for the inverse of the matrix |
1061 | * determinant |
1062 | * |
1063 | * Computes the determinant (and its inverse) of the given matrix |
1064 | * |
1065 | * Since: 1.0 |
1066 | */ |
1067 | static inline void |
1068 | graphene_simd4x4f_determinant (const graphene_simd4x4f_t *m, |
1069 | graphene_simd4f_t *det_r, |
1070 | graphene_simd4f_t *invdet_r) |
1071 | { |
1072 | /* split rows */ |
1073 | const graphene_simd4f_t r0 = m->x; |
1074 | const graphene_simd4f_t r1 = m->y; |
1075 | const graphene_simd4f_t r2 = m->z; |
1076 | const graphene_simd4f_t r3 = m->w; |
1077 | |
1078 | /* cofactors */ |
1079 | const graphene_simd4f_t r1_wxyz = graphene_simd4f_shuffle_wxyz (r1); |
1080 | const graphene_simd4f_t r1_zwxy = graphene_simd4f_shuffle_zwxy (r1); |
1081 | const graphene_simd4f_t r1_yzwx = graphene_simd4f_shuffle_yzwx (r1); |
1082 | |
1083 | const graphene_simd4f_t r2_wxyz = graphene_simd4f_shuffle_wxyz (r2); |
1084 | |
1085 | const graphene_simd4f_t r3_zwxy = graphene_simd4f_shuffle_zwxy (r3); |
1086 | const graphene_simd4f_t r3_yzwx = graphene_simd4f_shuffle_yzwx (r3); |
1087 | |
1088 | const graphene_simd4f_t r2_wxyz_x_r3 = graphene_simd4f_mul (r2_wxyz, r3); |
1089 | const graphene_simd4f_t r2_wxyz_x_r3_yzwx = graphene_simd4f_mul (r2_wxyz, r3_yzwx); |
1090 | const graphene_simd4f_t r2_wxyz_x_r3_zwxy = graphene_simd4f_mul (r2_wxyz, r3_zwxy); |
1091 | |
1092 | const graphene_simd4f_t ar1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3_zwxy), |
1093 | graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3)); |
1094 | const graphene_simd4f_t ar2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3_yzwx), |
1095 | r2_wxyz_x_r3_yzwx); |
1096 | const graphene_simd4f_t ar3 = graphene_simd4f_sub (r2_wxyz_x_r3_zwxy, |
1097 | graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3)); |
1098 | |
1099 | const graphene_simd4f_t r1_sum = |
1100 | graphene_simd4f_madd (m1: r1_wxyz, m2: ar1, |
1101 | a: graphene_simd4f_madd (m1: r1_zwxy, m2: ar2, |
1102 | graphene_simd4f_mul (r1_yzwx, ar3))); |
1103 | |
1104 | /* determinant and its inverse */ |
1105 | const graphene_simd4f_t d0 = graphene_simd4f_mul (r1_sum, r0); |
1106 | const graphene_simd4f_t d1 = graphene_simd4f_add (d0, graphene_simd4f_merge_high (d0, d0)); |
1107 | |
1108 | const graphene_simd4f_t det = graphene_simd4f_sub (d1, graphene_simd4f_splat_y (d1)); |
1109 | |
1110 | const graphene_simd4f_t invdet = graphene_simd4f_splat_x (graphene_simd4f_div (graphene_simd4f_splat (1.0f), det)); |
1111 | |
1112 | if (det_r != NULL) |
1113 | *det_r = det; |
1114 | |
1115 | if (invdet_r != NULL) |
1116 | *invdet_r = invdet; |
1117 | } |
1118 | |
1119 | /** |
1120 | * graphene_simd4x4f_is_identity: |
1121 | * @m: a #graphene_simd4x4f_t |
1122 | * |
1123 | * Checks whether the given matrix is the identity matrix. |
1124 | * |
1125 | * Returns: `true` if the matrix is the identity matrix |
1126 | * |
1127 | * Since: 1.0 |
1128 | */ |
1129 | static inline bool |
1130 | graphene_simd4x4f_is_identity (const graphene_simd4x4f_t *m) |
1131 | { |
1132 | const graphene_simd4f_t r0 = graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f); |
1133 | const graphene_simd4f_t r1 = graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f); |
1134 | const graphene_simd4f_t r2 = graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f); |
1135 | const graphene_simd4f_t r3 = graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f); |
1136 | |
1137 | return graphene_simd4f_cmp_eq (m->x, r0) && |
1138 | graphene_simd4f_cmp_eq (m->y, r1) && |
1139 | graphene_simd4f_cmp_eq (m->z, r2) && |
1140 | graphene_simd4f_cmp_eq (m->w, r3); |
1141 | } |
1142 | |
1143 | /** |
1144 | * graphene_simd4x4f_is_2d: |
1145 | * @m: a #graphene_simd4x4f_t |
1146 | * |
1147 | * Checks whether the given matrix is compatible with an affine |
1148 | * transformation matrix. |
1149 | * |
1150 | * Returns: `true` if the matrix is compatible with an affine |
1151 | * transformation matrix |
1152 | * |
1153 | * Since: 1.0 |
1154 | */ |
1155 | static inline bool |
1156 | graphene_simd4x4f_is_2d (const graphene_simd4x4f_t *m) |
1157 | { |
1158 | float f[4]; |
1159 | |
1160 | if (!(fabsf (graphene_simd4f_get_z (m->x)) < FLT_EPSILON && fabsf (graphene_simd4f_get_w (m->x)) < FLT_EPSILON)) |
1161 | return false; |
1162 | |
1163 | if (!(fabsf (graphene_simd4f_get_z (m->y)) < FLT_EPSILON && fabsf (graphene_simd4f_get_w (m->y)) < FLT_EPSILON)) |
1164 | return false; |
1165 | |
1166 | graphene_simd4f_dup_4f (m->z, f); |
1167 | if (!(fabsf (x: f[0]) < FLT_EPSILON && |
1168 | fabsf (x: f[1]) < FLT_EPSILON && |
1169 | 1.f - fabsf (x: f[2]) < FLT_EPSILON && |
1170 | fabsf (x: f[3]) < FLT_EPSILON)) |
1171 | return false; |
1172 | |
1173 | if (!(fabsf (graphene_simd4f_get_z (m->w)) < FLT_EPSILON && 1.f - fabsf (graphene_simd4f_get_w (m->w)) < FLT_EPSILON)) |
1174 | return false; |
1175 | |
1176 | return true; |
1177 | } |
1178 | |
1179 | GRAPHENE_END_DECLS |
1180 | |