1/* graphene-simd4x4f.h: 4x4 float vector operations
2 *
3 * SPDX-License-Identifier: MIT
4 *
5 * Copyright 2014 Emmanuele Bassi
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26#pragma once
27
28#include "graphene-simd4f.h"
29
30#include <math.h>
31#include <float.h>
32
33GRAPHENE_BEGIN_DECLS
34
/**
 * graphene_simd4x4f_t:
 *
 * A SIMD-based matrix type that uses four #graphene_simd4f_t vectors.
 *
 * The matrix is treated as row-major, i.e. the x, y, z, and w vectors
 * are the rows, and the components of each vector are the columns:
 *
 * |[<!-- language="C" -->
 *   graphene_simd4x4f_t = {
 *     x.x, x.y, x.z, x.w,
 *     y.x, y.y, y.z, y.w,
 *     z.x, z.y, z.z, z.w,
 *     w.x, w.y, w.z, w.w
 *   }
 * ]|
 *
 * The contents of the #graphene_simd4x4f_t type are private and
 * cannot be accessed directly; use the provided API instead.
 *
 * Since: 1.0
 */
57
58/**
59 * graphene_simd4x4f_init:
60 * @x: a #graphene_simd4f_t for the first row
61 * @y: a #graphene_simd4f_t for the second row
62 * @z: a #graphene_simd4f_t for the third row
63 * @w: a #graphene_simd4f_t for the fourth row
64 *
65 * Creates a new #graphene_simd4x4f_t using the given row vectors
66 * to initialize it.
67 *
68 * Returns: the newly created #graphene_simd4x4f_t
69 *
70 * Since: 1.0
71 */
72static inline graphene_simd4x4f_t GRAPHENE_VECTORCALL
73graphene_simd4x4f_init (graphene_simd4f_t x,
74 graphene_simd4f_t y,
75 graphene_simd4f_t z,
76 graphene_simd4f_t w)
77{
78 graphene_simd4x4f_t s;
79
80 s.x = x;
81 s.y = y;
82 s.z = z;
83 s.w = w;
84
85 return s;
86}
87
88/**
89 * graphene_simd4x4f_init_identity:
90 * @m: a #graphene_simd4x4f_t
91 *
92 * Initializes @m to be the identity matrix.
93 *
94 * Since: 1.0
95 */
96static inline void
97graphene_simd4x4f_init_identity (graphene_simd4x4f_t *m)
98{
99 *m = graphene_simd4x4f_init (graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f),
100 graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f),
101 graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f),
102 graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f));
103}
104
105/**
106 * graphene_simd4x4f_init_from_float:
107 * @m: a #graphene_simd4x4f_t
108 * @f: (array fixed-size=16): an array of 16 floating point values
109 *
110 * Initializes a #graphene_simd4x4f_t with the given array
111 * of floating point values.
112 *
113 * Since: 1.0
114 */
115static inline void
116graphene_simd4x4f_init_from_float (graphene_simd4x4f_t *m,
117 const float *f)
118{
119 m->x = graphene_simd4f_init_4f (f + 0);
120 m->y = graphene_simd4f_init_4f (f + 4);
121 m->z = graphene_simd4f_init_4f (f + 8);
122 m->w = graphene_simd4f_init_4f (f + 12);
123}
124
125/**
126 * graphene_simd4x4f_to_float:
127 * @m: a #graphene_sidm4x4f_t
128 * @v: (out caller-allocates) (array fixed-size=16): a floating
129 * point values vector capable of holding at least 16 values
130 *
131 * Copies the content of @m in a float array.
132 *
133 * Since: 1.0
134 */
135static inline void
136graphene_simd4x4f_to_float (const graphene_simd4x4f_t *m,
137 float *v)
138{
139 graphene_simd4f_dup_4f (m->x, v + 0);
140 graphene_simd4f_dup_4f (m->y, v + 4);
141 graphene_simd4f_dup_4f (m->z, v + 8);
142 graphene_simd4f_dup_4f (m->w, v + 12);
143}
144
145GRAPHENE_AVAILABLE_IN_1_0
146void graphene_simd4x4f_transpose_in_place (graphene_simd4x4f_t *s);
147
148#if defined(GRAPHENE_USE_SSE)
149
150#ifdef __GNUC__
151#define graphene_simd4x4f_transpose_in_place(s) \
152 (__extension__ ({ \
153 _MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w); \
154 }))
155#elif defined (_MSC_VER)
156#define graphene_simd4x4f_transpose_in_place(s) \
157 _MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w)
158#endif
159
160#elif defined(GRAPHENE_USE_GCC)
161
162#define graphene_simd4x4f_transpose_in_place(s) \
163 (__extension__ ({ \
164 const graphene_simd4f_t sx = (s)->x; \
165 const graphene_simd4f_t sy = (s)->y; \
166 const graphene_simd4f_t sz = (s)->z; \
167 const graphene_simd4f_t sw = (s)->w; \
168 (s)->x = graphene_simd4f_init (sx[0], sy[0], sz[0], sw[0]); \
169 (s)->y = graphene_simd4f_init (sx[1], sy[1], sz[1], sw[1]); \
170 (s)->z = graphene_simd4f_init (sx[2], sy[2], sz[2], sw[2]); \
171 (s)->w = graphene_simd4f_init (sx[3], sy[3], sz[3], sw[3]); \
172 }))
173
174#elif defined(GRAPHENE_USE_ARM_NEON)
175
176# ifdef __GNUC__
177
178#define graphene_simd4x4f_transpose_in_place(s) \
179 (__extension__ ({ \
180 const graphene_simd4f_union_t sx = { (s)->x }; \
181 const graphene_simd4f_union_t sy = { (s)->y }; \
182 const graphene_simd4f_union_t sz = { (s)->z }; \
183 const graphene_simd4f_union_t sw = { (s)->w }; \
184 (s)->x = graphene_simd4f_init (sx.f[0], sy.f[0], sz.f[0], sw.f[0]); \
185 (s)->y = graphene_simd4f_init (sx.f[1], sy.f[1], sz.f[1], sw.f[1]); \
186 (s)->z = graphene_simd4f_init (sx.f[2], sy.f[2], sz.f[2], sw.f[2]); \
187 (s)->w = graphene_simd4f_init (sx.f[3], sy.f[3], sz.f[3], sw.f[3]); \
188 }))
189
190# elif defined (_MSC_VER)
191
192#define graphene_simd4x4f_transpose_in_place(s) _simd4x4f_transpose_in_place(s)
193static inline void
194_simd4x4f_transpose_in_place (graphene_simd4x4f_t *s)
195{
196 const graphene_simd4f_union_t sx = { (s)->x };
197 const graphene_simd4f_union_t sy = { (s)->y };
198 const graphene_simd4f_union_t sz = { (s)->z };
199 const graphene_simd4f_union_t sw = { (s)->w };
200 (s)->x = graphene_simd4f_init (sx.f[0], sy.f[0], sz.f[0], sw.f[0]);
201 (s)->y = graphene_simd4f_init (sx.f[1], sy.f[1], sz.f[1], sw.f[1]);
202 (s)->z = graphene_simd4f_init (sx.f[2], sy.f[2], sz.f[2], sw.f[2]);
203 (s)->w = graphene_simd4f_init (sx.f[3], sy.f[3], sz.f[3], sw.f[3]);
204}
205
206# endif
207
208#elif defined(GRAPHENE_USE_SCALAR)
209
210#define graphene_simd4x4f_transpose_in_place(s) \
211 (graphene_simd4x4f_transpose_in_place ((graphene_simd4x4f_t *) (s)))
212
213#else
214# error "No implementation for graphene_simd4x4f_t defined."
215#endif
216
217/**
218 * graphene_simd4x4f_sum:
219 * @a: a #graphene_simd4f_t
220 * @res: (out): return location for the sum vector
221 *
222 * Adds all the row vectors of @a.
223 *
224 * Since: 1.0
225 */
226static inline void
227graphene_simd4x4f_sum (const graphene_simd4x4f_t *a,
228 graphene_simd4f_t *res)
229{
230 graphene_simd4f_t s = graphene_simd4f_add (a->x, a->y);
231 s = graphene_simd4f_add (s, a->z);
232 s = graphene_simd4f_add (s, a->w);
233 *res = s;
234}
235
236/**
237 * graphene_simd4x4f_vec4_mul:
238 * @a: a #graphene_simd4x4f_t
239 * @b: a #graphene_simd4f_t
240 * @res: (out): return location for a #graphene_simd4f_t
241 *
242 * Left multiplies the given #graphene_simd4x4f_t with the given
243 * #graphene_simd4f_t row vector using a dot product:
244 *
245 * |[<!-- language="plain" -->
246 * res = b × A
247 *
248 * = ⎡x⎤ ⎛ x.x x.y x.z x.w ⎞
249 * ⎜y⎟ ⎜ y.x y.y y.z y.w ⎟
250 * ⎜z⎟ ⎜ z.x z.y z.z z.w ⎟
251 * ⎣w⎦ ⎝ w.x w.y w.z w.w ⎠
252 *
253 * = [ x.x × x x.y × x x.z × x x.w × x ]
254 * + + + +
255 * [ y.x × y y.y × y y.z × y y.w × y ]
256 * + + + +
257 * [ z.x × z z.y × z z.z × z z.w × z ]
258 * + + + +
259 * [ w.x × w w.y × w w.z × w w.w × w ]
260 *
261 * = ⎡ x.x × x + y.x × y + z.x × z + w.x × w ⎤
262 * ⎜ x.y × x + y.y × y + z.y × z + w.y × w ⎟
263 * ⎜ x.z × x + y.z × y + z.z × z + w.z × w ⎟
264 * ⎣ x.w × x + y.w × y + z.w × z + w.w × w ⎦
265 * ]|
266 *
267 * Since: 1.0
268 */
269static inline void
270graphene_simd4x4f_vec4_mul (const graphene_simd4x4f_t *a,
271 const graphene_simd4f_t *b,
272 graphene_simd4f_t *res)
273{
274 const graphene_simd4f_t v = *b;
275 const graphene_simd4f_t v_x = graphene_simd4f_splat_x (v);
276 const graphene_simd4f_t v_y = graphene_simd4f_splat_y (v);
277 const graphene_simd4f_t v_z = graphene_simd4f_splat_z (v);
278 const graphene_simd4f_t v_w = graphene_simd4f_splat_w (v);
279
280 *res = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (a->x, v_x),
281 graphene_simd4f_mul (a->y, v_y)),
282 graphene_simd4f_add (graphene_simd4f_mul (a->z, v_z),
283 graphene_simd4f_mul (a->w, v_w)));
284}
285
286/**
287 * graphene_simd4x4f_vec3_mul:
288 * @m: a #graphene_simd4x4f_t
289 * @v: a #graphene_simd4f_t
290 * @res: (out): return location for a #graphene_simd4f_t
291 *
292 * Left multiplies the given #graphene_simd4x4f_t with the given
293 * #graphene_simd4f_t, using only the first three row vectors
294 * of the matrix, and the first three components of the vector;
295 * the W components of the matrix and vector are ignored:
296 *
297 * |[<!-- language="plain" -->
298 * res = b × A
299 *
300 * = ⎡x⎤ ⎛ x.x x.y x.z ⎞
301 * ⎜y⎟ ⎜ y.x y.y y.z ⎟
302 * ⎣z⎦ ⎝ z.x z.y z.z ⎠
303 *
304 * = [ x.x × x x.y × x x.z × x ]
305 * + + +
306 * [ y.x × y y.y × y y.z × y ]
307 * + + +
308 * [ z.x × z z.y × z z.z × z ]
309 *
310 * = ⎡ x.x × x + y.x × y + z.x × z ⎤
311 * ⎜ x.y × x + y.y × y + z.y × z ⎟
312 * ⎜ x.z × x + y.z × y + z.z × z ⎟
313 * ⎣ 0 ⎦
314 * ]|
315 *
316 * See also: graphene_simd4x4f_vec4_mul(), graphene_simd4x4f_point3_mul()
317 *
318 * Since: 1.0
319 */
320static inline void
321graphene_simd4x4f_vec3_mul (const graphene_simd4x4f_t *m,
322 const graphene_simd4f_t *v,
323 graphene_simd4f_t *res)
324{
325 const graphene_simd4f_t v_x = graphene_simd4f_splat_x (*v);
326 const graphene_simd4f_t v_y = graphene_simd4f_splat_y (*v);
327 const graphene_simd4f_t v_z = graphene_simd4f_splat_z (*v);
328 graphene_simd4f_t r;
329
330 r = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (m->x, v_x),
331 graphene_simd4f_mul (m->y, v_y)),
332 graphene_simd4f_mul (m->z, v_z));
333 *res = graphene_simd4f_zero_w (r);
334}
335
336/**
337 * graphene_simd4x4f_point3_mul:
338 * @m: a #graphene_simd4x4f_t
339 * @p: a #graphene_simd4f_t
340 * @res: (out): return location for a #graphene_simd4f_t
341 *
342 * Multiplies the given #graphene_simd4x4f_t with the given
343 * #graphene_simd4f_t.
344 *
345 * Unlike graphene_simd4x4f_vec3_mul(), this function will
346 * use the W components of the matrix:
347 *
348 * |[<!-- language="plain" -->
349 * res = b × A
350 *
351 * = ⎡x⎤ ⎛ x.x x.y x.z x.w ⎞
352 * ⎜y⎟ ⎜ y.x y.y y.z y.w ⎟
353 * ⎜z⎟ ⎜ z.x z.y z.z z.w ⎟
354 * ⎣w⎦ ⎝ w.x w.y w.z w.w ⎠
355 *
356 * = [ x.x × x x.y × x x.z × x x.w × x ]
357 * + + + +
358 * [ y.x × y y.y × y y.z × y y.w × y ]
359 * + + + +
360 * [ z.x × z z.y × z z.z × z z.w × z ]
361 * + + + +
362 * [ w.x w.y w.z w.w ]
363 *
364 * = ⎡ x.x × x + y.x × y + z.x × z + w.x ⎤
365 * ⎜ x.y × x + y.y × y + z.y × z + w.y ⎟
366 * ⎜ x.z × x + y.z × y + z.z × z + w.z ⎟
367 * ⎣ x.w × x + y.w × y + z.w × z + w.w ⎦
368 * ]|
369 *
370 * Since: 1.0
371 */
372static inline void
373graphene_simd4x4f_point3_mul (const graphene_simd4x4f_t *m,
374 const graphene_simd4f_t *p,
375 graphene_simd4f_t *res)
376{
377 const graphene_simd4f_t v = *p;
378 const graphene_simd4f_t v_x = graphene_simd4f_splat_x (v);
379 const graphene_simd4f_t v_y = graphene_simd4f_splat_y (v);
380 const graphene_simd4f_t v_z = graphene_simd4f_splat_z (v);
381
382 *res = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (m->x, v_x),
383 graphene_simd4f_mul (m->y, v_y)),
384 graphene_simd4f_add (graphene_simd4f_mul (m->z, v_z),
385 m->w));
386}
387
388/**
389 * graphene_simd4x4f_transpose:
390 * @s: a #graphene_simd4x4f_t
391 * @res: (out): return location for the transposed matrix
392 *
393 * Transposes the given #graphene_simd4x4f_t.
394 *
395 * Since: 1.0
396 */
397static inline void
398graphene_simd4x4f_transpose (const graphene_simd4x4f_t *s,
399 graphene_simd4x4f_t *res)
400{
401 *res = *s;
402 graphene_simd4x4f_transpose_in_place (res);
403}
404
405/**
406 * graphene_simd4x4f_inv_ortho_vec3_mul:
407 * @a: a #graphene_simd4x4f_t
408 * @b: a #graphene_simd4f_t
409 * @res: (out): return location for the transformed vector
410 *
411 * Performs the inverse orthographic transformation of the first
412 * three components in the given vector, using the first three
413 * row vectors of the given SIMD matrix.
414 *
415 * Since: 1.0
416 */
417static inline void
418graphene_simd4x4f_inv_ortho_vec3_mul (const graphene_simd4x4f_t *a,
419 const graphene_simd4f_t *b,
420 graphene_simd4f_t *res)
421{
422 graphene_simd4x4f_t transpose = *a;
423 graphene_simd4f_t translation = *b;
424
425 transpose.w = graphene_simd4f_init (0.f, 0.f, 0.f, 0.f);
426 graphene_simd4x4f_transpose_in_place (&transpose);
427
428 graphene_simd4x4f_vec3_mul (m: &transpose, v: &translation, res);
429}
430
431/**
432 * graphene_simd4x4f_inv_ortho_point3_mul:
433 * @a: a #graphene_simd4x4f_t
434 * @b: a #graphene_simd4x4f_t
435 * @res: (out): return location for the result vector
436 *
437 * Performs the inverse orthographic transformation of the first
438 * three components in the given vector, using the given SIMD
439 * matrix.
440 *
441 * Unlike graphene_simd4x4f_inv_ortho_vec3_mul(), this function
442 * will also use the fourth row vector of the SIMD matrix.
443 *
444 * Since: 1.0
445 */
446static inline void
447graphene_simd4x4f_inv_ortho_point3_mul (const graphene_simd4x4f_t *a,
448 const graphene_simd4f_t *b,
449 graphene_simd4f_t *res)
450{
451 graphene_simd4f_t translation = graphene_simd4f_sub (*b, a->w);
452 graphene_simd4x4f_t transpose = *a;
453
454 transpose.w = graphene_simd4f_init (0.f, 0.f, 0.f, 0.f);
455 graphene_simd4x4f_transpose_in_place (&transpose);
456
457 graphene_simd4x4f_point3_mul (m: &transpose, p: &translation, res);
458}
459
460/**
461 * graphene_simd4x4f_matrix_mul:
462 * @a: a #graphene_simd4x4f_t
463 * @b: a #graphene_simd4x4f_t
464 * @res: (out): return location for the result
465 *
466 * Multiplies the two matrices, following the convention:
467 *
468 * |[<!-- language="plain" -->
469 * res = A × B
470 *
471 * = ⎡ A.x × B ⎤
472 * ⎜ A.y × B ⎟
473 * ⎜ A.z × B ⎟
474 * ⎣ A.w × B ⎦
475 *
476 * = ⎡ res.x ⎤
477 * ⎜ res.y ⎟
478 * ⎜ res.z ⎟
479 * ⎣ res.w ⎦
480 * ]|
481 *
482 * See also: graphene_simd4x4f_vec4_mul()
483 *
484 * Since: 1.0
485 */
486static inline void
487graphene_simd4x4f_matrix_mul (const graphene_simd4x4f_t *a,
488 const graphene_simd4x4f_t *b,
489 graphene_simd4x4f_t *res)
490{
491#if 0
492 /* this is the classic naive A*B implementation of the row * column
493 * matrix product. using a SIMD scalar implementation, it's fairly
494 * slow at 329ns per multiplication; the SSE implementation makes it
495 * about 10x faster, at 32ns; the GCC vector implementation is only
496 * 5x faster, at 66ns. the biggest culprits are the transpose operation
497 * and the multiple, one lane reads to compute the scalar sum.
498 */
499 graphene_simd4x4f_t t;
500
501 graphene_simd4x4f_transpose (b, &t);
502
503 res->x =
504 graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.x)),
505 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.y)),
506 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.z)),
507 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.w)));
508
509 res->y =
510 graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.x)),
511 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.y)),
512 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.z)),
513 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.w)));
514
515 res->z =
516 graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.x)),
517 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.y)),
518 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.z)),
519 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.w)));
520
521 res->w =
522 graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.x)),
523 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.y)),
524 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.z)),
525 graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.w)));
526#else
527 /* this is an optimized version of the matrix multiplication, using
528 * four dot products for each row vector. this yields drastically
529 * better numbers while retaining the same correct results as above:
530 * the scalar implementation now clocks at 91ns; the GCC vector
531 * implementation is 19ns; and the SSE implementation is 16ns.
532 *
533 * the order is correct if we want to multiply A with B; remember
534 * that matrix multiplication is non-commutative.
535 */
536 graphene_simd4f_t x, y, z, w;
537
538 graphene_simd4x4f_vec4_mul (a: b, b: &a->x, res: &x);
539 graphene_simd4x4f_vec4_mul (a: b, b: &a->y, res: &y);
540 graphene_simd4x4f_vec4_mul (a: b, b: &a->z, res: &z);
541 graphene_simd4x4f_vec4_mul (a: b, b: &a->w, res: &w);
542
543 *res = graphene_simd4x4f_init (x, y, z, w);
544#endif
545}
546
547/**
548 * graphene_simd4x4f_init_perspective:
549 * @m: a #graphene_simd4x4f_t
550 * @fovy_rad: the angle of the field of vision, in radians
551 * @aspect: the aspect value
552 * @z_near: the depth of the near clipping plane
553 * @z_far: the depth of the far clipping plane
554 *
555 * Initializes a #graphene_simd4x4f_t with a perspective projection.
556 *
557 * Since: 1.0
558 */
559static inline void
560graphene_simd4x4f_init_perspective (graphene_simd4x4f_t *m,
561 float fovy_rad,
562 float aspect,
563 float z_near,
564 float z_far)
565{
566 float delta_z = z_far - z_near;
567 float cotangent = tanf (GRAPHENE_PI_2 - fovy_rad * 0.5f);
568
569 float a = cotangent / aspect;
570 float b = cotangent;
571 float c = -(z_far + z_near) / delta_z;
572 float d = -2 * z_near * z_far / delta_z;
573
574 m->x = graphene_simd4f_init ( a, 0.0f, 0.0f, 0.0f);
575 m->y = graphene_simd4f_init (0.0f, b, 0.0f, 0.0f);
576 m->z = graphene_simd4f_init (0.0f, 0.0f, c, -1.0f);
577 m->w = graphene_simd4f_init (0.0f, 0.0f, d, 0.0f);
578}
579
580/**
581 * graphene_simd4x4f_init_ortho:
582 * @m: a #graphene_simd4x4f_t
583 * @left: edge of the left clipping plane
584 * @right: edge of the right clipping plane
585 * @bottom: edge of the bottom clipping plane
586 * @top: edge of the top clipping plane
587 * @z_near: depth of the near clipping plane
588 * @z_far: depth of the far clipping plane
589 *
590 * Initializes the given SIMD matrix with an orthographic projection.
591 *
592 * Since: 1.0
593 */
594static inline void
595graphene_simd4x4f_init_ortho (graphene_simd4x4f_t *m,
596 float left,
597 float right,
598 float bottom,
599 float top,
600 float z_near,
601 float z_far)
602{
603 float delta_x = right - left;
604 float delta_y = top - bottom;
605 float delta_z = z_far - z_near;
606
607 float a = 2.0f / delta_x;
608 float b = -(right + left) / delta_x;
609 float c = 2.0f / delta_y;
610 float d = -(top + bottom) / delta_y;
611 float e = -2.0f / delta_z;
612 float f = -(z_far + z_near) / delta_z;
613
614 m->x = graphene_simd4f_init ( a, 0.0f, 0.0f, 0.0f);
615 m->y = graphene_simd4f_init (0.0f, c, 0.0f, 0.0f);
616 m->z = graphene_simd4f_init (0.0f, 0.0f, e, 0.0f);
617 m->w = graphene_simd4f_init ( b, d, f, 1.0f);
618}
619
620/**
621 * graphene_simd4x4f_init_look_at:
622 * @m: a #graphene_simd4x4f_t
623 * @eye: vector for the camera coordinates
624 * @center: vector for the object coordinates
625 * @up: vector for the upwards direction
626 *
627 * Initializes a SIMD matrix with the projection necessary for
628 * the camera at the @eye coordinates to look at the object at
629 * the @center coordinates. The top of the camera is aligned to
630 * the @up vector.
631 *
632 * Since: 1.0
633 */
634static inline void
635graphene_simd4x4f_init_look_at (graphene_simd4x4f_t *m,
636 graphene_simd4f_t eye,
637 graphene_simd4f_t center,
638 graphene_simd4f_t up)
639{
640 const graphene_simd4f_t direction = graphene_simd4f_sub (center, eye);
641 graphene_simd4f_t cross;
642 graphene_simd4f_t z_axis;
643 graphene_simd4f_t x_axis;
644 graphene_simd4f_t y_axis;
645 float eye_v[4];
646
647 if (graphene_simd4f_get_x (graphene_simd4f_dot3 (direction, direction)) < FLT_EPSILON)
648 /* eye and center are in the same position */
649 z_axis = graphene_simd4f_init (0, 0, 1, 0);
650 else
651 z_axis = graphene_simd4f_normalize3 (v: direction);
652
653 cross = graphene_simd4f_cross3 (z_axis, up);
654 if (graphene_simd4f_get_x (graphene_simd4f_dot3 (cross, cross)) < FLT_EPSILON)
655 {
656 graphene_simd4f_t tweak_z;
657
658 /* up and z_axis are parallel */
659 if (fabs (graphene_simd4f_get_z (up) - 1.0) < FLT_EPSILON)
660 tweak_z = graphene_simd4f_init (0.0001f, 0, 0, 0);
661 else
662 tweak_z = graphene_simd4f_init (0, 0, 0.0001f, 0);
663
664 z_axis = graphene_simd4f_add (z_axis, tweak_z);
665 z_axis = graphene_simd4f_normalize3 (v: z_axis);
666 cross = graphene_simd4f_cross3 (z_axis, up);
667 }
668
669 x_axis = graphene_simd4f_normalize3 (v: cross);
670 y_axis = graphene_simd4f_cross3 (x_axis, z_axis);
671
672 graphene_simd4f_dup_4f (eye, eye_v);
673
674 m->x = x_axis;
675 m->y = y_axis;
676 m->z = graphene_simd4f_neg (z_axis);
677 m->w = graphene_simd4f_init (-eye_v[0], -eye_v[1], -eye_v[2], 1.f);
678}
679
680/**
681 * graphene_simd4x4f_init_frustum:
682 * @m: a #graphene_simd4x4f_t
683 * @left: distance of the left clipping plane
684 * @right: distance of the right clipping plane
685 * @bottom: distance of the bottom clipping plane
686 * @top: distance of the top clipping plane
687 * @z_near: distance of the near clipping plane
688 * @z_far: distance of the far clipping plane
689 *
690 * Initializes a SIMD matrix with a frustum described by the distances
691 * of six clipping planes.
692 *
693 * Since: 1.2
694 */
695static inline void
696graphene_simd4x4f_init_frustum (graphene_simd4x4f_t *m,
697 float left,
698 float right,
699 float bottom,
700 float top,
701 float z_near,
702 float z_far)
703{
704 float x = 2.f * z_near / (right - left);
705 float y = 2.f * z_near / (top - bottom);
706
707 float a = (right + left) / (right - left);
708 float b = (top + bottom) / (top - bottom);
709 float c = -1.f * (z_far + z_near) / (z_far - z_near);
710 float d = -2.f * z_far * z_near / (z_far - z_near);
711
712 m->x = graphene_simd4f_init ( x, 0.f, 0.f, 0.f);
713 m->y = graphene_simd4f_init (0.f, y, 0.f, 0.f);
714 m->z = graphene_simd4f_init ( a, b, c, -1.f);
715 m->w = graphene_simd4f_init (0.f, 0.f, d, 0.f);
716}
717
718/**
719 * graphene_simd4x4f_perspective:
720 * @m: a #graphene_simd4x4f_t
721 * @depth: depth of the perspective
722 *
723 * Adds a perspective transformation for the given @depth.
724 *
725 * Since: 1.0
726 */
727static inline void
728graphene_simd4x4f_perspective (graphene_simd4x4f_t *m,
729 float depth)
730{
731#if 1
732 const float m_xw = graphene_simd4f_get_w (m->x);
733 const float m_yw = graphene_simd4f_get_w (m->y);
734 const float m_zw = graphene_simd4f_get_w (m->z);
735 const float m_ww = graphene_simd4f_get_w (m->w);
736
737 const float p0 = graphene_simd4f_get_z (m->x) + -1.0f / depth * m_xw;
738 const float p1 = graphene_simd4f_get_z (m->y) + -1.0f / depth * m_yw;
739 const float p2 = graphene_simd4f_get_z (m->z) + -1.0f / depth * m_zw;
740 const float p3 = graphene_simd4f_get_z (m->w) + -1.0f / depth * m_ww;
741
742 const graphene_simd4f_t p_x = graphene_simd4f_merge_w (m->x, m_xw + p0);
743 const graphene_simd4f_t p_y = graphene_simd4f_merge_w (m->y, m_yw + p1);
744 const graphene_simd4f_t p_z = graphene_simd4f_merge_w (m->z, m_zw + p2);
745 const graphene_simd4f_t p_w = graphene_simd4f_merge_w (m->w, m_ww + p3);
746#else
747 /* this is equivalent to the operations above, but trying to inline
748 * them into SIMD registers as much as possible by transposing the
749 * original matrix and operating on the resulting column vectors. it
750 * should warrant a micro benchmark, because while the above code is
751 * dominated by single channel reads, the code below has a transpose
752 * operation.
753 */
754 graphene_simd4x4f_t t;
755 const graphene_simd4f_t f, p;
756 const graphene_simd4f_t p_x, p_y, p_z, p_w;
757
758 graphene_simd4x4f_transpose (m, &t);
759
760 f = graphene_simd4f_neg (graphene_simd4f_reciprocal (graphene_simd4f_splat (depth)));
761 p = graphene_simd4f_sum (t.w, graphene_simd4f_sum (t.z, graphene_simd4f_mul (f, t.w)));
762 p_x = graphene_simd4f_merge_w (m->x, graphene_simd4f_get_x (p));
763 p_y = graphene_simd4f_merge_w (m->y, graphene_simd4f_get_y (p));
764 p_z = graphene_simd4f_merge_w (m->z, graphene_simd4f_get_z (p));
765 p_w = graphene_simd4f_merge_w (m->w, graphene_simd4f_get_w (p));
766#endif
767
768 *m = graphene_simd4x4f_init (x: p_x, y: p_y, z: p_z, w: p_w);
769}
770
771/**
772 * graphene_simd4x4f_translation:
773 * @m: a #graphene_simd4x4f_t
774 * @x: coordinate of the X translation
775 * @y: coordinate of the Y translation
776 * @z: coordinate of the Z translation
777 *
778 * Initializes @m to contain a translation to the given coordinates.
779 *
780 * Since: 1.0
781 */
782static inline void
783graphene_simd4x4f_translation (graphene_simd4x4f_t *m,
784 float x,
785 float y,
786 float z)
787{
788 *m = graphene_simd4x4f_init (graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f),
789 graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f),
790 graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f),
791 graphene_simd4f_init ( x, y, z, 1.0f));
792}
793
794/**
795 * graphene_simd4x4f_scale:
796 * @m: a #graphene_simd4x4f_t
797 * @x: scaling factor on the X axis
798 * @y: scaling factor on the Y axis
799 * @z: scaling factor on the Z axis
800 *
801 * Initializes @m to contain a scaling transformation with the
802 * given factors.
803 *
804 * Since: 1.0
805 */
806static inline void
807graphene_simd4x4f_scale (graphene_simd4x4f_t *m,
808 float x,
809 float y,
810 float z)
811{
812 *m = graphene_simd4x4f_init (graphene_simd4f_init ( x, 0.0f, 0.0f, 0.0f),
813 graphene_simd4f_init (0.0f, y, 0.0f, 0.0f),
814 graphene_simd4f_init (0.0f, 0.0f, z, 0.0f),
815 graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f));
816
817}
818
819/**
820 * graphene_simd4x4f_rotation:
821 * @m: a #graphene_simd4x4f_t
822 * @rad: the rotation, in radians
823 * @axis: the vector of the axis of rotation
824 *
825 * Initializes @m to contain a rotation of the given angle
826 * along the given axis.
827 *
828 * Since: 1.0
829 */
830static inline void
831graphene_simd4x4f_rotation (graphene_simd4x4f_t *m,
832 float rad,
833 graphene_simd4f_t axis)
834{
835 float sine, cosine;
836 float x, y, z;
837 float ab, bc, ca;
838 float tx, ty, tz;
839 graphene_simd4f_t i, j, k;
840
841 rad = -rad;
842 axis = graphene_simd4f_normalize3 (v: axis);
843
844 /* We cannot use graphene_sincos() because it's a private function, whereas
845 * graphene-simd4x4f.h is a public header
846 */
847 sine = sinf (x: rad);
848 cosine = cosf (x: rad);
849
850 x = graphene_simd4f_get_x (axis);
851 y = graphene_simd4f_get_y (axis);
852 z = graphene_simd4f_get_z (axis);
853
854 ab = x * y * (1.0f - cosine);
855 bc = y * z * (1.0f - cosine);
856 ca = z * x * (1.0f - cosine);
857
858 tx = x * x;
859 ty = y * y;
860 tz = z * z;
861
862 i = graphene_simd4f_init (tx + cosine * (1.0f - tx), ab - z * sine, ca + y * sine, 0.f);
863 j = graphene_simd4f_init (ab + z * sine, ty + cosine * (1.0f - ty), bc - x * sine, 0.f);
864 k = graphene_simd4f_init (ca - y * sine, bc + x * sine, tz + cosine * (1.0f - tz), 0.f);
865
866 *m = graphene_simd4x4f_init (x: i, y: j, z: k, graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f));
867}
868
869/**
870 * graphene_simd4x4f_add:
871 * @a: a #graphene_simd4x4f_t
872 * @b: a #graphene_simd4x4f_t
873 * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
874 *
875 * Adds each row vector of @a and @b and places the results in @res.
876 *
877 * Since: 1.0
878 */
879static inline void
880graphene_simd4x4f_add (const graphene_simd4x4f_t *a,
881 const graphene_simd4x4f_t *b,
882 graphene_simd4x4f_t *res)
883{
884 res->x = graphene_simd4f_add (a->x, b->x);
885 res->y = graphene_simd4f_add (a->y, b->y);
886 res->z = graphene_simd4f_add (a->z, b->z);
887 res->w = graphene_simd4f_add (a->w, b->w);
888}
889
890/**
891 * graphene_simd4x4f_sub:
892 * @a: a #graphene_simd4x4f_t
893 * @b: a #graphene_simd4x4f_t
894 * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
895 *
896 * Subtracts each row vector of @a and @b and places the results in @res.
897 *
898 * Since: 1.0
899 */
900static inline void
901graphene_simd4x4f_sub (const graphene_simd4x4f_t *a,
902 const graphene_simd4x4f_t *b,
903 graphene_simd4x4f_t *res)
904{
905 res->x = graphene_simd4f_sub (a->x, b->x);
906 res->y = graphene_simd4f_sub (a->y, b->y);
907 res->z = graphene_simd4f_sub (a->z, b->z);
908 res->w = graphene_simd4f_sub (a->w, b->w);
909}
910
911/**
912 * graphene_simd4x4f_mul:
913 * @a: a #graphene_simd4x4f_t
914 * @b: a #graphene_simd4x4f_t
915 * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
916 *
917 * Multiplies each row vector of @a and @b and places the results in @res.
918 *
919 * You most likely want graphene_simd4x4f_matrix_mul() instead.
920 *
921 * Since: 1.0
922 */
923static inline void
924graphene_simd4x4f_mul (const graphene_simd4x4f_t *a,
925 const graphene_simd4x4f_t *b,
926 graphene_simd4x4f_t *res)
927{
928 res->x = graphene_simd4f_mul (a->x, b->x);
929 res->y = graphene_simd4f_mul (a->y, b->y);
930 res->z = graphene_simd4f_mul (a->z, b->z);
931 res->w = graphene_simd4f_mul (a->w, b->w);
932}
933
934/**
935 * graphene_simd4x4f_div:
936 * @a: a #graphene_simd4x4f_t
937 * @b: a #graphene_simd4x4f_t
938 * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
939 *
940 * Divides each row vector of @a and @b and places the results in @res.
941 *
942 * Since: 1.0
943 */
944static inline void
945graphene_simd4x4f_div (const graphene_simd4x4f_t *a,
946 const graphene_simd4x4f_t *b,
947 graphene_simd4x4f_t *res)
948{
949 res->x = graphene_simd4f_div (a->x, b->x);
950 res->y = graphene_simd4f_div (a->y, b->y);
951 res->z = graphene_simd4f_div (a->z, b->z);
952 res->w = graphene_simd4f_div (a->w, b->w);
953}
954
955/**
956 * graphene_simd4x4f_inverse:
957 * @m: a #graphene_simd4x4f_t
958 * @res: (out): return location for the inverse matrix
959 *
960 * Inverts the given #graphene_simd4x4f_t.
961 *
962 * Returns: `true` if the matrix was invertible
963 *
964 * Since: 1.0
965 */
966static inline bool
967graphene_simd4x4f_inverse (const graphene_simd4x4f_t *m,
968 graphene_simd4x4f_t *res)
969{
970 /* split rows */
971 const graphene_simd4f_t r0 = m->x;
972 const graphene_simd4f_t r1 = m->y;
973 const graphene_simd4f_t r2 = m->z;
974 const graphene_simd4f_t r3 = m->w;
975
976 /* cofactors */
977 const graphene_simd4f_t r0_wxyz = graphene_simd4f_shuffle_wxyz (r0);
978 const graphene_simd4f_t r0_zwxy = graphene_simd4f_shuffle_zwxy (r0);
979 const graphene_simd4f_t r0_yzwx = graphene_simd4f_shuffle_yzwx (r0);
980
981 const graphene_simd4f_t r1_wxyz = graphene_simd4f_shuffle_wxyz (r1);
982 const graphene_simd4f_t r1_zwxy = graphene_simd4f_shuffle_zwxy (r1);
983 const graphene_simd4f_t r1_yzwx = graphene_simd4f_shuffle_yzwx (r1);
984
985 const graphene_simd4f_t r2_wxyz = graphene_simd4f_shuffle_wxyz (r2);
986 const graphene_simd4f_t r2_zwxy = graphene_simd4f_shuffle_zwxy (r2);
987 const graphene_simd4f_t r2_yzwx = graphene_simd4f_shuffle_yzwx (r2);
988
989 const graphene_simd4f_t r3_wxyz = graphene_simd4f_shuffle_wxyz (r3);
990 const graphene_simd4f_t r3_zwxy = graphene_simd4f_shuffle_zwxy (r3);
991 const graphene_simd4f_t r3_yzwx = graphene_simd4f_shuffle_yzwx (r3);
992
993 const graphene_simd4f_t r0_wxyz_x_r1 = graphene_simd4f_mul (r0_wxyz, r1);
994 const graphene_simd4f_t r0_wxyz_x_r1_yzwx = graphene_simd4f_mul (r0_wxyz, r1_yzwx);
995 const graphene_simd4f_t r0_wxyz_x_r1_zwxy = graphene_simd4f_mul (r0_wxyz, r1_zwxy);
996
997 const graphene_simd4f_t r2_wxyz_x_r3 = graphene_simd4f_mul (r2_wxyz, r3);
998 const graphene_simd4f_t r2_wxyz_x_r3_yzwx = graphene_simd4f_mul (r2_wxyz, r3_yzwx);
999 const graphene_simd4f_t r2_wxyz_x_r3_zwxy = graphene_simd4f_mul (r2_wxyz, r3_zwxy);
1000
1001 const graphene_simd4f_t ar1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3_zwxy),
1002 graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3));
1003 const graphene_simd4f_t ar2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3_yzwx),
1004 r2_wxyz_x_r3_yzwx);
1005 const graphene_simd4f_t ar3 = graphene_simd4f_sub (r2_wxyz_x_r3_zwxy,
1006 graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3));
1007
1008 const graphene_simd4f_t br1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r0_wxyz_x_r1_zwxy),
1009 graphene_simd4f_shuffle_zwxy (r0_wxyz_x_r1));
1010 const graphene_simd4f_t br2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r0_wxyz_x_r1_yzwx),
1011 r0_wxyz_x_r1_yzwx);
1012 const graphene_simd4f_t br3 = graphene_simd4f_sub (r0_wxyz_x_r1_zwxy,
1013 graphene_simd4f_shuffle_wxyz (r0_wxyz_x_r1));
1014
1015 const graphene_simd4f_t r0_sum =
1016 graphene_simd4f_madd (m1: r0_yzwx, m2: ar3,
1017 a: graphene_simd4f_madd (m1: r0_zwxy, m2: ar2,
1018 graphene_simd4f_mul (r0_wxyz, ar1)));
1019 const graphene_simd4f_t r1_sum =
1020 graphene_simd4f_madd (m1: r1_wxyz, m2: ar1,
1021 a: graphene_simd4f_madd (m1: r1_zwxy, m2: ar2,
1022 graphene_simd4f_mul (r1_yzwx, ar3)));
1023 const graphene_simd4f_t r2_sum =
1024 graphene_simd4f_madd (m1: r2_yzwx, m2: br3,
1025 a: graphene_simd4f_madd (m1: r2_zwxy, m2: br2,
1026 graphene_simd4f_mul (r2_wxyz, br1)));
1027 const graphene_simd4f_t r3_sum =
1028 graphene_simd4f_madd (m1: r3_yzwx, m2: br3,
1029 a: graphene_simd4f_madd (m1: r3_zwxy, m2: br2,
1030 graphene_simd4f_mul (r3_wxyz, br1)));
1031
1032 /* determinant and its inverse */
1033 const graphene_simd4f_t d0 = graphene_simd4f_mul (r1_sum, r0);
1034 const graphene_simd4f_t d1 = graphene_simd4f_add (d0, graphene_simd4f_merge_high (d0, d0));
1035 const graphene_simd4f_t det = graphene_simd4f_sub (d1, graphene_simd4f_splat_y (d1));
1036 if (fabsf (graphene_simd4f_get_x (det)) >= FLT_EPSILON)
1037 {
1038 const graphene_simd4f_t invdet = graphene_simd4f_splat_x (graphene_simd4f_div (graphene_simd4f_splat (1.0f), det));
1039
1040 const graphene_simd4f_t o0 = graphene_simd4f_mul (graphene_simd4f_flip_sign_0101 (r1_sum), invdet);
1041 const graphene_simd4f_t o1 = graphene_simd4f_mul (graphene_simd4f_flip_sign_1010 (r0_sum), invdet);
1042 const graphene_simd4f_t o2 = graphene_simd4f_mul (graphene_simd4f_flip_sign_0101 (r3_sum), invdet);
1043 const graphene_simd4f_t o3 = graphene_simd4f_mul (graphene_simd4f_flip_sign_1010 (r2_sum), invdet);
1044
1045 graphene_simd4x4f_t mt = graphene_simd4x4f_init (x: o0, y: o1, z: o2, w: o3);
1046
1047 /* transpose the resulting matrix */
1048 graphene_simd4x4f_transpose (s: &mt, res);
1049
1050 return true;
1051 }
1052
1053 return false;
1054}
1055
1056/**
1057 * graphene_simd4x4f_determinant:
1058 * @m: a #graphene_simd4x4f_t
1059 * @det_r: (out): return location for the matrix determinant
1060 * @invdet_r: (out): return location for the inverse of the matrix
1061 * determinant
1062 *
1063 * Computes the determinant (and its inverse) of the given matrix
1064 *
1065 * Since: 1.0
1066 */
1067static inline void
1068graphene_simd4x4f_determinant (const graphene_simd4x4f_t *m,
1069 graphene_simd4f_t *det_r,
1070 graphene_simd4f_t *invdet_r)
1071{
1072 /* split rows */
1073 const graphene_simd4f_t r0 = m->x;
1074 const graphene_simd4f_t r1 = m->y;
1075 const graphene_simd4f_t r2 = m->z;
1076 const graphene_simd4f_t r3 = m->w;
1077
1078 /* cofactors */
1079 const graphene_simd4f_t r1_wxyz = graphene_simd4f_shuffle_wxyz (r1);
1080 const graphene_simd4f_t r1_zwxy = graphene_simd4f_shuffle_zwxy (r1);
1081 const graphene_simd4f_t r1_yzwx = graphene_simd4f_shuffle_yzwx (r1);
1082
1083 const graphene_simd4f_t r2_wxyz = graphene_simd4f_shuffle_wxyz (r2);
1084
1085 const graphene_simd4f_t r3_zwxy = graphene_simd4f_shuffle_zwxy (r3);
1086 const graphene_simd4f_t r3_yzwx = graphene_simd4f_shuffle_yzwx (r3);
1087
1088 const graphene_simd4f_t r2_wxyz_x_r3 = graphene_simd4f_mul (r2_wxyz, r3);
1089 const graphene_simd4f_t r2_wxyz_x_r3_yzwx = graphene_simd4f_mul (r2_wxyz, r3_yzwx);
1090 const graphene_simd4f_t r2_wxyz_x_r3_zwxy = graphene_simd4f_mul (r2_wxyz, r3_zwxy);
1091
1092 const graphene_simd4f_t ar1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3_zwxy),
1093 graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3));
1094 const graphene_simd4f_t ar2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3_yzwx),
1095 r2_wxyz_x_r3_yzwx);
1096 const graphene_simd4f_t ar3 = graphene_simd4f_sub (r2_wxyz_x_r3_zwxy,
1097 graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3));
1098
1099 const graphene_simd4f_t r1_sum =
1100 graphene_simd4f_madd (m1: r1_wxyz, m2: ar1,
1101 a: graphene_simd4f_madd (m1: r1_zwxy, m2: ar2,
1102 graphene_simd4f_mul (r1_yzwx, ar3)));
1103
1104 /* determinant and its inverse */
1105 const graphene_simd4f_t d0 = graphene_simd4f_mul (r1_sum, r0);
1106 const graphene_simd4f_t d1 = graphene_simd4f_add (d0, graphene_simd4f_merge_high (d0, d0));
1107
1108 const graphene_simd4f_t det = graphene_simd4f_sub (d1, graphene_simd4f_splat_y (d1));
1109
1110 const graphene_simd4f_t invdet = graphene_simd4f_splat_x (graphene_simd4f_div (graphene_simd4f_splat (1.0f), det));
1111
1112 if (det_r != NULL)
1113 *det_r = det;
1114
1115 if (invdet_r != NULL)
1116 *invdet_r = invdet;
1117}
1118
1119/**
1120 * graphene_simd4x4f_is_identity:
1121 * @m: a #graphene_simd4x4f_t
1122 *
1123 * Checks whether the given matrix is the identity matrix.
1124 *
1125 * Returns: `true` if the matrix is the identity matrix
1126 *
1127 * Since: 1.0
1128 */
1129static inline bool
1130graphene_simd4x4f_is_identity (const graphene_simd4x4f_t *m)
1131{
1132 const graphene_simd4f_t r0 = graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f);
1133 const graphene_simd4f_t r1 = graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f);
1134 const graphene_simd4f_t r2 = graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f);
1135 const graphene_simd4f_t r3 = graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f);
1136
1137 return graphene_simd4f_cmp_eq (m->x, r0) &&
1138 graphene_simd4f_cmp_eq (m->y, r1) &&
1139 graphene_simd4f_cmp_eq (m->z, r2) &&
1140 graphene_simd4f_cmp_eq (m->w, r3);
1141}
1142
1143/**
1144 * graphene_simd4x4f_is_2d:
1145 * @m: a #graphene_simd4x4f_t
1146 *
1147 * Checks whether the given matrix is compatible with an affine
1148 * transformation matrix.
1149 *
1150 * Returns: `true` if the matrix is compatible with an affine
1151 * transformation matrix
1152 *
1153 * Since: 1.0
1154 */
1155static inline bool
1156graphene_simd4x4f_is_2d (const graphene_simd4x4f_t *m)
1157{
1158 float f[4];
1159
1160 if (!(fabsf (graphene_simd4f_get_z (m->x)) < FLT_EPSILON && fabsf (graphene_simd4f_get_w (m->x)) < FLT_EPSILON))
1161 return false;
1162
1163 if (!(fabsf (graphene_simd4f_get_z (m->y)) < FLT_EPSILON && fabsf (graphene_simd4f_get_w (m->y)) < FLT_EPSILON))
1164 return false;
1165
1166 graphene_simd4f_dup_4f (m->z, f);
1167 if (!(fabsf (x: f[0]) < FLT_EPSILON &&
1168 fabsf (x: f[1]) < FLT_EPSILON &&
1169 1.f - fabsf (x: f[2]) < FLT_EPSILON &&
1170 fabsf (x: f[3]) < FLT_EPSILON))
1171 return false;
1172
1173 if (!(fabsf (graphene_simd4f_get_z (m->w)) < FLT_EPSILON && 1.f - fabsf (graphene_simd4f_get_w (m->w)) < FLT_EPSILON))
1174 return false;
1175
1176 return true;
1177}
1178
1179GRAPHENE_END_DECLS
1180

/* source code of gtk/subprojects/graphene/include/graphene-simd4x4f.h */