graphene-simd4x4f.h source code [gtk/subprojects/graphene/include/graphene-simd4x4f.h]

/* graphene-simd4x4f.h: 4x4 float vector operations
2	*
3	* SPDX-License-Identifier: MIT
4	*
5	* Copyright 2014 Emmanuele Bassi
6	*
7	* Permission is hereby granted, free of charge, to any person obtaining a copy
8	* of this software and associated documentation files (the "Software"), to deal
9	* in the Software without restriction, including without limitation the rights
10	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11	* copies of the Software, and to permit persons to whom the Software is
12	* furnished to do so, subject to the following conditions:
13	*
14	* The above copyright notice and this permission notice shall be included in
15	* all copies or substantial portions of the Software.
16	*
17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23	* THE SOFTWARE.
24	*/
25
26	#pragma once
27
28	#include "graphene-simd4f.h"
29
30	#include <math.h>
31	#include <float.h>
32
33	GRAPHENE_BEGIN_DECLS
34
/**
 * graphene_simd4x4f_t:
 *
 * A SIMD-based matrix type that uses four #graphene_simd4f_t vectors.
 *
 * The matrix is treated as row-major, i.e. the x, y, z, and w vectors
 * are rows, and the elements of each vector are the columns:
 *
 * |[<!-- language="C" -->
 *   graphene_simd4x4f_t = {
 *     x.x, x.y, x.z, x.w,
 *     y.x, y.y, y.z, y.w,
 *     z.x, z.y, z.z, z.w,
 *     w.x, w.y, w.z, w.w
 *   }
 * ]|
 *
 * The contents of the #graphene_simd4x4f_t type are private and
 * cannot be accessed directly; use the provided API instead.
 *
 * Since: 1.0
 */
57
58	/**
59	* graphene_simd4x4f_init:
60	* @x: a #graphene_simd4f_t for the first row
61	* @y: a #graphene_simd4f_t for the second row
62	* @z: a #graphene_simd4f_t for the third row
63	* @w: a #graphene_simd4f_t for the fourth row
64	*
65	* Creates a new #graphene_simd4x4f_t using the given row vectors
66	* to initialize it.
67	*
68	* Returns: the newly created #graphene_simd4x4f_t
69	*
70	* Since: 1.0
71	*/
72	static inline graphene_simd4x4f_t GRAPHENE_VECTORCALL
73	graphene_simd4x4f_init (graphene_simd4f_t x,
74	graphene_simd4f_t y,
75	graphene_simd4f_t z,
76	graphene_simd4f_t w)
77	{
78	graphene_simd4x4f_t s;
79
80	s.x = x;
81	s.y = y;
82	s.z = z;
83	s.w = w;
84
85	return s;
86	}
87
88	/**
89	* graphene_simd4x4f_init_identity:
90	* @m: a #graphene_simd4x4f_t
91	*
92	* Initializes @m to be the identity matrix.
93	*
94	* Since: 1.0
95	*/
96	static inline void
97	graphene_simd4x4f_init_identity (graphene_simd4x4f_t *m)
98	{
99	*m = graphene_simd4x4f_init (graphene_simd4f_init (`1.0f`, `0.0f`, `0.0f`, `0.0f`),
100	graphene_simd4f_init (`0.0f`, `1.0f`, `0.0f`, `0.0f`),
101	graphene_simd4f_init (`0.0f`, `0.0f`, `1.0f`, `0.0f`),
102	graphene_simd4f_init (`0.0f`, `0.0f`, `0.0f`, `1.0f`));
103	}
104
105	/**
106	* graphene_simd4x4f_init_from_float:
107	* @m: a #graphene_simd4x4f_t
108	* @f: (array fixed-size=16): an array of 16 floating point values
109	*
110	* Initializes a #graphene_simd4x4f_t with the given array
111	* of floating point values.
112	*
113	* Since: 1.0
114	*/
115	static inline void
116	graphene_simd4x4f_init_from_float (graphene_simd4x4f_t *m,
117	const float *f)
118	{
119	m->x = graphene_simd4f_init_4f (f + `0`);
120	m->y = graphene_simd4f_init_4f (f + `4`);
121	m->z = graphene_simd4f_init_4f (f + `8`);
122	m->w = graphene_simd4f_init_4f (f + `12`);
123	}
124
125	/**
126	* graphene_simd4x4f_to_float:
127	* @m: a #graphene_sidm4x4f_t
128	* @v: (out caller-allocates) (array fixed-size=16): a floating
129	* point values vector capable of holding at least 16 values
130	*
131	* Copies the content of @m in a float array.
132	*
133	* Since: 1.0
134	*/
135	static inline void
136	graphene_simd4x4f_to_float (const graphene_simd4x4f_t *m,
137	float *v)
138	{
139	graphene_simd4f_dup_4f (m->x, v + `0`);
140	graphene_simd4f_dup_4f (m->y, v + `4`);
141	graphene_simd4f_dup_4f (m->z, v + `8`);
142	graphene_simd4f_dup_4f (m->w, v + `12`);
143	}
144
145	GRAPHENE_AVAILABLE_IN_1_0
146	void graphene_simd4x4f_transpose_in_place (graphene_simd4x4f_t *s);
147
148	#if defined(GRAPHENE_USE_SSE)
149
150	#ifdef __GNUC__
151	#define graphene_simd4x4f_transpose_in_place(s) \
152	(__extension__ ({ \
153	_MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w); \
154	}))
155	#elif defined (_MSC_VER)
156	#define graphene_simd4x4f_transpose_in_place(s) \
157	_MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w)
158	#endif
159
160	#elif defined(GRAPHENE_USE_GCC)
161
162	#define graphene_simd4x4f_transpose_in_place(s) \
163	(__extension__ ({ \
164	const graphene_simd4f_t sx = (s)->x; \
165	const graphene_simd4f_t sy = (s)->y; \
166	const graphene_simd4f_t sz = (s)->z; \
167	const graphene_simd4f_t sw = (s)->w; \
168	(s)->x = graphene_simd4f_init (sx[0], sy[0], sz[0], sw[0]); \
169	(s)->y = graphene_simd4f_init (sx[1], sy[1], sz[1], sw[1]); \
170	(s)->z = graphene_simd4f_init (sx[2], sy[2], sz[2], sw[2]); \
171	(s)->w = graphene_simd4f_init (sx[3], sy[3], sz[3], sw[3]); \
172	}))
173
174	#elif defined(GRAPHENE_USE_ARM_NEON)
175
176	# ifdef __GNUC__
177
178	#define graphene_simd4x4f_transpose_in_place(s) \
179	(__extension__ ({ \
180	const graphene_simd4f_union_t sx = { (s)->x }; \
181	const graphene_simd4f_union_t sy = { (s)->y }; \
182	const graphene_simd4f_union_t sz = { (s)->z }; \
183	const graphene_simd4f_union_t sw = { (s)->w }; \
184	(s)->x = graphene_simd4f_init (sx.f[0], sy.f[0], sz.f[0], sw.f[0]); \
185	(s)->y = graphene_simd4f_init (sx.f[1], sy.f[1], sz.f[1], sw.f[1]); \
186	(s)->z = graphene_simd4f_init (sx.f[2], sy.f[2], sz.f[2], sw.f[2]); \
187	(s)->w = graphene_simd4f_init (sx.f[3], sy.f[3], sz.f[3], sw.f[3]); \
188	}))
189
190	# elif defined (_MSC_VER)
191
192	#define graphene_simd4x4f_transpose_in_place(s) _simd4x4f_transpose_in_place(s)
193	static inline void
194	_simd4x4f_transpose_in_place (graphene_simd4x4f_t *s)
195	{
196	const graphene_simd4f_union_t sx = { (s)->x };
197	const graphene_simd4f_union_t sy = { (s)->y };
198	const graphene_simd4f_union_t sz = { (s)->z };
199	const graphene_simd4f_union_t sw = { (s)->w };
200	(s)->x = graphene_simd4f_init (sx.f[`0`], sy.f[`0`], sz.f[`0`], sw.f[`0`]);
201	(s)->y = graphene_simd4f_init (sx.f[`1`], sy.f[`1`], sz.f[`1`], sw.f[`1`]);
202	(s)->z = graphene_simd4f_init (sx.f[`2`], sy.f[`2`], sz.f[`2`], sw.f[`2`]);
203	(s)->w = graphene_simd4f_init (sx.f[`3`], sy.f[`3`], sz.f[`3`], sw.f[`3`]);
204	}
205
206	# endif
207
208	#elif defined(GRAPHENE_USE_SCALAR)
209
210	#define graphene_simd4x4f_transpose_in_place(s) \
211	(graphene_simd4x4f_transpose_in_place ((graphene_simd4x4f_t *) (s)))
212
213	#else
214	# error "No implementation for graphene_simd4x4f_t defined."
215	#endif
216
217	/**
218	* graphene_simd4x4f_sum:
219	* @a: a #graphene_simd4f_t
220	* @res: (out): return location for the sum vector
221	*
222	* Adds all the row vectors of @a.
223	*
224	* Since: 1.0
225	*/
226	static inline void
227	graphene_simd4x4f_sum (const graphene_simd4x4f_t *a,
228	graphene_simd4f_t *res)
229	{
230	graphene_simd4f_t s = graphene_simd4f_add (a->x, a->y);
231	s = graphene_simd4f_add (s, a->z);
232	s = graphene_simd4f_add (s, a->w);
233	*res = s;
234	}
235
236	/**
237	* graphene_simd4x4f_vec4_mul:
238	* @a: a #graphene_simd4x4f_t
239	* @b: a #graphene_simd4f_t
240	* @res: (out): return location for a #graphene_simd4f_t
241	*
242	* Left multiplies the given #graphene_simd4x4f_t with the given
243	* #graphene_simd4f_t row vector using a dot product:
244	*
245	* \|[<!-- language="plain" -->
246	* res = b × A
247	*
248	* = ⎡x⎤ ⎛ x.x x.y x.z x.w ⎞
249	* ⎜y⎟ ⎜ y.x y.y y.z y.w ⎟
250	* ⎜z⎟ ⎜ z.x z.y z.z z.w ⎟
251	* ⎣w⎦ ⎝ w.x w.y w.z w.w ⎠
252	*
253	* = [ x.x × x x.y × x x.z × x x.w × x ]
254	* + + + +
255	* [ y.x × y y.y × y y.z × y y.w × y ]
256	* + + + +
257	* [ z.x × z z.y × z z.z × z z.w × z ]
258	* + + + +
259	* [ w.x × w w.y × w w.z × w w.w × w ]
260	*
261	* = ⎡ x.x × x + y.x × y + z.x × z + w.x × w ⎤
262	* ⎜ x.y × x + y.y × y + z.y × z + w.y × w ⎟
263	* ⎜ x.z × x + y.z × y + z.z × z + w.z × w ⎟
264	* ⎣ x.w × x + y.w × y + z.w × z + w.w × w ⎦
265	* ]\|
266	*
267	* Since: 1.0
268	*/
269	static inline void
270	graphene_simd4x4f_vec4_mul (const graphene_simd4x4f_t *a,
271	const graphene_simd4f_t *b,
272	graphene_simd4f_t *res)
273	{
274	const graphene_simd4f_t v = *b;
275	const graphene_simd4f_t v_x = graphene_simd4f_splat_x (v);
276	const graphene_simd4f_t v_y = graphene_simd4f_splat_y (v);
277	const graphene_simd4f_t v_z = graphene_simd4f_splat_z (v);
278	const graphene_simd4f_t v_w = graphene_simd4f_splat_w (v);
279
280	*res = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (a->x, v_x),
281	graphene_simd4f_mul (a->y, v_y)),
282	graphene_simd4f_add (graphene_simd4f_mul (a->z, v_z),
283	graphene_simd4f_mul (a->w, v_w)));
284	}
285
286	/**
287	* graphene_simd4x4f_vec3_mul:
288	* @m: a #graphene_simd4x4f_t
289	* @v: a #graphene_simd4f_t
290	* @res: (out): return location for a #graphene_simd4f_t
291	*
292	* Left multiplies the given #graphene_simd4x4f_t with the given
293	* #graphene_simd4f_t, using only the first three row vectors
294	* of the matrix, and the first three components of the vector;
295	* the W components of the matrix and vector are ignored:
296	*
297	* \|[<!-- language="plain" -->
298	* res = b × A
299	*
300	* = ⎡x⎤ ⎛ x.x x.y x.z ⎞
301	* ⎜y⎟ ⎜ y.x y.y y.z ⎟
302	* ⎣z⎦ ⎝ z.x z.y z.z ⎠
303	*
304	* = [ x.x × x x.y × x x.z × x ]
305	* + + +
306	* [ y.x × y y.y × y y.z × y ]
307	* + + +
308	* [ z.x × z z.y × z z.z × z ]
309	*
310	* = ⎡ x.x × x + y.x × y + z.x × z ⎤
311	* ⎜ x.y × x + y.y × y + z.y × z ⎟
312	* ⎜ x.z × x + y.z × y + z.z × z ⎟
313	* ⎣ 0 ⎦
314	* ]\|
315	*
316	* See also: graphene_simd4x4f_vec4_mul(), graphene_simd4x4f_point3_mul()
317	*
318	* Since: 1.0
319	*/
320	static inline void
321	graphene_simd4x4f_vec3_mul (const graphene_simd4x4f_t *m,
322	const graphene_simd4f_t *v,
323	graphene_simd4f_t *res)
324	{
325	const graphene_simd4f_t v_x = graphene_simd4f_splat_x (*v);
326	const graphene_simd4f_t v_y = graphene_simd4f_splat_y (*v);
327	const graphene_simd4f_t v_z = graphene_simd4f_splat_z (*v);
328	graphene_simd4f_t r;
329
330	r = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (m->x, v_x),
331	graphene_simd4f_mul (m->y, v_y)),
332	graphene_simd4f_mul (m->z, v_z));
333	*res = graphene_simd4f_zero_w (r);
334	}
335
336	/**
337	* graphene_simd4x4f_point3_mul:
338	* @m: a #graphene_simd4x4f_t
339	* @p: a #graphene_simd4f_t
340	* @res: (out): return location for a #graphene_simd4f_t
341	*
342	* Multiplies the given #graphene_simd4x4f_t with the given
343	* #graphene_simd4f_t.
344	*
345	* Unlike graphene_simd4x4f_vec3_mul(), this function will
346	* use the W components of the matrix:
347	*
348	* \|[<!-- language="plain" -->
349	* res = b × A
350	*
351	* = ⎡x⎤ ⎛ x.x x.y x.z x.w ⎞
352	* ⎜y⎟ ⎜ y.x y.y y.z y.w ⎟
353	* ⎜z⎟ ⎜ z.x z.y z.z z.w ⎟
354	* ⎣w⎦ ⎝ w.x w.y w.z w.w ⎠
355	*
356	* = [ x.x × x x.y × x x.z × x x.w × x ]
357	* + + + +
358	* [ y.x × y y.y × y y.z × y y.w × y ]
359	* + + + +
360	* [ z.x × z z.y × z z.z × z z.w × z ]
361	* + + + +
362	* [ w.x w.y w.z w.w ]
363	*
364	* = ⎡ x.x × x + y.x × y + z.x × z + w.x ⎤
365	* ⎜ x.y × x + y.y × y + z.y × z + w.y ⎟
366	* ⎜ x.z × x + y.z × y + z.z × z + w.z ⎟
367	* ⎣ x.w × x + y.w × y + z.w × z + w.w ⎦
368	* ]\|
369	*
370	* Since: 1.0
371	*/
372	static inline void
373	graphene_simd4x4f_point3_mul (const graphene_simd4x4f_t *m,
374	const graphene_simd4f_t *p,
375	graphene_simd4f_t *res)
376	{
377	const graphene_simd4f_t v = *p;
378	const graphene_simd4f_t v_x = graphene_simd4f_splat_x (v);
379	const graphene_simd4f_t v_y = graphene_simd4f_splat_y (v);
380	const graphene_simd4f_t v_z = graphene_simd4f_splat_z (v);
381
382	*res = graphene_simd4f_add (graphene_simd4f_add (graphene_simd4f_mul (m->x, v_x),
383	graphene_simd4f_mul (m->y, v_y)),
384	graphene_simd4f_add (graphene_simd4f_mul (m->z, v_z),
385	m->w));
386	}
387
388	/**
389	* graphene_simd4x4f_transpose:
390	* @s: a #graphene_simd4x4f_t
391	* @res: (out): return location for the transposed matrix
392	*
393	* Transposes the given #graphene_simd4x4f_t.
394	*
395	* Since: 1.0
396	*/
397	static inline void
398	graphene_simd4x4f_transpose (const graphene_simd4x4f_t *s,
399	graphene_simd4x4f_t *res)
400	{
401	res = s;
402	graphene_simd4x4f_transpose_in_place (res);
403	}
404
405	/**
406	* graphene_simd4x4f_inv_ortho_vec3_mul:
407	* @a: a #graphene_simd4x4f_t
408	* @b: a #graphene_simd4f_t
409	* @res: (out): return location for the transformed vector
410	*
411	* Performs the inverse orthographic transformation of the first
412	* three components in the given vector, using the first three
413	* row vectors of the given SIMD matrix.
414	*
415	* Since: 1.0
416	*/
417	static inline void
418	graphene_simd4x4f_inv_ortho_vec3_mul (const graphene_simd4x4f_t *a,
419	const graphene_simd4f_t *b,
420	graphene_simd4f_t *res)
421	{
422	graphene_simd4x4f_t transpose = *a;
423	graphene_simd4f_t translation = *b;
424
425	transpose.w = graphene_simd4f_init (`0.f`, `0.f`, `0.f`, `0.f`);
426	graphene_simd4x4f_transpose_in_place (&transpose);
427
428	graphene_simd4x4f_vec3_mul (m: &transpose, v: &translation, res);
429	}
430
431	/**
432	* graphene_simd4x4f_inv_ortho_point3_mul:
433	* @a: a #graphene_simd4x4f_t
434	* @b: a #graphene_simd4x4f_t
435	* @res: (out): return location for the result vector
436	*
437	* Performs the inverse orthographic transformation of the first
438	* three components in the given vector, using the given SIMD
439	* matrix.
440	*
441	* Unlike graphene_simd4x4f_inv_ortho_vec3_mul(), this function
442	* will also use the fourth row vector of the SIMD matrix.
443	*
444	* Since: 1.0
445	*/
446	static inline void
447	graphene_simd4x4f_inv_ortho_point3_mul (const graphene_simd4x4f_t *a,
448	const graphene_simd4f_t *b,
449	graphene_simd4f_t *res)
450	{
451	graphene_simd4f_t translation = graphene_simd4f_sub (*b, a->w);
452	graphene_simd4x4f_t transpose = *a;
453
454	transpose.w = graphene_simd4f_init (`0.f`, `0.f`, `0.f`, `0.f`);
455	graphene_simd4x4f_transpose_in_place (&transpose);
456
457	graphene_simd4x4f_point3_mul (m: &transpose, p: &translation, res);
458	}
459
460	/**
461	* graphene_simd4x4f_matrix_mul:
462	* @a: a #graphene_simd4x4f_t
463	* @b: a #graphene_simd4x4f_t
464	* @res: (out): return location for the result
465	*
466	* Multiplies the two matrices, following the convention:
467	*
468	* \|[<!-- language="plain" -->
469	* res = A × B
470	*
471	* = ⎡ A.x × B ⎤
472	* ⎜ A.y × B ⎟
473	* ⎜ A.z × B ⎟
474	* ⎣ A.w × B ⎦
475	*
476	* = ⎡ res.x ⎤
477	* ⎜ res.y ⎟
478	* ⎜ res.z ⎟
479	* ⎣ res.w ⎦
480	* ]\|
481	*
482	* See also: graphene_simd4x4f_vec4_mul()
483	*
484	* Since: 1.0
485	*/
486	static inline void
487	graphene_simd4x4f_matrix_mul (const graphene_simd4x4f_t *a,
488	const graphene_simd4x4f_t *b,
489	graphene_simd4x4f_t *res)
490	{
491	#if 0
492	/ this is the classic naive AB implementation of the row column*
493	* matrix product. using a SIMD scalar implementation, it's fairly
494	* slow at 329ns per multiplication; the SSE implementation makes it
495	* about 10x faster, at 32ns; the GCC vector implementation is only
496	* 5x faster, at 66ns. the biggest culprits are the transpose operation
497	* and the multiple, one lane reads to compute the scalar sum.
498	*/
499	graphene_simd4x4f_t t;
500
501	graphene_simd4x4f_transpose (b, &t);
502
503	res->x =
504	graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.x)),
505	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.y)),
506	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.z)),
507	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->x, t.w)));
508
509	res->y =
510	graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.x)),
511	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.y)),
512	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.z)),
513	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->y, t.w)));
514
515	res->z =
516	graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.x)),
517	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.y)),
518	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.z)),
519	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->z, t.w)));
520
521	res->w =
522	graphene_simd4f_init (graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.x)),
523	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.y)),
524	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.z)),
525	graphene_simd4f_sum_scalar (graphene_simd4f_mul (a->w, t.w)));
526	#else
527	/ this is an optimized version of the matrix multiplication, using*
528	* four dot products for each row vector. this yields drastically
529	* better numbers while retaining the same correct results as above:
530	* the scalar implementation now clocks at 91ns; the GCC vector
531	* implementation is 19ns; and the SSE implementation is 16ns.
532	*
533	* the order is correct if we want to multiply A with B; remember
534	* that matrix multiplication is non-commutative.
535	*/
536	graphene_simd4f_t x, y, z, w;
537
538	graphene_simd4x4f_vec4_mul (a: b, b: &a->x, res: &x);
539	graphene_simd4x4f_vec4_mul (a: b, b: &a->y, res: &y);
540	graphene_simd4x4f_vec4_mul (a: b, b: &a->z, res: &z);
541	graphene_simd4x4f_vec4_mul (a: b, b: &a->w, res: &w);
542
543	*res = graphene_simd4x4f_init (x, y, z, w);
544	#endif
545	}
546
547	/**
548	* graphene_simd4x4f_init_perspective:
549	* @m: a #graphene_simd4x4f_t
550	* @fovy_rad: the angle of the field of vision, in radians
551	* @aspect: the aspect value
552	* @z_near: the depth of the near clipping plane
553	* @z_far: the depth of the far clipping plane
554	*
555	* Initializes a #graphene_simd4x4f_t with a perspective projection.
556	*
557	* Since: 1.0
558	*/
559	static inline void
560	graphene_simd4x4f_init_perspective (graphene_simd4x4f_t *m,
561	float fovy_rad,
562	float aspect,
563	float z_near,
564	float z_far)
565	{
566	float delta_z = z_far - z_near;
567	float cotangent = tanf (GRAPHENE_PI_2 - fovy_rad * `0.5f`);
568
569	float a = cotangent / aspect;
570	float b = cotangent;
571	float c = -(z_far + z_near) / delta_z;
572	float d = -`2` * z_near * z_far / delta_z;
573
574	m->x = graphene_simd4f_init ( a, `0.0f`, `0.0f`, `0.0f`);
575	m->y = graphene_simd4f_init (`0.0f`, b, `0.0f`, `0.0f`);
576	m->z = graphene_simd4f_init (`0.0f`, `0.0f`, c, -`1.0f`);
577	m->w = graphene_simd4f_init (`0.0f`, `0.0f`, d, `0.0f`);
578	}
579
580	/**
581	* graphene_simd4x4f_init_ortho:
582	* @m: a #graphene_simd4x4f_t
583	* @left: edge of the left clipping plane
584	* @right: edge of the right clipping plane
585	* @bottom: edge of the bottom clipping plane
586	* @top: edge of the top clipping plane
587	* @z_near: depth of the near clipping plane
588	* @z_far: depth of the far clipping plane
589	*
590	* Initializes the given SIMD matrix with an orthographic projection.
591	*
592	* Since: 1.0
593	*/
594	static inline void
595	graphene_simd4x4f_init_ortho (graphene_simd4x4f_t *m,
596	float left,
597	float right,
598	float bottom,
599	float top,
600	float z_near,
601	float z_far)
602	{
603	float delta_x = right - left;
604	float delta_y = top - bottom;
605	float delta_z = z_far - z_near;
606
607	float a = `2.0f` / delta_x;
608	float b = -(right + left) / delta_x;
609	float c = `2.0f` / delta_y;
610	float d = -(top + bottom) / delta_y;
611	float e = -`2.0f` / delta_z;
612	float f = -(z_far + z_near) / delta_z;
613
614	m->x = graphene_simd4f_init ( a, `0.0f`, `0.0f`, `0.0f`);
615	m->y = graphene_simd4f_init (`0.0f`, c, `0.0f`, `0.0f`);
616	m->z = graphene_simd4f_init (`0.0f`, `0.0f`, e, `0.0f`);
617	m->w = graphene_simd4f_init ( b, d, f, `1.0f`);
618	}
619
620	/**
621	* graphene_simd4x4f_init_look_at:
622	* @m: a #graphene_simd4x4f_t
623	* @eye: vector for the camera coordinates
624	* @center: vector for the object coordinates
625	* @up: vector for the upwards direction
626	*
627	* Initializes a SIMD matrix with the projection necessary for
628	* the camera at the @eye coordinates to look at the object at
629	* the @center coordinates. The top of the camera is aligned to
630	* the @up vector.
631	*
632	* Since: 1.0
633	*/
634	static inline void
635	graphene_simd4x4f_init_look_at (graphene_simd4x4f_t *m,
636	graphene_simd4f_t eye,
637	graphene_simd4f_t center,
638	graphene_simd4f_t up)
639	{
640	const graphene_simd4f_t direction = graphene_simd4f_sub (center, eye);
641	graphene_simd4f_t cross;
642	graphene_simd4f_t z_axis;
643	graphene_simd4f_t x_axis;
644	graphene_simd4f_t y_axis;
645	float eye_v[`4`];
646
647	if (graphene_simd4f_get_x (graphene_simd4f_dot3 (direction, direction)) < FLT_EPSILON)
648	/ eye and center are in the same position /
649	z_axis = graphene_simd4f_init (`0`, `0`, `1`, `0`);
650	else
651	z_axis = graphene_simd4f_normalize3 (v: direction);
652
653	cross = graphene_simd4f_cross3 (z_axis, up);
654	if (graphene_simd4f_get_x (graphene_simd4f_dot3 (cross, cross)) < FLT_EPSILON)
655	{
656	graphene_simd4f_t tweak_z;
657
658	/ up and z_axis are parallel /
659	if (fabs (graphene_simd4f_get_z (up) - `1.0`) < FLT_EPSILON)
660	tweak_z = graphene_simd4f_init (`0.0001f`, `0`, `0`, `0`);
661	else
662	tweak_z = graphene_simd4f_init (`0`, `0`, `0.0001f`, `0`);
663
664	z_axis = graphene_simd4f_add (z_axis, tweak_z);
665	z_axis = graphene_simd4f_normalize3 (v: z_axis);
666	cross = graphene_simd4f_cross3 (z_axis, up);
667	}
668
669	x_axis = graphene_simd4f_normalize3 (v: cross);
670	y_axis = graphene_simd4f_cross3 (x_axis, z_axis);
671
672	graphene_simd4f_dup_4f (eye, eye_v);
673
674	m->x = x_axis;
675	m->y = y_axis;
676	m->z = graphene_simd4f_neg (z_axis);
677	m->w = graphene_simd4f_init (-eye_v[`0`], -eye_v[`1`], -eye_v[`2`], `1.f`);
678	}
679
680	/**
681	* graphene_simd4x4f_init_frustum:
682	* @m: a #graphene_simd4x4f_t
683	* @left: distance of the left clipping plane
684	* @right: distance of the right clipping plane
685	* @bottom: distance of the bottom clipping plane
686	* @top: distance of the top clipping plane
687	* @z_near: distance of the near clipping plane
688	* @z_far: distance of the far clipping plane
689	*
690	* Initializes a SIMD matrix with a frustum described by the distances
691	* of six clipping planes.
692	*
693	* Since: 1.2
694	*/
695	static inline void
696	graphene_simd4x4f_init_frustum (graphene_simd4x4f_t *m,
697	float left,
698	float right,
699	float bottom,
700	float top,
701	float z_near,
702	float z_far)
703	{
704	float x = `2.f` * z_near / (right - left);
705	float y = `2.f` * z_near / (top - bottom);
706
707	float a = (right + left) / (right - left);
708	float b = (top + bottom) / (top - bottom);
709	float c = -`1.f` * (z_far + z_near) / (z_far - z_near);
710	float d = -`2.f` * z_far * z_near / (z_far - z_near);
711
712	m->x = graphene_simd4f_init ( x, `0.f`, `0.f`, `0.f`);
713	m->y = graphene_simd4f_init (`0.f`, y, `0.f`, `0.f`);
714	m->z = graphene_simd4f_init ( a, b, c, -`1.f`);
715	m->w = graphene_simd4f_init (`0.f`, `0.f`, d, `0.f`);
716	}
717
718	/**
719	* graphene_simd4x4f_perspective:
720	* @m: a #graphene_simd4x4f_t
721	* @depth: depth of the perspective
722	*
723	* Adds a perspective transformation for the given @depth.
724	*
725	* Since: 1.0
726	*/
727	static inline void
728	graphene_simd4x4f_perspective (graphene_simd4x4f_t *m,
729	float depth)
730	{
731	#if 1
732	const float m_xw = graphene_simd4f_get_w (m->x);
733	const float m_yw = graphene_simd4f_get_w (m->y);
734	const float m_zw = graphene_simd4f_get_w (m->z);
735	const float m_ww = graphene_simd4f_get_w (m->w);
736
737	const float p0 = graphene_simd4f_get_z (m->x) + -`1.0f` / depth * m_xw;
738	const float p1 = graphene_simd4f_get_z (m->y) + -`1.0f` / depth * m_yw;
739	const float p2 = graphene_simd4f_get_z (m->z) + -`1.0f` / depth * m_zw;
740	const float p3 = graphene_simd4f_get_z (m->w) + -`1.0f` / depth * m_ww;
741
742	const graphene_simd4f_t p_x = graphene_simd4f_merge_w (m->x, m_xw + p0);
743	const graphene_simd4f_t p_y = graphene_simd4f_merge_w (m->y, m_yw + p1);
744	const graphene_simd4f_t p_z = graphene_simd4f_merge_w (m->z, m_zw + p2);
745	const graphene_simd4f_t p_w = graphene_simd4f_merge_w (m->w, m_ww + p3);
746	#else
747	/ this is equivalent to the operations above, but trying to inline*
748	* them into SIMD registers as much as possible by transposing the
749	* original matrix and operating on the resulting column vectors. it
750	* should warrant a micro benchmark, because while the above code is
751	* dominated by single channel reads, the code below has a transpose
752	* operation.
753	*/
754	graphene_simd4x4f_t t;
755	const graphene_simd4f_t f, p;
756	const graphene_simd4f_t p_x, p_y, p_z, p_w;
757
758	graphene_simd4x4f_transpose (m, &t);
759
760	f = graphene_simd4f_neg (graphene_simd4f_reciprocal (graphene_simd4f_splat (depth)));
761	p = graphene_simd4f_sum (t.w, graphene_simd4f_sum (t.z, graphene_simd4f_mul (f, t.w)));
762	p_x = graphene_simd4f_merge_w (m->x, graphene_simd4f_get_x (p));
763	p_y = graphene_simd4f_merge_w (m->y, graphene_simd4f_get_y (p));
764	p_z = graphene_simd4f_merge_w (m->z, graphene_simd4f_get_z (p));
765	p_w = graphene_simd4f_merge_w (m->w, graphene_simd4f_get_w (p));
766	#endif
767
768	*m = graphene_simd4x4f_init (x: p_x, y: p_y, z: p_z, w: p_w);
769	}
770
771	/**
772	* graphene_simd4x4f_translation:
773	* @m: a #graphene_simd4x4f_t
774	* @x: coordinate of the X translation
775	* @y: coordinate of the Y translation
776	* @z: coordinate of the Z translation
777	*
778	* Initializes @m to contain a translation to the given coordinates.
779	*
780	* Since: 1.0
781	*/
782	static inline void
783	graphene_simd4x4f_translation (graphene_simd4x4f_t *m,
784	float x,
785	float y,
786	float z)
787	{
788	*m = graphene_simd4x4f_init (graphene_simd4f_init (`1.0f`, `0.0f`, `0.0f`, `0.0f`),
789	graphene_simd4f_init (`0.0f`, `1.0f`, `0.0f`, `0.0f`),
790	graphene_simd4f_init (`0.0f`, `0.0f`, `1.0f`, `0.0f`),
791	graphene_simd4f_init ( x, y, z, `1.0f`));
792	}
793
794	/**
795	* graphene_simd4x4f_scale:
796	* @m: a #graphene_simd4x4f_t
797	* @x: scaling factor on the X axis
798	* @y: scaling factor on the Y axis
799	* @z: scaling factor on the Z axis
800	*
801	* Initializes @m to contain a scaling transformation with the
802	* given factors.
803	*
804	* Since: 1.0
805	*/
806	static inline void
807	graphene_simd4x4f_scale (graphene_simd4x4f_t *m,
808	float x,
809	float y,
810	float z)
811	{
812	*m = graphene_simd4x4f_init (graphene_simd4f_init ( x, `0.0f`, `0.0f`, `0.0f`),
813	graphene_simd4f_init (`0.0f`, y, `0.0f`, `0.0f`),
814	graphene_simd4f_init (`0.0f`, `0.0f`, z, `0.0f`),
815	graphene_simd4f_init (`0.0f`, `0.0f`, `0.0f`, `1.0f`));
816
817	}
818
819	/**
820	* graphene_simd4x4f_rotation:
821	* @m: a #graphene_simd4x4f_t
822	* @rad: the rotation, in radians
823	* @axis: the vector of the axis of rotation
824	*
825	* Initializes @m to contain a rotation of the given angle
826	* along the given axis.
827	*
828	* Since: 1.0
829	*/
830	static inline void
831	graphene_simd4x4f_rotation (graphene_simd4x4f_t *m,
832	float rad,
833	graphene_simd4f_t axis)
834	{
835	float sine, cosine;
836	float x, y, z;
837	float ab, bc, ca;
838	float tx, ty, tz;
839	graphene_simd4f_t i, j, k;
840
841	rad = -rad;
842	axis = graphene_simd4f_normalize3 (v: axis);
843
844	/ We cannot use graphene_sincos() because it's a private function, whereas*
845	* graphene-simd4x4f.h is a public header
846	*/
847	sine = sinf (x: rad);
848	cosine = cosf (x: rad);
849
850	x = graphene_simd4f_get_x (axis);
851	y = graphene_simd4f_get_y (axis);
852	z = graphene_simd4f_get_z (axis);
853
854	ab = x * y * (`1.0f` - cosine);
855	bc = y * z * (`1.0f` - cosine);
856	ca = z * x * (`1.0f` - cosine);
857
858	tx = x * x;
859	ty = y * y;
860	tz = z * z;
861
862	i = graphene_simd4f_init (tx + cosine * (`1.0f` - tx), ab - z * sine, ca + y * sine, `0.f`);
863	j = graphene_simd4f_init (ab + z * sine, ty + cosine * (`1.0f` - ty), bc - x * sine, `0.f`);
864	k = graphene_simd4f_init (ca - y * sine, bc + x * sine, tz + cosine * (`1.0f` - tz), `0.f`);
865
866	*m = graphene_simd4x4f_init (x: i, y: j, z: k, graphene_simd4f_init (`0.0f`, `0.0f`, `0.0f`, `1.0f`));
867	}
868
869	/**
870	* graphene_simd4x4f_add:
871	* @a: a #graphene_simd4x4f_t
872	* @b: a #graphene_simd4x4f_t
873	* @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
874	*
875	* Adds each row vector of @a and @b and places the results in @res.
876	*
877	* Since: 1.0
878	*/
879	static inline void
880	graphene_simd4x4f_add (const graphene_simd4x4f_t *a,
881	const graphene_simd4x4f_t *b,
882	graphene_simd4x4f_t *res)
883	{
884	res->x = graphene_simd4f_add (a->x, b->x);
885	res->y = graphene_simd4f_add (a->y, b->y);
886	res->z = graphene_simd4f_add (a->z, b->z);
887	res->w = graphene_simd4f_add (a->w, b->w);
888	}
889
890	/**
891	* graphene_simd4x4f_sub:
892	* @a: a #graphene_simd4x4f_t
893	* @b: a #graphene_simd4x4f_t
894	* @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
895	*
896	* Subtracts each row vector of @a and @b and places the results in @res.
897	*
898	* Since: 1.0
899	*/
900	static inline void
901	graphene_simd4x4f_sub (const graphene_simd4x4f_t *a,
902	const graphene_simd4x4f_t *b,
903	graphene_simd4x4f_t *res)
904	{
905	res->x = graphene_simd4f_sub (a->x, b->x);
906	res->y = graphene_simd4f_sub (a->y, b->y);
907	res->z = graphene_simd4f_sub (a->z, b->z);
908	res->w = graphene_simd4f_sub (a->w, b->w);
909	}
910
911	/**
912	* graphene_simd4x4f_mul:
913	* @a: a #graphene_simd4x4f_t
914	* @b: a #graphene_simd4x4f_t
915	* @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
916	*
917	* Multiplies each row vector of @a and @b and places the results in @res.
918	*
919	* You most likely want graphene_simd4x4f_matrix_mul() instead.
920	*
921	* Since: 1.0
922	*/
923	static inline void
924	graphene_simd4x4f_mul (const graphene_simd4x4f_t *a,
925	const graphene_simd4x4f_t *b,
926	graphene_simd4x4f_t *res)
927	{
928	res->x = graphene_simd4f_mul (a->x, b->x);
929	res->y = graphene_simd4f_mul (a->y, b->y);
930	res->z = graphene_simd4f_mul (a->z, b->z);
931	res->w = graphene_simd4f_mul (a->w, b->w);
932	}
933
934	/**
935	* graphene_simd4x4f_div:
936	* @a: a #graphene_simd4x4f_t
937	* @b: a #graphene_simd4x4f_t
938	* @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
939	*
940	* Divides each row vector of @a and @b and places the results in @res.
941	*
942	* Since: 1.0
943	*/
944	static inline void
945	graphene_simd4x4f_div (const graphene_simd4x4f_t *a,
946	const graphene_simd4x4f_t *b,
947	graphene_simd4x4f_t *res)
948	{
949	res->x = graphene_simd4f_div (a->x, b->x);
950	res->y = graphene_simd4f_div (a->y, b->y);
951	res->z = graphene_simd4f_div (a->z, b->z);
952	res->w = graphene_simd4f_div (a->w, b->w);
953	}
954
955	/**
956	* graphene_simd4x4f_inverse:
957	* @m: a #graphene_simd4x4f_t
958	* @res: (out): return location for the inverse matrix
959	*
960	* Inverts the given #graphene_simd4x4f_t.
961	*
962	* Returns: `true` if the matrix was invertible
963	*
964	* Since: 1.0
965	*/
966	static inline bool
967	graphene_simd4x4f_inverse (const graphene_simd4x4f_t *m,
968	graphene_simd4x4f_t *res)
969	{
970	/ split rows /
971	const graphene_simd4f_t r0 = m->x;
972	const graphene_simd4f_t r1 = m->y;
973	const graphene_simd4f_t r2 = m->z;
974	const graphene_simd4f_t r3 = m->w;
975
976	/ cofactors /
977	const graphene_simd4f_t r0_wxyz = graphene_simd4f_shuffle_wxyz (r0);
978	const graphene_simd4f_t r0_zwxy = graphene_simd4f_shuffle_zwxy (r0);
979	const graphene_simd4f_t r0_yzwx = graphene_simd4f_shuffle_yzwx (r0);
980
981	const graphene_simd4f_t r1_wxyz = graphene_simd4f_shuffle_wxyz (r1);
982	const graphene_simd4f_t r1_zwxy = graphene_simd4f_shuffle_zwxy (r1);
983	const graphene_simd4f_t r1_yzwx = graphene_simd4f_shuffle_yzwx (r1);
984
985	const graphene_simd4f_t r2_wxyz = graphene_simd4f_shuffle_wxyz (r2);
986	const graphene_simd4f_t r2_zwxy = graphene_simd4f_shuffle_zwxy (r2);
987	const graphene_simd4f_t r2_yzwx = graphene_simd4f_shuffle_yzwx (r2);
988
989	const graphene_simd4f_t r3_wxyz = graphene_simd4f_shuffle_wxyz (r3);
990	const graphene_simd4f_t r3_zwxy = graphene_simd4f_shuffle_zwxy (r3);
991	const graphene_simd4f_t r3_yzwx = graphene_simd4f_shuffle_yzwx (r3);
992
993	const graphene_simd4f_t r0_wxyz_x_r1 = graphene_simd4f_mul (r0_wxyz, r1);
994	const graphene_simd4f_t r0_wxyz_x_r1_yzwx = graphene_simd4f_mul (r0_wxyz, r1_yzwx);
995	const graphene_simd4f_t r0_wxyz_x_r1_zwxy = graphene_simd4f_mul (r0_wxyz, r1_zwxy);
996
997	const graphene_simd4f_t r2_wxyz_x_r3 = graphene_simd4f_mul (r2_wxyz, r3);
998	const graphene_simd4f_t r2_wxyz_x_r3_yzwx = graphene_simd4f_mul (r2_wxyz, r3_yzwx);
999	const graphene_simd4f_t r2_wxyz_x_r3_zwxy = graphene_simd4f_mul (r2_wxyz, r3_zwxy);
1000
1001	const graphene_simd4f_t ar1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3_zwxy),
1002	graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3));
1003	const graphene_simd4f_t ar2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3_yzwx),
1004	r2_wxyz_x_r3_yzwx);
1005	const graphene_simd4f_t ar3 = graphene_simd4f_sub (r2_wxyz_x_r3_zwxy,
1006	graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3));
1007
1008	const graphene_simd4f_t br1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r0_wxyz_x_r1_zwxy),
1009	graphene_simd4f_shuffle_zwxy (r0_wxyz_x_r1));
1010	const graphene_simd4f_t br2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r0_wxyz_x_r1_yzwx),
1011	r0_wxyz_x_r1_yzwx);
1012	const graphene_simd4f_t br3 = graphene_simd4f_sub (r0_wxyz_x_r1_zwxy,
1013	graphene_simd4f_shuffle_wxyz (r0_wxyz_x_r1));
1014
1015	const graphene_simd4f_t r0_sum =
1016	graphene_simd4f_madd (m1: r0_yzwx, m2: ar3,
1017	a: graphene_simd4f_madd (m1: r0_zwxy, m2: ar2,
1018	graphene_simd4f_mul (r0_wxyz, ar1)));
1019	const graphene_simd4f_t r1_sum =
1020	graphene_simd4f_madd (m1: r1_wxyz, m2: ar1,
1021	a: graphene_simd4f_madd (m1: r1_zwxy, m2: ar2,
1022	graphene_simd4f_mul (r1_yzwx, ar3)));
1023	const graphene_simd4f_t r2_sum =
1024	graphene_simd4f_madd (m1: r2_yzwx, m2: br3,
1025	a: graphene_simd4f_madd (m1: r2_zwxy, m2: br2,
1026	graphene_simd4f_mul (r2_wxyz, br1)));
1027	const graphene_simd4f_t r3_sum =
1028	graphene_simd4f_madd (m1: r3_yzwx, m2: br3,
1029	a: graphene_simd4f_madd (m1: r3_zwxy, m2: br2,
1030	graphene_simd4f_mul (r3_wxyz, br1)));
1031
1032	/ determinant and its inverse /
1033	const graphene_simd4f_t d0 = graphene_simd4f_mul (r1_sum, r0);
1034	const graphene_simd4f_t d1 = graphene_simd4f_add (d0, graphene_simd4f_merge_high (d0, d0));
1035	const graphene_simd4f_t det = graphene_simd4f_sub (d1, graphene_simd4f_splat_y (d1));
1036	if (fabsf (graphene_simd4f_get_x (det)) >= FLT_EPSILON)
1037	{
1038	const graphene_simd4f_t invdet = graphene_simd4f_splat_x (graphene_simd4f_div (graphene_simd4f_splat (`1.0f`), det));
1039
1040	const graphene_simd4f_t o0 = graphene_simd4f_mul (graphene_simd4f_flip_sign_0101 (r1_sum), invdet);
1041	const graphene_simd4f_t o1 = graphene_simd4f_mul (graphene_simd4f_flip_sign_1010 (r0_sum), invdet);
1042	const graphene_simd4f_t o2 = graphene_simd4f_mul (graphene_simd4f_flip_sign_0101 (r3_sum), invdet);
1043	const graphene_simd4f_t o3 = graphene_simd4f_mul (graphene_simd4f_flip_sign_1010 (r2_sum), invdet);
1044
1045	graphene_simd4x4f_t mt = graphene_simd4x4f_init (x: o0, y: o1, z: o2, w: o3);
1046
1047	/ transpose the resulting matrix /
1048	graphene_simd4x4f_transpose (s: &mt, res);
1049
1050	return true;
1051	}
1052
1053	return false;
1054	}
1055
1056	/**
1057	* graphene_simd4x4f_determinant:
1058	* @m: a #graphene_simd4x4f_t
1059	* @det_r: (out): return location for the matrix determinant
1060	* @invdet_r: (out): return location for the inverse of the matrix
1061	* determinant
1062	*
1063	* Computes the determinant (and its inverse) of the given matrix
1064	*
1065	* Since: 1.0
1066	*/
1067	static inline void
1068	graphene_simd4x4f_determinant (const graphene_simd4x4f_t *m,
1069	graphene_simd4f_t *det_r,
1070	graphene_simd4f_t *invdet_r)
1071	{
1072	/ split rows /
1073	const graphene_simd4f_t r0 = m->x;
1074	const graphene_simd4f_t r1 = m->y;
1075	const graphene_simd4f_t r2 = m->z;
1076	const graphene_simd4f_t r3 = m->w;
1077
1078	/ cofactors /
1079	const graphene_simd4f_t r1_wxyz = graphene_simd4f_shuffle_wxyz (r1);
1080	const graphene_simd4f_t r1_zwxy = graphene_simd4f_shuffle_zwxy (r1);
1081	const graphene_simd4f_t r1_yzwx = graphene_simd4f_shuffle_yzwx (r1);
1082
1083	const graphene_simd4f_t r2_wxyz = graphene_simd4f_shuffle_wxyz (r2);
1084
1085	const graphene_simd4f_t r3_zwxy = graphene_simd4f_shuffle_zwxy (r3);
1086	const graphene_simd4f_t r3_yzwx = graphene_simd4f_shuffle_yzwx (r3);
1087
1088	const graphene_simd4f_t r2_wxyz_x_r3 = graphene_simd4f_mul (r2_wxyz, r3);
1089	const graphene_simd4f_t r2_wxyz_x_r3_yzwx = graphene_simd4f_mul (r2_wxyz, r3_yzwx);
1090	const graphene_simd4f_t r2_wxyz_x_r3_zwxy = graphene_simd4f_mul (r2_wxyz, r3_zwxy);
1091
1092	const graphene_simd4f_t ar1 = graphene_simd4f_sub (graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3_zwxy),
1093	graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3));
1094	const graphene_simd4f_t ar2 = graphene_simd4f_sub (graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3_yzwx),
1095	r2_wxyz_x_r3_yzwx);
1096	const graphene_simd4f_t ar3 = graphene_simd4f_sub (r2_wxyz_x_r3_zwxy,
1097	graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3));
1098
1099	const graphene_simd4f_t r1_sum =
1100	graphene_simd4f_madd (m1: r1_wxyz, m2: ar1,
1101	a: graphene_simd4f_madd (m1: r1_zwxy, m2: ar2,
1102	graphene_simd4f_mul (r1_yzwx, ar3)));
1103
1104	/ determinant and its inverse /
1105	const graphene_simd4f_t d0 = graphene_simd4f_mul (r1_sum, r0);
1106	const graphene_simd4f_t d1 = graphene_simd4f_add (d0, graphene_simd4f_merge_high (d0, d0));
1107
1108	const graphene_simd4f_t det = graphene_simd4f_sub (d1, graphene_simd4f_splat_y (d1));
1109
1110	const graphene_simd4f_t invdet = graphene_simd4f_splat_x (graphene_simd4f_div (graphene_simd4f_splat (`1.0f`), det));
1111
1112	if (det_r != NULL)
1113	*det_r = det;
1114
1115	if (invdet_r != NULL)
1116	*invdet_r = invdet;
1117	}
1118
1119	/**
1120	* graphene_simd4x4f_is_identity:
1121	* @m: a #graphene_simd4x4f_t
1122	*
1123	* Checks whether the given matrix is the identity matrix.
1124	*
1125	* Returns: `true` if the matrix is the identity matrix
1126	*
1127	* Since: 1.0
1128	*/
1129	static inline bool
1130	graphene_simd4x4f_is_identity (const graphene_simd4x4f_t *m)
1131	{
1132	const graphene_simd4f_t r0 = graphene_simd4f_init (`1.0f`, `0.0f`, `0.0f`, `0.0f`);
1133	const graphene_simd4f_t r1 = graphene_simd4f_init (`0.0f`, `1.0f`, `0.0f`, `0.0f`);
1134	const graphene_simd4f_t r2 = graphene_simd4f_init (`0.0f`, `0.0f`, `1.0f`, `0.0f`);
1135	const graphene_simd4f_t r3 = graphene_simd4f_init (`0.0f`, `0.0f`, `0.0f`, `1.0f`);
1136
1137	return graphene_simd4f_cmp_eq (m->x, r0) &&
1138	graphene_simd4f_cmp_eq (m->y, r1) &&
1139	graphene_simd4f_cmp_eq (m->z, r2) &&
1140	graphene_simd4f_cmp_eq (m->w, r3);
1141	}
1142
1143	/**
1144	* graphene_simd4x4f_is_2d:
1145	* @m: a #graphene_simd4x4f_t
1146	*
1147	* Checks whether the given matrix is compatible with an affine
1148	* transformation matrix.
1149	*
1150	* Returns: `true` if the matrix is compatible with an affine
1151	* transformation matrix
1152	*
1153	* Since: 1.0
1154	*/
1155	static inline bool
1156	graphene_simd4x4f_is_2d (const graphene_simd4x4f_t *m)
1157	{
1158	float f[`4`];
1159
1160	if (!(fabsf (graphene_simd4f_get_z (m->x)) < FLT_EPSILON && fabsf (graphene_simd4f_get_w (m->x)) < FLT_EPSILON))
1161	return false;
1162
1163	if (!(fabsf (graphene_simd4f_get_z (m->y)) < FLT_EPSILON && fabsf (graphene_simd4f_get_w (m->y)) < FLT_EPSILON))
1164	return false;
1165
1166	graphene_simd4f_dup_4f (m->z, f);
1167	if (!(fabsf (x: f[`0`]) < FLT_EPSILON &&
1168	fabsf (x: f[`1`]) < FLT_EPSILON &&
1169	`1.f` - fabsf (x: f[`2`]) < FLT_EPSILON &&
1170	fabsf (x: f[`3`]) < FLT_EPSILON))
1171	return false;
1172
1173	if (!(fabsf (graphene_simd4f_get_z (m->w)) < FLT_EPSILON && `1.f` - fabsf (graphene_simd4f_get_w (m->w)) < FLT_EPSILON))
1174	return false;
1175
1176	return true;
1177	}
1178
1179	GRAPHENE_END_DECLS
1180

source code of gtk/subprojects/graphene/include/graphene-simd4x4f.h