| 1 | /* Utilities for Advanced SIMD libmvec routines. |
| 2 | Copyright (C) 2023-2024 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library; if not, see |
| 17 | <https://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | #ifndef _V_MATH_H |
| 20 | #define _V_MATH_H |
| 21 | |
| 22 | #include <arm_neon.h> |
| 23 | #include "vecmath_config.h" |
| 24 | |
/* All vector routines use the aarch64_vector_pcs calling convention,
   which preserves more SIMD state across calls than the base PCS.  */
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))

/* Build the ABI-mangled public names: _ZGVnN<lanes><args>_<name>, per
   the AArch64 vector function ABI ('n' = Advanced SIMD, 'N' = unmasked,
   one 'v' per vector argument; float variants get an 'f' suffix).  */
#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
#define V_NAME_D1(fun) _ZGVnN2v_##fun
#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
#define V_NAME_D2(fun) _ZGVnN2vv_##fun

#include "advsimd_f32_protos.h"

/* Define the 2-lane single-precision variant of FUN by duplicating the
   input into both halves of a 4-lane vector, calling the 4-lane
   routine, and returning the low half of the result.  */
#define HALF_WIDTH_ALIAS_F1(fun)                                              \
  float32x2_t VPCS_ATTR _ZGVnN2v_##fun##f (float32x2_t x)                     \
  {                                                                           \
    return vget_low_f32 (_ZGVnN4v_##fun##f (vcombine_f32 (x, x)));            \
  }

/* Same as above for two-argument routines.  */
#define HALF_WIDTH_ALIAS_F2(fun)                                              \
  float32x2_t VPCS_ATTR _ZGVnN2vv_##fun##f (float32x2_t x, float32x2_t y)     \
  {                                                                           \
    return vget_low_f32 (                                                     \
	_ZGVnN4vv_##fun##f (vcombine_f32 (x, x), vcombine_f32 (y, y)));       \
  }

/* Shorthand helpers for declaring constants.  */
#define V2(X) { X, X }
#define V4(X) { X, X, X, X }
#define V8(X) { X, X, X, X, X, X, X, X }
| 51 | |
/* Return non-zero iff any 16-bit lane of the compare result X is set.  */
static inline int
v_any_u16h (uint16x4_t x)
{
  uint64_t bits = vget_lane_u64 (vreinterpret_u64_u16 (x), 0);
  return bits != 0;
}
| 57 | |
/* Broadcast the scalar X to all four float lanes.  */
static inline float32x4_t
v_f32 (float x)
{
  return vdupq_n_f32 (x);
}
/* Broadcast the scalar X to all four unsigned 32-bit lanes.  */
static inline uint32x4_t
v_u32 (uint32_t x)
{
  return vdupq_n_u32 (x);
}
/* Broadcast the scalar X to all four signed 32-bit lanes.  */
static inline int32x4_t
v_s32 (int32_t x)
{
  return vdupq_n_s32 (x);
}
| 73 | |
/* Return non-zero iff any 32-bit lane of the compare result X is set.  */
static inline int
v_any_u32 (uint32x4_t x)
{
  /* Lanes are all-ones or all-zeros, so the pairwise 64-bit sum of the
     two halves cannot wrap to zero unless every lane is zero.  */
  uint64_t sum = vpaddd_u64 (vreinterpretq_u64_u32 (x));
  return sum != 0;
}
/* Return non-zero iff any 32-bit lane of the half-width compare result
   X is set.  */
static inline int
v_any_u32h (uint32x2_t x)
{
  uint64_t bits = vget_lane_u64 (vreinterpret_u64_u32 (x), 0);
  return bits != 0;
}
/* Gather four floats from TAB at the per-lane indices in IDX.  */
static inline float32x4_t
v_lookup_f32 (const float *tab, uint32x4_t idx)
{
  float e0 = tab[idx[0]];
  float e1 = tab[idx[1]];
  float e2 = tab[idx[2]];
  float e3 = tab[idx[3]];
  return (float32x4_t){ e0, e1, e2, e3 };
}
/* Gather four 32-bit words from TAB at the per-lane indices in IDX.  */
static inline uint32x4_t
v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
{
  uint32_t e0 = tab[idx[0]];
  uint32_t e1 = tab[idx[1]];
  uint32_t e2 = tab[idx[2]];
  uint32_t e3 = tab[idx[3]];
  return (uint32x4_t){ e0, e1, e2, e3 };
}
/* For each lane whose mask bit in P is set, replace that lane of Y with
   the scalar fallback F applied to the corresponding lane of X.  */
static inline float32x4_t
v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
{
  float32x4_t r = y;
  if (p[0])
    r[0] = f (x[0]);
  if (p[1])
    r[1] = f (x[1]);
  if (p[2])
    r[2] = f (x[2]);
  if (p[3])
    r[3] = f (x[3]);
  return r;
}
/* Two-argument variant of v_call_f32: for each lane with mask bit set
   in P, replace that lane of Y with F (x1 lane, x2 lane).  */
static inline float32x4_t
v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
	     float32x4_t y, uint32x4_t p)
{
  float32x4_t r = y;
  if (p[0])
    r[0] = f (x1[0], x2[0]);
  if (p[1])
    r[1] = f (x1[1], x2[1]);
  if (p[2])
    r[2] = f (x1[2], x2[2]);
  if (p[3])
    r[3] = f (x1[3], x2[3]);
  return r;
}
| 111 | |
/* Broadcast the scalar X to both double lanes.  */
static inline float64x2_t
v_f64 (double x)
{
  return vdupq_n_f64 (x);
}
/* Broadcast the scalar X to both unsigned 64-bit lanes.  */
static inline uint64x2_t
v_u64 (uint64_t x)
{
  return vdupq_n_u64 (x);
}
/* Broadcast the scalar X to both signed 64-bit lanes.  */
static inline int64x2_t
v_s64 (int64_t x)
{
  return vdupq_n_s64 (x);
}
| 127 | |
/* Return non-zero iff any 64-bit lane of the compare result X is set.  */
static inline int
v_any_u64 (uint64x2_t x)
{
  /* Lanes are 0 or -1, so the pairwise sum is zero only when both
     lanes are zero.  */
  uint64_t sum = vpaddd_u64 (x);
  return sum != 0;
}
/* Return non-zero iff both 64-bit lanes of the compare result X are
   set.  */
static inline int
v_all_u64 (uint64x2_t x)
{
  /* Each lane is 0 or -1; the signed pairwise sum equals -2 exactly
     when both lanes are all-ones.  */
  int64_t sum = vpaddd_s64 (vreinterpretq_s64_u64 (x));
  return sum == -2;
}
/* Gather two doubles from TAB at the per-lane indices in IDX.  */
static inline float64x2_t
v_lookup_f64 (const double *tab, uint64x2_t idx)
{
  double e0 = tab[idx[0]];
  double e1 = tab[idx[1]];
  return (float64x2_t){ e0, e1 };
}
/* Gather two 64-bit words from TAB at the per-lane indices in IDX.  */
static inline uint64x2_t
v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
{
  uint64_t e0 = tab[idx[0]];
  uint64_t e1 = tab[idx[1]];
  return (uint64x2_t){ e0, e1 };
}
/* For each lane whose mask bit in P is set, replace that lane of Y with
   the scalar fallback F applied to the corresponding lane of X.  */
static inline float64x2_t
v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
{
  /* Lane 1 inputs are read before branching on lane 0, presumably so
     both loads can be issued early — keep that ordering.  */
  double need1 = p[1];
  double in1 = x[1];
  if (__glibc_likely (p[0]))
    y[0] = f (x[0]);
  if (__glibc_likely (need1))
    y[1] = f (in1);
  return y;
}
/* Two-argument variant of v_call_f64: for each lane with mask bit set
   in P, replace that lane of Y with F (x1 lane, x2 lane).  */
static inline float64x2_t
v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
	     float64x2_t y, uint64x2_t p)
{
  float64x2_t r = y;
  if (p[0])
    r[0] = f (x1[0], x2[0]);
  if (p[1])
    r[1] = f (x1[1], x2[1]);
  return r;
}
| 170 | |
| 171 | #endif |
| 172 | |