/* Utilities for Advanced SIMD libmvec routines.
   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19#ifndef _V_MATH_H
20#define _V_MATH_H
21
22#include <arm_neon.h>
23#include "vecmath_config.h"
24
/* Function attribute selecting the aarch64_vector_pcs calling convention
   for the function it is applied to.  */
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))

/* Build the symbol name of a vector routine from the scalar name FUN by
   token pasting.  The F variants prepend _ZGVnN4v_ (F1) or _ZGVnN4vv_ (F2)
   and append an `f' suffix; the D variants prepend _ZGVnN2v_ (D1) or
   _ZGVnN2vv_ (D2) and append nothing.
   NOTE(review): the prefixes look like vector function ABI name mangling
   (lane count, then one `v' per vector argument) -- confirm against the
   AArch64 vector function ABI.  */
#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
#define V_NAME_D1(fun) _ZGVnN2v_##fun
#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
#define V_NAME_D2(fun) _ZGVnN2vv_##fun
31
32#include "advsimd_f32_protos.h"
33
/* Define the 2-lane single-precision routine _ZGVnN2v_<fun>f in terms of
   the 4-lane routine _ZGVnN4v_<fun>f: the 64-bit argument is duplicated
   into both halves of a 128-bit vector, the 4-lane routine is called, and
   the low half of its result is returned.  */
#define HALF_WIDTH_ALIAS_F1(fun)                                              \
  float32x2_t VPCS_ATTR _ZGVnN2v_##fun##f (float32x2_t x)                     \
  {                                                                           \
    float32x4_t wide_x = vcombine_f32 (x, x);                                 \
    float32x4_t wide_r = _ZGVnN4v_##fun##f (wide_x);                          \
    return vget_low_f32 (wide_r);                                             \
  }
39
/* Two-argument counterpart of the half-width alias: define the 2-lane
   routine _ZGVnN2vv_<fun>f by duplicating each 64-bit argument into a
   128-bit vector, calling the 4-lane routine _ZGVnN4vv_<fun>f, and
   returning the low half of its result.  */
#define HALF_WIDTH_ALIAS_F2(fun)                                              \
  float32x2_t VPCS_ATTR _ZGVnN2vv_##fun##f (float32x2_t x, float32x2_t y)     \
  {                                                                           \
    float32x4_t wide_x = vcombine_f32 (x, x);                                 \
    float32x4_t wide_y = vcombine_f32 (y, y);                                 \
    float32x4_t wide_r = _ZGVnN4vv_##fun##f (wide_x, wide_y);                 \
    return vget_low_f32 (wide_r);                                             \
  }
46
/* Shorthand helpers for declaring constants.  VN (X) expands to a brace
   initializer that repeats X N times.  X is substituted textually, so an
   argument with side effects would be evaluated once per repetition.  */
#define V2(X) { X, X }
#define V4(X) { X, X, X, X }
#define V8(X) { X, X, X, X, X, X, X, X }
51
52static inline int
53v_any_u16h (uint16x4_t x)
54{
55 return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
56}
57
58static inline float32x4_t
59v_f32 (float x)
60{
61 return (float32x4_t) V4 (x);
62}
63static inline uint32x4_t
64v_u32 (uint32_t x)
65{
66 return (uint32x4_t) V4 (x);
67}
68static inline int32x4_t
69v_s32 (int32_t x)
70{
71 return (int32x4_t) V4 (x);
72}
73
74/* true if any elements of a vector compare result is non-zero. */
75static inline int
76v_any_u32 (uint32x4_t x)
77{
78 /* assume elements in x are either 0 or -1u. */
79 return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
80}
81static inline int
82v_any_u32h (uint32x2_t x)
83{
84 return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0;
85}
86static inline float32x4_t
87v_lookup_f32 (const float *tab, uint32x4_t idx)
88{
89 return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
90}
91static inline uint32x4_t
92v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
93{
94 return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
95}
96static inline float32x4_t
97v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
98{
99 return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
100 p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] };
101}
102static inline float32x4_t
103v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
104 float32x4_t y, uint32x4_t p)
105{
106 return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0],
107 p[1] ? f (x1[1], x2[1]) : y[1],
108 p[2] ? f (x1[2], x2[2]) : y[2],
109 p[3] ? f (x1[3], x2[3]) : y[3] };
110}
111
112static inline float64x2_t
113v_f64 (double x)
114{
115 return (float64x2_t) V2 (x);
116}
117static inline uint64x2_t
118v_u64 (uint64_t x)
119{
120 return (uint64x2_t) V2 (x);
121}
122static inline int64x2_t
123v_s64 (int64_t x)
124{
125 return (int64x2_t) V2 (x);
126}
127
128/* true if any elements of a vector compare result is non-zero. */
129static inline int
130v_any_u64 (uint64x2_t x)
131{
132 /* assume elements in x are either 0 or -1u. */
133 return vpaddd_u64 (x) != 0;
134}
135/* true if all elements of a vector compare result is 1. */
136static inline int
137v_all_u64 (uint64x2_t x)
138{
139 /* assume elements in x are either 0 or -1u. */
140 return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2;
141}
142static inline float64x2_t
143v_lookup_f64 (const double *tab, uint64x2_t idx)
144{
145 return (float64x2_t){ tab[idx[0]], tab[idx[1]] };
146}
147static inline uint64x2_t
148v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
149{
150 return (uint64x2_t){ tab[idx[0]], tab[idx[1]] };
151}
152static inline float64x2_t
153v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
154{
155 double p1 = p[1];
156 double x1 = x[1];
157 if (__glibc_likely (p[0]))
158 y[0] = f (x[0]);
159 if (__glibc_likely (p1))
160 y[1] = f (x1);
161 return y;
162}
163static inline float64x2_t
164v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
165 float64x2_t y, uint64x2_t p)
166{
167 return (float64x2_t){ p[0] ? f (x1[0], x2[0]) : y[0],
168 p[1] ? f (x1[1], x2[1]) : y[1] };
169}
170
171#endif
172

/* Source: glibc/sysdeps/aarch64/fpu/v_math.h.  */