1 | /* Utilities for SVE libmvec routines. |
2 | Copyright (C) 2023-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifndef SV_MATH_H |
20 | #define SV_MATH_H |
21 | |
22 | #include <arm_sve.h> |
23 | #include <stdbool.h> |
24 | |
25 | #include "vecmath_config.h" |
26 | |
/* Build the symbol name of an SVE vector routine from the scalar name FUN
   by token pasting.  The F variants append an `f' suffix (float routines),
   the D variants do not (double routines); the 1 variants use the prefix
   `_ZGVsMxv_' and the 2 variants `_ZGVsMxvv_'.
   NOTE(review): the prefix presumably follows the AArch64 vector function
   ABI mangling (SVE, masked, scalable length, one `v' per vector argument)
   -- confirm against the ABI document.  */
#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
#define SV_NAME_D1(fun) _ZGVsMxv_##fun
#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
31 | |
32 | /* Double precision. */ |
/* Broadcast the scalar X into every lane of a signed 64-bit vector.  */
static inline svint64_t
sv_s64 (int64_t x)
{
  svint64_t splat = svdup_n_s64 (x);
  return splat;
}
38 | |
/* Broadcast the scalar X into every lane of an unsigned 64-bit vector.  */
static inline svuint64_t
sv_u64 (uint64_t x)
{
  svuint64_t splat = svdup_n_u64 (x);
  return splat;
}
44 | |
/* Broadcast the scalar X into every lane of a double-precision vector.  */
static inline svfloat64_t
sv_f64 (double x)
{
  svfloat64_t splat = svdup_n_f64 (x);
  return splat;
}
50 | |
/* Scalar fallback for one-argument double routines.  For every lane that
   is active in CMP, apply the scalar routine F to that lane of X and store
   the result in the same lane of Y.  Lanes that are not active in CMP keep
   the value they had in Y.  Returns the merged vector.  */
static inline svfloat64_t
sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
{
  /* LANE walks over the active elements of CMP one at a time; the loop
     stops once LANE no longer overlaps CMP.  */
  for (svbool_t lane = svpfirst (cmp, svpfalse ()); svptest_any (cmp, lane);
       lane = svpnext_b64 (cmp, lane))
    {
      /* Pull the element of X selected by LANE out as a scalar (0 is the
	 fallback when LANE has no active element).  */
      double scalar_in = svclastb_n_f64 (lane, 0, x);
      double scalar_out = f (scalar_in);
      /* Insert the scalar result into Y at the position given by LANE.  */
      y = svsel_f64 (lane, svdup_n_f64 (scalar_out), y);
    }
  return y;
}
65 | |
/* Scalar fallback for two-argument double routines.  For every lane that
   is active in CMP, call F on the matching lanes of X1 and X2 and store the
   result in the same lane of Y.  Lanes that are not active in CMP keep the
   value they had in Y.  Returns the merged vector.  */
static inline svfloat64_t
sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2,
	      svfloat64_t y, svbool_t cmp)
{
  /* LANE walks over the active elements of CMP one at a time; the loop
     stops once LANE no longer overlaps CMP.  */
  for (svbool_t lane = svpfirst (cmp, svpfalse ()); svptest_any (cmp, lane);
       lane = svpnext_b64 (cmp, lane))
    {
      /* Pull both operands for this lane out as scalars (0 is the fallback
	 when LANE has no active element).  */
      double lhs = svclastb_n_f64 (lane, 0, x1);
      double rhs = svclastb_n_f64 (lane, 0, x2);
      double scalar_out = f (lhs, rhs);
      /* Insert the scalar result into Y at the position given by LANE.  */
      y = svsel_f64 (lane, svdup_n_f64 (scalar_out), y);
    }
  return y;
}
82 | |
/* Lane-wise unsigned remainder of X by the scalar Y for the lanes active in
   PG, computed as X - (X / Y) * Y.  The `_x' intrinsics are used, so the
   contents of inactive lanes are unspecified.  */
static inline svuint64_t
sv_mod_n_u64_x (svbool_t pg, svuint64_t x, uint64_t y)
{
  /* Multiply-subtract: X minus the truncated quotient times Y.  */
  return svmls_n_u64_x (pg, x, svdiv_n_u64_x (pg, x, y), y);
}
89 | |
90 | /* Single precision. */ |
/* Broadcast the scalar X into every lane of a signed 32-bit vector.  */
static inline svint32_t
sv_s32 (int32_t x)
{
  svint32_t splat = svdup_n_s32 (x);
  return splat;
}
96 | |
/* Broadcast the scalar X into every lane of an unsigned 32-bit vector.  */
static inline svuint32_t
sv_u32 (uint32_t x)
{
  svuint32_t splat = svdup_n_u32 (x);
  return splat;
}
102 | |
/* Broadcast the scalar X into every lane of a single-precision vector.  */
static inline svfloat32_t
sv_f32 (float x)
{
  svfloat32_t splat = svdup_n_f32 (x);
  return splat;
}
108 | |
/* Scalar fallback for one-argument float routines.  For every lane that is
   active in CMP, apply the scalar routine F to that lane of X and store the
   result in the same lane of Y.  Lanes that are not active in CMP keep the
   value they had in Y.  Returns the merged vector.  */
static inline svfloat32_t
sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
{
  /* LANE walks over the active elements of CMP one at a time; the loop
     stops once LANE no longer overlaps CMP.  */
  for (svbool_t lane = svpfirst (cmp, svpfalse ()); svptest_any (cmp, lane);
       lane = svpnext_b32 (cmp, lane))
    {
      /* Pull the element of X selected by LANE out as a scalar (0 is the
	 fallback when LANE has no active element).  */
      float scalar_in = svclastb_n_f32 (lane, 0, x);
      float scalar_out = (*f) (scalar_in);
      /* Insert the scalar result into Y at the position given by LANE.  */
      y = svsel_f32 (lane, svdup_n_f32 (scalar_out), y);
    }
  return y;
}
123 | |
/* Scalar fallback for two-argument float routines.  For every lane that is
   active in CMP, call F on the matching lanes of X1 and X2 and store the
   result in the same lane of Y.  Lanes that are not active in CMP keep the
   value they had in Y.  Returns the merged vector.  */
static inline svfloat32_t
sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
	      svfloat32_t y, svbool_t cmp)
{
  /* LANE walks over the active elements of CMP one at a time; the loop
     stops once LANE no longer overlaps CMP.  */
  for (svbool_t lane = svpfirst (cmp, svpfalse ()); svptest_any (cmp, lane);
       lane = svpnext_b32 (cmp, lane))
    {
      /* Pull both operands for this lane out as scalars (0 is the fallback
	 when LANE has no active element).  */
      float lhs = svclastb_n_f32 (lane, 0, x1);
      float rhs = svclastb_n_f32 (lane, 0, x2);
      float scalar_out = (*f) (lhs, rhs);
      /* Insert the scalar result into Y at the position given by LANE.  */
      y = svsel_f32 (lane, svdup_n_f32 (scalar_out), y);
    }
  return y;
}
140 | |
141 | #endif |
142 | |