1/*
2 * Vector math abstractions.
3 *
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 */
8
9#ifndef _V_MATH_H
10#define _V_MATH_H
11
#ifndef WANT_VMATH
/* Enable the build of vector math code.  Defaults to 1 when the includer
   has not set it; a value of 0 compiles out the rest of this header.  */
# define WANT_VMATH 1
#endif
16#if WANT_VMATH
17
/* The goal of this header is to allow vector and scalar
   builds of the same algorithm.  The provided intrinsic
   wrappers are also vector-length agnostic, so they can
   be implemented for SVE too (or other SIMD architectures),
   and then the code should work on those targets too.  */
23
/* V_NAME (x) builds the symbol name for routine x: prefix __s_ in the
   scalar build, __vn_ in the AArch64 vector-PCS build (which also sets
   VPCS_ATTR to the aarch64_vector_pcs attribute), and __v_ otherwise.  */
#if SCALAR
#define V_NAME(x) __s_##x
#elif VPCS && __aarch64__
#define V_NAME(x) __vn_##x
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
#else
#define V_NAME(x) __v_##x
#endif

/* VPCS_ATTR expands to nothing unless it was defined above.  */
#ifndef VPCS_ATTR
#define VPCS_ATTR
#endif
/* VPCS_ALIAS expands to nothing unless it was defined before this point.  */
#ifndef VPCS_ALIAS
#define VPCS_ALIAS
#endif
39
40#include <stdint.h>
41#include "math_config.h"
42
/* Short aliases for the scalar element types used by the wrappers.  */
/* Floating-point elements.  */
typedef float f32_t;
typedef double f64_t;
/* Unsigned integer elements.  */
typedef uint32_t u32_t;
typedef uint64_t u64_t;
/* Signed integer elements.  */
typedef int32_t s32_t;
typedef int64_t s64_t;
49
50/* reinterpret as type1 from type2. */
51static inline u32_t
52as_u32_f32 (f32_t x)
53{
54 union { f32_t f; u32_t u; } r = {.f: x};
55 return r.u;
56}
57static inline f32_t
58as_f32_u32 (u32_t x)
59{
60 union { u32_t u; f32_t f; } r = {.u: x};
61 return r.f;
62}
63static inline s32_t
64as_s32_u32 (u32_t x)
65{
66 union { u32_t u; s32_t i; } r = {.u: x};
67 return r.i;
68}
69static inline u32_t
70as_u32_s32 (s32_t x)
71{
72 union { s32_t i; u32_t u; } r = {.i: x};
73 return r.u;
74}
75static inline u64_t
76as_u64_f64 (f64_t x)
77{
78 union { f64_t f; u64_t u; } r = {.f: x};
79 return r.u;
80}
81static inline f64_t
82as_f64_u64 (u64_t x)
83{
84 union { u64_t u; f64_t f; } r = {.u: x};
85 return r.f;
86}
87static inline s64_t
88as_s64_u64 (u64_t x)
89{
90 union { u64_t u; s64_t i; } r = {.u: x};
91 return r.i;
92}
93static inline u64_t
94as_u64_s64 (s64_t x)
95{
96 union { s64_t i; u64_t u; } r = {.i: x};
97 return r.u;
98}
99
100#if SCALAR
101#define V_SUPPORTED 1
102typedef f32_t v_f32_t;
103typedef u32_t v_u32_t;
104typedef s32_t v_s32_t;
105typedef f64_t v_f64_t;
106typedef u64_t v_u64_t;
107typedef s64_t v_s64_t;
108
/* Number of 32-bit elements per vector; the scalar build has one.  */
static inline int
v_lanes32 (void)
{
  enum { lanes = 1 };
  return lanes;
}
114
115static inline v_f32_t
116v_f32 (f32_t x)
117{
118 return x;
119}
120static inline v_u32_t
121v_u32 (u32_t x)
122{
123 return x;
124}
125static inline v_s32_t
126v_s32 (s32_t x)
127{
128 return x;
129}
130
131static inline f32_t
132v_get_f32 (v_f32_t x, int i)
133{
134 return x;
135}
136static inline u32_t
137v_get_u32 (v_u32_t x, int i)
138{
139 return x;
140}
141static inline s32_t
142v_get_s32 (v_s32_t x, int i)
143{
144 return x;
145}
146
147static inline void
148v_set_f32 (v_f32_t *x, int i, f32_t v)
149{
150 *x = v;
151}
152static inline void
153v_set_u32 (v_u32_t *x, int i, u32_t v)
154{
155 *x = v;
156}
157static inline void
158v_set_s32 (v_s32_t *x, int i, s32_t v)
159{
160 *x = v;
161}
162
163/* true if any elements of a v_cond result is non-zero. */
164static inline int
165v_any_u32 (v_u32_t x)
166{
167 return x != 0;
168}
169/* to wrap the result of relational operators. */
170static inline v_u32_t
171v_cond_u32 (v_u32_t x)
172{
173 return x ? -1 : 0;
174}
175static inline v_f32_t
176v_abs_f32 (v_f32_t x)
177{
178 return __builtin_fabsf (x);
179}
180static inline v_f32_t
181v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
182{
183 return __builtin_fmaf (x, y, z);
184}
185static inline v_f32_t
186v_round_f32 (v_f32_t x)
187{
188 return __builtin_roundf (x);
189}
190static inline v_s32_t
191v_round_s32 (v_f32_t x)
192{
193 return __builtin_lroundf (x); /* relies on -fno-math-errno. */
194}
195/* convert to type1 from type2. */
196static inline v_f32_t
197v_to_f32_s32 (v_s32_t x)
198{
199 return x;
200}
201static inline v_f32_t
202v_to_f32_u32 (v_u32_t x)
203{
204 return x;
205}
206/* reinterpret as type1 from type2. */
207static inline v_u32_t
208v_as_u32_f32 (v_f32_t x)
209{
210 union { v_f32_t f; v_u32_t u; } r = {.f: x};
211 return r.u;
212}
213static inline v_f32_t
214v_as_f32_u32 (v_u32_t x)
215{
216 union { v_u32_t u; v_f32_t f; } r = {.u: x};
217 return r.f;
218}
219static inline v_s32_t
220v_as_s32_u32 (v_u32_t x)
221{
222 union { v_u32_t u; v_s32_t i; } r = {.u: x};
223 return r.i;
224}
225static inline v_u32_t
226v_as_u32_s32 (v_s32_t x)
227{
228 union { v_s32_t i; v_u32_t u; } r = {.i: x};
229 return r.u;
230}
231static inline v_f32_t
232v_lookup_f32 (const f32_t *tab, v_u32_t idx)
233{
234 return tab[idx];
235}
236static inline v_u32_t
237v_lookup_u32 (const u32_t *tab, v_u32_t idx)
238{
239 return tab[idx];
240}
241static inline v_f32_t
242v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
243{
244 return f (x);
245}
246static inline v_f32_t
247v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
248 v_u32_t p)
249{
250 return f (x1, x2);
251}
252
/* Number of 64-bit elements per vector; the scalar build has one.  */
static inline int
v_lanes64 (void)
{
  enum { lanes = 1 };
  return lanes;
}
258static inline v_f64_t
259v_f64 (f64_t x)
260{
261 return x;
262}
263static inline v_u64_t
264v_u64 (u64_t x)
265{
266 return x;
267}
268static inline v_s64_t
269v_s64 (s64_t x)
270{
271 return x;
272}
273static inline f64_t
274v_get_f64 (v_f64_t x, int i)
275{
276 return x;
277}
278static inline void
279v_set_f64 (v_f64_t *x, int i, f64_t v)
280{
281 *x = v;
282}
283/* true if any elements of a v_cond result is non-zero. */
284static inline int
285v_any_u64 (v_u64_t x)
286{
287 return x != 0;
288}
289/* to wrap the result of relational operators. */
290static inline v_u64_t
291v_cond_u64 (v_u64_t x)
292{
293 return x ? -1 : 0;
294}
295static inline v_f64_t
296v_abs_f64 (v_f64_t x)
297{
298 return __builtin_fabs (x);
299}
300static inline v_f64_t
301v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
302{
303 return __builtin_fma (x, y, z);
304}
305static inline v_f64_t
306v_round_f64 (v_f64_t x)
307{
308 return __builtin_round (x);
309}
310static inline v_s64_t
311v_round_s64 (v_f64_t x)
312{
313 return __builtin_lround (x); /* relies on -fno-math-errno. */
314}
315/* convert to type1 from type2. */
316static inline v_f64_t
317v_to_f64_s64 (v_s64_t x)
318{
319 return x;
320}
321static inline v_f64_t
322v_to_f64_u64 (v_u64_t x)
323{
324 return x;
325}
326/* reinterpret as type1 from type2. */
327static inline v_u64_t
328v_as_u64_f64 (v_f64_t x)
329{
330 union { v_f64_t f; v_u64_t u; } r = {.f: x};
331 return r.u;
332}
333static inline v_f64_t
334v_as_f64_u64 (v_u64_t x)
335{
336 union { v_u64_t u; v_f64_t f; } r = {.u: x};
337 return r.f;
338}
339static inline v_s64_t
340v_as_s64_u64 (v_u64_t x)
341{
342 union { v_u64_t u; v_s64_t i; } r = {.u: x};
343 return r.i;
344}
345static inline v_u64_t
346v_as_u64_s64 (v_s64_t x)
347{
348 union { v_s64_t i; v_u64_t u; } r = {.i: x};
349 return r.u;
350}
351static inline v_f64_t
352v_lookup_f64 (const f64_t *tab, v_u64_t idx)
353{
354 return tab[idx];
355}
356static inline v_u64_t
357v_lookup_u64 (const u64_t *tab, v_u64_t idx)
358{
359 return tab[idx];
360}
361static inline v_f64_t
362v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
363{
364 return f (x);
365}
366
367#elif __aarch64__
368#define V_SUPPORTED 1
369#include <arm_neon.h>
370typedef float32x4_t v_f32_t;
371typedef uint32x4_t v_u32_t;
372typedef int32x4_t v_s32_t;
373typedef float64x2_t v_f64_t;
374typedef uint64x2_t v_u64_t;
375typedef int64x2_t v_s64_t;
376
/* Number of 32-bit elements per vector in this build.  */
static inline int
v_lanes32 (void)
{
  enum { lanes = 4 };
  return lanes;
}
382
383static inline v_f32_t
384v_f32 (f32_t x)
385{
386 return (v_f32_t){x, x, x, x};
387}
388static inline v_u32_t
389v_u32 (u32_t x)
390{
391 return (v_u32_t){x, x, x, x};
392}
393static inline v_s32_t
394v_s32 (s32_t x)
395{
396 return (v_s32_t){x, x, x, x};
397}
398
399static inline f32_t
400v_get_f32 (v_f32_t x, int i)
401{
402 return x[i];
403}
404static inline u32_t
405v_get_u32 (v_u32_t x, int i)
406{
407 return x[i];
408}
409static inline s32_t
410v_get_s32 (v_s32_t x, int i)
411{
412 return x[i];
413}
414
415static inline void
416v_set_f32 (v_f32_t *x, int i, f32_t v)
417{
418 (*x)[i] = v;
419}
420static inline void
421v_set_u32 (v_u32_t *x, int i, u32_t v)
422{
423 (*x)[i] = v;
424}
425static inline void
426v_set_s32 (v_s32_t *x, int i, s32_t v)
427{
428 (*x)[i] = v;
429}
430
431/* true if any elements of a v_cond result is non-zero. */
432static inline int
433v_any_u32 (v_u32_t x)
434{
435 /* assume elements in x are either 0 or -1u. */
436 return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
437}
438/* to wrap the result of relational operators. */
439static inline v_u32_t
440v_cond_u32 (v_u32_t x)
441{
442 return x;
443}
444static inline v_f32_t
445v_abs_f32 (v_f32_t x)
446{
447 return vabsq_f32 (x);
448}
449static inline v_f32_t
450v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
451{
452 return vfmaq_f32 (z, x, y);
453}
454static inline v_f32_t
455v_round_f32 (v_f32_t x)
456{
457 return vrndaq_f32 (x);
458}
459static inline v_s32_t
460v_round_s32 (v_f32_t x)
461{
462 return vcvtaq_s32_f32 (x);
463}
464/* convert to type1 from type2. */
465static inline v_f32_t
466v_to_f32_s32 (v_s32_t x)
467{
468 return (v_f32_t){x[0], x[1], x[2], x[3]};
469}
470static inline v_f32_t
471v_to_f32_u32 (v_u32_t x)
472{
473 return (v_f32_t){x[0], x[1], x[2], x[3]};
474}
475/* reinterpret as type1 from type2. */
476static inline v_u32_t
477v_as_u32_f32 (v_f32_t x)
478{
479 union { v_f32_t f; v_u32_t u; } r = {x};
480 return r.u;
481}
482static inline v_f32_t
483v_as_f32_u32 (v_u32_t x)
484{
485 union { v_u32_t u; v_f32_t f; } r = {x};
486 return r.f;
487}
488static inline v_s32_t
489v_as_s32_u32 (v_u32_t x)
490{
491 union { v_u32_t u; v_s32_t i; } r = {x};
492 return r.i;
493}
494static inline v_u32_t
495v_as_u32_s32 (v_s32_t x)
496{
497 union { v_s32_t i; v_u32_t u; } r = {x};
498 return r.u;
499}
500static inline v_f32_t
501v_lookup_f32 (const f32_t *tab, v_u32_t idx)
502{
503 return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
504}
505static inline v_u32_t
506v_lookup_u32 (const u32_t *tab, v_u32_t idx)
507{
508 return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
509}
510static inline v_f32_t
511v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
512{
513 return (v_f32_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
514 p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]};
515}
516static inline v_f32_t
517v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
518 v_u32_t p)
519{
520 return (
521 v_f32_t){p[0] ? f (x1[0], x2[0]) : y[0], p[1] ? f (x1[1], x2[1]) : y[1],
522 p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]};
523}
524
/* Number of 64-bit elements per vector in this build.  */
static inline int
v_lanes64 (void)
{
  enum { lanes = 2 };
  return lanes;
}
530static inline v_f64_t
531v_f64 (f64_t x)
532{
533 return (v_f64_t){x, x};
534}
535static inline v_u64_t
536v_u64 (u64_t x)
537{
538 return (v_u64_t){x, x};
539}
540static inline v_s64_t
541v_s64 (s64_t x)
542{
543 return (v_s64_t){x, x};
544}
545static inline f64_t
546v_get_f64 (v_f64_t x, int i)
547{
548 return x[i];
549}
550static inline void
551v_set_f64 (v_f64_t *x, int i, f64_t v)
552{
553 (*x)[i] = v;
554}
555/* true if any elements of a v_cond result is non-zero. */
556static inline int
557v_any_u64 (v_u64_t x)
558{
559 /* assume elements in x are either 0 or -1u. */
560 return vpaddd_u64 (x) != 0;
561}
562/* to wrap the result of relational operators. */
563static inline v_u64_t
564v_cond_u64 (v_u64_t x)
565{
566 return x;
567}
568static inline v_f64_t
569v_abs_f64 (v_f64_t x)
570{
571 return vabsq_f64 (x);
572}
573static inline v_f64_t
574v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
575{
576 return vfmaq_f64 (z, x, y);
577}
578static inline v_f64_t
579v_round_f64 (v_f64_t x)
580{
581 return vrndaq_f64 (x);
582}
583static inline v_s64_t
584v_round_s64 (v_f64_t x)
585{
586 return vcvtaq_s64_f64 (x);
587}
588/* convert to type1 from type2. */
589static inline v_f64_t
590v_to_f64_s64 (v_s64_t x)
591{
592 return (v_f64_t){x[0], x[1]};
593}
594static inline v_f64_t
595v_to_f64_u64 (v_u64_t x)
596{
597 return (v_f64_t){x[0], x[1]};
598}
599/* reinterpret as type1 from type2. */
600static inline v_u64_t
601v_as_u64_f64 (v_f64_t x)
602{
603 union { v_f64_t f; v_u64_t u; } r = {x};
604 return r.u;
605}
606static inline v_f64_t
607v_as_f64_u64 (v_u64_t x)
608{
609 union { v_u64_t u; v_f64_t f; } r = {x};
610 return r.f;
611}
612static inline v_s64_t
613v_as_s64_u64 (v_u64_t x)
614{
615 union { v_u64_t u; v_s64_t i; } r = {x};
616 return r.i;
617}
618static inline v_u64_t
619v_as_u64_s64 (v_s64_t x)
620{
621 union { v_s64_t i; v_u64_t u; } r = {x};
622 return r.u;
623}
624static inline v_f64_t
625v_lookup_f64 (const f64_t *tab, v_u64_t idx)
626{
627 return (v_f64_t){tab[idx[0]], tab[idx[1]]};
628}
629static inline v_u64_t
630v_lookup_u64 (const u64_t *tab, v_u64_t idx)
631{
632 return (v_u64_t){tab[idx[0]], tab[idx[1]]};
633}
634static inline v_f64_t
635v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
636{
637 return (v_f64_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1]};
638}
639#endif
640
641#endif
642#endif
643

/* source code of libc/AOR_v20.02/math/v_math.h */