v_math.h source code [libc/AOR_v20.02/math/v_math.h]

1	/*
2	* Vector math abstractions.
3	*
4	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5	* See https://llvm.org/LICENSE.txt for license information.
6	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7	*/
8
9	#ifndef _V_MATH_H
10	#define _V_MATH_H
11
#ifndef WANT_VMATH
/* Enable the build of vector math code unless the includer has already
   chosen a value for WANT_VMATH.  */
# define WANT_VMATH 1
#endif
16	#if WANT_VMATH
17
/* The goal of this header is to allow vector and scalar
   builds of the same algorithm.  The provided intrinsic
   wrappers are also vector length agnostic so they can
   be implemented for SVE too (or other SIMD architectures)
   and then the code should work on those targets too.  */
23
/* V_NAME (x) builds the symbol name for routine X according to the build
   variant: __s_ for the scalar build, __vn_ for the AArch64 vector PCS
   build (which also sets VPCS_ATTR), and __v_ otherwise.  */
#if SCALAR
# define V_NAME(x) __s_##x
#elif VPCS && __aarch64__
# define V_NAME(x) __vn_##x
# define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
#else
# define V_NAME(x) __v_##x
#endif

/* Both VPCS hooks expand to nothing unless something defined them above
   or before this point.  */
#if !defined(VPCS_ATTR)
# define VPCS_ATTR
#endif
#if !defined(VPCS_ALIAS)
# define VPCS_ALIAS
#endif
39
40	#include <stdint.h>
41	#include "math_config.h"
42
/* Short names for the element types: floating point first, then the
   unsigned and signed fixed-width integers of matching size.  */
typedef float f32_t;
typedef double f64_t;
typedef uint32_t u32_t;
typedef uint64_t u64_t;
typedef int32_t s32_t;
typedef int64_t s64_t;
49
/* reinterpret as type1 from type2.  */
51	static inline u32_t
52	as_u32_f32 (f32_t x)
53	{
54	union { f32_t f; u32_t u; } r = {.f: x};
55	return r.u;
56	}
57	static inline f32_t
58	as_f32_u32 (u32_t x)
59	{
60	union { u32_t u; f32_t f; } r = {.u: x};
61	return r.f;
62	}
63	static inline s32_t
64	as_s32_u32 (u32_t x)
65	{
66	union { u32_t u; s32_t i; } r = {.u: x};
67	return r.i;
68	}
69	static inline u32_t
70	as_u32_s32 (s32_t x)
71	{
72	union { s32_t i; u32_t u; } r = {.i: x};
73	return r.u;
74	}
75	static inline u64_t
76	as_u64_f64 (f64_t x)
77	{
78	union { f64_t f; u64_t u; } r = {.f: x};
79	return r.u;
80	}
81	static inline f64_t
82	as_f64_u64 (u64_t x)
83	{
84	union { u64_t u; f64_t f; } r = {.u: x};
85	return r.f;
86	}
87	static inline s64_t
88	as_s64_u64 (u64_t x)
89	{
90	union { u64_t u; s64_t i; } r = {.u: x};
91	return r.i;
92	}
93	static inline u64_t
94	as_u64_s64 (s64_t x)
95	{
96	union { s64_t i; u64_t u; } r = {.i: x};
97	return r.u;
98	}
99
100	#if SCALAR
101	#define V_SUPPORTED 1
102	typedef f32_t v_f32_t;
103	typedef u32_t v_u32_t;
104	typedef s32_t v_s32_t;
105	typedef f64_t v_f64_t;
106	typedef u64_t v_u64_t;
107	typedef s64_t v_s64_t;
108
/* Number of 32-bit lanes in a vector; the scalar build has exactly one.  */
static inline int
v_lanes32 (void)
{
  return 1;
}
114
115	static inline v_f32_t
116	v_f32 (f32_t x)
117	{
118	return x;
119	}
120	static inline v_u32_t
121	v_u32 (u32_t x)
122	{
123	return x;
124	}
125	static inline v_s32_t
126	v_s32 (s32_t x)
127	{
128	return x;
129	}
130
131	static inline f32_t
132	v_get_f32 (v_f32_t x, int i)
133	{
134	return x;
135	}
136	static inline u32_t
137	v_get_u32 (v_u32_t x, int i)
138	{
139	return x;
140	}
141	static inline s32_t
142	v_get_s32 (v_s32_t x, int i)
143	{
144	return x;
145	}
146
147	static inline void
148	v_set_f32 (v_f32_t x, int* i, f32_t v)
149	{
150	*x = v;
151	}
152	static inline void
153	v_set_u32 (v_u32_t x, int* i, u32_t v)
154	{
155	*x = v;
156	}
157	static inline void
158	v_set_s32 (v_s32_t x, int* i, s32_t v)
159	{
160	*x = v;
161	}
162
163	/ true if any elements of a v_cond result is non-zero. /
164	static inline int
165	v_any_u32 (v_u32_t x)
166	{
167	return x != `0`;
168	}
169	/ to wrap the result of relational operators. /
170	static inline v_u32_t
171	v_cond_u32 (v_u32_t x)
172	{
173	return x ? -`1` : `0`;
174	}
175	static inline v_f32_t
176	v_abs_f32 (v_f32_t x)
177	{
178	return __builtin_fabsf (x);
179	}
180	static inline v_f32_t
181	v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
182	{
183	return __builtin_fmaf (x, y, z);
184	}
185	static inline v_f32_t
186	v_round_f32 (v_f32_t x)
187	{
188	return __builtin_roundf (x);
189	}
190	static inline v_s32_t
191	v_round_s32 (v_f32_t x)
192	{
193	return __builtin_lroundf (x); / relies on -fno-math-errno. /
194	}
/* convert to type1 from type2.  */
196	static inline v_f32_t
197	v_to_f32_s32 (v_s32_t x)
198	{
199	return x;
200	}
201	static inline v_f32_t
202	v_to_f32_u32 (v_u32_t x)
203	{
204	return x;
205	}
/* reinterpret as type1 from type2.  */
207	static inline v_u32_t
208	v_as_u32_f32 (v_f32_t x)
209	{
210	union { v_f32_t f; v_u32_t u; } r = {.f: x};
211	return r.u;
212	}
213	static inline v_f32_t
214	v_as_f32_u32 (v_u32_t x)
215	{
216	union { v_u32_t u; v_f32_t f; } r = {.u: x};
217	return r.f;
218	}
219	static inline v_s32_t
220	v_as_s32_u32 (v_u32_t x)
221	{
222	union { v_u32_t u; v_s32_t i; } r = {.u: x};
223	return r.i;
224	}
225	static inline v_u32_t
226	v_as_u32_s32 (v_s32_t x)
227	{
228	union { v_s32_t i; v_u32_t u; } r = {.i: x};
229	return r.u;
230	}
231	static inline v_f32_t
232	v_lookup_f32 (const f32_t *tab, v_u32_t idx)
233	{
234	return tab[idx];
235	}
236	static inline v_u32_t
237	v_lookup_u32 (const u32_t *tab, v_u32_t idx)
238	{
239	return tab[idx];
240	}
241	static inline v_f32_t
242	v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
243	{
244	return f (x);
245	}
246	static inline v_f32_t
247	v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
248	v_u32_t p)
249	{
250	return f (x1, x2);
251	}
252
/* Number of 64-bit lanes in a vector; the scalar build has exactly one.  */
static inline int
v_lanes64 (void)
{
  return 1;
}
258	static inline v_f64_t
259	v_f64 (f64_t x)
260	{
261	return x;
262	}
263	static inline v_u64_t
264	v_u64 (u64_t x)
265	{
266	return x;
267	}
268	static inline v_s64_t
269	v_s64 (s64_t x)
270	{
271	return x;
272	}
273	static inline f64_t
274	v_get_f64 (v_f64_t x, int i)
275	{
276	return x;
277	}
278	static inline void
279	v_set_f64 (v_f64_t x, int* i, f64_t v)
280	{
281	*x = v;
282	}
283	/ true if any elements of a v_cond result is non-zero. /
284	static inline int
285	v_any_u64 (v_u64_t x)
286	{
287	return x != `0`;
288	}
289	/ to wrap the result of relational operators. /
290	static inline v_u64_t
291	v_cond_u64 (v_u64_t x)
292	{
293	return x ? -`1` : `0`;
294	}
295	static inline v_f64_t
296	v_abs_f64 (v_f64_t x)
297	{
298	return __builtin_fabs (x);
299	}
300	static inline v_f64_t
301	v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
302	{
303	return __builtin_fma (x, y, z);
304	}
305	static inline v_f64_t
306	v_round_f64 (v_f64_t x)
307	{
308	return __builtin_round (x);
309	}
310	static inline v_s64_t
311	v_round_s64 (v_f64_t x)
312	{
313	return __builtin_lround (x); / relies on -fno-math-errno. /
314	}
/* convert to type1 from type2.  */
316	static inline v_f64_t
317	v_to_f64_s64 (v_s64_t x)
318	{
319	return x;
320	}
321	static inline v_f64_t
322	v_to_f64_u64 (v_u64_t x)
323	{
324	return x;
325	}
/* reinterpret as type1 from type2.  */
327	static inline v_u64_t
328	v_as_u64_f64 (v_f64_t x)
329	{
330	union { v_f64_t f; v_u64_t u; } r = {.f: x};
331	return r.u;
332	}
333	static inline v_f64_t
334	v_as_f64_u64 (v_u64_t x)
335	{
336	union { v_u64_t u; v_f64_t f; } r = {.u: x};
337	return r.f;
338	}
339	static inline v_s64_t
340	v_as_s64_u64 (v_u64_t x)
341	{
342	union { v_u64_t u; v_s64_t i; } r = {.u: x};
343	return r.i;
344	}
345	static inline v_u64_t
346	v_as_u64_s64 (v_s64_t x)
347	{
348	union { v_s64_t i; v_u64_t u; } r = {.i: x};
349	return r.u;
350	}
351	static inline v_f64_t
352	v_lookup_f64 (const f64_t *tab, v_u64_t idx)
353	{
354	return tab[idx];
355	}
356	static inline v_u64_t
357	v_lookup_u64 (const u64_t *tab, v_u64_t idx)
358	{
359	return tab[idx];
360	}
361	static inline v_f64_t
362	v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
363	{
364	return f (x);
365	}
366
367	#elif __aarch64__
368	#define V_SUPPORTED 1
369	#include <arm_neon.h>
370	typedef float32x4_t v_f32_t;
371	typedef uint32x4_t v_u32_t;
372	typedef int32x4_t v_s32_t;
373	typedef float64x2_t v_f64_t;
374	typedef uint64x2_t v_u64_t;
375	typedef int64x2_t v_s64_t;
376
/* Number of 32-bit lanes in a vector; four in this build.  */
static inline int
v_lanes32 (void)
{
  return 4;
}
382
383	static inline v_f32_t
384	v_f32 (f32_t x)
385	{
386	return (v_f32_t){x, x, x, x};
387	}
388	static inline v_u32_t
389	v_u32 (u32_t x)
390	{
391	return (v_u32_t){x, x, x, x};
392	}
393	static inline v_s32_t
394	v_s32 (s32_t x)
395	{
396	return (v_s32_t){x, x, x, x};
397	}
398
399	static inline f32_t
400	v_get_f32 (v_f32_t x, int i)
401	{
402	return x[i];
403	}
404	static inline u32_t
405	v_get_u32 (v_u32_t x, int i)
406	{
407	return x[i];
408	}
409	static inline s32_t
410	v_get_s32 (v_s32_t x, int i)
411	{
412	return x[i];
413	}
414
415	static inline void
416	v_set_f32 (v_f32_t x, int* i, f32_t v)
417	{
418	(*x)[i] = v;
419	}
420	static inline void
421	v_set_u32 (v_u32_t x, int* i, u32_t v)
422	{
423	(*x)[i] = v;
424	}
425	static inline void
426	v_set_s32 (v_s32_t x, int* i, s32_t v)
427	{
428	(*x)[i] = v;
429	}
430
431	/ true if any elements of a v_cond result is non-zero. /
432	static inline int
433	v_any_u32 (v_u32_t x)
434	{
435	/ assume elements in x are either 0 or -1u. /
436	return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != `0`;
437	}
438	/ to wrap the result of relational operators. /
439	static inline v_u32_t
440	v_cond_u32 (v_u32_t x)
441	{
442	return x;
443	}
444	static inline v_f32_t
445	v_abs_f32 (v_f32_t x)
446	{
447	return vabsq_f32 (x);
448	}
449	static inline v_f32_t
450	v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
451	{
452	return vfmaq_f32 (z, x, y);
453	}
454	static inline v_f32_t
455	v_round_f32 (v_f32_t x)
456	{
457	return vrndaq_f32 (x);
458	}
459	static inline v_s32_t
460	v_round_s32 (v_f32_t x)
461	{
462	return vcvtaq_s32_f32 (x);
463	}
/* convert to type1 from type2.  */
465	static inline v_f32_t
466	v_to_f32_s32 (v_s32_t x)
467	{
468	return (v_f32_t){x[`0`], x[`1`], x[`2`], x[`3`]};
469	}
470	static inline v_f32_t
471	v_to_f32_u32 (v_u32_t x)
472	{
473	return (v_f32_t){x[`0`], x[`1`], x[`2`], x[`3`]};
474	}
/* reinterpret as type1 from type2.  */
476	static inline v_u32_t
477	v_as_u32_f32 (v_f32_t x)
478	{
479	union { v_f32_t f; v_u32_t u; } r = {x};
480	return r.u;
481	}
482	static inline v_f32_t
483	v_as_f32_u32 (v_u32_t x)
484	{
485	union { v_u32_t u; v_f32_t f; } r = {x};
486	return r.f;
487	}
488	static inline v_s32_t
489	v_as_s32_u32 (v_u32_t x)
490	{
491	union { v_u32_t u; v_s32_t i; } r = {x};
492	return r.i;
493	}
494	static inline v_u32_t
495	v_as_u32_s32 (v_s32_t x)
496	{
497	union { v_s32_t i; v_u32_t u; } r = {x};
498	return r.u;
499	}
500	static inline v_f32_t
501	v_lookup_f32 (const f32_t *tab, v_u32_t idx)
502	{
503	return (v_f32_t){tab[idx[`0`]], tab[idx[`1`]], tab[idx[`2`]], tab[idx[`3`]]};
504	}
505	static inline v_u32_t
506	v_lookup_u32 (const u32_t *tab, v_u32_t idx)
507	{
508	return (v_u32_t){tab[idx[`0`]], tab[idx[`1`]], tab[idx[`2`]], tab[idx[`3`]]};
509	}
510	static inline v_f32_t
511	v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
512	{
513	return (v_f32_t){p[`0`] ? f (x[`0`]) : y[`0`], p[`1`] ? f (x[`1`]) : y[`1`],
514	p[`2`] ? f (x[`2`]) : y[`2`], p[`3`] ? f (x[`3`]) : y[`3`]};
515	}
516	static inline v_f32_t
517	v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
518	v_u32_t p)
519	{
520	return (
521	v_f32_t){p[`0`] ? f (x1[`0`], x2[`0`]) : y[`0`], p[`1`] ? f (x1[`1`], x2[`1`]) : y[`1`],
522	p[`2`] ? f (x1[`2`], x2[`2`]) : y[`2`], p[`3`] ? f (x1[`3`], x2[`3`]) : y[`3`]};
523	}
524
/* Number of 64-bit lanes in a vector; two in this build.  */
static inline int
v_lanes64 (void)
{
  return 2;
}
530	static inline v_f64_t
531	v_f64 (f64_t x)
532	{
533	return (v_f64_t){x, x};
534	}
535	static inline v_u64_t
536	v_u64 (u64_t x)
537	{
538	return (v_u64_t){x, x};
539	}
540	static inline v_s64_t
541	v_s64 (s64_t x)
542	{
543	return (v_s64_t){x, x};
544	}
545	static inline f64_t
546	v_get_f64 (v_f64_t x, int i)
547	{
548	return x[i];
549	}
550	static inline void
551	v_set_f64 (v_f64_t x, int* i, f64_t v)
552	{
553	(*x)[i] = v;
554	}
555	/ true if any elements of a v_cond result is non-zero. /
556	static inline int
557	v_any_u64 (v_u64_t x)
558	{
559	/ assume elements in x are either 0 or -1u. /
560	return vpaddd_u64 (x) != `0`;
561	}
562	/ to wrap the result of relational operators. /
563	static inline v_u64_t
564	v_cond_u64 (v_u64_t x)
565	{
566	return x;
567	}
568	static inline v_f64_t
569	v_abs_f64 (v_f64_t x)
570	{
571	return vabsq_f64 (x);
572	}
573	static inline v_f64_t
574	v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
575	{
576	return vfmaq_f64 (z, x, y);
577	}
578	static inline v_f64_t
579	v_round_f64 (v_f64_t x)
580	{
581	return vrndaq_f64 (x);
582	}
583	static inline v_s64_t
584	v_round_s64 (v_f64_t x)
585	{
586	return vcvtaq_s64_f64 (x);
587	}
/* convert to type1 from type2.  */
589	static inline v_f64_t
590	v_to_f64_s64 (v_s64_t x)
591	{
592	return (v_f64_t){x[`0`], x[`1`]};
593	}
594	static inline v_f64_t
595	v_to_f64_u64 (v_u64_t x)
596	{
597	return (v_f64_t){x[`0`], x[`1`]};
598	}
/* reinterpret as type1 from type2.  */
600	static inline v_u64_t
601	v_as_u64_f64 (v_f64_t x)
602	{
603	union { v_f64_t f; v_u64_t u; } r = {x};
604	return r.u;
605	}
606	static inline v_f64_t
607	v_as_f64_u64 (v_u64_t x)
608	{
609	union { v_u64_t u; v_f64_t f; } r = {x};
610	return r.f;
611	}
612	static inline v_s64_t
613	v_as_s64_u64 (v_u64_t x)
614	{
615	union { v_u64_t u; v_s64_t i; } r = {x};
616	return r.i;
617	}
618	static inline v_u64_t
619	v_as_u64_s64 (v_s64_t x)
620	{
621	union { v_s64_t i; v_u64_t u; } r = {x};
622	return r.u;
623	}
624	static inline v_f64_t
625	v_lookup_f64 (const f64_t *tab, v_u64_t idx)
626	{
627	return (v_f64_t){tab[idx[`0`]], tab[idx[`1`]]};
628	}
629	static inline v_u64_t
630	v_lookup_u64 (const u64_t *tab, v_u64_t idx)
631	{
632	return (v_u64_t){tab[idx[`0`]], tab[idx[`1`]]};
633	}
634	static inline v_f64_t
635	v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
636	{
637	return (v_f64_t){p[`0`] ? f (x[`0`]) : y[`0`], p[`1`] ? f (x[`1`]) : y[`1`]};
638	}
639	#endif
640
641	#endif
642	#endif
643

source code of libc/AOR_v20.02/math/v_math.h