simd_detail.h source code [include/c++/11/experimental/bits/simd_detail.h]

1	// Internal macros for the simd implementation -- C++ --
2
3	// Copyright (C) 2020-2021 Free Software Foundation, Inc.
4	//
5	// This file is part of the GNU ISO C++ Library. This library is free
6	// software; you can redistribute it and/or modify it under the
7	// terms of the GNU General Public License as published by the
8	// Free Software Foundation; either version 3, or (at your option)
9	// any later version.
10
11	// This library is distributed in the hope that it will be useful,
12	// but WITHOUT ANY WARRANTY; without even the implied warranty of
13	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	// GNU General Public License for more details.
15
16	// Under Section 7 of GPL version 3, you are granted additional
17	// permissions described in the GCC Runtime Library Exception, version
18	// 3.1, as published by the Free Software Foundation.
19
20	// You should have received a copy of the GNU General Public License and
21	// a copy of the GCC Runtime Library Exception along with this program;
22	// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23	// <http://www.gnu.org/licenses/>.
24
25	#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
26	#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
27
28	#if __cplusplus >= 201703L
29
30	#include <cstddef>
31	#include <cstdint>
32
33	/// @cond undocumented
34
// Opens, in order: namespace std (with default visibility), whatever
// _GLIBCXX_BEGIN_NAMESPACE_VERSION expands to (defined outside this file),
// namespace experimental, and the inline namespace parallelism_v2.
// Every use must be matched by _GLIBCXX_SIMD_END_NAMESPACE.
35	#define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
36	namespace std _GLIBCXX_VISIBILITY(default) \
37	{ \
38	_GLIBCXX_BEGIN_NAMESPACE_VERSION \
39	namespace experimental { \
40	inline namespace parallelism_v2 {
// Closes, innermost first, everything _GLIBCXX_SIMD_BEGIN_NAMESPACE opened:
// parallelism_v2, experimental, the versioned namespace, then std.
41	#define _GLIBCXX_SIMD_END_NAMESPACE \
42	} \
43	} \
44	_GLIBCXX_END_NAMESPACE_VERSION \
45	}
46
47	// ISA extension detection: defines all the _GLIBCXX_SIMD_HAVE_XXX macros.
48	// ARM{{{
// Every macro in this section is always defined, to 1 or 0, so that it can be
// tested with a plain #if.
// NEON: the compiler predefines __ARM_NEON.
49	#if defined __ARM_NEON
50	#define _GLIBCXX_SIMD_HAVE_NEON 1
51	#else
52	#define _GLIBCXX_SIMD_HAVE_NEON 0
53	#endif
// NEON_A32: NEON with __ARM_ARCH >= 8, or NEON on an AArch64 target.
54	#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
55	#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
56	#else
57	#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
58	#endif
// NEON_A64: NEON on an AArch64 target only.
59	#if defined __ARM_NEON && defined __aarch64__
60	#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
61	#else
62	#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
63	#endif
64	//}}}
65	// x86{{{
// Each _GLIBCXX_SIMD_HAVE_XXX macro is always defined, to 1 or 0.
66	#ifdef __MMX__
67	#define _GLIBCXX_SIMD_HAVE_MMX 1
68	#else
69	#define _GLIBCXX_SIMD_HAVE_MMX 0
70	#endif
// SSE and SSE2 are also reported as available whenever __x86_64__ is defined,
// even if the compiler does not predefine __SSE__ / __SSE2__.
71	#if defined __SSE__ || defined __x86_64__
72	#define _GLIBCXX_SIMD_HAVE_SSE 1
73	#else
74	#define _GLIBCXX_SIMD_HAVE_SSE 0
75	#endif
76	#if defined __SSE2__ || defined __x86_64__
77	#define _GLIBCXX_SIMD_HAVE_SSE2 1
78	#else
79	#define _GLIBCXX_SIMD_HAVE_SSE2 0
80	#endif
// Remaining x86 extensions. Each group maps one compiler-predefined macro
// __<EXT>__ onto _GLIBCXX_SIMD_HAVE_<EXT>, which is 1 when the predefined macro
// exists and 0 otherwise. The names match except for __BMI__, which maps to
// _GLIBCXX_SIMD_HAVE_BMI1.
81	#ifdef __SSE3__
82	#define _GLIBCXX_SIMD_HAVE_SSE3 1
83	#else
84	#define _GLIBCXX_SIMD_HAVE_SSE3 0
85	#endif
86	#ifdef __SSSE3__
87	#define _GLIBCXX_SIMD_HAVE_SSSE3 1
88	#else
89	#define _GLIBCXX_SIMD_HAVE_SSSE3 0
90	#endif
91	#ifdef __SSE4_1__
92	#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
93	#else
94	#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
95	#endif
96	#ifdef __SSE4_2__
97	#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
98	#else
99	#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
100	#endif
101	#ifdef __XOP__
102	#define _GLIBCXX_SIMD_HAVE_XOP 1
103	#else
104	#define _GLIBCXX_SIMD_HAVE_XOP 0
105	#endif
106	#ifdef __AVX__
107	#define _GLIBCXX_SIMD_HAVE_AVX 1
108	#else
109	#define _GLIBCXX_SIMD_HAVE_AVX 0
110	#endif
111	#ifdef __AVX2__
112	#define _GLIBCXX_SIMD_HAVE_AVX2 1
113	#else
114	#define _GLIBCXX_SIMD_HAVE_AVX2 0
115	#endif
// Note the name difference: __BMI__ -> _GLIBCXX_SIMD_HAVE_BMI1.
116	#ifdef __BMI__
117	#define _GLIBCXX_SIMD_HAVE_BMI1 1
118	#else
119	#define _GLIBCXX_SIMD_HAVE_BMI1 0
120	#endif
121	#ifdef __BMI2__
122	#define _GLIBCXX_SIMD_HAVE_BMI2 1
123	#else
124	#define _GLIBCXX_SIMD_HAVE_BMI2 0
125	#endif
126	#ifdef __LZCNT__
127	#define _GLIBCXX_SIMD_HAVE_LZCNT 1
128	#else
129	#define _GLIBCXX_SIMD_HAVE_LZCNT 0
130	#endif
131	#ifdef __SSE4A__
132	#define _GLIBCXX_SIMD_HAVE_SSE4A 1
133	#else
134	#define _GLIBCXX_SIMD_HAVE_SSE4A 0
135	#endif
136	#ifdef __FMA__
137	#define _GLIBCXX_SIMD_HAVE_FMA 1
138	#else
139	#define _GLIBCXX_SIMD_HAVE_FMA 0
140	#endif
141	#ifdef __FMA4__
142	#define _GLIBCXX_SIMD_HAVE_FMA4 1
143	#else
144	#define _GLIBCXX_SIMD_HAVE_FMA4 0
145	#endif
146	#ifdef __F16C__
147	#define _GLIBCXX_SIMD_HAVE_F16C 1
148	#else
149	#define _GLIBCXX_SIMD_HAVE_F16C 0
150	#endif
151	#ifdef __POPCNT__
152	#define _GLIBCXX_SIMD_HAVE_POPCNT 1
153	#else
154	#define _GLIBCXX_SIMD_HAVE_POPCNT 0
155	#endif
156	#ifdef __AVX512F__
157	#define _GLIBCXX_SIMD_HAVE_AVX512F 1
158	#else
159	#define _GLIBCXX_SIMD_HAVE_AVX512F 0
160	#endif
161	#ifdef __AVX512DQ__
162	#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
163	#else
164	#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
165	#endif
166	#ifdef __AVX512VL__
167	#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
168	#else
169	#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
170	#endif
171	#ifdef __AVX512BW__
172	#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
173	#else
174	#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
175	#endif
176
// ABI availability macros, derived from the feature macros and always defined
// to 1 or 0:
//   HAVE_SSE     -> HAVE_SSE_ABI        HAVE_SSE2     -> HAVE_FULL_SSE_ABI
//   HAVE_AVX     -> HAVE_AVX_ABI        HAVE_AVX2     -> HAVE_FULL_AVX_ABI
//   HAVE_AVX512F -> HAVE_AVX512_ABI     HAVE_AVX512BW -> HAVE_FULL_AVX512_ABI
// NOTE(review): what distinguishes a "FULL" ABI is decided by the code using
// these macros, which is not in this file.
177	#if _GLIBCXX_SIMD_HAVE_SSE
178	#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
179	#else
180	#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
181	#endif
182	#if _GLIBCXX_SIMD_HAVE_SSE2
183	#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
184	#else
185	#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
186	#endif
187
188	#if _GLIBCXX_SIMD_HAVE_AVX
189	#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
190	#else
191	#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
192	#endif
193	#if _GLIBCXX_SIMD_HAVE_AVX2
194	#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
195	#else
196	#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
197	#endif
198
199	#if _GLIBCXX_SIMD_HAVE_AVX512F
200	#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
201	#else
202	#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
203	#endif
204	#if _GLIBCXX_SIMD_HAVE_AVX512BW
205	#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
206	#else
207	#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
208	#endif
209
// Sanity check: stop compilation if an x86-64 target ends up with
// _GLIBCXX_SIMD_HAVE_SSE2 equal to 0.
210	#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
211	#error "Use of SSE2 is required on AMD64"
212	#endif
213	//}}}
214
// Function attribute macros. Under Clang, _GLIBCXX_SIMD_NORMAL_MATH and
// _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA expand to nothing; otherwise they expand
// to the GNU attributes below.
215	#ifdef __clang__
216	#define _GLIBCXX_SIMD_NORMAL_MATH
217	#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
218	#else
// Applies the "finite-math-only" and "no-signed-zeros" optimize options to the
// annotated function.
219	#define _GLIBCXX_SIMD_NORMAL_MATH \
220	[[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
// Uses __attribute__ syntax rather than [[...]] -- presumably because of where
// the attribute has to appear on a lambda; confirm at the use sites.
221	#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
222	#endif
223	#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
// Same as _GLIBCXX_SIMD_ALWAYS_INLINE plus the __artificial__ attribute.
224	#define _GLIBCXX_SIMD_INTRINSIC \
225	[[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
226	#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch hints: pass __x to __builtin_expect with an expected value of 0
// (unlikely) or 1 (likely).
227	#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
228	#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
229
// constexpr selection. With __STRICT_ANSI__ set, or under Clang,
// _GLIBCXX_SIMD_CONSTEXPR expands to nothing and
// _GLIBCXX_SIMD_USE_CONSTEXPR_API falls back to const; otherwise both expand
// to constexpr.
230	#if __STRICT_ANSI__ || defined __clang__
231	#define _GLIBCXX_SIMD_CONSTEXPR
232	#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
233	#else
234	#define _GLIBCXX_SIMD_CONSTEXPR constexpr
235	#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
236	#endif
237
// Unlike the two macros above, this one depends on the compiler only (not on
// __STRICT_ANSI__): const under Clang, constexpr everywhere else.
238	#if defined __clang__
239	#define _GLIBCXX_SIMD_USE_CONSTEXPR const
240	#else
241	#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
242	#endif
243
// Operator lists: each _LIST_ macro invokes the caller-supplied function-like
// macro __macro once per operator token.
//   BINARY: | & ^     SHIFTS: << >>     ARITHMETICS: + - * / %
244	#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
245	#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
246	#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
247	__macro(+) __macro(-) __macro(*) __macro(/) __macro(%)
248
// The _ALL_ variants expand the matching list and then append an unterminated
// static_assert(true), so the invocation has to be followed by a semicolon.
249	#define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
250	_GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
251	#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
252	_GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
253	#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
254	_GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)
255
// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, the three
// inlining macros are redefined without any attributes. ALWAYS_INLINE and
// INTRINSIC become plain `inline`; ALWAYS_INLINE_LAMBDA becomes empty.
256	#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
257	#undef _GLIBCXX_SIMD_ALWAYS_INLINE
258	#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
259	#undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
260	#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
261	#undef _GLIBCXX_SIMD_INTRINSIC
262	#define _GLIBCXX_SIMD_INTRINSIC inline
263	#endif
264
// _GLIBCXX_SIMD_X86INTRIN is 1 when either SSE or MMX was detected above, and
// 0 otherwise; it gates the x86-specific workaround macros further down.
265	#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
266	#define _GLIBCXX_SIMD_X86INTRIN 1
267	#else
268	#define _GLIBCXX_SIMD_X86INTRIN 0
269	#endif
270
271	// workaround macros {{{
// Unlike the _HAVE_ macros, each macro in this section is defined to 1 when
// the workaround applies and is left undefined otherwise. The code that tests
// them lives outside this file.
// NOTE(review): the PRnnnnn suffixes look like GCC bug-tracker numbers --
// confirm before relying on that.
272	// Use aliasing loads to help GCC understand the data accesses better.
273	// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
274	// fixed_size_simd<float, 16> x.
275	#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1
276
277	// Vector conversions on x86 are not optimized (x86 intrinsics only):
278	#if _GLIBCXX_SIMD_X86INTRIN
279	#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
280	#endif
281
282	// Integer division is not optimized (enabled for every compiler but Clang):
283	#ifndef __clang__
284	#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
285	#endif
286
287	// Very bad codegen for extraction and concatenation of 128/256 "subregisters"
288	// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
289	#if _GLIBCXX_SIMD_X86INTRIN
290	#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
291	#endif
292
293	// Bad codegen for 8 Byte memcpy to __vector_type_t<char, 16> (always enabled):
294	#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1
295
296	// Bad codegen for zero-extend using simple concat(__x, 0):
297	#if _GLIBCXX_SIMD_X86INTRIN
298	#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
299	#endif
300
301	// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
302	// of static_simd_cast); the fix is always enabled.
303	#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1
304
305	// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
306	// constraint on (static)_simd_cast); the fix is always enabled.
307	#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
308	// }}}
309
310	/// @endcond
311
312	#endif // __cplusplus >= 201703L
313	#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
314
315	// vim: foldmethod=marker
316

source code of include/c++/11/experimental/bits/simd_detail.h