// Internal macros for the simd implementation -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

#if __cplusplus >= 201703L

#include <cstddef>
#include <cstdint>

/// @cond undocumented

// Opens the namespace nesting used by the simd implementation, outermost
// first: std (default visibility), the _GLIBCXX_BEGIN_NAMESPACE_VERSION hook,
// experimental, and the inline namespace parallelism_v2.
// _GLIBCXX_VISIBILITY and _GLIBCXX_BEGIN_NAMESPACE_VERSION are not defined in
// this file; they have to be visible wherever this macro is expanded.
#define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
  namespace std _GLIBCXX_VISIBILITY(default) \
  { \
    _GLIBCXX_BEGIN_NAMESPACE_VERSION \
    namespace experimental { \
    inline namespace parallelism_v2 {

// Closes what _GLIBCXX_SIMD_BEGIN_NAMESPACE opened, innermost first:
// parallelism_v2, experimental, the _GLIBCXX_END_NAMESPACE_VERSION hook, std.
#define _GLIBCXX_SIMD_END_NAMESPACE \
  } \
  } \
  _GLIBCXX_END_NAMESPACE_VERSION \
  }

// ISA extension detection. The following defines all the
// _GLIBCXX_SIMD_HAVE_XXX macros. Each of them is always defined, to either 1
// or 0, so it can be used directly in #if expressions.
// ARM{{{
// NEON: 1 when the compiler predefines __ARM_NEON.
#if defined __ARM_NEON
#define _GLIBCXX_SIMD_HAVE_NEON 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON 0
#endif
// NEON_A32: NEON plus either __ARM_ARCH >= 8 or an __aarch64__ target.
#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#endif
// NEON_A64: NEON on an __aarch64__ target.
#if defined __ARM_NEON && defined __aarch64__
#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
//}}}
// x86{{{
// Each _GLIBCXX_SIMD_HAVE_<EXT> macro below is 1 when the compiler predefines
// the matching feature macro and 0 otherwise. All of them are always defined,
// so they can be used directly in #if expressions.
#ifdef __MMX__
#define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#define _GLIBCXX_SIMD_HAVE_MMX 0
#endif
// SSE and SSE2 are also reported whenever __x86_64__ is defined, even if the
// compiler does not predefine __SSE__ / __SSE2__.
#if defined __SSE__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined __SSE2__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif
#ifdef __SSE3__
#define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif
#ifdef __SSSE3__
#define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif
#ifdef __SSE4_1__
#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif
#ifdef __SSE4_2__
#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
#ifdef __XOP__
#define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#define _GLIBCXX_SIMD_HAVE_XOP 0
#endif
#ifdef __AVX__
#define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX 0
#endif
#ifdef __AVX2__
#define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif
// Note the naming: the compiler macro is __BMI__, the simd macro is ..._BMI1.
#ifdef __BMI__
#define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif
#ifdef __BMI2__
#define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif
#ifdef __LZCNT__
#define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif
#ifdef __SSE4A__
#define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif
#ifdef __FMA__
#define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA 0
#endif
#ifdef __FMA4__
#define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif
#ifdef __F16C__
#define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#define _GLIBCXX_SIMD_HAVE_F16C 0
#endif
#ifdef __POPCNT__
#define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
#ifdef __AVX512F__
#define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif
#ifdef __AVX512DQ__
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif
#ifdef __AVX512VL__
#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif
#ifdef __AVX512BW__
#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif

// ABI availability, derived from the feature macros above:
//   ..._HAVE_SSE_ABI         follows SSE,     ..._HAVE_FULL_SSE_ABI    follows SSE2
//   ..._HAVE_AVX_ABI         follows AVX,     ..._HAVE_FULL_AVX_ABI    follows AVX2
//   ..._HAVE_AVX512_ABI      follows AVX512F, ..._HAVE_FULL_AVX512_ABI follows AVX512BW
#if _GLIBCXX_SIMD_HAVE_SSE
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_SSE2
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX2
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX512F
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX512BW
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#endif

// Consistency check: on __x86_64__ the SSE2 macro above must have come out
// as 1; anything else is a hard error.
#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
#error "Use of SSE2 is required on AMD64"
#endif
//}}}

// Function attribute helpers.
// Under clang, _GLIBCXX_SIMD_NORMAL_MATH and
// _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA both expand to nothing. Otherwise the
// former applies the "finite-math-only" and "no-signed-zeros" optimize
// options to the annotated function, and the latter is always_inline in the
// GNU __attribute__ spelling.
#ifdef __clang__
#define _GLIBCXX_SIMD_NORMAL_MATH
#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
#else
#define _GLIBCXX_SIMD_NORMAL_MATH \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
#endif
// Never inline the annotated function.
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
// Inline function that is always inlined and marked artificial.
#define _GLIBCXX_SIMD_INTRINSIC \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
// Inline function that is always inlined (without the artificial marker).
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch hints: both yield the value of __x; they only tell the compiler
// that the expected value is 0 (unlikely) or 1 (likely).
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)

// constexpr selection.
// With a non-zero __STRICT_ANSI__, or under clang, _GLIBCXX_SIMD_CONSTEXPR
// expands to nothing and _GLIBCXX_SIMD_USE_CONSTEXPR_API falls back to const.
// Otherwise both expand to constexpr.
#if __STRICT_ANSI__ || defined __clang__
#define _GLIBCXX_SIMD_CONSTEXPR
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#else
#define _GLIBCXX_SIMD_CONSTEXPR constexpr
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#endif

// _GLIBCXX_SIMD_USE_CONSTEXPR depends on the compiler only: const under
// clang, constexpr otherwise.
#if defined __clang__
#define _GLIBCXX_SIMD_USE_CONSTEXPR const
#else
#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#endif

// Operator lists. Each _GLIBCXX_SIMD_LIST_* macro invokes __macro once per
// operator token of its group, in the order written below.
#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
  __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)

// The _GLIBCXX_SIMD_ALL_* variants append `static_assert(true)`, so the
// expansion ends in a declaration and the use site supplies the final ';'.
#define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
  _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
  _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)

// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, the forced
// inlining macros are redefined without their attributes.
// _GLIBCXX_SIMD_ALWAYS_INLINE and _GLIBCXX_SIMD_INTRINSIC become plain
// `inline`; _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA becomes empty.
#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
#undef _GLIBCXX_SIMD_ALWAYS_INLINE
#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
#undef _GLIBCXX_SIMD_INTRINSIC
#define _GLIBCXX_SIMD_INTRINSIC inline
#endif

// _GLIBCXX_SIMD_X86INTRIN is 1 when either _GLIBCXX_SIMD_HAVE_SSE or
// _GLIBCXX_SIMD_HAVE_MMX is non-zero, and 0 otherwise. It is always defined.
#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
#define _GLIBCXX_SIMD_X86INTRIN 1
#else
#define _GLIBCXX_SIMD_X86INTRIN 0
#endif

// workaround macros {{{
// Unlike the _GLIBCXX_SIMD_HAVE_* macros, the conditional workaround macros
// in this section are either defined to 1 or left undefined; none of them is
// ever defined to 0.

// use aliasing loads to help GCC understand the data accesses better
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// vector conversions on x86 not optimized:
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
#endif

// integer division not optimized
#ifndef __clang__
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
#endif

// very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
#endif

// bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// bad codegen for zero-extend using simple concat(__x, 0)
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
#endif

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
// }}}

/// @endcond

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

// vim: foldmethod=marker
