// Internal macros for the simd implementation -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

// Everything below is only provided when compiling as C++17 or later.
#if __cplusplus >= 201703L

#include <cstddef>
#include <cstdint>

/// @cond undocumented

// Opens namespace std (with default visibility), the libstdc++ versioned
// namespace, namespace experimental and the inline namespace parallelism_v2.
// Every use must be paired with _GLIBCXX_SIMD_END_NAMESPACE.
#define _GLIBCXX_SIMD_BEGIN_NAMESPACE                                          \
  namespace std _GLIBCXX_VISIBILITY(default)                                   \
  {                                                                            \
    _GLIBCXX_BEGIN_NAMESPACE_VERSION                                           \
    namespace experimental {                                                   \
    inline namespace parallelism_v2 {
// Closes, innermost first, the scopes opened by _GLIBCXX_SIMD_BEGIN_NAMESPACE.
#define _GLIBCXX_SIMD_END_NAMESPACE                                            \
    }                                                                          \
    }                                                                          \
    _GLIBCXX_END_NAMESPACE_VERSION                                             \
  }

// ISA extension detection. The following defines all the _GLIBCXX_SIMD_HAVE_XXX
// macros. Each of them is always defined and expands to either 0 or 1, so it
// can be used in #if as well as in C++ expressions.
// ARM{{{
#if defined __ARM_NEON
#define _GLIBCXX_SIMD_HAVE_NEON 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON 0
#endif
// NEON together with either __ARM_ARCH >= 8 or an AArch64 target.
#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#endif
// NEON on an AArch64 target.
#if defined __ARM_NEON && defined __aarch64__
#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
//}}}
// x86{{{
#ifdef __MMX__
#define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#define _GLIBCXX_SIMD_HAVE_MMX 0
#endif
// SSE and SSE2 are reported as available on every x86-64 target, even if the
// corresponding __SSE__/__SSE2__ macro is not defined.
#if defined __SSE__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined __SSE2__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif
#ifdef __SSE3__
#define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif
#ifdef __SSSE3__
#define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif
#ifdef __SSE4_1__
#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif
#ifdef __SSE4_2__
#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
#ifdef __XOP__
#define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#define _GLIBCXX_SIMD_HAVE_XOP 0
#endif
#ifdef __AVX__
#define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX 0
#endif
#ifdef __AVX2__
#define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif
// Note the name difference: the compiler macro is __BMI__, ours is ..._BMI1.
#ifdef __BMI__
#define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif
#ifdef __BMI2__
#define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif
#ifdef __LZCNT__
#define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif
#ifdef __SSE4A__
#define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif
#ifdef __FMA__
#define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA 0
#endif
#ifdef __FMA4__
#define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif
#ifdef __F16C__
#define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#define _GLIBCXX_SIMD_HAVE_F16C 0
#endif
#ifdef __POPCNT__
#define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
#ifdef __AVX512F__
#define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif
#ifdef __AVX512DQ__
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif
#ifdef __AVX512VL__
#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif
#ifdef __AVX512BW__
#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif

// ABI availability, derived from the ISA macros above. For every vector width
// the plain ..._ABI macro follows the base extension (SSE, AVX, AVX512F) and
// the ..._FULL_..._ABI macro follows the next one (SSE2, AVX2, AVX512BW).
#if _GLIBCXX_SIMD_HAVE_SSE
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_SSE2
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX2
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX512F
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX512BW
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#endif

// Consistency check. _GLIBCXX_SIMD_HAVE_SSE2 is defined to 1 above whenever
// __x86_64__ is defined, so with the current definitions this cannot trigger.
#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
#error "Use of SSE2 is required on AMD64"
#endif
//}}}

// Function attribute helpers.
// _GLIBCXX_SIMD_NORMAL_MATH and _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA expand to
// nothing when compiling with clang; otherwise they expand to an optimize
// attribute ("finite-math-only,no-signed-zeros") and to the always_inline
// attribute in GNU __attribute__ syntax, respectively.
#ifdef __clang__
#define _GLIBCXX_SIMD_NORMAL_MATH
#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
#else
#define _GLIBCXX_SIMD_NORMAL_MATH                                              \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
#endif
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
#define _GLIBCXX_SIMD_INTRINSIC                                                \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch prediction hints; both forward __x to __builtin_expect unchanged.
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)

// With __STRICT_ANSI__ or clang, _GLIBCXX_SIMD_CONSTEXPR expands to nothing and
// _GLIBCXX_SIMD_USE_CONSTEXPR_API falls back to const; otherwise both are
// constexpr.
#if __STRICT_ANSI__ || defined __clang__
#define _GLIBCXX_SIMD_CONSTEXPR
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#else
#define _GLIBCXX_SIMD_CONSTEXPR constexpr
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#endif

// Like _GLIBCXX_SIMD_USE_CONSTEXPR_API, but depends on clang only (not on
// __STRICT_ANSI__).
#if defined __clang__
#define _GLIBCXX_SIMD_USE_CONSTEXPR const
#else
#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#endif

// X-macro lists: invoke __macro once per operator token of the group.
#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro)                                \
  __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)

// Same as the _LIST_ variants, but the expansion ends in a static_assert(true)
// without its semicolon, so an invocation has to be terminated with `;`.
#define _GLIBCXX_SIMD_ALL_BINARY(__macro)                                      \
  _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro)                                      \
  _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro)                                 \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)

// Opt-out switch: if _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, the
// always_inline (and artificial) attributes are dropped from the helpers
// defined above, leaving plain `inline` (or nothing for lambdas).
#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
#undef _GLIBCXX_SIMD_ALWAYS_INLINE
#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
#undef _GLIBCXX_SIMD_INTRINSIC
#define _GLIBCXX_SIMD_INTRINSIC inline
#endif

// 1 if either SSE or MMX is available, 0 otherwise.
#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
#define _GLIBCXX_SIMD_X86INTRIN 1
#else
#define _GLIBCXX_SIMD_X86INTRIN 0
#endif

// workaround macros {{{
// Unlike the HAVE macros above, the conditional workaround macros are left
// undefined (rather than defined to 0) when their condition does not hold.

// use aliasing loads to help GCC understand the data accesses better
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// vector conversions on x86 not optimized:
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
#endif

// integer division not optimized
#ifndef __clang__
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
#endif

// very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
#endif

// bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// bad codegen for zero-extend using simple concat(__x, 0)
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
#endif

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
// }}}

/// @endcond

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

// vim: foldmethod=marker