1 | // Definition of the public simd interfaces -*- C++ -*- |
2 | |
3 | // Copyright (C) 2020-2021 Free Software Foundation, Inc. |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free |
6 | // software; you can redistribute it and/or modify it under the |
7 | // terms of the GNU General Public License as published by the |
8 | // Free Software Foundation; either version 3, or (at your option) |
9 | // any later version. |
10 | |
11 | // This library is distributed in the hope that it will be useful, |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | // GNU General Public License for more details. |
15 | |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version |
18 | // 3.1, as published by the Free Software Foundation. |
19 | |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
23 | // <http://www.gnu.org/licenses/>. |
24 | |
25 | #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H |
26 | #define _GLIBCXX_EXPERIMENTAL_SIMD_H |
27 | |
28 | #if __cplusplus >= 201703L |
29 | |
30 | #include "simd_detail.h" |
31 | #include "numeric_traits.h" |
32 | #include <bit> |
33 | #include <bitset> |
34 | #ifdef _GLIBCXX_DEBUG_UB |
35 | #include <cstdio> // for stderr |
36 | #endif |
37 | #include <cstring> |
38 | #include <functional> |
39 | #include <iosfwd> |
40 | #include <utility> |
41 | |
42 | #if _GLIBCXX_SIMD_X86INTRIN |
43 | #include <x86intrin.h> |
44 | #elif _GLIBCXX_SIMD_HAVE_NEON |
45 | #include <arm_neon.h> |
46 | #endif |
47 | |
48 | /** @ingroup ts_simd |
49 | * @{ |
50 | */ |
51 | /* There are several closely related types, with the following naming |
52 | * convention: |
53 | * _Tp: vectorizable (arithmetic) type (or any type) |
54 | * _TV: __vector_type_t<_Tp, _Np> |
55 | * _TW: _SimdWrapper<_Tp, _Np> |
56 | * _TI: __intrinsic_type_t<_Tp, _Np> |
57 | * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW> |
58 | * If one additional type is needed use _U instead of _T. |
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
60 | * |
61 | * More naming conventions: |
62 | * _Ap or _Abi: An ABI tag from the simd_abi namespace |
63 | * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp), |
64 | * _IV, _IW as for _TV, _TW |
65 | * _Np: number of elements (not bytes) |
66 | * _Bytes: number of bytes |
67 | * |
68 | * Variable names: |
69 | * __k: mask object (vector- or bitmask) |
70 | */ |
71 | _GLIBCXX_SIMD_BEGIN_NAMESPACE |
72 | |
73 | #if !_GLIBCXX_SIMD_X86INTRIN |
74 | using __m128 [[__gnu__::__vector_size__(16)]] = float; |
75 | using __m128d [[__gnu__::__vector_size__(16)]] = double; |
76 | using __m128i [[__gnu__::__vector_size__(16)]] = long long; |
77 | using __m256 [[__gnu__::__vector_size__(32)]] = float; |
78 | using __m256d [[__gnu__::__vector_size__(32)]] = double; |
79 | using __m256i [[__gnu__::__vector_size__(32)]] = long long; |
80 | using __m512 [[__gnu__::__vector_size__(64)]] = float; |
81 | using __m512d [[__gnu__::__vector_size__(64)]] = double; |
82 | using __m512i [[__gnu__::__vector_size__(64)]] = long long; |
83 | #endif |
84 | |
85 | namespace simd_abi { |
86 | // simd_abi forward declarations {{{ |
87 | // implementation details: |
88 | struct _Scalar; |
89 | |
90 | template <int _Np> |
91 | struct _Fixed; |
92 | |
93 | // There are two major ABIs that appear on different architectures. |
94 | // Both have non-boolean values packed into an N Byte register |
95 | // -> #elements = N / sizeof(T) |
96 | // Masks differ: |
97 | // 1. Use value vector registers for masks (all 0 or all 1) |
98 | // 2. Use bitmasks (mask registers) with one bit per value in the corresponding |
99 | // value vector |
100 | // |
101 | // Both can be partially used, masking off the rest when doing horizontal |
102 | // operations or operations that can trap (e.g. FP_INVALID or integer division |
103 | // by 0). This is encoded as the number of used bytes. |
104 | template <int _UsedBytes> |
105 | struct _VecBuiltin; |
106 | |
107 | template <int _UsedBytes> |
108 | struct _VecBltnBtmsk; |
109 | |
110 | template <typename _Tp, int _Np> |
111 | using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>; |
112 | |
113 | template <int _UsedBytes = 16> |
114 | using _Sse = _VecBuiltin<_UsedBytes>; |
115 | |
116 | template <int _UsedBytes = 32> |
117 | using _Avx = _VecBuiltin<_UsedBytes>; |
118 | |
119 | template <int _UsedBytes = 64> |
120 | using _Avx512 = _VecBltnBtmsk<_UsedBytes>; |
121 | |
122 | template <int _UsedBytes = 16> |
123 | using _Neon = _VecBuiltin<_UsedBytes>; |
124 | |
125 | // implementation-defined: |
126 | using __sse = _Sse<>; |
127 | using __avx = _Avx<>; |
128 | using __avx512 = _Avx512<>; |
129 | using __neon = _Neon<>; |
130 | using __neon128 = _Neon<16>; |
131 | using __neon64 = _Neon<8>; |
132 | |
133 | // standard: |
134 | template <typename _Tp, size_t _Np, typename...> |
135 | struct deduce; |
136 | |
137 | template <int _Np> |
138 | using fixed_size = _Fixed<_Np>; |
139 | |
140 | using scalar = _Scalar; |
141 | |
142 | // }}} |
143 | } // namespace simd_abi |
144 | // forward declarations is_simd(_mask), simd(_mask), simd_size {{{ |
145 | template <typename _Tp> |
146 | struct is_simd; |
147 | |
148 | template <typename _Tp> |
149 | struct is_simd_mask; |
150 | |
151 | template <typename _Tp, typename _Abi> |
152 | class simd; |
153 | |
154 | template <typename _Tp, typename _Abi> |
155 | class simd_mask; |
156 | |
157 | template <typename _Tp, typename _Abi> |
158 | struct simd_size; |
159 | |
160 | // }}} |
161 | // load/store flags {{{ |
162 | struct element_aligned_tag |
163 | { |
164 | template <typename _Tp, typename _Up = typename _Tp::value_type> |
165 | static constexpr size_t _S_alignment = alignof(_Up); |
166 | |
167 | template <typename _Tp, typename _Up> |
168 | _GLIBCXX_SIMD_INTRINSIC static constexpr _Up* |
169 | _S_apply(_Up* __ptr) |
170 | { return __ptr; } |
171 | }; |
172 | |
173 | struct vector_aligned_tag |
174 | { |
175 | template <typename _Tp, typename _Up = typename _Tp::value_type> |
176 | static constexpr size_t _S_alignment |
177 | = std::__bit_ceil(sizeof(_Up) * _Tp::size()); |
178 | |
179 | template <typename _Tp, typename _Up> |
180 | _GLIBCXX_SIMD_INTRINSIC static constexpr _Up* |
181 | _S_apply(_Up* __ptr) |
182 | { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); } |
183 | }; |
184 | |
185 | template <size_t _Np> struct overaligned_tag |
186 | { |
187 | template <typename _Tp, typename _Up = typename _Tp::value_type> |
188 | static constexpr size_t _S_alignment = _Np; |
189 | |
190 | template <typename _Tp, typename _Up> |
191 | _GLIBCXX_SIMD_INTRINSIC static constexpr _Up* |
192 | _S_apply(_Up* __ptr) |
193 | { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); } |
194 | }; |
195 | |
196 | inline constexpr element_aligned_tag element_aligned = {}; |
197 | |
198 | inline constexpr vector_aligned_tag vector_aligned = {}; |
199 | |
200 | template <size_t _Np> |
201 | inline constexpr overaligned_tag<_Np> overaligned = {}; |
202 | |
203 | // }}} |
204 | template <size_t _Xp> |
205 | using _SizeConstant = integral_constant<size_t, _Xp>; |
206 | |
207 | namespace __detail |
208 | { |
209 | struct _Minimum |
210 | { |
211 | template <typename _Tp> |
212 | _GLIBCXX_SIMD_INTRINSIC constexpr |
213 | _Tp |
214 | operator()(_Tp __a, _Tp __b) const |
215 | { |
216 | using std::min; |
217 | return min(__a, __b); |
218 | } |
219 | }; |
220 | |
221 | struct _Maximum |
222 | { |
223 | template <typename _Tp> |
224 | _GLIBCXX_SIMD_INTRINSIC constexpr |
225 | _Tp |
226 | operator()(_Tp __a, _Tp __b) const |
227 | { |
228 | using std::max; |
229 | return max(__a, __b); |
230 | } |
231 | }; |
232 | } // namespace __detail |
233 | |
234 | // unrolled/pack execution helpers |
235 | // __execute_n_times{{{ |
236 | template <typename _Fp, size_t... _I> |
237 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
238 | __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>) |
239 | { ((void)__f(_SizeConstant<_I>()), ...); } |
240 | |
241 | template <typename _Fp> |
242 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
243 | __execute_on_index_sequence(_Fp&&, index_sequence<>) |
244 | { } |
245 | |
246 | template <size_t _Np, typename _Fp> |
247 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
248 | __execute_n_times(_Fp&& __f) |
249 | { |
250 | __execute_on_index_sequence(static_cast<_Fp&&>(__f), |
251 | make_index_sequence<_Np>{}); |
252 | } |
253 | |
254 | // }}} |
255 | // __generate_from_n_evaluations{{{ |
256 | template <typename _R, typename _Fp, size_t... _I> |
257 | _GLIBCXX_SIMD_INTRINSIC constexpr _R |
258 | __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>) |
259 | { return _R{__f(_SizeConstant<_I>())...}; } |
260 | |
261 | template <size_t _Np, typename _R, typename _Fp> |
262 | _GLIBCXX_SIMD_INTRINSIC constexpr _R |
263 | __generate_from_n_evaluations(_Fp&& __f) |
264 | { |
265 | return __execute_on_index_sequence_with_return<_R>( |
266 | static_cast<_Fp&&>(__f), make_index_sequence<_Np>{}); |
267 | } |
268 | |
269 | // }}} |
270 | // __call_with_n_evaluations{{{ |
271 | template <size_t... _I, typename _F0, typename _FArgs> |
272 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
273 | __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs) |
274 | { return __f0(__fargs(_SizeConstant<_I>())...); } |
275 | |
276 | template <size_t _Np, typename _F0, typename _FArgs> |
277 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
278 | __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs) |
279 | { |
280 | return __call_with_n_evaluations(make_index_sequence<_Np>{}, |
281 | static_cast<_F0&&>(__f0), |
282 | static_cast<_FArgs&&>(__fargs)); |
283 | } |
284 | |
285 | // }}} |
286 | // __call_with_subscripts{{{ |
287 | template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp> |
288 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
289 | __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun) |
290 | { return __fun(__x[_First + _It]...); } |
291 | |
292 | template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp> |
293 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
294 | __call_with_subscripts(_Tp&& __x, _Fp&& __fun) |
295 | { |
296 | return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x), |
297 | make_index_sequence<_Np>(), |
298 | static_cast<_Fp&&>(__fun)); |
299 | } |
300 | |
301 | // }}} |
302 | |
303 | // vvv ---- type traits ---- vvv |
304 | // integer type aliases{{{ |
305 | using _UChar = unsigned char; |
306 | using _SChar = signed char; |
307 | using _UShort = unsigned short; |
308 | using _UInt = unsigned int; |
309 | using _ULong = unsigned long; |
310 | using _ULLong = unsigned long long; |
311 | using _LLong = long long; |
312 | |
313 | //}}} |
314 | // __first_of_pack{{{ |
315 | template <typename _T0, typename...> |
316 | struct __first_of_pack |
317 | { using type = _T0; }; |
318 | |
319 | template <typename... _Ts> |
320 | using __first_of_pack_t = typename __first_of_pack<_Ts...>::type; |
321 | |
322 | //}}} |
323 | // __value_type_or_identity_t {{{ |
324 | template <typename _Tp> |
325 | typename _Tp::value_type |
326 | __value_type_or_identity_impl(int); |
327 | |
328 | template <typename _Tp> |
329 | _Tp |
330 | __value_type_or_identity_impl(float); |
331 | |
332 | template <typename _Tp> |
333 | using __value_type_or_identity_t |
334 | = decltype(__value_type_or_identity_impl<_Tp>(int())); |
335 | |
336 | // }}} |
337 | // __is_vectorizable {{{ |
338 | template <typename _Tp> |
339 | struct __is_vectorizable : public is_arithmetic<_Tp> {}; |
340 | |
341 | template <> |
342 | struct __is_vectorizable<bool> : public false_type {}; |
343 | |
344 | template <typename _Tp> |
345 | inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value; |
346 | |
347 | // Deduces to a vectorizable type |
348 | template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>> |
349 | using _Vectorizable = _Tp; |
350 | |
351 | // }}} |
352 | // _LoadStorePtr / __is_possible_loadstore_conversion {{{ |
353 | template <typename _Ptr, typename _ValueType> |
354 | struct __is_possible_loadstore_conversion |
355 | : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {}; |
356 | |
357 | template <> |
358 | struct __is_possible_loadstore_conversion<bool, bool> : true_type {}; |
359 | |
360 | // Deduces to a type allowed for load/store with the given value type. |
361 | template <typename _Ptr, typename _ValueType, |
362 | typename = enable_if_t< |
363 | __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>> |
364 | using _LoadStorePtr = _Ptr; |
365 | |
366 | // }}} |
367 | // __is_bitmask{{{ |
368 | template <typename _Tp, typename = void_t<>> |
369 | struct __is_bitmask : false_type {}; |
370 | |
371 | template <typename _Tp> |
372 | inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value; |
373 | |
374 | // the __mmaskXX case: |
375 | template <typename _Tp> |
376 | struct __is_bitmask<_Tp, |
377 | void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>> |
378 | : true_type {}; |
379 | |
380 | // }}} |
381 | // __int_for_sizeof{{{ |
382 | #pragma GCC diagnostic push |
383 | #pragma GCC diagnostic ignored "-Wpedantic" |
384 | template <size_t _Bytes> |
385 | constexpr auto |
386 | __int_for_sizeof() |
387 | { |
388 | if constexpr (_Bytes == sizeof(int)) |
389 | return int(); |
390 | #ifdef __clang__ |
391 | else if constexpr (_Bytes == sizeof(char)) |
392 | return char(); |
393 | #else |
394 | else if constexpr (_Bytes == sizeof(_SChar)) |
395 | return _SChar(); |
396 | #endif |
397 | else if constexpr (_Bytes == sizeof(short)) |
398 | return short(); |
399 | #ifndef __clang__ |
400 | else if constexpr (_Bytes == sizeof(long)) |
401 | return long(); |
402 | #endif |
403 | else if constexpr (_Bytes == sizeof(_LLong)) |
404 | return _LLong(); |
405 | #ifdef __SIZEOF_INT128__ |
406 | else if constexpr (_Bytes == sizeof(__int128)) |
407 | return __int128(); |
408 | #endif // __SIZEOF_INT128__ |
409 | else if constexpr (_Bytes % sizeof(int) == 0) |
410 | { |
411 | constexpr size_t _Np = _Bytes / sizeof(int); |
412 | struct _Ip |
413 | { |
414 | int _M_data[_Np]; |
415 | |
416 | _GLIBCXX_SIMD_INTRINSIC constexpr _Ip |
417 | operator&(_Ip __rhs) const |
418 | { |
419 | return __generate_from_n_evaluations<_Np, _Ip>( |
420 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
421 | return __rhs._M_data[__i] & _M_data[__i]; |
422 | }); |
423 | } |
424 | |
425 | _GLIBCXX_SIMD_INTRINSIC constexpr _Ip |
426 | operator|(_Ip __rhs) const |
427 | { |
428 | return __generate_from_n_evaluations<_Np, _Ip>( |
429 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
430 | return __rhs._M_data[__i] | _M_data[__i]; |
431 | }); |
432 | } |
433 | |
434 | _GLIBCXX_SIMD_INTRINSIC constexpr _Ip |
435 | operator^(_Ip __rhs) const |
436 | { |
437 | return __generate_from_n_evaluations<_Np, _Ip>( |
438 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
439 | return __rhs._M_data[__i] ^ _M_data[__i]; |
440 | }); |
441 | } |
442 | |
443 | _GLIBCXX_SIMD_INTRINSIC constexpr _Ip |
444 | operator~() const |
445 | { |
446 | return __generate_from_n_evaluations<_Np, _Ip>( |
447 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; }); |
448 | } |
449 | }; |
450 | return _Ip{}; |
451 | } |
452 | else |
453 | static_assert(_Bytes != _Bytes, "this should be unreachable" ); |
454 | } |
455 | #pragma GCC diagnostic pop |
456 | |
457 | template <typename _Tp> |
458 | using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>()); |
459 | |
460 | template <size_t _Np> |
461 | using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>()); |
462 | |
463 | // }}} |
464 | // __is_fixed_size_abi{{{ |
465 | template <typename _Tp> |
466 | struct __is_fixed_size_abi : false_type {}; |
467 | |
468 | template <int _Np> |
469 | struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {}; |
470 | |
471 | template <typename _Tp> |
472 | inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value; |
473 | |
474 | // }}} |
475 | // constexpr feature detection{{{ |
476 | constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX; |
477 | constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE; |
478 | constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2; |
479 | constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3; |
480 | constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3; |
481 | constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1; |
482 | constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2; |
483 | constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP; |
484 | constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX; |
485 | constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2; |
486 | constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1; |
487 | constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2; |
488 | constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT; |
489 | constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A; |
490 | constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA; |
491 | constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4; |
492 | constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C; |
493 | constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT; |
494 | constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F; |
495 | constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ; |
496 | constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL; |
497 | constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW; |
498 | constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl; |
499 | constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; |
500 | |
501 | constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; |
502 | constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; |
503 | constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; |
504 | constexpr inline bool __support_neon_float = |
505 | #if defined __GCC_IEC_559 |
506 | __GCC_IEC_559 == 0; |
507 | #elif defined __FAST_MATH__ |
508 | true; |
509 | #else |
510 | false; |
511 | #endif |
512 | |
513 | #ifdef _ARCH_PWR10 |
514 | constexpr inline bool __have_power10vec = true; |
515 | #else |
516 | constexpr inline bool __have_power10vec = false; |
517 | #endif |
518 | #ifdef __POWER9_VECTOR__ |
519 | constexpr inline bool __have_power9vec = true; |
520 | #else |
521 | constexpr inline bool __have_power9vec = false; |
522 | #endif |
523 | #if defined __POWER8_VECTOR__ |
524 | constexpr inline bool __have_power8vec = true; |
525 | #else |
526 | constexpr inline bool __have_power8vec = __have_power9vec; |
527 | #endif |
528 | #if defined __VSX__ |
529 | constexpr inline bool __have_power_vsx = true; |
530 | #else |
531 | constexpr inline bool __have_power_vsx = __have_power8vec; |
532 | #endif |
533 | #if defined __ALTIVEC__ |
534 | constexpr inline bool __have_power_vmx = true; |
535 | #else |
536 | constexpr inline bool __have_power_vmx = __have_power_vsx; |
537 | #endif |
538 | |
539 | // }}} |
540 | // __is_scalar_abi {{{ |
541 | template <typename _Abi> |
542 | constexpr bool |
543 | __is_scalar_abi() |
544 | { return is_same_v<simd_abi::scalar, _Abi>; } |
545 | |
546 | // }}} |
547 | // __abi_bytes_v {{{ |
548 | template <template <int> class _Abi, int _Bytes> |
549 | constexpr int |
550 | __abi_bytes_impl(_Abi<_Bytes>*) |
551 | { return _Bytes; } |
552 | |
553 | template <typename _Tp> |
554 | constexpr int |
555 | __abi_bytes_impl(_Tp*) |
556 | { return -1; } |
557 | |
558 | template <typename _Abi> |
559 | inline constexpr int __abi_bytes_v |
560 | = __abi_bytes_impl(static_cast<_Abi*>(nullptr)); |
561 | |
562 | // }}} |
563 | // __is_builtin_bitmask_abi {{{ |
564 | template <typename _Abi> |
565 | constexpr bool |
566 | __is_builtin_bitmask_abi() |
567 | { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; } |
568 | |
569 | // }}} |
570 | // __is_sse_abi {{{ |
571 | template <typename _Abi> |
572 | constexpr bool |
573 | __is_sse_abi() |
574 | { |
575 | constexpr auto _Bytes = __abi_bytes_v<_Abi>; |
576 | return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>; |
577 | } |
578 | |
579 | // }}} |
580 | // __is_avx_abi {{{ |
581 | template <typename _Abi> |
582 | constexpr bool |
583 | __is_avx_abi() |
584 | { |
585 | constexpr auto _Bytes = __abi_bytes_v<_Abi>; |
586 | return _Bytes > 16 && _Bytes <= 32 |
587 | && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>; |
588 | } |
589 | |
590 | // }}} |
591 | // __is_avx512_abi {{{ |
592 | template <typename _Abi> |
593 | constexpr bool |
594 | __is_avx512_abi() |
595 | { |
596 | constexpr auto _Bytes = __abi_bytes_v<_Abi>; |
597 | return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>; |
598 | } |
599 | |
600 | // }}} |
601 | // __is_neon_abi {{{ |
602 | template <typename _Abi> |
603 | constexpr bool |
604 | __is_neon_abi() |
605 | { |
606 | constexpr auto _Bytes = __abi_bytes_v<_Abi>; |
607 | return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>; |
608 | } |
609 | |
610 | // }}} |
611 | // __make_dependent_t {{{ |
612 | template <typename, typename _Up> |
613 | struct __make_dependent |
614 | { using type = _Up; }; |
615 | |
616 | template <typename _Tp, typename _Up> |
617 | using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type; |
618 | |
619 | // }}} |
620 | // ^^^ ---- type traits ---- ^^^ |
621 | |
622 | // __invoke_ub{{{ |
623 | template <typename... _Args> |
624 | [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void |
625 | __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args) |
626 | { |
627 | #ifdef _GLIBCXX_DEBUG_UB |
628 | __builtin_fprintf(stderr, __msg, __args...); |
629 | __builtin_trap(); |
630 | #else |
631 | __builtin_unreachable(); |
632 | #endif |
633 | } |
634 | |
635 | // }}} |
636 | // __assert_unreachable{{{ |
637 | template <typename _Tp> |
638 | struct __assert_unreachable |
639 | { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable" ); }; |
640 | |
641 | // }}} |
642 | // __size_or_zero_v {{{ |
643 | template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value> |
644 | constexpr size_t |
645 | __size_or_zero_dispatch(int) |
646 | { return _Np; } |
647 | |
648 | template <typename _Tp, typename _Ap> |
649 | constexpr size_t |
650 | __size_or_zero_dispatch(float) |
651 | { return 0; } |
652 | |
653 | template <typename _Tp, typename _Ap> |
654 | inline constexpr size_t __size_or_zero_v |
655 | = __size_or_zero_dispatch<_Tp, _Ap>(0); |
656 | |
657 | // }}} |
658 | // __div_roundup {{{ |
659 | inline constexpr size_t |
660 | __div_roundup(size_t __a, size_t __b) |
661 | { return (__a + __b - 1) / __b; } |
662 | |
663 | // }}} |
664 | // _ExactBool{{{ |
665 | class _ExactBool |
666 | { |
667 | const bool _M_data; |
668 | |
669 | public: |
670 | _GLIBCXX_SIMD_INTRINSIC constexpr |
671 | _ExactBool(bool __b) : _M_data(__b) {} |
672 | |
673 | _ExactBool(int) = delete; |
674 | |
675 | _GLIBCXX_SIMD_INTRINSIC constexpr |
676 | operator bool() const |
677 | { return _M_data; } |
678 | }; |
679 | |
680 | // }}} |
681 | // __may_alias{{{ |
682 | /**@internal |
683 | * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an |
684 | * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers |
685 | * that support it). |
686 | */ |
687 | template <typename _Tp> |
688 | using __may_alias [[__gnu__::__may_alias__]] = _Tp; |
689 | |
690 | // }}} |
691 | // _UnsupportedBase {{{ |
692 | // simd and simd_mask base for unsupported <_Tp, _Abi> |
693 | struct _UnsupportedBase |
694 | { |
695 | _UnsupportedBase() = delete; |
696 | _UnsupportedBase(const _UnsupportedBase&) = delete; |
697 | _UnsupportedBase& operator=(const _UnsupportedBase&) = delete; |
698 | ~_UnsupportedBase() = delete; |
699 | }; |
700 | |
701 | // }}} |
702 | // _InvalidTraits {{{ |
703 | /** |
704 | * @internal |
705 | * Defines the implementation of __a given <_Tp, _Abi>. |
706 | * |
707 | * Implementations must ensure that only valid <_Tp, _Abi> instantiations are |
708 | * possible. Static assertions in the type definition do not suffice. It is |
709 | * important that SFINAE works. |
710 | */ |
711 | struct _InvalidTraits |
712 | { |
713 | using _IsValid = false_type; |
714 | using _SimdBase = _UnsupportedBase; |
715 | using _MaskBase = _UnsupportedBase; |
716 | |
717 | static constexpr size_t _S_full_size = 0; |
718 | static constexpr bool _S_is_partial = false; |
719 | |
720 | static constexpr size_t _S_simd_align = 1; |
721 | struct _SimdImpl; |
722 | struct _SimdMember {}; |
723 | struct _SimdCastType; |
724 | |
725 | static constexpr size_t _S_mask_align = 1; |
726 | struct _MaskImpl; |
727 | struct _MaskMember {}; |
728 | struct _MaskCastType; |
729 | }; |
730 | |
731 | // }}} |
732 | // _SimdTraits {{{ |
733 | template <typename _Tp, typename _Abi, typename = void_t<>> |
734 | struct _SimdTraits : _InvalidTraits {}; |
735 | |
736 | // }}} |
737 | // __private_init, __bitset_init{{{ |
738 | /** |
739 | * @internal |
740 | * Tag used for private init constructor of simd and simd_mask |
741 | */ |
742 | inline constexpr struct _PrivateInit {} __private_init = {}; |
743 | |
744 | inline constexpr struct _BitsetInit {} __bitset_init = {}; |
745 | |
746 | // }}} |
747 | // __is_narrowing_conversion<_From, _To>{{{ |
748 | template <typename _From, typename _To, bool = is_arithmetic_v<_From>, |
749 | bool = is_arithmetic_v<_To>> |
750 | struct __is_narrowing_conversion; |
751 | |
752 | // ignore "signed/unsigned mismatch" in the following trait. |
753 | // The implicit conversions will do the right thing here. |
754 | template <typename _From, typename _To> |
755 | struct __is_narrowing_conversion<_From, _To, true, true> |
756 | : public __bool_constant<( |
757 | __digits_v<_From> > __digits_v<_To> |
758 | || __finite_max_v<_From> > __finite_max_v<_To> |
759 | || __finite_min_v<_From> < __finite_min_v<_To> |
760 | || (is_signed_v<_From> && is_unsigned_v<_To>))> {}; |
761 | |
762 | template <typename _Tp> |
763 | struct __is_narrowing_conversion<_Tp, bool, true, true> |
764 | : public true_type {}; |
765 | |
766 | template <> |
767 | struct __is_narrowing_conversion<bool, bool, true, true> |
768 | : public false_type {}; |
769 | |
770 | template <typename _Tp> |
771 | struct __is_narrowing_conversion<_Tp, _Tp, true, true> |
772 | : public false_type {}; |
773 | |
774 | template <typename _From, typename _To> |
775 | struct __is_narrowing_conversion<_From, _To, false, true> |
776 | : public negation<is_convertible<_From, _To>> {}; |
777 | |
778 | // }}} |
779 | // __converts_to_higher_integer_rank{{{ |
780 | template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))> |
781 | struct __converts_to_higher_integer_rank : public true_type {}; |
782 | |
783 | // this may fail for char -> short if sizeof(char) == sizeof(short) |
784 | template <typename _From, typename _To> |
785 | struct __converts_to_higher_integer_rank<_From, _To, false> |
786 | : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {}; |
787 | |
788 | // }}} |
789 | // __data(simd/simd_mask) {{{ |
790 | template <typename _Tp, typename _Ap> |
791 | _GLIBCXX_SIMD_INTRINSIC constexpr const auto& |
792 | __data(const simd<_Tp, _Ap>& __x); |
793 | |
794 | template <typename _Tp, typename _Ap> |
795 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& |
796 | __data(simd<_Tp, _Ap>& __x); |
797 | |
798 | template <typename _Tp, typename _Ap> |
799 | _GLIBCXX_SIMD_INTRINSIC constexpr const auto& |
800 | __data(const simd_mask<_Tp, _Ap>& __x); |
801 | |
802 | template <typename _Tp, typename _Ap> |
803 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& |
804 | __data(simd_mask<_Tp, _Ap>& __x); |
805 | |
806 | // }}} |
807 | // _SimdConverter {{{ |
808 | template <typename _FromT, typename _FromA, typename _ToT, typename _ToA, |
809 | typename = void> |
810 | struct _SimdConverter; |
811 | |
812 | template <typename _Tp, typename _Ap> |
813 | struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void> |
814 | { |
815 | template <typename _Up> |
816 | _GLIBCXX_SIMD_INTRINSIC const _Up& |
817 | operator()(const _Up& __x) |
818 | { return __x; } |
819 | }; |
820 | |
821 | // }}} |
822 | // __to_value_type_or_member_type {{{ |
823 | template <typename _V> |
824 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
825 | __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x)) |
826 | { return __data(__x); } |
827 | |
828 | template <typename _V> |
829 | _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type& |
830 | __to_value_type_or_member_type(const typename _V::value_type& __x) |
831 | { return __x; } |
832 | |
833 | // }}} |
834 | // __bool_storage_member_type{{{ |
835 | template <size_t _Size> |
836 | struct __bool_storage_member_type; |
837 | |
838 | template <size_t _Size> |
839 | using __bool_storage_member_type_t = |
840 | typename __bool_storage_member_type<_Size>::type; |
841 | |
842 | // }}} |
843 | // _SimdTuple {{{ |
844 | // why not tuple? |
845 | // 1. tuple gives no guarantee about the storage order, but I require |
846 | // storage |
847 | // equivalent to array<_Tp, _Np> |
848 | // 2. direct access to the element type (first template argument) |
849 | // 3. enforces equal element type, only different _Abi types are allowed |
850 | template <typename _Tp, typename... _Abis> |
851 | struct _SimdTuple; |
852 | |
853 | //}}} |
854 | // __fixed_size_storage_t {{{ |
855 | template <typename _Tp, int _Np> |
856 | struct __fixed_size_storage; |
857 | |
858 | template <typename _Tp, int _Np> |
859 | using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type; |
860 | |
861 | // }}} |
862 | // _SimdWrapper fwd decl{{{ |
863 | template <typename _Tp, size_t _Size, typename = void_t<>> |
864 | struct _SimdWrapper; |
865 | |
866 | template <typename _Tp> |
867 | using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>; |
868 | template <typename _Tp> |
869 | using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>; |
870 | template <typename _Tp> |
871 | using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>; |
872 | template <typename _Tp> |
873 | using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>; |
874 | |
875 | // }}} |
876 | // __is_simd_wrapper {{{ |
877 | template <typename _Tp> |
878 | struct __is_simd_wrapper : false_type {}; |
879 | |
880 | template <typename _Tp, size_t _Np> |
881 | struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {}; |
882 | |
883 | template <typename _Tp> |
884 | inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value; |
885 | |
886 | // }}} |
887 | // _BitOps {{{ |
888 | struct _BitOps |
889 | { |
890 | // _S_bit_iteration {{{ |
891 | template <typename _Tp, typename _Fp> |
892 | static void |
893 | _S_bit_iteration(_Tp __mask, _Fp&& __f) |
894 | { |
895 | static_assert(sizeof(_ULLong) >= sizeof(_Tp)); |
896 | conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k; |
897 | if constexpr (is_convertible_v<_Tp, decltype(__k)>) |
898 | __k = __mask; |
899 | else |
900 | __k = __mask.to_ullong(); |
901 | while(__k) |
902 | { |
903 | __f(std::__countr_zero(__k)); |
904 | __k &= (__k - 1); |
905 | } |
906 | } |
907 | |
908 | //}}} |
909 | }; |
910 | |
911 | //}}} |
912 | // __increment, __decrement {{{ |
// Function object applying prefix ++ and returning the incremented value.
// The void specialization is the transparent variant that deduces its
// argument type (analogous to std::plus<void>).
template <typename _Tp = void>
struct __increment
{
  constexpr _Tp
  operator()(_Tp __v) const
  {
    ++__v;
    return __v;
  }
};

template <>
struct __increment<void>
{
  // Transparent call operator: works for any incrementable type.
  template <typename _Up>
    constexpr _Up
    operator()(_Up __v) const
    {
      ++__v;
      return __v;
    }
};
925 | |
// Function object applying prefix -- and returning the decremented value.
// The void specialization is the transparent variant that deduces its
// argument type (analogous to std::minus<void>).
template <typename _Tp = void>
struct __decrement
{
  constexpr _Tp
  operator()(_Tp __v) const
  {
    --__v;
    return __v;
  }
};

template <>
struct __decrement<void>
{
  // Transparent call operator: works for any decrementable type.
  template <typename _Up>
    constexpr _Up
    operator()(_Up __v) const
    {
      --__v;
      return __v;
    }
};
938 | |
939 | // }}} |
940 | // _ValuePreserving(OrInt) {{{ |
// SFINAE alias usable as a parameter type: accepts _From only when its
// conversion to _To is not narrowing (value-preserving).
template <typename _From, typename _To,
          typename = enable_if_t<negation<
            __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
using _ValuePreserving = _From;

// Like _ValuePreserving, but additionally accepts: the exact type _To, plain
// int, and unsigned int when _To is unsigned — matching the broadcast
// conversion rules of the Parallelism TS [simd].
template <typename _From, typename _To,
          typename _DecayedFrom = __remove_cvref_t<_From>,
          typename = enable_if_t<conjunction<
            is_convertible<_From, _To>,
            disjunction<
              is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
              conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
              negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
using _ValuePreservingOrInt = _From;
955 | |
956 | // }}} |
957 | // __intrinsic_type {{{ |
// Maps an element type and a total byte width to the target's intrinsic
// vector type (e.g. __m128 on x86); specializations are target-specific.
template <typename _Tp, size_t _Bytes, typename = void_t<>>
struct __intrinsic_type;

// Alias taking an element count instead of a byte count.
template <typename _Tp, size_t _Size>
using __intrinsic_type_t =
  typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

// Fixed-byte-width shorthands (2/4/8/16/32/64 bytes total).
template <typename _Tp>
using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
977 | |
978 | // }}} |
979 | // _BitMask {{{ |
// Fixed-width bitmask of _Np bits. _Sanitized == true guarantees that all
// padding bits beyond bit _Np-1 are zero.
template <size_t _Np, bool _Sanitized = false>
struct _BitMask;

// Register _BitMask with the __is_bitmask trait.
template <size_t _Np, bool _Sanitized>
struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

// A _BitMask whose padding bits are known to be zero.
template <size_t _Np>
using _SanitizedBitMask = _BitMask<_Np, true>;
988 | |
989 | template <size_t _Np, bool _Sanitized> |
990 | struct _BitMask |
991 | { |
992 | static_assert(_Np > 0); |
993 | |
994 | static constexpr size_t _NBytes = __div_roundup(a: _Np, __CHAR_BIT__); |
995 | |
996 | using _Tp = conditional_t<_Np == 1, bool, |
997 | make_unsigned_t<__int_with_sizeof_t<std::min( |
998 | a: sizeof(_ULLong), b: std::__bit_ceil(x: _NBytes))>>>; |
999 | |
1000 | static constexpr int _S_array_size = __div_roundup(a: _NBytes, b: sizeof(_Tp)); |
1001 | |
1002 | _Tp _M_bits[_S_array_size]; |
1003 | |
1004 | static constexpr int _S_unused_bits |
1005 | = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np; |
1006 | |
1007 | static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits; |
1008 | |
1009 | constexpr _BitMask() noexcept = default; |
1010 | |
1011 | constexpr _BitMask(unsigned long long __x) noexcept |
1012 | : _M_bits{static_cast<_Tp>(__x)} {} |
1013 | |
1014 | _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {} |
1015 | |
1016 | constexpr _BitMask(const _BitMask&) noexcept = default; |
1017 | |
1018 | template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false |
1019 | && _Sanitized == true>> |
1020 | constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept |
1021 | : _BitMask(__rhs._M_sanitized()) {} |
1022 | |
1023 | constexpr operator _SimdWrapper<bool, _Np>() const noexcept |
1024 | { |
1025 | static_assert(_S_array_size == 1); |
1026 | return _M_bits[0]; |
1027 | } |
1028 | |
1029 | // precondition: is sanitized |
1030 | constexpr _Tp |
1031 | _M_to_bits() const noexcept |
1032 | { |
1033 | static_assert(_S_array_size == 1); |
1034 | return _M_bits[0]; |
1035 | } |
1036 | |
1037 | // precondition: is sanitized |
1038 | constexpr unsigned long long |
1039 | to_ullong() const noexcept |
1040 | { |
1041 | static_assert(_S_array_size == 1); |
1042 | return _M_bits[0]; |
1043 | } |
1044 | |
1045 | // precondition: is sanitized |
1046 | constexpr unsigned long |
1047 | to_ulong() const noexcept |
1048 | { |
1049 | static_assert(_S_array_size == 1); |
1050 | return _M_bits[0]; |
1051 | } |
1052 | |
1053 | constexpr bitset<_Np> |
1054 | _M_to_bitset() const noexcept |
1055 | { |
1056 | static_assert(_S_array_size == 1); |
1057 | return _M_bits[0]; |
1058 | } |
1059 | |
1060 | constexpr decltype(auto) |
1061 | _M_sanitized() const noexcept |
1062 | { |
1063 | if constexpr (_Sanitized) |
1064 | return *this; |
1065 | else if constexpr (_Np == 1) |
1066 | return _SanitizedBitMask<_Np>(_M_bits[0]); |
1067 | else |
1068 | { |
1069 | _SanitizedBitMask<_Np> __r = {}; |
1070 | for (int __i = 0; __i < _S_array_size; ++__i) |
1071 | __r._M_bits[__i] = _M_bits[__i]; |
1072 | if constexpr (_S_unused_bits > 0) |
1073 | __r._M_bits[_S_array_size - 1] &= _S_bitmask; |
1074 | return __r; |
1075 | } |
1076 | } |
1077 | |
1078 | template <size_t _Mp, bool _LSanitized> |
1079 | constexpr _BitMask<_Np + _Mp, _Sanitized> |
1080 | _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept |
1081 | { |
1082 | constexpr size_t _RN = _Np + _Mp; |
1083 | using _Rp = _BitMask<_RN, _Sanitized>; |
1084 | if constexpr (_Rp::_S_array_size == 1) |
1085 | { |
1086 | _Rp __r{{_M_bits[0]}}; |
1087 | __r._M_bits[0] <<= _Mp; |
1088 | __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0]; |
1089 | return __r; |
1090 | } |
1091 | else |
1092 | __assert_unreachable<_Rp>(); |
1093 | } |
1094 | |
1095 | // Return a new _BitMask with size _NewSize while dropping _DropLsb least |
1096 | // significant bits. If the operation implicitly produces a sanitized bitmask, |
1097 | // the result type will have _Sanitized set. |
1098 | template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb> |
1099 | constexpr auto |
1100 | () const noexcept |
1101 | { |
1102 | static_assert(_Np > _DropLsb); |
1103 | static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__, |
1104 | "not implemented for bitmasks larger than one ullong" ); |
1105 | if constexpr (_NewSize == 1) |
1106 | // must sanitize because the return _Tp is bool |
1107 | return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb)); |
1108 | else |
1109 | return _BitMask<_NewSize, |
1110 | ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__ |
1111 | && _NewSize + _DropLsb <= _Np) |
1112 | || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__) |
1113 | && _NewSize + _DropLsb >= _Np))>(_M_bits[0] |
1114 | >> _DropLsb); |
1115 | } |
1116 | |
1117 | // True if all bits are set. Implicitly sanitizes if _Sanitized == false. |
1118 | constexpr bool |
1119 | all() const noexcept |
1120 | { |
1121 | if constexpr (_Np == 1) |
1122 | return _M_bits[0]; |
1123 | else if constexpr (!_Sanitized) |
1124 | return _M_sanitized().all(); |
1125 | else |
1126 | { |
1127 | constexpr _Tp __allbits = ~_Tp(); |
1128 | for (int __i = 0; __i < _S_array_size - 1; ++__i) |
1129 | if (_M_bits[__i] != __allbits) |
1130 | return false; |
1131 | return _M_bits[_S_array_size - 1] == _S_bitmask; |
1132 | } |
1133 | } |
1134 | |
1135 | // True if at least one bit is set. Implicitly sanitizes if _Sanitized == |
1136 | // false. |
1137 | constexpr bool |
1138 | any() const noexcept |
1139 | { |
1140 | if constexpr (_Np == 1) |
1141 | return _M_bits[0]; |
1142 | else if constexpr (!_Sanitized) |
1143 | return _M_sanitized().any(); |
1144 | else |
1145 | { |
1146 | for (int __i = 0; __i < _S_array_size - 1; ++__i) |
1147 | if (_M_bits[__i] != 0) |
1148 | return true; |
1149 | return _M_bits[_S_array_size - 1] != 0; |
1150 | } |
1151 | } |
1152 | |
1153 | // True if no bit is set. Implicitly sanitizes if _Sanitized == false. |
1154 | constexpr bool |
1155 | none() const noexcept |
1156 | { |
1157 | if constexpr (_Np == 1) |
1158 | return !_M_bits[0]; |
1159 | else if constexpr (!_Sanitized) |
1160 | return _M_sanitized().none(); |
1161 | else |
1162 | { |
1163 | for (int __i = 0; __i < _S_array_size - 1; ++__i) |
1164 | if (_M_bits[__i] != 0) |
1165 | return false; |
1166 | return _M_bits[_S_array_size - 1] == 0; |
1167 | } |
1168 | } |
1169 | |
1170 | // Returns the number of set bits. Implicitly sanitizes if _Sanitized == |
1171 | // false. |
1172 | constexpr int |
1173 | count() const noexcept |
1174 | { |
1175 | if constexpr (_Np == 1) |
1176 | return _M_bits[0]; |
1177 | else if constexpr (!_Sanitized) |
1178 | return _M_sanitized().none(); |
1179 | else |
1180 | { |
1181 | int __result = __builtin_popcountll(_M_bits[0]); |
1182 | for (int __i = 1; __i < _S_array_size; ++__i) |
1183 | __result += __builtin_popcountll(_M_bits[__i]); |
1184 | return __result; |
1185 | } |
1186 | } |
1187 | |
1188 | // Returns the bit at offset __i as bool. |
1189 | constexpr bool |
1190 | operator[](size_t __i) const noexcept |
1191 | { |
1192 | if constexpr (_Np == 1) |
1193 | return _M_bits[0]; |
1194 | else if constexpr (_S_array_size == 1) |
1195 | return (_M_bits[0] >> __i) & 1; |
1196 | else |
1197 | { |
1198 | const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__); |
1199 | const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__); |
1200 | return (_M_bits[__j] >> __shift) & 1; |
1201 | } |
1202 | } |
1203 | |
1204 | template <size_t __i> |
1205 | constexpr bool |
1206 | operator[](_SizeConstant<__i>) const noexcept |
1207 | { |
1208 | static_assert(__i < _Np); |
1209 | constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__); |
1210 | constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__); |
1211 | return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift)); |
1212 | } |
1213 | |
1214 | // Set the bit at offset __i to __x. |
1215 | constexpr void |
1216 | set(size_t __i, bool __x) noexcept |
1217 | { |
1218 | if constexpr (_Np == 1) |
1219 | _M_bits[0] = __x; |
1220 | else if constexpr (_S_array_size == 1) |
1221 | { |
1222 | _M_bits[0] &= ~_Tp(_Tp(1) << __i); |
1223 | _M_bits[0] |= _Tp(_Tp(__x) << __i); |
1224 | } |
1225 | else |
1226 | { |
1227 | const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__); |
1228 | const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__); |
1229 | _M_bits[__j] &= ~_Tp(_Tp(1) << __shift); |
1230 | _M_bits[__j] |= _Tp(_Tp(__x) << __shift); |
1231 | } |
1232 | } |
1233 | |
1234 | template <size_t __i> |
1235 | constexpr void |
1236 | set(_SizeConstant<__i>, bool __x) noexcept |
1237 | { |
1238 | static_assert(__i < _Np); |
1239 | if constexpr (_Np == 1) |
1240 | _M_bits[0] = __x; |
1241 | else |
1242 | { |
1243 | constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__); |
1244 | constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__); |
1245 | constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift); |
1246 | _M_bits[__j] &= __mask; |
1247 | _M_bits[__j] |= _Tp(_Tp(__x) << __shift); |
1248 | } |
1249 | } |
1250 | |
1251 | // Inverts all bits. Sanitized input leads to sanitized output. |
1252 | constexpr _BitMask |
1253 | operator~() const noexcept |
1254 | { |
1255 | if constexpr (_Np == 1) |
1256 | return !_M_bits[0]; |
1257 | else |
1258 | { |
1259 | _BitMask __result{}; |
1260 | for (int __i = 0; __i < _S_array_size - 1; ++__i) |
1261 | __result._M_bits[__i] = ~_M_bits[__i]; |
1262 | if constexpr (_Sanitized) |
1263 | __result._M_bits[_S_array_size - 1] |
1264 | = _M_bits[_S_array_size - 1] ^ _S_bitmask; |
1265 | else |
1266 | __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1]; |
1267 | return __result; |
1268 | } |
1269 | } |
1270 | |
1271 | constexpr _BitMask& |
1272 | operator^=(const _BitMask& __b) & noexcept |
1273 | { |
1274 | __execute_n_times<_S_array_size>( |
1275 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; }); |
1276 | return *this; |
1277 | } |
1278 | |
1279 | constexpr _BitMask& |
1280 | operator|=(const _BitMask& __b) & noexcept |
1281 | { |
1282 | __execute_n_times<_S_array_size>( |
1283 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; }); |
1284 | return *this; |
1285 | } |
1286 | |
1287 | constexpr _BitMask& |
1288 | operator&=(const _BitMask& __b) & noexcept |
1289 | { |
1290 | __execute_n_times<_S_array_size>( |
1291 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; }); |
1292 | return *this; |
1293 | } |
1294 | |
1295 | friend constexpr _BitMask |
1296 | operator^(const _BitMask& __a, const _BitMask& __b) noexcept |
1297 | { |
1298 | _BitMask __r = __a; |
1299 | __r ^= __b; |
1300 | return __r; |
1301 | } |
1302 | |
1303 | friend constexpr _BitMask |
1304 | operator|(const _BitMask& __a, const _BitMask& __b) noexcept |
1305 | { |
1306 | _BitMask __r = __a; |
1307 | __r |= __b; |
1308 | return __r; |
1309 | } |
1310 | |
1311 | friend constexpr _BitMask |
1312 | operator&(const _BitMask& __a, const _BitMask& __b) noexcept |
1313 | { |
1314 | _BitMask __r = __a; |
1315 | __r &= __b; |
1316 | return __r; |
1317 | } |
1318 | |
1319 | _GLIBCXX_SIMD_INTRINSIC |
1320 | constexpr bool |
1321 | _M_is_constprop() const |
1322 | { |
1323 | if constexpr (_S_array_size == 0) |
1324 | return __builtin_constant_p(_M_bits[0]); |
1325 | else |
1326 | { |
1327 | for (int __i = 0; __i < _S_array_size; ++__i) |
1328 | if (!__builtin_constant_p(_M_bits[__i])) |
1329 | return false; |
1330 | return true; |
1331 | } |
1332 | } |
1333 | }; |
1334 | |
1335 | // }}} |
1336 | |
1337 | // vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv |
1338 | // __min_vector_size {{{ |
// Minimum byte width of a builtin vector for element type _Tp (at least two
// elements); the void specialization gives the target's overall minimum.
template <typename _Tp = void>
static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
// NEON has 8-byte (D) registers in addition to 16-byte (Q) registers.
template <>
inline constexpr int __min_vector_size<void> = 8;
#else
template <>
inline constexpr int __min_vector_size<void> = 16;
#endif
1349 | |
1350 | // }}} |
1351 | // __vector_type {{{ |
1352 | template <typename _Tp, size_t _Np, typename = void> |
1353 | struct __vector_type_n {}; |
1354 | |
1355 | // substition failure for 0-element case |
1356 | template <typename _Tp> |
1357 | struct __vector_type_n<_Tp, 0, void> {}; |
1358 | |
1359 | // special case 1-element to be _Tp itself |
1360 | template <typename _Tp> |
1361 | struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>> |
1362 | { using type = _Tp; }; |
1363 | |
1364 | // else, use GNU-style builtin vector types |
1365 | template <typename _Tp, size_t _Np> |
1366 | struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>> |
1367 | { |
1368 | static constexpr size_t _S_Np2 = std::__bit_ceil(x: _Np * sizeof(_Tp)); |
1369 | |
1370 | static constexpr size_t _S_Bytes = |
1371 | #ifdef __i386__ |
1372 | // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because |
1373 | // those objects are passed via MMX registers and nothing ever calls EMMS. |
1374 | _S_Np2 == 8 ? 16 : |
1375 | #endif |
1376 | _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp> |
1377 | : _S_Np2; |
1378 | |
1379 | using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp; |
1380 | }; |
1381 | |
// Like __vector_type_n but keyed on total byte size; only defined when
// _Bytes is a multiple of sizeof(_Tp) (third parameter must be 0).
template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
struct __vector_type;

template <typename _Tp, size_t _Bytes>
struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

// Alias for the builtin vector type of _Size elements of _Tp.
template <typename _Tp, size_t _Size>
using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

// Fixed-byte-width shorthands (2/4/8/16/32/64 bytes total).
template <typename _Tp>
using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
1404 | |
1405 | // }}} |
1406 | // __is_vector_type {{{ |
// Trait: true iff _Tp is exactly a builtin vector type produced by
// __vector_type (checked by reconstructing the vector type from _Tp's
// element type and size and comparing for identity).
template <typename _Tp, typename = void_t<>>
struct __is_vector_type : false_type {};

template <typename _Tp>
struct __is_vector_type<
  _Tp, void_t<typename __vector_type<
    remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
      remove_reference_t<decltype(declval<_Tp>()[0])>,
      sizeof(_Tp)>::type> {};

template <typename _Tp>
inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1420 | |
1421 | // }}} |
1422 | // __is_intrinsic_type {{{ |
// Trait: true iff _Tp is one of the target's intrinsic vector types.
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
// On x86 the intrinsic types (__m128 etc.) are the builtin vector types.
template <typename _Tp>
using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
struct __is_intrinsic_type : false_type {};

template <typename _Tp>
struct __is_intrinsic_type<
  _Tp, void_t<typename __intrinsic_type<
    remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
      remove_reference_t<decltype(declval<_Tp>()[0])>,
      sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
1441 | |
1442 | // }}} |
1443 | // _VectorTraits{{{ |
// Traits of a builtin/intrinsic vector type: its element type, full element
// count, and the matching _SimdWrapper.
template <typename _Tp, typename = void_t<>>
struct _VectorTraitsImpl;

template <typename _Tp>
struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
                                            || __is_intrinsic_type_v<_Tp>>>
{
  using type = _Tp;
  using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
  // number of elements in the full vector register
  static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
  using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
  // _S_is<U, W>: does this vector hold W elements of type U?
  template <typename _Up, int _W = _S_full_size>
    static constexpr bool _S_is
      = is_same_v<value_type, _Up> && _W == _S_full_size;
};
1459 | |
1460 | template <typename _Tp, size_t _Np> |
1461 | struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>, |
1462 | void_t<__vector_type_t<_Tp, _Np>>> |
1463 | { |
1464 | using type = __vector_type_t<_Tp, _Np>; |
1465 | using value_type = _Tp; |
1466 | static constexpr int _S_full_size = sizeof(type) / sizeof(value_type); |
1467 | using _Wrapper = _SimdWrapper<_Tp, _Np>; |
1468 | static constexpr bool _S_is_partial = (_Np == _S_full_size); |
1469 | static constexpr int _S_partial_width = _Np; |
1470 | template <typename _Up, int _W = _S_full_size> |
1471 | static constexpr bool _S_is |
1472 | = is_same_v<value_type, _Up>&& _W == _S_full_size; |
1473 | }; |
1474 | |
// SFINAE-friendly entry point: only valid when _VectorTraitsImpl<_Tp> has a
// ::type (i.e. _Tp is a vector, intrinsic, or _SimdWrapper type).
template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
using _VectorTraits = _VectorTraitsImpl<_Tp>;
1477 | |
1478 | // }}} |
1479 | // __as_vector{{{ |
// Unwraps __x to its underlying builtin vector: identity for vector types,
// the wrapped data for simd/simd_mask and _SimdWrapper, and a 2-element
// vector (second element zero-initialized) for a vectorizable scalar.
template <typename _V>
_GLIBCXX_SIMD_INTRINSIC constexpr auto
__as_vector(_V __x)
{
  if constexpr (__is_vector_type_v<_V>)
    return __x;
  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
    return __data(__x)._M_data;
  else if constexpr (__is_vectorizable_v<_V>)
    return __vector_type_t<_V, 2>{__x};
  else
    return __x._M_data;
}
1493 | |
1494 | // }}} |
1495 | // __as_wrapper{{{ |
// Wraps __x into a _SimdWrapper of _Np elements (or the full vector size if
// _Np == 0); passes through the wrapper of simd/simd_mask objects and
// already-wrapped values, asserting the size matches.
template <size_t _Np = 0, typename _V>
_GLIBCXX_SIMD_INTRINSIC constexpr auto
__as_wrapper(_V __x)
{
  if constexpr (__is_vector_type_v<_V>)
    return _SimdWrapper<typename _VectorTraits<_V>::value_type,
                        (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
    {
      static_assert(_V::size() == _Np);
      return __data(__x);
    }
  else
    {
      // __x is assumed to already be a _SimdWrapper
      static_assert(_V::_S_size == _Np);
      return __x;
    }
}
1514 | |
1515 | // }}} |
1516 | // __intrin_bitcast{{{ |
// Bit-casts between vector/intrinsic types of possibly different sizes:
// truncates when narrowing, zero-extends (high elements) when widening.
template <typename _To, typename _From>
_GLIBCXX_SIMD_INTRINSIC constexpr _To
__intrin_bitcast(_From __v)
{
  static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
                  && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
  if constexpr (sizeof(_To) == sizeof(_From))
    return reinterpret_cast<_To>(__v);
  else if constexpr (sizeof(_From) > sizeof(_To))
    // narrowing: keep the low sizeof(_To) bytes
    if constexpr (sizeof(_To) >= 16)
      return reinterpret_cast<const __may_alias<_To>&>(__v);
    else
      {
        _To __r;
        __builtin_memcpy(&__r, &__v, sizeof(_To));
        return __r;
      }
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
  // widening via dedicated x86 builtins (zero-extend the upper lanes)
  else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
    return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
      reinterpret_cast<__vector_type_t<float, 4>>(__v)));
  else if constexpr (__have_avx512f && sizeof(_From) == 16
                       && sizeof(_To) == 64)
    return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
      reinterpret_cast<__vector_type_t<float, 4>>(__v)));
  else if constexpr (__have_avx512f && sizeof(_From) == 32
                       && sizeof(_To) == 64)
    return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
      reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
  else if constexpr (sizeof(__v) <= 8)
    // widen a small source by placing its bits into element 0 of an integer
    // vector; remaining elements are zero-initialized
    return reinterpret_cast<_To>(
      __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
        reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
  else
    {
      // generic widening fallback: copy the low bytes, zero the rest
      static_assert(sizeof(_To) > sizeof(_From));
      _To __r = {};
      __builtin_memcpy(&__r, &__v, sizeof(_From));
      return __r;
    }
}
1559 | |
1560 | // }}} |
1561 | // __vector_bitcast{{{ |
// Bit-casts __x to a builtin vector of _To elements. With _NN == 0 the
// element count is derived from the source's byte size; otherwise _NN
// elements are requested (truncating or zero-extending via
// __intrin_bitcast).
template <typename _To, size_t _NN = 0, typename _From,
          typename _FromVT = _VectorTraits<_From>,
          size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
_GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
__vector_bitcast(_From __x)
{
  using _R = __vector_type_t<_To, _Np>;
  return __intrin_bitcast<_R>(__x);
}

// Overload for _SimdWrapper sources: casts the wrapped vector data.
template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
          size_t _Np
            = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
_GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
__vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
{
  static_assert(_Np > 1);
  return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
}
1581 | |
1582 | // }}} |
1583 | // __convert_x86 declarations {{{ |
// Forward declarations of the x86 conversion helpers (defined in
// simd_x86_conversions.h); overloads take 1, 2, 4, 8, or 16 source vectors
// and convert/concatenate them into _To. Only used as a workaround for GCC
// PR 85048 (bad code generation for some vector conversions).
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
_To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
_To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
_To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
_To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
_To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
                  _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1601 | |
1602 | //}}} |
1603 | // __bit_cast {{{ |
// Reinterprets the object representation of __x as _To (same size required).
// Uses vector reinterpret_cast where possible so the compiler keeps the
// value in registers; falls back to memcpy otherwise.
template <typename _To, typename _From>
_GLIBCXX_SIMD_INTRINSIC constexpr _To
__bit_cast(const _From __x)
{
  // TODO: implement with / replace by __builtin_bit_cast ASAP
  static_assert(sizeof(_To) == sizeof(_From));
  constexpr bool __to_is_vectorizable
    = is_arithmetic_v<_To> || is_enum_v<_To>;
  constexpr bool __from_is_vectorizable
    = is_arithmetic_v<_From> || is_enum_v<_From>;
  if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
    return reinterpret_cast<_To>(__x);
  else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
    {
      // scalar -> vector: wrap the scalar in a 1-element vector first
      using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
      return reinterpret_cast<_To>(_FV{__x});
    }
  else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
    {
      // scalar -> scalar: round-trip through 1-element vectors
      using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
      using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
      return reinterpret_cast<_TV>(_FV{__x})[0];
    }
  else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
    {
      // vector -> scalar: extract element 0 of the reinterpreted vector
      using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
      return reinterpret_cast<_TV>(__x)[0];
    }
  else
    {
      // generic fallback (not constexpr-friendly): byte copy
      _To __r;
      __builtin_memcpy(reinterpret_cast<char*>(&__r),
                       reinterpret_cast<const char*>(&__x), sizeof(_To));
      return __r;
    }
}
1640 | |
1641 | // }}} |
1642 | // __to_intrin {{{ |
// Converts __x (vector or _SimdWrapper) to the matching intrinsic vector
// type _R, zero-extending if the intrinsic type is wider.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
_GLIBCXX_SIMD_INTRINSIC constexpr _R
__to_intrin(_Tp __x)
{
  static_assert(sizeof(__x) <= sizeof(_R),
                "__to_intrin may never drop values off the end" );
  if constexpr (sizeof(__x) == sizeof(_R))
    return reinterpret_cast<_R>(__as_vector(__x));
  else
    {
      // widen: place the source bits into element 0 of an integer vector of
      // the target width (remaining elements zero)
      using _Up = __int_for_sizeof_t<_Tp>;
      return reinterpret_cast<_R>(
        __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
    }
}
1659 | |
1660 | // }}} |
1661 | // __make_vector{{{ |
// Builds a builtin vector of _Tp from the given values, one element per
// argument (each converted to _Tp).
template <typename _Tp, typename... _Args>
_GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
__make_vector(const _Args&... __args)
{ return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1666 | |
1667 | // }}} |
1668 | // __vector_broadcast{{{ |
// Implementation helper: expands the index pack to repeat __x _Np times.
template <size_t _Np, typename _Tp, size_t... _I>
_GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
__vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
{ return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }

// Returns a builtin vector with all _Np elements equal to __x.
template <size_t _Np, typename _Tp>
_GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
__vector_broadcast(_Tp __x)
{ return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
1678 | |
1679 | // }}} |
1680 | // __generate_vector{{{ |
// Implementation helper: element __i is __gen(_SizeConstant<__i>()),
// converted to _Tp.
template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
_GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
__generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
{ return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }

// Builds a vector of type _V by invoking __gen with each element's index as
// a _SizeConstant. For _SimdWrapper targets only the partial width is
// generated.
template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
_GLIBCXX_SIMD_INTRINSIC constexpr _V
__generate_vector(_Gp&& __gen)
{
  if constexpr (__is_vector_type_v<_V>)
    return __generate_vector_impl<typename _VVT::value_type,
                                  _VVT::_S_full_size>(
      static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
  else
    return __generate_vector_impl<typename _VVT::value_type,
                                  _VVT::_S_partial_width>(
      static_cast<_Gp&&>(__gen),
      make_index_sequence<_VVT::_S_partial_width>());
}

// Overload taking element type and count directly.
template <typename _Tp, size_t _Np, typename _Gp>
_GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
__generate_vector(_Gp&& __gen)
{
  return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
                                          make_index_sequence<_Np>());
}
1708 | |
1709 | // }}} |
1710 | // __xor{{{ |
// Bitwise XOR for vectors, _SimdWrappers, and scalars. Floating-point
// elements are XORed via their integer bit representation (builtin ^ is not
// defined for floating-point vectors).
template <typename _TW>
_GLIBCXX_SIMD_INTRINSIC constexpr _TW
__xor(_TW __a, _TW __b) noexcept
{
  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
    {
      using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
                                         _VectorTraitsImpl<_TW>>::value_type;
      if constexpr (is_floating_point_v<_Tp>)
        {
          // operate on the unsigned integer of equal element size
          using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
          return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
                                         ^ __vector_bitcast<_Ip>(__b));
        }
      else if constexpr (__is_vector_type_v<_TW>)
        return __a ^ __b;
      else
        return __a._M_data ^ __b._M_data;
    }
  else
    return __a ^ __b;
}
1733 | |
1734 | // }}} |
1735 | // __or{{{ |
// Bitwise OR for vectors, _SimdWrappers, and scalars. Floating-point
// elements are ORed via their integer bit representation.
template <typename _TW>
_GLIBCXX_SIMD_INTRINSIC constexpr _TW
__or(_TW __a, _TW __b) noexcept
{
  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
    {
      using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
                                         _VectorTraitsImpl<_TW>>::value_type;
      if constexpr (is_floating_point_v<_Tp>)
        {
          // operate on the unsigned integer of equal element size
          using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
          return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
                                         | __vector_bitcast<_Ip>(__b));
        }
      else if constexpr (__is_vector_type_v<_TW>)
        return __a | __b;
      else
        return __a._M_data | __b._M_data;
    }
  else
    return __a | __b;
}
1758 | |
1759 | // }}} |
1760 | // __and{{{ |
// Bitwise AND for vectors, _SimdWrappers, and scalars. Floating-point
// elements are ANDed via their integer bit representation.
template <typename _TW>
_GLIBCXX_SIMD_INTRINSIC constexpr _TW
__and(_TW __a, _TW __b) noexcept
{
  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
    {
      using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
                                         _VectorTraitsImpl<_TW>>::value_type;
      if constexpr (is_floating_point_v<_Tp>)
        {
          // operate on the unsigned integer of equal element size
          using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
          return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
                                         & __vector_bitcast<_Ip>(__b));
        }
      else if constexpr (__is_vector_type_v<_TW>)
        return __a & __b;
      else
        return __a._M_data & __b._M_data;
    }
  else
    return __a & __b;
}
1783 | |
1784 | // }}} |
1785 | // __andnot{{{ |
// Overload set wrapping the x86 ANDNOT instructions: each operator()
// computes ~__a & __b for the given vector type, dispatching to the widest
// available instruction set at compile time.
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
static constexpr struct
{
  _GLIBCXX_SIMD_INTRINSIC __v4sf
  operator()(__v4sf __a, __v4sf __b) const noexcept
  { return __builtin_ia32_andnps(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v2df
  operator()(__v2df __a, __v2df __b) const noexcept
  { return __builtin_ia32_andnpd(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v2di
  operator()(__v2di __a, __v2di __b) const noexcept
  { return __builtin_ia32_pandn128(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v8sf
  operator()(__v8sf __a, __v8sf __b) const noexcept
  { return __builtin_ia32_andnps256(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v4df
  operator()(__v4df __a, __v4df __b) const noexcept
  { return __builtin_ia32_andnpd256(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v4di
  operator()(__v4di __a, __v4di __b) const noexcept
  {
    if constexpr (__have_avx2)
      return __builtin_ia32_andnotsi256(__a, __b);
    else
      // AVX1 has no 256-bit integer ANDNOT; use the double variant
      return reinterpret_cast<__v4di>(
        __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
                                 reinterpret_cast<__v4df>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v16sf
  operator()(__v16sf __a, __v16sf __b) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_ps(__a, __b);
    else
      // without AVX512DQ there is no 512-bit float ANDNOT; use the integer
      // variant on the same bits
      return reinterpret_cast<__v16sf>(
        _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
                            reinterpret_cast<__v8di>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8df
  operator()(__v8df __a, __v8df __b) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_pd(__a, __b);
    else
      return reinterpret_cast<__v8df>(
        _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
                            reinterpret_cast<__v8di>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8di
  operator()(__v8di __a, __v8di __b) const noexcept
  { return _mm512_andnot_si512(__a, __b); }
} _S_x86_andnot;
#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1847 | |
// Returns ~__a & __b for vectors, _SimdWrapper objects, and scalars.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __andnot(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
				   _VectorTraitsImpl<_TW>>;
	using _Tp = typename _TVT::value_type;
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
	// Prefer the single-instruction x86 implementation, but only at
	// runtime and only when the operands are not compile-time
	// constants — otherwise stay with the generic code below so the
	// compiler can constant-fold the expression.
	if constexpr (sizeof(_TW) >= 16)
	  {
	    const auto __ai = __to_intrin(__a);
	    const auto __bi = __to_intrin(__b);
	    if (!__builtin_is_constant_evaluated()
		&& !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
	      {
		const auto __r = _S_x86_andnot(__ai, __bi);
		if constexpr (is_convertible_v<decltype(__r), _TW>)
		  return __r;
		else
		  return reinterpret_cast<typename _TVT::type>(__r);
	      }
	  }
#endif // _GLIBCXX_SIMD_X86INTRIN
	// Generic implementation on an unsigned-integer view of the bits
	// (builtin ~ and & are not defined for floating-point vectors).
	using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
				     & __vector_bitcast<_Ip>(__b));
      }
    else // scalar fallback
      return ~__a & __b;
  }
1880 | |
1881 | // }}} |
1882 | // __not{{{ |
1883 | template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> |
1884 | _GLIBCXX_SIMD_INTRINSIC constexpr _Tp |
1885 | __not(_Tp __a) noexcept |
1886 | { |
1887 | if constexpr (is_floating_point_v<typename _TVT::value_type>) |
1888 | return reinterpret_cast<typename _TVT::type>( |
1889 | ~__vector_bitcast<unsigned>(__a)); |
1890 | else |
1891 | return ~__a; |
1892 | } |
1893 | |
1894 | // }}} |
1895 | // __concat{{{ |
// Concatenates two equally-sized vectors a_ and b_ into one vector of
// twice the number of elements ({a_..., b_...}).
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
	  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
  constexpr _R
  __concat(_Tp a_, _Tp b_)
  {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
    // Workaround path: perform the concatenation on wider chunks
    // (double / long long) to reduce the number of subscripts; the bit
    // pattern of the result is unchanged.
    using _W
      = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
		      conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
				    long long, typename _TVT::value_type>>;
    constexpr int input_width = sizeof(_Tp) / sizeof(_W);
    const auto __a = __vector_bitcast<_W>(a_);
    const auto __b = __vector_bitcast<_W>(b_);
    using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
#else
    constexpr int input_width = _TVT::_S_full_size;
    const _Tp& __a = a_;
    const _Tp& __b = b_;
    using _Up = _R;
#endif
    // Spell out the element list for each supported chunk count; the
    // reinterpret_cast maps the (possibly widened) _Up back to _R.
    if constexpr (input_width == 2)
      return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
    else if constexpr (input_width == 4)
      return reinterpret_cast<_R>(
	_Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
    else if constexpr (input_width == 8)
      return reinterpret_cast<_R>(
	_Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
	    __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
    else if constexpr (input_width == 16)
      return reinterpret_cast<_R>(
	_Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
	    __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
	    __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
	    __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
	    __b[12], __b[13], __b[14], __b[15]});
    else if constexpr (input_width == 32)
      return reinterpret_cast<_R>(
	_Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
	    __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
	    __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
	    __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
	    __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
	    __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
	    __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
	    __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
	    __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
	    __b[31]});
  }
1945 | |
1946 | // }}} |
1947 | // __zero_extend {{{ |
// Proxy returned by __zero_extend. It holds the source vector __x and
// converts — on demand, via operator _To() — to any vector type with the
// same element type and 1x/2x/4x/8x/16x the number of elements, filling
// the new upper elements with zeros.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  struct _ZeroExtendProxy
  {
    using value_type = typename _TVT::value_type;
    static constexpr size_t _Np = _TVT::_S_full_size;
    const _Tp __x;

    // Conversion to the wider vector type _To (same value_type required).
    template <typename _To, typename _ToVT = _VectorTraits<_To>,
	      typename
	      = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
      _GLIBCXX_SIMD_INTRINSIC operator _To() const
      {
	constexpr size_t _ToN = _ToVT::_S_full_size;
	if constexpr (_ToN == _Np)
	  return __x;
	else if constexpr (_ToN == 2 * _Np)
	  {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
	    // Workaround: use explicit insert intrinsics where available so
	    // the upper half is a zeroed register rather than element-wise
	    // construction.
	    if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
	      return __vector_bitcast<value_type>(
		_mm256_insertf128_ps(__m256(), __x, 0));
	    else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
	      return __vector_bitcast<value_type>(
		_mm256_insertf128_pd(__m256d(), __x, 0));
	    else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
	      return __vector_bitcast<value_type>(
		_mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
	    else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
	      {
		if constexpr (__have_avx512dq)
		  return __vector_bitcast<value_type>(
		    _mm512_insertf32x8(__m512(), __x, 0));
		else
		  // Without AVX512DQ there is no f32x8 insert; use the
		  // f64x4 form on the same bits.
		  return reinterpret_cast<__m512>(
		    _mm512_insertf64x4(__m512d(),
				       reinterpret_cast<__m256d>(__x), 0));
	      }
	    else if constexpr (__have_avx512f
			       && _TVT::template _S_is<double, 4>)
	      return __vector_bitcast<value_type>(
		_mm512_insertf64x4(__m512d(), __x, 0));
	    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
	      return __vector_bitcast<value_type>(
		_mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
#endif
	    // Generic: concatenate with a zero vector.
	    return __concat(__x, _Tp());
	  }
	else if constexpr (_ToN == 4 * _Np)
	  {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
	    if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
	      {
		return __vector_bitcast<value_type>(
		  _mm512_insertf64x2(__m512d(), __x, 0));
	      }
	    else if constexpr (__have_avx512f
			       && is_floating_point_v<value_type>)
	      {
		return __vector_bitcast<value_type>(
		  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
				     0));
	      }
	    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
	      {
		return __vector_bitcast<value_type>(
		  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
	      }
#endif
	    return __concat(__concat(__x, _Tp()),
			    __vector_type_t<value_type, _Np * 2>());
	  }
	// 8x/16x: recurse via the half-width conversion, then pad with zeros.
	else if constexpr (_ToN == 8 * _Np)
	  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
			  __vector_type_t<value_type, _Np * 4>());
	else if constexpr (_ToN == 16 * _Np)
	  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
			  __vector_type_t<value_type, _Np * 8>());
	else
	  __assert_unreachable<_Tp>();
      }
  };
2029 | |
2030 | template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> |
2031 | _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT> |
2032 | __zero_extend(_Tp __x) |
2033 | { return {__x}; } |
2034 | |
2035 | // }}} |
2036 | // __extract<_Np, By>{{{ |
2037 | template <int _Offset, |
2038 | int _SplitBy, |
2039 | typename _Tp, |
2040 | typename _TVT = _VectorTraits<_Tp>, |
2041 | typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>> |
2042 | _GLIBCXX_SIMD_INTRINSIC constexpr _R |
2043 | (_Tp __in) |
2044 | { |
2045 | using value_type = typename _TVT::value_type; |
2046 | #if _GLIBCXX_SIMD_X86INTRIN // {{{ |
2047 | if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0) |
2048 | { |
2049 | if constexpr (__have_avx512dq && is_same_v<double, value_type>) |
2050 | return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset); |
2051 | else if constexpr (is_floating_point_v<value_type>) |
2052 | return __vector_bitcast<value_type>( |
2053 | _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset)); |
2054 | else |
2055 | return reinterpret_cast<_R>( |
2056 | _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in), |
2057 | _Offset)); |
2058 | } |
2059 | else |
2060 | #endif // _GLIBCXX_SIMD_X86INTRIN }}} |
2061 | { |
2062 | #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1 |
2063 | using _W = conditional_t< |
2064 | is_floating_point_v<value_type>, double, |
2065 | conditional_t<(sizeof(_R) >= 16), long long, value_type>>; |
2066 | static_assert(sizeof(_R) % sizeof(_W) == 0); |
2067 | constexpr int __return_width = sizeof(_R) / sizeof(_W); |
2068 | using _Up = __vector_type_t<_W, __return_width>; |
2069 | const auto __x = __vector_bitcast<_W>(__in); |
2070 | #else |
2071 | constexpr int __return_width = _TVT::_S_full_size / _SplitBy; |
2072 | using _Up = _R; |
2073 | const __vector_type_t<value_type, _TVT::_S_full_size>& __x |
2074 | = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np> |
2075 | #endif |
2076 | constexpr int _O = _Offset * __return_width; |
2077 | return __call_with_subscripts<__return_width, _O>( |
2078 | __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2079 | return reinterpret_cast<_R>(_Up{__entries...}); |
2080 | }); |
2081 | } |
2082 | } |
2083 | |
2084 | // }}} |
2085 | // __lo/__hi64[z]{{{ |
2086 | template <typename _Tp, |
2087 | typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> |
2088 | _GLIBCXX_SIMD_INTRINSIC constexpr _R |
2089 | __lo64(_Tp __x) |
2090 | { |
2091 | _R __r{}; |
2092 | __builtin_memcpy(&__r, &__x, 8); |
2093 | return __r; |
2094 | } |
2095 | |
2096 | template <typename _Tp, |
2097 | typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> |
2098 | _GLIBCXX_SIMD_INTRINSIC constexpr _R |
2099 | __hi64(_Tp __x) |
2100 | { |
2101 | static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it" ); |
2102 | _R __r{}; |
2103 | __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8); |
2104 | return __r; |
2105 | } |
2106 | |
2107 | template <typename _Tp, |
2108 | typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> |
2109 | _GLIBCXX_SIMD_INTRINSIC constexpr _R |
2110 | __hi64z([[maybe_unused]] _Tp __x) |
2111 | { |
2112 | _R __r{}; |
2113 | if constexpr (sizeof(_Tp) == 16) |
2114 | __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8); |
2115 | return __r; |
2116 | } |
2117 | |
2118 | // }}} |
2119 | // __lo/__hi128{{{ |
2120 | template <typename _Tp> |
2121 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
2122 | __lo128(_Tp __x) |
2123 | { return __extract<0, sizeof(_Tp) / 16>(__x); } |
2124 | |
// Returns the high 128 bits of a 256-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __hi128(_Tp __x)
  {
    static_assert(sizeof(__x) == 32);
    return __extract<1, 2>(__x);
  }
2132 | |
2133 | // }}} |
2134 | // __lo/__hi256{{{ |
// Returns the low 256 bits of a 512-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __lo256(_Tp __x)
  {
    static_assert(sizeof(__x) == 64);
    return __extract<0, 2>(__x);
  }
2142 | |
// Returns the high 256 bits of a 512-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __hi256(_Tp __x)
  {
    static_assert(sizeof(__x) == 64);
    return __extract<1, 2>(__x);
  }
2150 | |
2151 | // }}} |
2152 | // __auto_bitcast{{{ |
// Proxy returned by __auto_bitcast: holds a builtin vector and converts
// to any requested vector type _Up via __intrin_bitcast (i.e. a plain
// reinterpretation of the bits, zero-extending/truncating as needed by
// __intrin_bitcast's semantics).
template <typename _Tp>
  struct _AutoCast
  {
    static_assert(__is_vector_type_v<_Tp>);

    const _Tp __x;

    template <typename _Up, typename _UVT = _VectorTraits<_Up>>
      _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
      { return __intrin_bitcast<typename _UVT::type>(__x); }
  };
2164 | |
2165 | template <typename _Tp> |
2166 | _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp> |
2167 | __auto_bitcast(const _Tp& __x) |
2168 | { return {__x}; } |
2169 | |
2170 | template <typename _Tp, size_t _Np> |
2171 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2172 | _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType> |
2173 | __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x) |
2174 | { return {__x._M_data}; } |
2175 | |
2176 | // }}} |
2177 | // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^ |
2178 | |
2179 | #if _GLIBCXX_SIMD_HAVE_SSE_ABI |
2180 | // __bool_storage_member_type{{{ |
2181 | #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN |
// Maps a mask size to the __mmask* type used for storage. This primary
// template handles only non-power-of-2 sizes by rounding up to the next
// power of 2 and delegating to the matching specialization below.
template <size_t _Size>
  struct __bool_storage_member_type
  {
    static_assert((_Size & (_Size - 1)) != 0,
		  "This trait may only be used for non-power-of-2 sizes. "
		  "Power-of-2 sizes must be specialized.");
    using type =
      typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
  };
2191 | |
// Power-of-2 sizes map to the smallest mask type with at least that many
// bits (sizes 2-8 all share __mmask8; size 1 is a plain bool).
template <>
  struct __bool_storage_member_type<1> { using type = bool; };

template <>
  struct __bool_storage_member_type<2> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<4> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<8> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<16> { using type = __mmask16; };

template <>
  struct __bool_storage_member_type<32> { using type = __mmask32; };

template <>
  struct __bool_storage_member_type<64> { using type = __mmask64; };
2212 | #endif // _GLIBCXX_SIMD_HAVE_AVX512F |
2213 | |
2214 | // }}} |
2215 | // __intrinsic_type (x86){{{ |
2216 | // the following excludes bool via __is_vectorizable |
2217 | #if _GLIBCXX_SIMD_HAVE_SSE |
// x86: the intrinsic type is the SSE/AVX/AVX-512 register type of the
// next-larger supported width (16, 32, or 64 bytes). All integral element
// types share the long long based register type (__m128i/__m256i/__m512i);
// floating-point types keep their element type (__m128/__m128d etc.).
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
  {
    static_assert(!is_same_v<_Tp, long double>,
		  "no __intrinsic_type support for long double on x86");

    // Round the requested size up to the register width.
    static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;

    using type [[__gnu__::__vector_size__(_S_VBytes)]]
      = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
  };
2229 | #endif // _GLIBCXX_SIMD_HAVE_SSE |
2230 | |
2231 | // }}} |
2232 | #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI |
2233 | // __intrinsic_type (ARM){{{ |
2234 | #if _GLIBCXX_SIMD_HAVE_NEON |
// NEON floating-point specializations. float64x*_t only exists on AArch64
// (A64); on A32 the double specializations intentionally provide no
// `type` member.
template <>
  struct __intrinsic_type<float, 8, void>
  { using type = float32x2_t; };

template <>
  struct __intrinsic_type<float, 16, void>
  { using type = float32x4_t; };

template <>
  struct __intrinsic_type<double, 8, void>
  {
#if _GLIBCXX_SIMD_HAVE_NEON_A64
    using type = float64x1_t;
#endif
  };

template <>
  struct __intrinsic_type<double, 16, void>
  {
#if _GLIBCXX_SIMD_HAVE_NEON_A64
    using type = float64x2_t;
#endif
  };
2258 | |
// Generates the __intrinsic_type specializations mapping signed/unsigned
// integer element types of _Bits bits, _Np elements, to the corresponding
// NEON int/uint vector types (e.g. int32x4_t / uint32x4_t).
#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
template <> \
struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
_Np * _Bits / 8, void> \
{ using type = int##_Bits##x##_Np##_t; }; \
template <> \
struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
_Np * _Bits / 8, void> \
{ using type = uint##_Bits##x##_Np##_t; }
// 64-bit (D register) and 128-bit (Q register) variants for every element
// width.
_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
#undef _GLIBCXX_SIMD_ARM_INTRIN
2277 | |
// NEON catch-all: normalizes the element type to the canonical signed or
// unsigned integer of the same size (floating-point kept as-is), rounds
// the byte count up to a D (8-byte) or Q (16-byte) register, and
// delegates to one of the explicit specializations above.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  {
    static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;

    using _Ip = __int_for_sizeof_t<_Tp>;

    using _Up = conditional_t<
      is_floating_point_v<_Tp>, _Tp,
      conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;

    // If nothing was normalized, this specialization would recurse into
    // itself — such cases must be covered by an explicit specialization.
    static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
		  "should use explicit specialization above");

    using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
  };
2294 | #endif // _GLIBCXX_SIMD_HAVE_NEON |
2295 | |
2296 | // }}} |
2297 | // __intrinsic_type (PPC){{{ |
2298 | #ifdef __ALTIVEC__ |
// PowerPC: helper trait mapping an element type to the AltiVec/VSX
// `__vector T` type. Specializations exist only for the element types the
// target actually supports (double and 64-bit integers require VSX).
template <typename _Tp>
  struct __intrinsic_type_impl;

#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
template <> \
struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
_GLIBCXX_SIMD_PPC_INTRIN(float);
#ifdef __VSX__
_GLIBCXX_SIMD_PPC_INTRIN(double);
#endif
_GLIBCXX_SIMD_PPC_INTRIN(signed char);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
_GLIBCXX_SIMD_PPC_INTRIN(signed short);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
_GLIBCXX_SIMD_PPC_INTRIN(signed int);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
// `long` is 32-bit on ILP32 (always supported) and 64-bit otherwise
// (requires VSX).
#if defined __VSX__ || __SIZEOF_LONG__ == 4
_GLIBCXX_SIMD_PPC_INTRIN(signed long);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
#endif
#ifdef __VSX__
_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
#endif
#undef _GLIBCXX_SIMD_PPC_INTRIN
2324 | |
// PowerPC __intrinsic_type: rejects unsupported floating-point
// configurations, then canonicalizes the element type (by signedness and
// size) and delegates to __intrinsic_type_impl above.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  {
    static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;

    // allow _Tp == long double with -mlong-double-64
    static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
		  "no __intrinsic_type support for 128-bit floating point on PowerPC");

#ifndef __VSX__
    static_assert(!(is_same_v<_Tp, double>
		    || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
		  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
#endif

    // Returns a value of the canonical element type; only its type is
    // used (via decltype below).
    static constexpr auto __element_type()
    {
      if constexpr (is_floating_point_v<_Tp>)
	{
	  if constexpr (_S_is_ldouble)
	    return double {};
	  else
	    return _Tp {};
	}
      else if constexpr (is_signed_v<_Tp>)
	{
	  if constexpr (sizeof(_Tp) == sizeof(_SChar))
	    return _SChar {};
	  else if constexpr (sizeof(_Tp) == sizeof(short))
	    return short {};
	  else if constexpr (sizeof(_Tp) == sizeof(int))
	    return int {};
	  else if constexpr (sizeof(_Tp) == sizeof(_LLong))
	    return _LLong {};
	}
      else
	{
	  if constexpr (sizeof(_Tp) == sizeof(_UChar))
	    return _UChar {};
	  else if constexpr (sizeof(_Tp) == sizeof(_UShort))
	    return _UShort {};
	  else if constexpr (sizeof(_Tp) == sizeof(_UInt))
	    return _UInt {};
	  else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
	    return _ULLong {};
	}
    }

    using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
  };
2375 | #endif // __ALTIVEC__ |
2376 | |
2377 | // }}} |
2378 | // _SimdWrapper<bool>{{{1 |
// _SimdWrapper specialization for masks stored as bits in an AVX-512
// __mmask* (or plain bool for _Width 1). Bit __i holds element __i.
// SFINAE'd on __bool_storage_member_type so it only exists where such a
// storage type is available.
template <size_t _Width>
  struct _SimdWrapper<bool, _Width,
		      void_t<typename __bool_storage_member_type<_Width>::type>>
  {
    using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
    using value_type = bool;

    // Number of elements representable in the storage type (may exceed
    // _Width, e.g. _Width 4 stored in an 8-bit __mmask8).
    static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper(_BuiltinType __k) : _M_data(__k) {};

    // Implicit conversions to the underlying mask integer.
    _GLIBCXX_SIMD_INTRINSIC
    operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC
    operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC _BuiltinType
    __intrin() const
    { return _M_data; }

    // Reads bit __i.
    _GLIBCXX_SIMD_INTRINSIC constexpr value_type
    operator[](size_t __i) const
    { return _M_data & (_BuiltinType(1) << __i); }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator[](_SizeConstant<__i>) const
      { return _M_data & (_BuiltinType(1) << __i); }

    // Sets or clears bit __i.
    _GLIBCXX_SIMD_INTRINSIC constexpr void
    _M_set(size_t __i, value_type __x)
    {
      if (__x)
	_M_data |= (_BuiltinType(1) << __i);
      else
	_M_data &= ~(_BuiltinType(1) << __i);
    }

    // True if the compiler can see the value at compile time (enables
    // constant folding in callers).
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    { return __builtin_constant_p(_M_data); }

    // True only if the value is a known constant with no active bit set;
    // bits beyond _Width are masked out before the comparison.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_none_of() const
    {
      if (__builtin_constant_p(_M_data))
	{
	  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
	  constexpr _BuiltinType __active_mask
	    = ~_BuiltinType() >> (__nbits - _Width);
	  return (_M_data & __active_mask) == 0;
	}
      return false;
    }

    // True only if the value is a known constant with all _Width active
    // bits set.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_all_of() const
    {
      if (__builtin_constant_p(_M_data))
	{
	  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
	  constexpr _BuiltinType __active_mask
	    = ~_BuiltinType() >> (__nbits - _Width);
	  return (_M_data & __active_mask) == __active_mask;
	}
      return false;
    }

    _BuiltinType _M_data;
  };
2460 | |
2461 | // _SimdWrapperBase{{{1 |
// Base class of _SimdWrapper holding the data member. The bool parameter
// selects whether default construction must zero-initialize (see the
// `true` specialization below).
template <bool _MustZeroInitPadding, typename _BuiltinType>
  struct _SimdWrapperBase;

template <typename _BuiltinType>
  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
  {
    // Default construction leaves _M_data uninitialized.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}

    _BuiltinType _M_data;
  };
2476 | |
// Specialization for partial wrappers of IEC 559 floating-point types:
// default construction value-initializes _M_data so the padding elements
// never hold indeterminate bits that could be a signaling NaN.
template <typename _BuiltinType>
  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
					      // never become SNaN
  {
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase() : _M_data() {}

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}

    _BuiltinType _M_data;
  };
2489 | |
2490 | // }}} |
2491 | // _SimdWrapper{{{ |
// General _SimdWrapper: a thin wrapper around a builtin vector of _Width
// elements of vectorizable type _Tp. _Width may be smaller than the
// number of elements the underlying vector type holds (_S_is_partial);
// for IEC 559 floating-point types with such padding, the base class
// zero-initializes on default construction (see _SimdWrapperBase).
template <typename _Tp, size_t _Width>
  struct _SimdWrapper<
    _Tp, _Width,
    void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
    : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
			 && sizeof(_Tp) * _Width
			      == sizeof(__vector_type_t<_Tp, _Width>),
		       __vector_type_t<_Tp, _Width>>
  {
    using _Base
      = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
			   && sizeof(_Tp) * _Width
				== sizeof(__vector_type_t<_Tp, _Width>),
			 __vector_type_t<_Tp, _Width>>;

    static_assert(__is_vectorizable_v<_Tp>);
    static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then

    using _BuiltinType = __vector_type_t<_Tp, _Width>;
    using value_type = _Tp;

    // Element count of the underlying vector type (>= _S_size).
    static inline constexpr size_t _S_full_size
      = sizeof(_BuiltinType) / sizeof(value_type);
    static inline constexpr int _S_size = _Width;
    static inline constexpr bool _S_is_partial = _S_full_size != _S_size;

    using _Base::_M_data;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    // Element-wise construction from an initializer list.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper(initializer_list<_Tp> __init)
      : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
		[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		  return __init.begin()[__i.value];
		})) {}

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper(const _SimdWrapper&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper(_SimdWrapper&&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
    operator=(const _SimdWrapper&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
    operator=(_SimdWrapper&&) = default;

    // Implicit construction from either the builtin vector type or the
    // matching intrinsic type (e.g. __m128).
    template <typename _V, typename = enable_if_t<disjunction_v<
			     is_same<_V, __vector_type_t<_Tp, _Width>>,
			     is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _SimdWrapper(_V __x)
      // __vector_bitcast can convert e.g. __m128 to __vector(2) float
      : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}

    // Conversion to a _SimdTuple of scalar ABI entries (one per element).
    template <typename... _As,
	      typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
				      && sizeof...(_As) <= _Width)>>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      operator _SimdTuple<_Tp, _As...>() const
      {
	return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
		 { return _M_data[int(__i)]; });
      }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
    operator[](size_t __i) const
    { return _M_data[__i]; }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator[](_SizeConstant<__i>) const
      { return _M_data[__i]; }

    // Assigns element __i. In constant evaluation, vector subscript
    // assignment is not usable, so the whole vector is regenerated.
    _GLIBCXX_SIMD_INTRINSIC constexpr void
    _M_set(size_t __i, _Tp __x)
    {
      if (__builtin_is_constant_evaluated())
	_M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
		    return __j == __i ? __x : _M_data[__j()];
		  });
      else
	_M_data[__i] = __x;
    }

    // True if the compiler can see the value at compile time.
    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    { return __builtin_constant_p(_M_data); }

    // True only if the value is a known constant equal to all-zero-bits
    // in every element (floating-point compared via its integer view, so
    // -0.0 does not count as zero bits).
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_none_of() const
    {
      if (__builtin_constant_p(_M_data))
	{
	  bool __r = true;
	  if constexpr (is_floating_point_v<_Tp>)
	    {
	      using _Ip = __int_for_sizeof_t<_Tp>;
	      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
	      __execute_n_times<_Width>(
		[&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
	    }
	  else
	    __execute_n_times<_Width>(
	      [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
	  if (__builtin_constant_p(__r))
	    return __r;
	}
      return false;
    }

    // True only if the value is a known constant with all bits set in
    // every element (the all-true mask representation).
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_all_of() const
    {
      if (__builtin_constant_p(_M_data))
	{
	  bool __r = true;
	  if constexpr (is_floating_point_v<_Tp>)
	    {
	      using _Ip = __int_for_sizeof_t<_Tp>;
	      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
	      __execute_n_times<_Width>(
		[&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
	    }
	  else
	    __execute_n_times<_Width>(
	      [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
	  if (__builtin_constant_p(__r))
	    return __r;
	}
      return false;
    }
  };
2642 | |
2643 | // }}} |
2644 | |
2645 | // __vectorized_sizeof {{{ |
// Returns the vector register width (in bytes) to use for element type
// _Tp on the current target, or sizeof(_Tp) (i.e. scalar) if no vector
// ISA applies. Returns 0 for non-vectorizable types.
template <typename _Tp>
  constexpr size_t
  __vectorized_sizeof()
  {
    if constexpr (!__is_vectorizable_v<_Tp>)
      return 0;

    if constexpr (sizeof(_Tp) <= 8)
      {
	// X86: widest available register first. Plain AVX512F lacks 8/16-bit
	// element operations, hence the sizeof(_Tp) >= 4 condition without
	// AVX512BW; likewise AVX without AVX2 only covers floating point.
	if constexpr (__have_avx512bw)
	  return 64;
	if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
	  return 64;
	if constexpr (__have_avx2)
	  return 32;
	if constexpr (__have_avx && is_floating_point_v<_Tp>)
	  return 32;
	if constexpr (__have_sse2)
	  return 16;
	if constexpr (__have_sse && is_same_v<_Tp, float>)
	  return 16;
	/* The following is too much trouble because of mixed MMX and x87 code.
	 * While nothing here explicitly calls MMX instructions of registers,
	 * they are still emitted but no EMMS cleanup is done.
	if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
	  return 8;
	 */

	// PowerPC:
	if constexpr (__have_power8vec
		      || (__have_power_vmx && (sizeof(_Tp) < 8))
		      || (__have_power_vsx && is_floating_point_v<_Tp>) )
	  return 16;

	// ARM:
	if constexpr (__have_neon_a64
		      || (__have_neon_a32 && !is_same_v<_Tp, double>) )
	  return 16;
	if constexpr (__have_neon
		      && sizeof(_Tp) < 8
		      // Only allow fp if the user allows non-ICE559 fp (e.g.
		      // via -ffast-math). ARMv7 NEON fp is not conforming to
		      // IEC559.
		      && (__support_neon_float || !is_floating_point_v<_Tp>))
	  return 16;
      }

    // Fallback: one element per "vector".
    return sizeof(_Tp);
  }
2696 | |
2697 | // }}} |
namespace simd_abi {
// most of simd_abi is defined in simd_detail.h

// The largest _Np usable with fixed_size<_Np> for element type _Tp.
// 64 requires AVX512BW (64-byte vectors of 1-byte elements); otherwise 32.
template <typename _Tp>
  inline constexpr int max_fixed_size
    = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;

// compatible {{{
// The ABI tag guaranteeing ABI compatibility across translation units on this
// platform: 16-byte vectors where the baseline ISA guarantees them, otherwise
// scalar.
#if defined __x86_64__ || defined __aarch64__
template <typename _Tp>
  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
#elif defined __ARM_NEON
// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
// ABI?)
template <typename _Tp>
  using compatible
    = conditional_t<(sizeof(_Tp) < 8
		       && (__support_neon_float || !is_floating_point_v<_Tp>)),
		    _VecBuiltin<16>, scalar>;
#else
template <typename>
  using compatible = scalar;
#endif

// }}}
// native {{{
// Selects the ABI tag matching __vectorized_sizeof<_Tp>(): scalar if no
// vector registers apply, _VecBltnBtmsk (bitmask masks) when AVX512 can be
// used, _VecBuiltin otherwise.  Returns a pointer-to-tag purely to carry the
// type; the value (nullptr) is never used.
template <typename _Tp>
  constexpr auto
  __determine_native_abi()
  {
    constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
    if constexpr (__bytes == sizeof(_Tp))
      return static_cast<scalar*>(nullptr);
    else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
      return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
    else
      return static_cast<_VecBuiltin<__bytes>*>(nullptr);
  }

// The widest usable ABI tag for _Tp on the compilation target.
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;

// }}}
// __default_abi {{{
// The ABI used by simd<_Tp> / simd_mask<_Tp>; overridable via
// _GLIBCXX_SIMD_DEFAULT_ABI, defaulting to the portable `compatible` ABI.
#if defined _GLIBCXX_SIMD_DEFAULT_ABI
template <typename _Tp>
  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
#else
template <typename _Tp>
  using __default_abi = compatible<_Tp>;
#endif

// }}}
} // namespace simd_abi
2751 | |
2752 | // traits {{{1 |
2753 | template <typename _Tp> |
2754 | struct is_simd_flag_type |
2755 | : false_type |
2756 | {}; |
2757 | |
2758 | template <> |
2759 | struct is_simd_flag_type<element_aligned_tag> |
2760 | : true_type |
2761 | {}; |
2762 | |
2763 | template <> |
2764 | struct is_simd_flag_type<vector_aligned_tag> |
2765 | : true_type |
2766 | {}; |
2767 | |
2768 | template <size_t _Np> |
2769 | struct is_simd_flag_type<overaligned_tag<_Np>> |
2770 | : __bool_constant<(_Np > 0) and __has_single_bit(x: _Np)> |
2771 | {}; |
2772 | |
2773 | template <typename _Tp> |
2774 | inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value; |
2775 | |
2776 | template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>> |
2777 | using _IsSimdFlagType = _Tp; |
2778 | |
2779 | // is_abi_tag {{{2 |
// is_abi_tag {{{2
// Detection trait: a type is an ABI tag iff it declares a nested
// _IsValidAbiTag type (itself a bool_constant), whose value is inherited.
template <typename _Tp, typename = void_t<>>
  struct is_abi_tag : false_type {};

template <typename _Tp>
  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
  : public _Tp::_IsValidAbiTag {};

template <typename _Tp>
  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2789 | |
2790 | // is_simd(_mask) {{{2 |
// is_simd(_mask) {{{2
// Primary templates default to false; the true specializations follow the
// class declarations of simd and simd_mask below.
template <typename _Tp>
  struct is_simd : public false_type {};

template <typename _Tp>
  inline constexpr bool is_simd_v = is_simd<_Tp>::value;

template <typename _Tp>
  struct is_simd_mask : public false_type {};

template <typename _Tp>
  inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2802 | |
2803 | // simd_size {{{2 |
// simd_size {{{2
// Number of elements of simd<_Tp, _Abi>.  SFINAE-friendly: the primary
// template has no `value` member for invalid (_Tp, _Abi) combinations.
template <typename _Tp, typename _Abi, typename = void>
  struct __simd_size_impl {};

template <typename _Tp, typename _Abi>
  struct __simd_size_impl<
    _Tp, _Abi,
    enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
  : _SizeConstant<_Abi::template _S_size<_Tp>> {};

template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  struct simd_size : __simd_size_impl<_Tp, _Abi> {};

template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2818 | |
2819 | // simd_abi::deduce {{{2 |
// Implementation of simd_abi::deduce; specializations are provided by the
// ABI-specific headers.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_impl;

namespace simd_abi {
/**
 * @tparam _Tp The requested `value_type` for the elements.
 * @tparam _Np The requested number of elements.
 * @tparam _Abis This parameter is ignored, since this implementation cannot
 * make any use of it. Either a good native ABI is matched and used as `type`
 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
 * the best matching native ABIs.
 */
template <typename _Tp, size_t _Np, typename...>
  struct deduce : __deduce_impl<_Tp, _Np> {};

template <typename _Tp, size_t _Np, typename... _Abis>
  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
} // namespace simd_abi
2838 | |
2839 | // }}}2 |
2840 | // rebind_simd {{{2 |
2841 | template <typename _Tp, typename _V, typename = void> |
2842 | struct rebind_simd; |
2843 | |
2844 | template <typename _Tp, typename _Up, typename _Abi> |
2845 | struct rebind_simd<_Tp, simd<_Up, _Abi>, |
2846 | void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>> |
2847 | { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; |
2848 | |
2849 | template <typename _Tp, typename _Up, typename _Abi> |
2850 | struct rebind_simd<_Tp, simd_mask<_Up, _Abi>, |
2851 | void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>> |
2852 | { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; |
2853 | |
2854 | template <typename _Tp, typename _V> |
2855 | using rebind_simd_t = typename rebind_simd<_Tp, _V>::type; |
2856 | |
2857 | // resize_simd {{{2 |
// resize_simd {{{2
// Produces a simd/simd_mask type with the same element type as _V but _Np
// elements.  SFINAE-friendly via the deduce_t in the void_t argument.
template <int _Np, typename _V, typename = void>
  struct resize_simd;

template <int _Np, typename _Tp, typename _Abi>
  struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };

template <int _Np, typename _Tp, typename _Abi>
  struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };

template <int _Np, typename _V>
  using resize_simd_t = typename resize_simd<_Np, _V>::type;
2871 | |
2872 | // }}}2 |
2873 | // memory_alignment {{{2 |
2874 | template <typename _Tp, typename _Up = typename _Tp::value_type> |
2875 | struct memory_alignment |
2876 | : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {}; |
2877 | |
2878 | template <typename _Tp, typename _Up = typename _Tp::value_type> |
2879 | inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value; |
2880 | |
2881 | // class template simd [simd] {{{1 |
// class template simd [simd] {{{1
// Forward declaration; the definition follows further down in this file.
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  class simd;

template <typename _Tp, typename _Abi>
  struct is_simd<simd<_Tp, _Abi>> : public true_type {};

// simd using the widest ABI available on the compilation target.
template <typename _Tp>
  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;

// simd with a user-chosen, target-independent element count.
template <typename _Tp, int _Np>
  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;

// Internal shorthand: simd with the best ABI deduced for _Np elements.
template <typename _Tp, size_t _Np>
  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2896 | |
2897 | // class template simd_mask [simd_mask] {{{1 |
// class template simd_mask [simd_mask] {{{1
// Forward declaration; the definition follows further down in this file.
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};

// Mask type matching native_simd<_Tp>.
template <typename _Tp>
  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;

// Mask type matching fixed_size_simd<_Tp, _Np>.
template <typename _Tp, int _Np>
  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;

// Internal shorthand: mask matching __deduced_simd<_Tp, _Np>.
template <typename _Tp, size_t _Np>
  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2912 | |
2913 | // casts [simd.casts] {{{1 |
2914 | // static_simd_cast {{{2 |
// casts [simd.casts] {{{1
// static_simd_cast {{{2
// Computes the return type of static_simd_cast<_Tp>(simd<_Up, _Ap>):
// - if _Tp is already a simd of matching size, that type;
// - if _Tp is a vectorizable element type, simd<_Tp, _Ap> when _Ap is
//   size-compatible, otherwise fixed_size_simd of matching size.
template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
  struct __static_simd_cast_return_type;

// Casting to a simd_mask is resolved via the corresponding simd type.
template <typename _Tp, typename _A0, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};

// _Tp is a simd type of the same element count: use it directly.
template <typename _Tp, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<
    _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
  { using type = _Tp; };

// _Tp equals the source element type: keep the source ABI.
template <typename _Tp, typename _Ap>
  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
					enable_if_t<__is_vectorizable_v<_Tp>>
#else
					void
#endif
					>
  { using type = simd<_Tp, _Ap>; };

// make_signed_t that is the identity for non-integral types (where
// make_signed would be ill-formed).
template <typename _Tp, typename = void>
  struct __safe_make_signed { using type = _Tp;};

template <typename _Tp>
  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
  {
    // the extra make_unsigned_t is because of PR85951
    using type = make_signed_t<make_unsigned_t<_Tp>>;
  };

// NOTE(review): this alias is not uglified (no leading __); kept as-is since
// renaming would break any existing users of the name.
template <typename _Tp>
  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;

// General case: _Tp is a vectorizable element type different from _Up.  The
// source ABI is kept only for sign-difference casts between same-width
// integers; otherwise the result is a fixed_size_simd of the same length.
template <typename _Tp, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
					enable_if_t<__is_vectorizable_v<_Tp>>
#else
					void
#endif
					>
  {
    using type = conditional_t<
      (is_integral_v<_Up> && is_integral_v<_Tp> &&
#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
       is_signed_v<_Up> != is_signed_v<_Tp> &&
#endif
       is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
      simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
  };
2967 | |
// Element-wise static_cast of a simd to element type / simd type _Tp.
// The return type is computed by __static_simd_cast_return_type above.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _R
	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
  static_simd_cast(const simd<_Up, _Ap>& __x)
  {
    if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
      return __x;  // identity cast: no conversion needed
    else
      {
	// Convert element type and/or ABI via the _SimdConverter machinery.
	_SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
	  __c;
	return _R(__private_init, __c(__data(__x)));
      }
  }
2983 | |
namespace __proposed {
// Mask overload of static_simd_cast (not part of the TS; proposed
// extension): converts a simd_mask to the mask type matching the cast's
// simd return type.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _R
	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
  {
    using _RM = typename _R::mask_type;
    return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
			      typename _RM::simd_type::value_type>(__x)};
  }
} // namespace __proposed
2996 | |
2997 | // simd_cast {{{2 |
// simd_cast {{{2
// Like static_simd_cast, but constrained to value-preserving conversions
// (no narrowing) via the _ValuePreserving parameter type.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _To = __value_type_or_identity_t<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
    -> decltype(static_simd_cast<_Tp>(__x))
  { return static_simd_cast<_Tp>(__x); }
3004 | |
namespace __proposed {
// Mask overload of simd_cast (proposed extension): value-preserving mask
// conversion, forwarding to the __proposed static_simd_cast above.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _To = __value_type_or_identity_t<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
    -> decltype(static_simd_cast<_Tp>(__x))
  { return static_simd_cast<_Tp>(__x); }
} // namespace __proposed
3013 | |
3014 | // }}}2 |
3015 | // resizing_simd_cast {{{ |
namespace __proposed {
/* Proposed spec:

template <class T, class U, class Abi>
T resizing_simd_cast(const simd<U, Abi>& x)

p1 Constraints:
   - is_simd_v<T> is true and
   - T::value_type is the same type as U

p2 Returns:
   A simd object with the i^th element initialized to x[i] for all i in the
   range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
   than simd_size_v<U, Abi>, the remaining elements are value-initialized.

template <class T, class U, class Abi>
T resizing_simd_cast(const simd_mask<U, Abi>& x)

p1 Constraints: is_simd_mask_v<T> is true

p2 Returns:
   A simd_mask object with the i^th element initialized to x[i] for all i in
   the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
   than simd_size_v<U, Abi>, the remaining elements are initialized to false.

*/

// Cast between simd types of the same element type but different element
// counts, truncating or zero-extending as described above.
template <typename _Tp, typename _Up, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
    conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
  resizing_simd_cast(const simd<_Up, _Ap>& __x)
  {
    if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
      return __x;  // same ABI: identity
    else if (__builtin_is_constant_evaluated())
      // Constant evaluation cannot use bit casts or memcpy; build per element.
      return _Tp([&](auto __i) constexpr {
	       return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
	     });
    else if constexpr (simd_size_v<_Up, _Ap> == 1)
      {
	_Tp __r{};
	__r[0] = __x[0];
	return __r;
      }
    else if constexpr (_Tp::size() == 1)
      return __x[0];
    else if constexpr (sizeof(_Tp) == sizeof(__x)
			 && !__is_fixed_size_abi_v<_Ap>)
      // Same storage size: reinterpret, after masking off any padding
      // elements of the source.
      return {__private_init,
	      __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
		_Ap::_S_masked(__data(__x))._M_data)};
    else
      {
	// General case: copy the overlapping elements; the rest of __r stays
	// value-initialized.
	_Tp __r{};
	__builtin_memcpy(&__data(__r), &__data(__x),
			 sizeof(_Up)
			   * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
	return __r;
      }
  }

// Mask overload: converts via the target ABI's mask implementation.
template <typename _Tp, typename _Up, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
  {
    return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
			      typename _Tp::simd_type::value_type>(__x)};
  }
} // namespace __proposed
3086 | |
3087 | // }}} |
3088 | // to_fixed_size {{{2 |
// to_fixed_size {{{2
// Converts a simd/simd_mask to the size-equivalent fixed_size type.
// Fixed-size inputs are returned unchanged.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
  { return __x; }

template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
  { return __x; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
  to_fixed_size(const simd<_Tp, _Ap>& __x)
  {
    // Element-wise generator construction of the fixed_size result.
    using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
    return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
  {
    return {__private_init,
	    [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
  }
3114 | |
3115 | // to_native {{{2 |
// to_native {{{2
// Converts a fixed_size simd/simd_mask to the native type.  Only
// participates in overload resolution when the sizes match.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
  to_native(const fixed_size_simd<_Tp, _Np>& __x)
  {
    // Round-trip through aligned memory; store and load are both
    // vector_aligned thanks to the alignas.
    alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
    __x.copy_to(__mem, vector_aligned);
    return {__mem, vector_aligned};
  }

template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
  {
    // Masks have no memory round-trip; use element-wise generation.
    return native_simd_mask<_Tp>(
	     __private_init,
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }
3135 | |
3136 | // to_compatible {{{2 |
// to_compatible {{{2
// Converts a fixed_size simd/simd_mask to the default (compatible) ABI type.
// Only participates in overload resolution when the sizes match.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
  {
    // Round-trip through suitably aligned memory.
    alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
    __x.copy_to(__mem, vector_aligned);
    return {__mem, vector_aligned};
  }

template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
  {
    // Masks are rebuilt element-wise via the generator constructor.
    return simd_mask<_Tp>(
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }
3154 | |
3155 | // masked assignment [simd_mask.where] {{{1 |
3156 | |
3157 | // where_expression {{{1 |
3158 | // const_where_expression<M, T> {{{2 |
// Read-only masked view of a simd/simd_mask value, returned by
// where(mask, const value).  Holds references to both the mask and the
// value; only rvalue (&&) member calls are allowed, matching the TS design
// where the expression is consumed immediately.
template <typename _M, typename _Tp>
  class const_where_expression
  {
    using _V = _Tp;
    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);

    // Gives arithmetic _V a nested value_type for the conditional below.
    struct _Wrapper { using value_type = _V; };

  protected:
    using _Impl = typename _V::_Impl;

    using value_type =
      typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;

    // Internal accessors used by free functions operating on the expression.
    _GLIBCXX_SIMD_INTRINSIC friend const _M&
    __get_mask(const const_where_expression& __x)
    { return __x._M_k; }

    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
    __get_lvalue(const const_where_expression& __x)
    { return __x._M_value; }

    const _M& _M_k;   // the mask selecting active elements
    _Tp& _M_value;    // the referenced value (non-const ref; see ctor)

  public:
    const_where_expression(const const_where_expression&) = delete;

    const_where_expression& operator=(const const_where_expression&) = delete;

    // const_cast is safe here: this class only reads through _M_value; the
    // derived where_expression is constructed from a non-const _Tp.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    const_where_expression(const _M& __kk, const _Tp& dd)
    : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}

    // Returns a copy with active elements negated, inactive ones unchanged.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
    operator-() const&&
    {
      return {__private_init,
	      _Impl::template _S_masked_unary<negate>(__data(_M_k),
						      __data(_M_value))};
    }

    // Returns a copy with active elements loaded from __mem, inactive ones
    // taken from the referenced value.
    template <typename _Up, typename _Flags>
      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
	return {__private_init,
		_Impl::_S_masked_load(__data(_M_value), __data(_M_k),
				      _Flags::template _S_apply<_V>(__mem))};
      }

    // Stores only the active elements to __mem.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
	_Impl::_S_masked_store(__data(_M_value),
			       _Flags::template _S_apply<_V>(__mem),
			       __data(_M_k));
      }
  };
3219 | |
3220 | // const_where_expression<bool, T> {{{2 |
// const_where_expression<bool, T> {{{2
// Scalar specialization: the mask is a single bool, selecting the whole
// value or nothing.
template <typename _Tp>
  class const_where_expression<bool, _Tp>
  {
    using _M = bool;
    using _V = _Tp;

    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);

    // Gives arithmetic _V a nested value_type for the conditional below.
    struct _Wrapper { using value_type = _V; };

  protected:
    using value_type
      = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;

    _GLIBCXX_SIMD_INTRINSIC friend const _M&
    __get_mask(const const_where_expression& __x)
    { return __x._M_k; }

    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
    __get_lvalue(const const_where_expression& __x)
    { return __x._M_value; }

    const bool _M_k;  // scalar mask: true = active
    _Tp& _M_value;

  public:
    const_where_expression(const const_where_expression&) = delete;
    const_where_expression& operator=(const const_where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    const_where_expression(const bool __kk, const _Tp& dd)
    : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
    operator-() const&&
    { return _M_k ? -_M_value : _M_value; }

    template <typename _Up, typename _Flags>
      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }

    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
	if (_M_k)
	  __mem[0] = _M_value;
      }
  };
3271 | |
3272 | // where_expression<M, T> {{{2 |
3273 | template <typename _M, typename _Tp> |
3274 | class where_expression : public const_where_expression<_M, _Tp> |
3275 | { |
3276 | using _Impl = typename const_where_expression<_M, _Tp>::_Impl; |
3277 | |
3278 | static_assert(!is_const<_Tp>::value, |
3279 | "where_expression may only be instantiated with __a non-const " |
3280 | "_Tp parameter" ); |
3281 | |
3282 | using typename const_where_expression<_M, _Tp>::value_type; |
3283 | using const_where_expression<_M, _Tp>::_M_k; |
3284 | using const_where_expression<_M, _Tp>::_M_value; |
3285 | |
3286 | static_assert( |
3287 | is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "" ); |
3288 | static_assert(_M::size() == _Tp::size(), "" ); |
3289 | |
3290 | _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp& |
3291 | __get_lvalue(where_expression& __x) |
3292 | { return __x._M_value; } |
3293 | |
3294 | public: |
3295 | where_expression(const where_expression&) = delete; |
3296 | where_expression& operator=(const where_expression&) = delete; |
3297 | |
3298 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR |
3299 | where_expression(const _M& __kk, _Tp& dd) |
3300 | : const_where_expression<_M, _Tp>(__kk, dd) {} |
3301 | |
3302 | template <typename _Up> |
3303 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void |
3304 | operator=(_Up&& __x) && |
3305 | { |
3306 | _Impl::_S_masked_assign(__data(_M_k), __data(_M_value), |
3307 | __to_value_type_or_member_type<_Tp>( |
3308 | static_cast<_Up&&>(__x))); |
3309 | } |
3310 | |
3311 | #define _GLIBCXX_SIMD_OP_(__op, __name) \ |
3312 | template <typename _Up> \ |
3313 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \ |
3314 | operator __op##=(_Up&& __x)&& \ |
3315 | { \ |
3316 | _Impl::template _S_masked_cassign( \ |
3317 | __data(_M_k), __data(_M_value), \ |
3318 | __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \ |
3319 | [](auto __impl, auto __lhs, auto __rhs) \ |
3320 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ |
3321 | { return __impl.__name(__lhs, __rhs); }); \ |
3322 | } \ |
3323 | static_assert(true) |
3324 | _GLIBCXX_SIMD_OP_(+, _S_plus); |
3325 | _GLIBCXX_SIMD_OP_(-, _S_minus); |
3326 | _GLIBCXX_SIMD_OP_(*, _S_multiplies); |
3327 | _GLIBCXX_SIMD_OP_(/, _S_divides); |
3328 | _GLIBCXX_SIMD_OP_(%, _S_modulus); |
3329 | _GLIBCXX_SIMD_OP_(&, _S_bit_and); |
3330 | _GLIBCXX_SIMD_OP_(|, _S_bit_or); |
3331 | _GLIBCXX_SIMD_OP_(^, _S_bit_xor); |
3332 | _GLIBCXX_SIMD_OP_(<<, _S_shift_left); |
3333 | _GLIBCXX_SIMD_OP_(>>, _S_shift_right); |
3334 | #undef _GLIBCXX_SIMD_OP_ |
3335 | |
3336 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void |
3337 | operator++() && |
3338 | { |
3339 | __data(_M_value) |
3340 | = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value)); |
3341 | } |
3342 | |
3343 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void |
3344 | operator++(int) && |
3345 | { |
3346 | __data(_M_value) |
3347 | = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value)); |
3348 | } |
3349 | |
3350 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void |
3351 | operator--() && |
3352 | { |
3353 | __data(_M_value) |
3354 | = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value)); |
3355 | } |
3356 | |
3357 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void |
3358 | operator--(int) && |
3359 | { |
3360 | __data(_M_value) |
3361 | = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value)); |
3362 | } |
3363 | |
3364 | // intentionally hides const_where_expression::copy_from |
3365 | template <typename _Up, typename _Flags> |
3366 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void |
3367 | copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) && |
3368 | { |
3369 | __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k), |
3370 | _Flags::template _S_apply<_Tp>(__mem)); |
3371 | } |
3372 | }; |
3373 | |
3374 | // where_expression<bool, T> {{{2 |
// where_expression<bool, T> {{{2
// Scalar specialization: a single bool selects whether any operation
// applies.  All mutating members are &&-qualified and no-ops when the mask
// is false.
template <typename _Tp>
  class where_expression<bool, _Tp>
  : public const_where_expression<bool, _Tp>
  {
    using _M = bool;
    using typename const_where_expression<_M, _Tp>::value_type;
    using const_where_expression<_M, _Tp>::_M_k;
    using const_where_expression<_M, _Tp>::_M_value;

  public:
    where_expression(const where_expression&) = delete;
    where_expression& operator=(const where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    where_expression(const _M& __kk, _Tp& dd)
    : const_where_expression<_M, _Tp>(__kk, dd) {}

// Plain and compound assignment: applied only when the mask is true.
#define _GLIBCXX_SIMD_OP_(__op)                                \
    template <typename _Up>                                    \
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void     \
      operator __op(_Up&& __x)&&                               \
      { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }

    _GLIBCXX_SIMD_OP_(=)
    _GLIBCXX_SIMD_OP_(+=)
    _GLIBCXX_SIMD_OP_(-=)
    _GLIBCXX_SIMD_OP_(*=)
    _GLIBCXX_SIMD_OP_(/=)
    _GLIBCXX_SIMD_OP_(%=)
    _GLIBCXX_SIMD_OP_(&=)
    _GLIBCXX_SIMD_OP_(|=)
    _GLIBCXX_SIMD_OP_(^=)
    _GLIBCXX_SIMD_OP_(<<=)
    _GLIBCXX_SIMD_OP_(>>=)
#undef _GLIBCXX_SIMD_OP_

    // Pre/post increment and decrement; both return void, so the variants
    // behave identically.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
    operator++() &&
    { if (_M_k) ++_M_value; }

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
    operator++(int) &&
    { if (_M_k) ++_M_value; }

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
    operator--() &&
    { if (_M_k) --_M_value; }

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
    operator--(int) &&
    { if (_M_k) --_M_value; }

    // intentionally hides const_where_expression::copy_from
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
      { if (_M_k) _M_value = __mem[0]; }
  };
3433 | |
3434 | // where {{{1 |
// where {{{1
// Factory functions building (const_)where_expression objects.  The
// mutability of the expression follows the constness of the value argument.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
  { return {__k, __value}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
  { return {__k, __value}; }

// Masked views of simd_mask values.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
  { return {__k, __value}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
  { return {__k, __value}; }

// Scalar overloads; _ExactBool prevents implicit conversions to bool.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
  where(_ExactBool __k, _Tp& __value)
  { return {__k, __value}; }

template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
  where(_ExactBool __k, const _Tp& __value)
  { return {__k, __value}; }

// A plain bool mask with a simd value is ill-formed by design.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_CONSTEXPR void
  where(bool __k, simd<_Tp, _Ap>& __value) = delete;

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_CONSTEXPR void
  where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3476 | |
3477 | // proposed mask iterations {{{1 |
3478 | namespace __proposed { |
3479 | template <size_t _Np> |
3480 | class where_range |
3481 | { |
3482 | const bitset<_Np> __bits; |
3483 | |
3484 | public: |
3485 | where_range(bitset<_Np> __b) : __bits(__b) {} |
3486 | |
3487 | class iterator |
3488 | { |
3489 | size_t __mask; |
3490 | size_t __bit; |
3491 | |
3492 | _GLIBCXX_SIMD_INTRINSIC void |
3493 | __next_bit() |
3494 | { __bit = __builtin_ctzl(__mask); } |
3495 | |
3496 | _GLIBCXX_SIMD_INTRINSIC void |
3497 | __reset_lsb() |
3498 | { |
3499 | // 01100100 - 1 = 01100011 |
3500 | __mask &= (__mask - 1); |
3501 | // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit)); |
3502 | } |
3503 | |
3504 | public: |
3505 | iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); } |
3506 | iterator(const iterator&) = default; |
3507 | iterator(iterator&&) = default; |
3508 | |
3509 | _GLIBCXX_SIMD_ALWAYS_INLINE size_t |
3510 | operator->() const |
3511 | { return __bit; } |
3512 | |
3513 | _GLIBCXX_SIMD_ALWAYS_INLINE size_t |
3514 | operator*() const |
3515 | { return __bit; } |
3516 | |
3517 | _GLIBCXX_SIMD_ALWAYS_INLINE iterator& |
3518 | operator++() |
3519 | { |
3520 | __reset_lsb(); |
3521 | __next_bit(); |
3522 | return *this; |
3523 | } |
3524 | |
3525 | _GLIBCXX_SIMD_ALWAYS_INLINE iterator |
3526 | operator++(int) |
3527 | { |
3528 | iterator __tmp = *this; |
3529 | __reset_lsb(); |
3530 | __next_bit(); |
3531 | return __tmp; |
3532 | } |
3533 | |
3534 | _GLIBCXX_SIMD_ALWAYS_INLINE bool |
3535 | operator==(const iterator& __rhs) const |
3536 | { return __mask == __rhs.__mask; } |
3537 | |
3538 | _GLIBCXX_SIMD_ALWAYS_INLINE bool |
3539 | operator!=(const iterator& __rhs) const |
3540 | { return __mask != __rhs.__mask; } |
3541 | }; |
3542 | |
3543 | iterator |
3544 | begin() const |
3545 | { return __bits.to_ullong(); } |
3546 | |
3547 | iterator |
3548 | end() const |
3549 | { return 0; } |
3550 | }; |
3551 | |
// Proposed extension: where(mask) without a value yields a range of the
// indices of the active lanes (via the mask's bitset representation).
template <typename _Tp, typename _Ap>
  where_range<simd_size_v<_Tp, _Ap>>
  where(const simd_mask<_Tp, _Ap>& __k)
  { return __k.__to_bitset(); }
3556 | |
3557 | } // namespace __proposed |
3558 | |
3559 | // }}}1 |
3560 | // reductions [simd.reductions] {{{1 |
// reduce: fold all elements of __v with __binary_op (default: sum).
// Dispatches to the ABI-specific implementation.
template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3565 | |
// Masked reduce: fold only the selected elements of __x; inactive lanes
// contribute __identity_element (which must be the identity of
// __binary_op for the result to be correct).
template <typename _M, typename _V, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x,
	 typename _V::value_type __identity_element, _BinaryOperation __binary_op)
  {
    // Nothing selected: the fold over an empty set is the identity.
    if (__builtin_expect(none_of(__get_mask(__x)), false))
      return __identity_element;

    // Fill a temporary with the identity, overwrite the active lanes with
    // the selected values, then do a full-width reduction.
    _V __tmp = __identity_element;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
				__data(__get_lvalue(__x)));
    return reduce(__tmp, __binary_op);
  }
3579 | |
// Masked reduce overloads that supply the identity element of the given
// standard operation: plus -> 0, multiplies -> 1, bit_and -> all-ones,
// bit_or -> 0, bit_xor -> 0.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
  { return reduce(__x, 0, __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
  { return reduce(__x, 1, __binary_op); }

// ~value_type() produces the all-ones pattern, the identity of bitwise AND.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
  { return reduce(__x, ~typename _V::value_type(), __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
  { return reduce(__x, 0, __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
  { return reduce(__x, 0, __binary_op); }
3604 | |
// hmin/hmax: horizontal (over-all-lanes) minimum and maximum of __v.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmin(const simd<_Tp, _Abi>& __v) noexcept
  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmax(const simd<_Tp, _Abi>& __v) noexcept
  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3614 | |
// Masked hmin: minimum over the selected lanes only. Inactive lanes are
// filled with the identity of min: +infinity when _Tp has one and finite
// math is not enforced, otherwise the largest finite _Tp value.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmin(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
#ifdef __FINITE_MATH_ONLY__
      __finite_max_v<_Tp>;
#else
      __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
#endif
    // Overwrite the active lanes of the identity-filled temporary with the
    // selected values, then reduce the full vector.
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
				__data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
  }
3632 | |
// Masked hmax: maximum over the selected lanes only. Inactive lanes are
// filled with the identity of max: -infinity when _Tp has one and finite
// math is not enforced, otherwise the smallest finite _Tp value.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmax(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
#ifdef __FINITE_MATH_ONLY__
      __finite_min_v<_Tp>;
#else
      [] {
	if constexpr (__value_exists_v<__infinity, _Tp>)
	  return -__infinity_v<_Tp>;
	else
	  return __finite_min_v<_Tp>;
      }();
#endif
    // Overwrite the active lanes of the identity-filled temporary with the
    // selected values, then reduce the full vector.
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
				__data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
  }
3655 | |
3656 | // }}}1 |
3657 | // algorithms [simd.alg] {{{ |
// Element-wise min/max of two simd values, via the ABI implementation.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3667 | |
3668 | template <typename _Tp, typename _Ap> |
3669 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR |
3670 | pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>> |
3671 | minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b) |
3672 | { |
3673 | const auto pair_of_members |
3674 | = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b)); |
3675 | return {simd<_Tp, _Ap>(__private_init, pair_of_members.first), |
3676 | simd<_Tp, _Ap>(__private_init, pair_of_members.second)}; |
3677 | } |
3678 | |
3679 | template <typename _Tp, typename _Ap> |
3680 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap> |
3681 | clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi) |
3682 | { |
3683 | using _Impl = typename _Ap::_SimdImpl; |
3684 | return {__private_init, |
3685 | _Impl::_S_min(__data(__hi), |
3686 | _Impl::_S_max(__data(__lo), __data(__v)))}; |
3687 | } |
3688 | |
3689 | // }}} |
3690 | |
// Forward declaration of split<_Sizes...>(simd): splits __x into pieces of
// the given sizes; the sizes must add up exactly to the input's size
// (enforced by the enable_if_t default argument).
template <size_t... _Sizes, typename _Tp, typename _Ap,
	  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>&);
3695 | |
3696 | // __extract_part {{{ |
3697 | template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np> |
3698 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr |
3699 | _SimdWrapper<_Tp, _Np / _Total * _Combine> |
3700 | (const _SimdWrapper<_Tp, _Np> __x); |
3701 | |
3702 | template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As> |
3703 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
3704 | (const _SimdTuple<_Tp, _A0, _As...>& __x); |
3705 | |
3706 | // }}} |
3707 | // _SizeList {{{ |
// _SizeList: compile-time list of sizes used by split<_Sizes...> to index
// and slice the pack of requested part sizes.
template <size_t _V0, size_t... _Values>
  struct _SizeList
  {
    // The _I-th size in the list (recursing through the tail).
    template <size_t _I>
      static constexpr size_t
      _S_at(_SizeConstant<_I> = {})
      {
	if constexpr (_I == 0)
	  return _V0;
	else
	  return _SizeList<_Values...>::template _S_at<_I - 1>();
      }

    // The sum of the sizes preceding index _I (i.e. the element offset of
    // part _I), as a _SizeConstant.
    template <size_t _I>
      static constexpr auto
      _S_before(_SizeConstant<_I> = {})
      {
	if constexpr (_I == 0)
	  return _SizeConstant<0>();
	else
	  return _SizeConstant<
	    _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
      }

    // The list with the first _Np entries removed.
    template <size_t _Np>
      static constexpr auto
      _S_pop_front(_SizeConstant<_Np> = {})
      {
	if constexpr (_Np == 0)
	  return _SizeList();
	else
	  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
      }
  };
3742 | |
3743 | // }}} |
3744 | // __extract_center {{{ |
3745 | template <typename _Tp, size_t _Np> |
3746 | _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2> |
3747 | (_SimdWrapper<_Tp, _Np> __x) |
3748 | { |
3749 | static_assert(_Np >= 4); |
3750 | static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2} |
3751 | #if _GLIBCXX_SIMD_X86INTRIN // {{{ |
3752 | if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64) |
3753 | { |
3754 | const auto __intrin = __to_intrin(__x); |
3755 | if constexpr (is_integral_v<_Tp>) |
3756 | return __vector_bitcast<_Tp>(_mm512_castsi512_si256( |
3757 | _mm512_shuffle_i32x4(__intrin, __intrin, |
3758 | 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40))); |
3759 | else if constexpr (sizeof(_Tp) == 4) |
3760 | return __vector_bitcast<_Tp>(_mm512_castps512_ps256( |
3761 | _mm512_shuffle_f32x4(__intrin, __intrin, |
3762 | 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40))); |
3763 | else if constexpr (sizeof(_Tp) == 8) |
3764 | return __vector_bitcast<_Tp>(_mm512_castpd512_pd256( |
3765 | _mm512_shuffle_f64x2(__intrin, __intrin, |
3766 | 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40))); |
3767 | else |
3768 | __assert_unreachable<_Tp>(); |
3769 | } |
3770 | else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>) |
3771 | return __vector_bitcast<_Tp>( |
3772 | _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)), |
3773 | __hi128(__vector_bitcast<double>(__x)), 1)); |
3774 | else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32) |
3775 | return __vector_bitcast<_Tp>( |
3776 | _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)), |
3777 | __lo128(__vector_bitcast<_LLong>(__x)), |
3778 | sizeof(_Tp) * _Np / 4)); |
3779 | else |
3780 | #endif // _GLIBCXX_SIMD_X86INTRIN }}} |
3781 | { |
3782 | __vector_type_t<_Tp, _Np / 2> __r; |
3783 | __builtin_memcpy(&__r, |
3784 | reinterpret_cast<const char*>(&__x) |
3785 | + sizeof(_Tp) * _Np / 4, |
3786 | sizeof(_Tp) * _Np / 2); |
3787 | return __r; |
3788 | } |
3789 | } |
3790 | |
3791 | template <typename _Tp, typename _A0, typename... _As> |
3792 | _GLIBCXX_SIMD_INTRINSIC |
3793 | _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2> |
3794 | (const _SimdTuple<_Tp, _A0, _As...>& __x) |
3795 | { |
3796 | if constexpr (sizeof...(_As) == 0) |
3797 | return __extract_center(__x.first); |
3798 | else |
3799 | return __extract_part<1, 4, 2>(__x); |
3800 | } |
3801 | |
3802 | // }}} |
3803 | // __split_wrapper {{{ |
// __split_wrapper: helper that re-wraps a _SimdTuple into a
// fixed_size_simd so split<_Sizes...> can be called with a freshly deduced
// _Sizes pack (needed when recursing after popping the first size).
template <size_t... _Sizes, typename _Tp, typename... _As>
  auto
  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
  {
    return split<_Sizes...>(
      fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
							       __x));
  }
3812 | |
3813 | // }}} |
3814 | |
3815 | // split<simd>(simd) {{{ |
// split<_V>(simd): splits __x into an array of _Parts simds of type _V.
// Only participates when _V is a simd type and its size evenly divides the
// input's size.
template <typename _V, typename _Ap,
	  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
		&& is_simd_v<_V>, array<_V, _Parts>>
  split(const simd<typename _V::value_type, _Ap>& __x)
  {
    using _Tp = typename _V::value_type;
    if constexpr (_Parts == 1)
      {
	// Same size: a single cast suffices.
	return {simd_cast<_V>(__x)};
      }
    else if (__x._M_is_constprop())
      {
	// Constant-propagated input: generate element-wise, which the
	// compiler folds at compile time.
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
			     { return __x[__i * _V::size() + __j]; });
		 });
      }
    else if constexpr (
      __is_fixed_size_abi_v<_Ap>
      && (is_same_v<typename _V::abi_type, simd_abi::scalar>
	  || (__is_fixed_size_abi_v<typename _V::abi_type>
	      && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
	      )))
      {
	// fixed_size -> fixed_size (w/o padding) or scalar
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
	// Reinterpret the fixed_size storage as a flat element array and
	// load each part directly from it.
	const __may_alias<_Tp>* const __element_ptr
	  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
		 { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
#else
	// No aliasing loads allowed: subscript the storage element-wise.
	const auto& __xx = __data(__x);
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   [[maybe_unused]] constexpr size_t __offset
		     = decltype(__i)::value * _V::size();
		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		     constexpr _SizeConstant<__j + __offset> __k;
		     return __xx[__k];
		   });
		 });
#endif
      }
    else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
      {
	// normally memcpy should work here as well
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
	  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
      }
    else
      {
	// General case: per-element for fixed_size targets, otherwise
	// extract each part from the vector register.
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
		     return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		       return __x[__i * _V::size() + __j];
		     });
		   else
		     return _V(__private_init,
			       __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
		 });
      }
  }
3882 | |
3883 | // }}} |
3884 | // split<simd_mask>(simd_mask) {{{ |
// split<_V>(simd_mask): splits mask __x into an array of _Parts masks of
// type _V, mirroring the simd overload above.
template <typename _V, typename _Ap,
	  size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
    _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
  {
    if constexpr (is_same_v<_Ap, typename _V::abi_type>)
      // Identical ABI: copy through.
      return {__x};
    else if constexpr (_Parts == 1)
      return {__proposed::static_simd_cast<_V>(__x)};
    else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
			 && __is_avx_abi<_Ap>())
      // AVX -> 2x SSE: the two 128-bit halves are the two parts.
      return {_V(__private_init, __lo128(__data(__x))),
	      _V(__private_init, __hi128(__data(__x)))};
    else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
      {
	// Each part fits in an unsigned long long: go through the bitset
	// representation and shift out each part's bits.
	const bitset __bits = __x.__to_bitset();
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   constexpr size_t __offset = __i * _V::size();
		   return _V(__bitset_init, (__bits >> __offset).to_ullong());
		 });
      }
    else
      {
	// Fallback: build each part mask element by element.
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   constexpr size_t __offset = __i * _V::size();
		   return _V(__private_init,
			     [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			       return __x[__j + __offset];
			     });
		 });
      }
  }
3920 | |
3921 | // }}} |
3922 | // split<_Sizes...>(simd) {{{ |
// split<_Sizes...>(simd): splits __x into a tuple of simds with the given
// sizes (which sum to __x's size; enforced by the forward declaration's
// enable_if). Tries progressively cheaper strategies before falling back
// to element-wise generation.
template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>& __x)
  {
    using _SL = _SizeList<_Sizes...>;
    using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
    constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
    constexpr size_t _N0 = _SL::template _S_at<0>();
    using _V = __deduced_simd<_Tp, _N0>;

    if (__x._M_is_constprop())
      // Constant-propagated input: element-wise generation folds away.
      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
		 constexpr size_t __offset = _SL::_S_before(__i);
		 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   return __x[__offset + __j];
		 });
	       });
    else if constexpr (_Np == _N0)
      {
	// Single part covering everything: just cast.
	static_assert(sizeof...(_Sizes) == 1);
	return {simd_cast<_V>(__x)};
      }
    else if constexpr // split from fixed_size, such that __x::first.size == _N0
      (__is_fixed_size_abi_v<
	 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
      {
	static_assert(
	  !__is_fixed_size_abi_v<typename _V::abi_type>,
	  "How can <_Tp, _Np> be __a single _SimdTuple entry but __a "
	  "fixed_size_simd "
	  "when deduced?" );
	// extract first and recurse (__split_wrapper is needed to deduce a new
	// _Sizes pack)
	return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
			 __split_wrapper(_SL::template _S_pop_front<1>(),
					 __data(__x).second));
      }
    else if constexpr ((!is_same_v<simd_abi::scalar,
				   simd_abi::deduce_t<_Tp, _Sizes>> && ...)
			 && (!__is_fixed_size_abi_v<
			       simd_abi::deduce_t<_Tp, _Sizes>> && ...))
      {
	// All parts are vector ABIs: handle the common size patterns with
	// direct register extraction (__extract_part / __extract_center).
	if constexpr (((_Sizes * 2 == _Np) && ...))
	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
		  {__private_init, __extract_part<1, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
		  {__private_init, __extract_part<1, 3>(__data(__x))},
		  {__private_init, __extract_part<2, 3>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<2 * _Np / 3, _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
		  {__private_init, __extract_part<2, 3>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 3, 2 * _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
		  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
		  {__private_init, __extract_part<2, 4>(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_center(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	else if constexpr (((_Sizes * 4 == _Np) && ...))
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 4>(__data(__x))},
		  {__private_init, __extract_part<2, 4>(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	// else fall through
      }
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
    // Fallback: reinterpret __x as an element array and load every part at
    // its offset, computing the largest provable alignment for each load.
    const __may_alias<_Tp>* const __element_ptr
      = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
	       constexpr size_t __offset = _SL::_S_before(__i);
	       constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
	       constexpr size_t __a
		 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
	       constexpr size_t __b = ((__a - 1) & __a) ^ __a;
	       constexpr size_t __alignment = __b == 0 ? __a : __b;
	       return _Vi(__element_ptr + __offset, overaligned<__alignment>);
	     });
#else
    // Fallback without aliasing loads: subscript the storage element-wise.
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
	       const auto& __xx = __data(__x);
	       using _Offset = decltype(_SL::_S_before(__i));
	       return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		 constexpr _SizeConstant<_Offset::value + __j> __k;
		 return __xx[__k];
	       });
	     });
#endif
  }
4033 | |
4034 | // }}} |
4035 | |
4036 | // __subscript_in_pack {{{ |
// __subscript_in_pack: treat a pack of simds as one concatenated sequence
// and return its _I-th element, recursing past whole simds until _I falls
// inside one.
template <size_t _I, typename _Tp, typename _Ap, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
  {
    if constexpr (_I < simd_size_v<_Tp, _Ap>)
      return __x[_I];
    else
      return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
  }
4046 | |
4047 | // }}} |
4048 | // __store_pack_of_simd {{{ |
// __store_pack_of_simd: memcpy the raw member data of each simd in the
// pack into consecutive byte positions at __mem, recursing through the
// pack. Caller must provide enough space for all elements.
template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC void
  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
  {
    constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
    __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
    if constexpr (sizeof...(__xs) > 0)
      __store_pack_of_simd(__mem + __n_bytes, __xs...);
  }
4058 | |
4059 | // }}} |
4060 | // concat(simd...) {{{ |
// concat: concatenate the given simds into one simd whose size is the sum
// of the input sizes (ABI deduced from that size).
template <typename _Tp, typename... _As>
  inline _GLIBCXX_SIMD_CONSTEXPR
  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
  concat(const simd<_Tp, _As>&... __xs)
  {
    using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
    if constexpr (sizeof...(__xs) == 1)
      // Single input: converting cast suffices.
      return simd_cast<_Rp>(__xs...);
    else if ((... && __xs._M_is_constprop()))
      // All inputs constant-propagated: generate element-wise so the
      // result stays a compile-time constant.
      return simd<_Tp,
		  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
	       { return __subscript_in_pack<__i>(__xs...); });
    else
      {
	// General case: write each input's raw bytes into the result's
	// storage back to back.
	_Rp __r{};
	__store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
	return __r;
      }
  }
4081 | |
4082 | // }}} |
4083 | // concat(array<simd>) {{{ |
// concat overload for an array of equally-sized simds: expand the array
// elements into a pack and forward to concat(simd...).
template <typename _Tp, typename _Abi, size_t _Np>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
  {
    return __call_with_subscripts<_Np>(
      __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	return concat(__xs...);
      });
  }
4094 | |
4095 | // }}} |
4096 | |
4097 | /// @cond undocumented |
4098 | // _SmartReference {{{ |
// _SmartReference: writable element proxy (as returned by simd/simd_mask
// operator[]). Reads load via _M_read(); all writes funnel through
// _Accessor::_S_set, so the owning type controls element mutation.
// Mutating operators are rvalue-qualified: the proxy is meant to be used
// immediately, not stored in a variable.
template <typename _Up, typename _Accessor = _Up,
	  typename _ValueType = typename _Up::value_type>
  class _SmartReference
  {
    friend _Accessor;
    int _M_index;
    _Up& _M_obj;

    // Load the referenced element. If _Up is itself arithmetic (scalar
    // case), the object IS the element.
    _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
    _M_read() const noexcept
    {
      if constexpr (is_arithmetic_v<_Up>)
	return _M_obj;
      else
	return _M_obj[_M_index];
    }

    // Store __x into the referenced element through the accessor.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr void
      _M_write(_Tp&& __x) const
      { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }

  public:
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SmartReference(_Up& __o, int __i) noexcept
    : _M_index(__i), _M_obj(__o) {}

    using value_type = _ValueType;

    // Not copyable: a proxy always refers to exactly one element.
    _GLIBCXX_SIMD_INTRINSIC
    _SmartReference(const _SmartReference&) = delete;

    // Implicit conversion to the element value (the "read" path).
    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator value_type() const noexcept
    { return _M_read(); }

    // Assignment; constrained to value-preserving conversions (or int).
    template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator=(_Tp&& __x) &&
      {
	_M_write(static_cast<_Tp&&>(__x));
	return {_M_obj, _M_index};
      }

    // Compound assignment operators: read-modify-write through
    // _M_read/_M_write, with the same value-preservation constraints on
    // both the operand and the result type.
#define _GLIBCXX_SIMD_OP_(__op) \
    template <typename _Tp, \
	      typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
	      typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
	      typename = _ValuePreservingOrInt<_TT, value_type>> \
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
      operator __op##=(_Tp&& __x) && \
      { \
	const value_type& __lhs = _M_read(); \
	_M_write(__lhs __op __x); \
	return {_M_obj, _M_index}; \
      }
    _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
#undef _GLIBCXX_SIMD_OP_

    // Increment/decrement, enabled only if value_type supports them (the
    // conditional_t trick makes the decltype check dependent on _Tp).
    template <typename _Tp = void,
	      typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator++() &&
      {
	value_type __x = _M_read();
	_M_write(++__x);
	return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
	      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator++(int) &&
      {
	const value_type __r = _M_read();
	value_type __x = __r;
	_M_write(++__x);
	return __r;
      }

    template <typename _Tp = void,
	      typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator--() &&
      {
	value_type __x = _M_read();
	_M_write(--__x);
	return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
	      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator--(int) &&
      {
	const value_type __r = _M_read();
	value_type __x = __r;
	_M_write(--__x);
	return __r;
      }

    // swap overloads for proxy/proxy, value/proxy and proxy/value, so
    // generic code (e.g. std::swap-based algorithms) works on elements.
    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, _SmartReference&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp = static_cast<_SmartReference&&>(__a);
      static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(value_type& __a, _SmartReference&& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, value_type&&>,
	is_nothrow_assignable<value_type&, value_type&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(std::move(__a));
      __a = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, value_type& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, _SmartReference&&>,
	is_nothrow_assignable<value_type&, value_type&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(__a);
      static_cast<_SmartReference&&>(__a) = std::move(__b);
      __b = std::move(__tmp);
    }
  };
4237 | |
4238 | // }}} |
4239 | // __scalar_abi_wrapper {{{ |
4240 | template <int _Bytes> |
4241 | struct __scalar_abi_wrapper |
4242 | { |
4243 | template <typename _Tp> static constexpr size_t _S_full_size = 1; |
4244 | template <typename _Tp> static constexpr size_t _S_size = 1; |
4245 | template <typename _Tp> static constexpr size_t _S_is_partial = false; |
4246 | |
4247 | template <typename _Tp, typename _Abi = simd_abi::scalar> |
4248 | static constexpr bool _S_is_valid_v |
4249 | = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes; |
4250 | }; |
4251 | |
4252 | // }}} |
4253 | // __decay_abi metafunction {{{ |
// __decay_abi: identity for real ABI tags, but maps __scalar_abi_wrapper
// back to the plain simd_abi::scalar tag.
template <typename _Tp>
  struct __decay_abi { using type = _Tp; };

template <int _Bytes>
  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
  { using type = simd_abi::scalar; };
4260 | |
4261 | // }}} |
4262 | // __find_next_valid_abi metafunction {{{1 |
4263 | // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> == |
4264 | // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break |
4265 | // recursion at 2 elements in the resulting ABI tag. In this case |
4266 | // type::_S_is_valid_v<_Tp> may be false. |
// __find_next_valid_abi: walk down the power-of-2 byte sizes below _Bytes
// until _Abi<N2> is valid and non-partial for _Tp; recursion stops once
// fewer than two elements would remain (then type may be invalid).
// (Fix: removed the stray editor inlay-hint artifact `x:` from the
// __bit_ceil call, which made the line invalid C++.)
template <template <int> class _Abi, int _Bytes, typename _Tp>
  struct __find_next_valid_abi
  {
    static constexpr auto
    _S_choose()
    {
      // Next candidate: the largest power of 2 strictly below _Bytes
      // (for a power-of-2 _Bytes, __bit_ceil(_Bytes) == _Bytes).
      constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
      using _NextAbi = _Abi<_NextBytes>;
      if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
	return _Abi<_Bytes>();
      else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
			   && _NextAbi::template _S_is_valid_v<_Tp>)
	return _NextAbi();
      else
	return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
    }

    using type = decltype(_S_choose());
  };
4286 | |
// Specialization for the scalar wrapper: there is nothing smaller to try,
// so the search bottoms out at simd_abi::scalar.
template <int _Bytes, typename _Tp>
  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
  { using type = simd_abi::scalar; };
4290 | |
4291 | // _AbiList {{{1 |
// _AbiList primary template / recursion terminator: an empty list of ABI
// templates offers no valid ABI for any type and size.
template <template <int> class...>
  struct _AbiList
  {
    template <typename, int> static constexpr bool _S_has_valid_abi = false;
    template <typename, int> using _FirstValidAbi = void;
    template <typename, int> using _BestAbi = void;
  };
4299 | |
// Recursive case: consider _A0 first, then the remaining ABI tags in order.
template <template <int> class _A0, template <int> class... _Rest>
struct _AbiList<_A0, _Rest...>
{
  // true iff any listed ABI tag is valid for _Np elements of _Tp
  template <typename _Tp, int _Np>
  static constexpr bool _S_has_valid_abi
    = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
	_Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;

  // the first listed ABI tag that is valid for _Np elements of _Tp
  // (decayed, so __scalar_abi_wrapper becomes simd_abi::scalar)
  template <typename _Tp, int _Np>
  using _FirstValidAbi = conditional_t<
    _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
    typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
    typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;

  template <typename _Tp, int _Np>
  static constexpr auto
  _S_determine_best_abi()
  {
    static_assert(_Np >= 1);
    constexpr int _Bytes = sizeof(_Tp) * _Np;
    if constexpr (_Np == 1)
      return __make_dependent_t<_Tp, simd_abi::scalar>{};
    else
      {
	constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
	// _A0<_Bytes> is good if:
	// 1. The ABI tag is valid for _Tp
	// 2. The storage overhead is no more than padding to fill the next
	//    power-of-2 number of bytes
	if constexpr (_A0<_Bytes>::template _S_is_valid_v<
			_Tp> && __fullsize / 2 < _Np)
	  return typename __decay_abi<_A0<_Bytes>>::type{};
	else
	  {
	    // otherwise try a smaller instantiation of _A0 before falling
	    // back to the remaining ABI tags in the list
	    using _Bp =
	      typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
	    if constexpr (_Bp::template _S_is_valid_v<
			    _Tp> && _Bp::template _S_size<_Tp> <= _Np)
	      return _Bp{};
	    else
	      return
		typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
	  }
      }
  }

  // the "best" ABI tag for _Np elements of _Tp, per the rules above
  template <typename _Tp, int _Np>
  using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
};
4349 | |
4350 | // }}}1 |
4351 | |
4352 | // the following lists all native ABIs, which makes them accessible to |
4353 | // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order |
4354 | // matters: Whatever comes first has higher priority. |
// Priority-ordered list of all native ABIs plus the scalar fallback.
using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
				__scalar_abi_wrapper>;
4357 | |
4358 | // valid _SimdTraits specialization {{{1 |
// _SimdTraits is only specialized (non-empty) when _Abi::_IsValid<_Tp> is a
// well-formed type; it then inherits the full trait set from the ABI tag.
template <typename _Tp, typename _Abi>
struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
  : _Abi::template __traits<_Tp> {};
4362 | |
4363 | // __deduce_impl specializations {{{1 |
4364 | // try all native ABIs (including scalar) first |
// Deduction succeeds directly when some native (or scalar) ABI matches.
template <typename _Tp, size_t _Np>
struct __deduce_impl<
  _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
{ using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4369 | |
4370 | // fall back to fixed_size only if scalar and native ABIs don't match |
// Fallback helper: empty unless fixed_size<_Np> is valid for _Tp, in which
// case it exposes that ABI tag as ::type.
template <typename _Tp, size_t _Np, typename = void>
struct __deduce_fixed_size_fallback {};

template <typename _Tp, size_t _Np>
struct __deduce_fixed_size_fallback<_Tp, _Np,
    enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
{ using type = simd_abi::fixed_size<_Np>; };
4378 | |
// Primary template: no native ABI matched, so try the fixed_size fallback.
template <typename _Tp, size_t _Np, typename>
struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4381 | |
4382 | //}}}1 |
4383 | /// @endcond |
4384 | |
4385 | // simd_mask {{{ |
4386 | template <typename _Tp, typename _Abi> |
4387 | class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase |
4388 | { |
4389 | // types, tags, and friends {{{ |
4390 | using _Traits = _SimdTraits<_Tp, _Abi>; |
4391 | using _MemberType = typename _Traits::_MaskMember; |
4392 | |
4393 | // We map all masks with equal element sizeof to a single integer type, the |
4394 | // one given by __int_for_sizeof_t<_Tp>. This is the approach |
4395 | // [[gnu::vector_size(N)]] types take as well and it reduces the number of |
4396 | // template specializations in the implementation classes. |
4397 | using _Ip = __int_for_sizeof_t<_Tp>; |
4398 | static constexpr _Ip* _S_type_tag = nullptr; |
4399 | |
4400 | friend typename _Traits::_MaskBase; |
4401 | friend class simd<_Tp, _Abi>; // to construct masks on return |
4402 | friend typename _Traits::_SimdImpl; // to construct masks on return and |
4403 | // inspect data on masked operations |
4404 | public: |
4405 | using _Impl = typename _Traits::_MaskImpl; |
4406 | friend _Impl; |
4407 | |
4408 | // }}} |
4409 | // member types {{{ |
4410 | using value_type = bool; |
4411 | using reference = _SmartReference<_MemberType, _Impl, value_type>; |
4412 | using simd_type = simd<_Tp, _Abi>; |
4413 | using abi_type = _Abi; |
4414 | |
4415 | // }}} |
4416 | static constexpr size_t size() // {{{ |
4417 | { return __size_or_zero_v<_Tp, _Abi>; } |
4418 | |
4419 | // }}} |
4420 | // constructors & assignment {{{ |
4421 | simd_mask() = default; |
4422 | simd_mask(const simd_mask&) = default; |
4423 | simd_mask(simd_mask&&) = default; |
4424 | simd_mask& operator=(const simd_mask&) = default; |
4425 | simd_mask& operator=(simd_mask&&) = default; |
4426 | |
4427 | // }}} |
4428 | // access to internal representation (optional feature) {{{ |
4429 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit |
4430 | simd_mask(typename _Traits::_MaskCastType __init) |
4431 | : _M_data{__init} {} |
4432 | // conversions to internal type is done in _MaskBase |
4433 | |
4434 | // }}} |
4435 | // bitset interface (extension to be proposed) {{{ |
4436 | // TS_FEEDBACK: |
4437 | // Conversion of simd_mask to and from bitset makes it much easier to |
4438 | // interface with other facilities. I suggest adding `static |
4439 | // simd_mask::from_bitset` and `simd_mask::to_bitset`. |
4440 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask |
4441 | __from_bitset(bitset<size()> bs) |
4442 | { return {__bitset_init, bs}; } |
4443 | |
4444 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()> |
4445 | __to_bitset() const |
4446 | { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); } |
4447 | |
4448 | // }}} |
4449 | // explicit broadcast constructor {{{ |
4450 | _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR |
4451 | simd_mask(value_type __x) |
4452 | : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {} |
4453 | |
4454 | // }}} |
4455 | // implicit type conversion constructor {{{ |
4456 | #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST |
4457 | // proposed improvement |
4458 | template <typename _Up, typename _A2, |
4459 | typename = enable_if_t<simd_size_v<_Up, _A2> == size()>> |
4460 | _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType) |
4461 | != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember)) |
4462 | simd_mask(const simd_mask<_Up, _A2>& __x) |
4463 | : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {} |
4464 | #else |
4465 | // conforming to ISO/IEC 19570:2018 |
4466 | template <typename _Up, typename = enable_if_t<conjunction< |
4467 | is_same<abi_type, simd_abi::fixed_size<size()>>, |
4468 | is_same<_Up, _Up>>::value>> |
4469 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR |
4470 | simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x) |
4471 | : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {} |
4472 | #endif |
4473 | |
4474 | // }}} |
4475 | // load constructor {{{ |
4476 | template <typename _Flags> |
4477 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR |
4478 | simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>) |
4479 | : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {} |
4480 | |
4481 | template <typename _Flags> |
4482 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR |
4483 | simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>) |
4484 | : _M_data{} |
4485 | { |
4486 | _M_data = _Impl::_S_masked_load(_M_data, __k._M_data, |
4487 | _Flags::template _S_apply<simd_mask>(__mem)); |
4488 | } |
4489 | |
4490 | // }}} |
4491 | // loads [simd_mask.load] {{{ |
4492 | template <typename _Flags> |
4493 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void |
4494 | copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>) |
4495 | { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); } |
4496 | |
4497 | // }}} |
4498 | // stores [simd_mask.store] {{{ |
4499 | template <typename _Flags> |
4500 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void |
4501 | copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const |
4502 | { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); } |
4503 | |
4504 | // }}} |
4505 | // scalar access {{{ |
4506 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference |
4507 | operator[](size_t __i) |
4508 | { |
4509 | if (__i >= size()) |
4510 | __invoke_ub(msg: "Subscript %d is out of range [0, %d]" , args: __i, args: size() - 1); |
4511 | return {_M_data, int(__i)}; |
4512 | } |
4513 | |
4514 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type |
4515 | operator[](size_t __i) const |
4516 | { |
4517 | if (__i >= size()) |
4518 | __invoke_ub(msg: "Subscript %d is out of range [0, %d]" , args: __i, args: size() - 1); |
4519 | if constexpr (__is_scalar_abi<_Abi>()) |
4520 | return _M_data; |
4521 | else |
4522 | return static_cast<bool>(_M_data[__i]); |
4523 | } |
4524 | |
4525 | // }}} |
4526 | // negation {{{ |
4527 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask |
4528 | operator!() const |
4529 | { return {__private_init, _Impl::_S_bit_not(_M_data)}; } |
4530 | |
4531 | // }}} |
4532 | // simd_mask binary operators [simd_mask.binary] {{{ |
4533 | #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST |
4534 | // simd_mask<int> && simd_mask<uint> needs disambiguation |
4535 | template <typename _Up, typename _A2, |
4536 | typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>> |
4537 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4538 | operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y) |
4539 | { |
4540 | return {__private_init, |
4541 | _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)}; |
4542 | } |
4543 | |
4544 | template <typename _Up, typename _A2, |
4545 | typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>> |
4546 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4547 | operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y) |
4548 | { |
4549 | return {__private_init, |
4550 | _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)}; |
4551 | } |
4552 | #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST |
4553 | |
4554 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4555 | operator&&(const simd_mask& __x, const simd_mask& __y) |
4556 | { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; } |
4557 | |
4558 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4559 | operator||(const simd_mask& __x, const simd_mask& __y) |
4560 | { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; } |
4561 | |
4562 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4563 | operator&(const simd_mask& __x, const simd_mask& __y) |
4564 | { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; } |
4565 | |
4566 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4567 | operator|(const simd_mask& __x, const simd_mask& __y) |
4568 | { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; } |
4569 | |
4570 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4571 | operator^(const simd_mask& __x, const simd_mask& __y) |
4572 | { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; } |
4573 | |
4574 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask& |
4575 | operator&=(simd_mask& __x, const simd_mask& __y) |
4576 | { |
4577 | __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data); |
4578 | return __x; |
4579 | } |
4580 | |
4581 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask& |
4582 | operator|=(simd_mask& __x, const simd_mask& __y) |
4583 | { |
4584 | __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data); |
4585 | return __x; |
4586 | } |
4587 | |
4588 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask& |
4589 | operator^=(simd_mask& __x, const simd_mask& __y) |
4590 | { |
4591 | __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data); |
4592 | return __x; |
4593 | } |
4594 | |
4595 | // }}} |
4596 | // simd_mask compares [simd_mask.comparison] {{{ |
4597 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4598 | operator==(const simd_mask& __x, const simd_mask& __y) |
4599 | { return !operator!=(__x, __y); } |
4600 | |
4601 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4602 | operator!=(const simd_mask& __x, const simd_mask& __y) |
4603 | { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; } |
4604 | |
4605 | // }}} |
4606 | // private_init ctor {{{ |
4607 | _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR |
4608 | simd_mask(_PrivateInit, typename _Traits::_MaskMember __init) |
4609 | : _M_data(__init) {} |
4610 | |
4611 | // }}} |
4612 | // private_init generator ctor {{{ |
4613 | template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))> |
4614 | _GLIBCXX_SIMD_INTRINSIC constexpr |
4615 | simd_mask(_PrivateInit, _Fp&& __gen) |
4616 | : _M_data() |
4617 | { |
4618 | __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
4619 | _Impl::_S_set(_M_data, __i, __gen(__i)); |
4620 | }); |
4621 | } |
4622 | |
4623 | // }}} |
4624 | // bitset_init ctor {{{ |
4625 | _GLIBCXX_SIMD_INTRINSIC constexpr |
4626 | simd_mask(_BitsetInit, bitset<size()> __init) |
4627 | : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag)) |
4628 | {} |
4629 | |
4630 | // }}} |
4631 | // __cvt {{{ |
4632 | // TS_FEEDBACK: |
4633 | // The conversion operator this implements should be a ctor on simd_mask. |
4634 | // Once you call .__cvt() on a simd_mask it converts conveniently. |
4635 | // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))` |
4636 | struct _CvtProxy |
4637 | { |
4638 | template <typename _Up, typename _A2, |
4639 | typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>> |
4640 | operator simd_mask<_Up, _A2>() && |
4641 | { |
4642 | using namespace std::experimental::__proposed; |
4643 | return static_simd_cast<simd_mask<_Up, _A2>>(_M_data); |
4644 | } |
4645 | |
4646 | const simd_mask<_Tp, _Abi>& _M_data; |
4647 | }; |
4648 | |
4649 | _GLIBCXX_SIMD_INTRINSIC _CvtProxy |
4650 | __cvt() const |
4651 | { return {*this}; } |
4652 | |
4653 | // }}} |
4654 | // operator?: overloads (suggested extension) {{{ |
4655 | #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__ |
4656 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4657 | operator?:(const simd_mask& __k, const simd_mask& __where_true, |
4658 | const simd_mask& __where_false) |
4659 | { |
4660 | auto __ret = __where_false; |
4661 | _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data); |
4662 | return __ret; |
4663 | } |
4664 | |
4665 | template <typename _U1, typename _U2, |
4666 | typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>, |
4667 | typename = enable_if_t<conjunction_v< |
4668 | is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>, |
4669 | is_convertible<simd_mask, typename _Rp::mask_type>>>> |
4670 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp |
4671 | operator?:(const simd_mask& __k, const _U1& __where_true, |
4672 | const _U2& __where_false) |
4673 | { |
4674 | _Rp __ret = __where_false; |
4675 | _Rp::_Impl::_S_masked_assign( |
4676 | __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret), |
4677 | __data(static_cast<_Rp>(__where_true))); |
4678 | return __ret; |
4679 | } |
4680 | |
4681 | #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST |
4682 | template <typename _Kp, typename _Ak, typename _Up, typename _Au, |
4683 | typename = enable_if_t< |
4684 | conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>, |
4685 | is_convertible<simd_mask<_Up, _Au>, simd_mask>>>> |
4686 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask |
4687 | operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true, |
4688 | const simd_mask<_Up, _Au>& __where_false) |
4689 | { |
4690 | simd_mask __ret = __where_false; |
4691 | _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data, |
4692 | __where_true._M_data); |
4693 | return __ret; |
4694 | } |
4695 | #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST |
4696 | #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__ |
4697 | |
4698 | // }}} |
4699 | // _M_is_constprop {{{ |
4700 | _GLIBCXX_SIMD_INTRINSIC constexpr bool |
4701 | _M_is_constprop() const |
4702 | { |
4703 | if constexpr (__is_scalar_abi<_Abi>()) |
4704 | return __builtin_constant_p(_M_data); |
4705 | else |
4706 | return _M_data._M_is_constprop(); |
4707 | } |
4708 | |
4709 | // }}} |
4710 | |
4711 | private: |
4712 | friend const auto& __data<_Tp, abi_type>(const simd_mask&); |
4713 | friend auto& __data<_Tp, abi_type>(simd_mask&); |
4714 | alignas(_Traits::_S_mask_align) _MemberType _M_data; |
4715 | }; |
4716 | |
4717 | // }}} |
4718 | |
4719 | /// @cond undocumented |
4720 | // __data(simd_mask) {{{ |
// Grants read access to the private member of a simd_mask.
template <typename _Tp, typename _Ap>
_GLIBCXX_SIMD_INTRINSIC constexpr const auto&
__data(const simd_mask<_Tp, _Ap>& __x)
{ return __x._M_data; }
4725 | |
// Grants mutable access to the private member of a simd_mask.
template <typename _Tp, typename _Ap>
_GLIBCXX_SIMD_INTRINSIC constexpr auto&
__data(simd_mask<_Tp, _Ap>& __x)
{ return __x._M_data; }
4730 | |
4731 | // }}} |
4732 | /// @endcond |
4733 | |
4734 | // simd_mask reductions [simd_mask.reductions] {{{ |
// Returns true iff every element of __k is true.
template <typename _Tp, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
{
  // In constant evaluation, or when the compiler can constant-propagate the
  // mask, evaluate element-wise so the result folds to a compile-time value.
  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
    {
      for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
	if (!__k[__i])
	  return false;
      return true;
    }
  else
    return _Abi::_MaskImpl::_S_all_of(__k);
}
4749 | |
// Returns true iff at least one element of __k is true.
template <typename _Tp, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
{
  // Constant-propagation fast path: fold element-wise at compile time.
  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
    {
      for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
	if (__k[__i])
	  return true;
      return false;
    }
  else
    return _Abi::_MaskImpl::_S_any_of(__k);
}
4764 | |
// Returns true iff no element of __k is true.
template <typename _Tp, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
{
  // Constant-propagation fast path: fold element-wise at compile time.
  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
    {
      for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
	if (__k[__i])
	  return false;
      return true;
    }
  else
    return _Abi::_MaskImpl::_S_none_of(__k);
}
4779 | |
// Returns true iff the elements of __k are mixed (some true, some false),
// detected here as any adjacent pair of differing elements.
template <typename _Tp, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
{
  // Constant-propagation fast path: fold element-wise at compile time.
  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
    {
      for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
	if (__k[__i] != __k[__i - 1])
	  return true;
      return false;
    }
  else
    return _Abi::_MaskImpl::_S_some_of(__k);
}
4794 | |
// Returns the number of true elements in __k.
template <typename _Tp, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
{
  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
    {
      // Sum the truth values via a fold over all subscripts; only use the
      // result if it actually folded to a compile-time constant.
      const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
	__k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	  return ((__elements != 0) + ...);
	});
      if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
	return __r;
    }
  return _Abi::_MaskImpl::_S_popcount(__k);
}
4810 | |
4811 | template <typename _Tp, typename _Abi> |
4812 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int |
4813 | find_first_set(const simd_mask<_Tp, _Abi>& __k) |
4814 | { |
4815 | if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) |
4816 | { |
4817 | constexpr size_t _Np = simd_size_v<_Tp, _Abi>; |
4818 | const size_t _Idx = __call_with_n_evaluations<_Np>( |
4819 | [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
4820 | return std::min({__indexes...}); |
4821 | }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
4822 | return __k[__i] ? +__i : _Np; |
4823 | }); |
4824 | if (_Idx >= _Np) |
4825 | __invoke_ub(msg: "find_first_set(empty mask) is UB" ); |
4826 | if (__builtin_constant_p(_Idx)) |
4827 | return _Idx; |
4828 | } |
4829 | return _Abi::_MaskImpl::_S_find_first_set(__k); |
4830 | } |
4831 | |
4832 | template <typename _Tp, typename _Abi> |
4833 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int |
4834 | find_last_set(const simd_mask<_Tp, _Abi>& __k) |
4835 | { |
4836 | if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) |
4837 | { |
4838 | constexpr size_t _Np = simd_size_v<_Tp, _Abi>; |
4839 | const int _Idx = __call_with_n_evaluations<_Np>( |
4840 | [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
4841 | return std::max({__indexes...}); |
4842 | }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
4843 | return __k[__i] ? int(__i) : -1; |
4844 | }); |
4845 | if (_Idx < 0) |
4846 | __invoke_ub(msg: "find_first_set(empty mask) is UB" ); |
4847 | if (__builtin_constant_p(_Idx)) |
4848 | return _Idx; |
4849 | } |
4850 | return _Abi::_MaskImpl::_S_find_last_set(__k); |
4851 | } |
4852 | |
// Overloads of the mask reductions for plain bool (_ExactBool prevents
// implicit conversions): a single bool behaves like a 1-element mask.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
all_of(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
any_of(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
none_of(_ExactBool __x) noexcept
{ return !__x; }

// one element can never be "some but not all"
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
some_of(_ExactBool) noexcept
{ return false; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
popcount(_ExactBool __x) noexcept
{ return __x; }

// index 0 is the only candidate (precondition: the value is true)
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_first_set(_ExactBool)
{ return 0; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_last_set(_ExactBool)
{ return 0; }
4880 | |
4881 | // }}} |
4882 | |
4883 | /// @cond undocumented |
4884 | // _SimdIntOperators{{{1 |
// Empty base for non-integral value_type: no integer operators are provided.
template <typename _V, typename _Impl, bool>
class _SimdIntOperators {};
4887 | |
4888 | template <typename _V, typename _Impl> |
4889 | class _SimdIntOperators<_V, _Impl, true> |
4890 | { |
4891 | _GLIBCXX_SIMD_INTRINSIC constexpr const _V& |
4892 | __derived() const |
4893 | { return *static_cast<const _V*>(this); } |
4894 | |
4895 | template <typename _Tp> |
4896 | _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V |
4897 | _S_make_derived(_Tp&& __d) |
4898 | { return {__private_init, static_cast<_Tp&&>(__d)}; } |
4899 | |
4900 | public: |
4901 | _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x) |
4902 | { return __lhs = __lhs % __x; } |
4903 | |
4904 | _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x) |
4905 | { return __lhs = __lhs & __x; } |
4906 | |
4907 | _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x) |
4908 | { return __lhs = __lhs | __x; } |
4909 | |
4910 | _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x) |
4911 | { return __lhs = __lhs ^ __x; } |
4912 | |
4913 | _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x) |
4914 | { return __lhs = __lhs << __x; } |
4915 | |
4916 | _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x) |
4917 | { return __lhs = __lhs >> __x; } |
4918 | |
4919 | _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x) |
4920 | { return __lhs = __lhs << __x; } |
4921 | |
4922 | _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x) |
4923 | { return __lhs = __lhs >> __x; } |
4924 | |
4925 | _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y) |
4926 | { |
4927 | return _SimdIntOperators::_S_make_derived( |
4928 | _Impl::_S_modulus(__data(__x), __data(__y))); |
4929 | } |
4930 | |
4931 | _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y) |
4932 | { |
4933 | return _SimdIntOperators::_S_make_derived( |
4934 | _Impl::_S_bit_and(__data(__x), __data(__y))); |
4935 | } |
4936 | |
4937 | _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y) |
4938 | { |
4939 | return _SimdIntOperators::_S_make_derived( |
4940 | _Impl::_S_bit_or(__data(__x), __data(__y))); |
4941 | } |
4942 | |
4943 | _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y) |
4944 | { |
4945 | return _SimdIntOperators::_S_make_derived( |
4946 | _Impl::_S_bit_xor(__data(__x), __data(__y))); |
4947 | } |
4948 | |
4949 | _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y) |
4950 | { |
4951 | return _SimdIntOperators::_S_make_derived( |
4952 | _Impl::_S_bit_shift_left(__data(__x), __data(__y))); |
4953 | } |
4954 | |
4955 | _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y) |
4956 | { |
4957 | return _SimdIntOperators::_S_make_derived( |
4958 | _Impl::_S_bit_shift_right(__data(__x), __data(__y))); |
4959 | } |
4960 | |
4961 | template <typename _VV = _V> |
4962 | _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y) |
4963 | { |
4964 | using _Tp = typename _VV::value_type; |
4965 | if (__y < 0) |
4966 | __invoke_ub(msg: "The behavior is undefined if the right operand of a " |
4967 | "shift operation is negative. [expr.shift]\nA shift by " |
4968 | "%d was requested" , |
4969 | args: __y); |
4970 | if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) |
4971 | __invoke_ub( |
4972 | msg: "The behavior is undefined if the right operand of a " |
4973 | "shift operation is greater than or equal to the width of the " |
4974 | "promoted left operand. [expr.shift]\nA shift by %d was requested" , |
4975 | args: __y); |
4976 | return _SimdIntOperators::_S_make_derived( |
4977 | _Impl::_S_bit_shift_left(__data(__x), __y)); |
4978 | } |
4979 | |
4980 | template <typename _VV = _V> |
4981 | _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y) |
4982 | { |
4983 | using _Tp = typename _VV::value_type; |
4984 | if (__y < 0) |
4985 | __invoke_ub( |
4986 | msg: "The behavior is undefined if the right operand of a shift " |
4987 | "operation is negative. [expr.shift]\nA shift by %d was requested" , |
4988 | args: __y); |
4989 | if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) |
4990 | __invoke_ub( |
4991 | msg: "The behavior is undefined if the right operand of a shift " |
4992 | "operation is greater than or equal to the width of the promoted " |
4993 | "left operand. [expr.shift]\nA shift by %d was requested" , |
4994 | args: __y); |
4995 | return _SimdIntOperators::_S_make_derived( |
4996 | _Impl::_S_bit_shift_right(__data(__x), __y)); |
4997 | } |
4998 | |
4999 | // unary operators (for integral _Tp) |
5000 | _GLIBCXX_SIMD_CONSTEXPR _V operator~() const |
5001 | { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; } |
5002 | }; |
5003 | |
5004 | //}}}1 |
5005 | /// @endcond |
5006 | |
5007 | // simd {{{ |
5008 | template <typename _Tp, typename _Abi> |
5009 | class simd : public _SimdIntOperators< |
5010 | simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl, |
5011 | conjunction<is_integral<_Tp>, |
5012 | typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>, |
5013 | public _SimdTraits<_Tp, _Abi>::_SimdBase |
5014 | { |
5015 | using _Traits = _SimdTraits<_Tp, _Abi>; |
5016 | using _MemberType = typename _Traits::_SimdMember; |
5017 | using _CastType = typename _Traits::_SimdCastType; |
5018 | static constexpr _Tp* _S_type_tag = nullptr; |
5019 | friend typename _Traits::_SimdBase; |
5020 | |
5021 | public: |
5022 | using _Impl = typename _Traits::_SimdImpl; |
5023 | friend _Impl; |
5024 | friend _SimdIntOperators<simd, _Impl, true>; |
5025 | |
5026 | using value_type = _Tp; |
5027 | using reference = _SmartReference<_MemberType, _Impl, value_type>; |
5028 | using mask_type = simd_mask<_Tp, _Abi>; |
5029 | using abi_type = _Abi; |
5030 | |
5031 | static constexpr size_t size() |
5032 | { return __size_or_zero_v<_Tp, _Abi>; } |
5033 | |
5034 | _GLIBCXX_SIMD_CONSTEXPR simd() = default; |
5035 | _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default; |
5036 | _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default; |
5037 | _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default; |
5038 | _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default; |
5039 | |
5040 | // implicit broadcast constructor |
5041 | template <typename _Up, |
5042 | typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>> |
5043 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR |
5044 | simd(_ValuePreservingOrInt<_Up, value_type>&& __x) |
5045 | : _M_data( |
5046 | _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x)))) |
5047 | {} |
5048 | |
5049 | // implicit type conversion constructor (convert from fixed_size to |
5050 | // fixed_size) |
5051 | template <typename _Up> |
5052 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR |
5053 | simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x, |
5054 | enable_if_t< |
5055 | conjunction< |
5056 | is_same<simd_abi::fixed_size<size()>, abi_type>, |
5057 | negation<__is_narrowing_conversion<_Up, value_type>>, |
5058 | __converts_to_higher_integer_rank<_Up, value_type>>::value, |
5059 | void*> = nullptr) |
5060 | : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {} |
5061 | |
5062 | // explicit type conversion constructor |
5063 | #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST |
5064 | template <typename _Up, typename _A2, |
5065 | typename = decltype(static_simd_cast<simd>( |
5066 | declval<const simd<_Up, _A2>&>()))> |
5067 | _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR |
5068 | simd(const simd<_Up, _A2>& __x) |
5069 | : simd(static_simd_cast<simd>(__x)) {} |
5070 | #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST |
5071 | |
5072 | // generator constructor |
5073 | template <typename _Fp> |
5074 | _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR |
5075 | simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()( |
5076 | declval<_SizeConstant<0>&>())), |
5077 | value_type>* = nullptr) |
5078 | : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {} |
5079 | |
// load constructor
// Loads size() elements of type _Up from __mem, converting to value_type.
// _Flags::_S_apply<simd> adjusts the pointer according to the alignment
// flag (e.g. vector_aligned) before the load; the flag argument itself is
// a tag and carries no state.
template <typename _Up, typename _Flags>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
  simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
  : _M_data(
      _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
  {}
5087 | |
5088 | // loads [simd.load] |
5089 | template <typename _Up, typename _Flags> |
5090 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void |
5091 | copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) |
5092 | { |
5093 | _M_data = static_cast<decltype(_M_data)>( |
5094 | _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag)); |
5095 | } |
5096 | |
5097 | // stores [simd.store] |
5098 | template <typename _Up, typename _Flags> |
5099 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void |
5100 | copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const |
5101 | { |
5102 | _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem), |
5103 | _S_type_tag); |
5104 | } |
5105 | |
5106 | // scalar access |
5107 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference |
5108 | operator[](size_t __i) |
5109 | { return {_M_data, int(__i)}; } |
5110 | |
5111 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type |
5112 | operator[]([[maybe_unused]] size_t __i) const |
5113 | { |
5114 | if constexpr (__is_scalar_abi<_Abi>()) |
5115 | { |
5116 | _GLIBCXX_DEBUG_ASSERT(__i == 0); |
5117 | return _M_data; |
5118 | } |
5119 | else |
5120 | return _M_data[__i]; |
5121 | } |
5122 | |
5123 | // increment and decrement: |
5124 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd& |
5125 | operator++() |
5126 | { |
5127 | _Impl::_S_increment(_M_data); |
5128 | return *this; |
5129 | } |
5130 | |
5131 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd |
5132 | operator++(int) |
5133 | { |
5134 | simd __r = *this; |
5135 | _Impl::_S_increment(_M_data); |
5136 | return __r; |
5137 | } |
5138 | |
5139 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd& |
5140 | operator--() |
5141 | { |
5142 | _Impl::_S_decrement(_M_data); |
5143 | return *this; |
5144 | } |
5145 | |
5146 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd |
5147 | operator--(int) |
5148 | { |
5149 | simd __r = *this; |
5150 | _Impl::_S_decrement(_M_data); |
5151 | return __r; |
5152 | } |
5153 | |
5154 | // unary operators (for any _Tp) |
5155 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type |
5156 | operator!() const |
5157 | { return {__private_init, _Impl::_S_negate(_M_data)}; } |
5158 | |
5159 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd |
5160 | operator+() const |
5161 | { return *this; } |
5162 | |
5163 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd |
5164 | operator-() const |
5165 | { return {__private_init, _Impl::_S_unary_minus(_M_data)}; } |
5166 | |
// access to internal representation (suggested extension)
// Explicit construction directly from the native representation type
// (_CastType), bypassing element-wise conversion.
_GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
simd(_CastType __init) : _M_data(__init) {}
5170 | |
5171 | // compound assignment [simd.cassign] |
5172 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd& |
5173 | operator+=(simd& __lhs, const simd& __x) |
5174 | { return __lhs = __lhs + __x; } |
5175 | |
5176 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd& |
5177 | operator-=(simd& __lhs, const simd& __x) |
5178 | { return __lhs = __lhs - __x; } |
5179 | |
5180 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd& |
5181 | operator*=(simd& __lhs, const simd& __x) |
5182 | { return __lhs = __lhs * __x; } |
5183 | |
5184 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd& |
5185 | operator/=(simd& __lhs, const simd& __x) |
5186 | { return __lhs = __lhs / __x; } |
5187 | |
5188 | // binary operators [simd.binary] |
5189 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd |
5190 | operator+(const simd& __x, const simd& __y) |
5191 | { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; } |
5192 | |
5193 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd |
5194 | operator-(const simd& __x, const simd& __y) |
5195 | { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; } |
5196 | |
5197 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd |
5198 | operator*(const simd& __x, const simd& __y) |
5199 | { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; } |
5200 | |
5201 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd |
5202 | operator/(const simd& __x, const simd& __y) |
5203 | { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; } |
5204 | |
5205 | // compares [simd.comparison] |
5206 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type |
5207 | operator==(const simd& __x, const simd& __y) |
5208 | { return simd::_S_make_mask(k: _Impl::_S_equal_to(__x._M_data, __y._M_data)); } |
5209 | |
5210 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type |
5211 | operator!=(const simd& __x, const simd& __y) |
5212 | { |
5213 | return simd::_S_make_mask( |
5214 | k: _Impl::_S_not_equal_to(__x._M_data, __y._M_data)); |
5215 | } |
5216 | |
5217 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type |
5218 | operator<(const simd& __x, const simd& __y) |
5219 | { return simd::_S_make_mask(k: _Impl::_S_less(__x._M_data, __y._M_data)); } |
5220 | |
5221 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type |
5222 | operator<=(const simd& __x, const simd& __y) |
5223 | { |
5224 | return simd::_S_make_mask(k: _Impl::_S_less_equal(__x._M_data, __y._M_data)); |
5225 | } |
5226 | |
5227 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type |
5228 | operator>(const simd& __x, const simd& __y) |
5229 | { return simd::_S_make_mask(k: _Impl::_S_less(__y._M_data, __x._M_data)); } |
5230 | |
5231 | _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type |
5232 | operator>=(const simd& __x, const simd& __y) |
5233 | { |
5234 | return simd::_S_make_mask(k: _Impl::_S_less_equal(__y._M_data, __x._M_data)); |
5235 | } |
5236 | |
// operator?: overloads (suggested extension) {{{
// Element-wise blend: for each lane, picks __where_true where __k is set,
// __where_false otherwise.  Only compiled if the (hypothetical) compiler
// feature making ?: overloadable is available.
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
operator?:(const mask_type& __k, const simd& __where_true,
	   const simd& __where_false)
{
  // Start from the false value and overwrite the selected lanes.
  auto __ret = __where_false;
  _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
  return __ret;
}

#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
// }}}
5250 | |
// "private" because of the first argument's namespace
// Wraps an already-computed native representation __init without any
// conversion; the _PrivateInit tag keeps this out of the public overload
// set.
_GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
simd(_PrivateInit, const _MemberType& __init)
: _M_data(__init) {}
5255 | |
// "private" because of the first argument's namespace
// Constructs from a bitset: _M_data is zero-initialized, then the lanes
// selected by __init are masked-assigned to ~*this — i.e. ~value_type()
// (all bits set) where the bit is 1, zero elsewhere.  NOTE(review): the
// ~ operator presumably restricts this to integral value_types — confirm
// against the callers.
_GLIBCXX_SIMD_INTRINSIC
simd(_BitsetInit, bitset<size()> __init) : _M_data()
{ where(mask_type(__bitset_init, __init), *this) = ~*this; }
5260 | |
// Returns true if the value is known to the compiler at compile time
// (constant-propagated).  For scalar ABIs this asks the compiler directly
// via __builtin_constant_p; otherwise the query is forwarded to the
// wrapped vector member.
_GLIBCXX_SIMD_INTRINSIC constexpr bool
_M_is_constprop() const
{
  if constexpr (__is_scalar_abi<_Abi>())
    return __builtin_constant_p(_M_data);
  else
    return _M_data._M_is_constprop();
}
5269 | |
private:
  // Helper used by the comparison operators to wrap a raw mask member
  // into a mask_type via its private-init constructor.
  _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
  _S_make_mask(typename mask_type::_MemberType __k)
  { return {__private_init, __k}; }

  // The free __data() accessors (defined below) need direct access to
  // _M_data.
  friend const auto& __data<value_type, abi_type>(const simd&);
  friend auto& __data<value_type, abi_type>(simd&);

  // The native representation, over-aligned to the ABI's simd alignment.
  alignas(_Traits::_S_simd_align) _MemberType _M_data;
5278 | }; |
5279 | |
5280 | // }}} |
5281 | /// @cond undocumented |
5282 | // __data {{{ |
// Read-only access to the private native representation of a simd
// (befriended by the simd class above).
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x)
  { return __x._M_data; }
5287 | |
// Mutable access to the private native representation of a simd
// (befriended by the simd class above).
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x)
  { return __x._M_data; }
5292 | |
5293 | // }}} |
namespace __float_bitwise_operators { //{{{
// Opt-in bitwise operators for simd values (useful for floating-point
// element types, which have no built-in ^ | &).  Each forwards to the
// ABI's _SimdImpl on the raw representations and wraps the result via the
// private-init constructor.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
} // namespace __float_bitwise_operators }}}
5310 | /// @endcond |
5311 | |
5312 | /// @} |
5313 | _GLIBCXX_SIMD_END_NAMESPACE |
5314 | |
5315 | #endif // __cplusplus >= 201703L |
5316 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H |
5317 | |
5318 | // vim: foldmethod=marker foldmarker={{{,}}} |
5319 | |