1 | // Simd fixed_size ABI specific implementations -*- C++ -*- |
2 | |
3 | // Copyright (C) 2020-2021 Free Software Foundation, Inc. |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free |
6 | // software; you can redistribute it and/or modify it under the |
7 | // terms of the GNU General Public License as published by the |
8 | // Free Software Foundation; either version 3, or (at your option) |
9 | // any later version. |
10 | |
11 | // This library is distributed in the hope that it will be useful, |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | // GNU General Public License for more details. |
15 | |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version |
18 | // 3.1, as published by the Free Software Foundation. |
19 | |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
23 | // <http://www.gnu.org/licenses/>. |
24 | |
25 | /* |
26 | * The fixed_size ABI gives the following guarantees: |
27 | * - simd objects are passed via the stack |
28 | * - memory layout of `simd<_Tp, _Np>` is equivalent to `array<_Tp, _Np>` |
 * - alignment of `simd<_Tp, _Np>` is `_Np * sizeof(_Tp)` if _Np is a
 *   power-of-2 value, otherwise `std::__bit_ceil(_Np * sizeof(_Tp))` (Note:
 *   if the alignment were to exceed the system/compiler maximum, it is capped
 *   at that maximum)
33 | * - simd_mask objects are passed like bitset<_Np> |
34 | * - memory layout of `simd_mask<_Tp, _Np>` is equivalent to `bitset<_Np>` |
35 | * - alignment of `simd_mask<_Tp, _Np>` is equal to the alignment of |
36 | * `bitset<_Np>` |
37 | */ |
38 | |
39 | #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_ |
40 | #define _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_ |
41 | |
42 | #if __cplusplus >= 201703L |
43 | |
44 | #include <array> |
45 | |
46 | _GLIBCXX_SIMD_BEGIN_NAMESPACE |
47 | |
48 | // __simd_tuple_element {{{ |
49 | template <size_t _I, typename _Tp> |
50 | struct __simd_tuple_element; |
51 | |
52 | template <typename _Tp, typename _A0, typename... _As> |
53 | struct __simd_tuple_element<0, _SimdTuple<_Tp, _A0, _As...>> |
54 | { using type = simd<_Tp, _A0>; }; |
55 | |
56 | template <size_t _I, typename _Tp, typename _A0, typename... _As> |
57 | struct __simd_tuple_element<_I, _SimdTuple<_Tp, _A0, _As...>> |
58 | { using type = typename __simd_tuple_element<_I - 1, _SimdTuple<_Tp, _As...>>::type; }; |
59 | |
60 | template <size_t _I, typename _Tp> |
61 | using __simd_tuple_element_t = typename __simd_tuple_element<_I, _Tp>::type; |
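
// For illustration: given `_Tup = _SimdTuple<float, _A0, _A1>`,
// `__simd_tuple_element_t<0, _Tup>` is `simd<float, _A0>` and
// `__simd_tuple_element_t<1, _Tup>` is `simd<float, _A1>`.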
62 | |
63 | // }}} |
64 | // __simd_tuple_concat {{{ |
65 | |
66 | template <typename _Tp, typename... _A0s, typename... _A1s> |
67 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0s..., _A1s...> |
68 | __simd_tuple_concat(const _SimdTuple<_Tp, _A0s...>& __left, |
69 | const _SimdTuple<_Tp, _A1s...>& __right) |
70 | { |
71 | if constexpr (sizeof...(_A0s) == 0) |
72 | return __right; |
73 | else if constexpr (sizeof...(_A1s) == 0) |
74 | return __left; |
75 | else |
76 | return {__left.first, __simd_tuple_concat(__left.second, __right)}; |
77 | } |
78 | |
79 | template <typename _Tp, typename _A10, typename... _A1s> |
80 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, simd_abi::scalar, _A10, _A1s...> |
81 | __simd_tuple_concat(const _Tp& __left, const _SimdTuple<_Tp, _A10, _A1s...>& __right) |
82 | { return {__left, __right}; } |
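
// For illustration: `__simd_tuple_concat(1.f, _SimdTuple<float, _A0>{...})`
// yields a `_SimdTuple<float, simd_abi::scalar, _A0>` with the scalar value
// prepended as the first element.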
83 | |
84 | // }}} |
85 | // __simd_tuple_pop_front {{{ |
// Returns the tail of __x, i.e. the nested _SimdTuple with the first _Np
// elements dropped.
// Precondition: _Np must equal the combined size of the leading members that
// are dropped (recursively), so the cut falls on a member boundary.
88 | template <size_t _Np, typename _Tp> |
89 | _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) |
90 | __simd_tuple_pop_front(_Tp&& __x) |
91 | { |
92 | if constexpr (_Np == 0) |
93 | return static_cast<_Tp&&>(__x); |
94 | else |
95 | { |
96 | using _Up = __remove_cvref_t<_Tp>; |
97 | static_assert(_Np >= _Up::_S_first_size); |
98 | return __simd_tuple_pop_front<_Np - _Up::_S_first_size>(__x.second); |
99 | } |
100 | } |
101 | |
102 | // }}} |
103 | // __get_simd_at<_Np> {{{1 |
104 | struct __as_simd {}; |
105 | |
106 | struct __as_simd_tuple {}; |
107 | |
108 | template <typename _Tp, typename _A0, typename... _Abis> |
109 | _GLIBCXX_SIMD_INTRINSIC constexpr simd<_Tp, _A0> |
110 | __simd_tuple_get_impl(__as_simd, const _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) |
111 | { return {__private_init, __t.first}; } |
112 | |
113 | template <typename _Tp, typename _A0, typename... _Abis> |
114 | _GLIBCXX_SIMD_INTRINSIC constexpr const auto& |
115 | __simd_tuple_get_impl(__as_simd_tuple, const _SimdTuple<_Tp, _A0, _Abis...>& __t, |
116 | _SizeConstant<0>) |
117 | { return __t.first; } |
118 | |
119 | template <typename _Tp, typename _A0, typename... _Abis> |
120 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& |
121 | __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) |
122 | { return __t.first; } |
123 | |
124 | template <typename _R, size_t _Np, typename _Tp, typename... _Abis> |
125 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
126 | __simd_tuple_get_impl(_R, const _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) |
127 | { return __simd_tuple_get_impl(_R(), __t.second, _SizeConstant<_Np - 1>()); } |
128 | |
129 | template <size_t _Np, typename _Tp, typename... _Abis> |
130 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& |
131 | __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) |
132 | { return __simd_tuple_get_impl(__as_simd_tuple(), __t.second, _SizeConstant<_Np - 1>()); } |
133 | |
134 | template <size_t _Np, typename _Tp, typename... _Abis> |
135 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
136 | __get_simd_at(const _SimdTuple<_Tp, _Abis...>& __t) |
137 | { return __simd_tuple_get_impl(__as_simd(), __t, _SizeConstant<_Np>()); } |
138 | |
139 | // }}} |
140 | // __get_tuple_at<_Np> {{{ |
141 | template <size_t _Np, typename _Tp, typename... _Abis> |
142 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
143 | __get_tuple_at(const _SimdTuple<_Tp, _Abis...>& __t) |
144 | { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } |
145 | |
146 | template <size_t _Np, typename _Tp, typename... _Abis> |
147 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& |
148 | __get_tuple_at(_SimdTuple<_Tp, _Abis...>& __t) |
149 | { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } |
150 | |
151 | // __tuple_element_meta {{{1 |
152 | template <typename _Tp, typename _Abi, size_t _Offset> |
153 | struct __tuple_element_meta : public _Abi::_SimdImpl |
154 | { |
155 | static_assert(is_same_v<typename _Abi::_SimdImpl::abi_type, |
156 | _Abi>); // this fails e.g. when _SimdImpl is an |
157 | // alias for _SimdImplBuiltin<_DifferentAbi> |
158 | using value_type = _Tp; |
159 | using abi_type = _Abi; |
160 | using _Traits = _SimdTraits<_Tp, _Abi>; |
161 | using _MaskImpl = typename _Abi::_MaskImpl; |
162 | using _MaskMember = typename _Traits::_MaskMember; |
163 | using simd_type = simd<_Tp, _Abi>; |
164 | static constexpr size_t _S_offset = _Offset; |
165 | static constexpr size_t _S_size() { return simd_size<_Tp, _Abi>::value; } |
166 | static constexpr _MaskImpl _S_mask_impl = {}; |
167 | |
168 | template <size_t _Np, bool _Sanitized> |
169 | _GLIBCXX_SIMD_INTRINSIC static constexpr auto |
170 | _S_submask(_BitMask<_Np, _Sanitized> __bits) |
171 | { return __bits.template _M_extract<_Offset, _S_size()>(); } |
172 | |
173 | template <size_t _Np, bool _Sanitized> |
174 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
175 | _S_make_mask(_BitMask<_Np, _Sanitized> __bits) |
176 | { |
177 | return _MaskImpl::template _S_convert<_Tp>( |
178 | __bits.template _M_extract<_Offset, _S_size()>()._M_sanitized()); |
179 | } |
180 | |
181 | _GLIBCXX_SIMD_INTRINSIC static constexpr _ULLong |
182 | _S_mask_to_shifted_ullong(_MaskMember __k) |
183 | { return _MaskImpl::_S_to_bits(__k).to_ullong() << _Offset; } |
184 | }; |
185 | |
186 | template <size_t _Offset, typename _Tp, typename _Abi, typename... _As> |
187 | constexpr |
188 | __tuple_element_meta<_Tp, _Abi, _Offset> |
189 | __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&) |
190 | { return {}; } |
191 | |
192 | // }}}1 |
193 | // _WithOffset wrapper class {{{ |
194 | template <size_t _Offset, typename _Base> |
195 | struct _WithOffset : public _Base |
196 | { |
197 | static inline constexpr size_t _S_offset = _Offset; |
198 | |
199 | _GLIBCXX_SIMD_INTRINSIC char* |
200 | _M_as_charptr() |
201 | { return reinterpret_cast<char*>(this) + _S_offset * sizeof(typename _Base::value_type); } |
202 | |
203 | _GLIBCXX_SIMD_INTRINSIC const char* |
204 | _M_as_charptr() const |
205 | { return reinterpret_cast<const char*>(this) + _S_offset * sizeof(typename _Base::value_type); } |
206 | }; |
207 | |
208 | // make _WithOffset<_WithOffset> ill-formed to use: |
209 | template <size_t _O0, size_t _O1, typename _Base> |
210 | struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {}; |
211 | |
212 | template <size_t _Offset, typename _Tp> |
213 | decltype(auto) |
214 | __add_offset(_Tp& __base) |
215 | { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } |
216 | |
217 | template <size_t _Offset, typename _Tp> |
218 | decltype(auto) |
219 | __add_offset(const _Tp& __base) |
220 | { return static_cast<const _WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } |
221 | |
222 | template <size_t _Offset, size_t _ExistingOffset, typename _Tp> |
223 | decltype(auto) |
224 | __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base) |
225 | { return static_cast<_WithOffset<_Offset + _ExistingOffset, _Tp>&>(static_cast<_Tp&>(__base)); } |
226 | |
227 | template <size_t _Offset, size_t _ExistingOffset, typename _Tp> |
228 | decltype(auto) |
229 | __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base) |
230 | { |
231 | return static_cast<const _WithOffset<_Offset + _ExistingOffset, _Tp>&>( |
232 | static_cast<const _Tp&>(__base)); |
233 | } |
234 | |
235 | template <typename _Tp> |
236 | constexpr inline size_t __offset = 0; |
237 | |
238 | template <size_t _Offset, typename _Tp> |
239 | constexpr inline size_t __offset<_WithOffset<_Offset, _Tp>> |
240 | = _WithOffset<_Offset, _Tp>::_S_offset; |
241 | |
242 | template <typename _Tp> |
243 | constexpr inline size_t __offset<const _Tp> = __offset<_Tp>; |
244 | |
245 | template <typename _Tp> |
246 | constexpr inline size_t __offset<_Tp&> = __offset<_Tp>; |
247 | |
248 | template <typename _Tp> |
249 | constexpr inline size_t __offset<_Tp&&> = __offset<_Tp>; |
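
// For illustration: `__offset<_WithOffset<4, _SomeTuple>>` is 4, whereas
// `__offset<_SomeTuple>` is 0. cv- and ref-qualifiers are stripped first, so
// e.g. `__offset<const _WithOffset<4, _SomeTuple>&>` is also 4.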
250 | |
251 | // }}} |
252 | // _SimdTuple specializations {{{1 |
253 | // empty {{{2 |
254 | template <typename _Tp> |
255 | struct _SimdTuple<_Tp> |
256 | { |
257 | using value_type = _Tp; |
258 | static constexpr size_t _S_tuple_size = 0; |
259 | static constexpr size_t _S_size() { return 0; } |
260 | }; |
261 | |
262 | // _SimdTupleData {{{2 |
263 | template <typename _FirstType, typename _SecondType> |
264 | struct _SimdTupleData |
265 | { |
266 | _FirstType first; |
267 | _SecondType second; |
268 | |
269 | _GLIBCXX_SIMD_INTRINSIC |
270 | constexpr bool |
271 | _M_is_constprop() const |
272 | { |
273 | if constexpr (is_class_v<_FirstType>) |
274 | return first._M_is_constprop() && second._M_is_constprop(); |
275 | else |
276 | return __builtin_constant_p(first) && second._M_is_constprop(); |
277 | } |
278 | }; |
279 | |
280 | template <typename _FirstType, typename _Tp> |
281 | struct _SimdTupleData<_FirstType, _SimdTuple<_Tp>> |
282 | { |
283 | _FirstType first; |
284 | static constexpr _SimdTuple<_Tp> second = {}; |
285 | |
286 | _GLIBCXX_SIMD_INTRINSIC |
287 | constexpr bool |
288 | _M_is_constprop() const |
289 | { |
290 | if constexpr (is_class_v<_FirstType>) |
291 | return first._M_is_constprop(); |
292 | else |
293 | return __builtin_constant_p(first); |
294 | } |
295 | }; |
296 | |
297 | // 1 or more {{{2 |
298 | template <typename _Tp, typename _Abi0, typename... _Abis> |
299 | struct _SimdTuple<_Tp, _Abi0, _Abis...> |
300 | : _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember, |
301 | _SimdTuple<_Tp, _Abis...>> |
302 | { |
303 | static_assert(!__is_fixed_size_abi_v<_Abi0>); |
304 | using value_type = _Tp; |
305 | using _FirstType = typename _SimdTraits<_Tp, _Abi0>::_SimdMember; |
306 | using _FirstAbi = _Abi0; |
307 | using _SecondType = _SimdTuple<_Tp, _Abis...>; |
308 | static constexpr size_t _S_tuple_size = sizeof...(_Abis) + 1; |
309 | |
310 | static constexpr size_t _S_size() |
311 | { return simd_size_v<_Tp, _Abi0> + _SecondType::_S_size(); } |
312 | |
313 | static constexpr size_t _S_first_size = simd_size_v<_Tp, _Abi0>; |
314 | static constexpr bool _S_is_homogeneous = (is_same_v<_Abi0, _Abis> && ...); |
315 | |
316 | using _Base = _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember, |
317 | _SimdTuple<_Tp, _Abis...>>; |
318 | using _Base::first; |
319 | using _Base::second; |
320 | |
321 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple() = default; |
322 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(const _SimdTuple&) = default; |
323 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple& operator=(const _SimdTuple&) |
324 | = default; |
325 | |
326 | template <typename _Up> |
327 | _GLIBCXX_SIMD_INTRINSIC constexpr |
328 | _SimdTuple(_Up&& __x) |
329 | : _Base{static_cast<_Up&&>(__x)} {} |
330 | |
331 | template <typename _Up, typename _Up2> |
332 | _GLIBCXX_SIMD_INTRINSIC constexpr |
333 | _SimdTuple(_Up&& __x, _Up2&& __y) |
334 | : _Base{static_cast<_Up&&>(__x), static_cast<_Up2&&>(__y)} {} |
335 | |
336 | template <typename _Up> |
337 | _GLIBCXX_SIMD_INTRINSIC constexpr |
338 | _SimdTuple(_Up&& __x, _SimdTuple<_Tp>) |
339 | : _Base{static_cast<_Up&&>(__x)} {} |
340 | |
341 | _GLIBCXX_SIMD_INTRINSIC char* |
342 | _M_as_charptr() |
343 | { return reinterpret_cast<char*>(this); } |
344 | |
345 | _GLIBCXX_SIMD_INTRINSIC const char* |
346 | _M_as_charptr() const |
347 | { return reinterpret_cast<const char*>(this); } |
348 | |
349 | template <size_t _Np> |
350 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& |
351 | _M_at() |
352 | { |
353 | if constexpr (_Np == 0) |
354 | return first; |
355 | else |
356 | return second.template _M_at<_Np - 1>(); |
357 | } |
358 | |
359 | template <size_t _Np> |
360 | _GLIBCXX_SIMD_INTRINSIC constexpr const auto& |
361 | _M_at() const |
362 | { |
363 | if constexpr (_Np == 0) |
364 | return first; |
365 | else |
366 | return second.template _M_at<_Np - 1>(); |
367 | } |
368 | |
369 | template <size_t _Np> |
370 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
371 | _M_simd_at() const |
372 | { |
373 | if constexpr (_Np == 0) |
374 | return simd<_Tp, _Abi0>(__private_init, first); |
375 | else |
376 | return second.template _M_simd_at<_Np - 1>(); |
377 | } |
378 | |
379 | template <size_t _Offset = 0, typename _Fp> |
380 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple |
381 | _S_generate(_Fp&& __gen, _SizeConstant<_Offset> = {}) |
382 | { |
383 | auto&& __first = __gen(__tuple_element_meta<_Tp, _Abi0, _Offset>()); |
384 | if constexpr (_S_tuple_size == 1) |
385 | return {__first}; |
386 | else |
387 | return {__first, |
388 | _SecondType::_S_generate( |
389 | static_cast<_Fp&&>(__gen), |
390 | _SizeConstant<_Offset + simd_size_v<_Tp, _Abi0>>())}; |
391 | } |
392 | |
393 | template <size_t _Offset = 0, typename _Fp, typename... _More> |
394 | _GLIBCXX_SIMD_INTRINSIC _SimdTuple |
395 | _M_apply_wrapped(_Fp&& __fun, const _More&... __more) const |
396 | { |
397 | auto&& __first |
398 | = __fun(__make_meta<_Offset>(*this), first, __more.first...); |
399 | if constexpr (_S_tuple_size == 1) |
400 | return {__first}; |
401 | else |
402 | return { |
403 | __first, |
404 | second.template _M_apply_wrapped<_Offset + simd_size_v<_Tp, _Abi0>>( |
405 | static_cast<_Fp&&>(__fun), __more.second...)}; |
406 | } |
407 | |
408 | template <typename _Tup> |
409 | _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) |
      _M_extract_argument(_Tup&& __tup) const
411 | { |
412 | using _TupT = typename __remove_cvref_t<_Tup>::value_type; |
413 | if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>) |
414 | return __tup.first; |
415 | else if (__builtin_is_constant_evaluated()) |
416 | return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate( |
417 | [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
418 | return __meta._S_generator( |
419 | [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
420 | return __tup[__i]; |
421 | }, static_cast<_TupT*>(nullptr)); |
422 | }); |
423 | else |
424 | return [&]() { // not always_inline; allow the compiler to decide |
425 | __fixed_size_storage_t<_TupT, _S_first_size> __r; |
426 | __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(), |
427 | sizeof(__r)); |
428 | return __r; |
429 | }(); |
430 | } |
431 | |
432 | template <typename _Tup> |
433 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& |
434 | _M_skip_argument(_Tup&& __tup) const |
435 | { |
436 | static_assert(_S_tuple_size > 1); |
437 | using _Up = __remove_cvref_t<_Tup>; |
438 | constexpr size_t __off = __offset<_Up>; |
439 | if constexpr (_S_first_size == _Up::_S_first_size && __off == 0) |
440 | return __tup.second; |
441 | else if constexpr (_S_first_size > _Up::_S_first_size |
442 | && _S_first_size % _Up::_S_first_size == 0 |
443 | && __off == 0) |
444 | return __simd_tuple_pop_front<_S_first_size>(__tup); |
445 | else if constexpr (_S_first_size + __off < _Up::_S_first_size) |
446 | return __add_offset<_S_first_size>(__tup); |
447 | else if constexpr (_S_first_size + __off == _Up::_S_first_size) |
448 | return __tup.second; |
449 | else |
450 | __assert_unreachable<_Tup>(); |
451 | } |
452 | |
453 | template <size_t _Offset, typename... _More> |
454 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
455 | _M_assign_front(const _SimdTuple<_Tp, _Abi0, _More...>& __x) & |
456 | { |
457 | static_assert(_Offset == 0); |
458 | first = __x.first; |
459 | if constexpr (sizeof...(_More) > 0) |
460 | { |
461 | static_assert(sizeof...(_Abis) >= sizeof...(_More)); |
462 | second.template _M_assign_front<0>(__x.second); |
463 | } |
464 | } |
465 | |
466 | template <size_t _Offset> |
467 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
468 | _M_assign_front(const _FirstType& __x) & |
469 | { |
470 | static_assert(_Offset == 0); |
471 | first = __x; |
472 | } |
473 | |
474 | template <size_t _Offset, typename... _As> |
475 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
476 | _M_assign_front(const _SimdTuple<_Tp, _As...>& __x) & |
477 | { |
478 | __builtin_memcpy(_M_as_charptr() + _Offset * sizeof(value_type), |
479 | __x._M_as_charptr(), |
480 | sizeof(_Tp) * _SimdTuple<_Tp, _As...>::_S_size()); |
481 | } |
482 | |
483 | /* |
484 | * Iterate over the first objects in this _SimdTuple and call __fun for each |
485 | * of them. If additional arguments are passed via __more, chunk them into |
486 | * _SimdTuple or __vector_type_t objects of the same number of values. |
487 | */ |
488 | template <typename _Fp, typename... _More> |
489 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple |
490 | _M_apply_per_chunk(_Fp&& __fun, _More&&... __more) const |
491 | { |
492 | if constexpr ((... |
493 | || conjunction_v< |
494 | is_lvalue_reference<_More>, |
495 | negation<is_const<remove_reference_t<_More>>>>) ) |
496 | { |
497 | // need to write back at least one of __more after calling __fun |
498 | auto&& __first = [&](auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
499 | auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, |
500 | __args...); |
501 | [[maybe_unused]] auto&& __ignore_me = {( |
502 | [](auto&& __dst, const auto& __src) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
503 | if constexpr (is_assignable_v<decltype(__dst), |
504 | decltype(__dst)>) |
505 | { |
506 | __dst.template _M_assign_front<__offset<decltype(__dst)>>( |
507 | __src); |
508 | } |
509 | }(static_cast<_More&&>(__more), __args), |
510 | 0)...}; |
511 | return __r; |
512 | }(_M_extract_argument(__more)...); |
513 | if constexpr (_S_tuple_size == 1) |
514 | return {__first}; |
515 | else |
516 | return {__first, |
517 | second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), |
518 | _M_skip_argument(__more)...)}; |
519 | } |
520 | else |
521 | { |
522 | auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, |
523 | _M_extract_argument(__more)...); |
524 | if constexpr (_S_tuple_size == 1) |
525 | return {__first}; |
526 | else |
527 | return {__first, |
528 | second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), |
529 | _M_skip_argument(__more)...)}; |
530 | } |
531 | } |
532 | |
533 | template <typename _R = _Tp, typename _Fp, typename... _More> |
534 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
535 | _M_apply_r(_Fp&& __fun, const _More&... __more) const |
536 | { |
537 | auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, |
538 | __more.first...); |
539 | if constexpr (_S_tuple_size == 1) |
540 | return __first; |
541 | else |
542 | return __simd_tuple_concat<_R>( |
543 | __first, second.template _M_apply_r<_R>(static_cast<_Fp&&>(__fun), |
544 | __more.second...)); |
545 | } |
546 | |
547 | template <typename _Fp, typename... _More> |
548 | _GLIBCXX_SIMD_INTRINSIC constexpr friend _SanitizedBitMask<_S_size()> |
549 | _M_test(const _Fp& __fun, const _SimdTuple& __x, const _More&... __more) |
550 | { |
551 | const _SanitizedBitMask<_S_first_size> __first |
552 | = _Abi0::_MaskImpl::_S_to_bits( |
553 | __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), __x.first, |
554 | __more.first...)); |
555 | if constexpr (_S_tuple_size == 1) |
556 | return __first; |
557 | else |
558 | return _M_test(__fun, __x.second, __more.second...) |
559 | ._M_prepend(__first); |
560 | } |
561 | |
562 | template <typename _Up, _Up _I> |
563 | _GLIBCXX_SIMD_INTRINSIC constexpr _Tp |
564 | operator[](integral_constant<_Up, _I>) const noexcept |
565 | { |
566 | if constexpr (_I < simd_size_v<_Tp, _Abi0>) |
        return _M_subscript_read(_I);
568 | else |
569 | return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()]; |
570 | } |
571 | |
572 | constexpr _Tp |
573 | operator[](size_t __i) const noexcept |
574 | { |
575 | if constexpr (_S_tuple_size == 1) |
576 | return _M_subscript_read(__i); |
577 | #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS |
578 | else if (not __builtin_is_constant_evaluated()) |
579 | return reinterpret_cast<const __may_alias<_Tp>*>(this)[__i]; |
580 | #endif |
581 | else if constexpr (__is_scalar_abi<_Abi0>()) |
582 | { |
583 | const _Tp* ptr = &first; |
584 | return ptr[__i]; |
585 | } |
586 | else |
587 | return __i < simd_size_v<_Tp, _Abi0> ? _M_subscript_read(__i) |
588 | : second[__i - simd_size_v<_Tp, _Abi0>]; |
589 | } |
590 | |
591 | constexpr void |
592 | _M_set(size_t __i, _Tp __val) noexcept |
593 | { |
594 | if constexpr (_S_tuple_size == 1) |
        return _M_subscript_write(__i, __val);
596 | #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS |
597 | else if (not __builtin_is_constant_evaluated()) |
598 | reinterpret_cast<__may_alias<_Tp>*>(this)[__i] = __val; |
599 | #endif |
600 | else if (__i < simd_size_v<_Tp, _Abi0>) |
        _M_subscript_write(__i, __val);
602 | else |
603 | second._M_set(__i - simd_size_v<_Tp, _Abi0>, __val); |
604 | } |
605 | |
606 | private: |
607 | // _M_subscript_read/_write {{{ |
608 | constexpr _Tp |
609 | _M_subscript_read([[maybe_unused]] size_t __i) const noexcept |
610 | { |
611 | if constexpr (__is_vectorizable_v<_FirstType>) |
612 | return first; |
613 | else |
614 | return first[__i]; |
615 | } |
616 | |
617 | constexpr void |
618 | _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept |
619 | { |
620 | if constexpr (__is_vectorizable_v<_FirstType>) |
621 | first = __y; |
622 | else |
623 | first._M_set(__i, __y); |
624 | } |
625 | |
626 | // }}} |
627 | }; |
628 | |
629 | // __make_simd_tuple {{{1 |
630 | template <typename _Tp, typename _A0> |
631 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> |
632 | __make_simd_tuple(simd<_Tp, _A0> __x0) |
633 | { return {__data(__x0)}; } |
634 | |
635 | template <typename _Tp, typename _A0, typename... _As> |
636 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _As...> |
637 | __make_simd_tuple(const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs) |
638 | { return {__data(__x0), __make_simd_tuple(__xs...)}; } |
639 | |
640 | template <typename _Tp, typename _A0> |
641 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> |
642 | __make_simd_tuple(const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0) |
643 | { return {__arg0}; } |
644 | |
645 | template <typename _Tp, typename _A0, typename _A1, typename... _Abis> |
646 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _A1, _Abis...> |
647 | __make_simd_tuple( |
648 | const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0, |
649 | const typename _SimdTraits<_Tp, _A1>::_SimdMember& __arg1, |
650 | const typename _SimdTraits<_Tp, _Abis>::_SimdMember&... __args) |
651 | { return {__arg0, __make_simd_tuple<_Tp, _A1, _Abis...>(__arg1, __args...)}; } |
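
// For illustration: calling `__make_simd_tuple(__a, __b)` with
// `simd<float, _A0> __a` and `simd<float, _A1> __b` yields a
// `_SimdTuple<float, _A0, _A1>` holding `__data(__a)` and `__data(__b)`.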
652 | |
653 | // __to_simd_tuple {{{1 |
654 | template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX> |
655 | _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> |
656 | __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX); |
657 | |
658 | template <typename _Tp, size_t _Np, |
659 | size_t _Offset = 0, // skip this many elements in __from0 |
660 | typename _R = __fixed_size_storage_t<_Tp, _Np>, typename _V0, |
661 | typename _V0VT = _VectorTraits<_V0>, typename... _VX> |
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_simd_tuple(const _V0 __from0, const _VX... __fromX)
663 | { |
664 | static_assert(is_same_v<typename _V0VT::value_type, _Tp>); |
665 | static_assert(_Offset < _V0VT::_S_full_size); |
666 | using _R0 = __vector_type_t<_Tp, _R::_S_first_size>; |
667 | if constexpr (_R::_S_tuple_size == 1) |
668 | { |
669 | if constexpr (_Np == 1) |
670 | return _R{__from0[_Offset]}; |
671 | else if constexpr (_Offset == 0 && _V0VT::_S_full_size >= _Np) |
672 | return _R{__intrin_bitcast<_R0>(__from0)}; |
673 | else if constexpr (_Offset * 2 == _V0VT::_S_full_size |
674 | && _V0VT::_S_full_size / 2 >= _Np) |
675 | return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0))}; |
676 | else if constexpr (_Offset * 4 == _V0VT::_S_full_size |
677 | && _V0VT::_S_full_size / 4 >= _Np) |
678 | return _R{__intrin_bitcast<_R0>(__extract_part<1, 4>(__from0))}; |
679 | else |
680 | __assert_unreachable<_Tp>(); |
681 | } |
682 | else |
683 | { |
684 | if constexpr (1 == _R::_S_first_size) |
685 | { // extract one scalar and recurse |
686 | if constexpr (_Offset + 1 < _V0VT::_S_full_size) |
687 | return _R{__from0[_Offset], |
688 | __to_simd_tuple<_Tp, _Np - 1, _Offset + 1>(__from0, |
689 | __fromX...)}; |
690 | else |
691 | return _R{__from0[_Offset], |
692 | __to_simd_tuple<_Tp, _Np - 1, 0>(__fromX...)}; |
693 | } |
694 | |
695 | // place __from0 into _R::first and recurse for __fromX -> _R::second |
696 | else if constexpr (_V0VT::_S_full_size == _R::_S_first_size |
697 | && _Offset == 0) |
698 | return _R{__from0, |
699 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size>(__fromX...)}; |
700 | |
701 | // place lower part of __from0 into _R::first and recurse with _Offset |
702 | else if constexpr (_V0VT::_S_full_size > _R::_S_first_size |
703 | && _Offset == 0) |
704 | return _R{__intrin_bitcast<_R0>(__from0), |
705 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size, |
706 | _R::_S_first_size>(__from0, __fromX...)}; |
707 | |
708 | // place lower part of second quarter of __from0 into _R::first and |
709 | // recurse with _Offset |
710 | else if constexpr (_Offset * 4 == _V0VT::_S_full_size |
711 | && _V0VT::_S_full_size >= 4 * _R::_S_first_size) |
712 | return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), |
713 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size, |
714 | _Offset + _R::_S_first_size>(__from0, |
715 | __fromX...)}; |
716 | |
717 | // place lower half of high half of __from0 into _R::first and recurse |
718 | // with _Offset |
719 | else if constexpr (_Offset * 2 == _V0VT::_S_full_size |
720 | && _V0VT::_S_full_size >= 4 * _R::_S_first_size) |
721 | return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), |
722 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size, |
723 | _Offset + _R::_S_first_size>(__from0, |
724 | __fromX...)}; |
725 | |
726 | // place high half of __from0 into _R::first and recurse with __fromX |
727 | else if constexpr (_Offset * 2 == _V0VT::_S_full_size |
728 | && _V0VT::_S_full_size / 2 >= _R::_S_first_size) |
729 | return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0)), |
730 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 0>( |
731 | __fromX...)}; |
732 | |
      // ill-formed if some unforeseen pattern is needed
734 | else |
735 | __assert_unreachable<_Tp>(); |
736 | } |
737 | } |
738 | |
739 | template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX> |
740 | _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> |
741 | __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX) |
742 | { |
743 | if constexpr (is_same_v<_Tp, _V>) |
744 | { |
745 | static_assert( |
746 | sizeof...(_VX) == 0, |
747 | "An array of scalars must be the last argument to __to_simd_tuple" ); |
748 | return __call_with_subscripts( |
749 | __from, make_index_sequence<_NV>(), |
750 | [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
751 | return __simd_tuple_concat( |
752 | _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>()); |
753 | }); |
754 | } |
755 | else |
756 | return __call_with_subscripts( |
757 | __from, make_index_sequence<_NV>(), |
758 | [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
759 | return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...); |
760 | }); |
761 | } |
762 | |
763 | template <size_t, typename _Tp> |
764 | using __to_tuple_helper = _Tp; |
765 | |
766 | template <typename _Tp, typename _A0, size_t _NOut, size_t _Np, |
767 | size_t... _Indexes> |
768 | _GLIBCXX_SIMD_INTRINSIC __fixed_size_storage_t<_Tp, _NOut> |
769 | __to_simd_tuple_impl(index_sequence<_Indexes...>, |
770 | const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) |
771 | { |
772 | return __make_simd_tuple<_Tp, __to_tuple_helper<_Indexes, _A0>...>( |
773 | __args[_Indexes]...); |
774 | } |
775 | |
776 | template <typename _Tp, typename _A0, size_t _NOut, size_t _Np, |
777 | typename _R = __fixed_size_storage_t<_Tp, _NOut>> |
778 | _GLIBCXX_SIMD_INTRINSIC _R |
779 | __to_simd_tuple_sized( |
780 | const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) |
781 | { |
782 | static_assert(_Np * simd_size_v<_Tp, _A0> >= _NOut); |
783 | return __to_simd_tuple_impl<_Tp, _A0, _NOut>( |
784 | make_index_sequence<_R::_S_tuple_size>(), __args); |
785 | } |
786 | |
787 | // __optimize_simd_tuple {{{1 |
788 | template <typename _Tp> |
789 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp> |
790 | __optimize_simd_tuple(const _SimdTuple<_Tp>) |
791 | { return {}; } |
792 | |
793 | template <typename _Tp, typename _Ap> |
794 | _GLIBCXX_SIMD_INTRINSIC constexpr const _SimdTuple<_Tp, _Ap>& |
795 | __optimize_simd_tuple(const _SimdTuple<_Tp, _Ap>& __x) |
796 | { return __x; } |
797 | |
798 | template <typename _Tp, typename _A0, typename _A1, typename... _Abis, |
799 | typename _R = __fixed_size_storage_t< |
800 | _Tp, _SimdTuple<_Tp, _A0, _A1, _Abis...>::_S_size()>> |
801 | _GLIBCXX_SIMD_INTRINSIC constexpr _R |
802 | __optimize_simd_tuple(const _SimdTuple<_Tp, _A0, _A1, _Abis...>& __x) |
803 | { |
804 | using _Tup = _SimdTuple<_Tp, _A0, _A1, _Abis...>; |
805 | if constexpr (is_same_v<_R, _Tup>) |
806 | return __x; |
807 | else if constexpr (is_same_v<typename _R::_FirstType, |
808 | typename _Tup::_FirstType>) |
809 | return {__x.first, __optimize_simd_tuple(__x.second)}; |
810 | else if constexpr (__is_scalar_abi<_A0>() |
811 | || _A0::template _S_is_partial<_Tp>) |
812 | return {__generate_from_n_evaluations<_R::_S_first_size, |
813 | typename _R::_FirstType>( |
814 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }), |
815 | __optimize_simd_tuple( |
816 | __simd_tuple_pop_front<_R::_S_first_size>(__x))}; |
817 | else if constexpr (is_same_v<_A0, _A1> |
818 | && _R::_S_first_size == simd_size_v<_Tp, _A0> + simd_size_v<_Tp, _A1>) |
819 | return {__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), |
820 | __optimize_simd_tuple(__x.second.second)}; |
821 | else if constexpr (sizeof...(_Abis) >= 2 |
822 | && _R::_S_first_size == (4 * simd_size_v<_Tp, _A0>) |
823 | && simd_size_v<_Tp, _A0> == __simd_tuple_element_t< |
824 | (sizeof...(_Abis) >= 2 ? 3 : 0), _Tup>::size()) |
825 | return { |
826 | __concat(__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), |
827 | __concat(__x.template _M_at<2>(), __x.template _M_at<3>())), |
828 | __optimize_simd_tuple(__x.second.second.second.second)}; |
829 | else |
830 | { |
831 | static_assert(sizeof(_R) == sizeof(__x)); |
832 | _R __r; |
833 | __builtin_memcpy(__r._M_as_charptr(), __x._M_as_charptr(), |
834 | sizeof(_Tp) * _R::_S_size()); |
835 | return __r; |
836 | } |
837 | } |
838 | |
839 | // __for_each(const _SimdTuple &, Fun) {{{1 |
840 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> |
841 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
842 | __for_each(const _SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) |
843 | { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } |
844 | |
845 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, |
846 | typename... _As, typename _Fp> |
847 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
848 | __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) |
849 | { |
850 | __fun(__make_meta<_Offset>(__t), __t.first); |
851 | __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, |
852 | static_cast<_Fp&&>(__fun)); |
853 | } |
854 | |
855 | // __for_each(_SimdTuple &, Fun) {{{1 |
856 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> |
857 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
858 | __for_each(_SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) |
859 | { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } |
860 | |
861 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, |
862 | typename... _As, typename _Fp> |
863 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
864 | __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) |
865 | { |
866 | __fun(__make_meta<_Offset>(__t), __t.first); |
867 | __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, |
868 | static_cast<_Fp&&>(__fun)); |
869 | } |
870 | |
871 | // __for_each(_SimdTuple &, const _SimdTuple &, Fun) {{{1 |
872 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> |
873 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
874 | __for_each(_SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) |
875 | { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } |
876 | |
877 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, |
878 | typename... _As, typename _Fp> |
879 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
880 | __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __a, |
881 | const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) |
882 | { |
883 | __fun(__make_meta<_Offset>(__a), __a.first, __b.first); |
884 | __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, |
885 | static_cast<_Fp&&>(__fun)); |
886 | } |
887 | |
888 | // __for_each(const _SimdTuple &, const _SimdTuple &, Fun) {{{1 |
889 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> |
890 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
891 | __for_each(const _SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) |
892 | { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } |
893 | |
894 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, |
895 | typename... _As, typename _Fp> |
896 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
897 | __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __a, |
898 | const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) |
899 | { |
900 | __fun(__make_meta<_Offset>(__a), __a.first, __b.first); |
901 | __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, |
902 | static_cast<_Fp&&>(__fun)); |
903 | } |
904 | |
905 | // }}}1 |
906 | // __extract_part(_SimdTuple) {{{ |
907 | template <int _Index, int _Total, int _Combine, typename _Tp, typename _A0, typename... _As> |
908 | _GLIBCXX_SIMD_INTRINSIC constexpr auto // __vector_type_t or _SimdTuple |
  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x)
910 | { |
911 | // worst cases: |
912 | // (a) 4, 4, 4 => 3, 3, 3, 3 (_Total = 4) |
913 | // (b) 2, 2, 2 => 3, 3 (_Total = 2) |
914 | // (c) 4, 2 => 2, 2, 2 (_Total = 3) |
915 | using _Tuple = _SimdTuple<_Tp, _A0, _As...>; |
916 | static_assert(_Index + _Combine <= _Total && _Index >= 0 && _Total >= 1); |
917 | constexpr size_t _Np = _Tuple::_S_size(); |
918 | static_assert(_Np >= _Total && _Np % _Total == 0); |
919 | constexpr size_t __values_per_part = _Np / _Total; |
920 | [[maybe_unused]] constexpr size_t __values_to_skip |
921 | = _Index * __values_per_part; |
922 | constexpr size_t __return_size = __values_per_part * _Combine; |
923 | using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>; |
924 | |
925 | // handle (optimize) the simple cases |
926 | if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size) |
927 | return __x.first._M_data; |
928 | else if constexpr (_Index == 0 && _Total == _Combine) |
929 | return __x; |
930 | else if constexpr (_Index == 0 && _Tuple::_S_first_size >= __return_size) |
931 | return __intrin_bitcast<__vector_type_t<_Tp, __return_size>>( |
932 | __as_vector(__x.first)); |
933 | |
934 | // recurse to skip unused data members at the beginning of _SimdTuple |
935 | else if constexpr (__values_to_skip >= _Tuple::_S_first_size) |
936 | { // recurse |
937 | if constexpr (_Tuple::_S_first_size % __values_per_part == 0) |
938 | { |
939 | constexpr int __parts_in_first |
940 | = _Tuple::_S_first_size / __values_per_part; |
941 | return __extract_part<_Index - __parts_in_first, |
942 | _Total - __parts_in_first, _Combine>( |
943 | __x.second); |
944 | } |
945 | else |
946 | return __extract_part<__values_to_skip - _Tuple::_S_first_size, |
947 | _Np - _Tuple::_S_first_size, __return_size>( |
948 | __x.second); |
949 | } |
950 | |
951 | // extract from multiple _SimdTuple data members |
952 | else if constexpr (__return_size > _Tuple::_S_first_size - __values_to_skip) |
953 | { |
954 | #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS |
955 | const __may_alias<_Tp>* const element_ptr |
956 | = reinterpret_cast<const __may_alias<_Tp>*>(&__x) + __values_to_skip; |
957 | return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned)); |
958 | #else |
959 | [[maybe_unused]] constexpr size_t __offset = __values_to_skip; |
960 | return __as_vector(simd<_Tp, _RetAbi>( |
961 | [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
962 | constexpr _SizeConstant<__i + __offset> __k; |
963 | return __x[__k]; |
964 | })); |
965 | #endif |
966 | } |
967 | |
968 | // all of the return values are in __x.first |
969 | else if constexpr (_Tuple::_S_first_size % __values_per_part == 0) |
970 | return __extract_part<_Index, _Tuple::_S_first_size / __values_per_part, |
971 | _Combine>(__x.first); |
972 | else |
973 | return __extract_part<__values_to_skip, _Tuple::_S_first_size, |
974 | _Combine * __values_per_part>(__x.first); |
975 | } |
976 | |
977 | // }}} |
978 | // __fixed_size_storage_t<_Tp, _Np>{{{ |
979 | template <typename _Tp, int _Np, typename _Tuple, |
980 | typename _Next = simd<_Tp, _AllNativeAbis::_BestAbi<_Tp, _Np>>, |
981 | int _Remain = _Np - int(_Next::size())> |
982 | struct __fixed_size_storage_builder; |
983 | |
984 | template <typename _Tp, int _Np> |
985 | struct __fixed_size_storage |
986 | : public __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp>> {}; |
987 | |
988 | template <typename _Tp, int _Np, typename... _As, typename _Next> |
989 | struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, |
990 | 0> |
991 | { using type = _SimdTuple<_Tp, _As..., typename _Next::abi_type>; }; |
992 | |
993 | template <typename _Tp, int _Np, typename... _As, typename _Next, int _Remain> |
994 | struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, |
995 | _Remain> |
996 | { |
997 | using type = typename __fixed_size_storage_builder< |
998 | _Tp, _Remain, _SimdTuple<_Tp, _As..., typename _Next::abi_type>>::type; |
999 | }; |
1000 | |
1001 | // }}} |
1002 | // _AbisInSimdTuple {{{ |
1003 | template <typename _Tp> |
1004 | struct _SeqOp; |
1005 | |
1006 | template <size_t _I0, size_t... _Is> |
1007 | struct _SeqOp<index_sequence<_I0, _Is...>> |
1008 | { |
1009 | using _FirstPlusOne = index_sequence<_I0 + 1, _Is...>; |
1010 | using _NotFirstPlusOne = index_sequence<_I0, (_Is + 1)...>; |
1011 | template <size_t _First, size_t _Add> |
1012 | using _Prepend = index_sequence<_First, _I0 + _Add, (_Is + _Add)...>; |
1013 | }; |
1014 | |
1015 | template <typename _Tp> |
1016 | struct _AbisInSimdTuple; |
1017 | |
1018 | template <typename _Tp> |
1019 | struct _AbisInSimdTuple<_SimdTuple<_Tp>> |
1020 | { |
1021 | using _Counts = index_sequence<0>; |
1022 | using _Begins = index_sequence<0>; |
1023 | }; |
1024 | |
1025 | template <typename _Tp, typename _Ap> |
1026 | struct _AbisInSimdTuple<_SimdTuple<_Tp, _Ap>> |
1027 | { |
1028 | using _Counts = index_sequence<1>; |
1029 | using _Begins = index_sequence<0>; |
1030 | }; |
1031 | |
1032 | template <typename _Tp, typename _A0, typename... _As> |
1033 | struct _AbisInSimdTuple<_SimdTuple<_Tp, _A0, _A0, _As...>> |
1034 | { |
1035 | using _Counts = typename _SeqOp<typename _AbisInSimdTuple< |
1036 | _SimdTuple<_Tp, _A0, _As...>>::_Counts>::_FirstPlusOne; |
1037 | using _Begins = typename _SeqOp<typename _AbisInSimdTuple< |
1038 | _SimdTuple<_Tp, _A0, _As...>>::_Begins>::_NotFirstPlusOne; |
1039 | }; |
1040 | |
1041 | template <typename _Tp, typename _A0, typename _A1, typename... _As> |
1042 | struct _AbisInSimdTuple<_SimdTuple<_Tp, _A0, _A1, _As...>> |
1043 | { |
1044 | using _Counts = typename _SeqOp<typename _AbisInSimdTuple< |
1045 | _SimdTuple<_Tp, _A1, _As...>>::_Counts>::template _Prepend<1, 0>; |
1046 | using _Begins = typename _SeqOp<typename _AbisInSimdTuple< |
1047 | _SimdTuple<_Tp, _A1, _As...>>::_Begins>::template _Prepend<0, 1>; |
1048 | }; |
1049 | |
1050 | // }}} |
1051 | // __autocvt_to_simd {{{ |
1052 | template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>> |
1053 | struct __autocvt_to_simd |
1054 | { |
1055 | _Tp _M_data; |
1056 | using _TT = __remove_cvref_t<_Tp>; |
1057 | |
1058 | constexpr |
1059 | operator _TT() |
1060 | { return _M_data; } |
1061 | |
1062 | constexpr |
1063 | operator _TT&() |
1064 | { |
      static_assert(is_lvalue_reference<_Tp>::value, "");
      static_assert(!is_const<_Tp>::value, "");
1067 | return _M_data; |
1068 | } |
1069 | |
1070 | constexpr |
1071 | operator _TT*() |
1072 | { |
      static_assert(is_lvalue_reference<_Tp>::value, "");
      static_assert(!is_const<_Tp>::value, "");
1075 | return &_M_data; |
1076 | } |
1077 | |
1078 | constexpr inline |
1079 | __autocvt_to_simd(_Tp dd) : _M_data(dd) {} |
1080 | |
1081 | template <typename _Abi> |
1082 | constexpr |
1083 | operator simd<typename _TT::value_type, _Abi>() |
1084 | { return {__private_init, _M_data}; } |
1085 | |
1086 | template <typename _Abi> |
1087 | constexpr |
1088 | operator simd<typename _TT::value_type, _Abi>&() |
1089 | { return *reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); } |
1090 | |
1091 | template <typename _Abi> |
1092 | constexpr |
1093 | operator simd<typename _TT::value_type, _Abi>*() |
1094 | { return reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); } |
1095 | }; |
1096 | |
1097 | template <typename _Tp> |
1098 | __autocvt_to_simd(_Tp &&) -> __autocvt_to_simd<_Tp>; |
1099 | |
1100 | template <typename _Tp> |
1101 | struct __autocvt_to_simd<_Tp, true> |
1102 | { |
1103 | using _TT = __remove_cvref_t<_Tp>; |
1104 | _Tp _M_data; |
1105 | fixed_size_simd<_TT, 1> _M_fd; |
1106 | |
1107 | constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} |
1108 | |
1109 | ~__autocvt_to_simd() |
1110 | { _M_data = __data(_M_fd).first; } |
1111 | |
1112 | constexpr |
1113 | operator fixed_size_simd<_TT, 1>() |
1114 | { return _M_fd; } |
1115 | |
1116 | constexpr |
1117 | operator fixed_size_simd<_TT, 1> &() |
1118 | { |
      static_assert(is_lvalue_reference<_Tp>::value, "");
      static_assert(!is_const<_Tp>::value, "");
1121 | return _M_fd; |
1122 | } |
1123 | |
1124 | constexpr |
1125 | operator fixed_size_simd<_TT, 1> *() |
1126 | { |
      static_assert(is_lvalue_reference<_Tp>::value, "");
      static_assert(!is_const<_Tp>::value, "");
1129 | return &_M_fd; |
1130 | } |
1131 | }; |
1132 | |
1133 | // }}} |
1134 | |
1135 | struct _CommonImplFixedSize; |
1136 | template <int _Np> struct _SimdImplFixedSize; |
1137 | template <int _Np> struct _MaskImplFixedSize; |
1138 | // simd_abi::_Fixed {{{ |
1139 | template <int _Np> |
1140 | struct simd_abi::_Fixed |
1141 | { |
1142 | template <typename _Tp> static constexpr size_t _S_size = _Np; |
1143 | template <typename _Tp> static constexpr size_t _S_full_size = _Np; |
1144 | // validity traits {{{ |
1145 | struct _IsValidAbiTag : public __bool_constant<(_Np > 0)> {}; |
1146 | |
1147 | template <typename _Tp> |
1148 | struct _IsValidSizeFor |
1149 | : __bool_constant<(_Np <= simd_abi::max_fixed_size<_Tp>)> {}; |
1150 | |
1151 | template <typename _Tp> |
1152 | struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>, |
1153 | _IsValidSizeFor<_Tp>> {}; |
1154 | |
1155 | template <typename _Tp> |
1156 | static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value; |
1157 | |
1158 | // }}} |
1159 | // _S_masked {{{ |
1160 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> |
1161 | _S_masked(_BitMask<_Np> __x) |
1162 | { return __x._M_sanitized(); } |
1163 | |
1164 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> |
1165 | _S_masked(_SanitizedBitMask<_Np> __x) |
1166 | { return __x; } |
1167 | |
1168 | // }}} |
1169 | // _*Impl {{{ |
1170 | using _CommonImpl = _CommonImplFixedSize; |
1171 | using _SimdImpl = _SimdImplFixedSize<_Np>; |
1172 | using _MaskImpl = _MaskImplFixedSize<_Np>; |
1173 | |
1174 | // }}} |
1175 | // __traits {{{ |
1176 | template <typename _Tp, bool = _S_is_valid_v<_Tp>> |
1177 | struct __traits : _InvalidTraits {}; |
1178 | |
1179 | template <typename _Tp> |
1180 | struct __traits<_Tp, true> |
1181 | { |
1182 | using _IsValid = true_type; |
1183 | using _SimdImpl = _SimdImplFixedSize<_Np>; |
1184 | using _MaskImpl = _MaskImplFixedSize<_Np>; |
1185 | |
1186 | // simd and simd_mask member types {{{ |
1187 | using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; |
1188 | using _MaskMember = _SanitizedBitMask<_Np>; |
1189 | |
1190 | static constexpr size_t _S_simd_align |
          = std::__bit_ceil(_Np * sizeof(_Tp));
1192 | |
1193 | static constexpr size_t _S_mask_align = alignof(_MaskMember); |
1194 | |
1195 | // }}} |
1196 | // _SimdBase / base class for simd, providing extra conversions {{{ |
1197 | struct _SimdBase |
1198 | { |
        // The following copy constructor ensures that function arguments are
        // passed via the stack. This is important for ABI compatibility
        // across TU boundaries.
1201 | constexpr |
1202 | _SimdBase(const _SimdBase&) {} |
1203 | |
1204 | _SimdBase() = default; |
1205 | |
1206 | constexpr explicit |
1207 | operator const _SimdMember &() const |
1208 | { return static_cast<const simd<_Tp, _Fixed>*>(this)->_M_data; } |
1209 | |
1210 | constexpr explicit |
1211 | operator array<_Tp, _Np>() const |
1212 | { |
1213 | array<_Tp, _Np> __r; |
1214 | // _SimdMember can be larger because of higher alignment |
          static_assert(sizeof(__r) <= sizeof(_SimdMember), "");
1216 | __builtin_memcpy(__r.data(), &static_cast<const _SimdMember&>(*this), |
1217 | sizeof(__r)); |
1218 | return __r; |
1219 | } |
1220 | }; |
1221 | |
1222 | // }}} |
1223 | // _MaskBase {{{ |
1224 | // empty. The bitset interface suffices |
1225 | struct _MaskBase {}; |
1226 | |
1227 | // }}} |
1228 | // _SimdCastType {{{ |
1229 | struct _SimdCastType |
1230 | { |
1231 | constexpr |
1232 | _SimdCastType(const array<_Tp, _Np>&); |
1233 | |
1234 | constexpr |
1235 | _SimdCastType(const _SimdMember& dd) : _M_data(dd) {} |
1236 | |
1237 | constexpr explicit |
1238 | operator const _SimdMember &() const { return _M_data; } |
1239 | |
1240 | private: |
1241 | const _SimdMember& _M_data; |
1242 | }; |
1243 | |
1244 | // }}} |
1245 | // _MaskCastType {{{ |
1246 | class _MaskCastType |
1247 | { |
1248 | _MaskCastType() = delete; |
1249 | }; |
1250 | // }}} |
1251 | }; |
1252 | // }}} |
1253 | }; |
1254 | |
1255 | // }}} |
1256 | // _CommonImplFixedSize {{{ |
1257 | struct _CommonImplFixedSize |
1258 | { |
1259 | // _S_store {{{ |
1260 | template <typename _Tp, typename... _As> |
1261 | _GLIBCXX_SIMD_INTRINSIC static void |
1262 | _S_store(const _SimdTuple<_Tp, _As...>& __x, void* __addr) |
1263 | { |
1264 | constexpr size_t _Np = _SimdTuple<_Tp, _As...>::_S_size(); |
1265 | __builtin_memcpy(__addr, &__x, _Np * sizeof(_Tp)); |
1266 | } |
1267 | |
1268 | // }}} |
1269 | }; |
1270 | |
1271 | // }}} |
1272 | // _SimdImplFixedSize {{{1 |
1273 | // fixed_size should not inherit from _SimdMathFallback in order for |
1274 | // specializations in the used _SimdTuple Abis to get used |
1275 | template <int _Np> |
1276 | struct _SimdImplFixedSize |
1277 | { |
1278 | // member types {{{2 |
1279 | using _MaskMember = _SanitizedBitMask<_Np>; |
1280 | |
1281 | template <typename _Tp> |
1282 | using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; |
1283 | |
1284 | template <typename _Tp> |
1285 | static constexpr size_t _S_tuple_size = _SimdMember<_Tp>::_S_tuple_size; |
1286 | |
1287 | template <typename _Tp> |
1288 | using _Simd = simd<_Tp, simd_abi::fixed_size<_Np>>; |
1289 | |
1290 | template <typename _Tp> |
1291 | using _TypeTag = _Tp*; |
1292 | |
1293 | // broadcast {{{2 |
1294 | template <typename _Tp> |
1295 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> |
1296 | _S_broadcast(_Tp __x) noexcept |
1297 | { |
1298 | return _SimdMember<_Tp>::_S_generate( |
1299 | [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1300 | return __meta._S_broadcast(__x); |
1301 | }); |
1302 | } |
1303 | |
1304 | // _S_generator {{{2 |
1305 | template <typename _Fp, typename _Tp> |
1306 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> |
1307 | _S_generator(_Fp&& __gen, _TypeTag<_Tp>) |
1308 | { |
1309 | return _SimdMember<_Tp>::_S_generate( |
1310 | [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1311 | return __meta._S_generator( |
1312 | [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1313 | return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>()) |
1314 | : 0; |
1315 | }, |
1316 | _TypeTag<_Tp>()); |
1317 | }); |
1318 | } |
1319 | |
1320 | // _S_load {{{2 |
1321 | template <typename _Tp, typename _Up> |
1322 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> |
1323 | _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept |
1324 | { |
1325 | return _SimdMember<_Tp>::_S_generate( |
1326 | [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1327 | return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>()); |
1328 | }); |
1329 | } |
1330 | |
1331 | // _S_masked_load {{{2 |
1332 | template <typename _Tp, typename... _As, typename _Up> |
1333 | _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...> |
1334 | _S_masked_load(const _SimdTuple<_Tp, _As...>& __old, |
1335 | const _MaskMember __bits, const _Up* __mem) noexcept |
1336 | { |
1337 | auto __merge = __old; |
1338 | __for_each(__merge, [&](auto __meta, auto& __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1339 | if (__meta._S_submask(__bits).any()) |
1340 | #pragma GCC diagnostic push |
1341 | // Dereferencing __mem + __meta._S_offset could be UB ([expr.add]/4.3). |
1342 | // It is the responsibility of the caller of the masked load (via the mask's value) to |
1343 | // avoid UB. Consequently, the compiler may assume this branch is unreachable, if the |
1344 | // pointer arithmetic is UB. |
1345 | #pragma GCC diagnostic ignored "-Warray-bounds" |
1346 | __native |
1347 | = __meta._S_masked_load(__native, __meta._S_make_mask(__bits), |
1348 | __mem + __meta._S_offset); |
1349 | #pragma GCC diagnostic pop |
1350 | }); |
1351 | return __merge; |
1352 | } |
1353 | |
1354 | // _S_store {{{2 |
1355 | template <typename _Tp, typename _Up> |
1356 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
1357 | _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept |
1358 | { |
1359 | __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1360 | __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>()); |
1361 | }); |
1362 | } |
1363 | |
1364 | // _S_masked_store {{{2 |
1365 | template <typename _Tp, typename... _As, typename _Up> |
1366 | _GLIBCXX_SIMD_INTRINSIC static void |
1367 | _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem, |
1368 | const _MaskMember __bits) noexcept |
1369 | { |
1370 | __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1371 | if (__meta._S_submask(__bits).any()) |
1372 | #pragma GCC diagnostic push |
          // Dereferencing __mem + __meta._S_offset could be UB
          // ([expr.add]/4.3). It is the responsibility of the caller of the
          // masked store (via the mask's value) to avoid UB. Consequently,
          // the compiler may assume this branch is unreachable, if the
          // pointer arithmetic is UB.
1377 | #pragma GCC diagnostic ignored "-Warray-bounds" |
1378 | __meta._S_masked_store(__native, __mem + __meta._S_offset, |
1379 | __meta._S_make_mask(__bits)); |
1380 | #pragma GCC diagnostic pop |
1381 | }); |
1382 | } |
1383 | |
1384 | // negation {{{2 |
1385 | template <typename _Tp, typename... _As> |
1386 | static constexpr inline _MaskMember |
1387 | _S_negate(const _SimdTuple<_Tp, _As...>& __x) noexcept |
1388 | { |
1389 | _MaskMember __bits = 0; |
1390 | __for_each( |
1391 | __x, [&__bits](auto __meta, auto __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1392 | __bits |
1393 | |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native)); |
1394 | }); |
1395 | return __bits; |
1396 | } |
1397 | |
1398 | // reductions {{{2 |
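    // The general case below reduces chunks pairwise: chunk 2i is combined
    // with chunk 2i+1 via __binary_op (for unequal widths only the
    // overlapping lanes are combined through a masked assignment; the
    // remaining lanes of the wider chunk are carried along unchanged), and
    // the roughly half-sized result is reduced recursively via reduce().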
1399 | template <typename _Tp, typename _BinaryOperation> |
1400 | static constexpr inline _Tp _S_reduce(const _Simd<_Tp>& __x, |
1401 | const _BinaryOperation& __binary_op) |
1402 | { |
1403 | using _Tup = _SimdMember<_Tp>; |
1404 | const _Tup& __tup = __data(__x); |
1405 | if constexpr (_Tup::_S_tuple_size == 1) |
1406 | return _Tup::_FirstAbi::_SimdImpl::_S_reduce( |
1407 | __tup.template _M_simd_at<0>(), __binary_op); |
1408 | else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 2 |
1409 | && _Tup::_SecondType::_S_size() == 1) |
1410 | { |
1411 | return __binary_op(simd<_Tp, simd_abi::scalar>( |
1412 | reduce(__tup.template _M_simd_at<0>(), |
1413 | __binary_op)), |
1414 | __tup.template _M_simd_at<1>())[0]; |
1415 | } |
1416 | else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 4 |
1417 | && _Tup::_SecondType::_S_size() == 2) |
1418 | { |
1419 | return __binary_op( |
1420 | simd<_Tp, simd_abi::scalar>( |
1421 | reduce(__tup.template _M_simd_at<0>(), __binary_op)), |
1422 | simd<_Tp, simd_abi::scalar>( |
1423 | reduce(__tup.template _M_simd_at<1>(), __binary_op)))[0]; |
1424 | } |
1425 | else |
1426 | { |
1427 | const auto& __x2 = __call_with_n_evaluations< |
1428 | __div_roundup(_Tup::_S_tuple_size, 2)>( |
1429 | [](auto __first_simd, auto... __remaining) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1430 | if constexpr (sizeof...(__remaining) == 0) |
1431 | return __first_simd; |
1432 | else |
1433 | { |
1434 | using _Tup2 |
1435 | = _SimdTuple<_Tp, |
1436 | typename decltype(__first_simd)::abi_type, |
1437 | typename decltype(__remaining)::abi_type...>; |
1438 | return fixed_size_simd<_Tp, _Tup2::_S_size()>( |
1439 | __private_init, |
1440 | __make_simd_tuple(__first_simd, __remaining...)); |
1441 | } |
1442 | }, |
1443 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1444 | auto __left = __tup.template _M_simd_at<2 * __i>(); |
1445 | if constexpr (2 * __i + 1 == _Tup::_S_tuple_size) |
1446 | return __left; |
1447 | else |
1448 | { |
1449 | auto __right = __tup.template _M_simd_at<2 * __i + 1>(); |
1450 | using _LT = decltype(__left); |
1451 | using _RT = decltype(__right); |
1452 | if constexpr (_LT::size() == _RT::size()) |
1453 | return __binary_op(__left, __right); |
1454 | else |
1455 | { |
1456 | _GLIBCXX_SIMD_USE_CONSTEXPR_API |
1457 | typename _LT::mask_type __k( |
1458 | __private_init, |
1459 | [](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1460 | return __j < _RT::size(); |
1461 | }); |
1462 | _LT __ext_right = __left; |
1463 | where(__k, __ext_right) |
1464 | = __proposed::resizing_simd_cast<_LT>(__right); |
1465 | where(__k, __left) = __binary_op(__left, __ext_right); |
1466 | return __left; |
1467 | } |
1468 | } |
1469 | }); |
1470 | return reduce(__x2, __binary_op); |
1471 | } |
1472 | } |
1473 | |
1474 | // _S_min, _S_max {{{2 |
1475 | template <typename _Tp, typename... _As> |
1476 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
1477 | _S_min(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) |
1478 | { |
1479 | return __a._M_apply_per_chunk( |
1480 | [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1481 | return __impl._S_min(__aa, __bb); |
1482 | }, |
1483 | __b); |
1484 | } |
1485 | |
1486 | template <typename _Tp, typename... _As> |
1487 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
1488 | _S_max(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) |
1489 | { |
1490 | return __a._M_apply_per_chunk( |
1491 | [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1492 | return __impl._S_max(__aa, __bb); |
1493 | }, |
1494 | __b); |
1495 | } |
1496 | |
1497 | // _S_complement {{{2 |
1498 | template <typename _Tp, typename... _As> |
1499 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
1500 | _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept |
1501 | { |
1502 | return __x._M_apply_per_chunk( |
1503 | [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1504 | return __impl._S_complement(__xx); |
1505 | }); |
1506 | } |
1507 | |
1508 | // _S_unary_minus {{{2 |
1509 | template <typename _Tp, typename... _As> |
1510 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
1511 | _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept |
1512 | { |
1513 | return __x._M_apply_per_chunk( |
1514 | [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1515 | return __impl._S_unary_minus(__xx); |
1516 | }); |
1517 | } |
1518 | |
1519 | // arithmetic operators {{{2 |
1520 | |
1521 | #define _GLIBCXX_SIMD_FIXED_OP(name_, op_) \ |
1522 | template <typename _Tp, typename... _As> \ |
1523 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> name_( \ |
1524 | const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y) \ |
1525 | { \ |
1526 | return __x._M_apply_per_chunk( \ |
1527 | [](auto __impl, auto __xx, auto __yy) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ |
1528 | return __impl.name_(__xx, __yy); \ |
1529 | }, \ |
1530 | __y); \ |
1531 | } |
1532 | |
1533 | _GLIBCXX_SIMD_FIXED_OP(_S_plus, +) |
1534 | _GLIBCXX_SIMD_FIXED_OP(_S_minus, -) |
1535 | _GLIBCXX_SIMD_FIXED_OP(_S_multiplies, *) |
1536 | _GLIBCXX_SIMD_FIXED_OP(_S_divides, /) |
1537 | _GLIBCXX_SIMD_FIXED_OP(_S_modulus, %) |
1538 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_and, &) |
1539 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_or, |) |
1540 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_xor, ^) |
1541 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_left, <<) |
1542 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_right, >>) |
1543 | #undef _GLIBCXX_SIMD_FIXED_OP |
1544 | |
1545 | template <typename _Tp, typename... _As> |
1546 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
1547 | _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y) |
1548 | { |
1549 | return __x._M_apply_per_chunk( |
1550 | [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1551 | return __impl._S_bit_shift_left(__xx, __y); |
1552 | }); |
1553 | } |
1554 | |
1555 | template <typename _Tp, typename... _As> |
1556 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
1557 | _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y) |
1558 | { |
1559 | return __x._M_apply_per_chunk( |
1560 | [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1561 | return __impl._S_bit_shift_right(__xx, __y); |
1562 | }); |
1563 | } |
1564 | |
1565 | // math {{{2 |
1566 | #define _GLIBCXX_SIMD_APPLY_ON_TUPLE(_RetTp, __name) \ |
1567 | template <typename _Tp, typename... _As, typename... _More> \ |
1568 | static inline __fixed_size_storage_t<_RetTp, _Np> \ |
1569 | _S_##__name(const _SimdTuple<_Tp, _As...>& __x, \ |
1570 | const _More&... __more) \ |
1571 | { \ |
1572 | if constexpr (sizeof...(_More) == 0) \ |
1573 | { \ |
1574 | if constexpr (is_same_v<_Tp, _RetTp>) \ |
1575 | return __x._M_apply_per_chunk( \ |
1576 | [](auto __impl, auto __xx) \ |
1577 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ |
1578 | { \ |
1579 | using _V = typename decltype(__impl)::simd_type; \ |
1580 | return __data(__name(_V(__private_init, __xx))); \ |
1581 | }); \ |
1582 | else \ |
1583 | return __optimize_simd_tuple( \ |
1584 | __x.template _M_apply_r<_RetTp>( \ |
1585 | [](auto __impl, auto __xx) \ |
1586 | _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ |
1587 | { return __impl._S_##__name(__xx); })); \ |
1588 | } \ |
1589 | else if constexpr ( \ |
1590 | is_same_v< \ |
1591 | _Tp, \ |
1592 | _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>) ) \ |
1593 | return __x._M_apply_per_chunk( \ |
1594 | [](auto __impl, auto __xx, auto... __pack) \ |
1595 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ |
1596 | { \ |
1597 | using _V = typename decltype(__impl)::simd_type; \ |
1598 | return __data(__name(_V(__private_init, __xx), \ |
1599 | _V(__private_init, __pack)...)); \ |
1600 | }, __more...); \ |
1601 | else if constexpr (is_same_v<_Tp, _RetTp>) \ |
1602 | return __x._M_apply_per_chunk( \ |
1603 | [](auto __impl, auto __xx, auto... __pack) \ |
1604 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ |
1605 | { \ |
1606 | using _V = typename decltype(__impl)::simd_type; \ |
1607 | return __data(__name(_V(__private_init, __xx), \ |
1608 | __autocvt_to_simd(__pack)...)); \ |
1609 | }, __more...); \ |
1610 | else \ |
1611 | __assert_unreachable<_Tp>(); \ |
1612 | } |
1613 | |
1614 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acos) |
1615 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asin) |
1616 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan) |
1617 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan2) |
1618 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cos) |
1619 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sin) |
1620 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tan) |
1621 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acosh) |
1622 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asinh) |
1623 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atanh) |
1624 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cosh) |
1625 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sinh) |
1626 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tanh) |
1627 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp) |
1628 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp2) |
1629 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, expm1) |
1630 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, ilogb) |
1631 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log) |
1632 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log10) |
1633 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log1p) |
1634 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log2) |
1635 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, logb) |
1636 | // modf implemented in simd_math.h |
    // double scalbn(double x, int exp);
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbn)
1639 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbln) |
1640 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cbrt) |
1641 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, abs) |
1642 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fabs) |
1643 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, pow) |
1644 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sqrt) |
1645 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erf) |
1646 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erfc) |
1647 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, lgamma) |
1648 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tgamma) |
1649 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, trunc) |
1650 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ceil) |
1651 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, floor) |
1652 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nearbyint) |
1653 | |
1654 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, rint) |
1655 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lrint) |
1656 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llrint) |
1657 | |
1658 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round) |
1659 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lround) |
1660 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llround) |
1661 | |
1662 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp) |
1663 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod) |
1664 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder) |
    // copysign implemented in simd_math.h
1666 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter) |
1667 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim) |
1668 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax) |
1669 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmin) |
1670 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fma) |
1671 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, fpclassify) |
1672 | #undef _GLIBCXX_SIMD_APPLY_ON_TUPLE |
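    // The macro above dispatches per chunk on the call shape: no extra
    // arguments with matching return type (sin, sqrt, ...); no extra
    // arguments with a different return type (ilogb, fpclassify rebuild an
    // int tuple via _M_apply_r); extra arguments of the same tuple type
    // (atan2, pow, fma, ...); and extra arguments of a different type
    // (ldexp, scalbn pass their int tuple through __autocvt_to_simd).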
1673 | |
1674 | template <typename _Tp, typename... _Abis> |
1675 | static inline _SimdTuple<_Tp, _Abis...> |
1676 | _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x, const _SimdTuple<_Tp, _Abis...>& __y, |
1677 | __fixed_size_storage_t<int, _SimdTuple<_Tp, _Abis...>::_S_size()>* __z) |
1678 | { |
1679 | return __x._M_apply_per_chunk( |
1680 | [](auto __impl, const auto __xx, const auto __yy, auto& __zz) |
1681 | _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA |
1682 | { return __impl._S_remquo(__xx, __yy, &__zz); }, |
1683 | __y, *__z); |
1684 | } |
1685 | |
1686 | template <typename _Tp, typename... _As> |
1687 | static inline _SimdTuple<_Tp, _As...> |
1688 | _S_frexp(const _SimdTuple<_Tp, _As...>& __x, |
1689 | __fixed_size_storage_t<int, _Np>& __exp) noexcept |
1690 | { |
1691 | return __x._M_apply_per_chunk( |
1692 | [](auto __impl, const auto& __a, auto& __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1693 | return __data(frexp(typename decltype(__impl)::simd_type(__private_init, __a), |
1694 | __autocvt_to_simd(__b))); |
1695 | }, __exp); |
1696 | } |
1697 | |
1698 | #define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_) \ |
1699 | template <typename _Tp, typename... _As> \ |
1700 | static inline _MaskMember \ |
1701 | _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept \ |
1702 | { \ |
1703 | return _M_test([] (auto __impl, auto __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ |
1704 | return __impl._S_##name_(__xx); \ |
1705 | }, __x); \ |
1706 | } |
1707 | |
1708 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf) |
1709 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(isfinite) |
1710 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnan) |
1711 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnormal) |
1712 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(signbit) |
1713 | #undef _GLIBCXX_SIMD_TEST_ON_TUPLE_ |
1714 | |
1715 | // _S_increment & _S_decrement{{{2 |
1716 | template <typename... _Ts> |
1717 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
1718 | _S_increment(_SimdTuple<_Ts...>& __x) |
1719 | { |
1720 | __for_each( |
	  __x, [](auto __meta, auto& __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	    __meta._S_increment(__native);
1723 | }); |
1724 | } |
1725 | |
1726 | template <typename... _Ts> |
1727 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
1728 | _S_decrement(_SimdTuple<_Ts...>& __x) |
1729 | { |
1730 | __for_each( |
	  __x, [](auto __meta, auto& __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	    __meta._S_decrement(__native);
1733 | }); |
1734 | } |
1735 | |
1736 | // compares {{{2 |
1737 | #define _GLIBCXX_SIMD_CMP_OPERATIONS(__cmp) \ |
1738 | template <typename _Tp, typename... _As> \ |
1739 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember \ |
1740 | __cmp(const _SimdTuple<_Tp, _As...>& __x, \ |
1741 | const _SimdTuple<_Tp, _As...>& __y) \ |
1742 | { \ |
1743 | return _M_test([](auto __impl, auto __xx, auto __yy) \ |
1744 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ |
1745 | { return __impl.__cmp(__xx, __yy); }, \ |
1746 | __x, __y); \ |
1747 | } |
1748 | |
1749 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to) |
1750 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_not_equal_to) |
1751 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less) |
1752 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less_equal) |
1753 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isless) |
1754 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessequal) |
1755 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreater) |
1756 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreaterequal) |
1757 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessgreater) |
1758 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isunordered) |
1759 | #undef _GLIBCXX_SIMD_CMP_OPERATIONS |
1760 | |
1761 | // smart_reference access {{{2 |
1762 | template <typename _Tp, typename... _As, typename _Up> |
1763 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
1764 | _S_set(_SimdTuple<_Tp, _As...>& __v, int __i, _Up&& __x) noexcept |
1765 | { __v._M_set(__i, static_cast<_Up&&>(__x)); } |
1766 | |
1767 | // _S_masked_assign {{{2 |
1768 | template <typename _Tp, typename... _As> |
1769 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
1770 | _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, |
1771 | const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs) |
1772 | { |
1773 | __for_each(__lhs, __rhs, |
1774 | [&](auto __meta, auto& __native_lhs, auto __native_rhs) |
1775 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA |
1776 | { |
1777 | __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs, |
1778 | __native_rhs); |
1779 | }); |
1780 | } |
1781 | |
1782 | // Optimization for the case where the RHS is a scalar. No need to broadcast |
1783 | // the scalar to a simd first. |
1784 | template <typename _Tp, typename... _As> |
1785 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
1786 | _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, |
1787 | const __type_identity_t<_Tp> __rhs) |
1788 | { |
1789 | __for_each( |
1790 | __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1791 | __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs, |
1792 | __rhs); |
1793 | }); |
1794 | } |
1795 | |
1796 | // _S_masked_cassign {{{2 |
1797 | template <typename _Op, typename _Tp, typename... _As> |
1798 | static constexpr inline void |
1799 | _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, |
1800 | const _SimdTuple<_Tp, _As...>& __rhs, _Op __op) |
1801 | { |
1802 | __for_each(__lhs, __rhs, |
1803 | [&](auto __meta, auto& __native_lhs, auto __native_rhs) |
1804 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA |
1805 | { |
1806 | __meta.template _S_masked_cassign(__meta._S_make_mask(__bits), |
1807 | __native_lhs, __native_rhs, __op); |
1808 | }); |
1809 | } |
1810 | |
1811 | // Optimization for the case where the RHS is a scalar. No need to broadcast |
1812 | // the scalar to a simd first. |
1813 | template <typename _Op, typename _Tp, typename... _As> |
1814 | static constexpr inline void |
1815 | _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, |
1816 | const _Tp& __rhs, _Op __op) |
1817 | { |
1818 | __for_each( |
1819 | __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1820 | __meta.template _S_masked_cassign(__meta._S_make_mask(__bits), |
1821 | __native_lhs, __rhs, __op); |
1822 | }); |
1823 | } |
1824 | |
1825 | // _S_masked_unary {{{2 |
1826 | template <template <typename> class _Op, typename _Tp, typename... _As> |
1827 | static constexpr inline _SimdTuple<_Tp, _As...> |
1828 | _S_masked_unary(const _MaskMember __bits, |
1829 | const _SimdTuple<_Tp, _As...> __v) // TODO: const-ref __v? |
1830 | { |
1831 | return __v._M_apply_wrapped([&__bits](auto __meta, |
1832 | auto __native) constexpr { |
1833 | return __meta.template _S_masked_unary<_Op>(__meta._S_make_mask( |
1834 | __bits), |
1835 | __native); |
1836 | }); |
1837 | } |
1838 | |
1839 | // }}}2 |
1840 | }; |
1841 | |
1842 | // _MaskImplFixedSize {{{1 |
1843 | template <int _Np> |
1844 | struct _MaskImplFixedSize |
1845 | { |
    static_assert(
      sizeof(_ULLong) * __CHAR_BIT__ >= _Np,
      "The fixed_size implementation relies on one _ULLong being able to store "
      "all boolean elements."); // required in load & store
1850 | |
1851 | // member types {{{ |
1852 | using _Abi = simd_abi::fixed_size<_Np>; |
1853 | |
1854 | using _MaskMember = _SanitizedBitMask<_Np>; |
1855 | |
1856 | template <typename _Tp> |
1857 | using _FirstAbi = typename __fixed_size_storage_t<_Tp, _Np>::_FirstAbi; |
1858 | |
1859 | template <typename _Tp> |
1860 | using _TypeTag = _Tp*; |
1861 | |
1862 | // }}} |
1863 | // _S_broadcast {{{ |
1864 | template <typename> |
1865 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1866 | _S_broadcast(bool __x) |
1867 | { return __x ? ~_MaskMember() : _MaskMember(); } |
1868 | |
1869 | // }}} |
1870 | // _S_load {{{ |
1871 | template <typename> |
1872 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1873 | _S_load(const bool* __mem) |
1874 | { |
1875 | if (__builtin_is_constant_evaluated()) |
1876 | { |
1877 | _MaskMember __r{}; |
1878 | for (size_t __i = 0; __i < _Np; ++__i) |
1879 | __r.set(__i, __mem[__i]); |
1880 | return __r; |
1881 | } |
1882 | using _Ip = __int_for_sizeof_t<bool>; |
	  // The following load uses element_aligned and relies on __mem
	  // already carrying alignment information from the call site of
	  // this load function.
1886 | const simd<_Ip, _Abi> __bools(reinterpret_cast<const __may_alias<_Ip>*>( |
1887 | __mem), |
1888 | element_aligned); |
1889 | return __data(__bools != 0); |
1890 | } |
1891 | |
1892 | // }}} |
1893 | // _S_to_bits {{{ |
1894 | template <bool _Sanitized> |
1895 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> |
1896 | _S_to_bits(_BitMask<_Np, _Sanitized> __x) |
1897 | { |
1898 | if constexpr (_Sanitized) |
1899 | return __x; |
1900 | else |
1901 | return __x._M_sanitized(); |
1902 | } |
1903 | |
1904 | // }}} |
1905 | // _S_convert {{{ |
1906 | template <typename _Tp, typename _Up, typename _UAbi> |
1907 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1908 | _S_convert(simd_mask<_Up, _UAbi> __x) |
1909 | { |
1910 | return _UAbi::_MaskImpl::_S_to_bits(__data(__x)) |
1911 | .template _M_extract<0, _Np>(); |
1912 | } |
1913 | |
1914 | // }}} |
1915 | // _S_from_bitmask {{{2 |
1916 | template <typename _Tp> |
1917 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1918 | _S_from_bitmask(_MaskMember __bits, _TypeTag<_Tp>) noexcept |
1919 | { return __bits; } |
1920 | |
1921 | // _S_load {{{2 |
1922 | static constexpr inline _MaskMember |
1923 | _S_load(const bool* __mem) noexcept |
1924 | { |
      // TODO: _UChar is not necessarily the best type to use here. For
      // smaller _Np, _UShort, _UInt, _ULLong, float, or double can be more
      // efficient.
1927 | _ULLong __r = 0; |
1928 | using _Vs = __fixed_size_storage_t<_UChar, _Np>; |
1929 | __for_each(_Vs{}, [&](auto __meta, auto) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1930 | __r |= __meta._S_mask_to_shifted_ullong( |
1931 | __meta._S_mask_impl._S_load(&__mem[__meta._S_offset], |
1932 | _SizeConstant<__meta._S_size()>())); |
1933 | }); |
1934 | return __r; |
1935 | } |
1936 | |
1937 | // _S_masked_load {{{2 |
1938 | static constexpr inline _MaskMember |
1939 | _S_masked_load(_MaskMember __merge, _MaskMember __mask, const bool* __mem) noexcept |
1940 | { |
1941 | _BitOps::_S_bit_iteration(__mask.to_ullong(), |
1942 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1943 | __merge.set(__i, __mem[__i]); |
1944 | }); |
1945 | return __merge; |
1946 | } |
1947 | |
1948 | // _S_store {{{2 |
1949 | static constexpr inline void |
1950 | _S_store(const _MaskMember __bitmask, bool* __mem) noexcept |
1951 | { |
1952 | if constexpr (_Np == 1) |
1953 | __mem[0] = __bitmask[0]; |
1954 | else |
1955 | _FirstAbi<_UChar>::_CommonImpl::_S_store_bool_array(__bitmask, __mem); |
1956 | } |
1957 | |
1958 | // _S_masked_store {{{2 |
1959 | static constexpr inline void |
1960 | _S_masked_store(const _MaskMember __v, bool* __mem, const _MaskMember __k) noexcept |
1961 | { |
1962 | _BitOps::_S_bit_iteration( |
1963 | __k, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; }); |
1964 | } |
1965 | |
1966 | // logical and bitwise operators {{{2 |
1967 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1968 | _S_logical_and(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1969 | { return __x & __y; } |
1970 | |
1971 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1972 | _S_logical_or(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1973 | { return __x | __y; } |
1974 | |
1975 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1976 | _S_bit_not(const _MaskMember& __x) noexcept |
1977 | { return ~__x; } |
1978 | |
1979 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1980 | _S_bit_and(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1981 | { return __x & __y; } |
1982 | |
1983 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1984 | _S_bit_or(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1985 | { return __x | __y; } |
1986 | |
1987 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
1988 | _S_bit_xor(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1989 | { return __x ^ __y; } |
1990 | |
1991 | // smart_reference access {{{2 |
1992 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
1993 | _S_set(_MaskMember& __k, int __i, bool __x) noexcept |
1994 | { __k.set(__i, __x); } |
1995 | |
1996 | // _S_masked_assign {{{2 |
1997 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
1998 | _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const _MaskMember __rhs) |
1999 | { __lhs = (__lhs & ~__k) | (__rhs & __k); } |
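    // Worked example (illustrative, _Np == 4): __lhs == 0b0101, __k == 0b0011,
    // __rhs == 0b1010 gives (0b0101 & ~0b0011) | (0b1010 & 0b0011)
    // == 0b0100 | 0b0010 == 0b0110, i.e. only the selected low bits change.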
2000 | |
2001 | // Optimization for the case where the RHS is a scalar. |
2002 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
2003 | _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const bool __rhs) |
2004 | { |
2005 | if (__rhs) |
2006 | __lhs |= __k; |
2007 | else |
2008 | __lhs &= ~__k; |
2009 | } |
2010 | |
2011 | // }}}2 |
2012 | // _S_all_of {{{ |
2013 | template <typename _Tp> |
2014 | _GLIBCXX_SIMD_INTRINSIC static constexpr bool |
2015 | _S_all_of(simd_mask<_Tp, _Abi> __k) |
2016 | { return __data(__k).all(); } |
2017 | |
2018 | // }}} |
2019 | // _S_any_of {{{ |
2020 | template <typename _Tp> |
2021 | _GLIBCXX_SIMD_INTRINSIC static constexpr bool |
2022 | _S_any_of(simd_mask<_Tp, _Abi> __k) |
2023 | { return __data(__k).any(); } |
2024 | |
2025 | // }}} |
2026 | // _S_none_of {{{ |
2027 | template <typename _Tp> |
2028 | _GLIBCXX_SIMD_INTRINSIC static constexpr bool |
2029 | _S_none_of(simd_mask<_Tp, _Abi> __k) |
2030 | { return __data(__k).none(); } |
2031 | |
2032 | // }}} |
2033 | // _S_some_of {{{ |
2034 | template <typename _Tp> |
2035 | _GLIBCXX_SIMD_INTRINSIC static constexpr bool |
2036 | _S_some_of([[maybe_unused]] simd_mask<_Tp, _Abi> __k) |
2037 | { |
2038 | if constexpr (_Np == 1) |
2039 | return false; |
2040 | else |
2041 | return __data(__k).any() && !__data(__k).all(); |
2042 | } |
2043 | |
2044 | // }}} |
2045 | // _S_popcount {{{ |
2046 | template <typename _Tp> |
2047 | _GLIBCXX_SIMD_INTRINSIC static constexpr int |
2048 | _S_popcount(simd_mask<_Tp, _Abi> __k) |
2049 | { return __data(__k).count(); } |
2050 | |
2051 | // }}} |
2052 | // _S_find_first_set {{{ |
2053 | template <typename _Tp> |
2054 | _GLIBCXX_SIMD_INTRINSIC static constexpr int |
2055 | _S_find_first_set(simd_mask<_Tp, _Abi> __k) |
2056 | { return std::__countr_zero(__data(__k).to_ullong()); } |
2057 | |
2058 | // }}} |
2059 | // _S_find_last_set {{{ |
2060 | template <typename _Tp> |
2061 | _GLIBCXX_SIMD_INTRINSIC static constexpr int |
2062 | _S_find_last_set(simd_mask<_Tp, _Abi> __k) |
2063 | { return std::__bit_width(__data(__k).to_ullong()) - 1; } |
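    // Example values (illustrative): for a mask with bit pattern 0b0110,
    // to_ullong() == 6, so _S_find_first_set yields __countr_zero(6) == 1 and
    // _S_find_last_set yields __bit_width(6) - 1 == 2.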
2064 | |
2065 | // }}} |
2066 | }; |
2067 | // }}}1 |
2068 | |
2069 | _GLIBCXX_SIMD_END_NAMESPACE |
2070 | #endif // __cplusplus >= 201703L |
2071 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_ |
2072 | |
2073 | // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80 |
2074 | |