Warning: This file is not a C or C++ file. It does not have highlighting.
| 1 | //===----------------------------------------------------------------------===// |
|---|---|
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H |
| 10 | #define _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H |
| 11 | |
| 12 | #include <__assert> |
| 13 | #include <__config> |
| 14 | #include <__iterator/concepts.h> |
| 15 | #include <__iterator/iterator_traits.h> |
| 16 | #include <__numeric/transform_reduce.h> |
| 17 | #include <__pstl/backend_fwd.h> |
| 18 | #include <__pstl/cpu_algos/cpu_traits.h> |
| 19 | #include <__type_traits/desugars_to.h> |
| 20 | #include <__type_traits/is_arithmetic.h> |
| 21 | #include <__type_traits/is_execution_policy.h> |
| 22 | #include <__utility/move.h> |
| 23 | #include <optional> |
| 24 | |
| 25 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 26 | # pragma GCC system_header |
| 27 | #endif |
| 28 | |
| 29 | _LIBCPP_PUSH_MACROS |
| 30 | #include <__undef_macros> |
| 31 | |
| 32 | #if _LIBCPP_STD_VER >= 17 |
| 33 | |
| 34 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 35 | namespace __pstl { |
| 36 | |
| 37 | template <typename _Backend, |
| 38 | typename _DifferenceType, |
| 39 | typename _Tp, |
| 40 | typename _BinaryOperation, |
| 41 | typename _UnaryOperation, |
| 42 | typename _UnaryResult = invoke_result_t<_UnaryOperation, _DifferenceType>, |
| 43 | __enable_if_t<__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> && |
| 44 | is_arithmetic_v<_UnaryResult>, |
| 45 | int> = 0> |
| 46 | _LIBCPP_HIDE_FROM_ABI _Tp |
| 47 | __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept { |
| 48 | _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init) |
| 49 | for (_DifferenceType __i = 0; __i < __n; ++__i) |
| 50 | __init += __f(__i); |
| 51 | return __init; |
| 52 | } |
| 53 | |
| 54 | template <typename _Backend, |
| 55 | typename _Size, |
| 56 | typename _Tp, |
| 57 | typename _BinaryOperation, |
| 58 | typename _UnaryOperation, |
| 59 | typename _UnaryResult = invoke_result_t<_UnaryOperation, _Size>, |
| 60 | __enable_if_t<!(__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> && |
| 61 | is_arithmetic_v<_UnaryResult>), |
| 62 | int> = 0> |
| 63 | _LIBCPP_HIDE_FROM_ABI _Tp |
| 64 | __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept { |
| 65 | constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size; |
| 66 | const _Size __block_size = __lane_size / sizeof(_Tp); |
| 67 | if (__n > 2 * __block_size && __block_size > 1) { |
| 68 | alignas(__lane_size) char __lane_buffer[__lane_size]; |
| 69 | _Tp* __lane = reinterpret_cast<_Tp*>(__lane_buffer); |
| 70 | |
| 71 | // initializer |
| 72 | _PSTL_PRAGMA_SIMD |
| 73 | for (_Size __i = 0; __i < __block_size; ++__i) { |
| 74 | ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i))); |
| 75 | } |
| 76 | // main loop |
| 77 | _Size __i = 2 * __block_size; |
| 78 | const _Size __last_iteration = __block_size * (__n / __block_size); |
| 79 | for (; __i < __last_iteration; __i += __block_size) { |
| 80 | _PSTL_PRAGMA_SIMD |
| 81 | for (_Size __j = 0; __j < __block_size; ++__j) { |
| 82 | __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__i + __j)); |
| 83 | } |
| 84 | } |
| 85 | // remainder |
| 86 | _PSTL_PRAGMA_SIMD |
| 87 | for (_Size __j = 0; __j < __n - __last_iteration; ++__j) { |
| 88 | __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__last_iteration + __j)); |
| 89 | } |
| 90 | // combiner |
| 91 | for (_Size __j = 0; __j < __block_size; ++__j) { |
| 92 | __init = __binary_op(std::move(__init), std::move(__lane[__j])); |
| 93 | } |
| 94 | // destroyer |
| 95 | _PSTL_PRAGMA_SIMD |
| 96 | for (_Size __j = 0; __j < __block_size; ++__j) { |
| 97 | __lane[__j].~_Tp(); |
| 98 | } |
| 99 | } else { |
| 100 | for (_Size __i = 0; __i < __n; ++__i) { |
| 101 | __init = __binary_op(std::move(__init), __f(__i)); |
| 102 | } |
| 103 | } |
| 104 | return __init; |
| 105 | } |
| 106 | |
| 107 | template <class _Backend, class _RawExecutionPolicy> |
| 108 | struct __cpu_parallel_transform_reduce_binary { |
| 109 | template <class _Policy, |
| 110 | class _ForwardIterator1, |
| 111 | class _ForwardIterator2, |
| 112 | class _Tp, |
| 113 | class _BinaryOperation1, |
| 114 | class _BinaryOperation2> |
| 115 | _LIBCPP_HIDE_FROM_ABI optional<_Tp> operator()( |
| 116 | _Policy&& __policy, |
| 117 | _ForwardIterator1 __first1, |
| 118 | _ForwardIterator1 __last1, |
| 119 | _ForwardIterator2 __first2, |
| 120 | _Tp __init, |
| 121 | _BinaryOperation1 __reduce, |
| 122 | _BinaryOperation2 __transform) const noexcept { |
| 123 | if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && |
| 124 | __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && |
| 125 | __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { |
| 126 | return __cpu_traits<_Backend>::__transform_reduce( |
| 127 | __first1, |
| 128 | std::move(__last1), |
| 129 | [__first1, __first2, __transform](_ForwardIterator1 __iter) { |
| 130 | return __transform(*__iter, *(__first2 + (__iter - __first1))); |
| 131 | }, |
| 132 | std::move(__init), |
| 133 | std::move(__reduce), |
| 134 | [&__policy, __first1, __first2, __reduce, __transform]( |
| 135 | _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) { |
| 136 | using _TransformReduceBinaryUnseq = |
| 137 | __pstl::__transform_reduce_binary<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>; |
| 138 | return *_TransformReduceBinaryUnseq()( |
| 139 | std::__remove_parallel_policy(__policy), |
| 140 | __brick_first, |
| 141 | std::move(__brick_last), |
| 142 | __first2 + (__brick_first - __first1), |
| 143 | std::move(__brick_init), |
| 144 | std::move(__reduce), |
| 145 | std::move(__transform)); |
| 146 | }); |
| 147 | } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && |
| 148 | __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && |
| 149 | __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { |
| 150 | return __pstl::__simd_transform_reduce<_Backend>( |
| 151 | __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) { |
| 152 | return __transform(__first1[__i], __first2[__i]); |
| 153 | }); |
| 154 | } else { |
| 155 | return std::transform_reduce( |
| 156 | std::move(__first1), |
| 157 | std::move(__last1), |
| 158 | std::move(__first2), |
| 159 | std::move(__init), |
| 160 | std::move(__reduce), |
| 161 | std::move(__transform)); |
| 162 | } |
| 163 | } |
| 164 | }; |
| 165 | |
| 166 | template <class _Backend, class _RawExecutionPolicy> |
| 167 | struct __cpu_parallel_transform_reduce { |
| 168 | template <class _Policy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation> |
| 169 | _LIBCPP_HIDE_FROM_ABI optional<_Tp> |
| 170 | operator()(_Policy&& __policy, |
| 171 | _ForwardIterator __first, |
| 172 | _ForwardIterator __last, |
| 173 | _Tp __init, |
| 174 | _BinaryOperation __reduce, |
| 175 | _UnaryOperation __transform) const noexcept { |
| 176 | if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && |
| 177 | __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { |
| 178 | return __cpu_traits<_Backend>::__transform_reduce( |
| 179 | std::move(__first), |
| 180 | std::move(__last), |
| 181 | [__transform](_ForwardIterator __iter) { return __transform(*__iter); }, |
| 182 | std::move(__init), |
| 183 | __reduce, |
| 184 | [&__policy, __transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) { |
| 185 | using _TransformReduceUnseq = |
| 186 | __pstl::__transform_reduce<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>; |
| 187 | auto __res = _TransformReduceUnseq()( |
| 188 | std::__remove_parallel_policy(__policy), |
| 189 | std::move(__brick_first), |
| 190 | std::move(__brick_last), |
| 191 | std::move(__brick_init), |
| 192 | std::move(__reduce), |
| 193 | std::move(__transform)); |
| 194 | _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); |
| 195 | return *std::move(__res); |
| 196 | }); |
| 197 | } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && |
| 198 | __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { |
| 199 | return __pstl::__simd_transform_reduce<_Backend>( |
| 200 | __last - __first, |
| 201 | std::move(__init), |
| 202 | std::move(__reduce), |
| 203 | [=, &__transform](__iter_diff_t<_ForwardIterator> __i) { return __transform(__first[__i]); }); |
| 204 | } else { |
| 205 | return std::transform_reduce( |
| 206 | std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform)); |
| 207 | } |
| 208 | } |
| 209 | }; |
| 210 | |
| 211 | } // namespace __pstl |
| 212 | _LIBCPP_END_NAMESPACE_STD |
| 213 | |
| 214 | #endif // _LIBCPP_STD_VER >= 17 |
| 215 | |
| 216 | _LIBCPP_POP_MACROS |
| 217 | |
| 218 | #endif // _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H |
| 219 |
Warning: This file is not a C or C++ file. It does not have highlighting.
