Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===----------------------------------------------------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H |
10 | #define _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H |
11 | |
12 | #include <__assert> |
13 | #include <__config> |
14 | #include <__iterator/concepts.h> |
15 | #include <__iterator/iterator_traits.h> |
16 | #include <__numeric/transform_reduce.h> |
17 | #include <__pstl/backend_fwd.h> |
18 | #include <__pstl/cpu_algos/cpu_traits.h> |
19 | #include <__type_traits/desugars_to.h> |
20 | #include <__type_traits/is_arithmetic.h> |
21 | #include <__type_traits/is_execution_policy.h> |
22 | #include <__utility/move.h> |
23 | #include <optional> |
24 | |
25 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
26 | # pragma GCC system_header |
27 | #endif |
28 | |
29 | _LIBCPP_PUSH_MACROS |
30 | #include <__undef_macros> |
31 | |
32 | #if _LIBCPP_STD_VER >= 17 |
33 | |
34 | _LIBCPP_BEGIN_NAMESPACE_STD |
35 | namespace __pstl { |
36 | |
37 | template <typename _Backend, |
38 | typename _DifferenceType, |
39 | typename _Tp, |
40 | typename _BinaryOperation, |
41 | typename _UnaryOperation, |
42 | typename _UnaryResult = invoke_result_t<_UnaryOperation, _DifferenceType>, |
43 | __enable_if_t<__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> && |
44 | is_arithmetic_v<_UnaryResult>, |
45 | int> = 0> |
46 | _LIBCPP_HIDE_FROM_ABI _Tp |
47 | __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept { |
48 | _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init) |
49 | for (_DifferenceType __i = 0; __i < __n; ++__i) |
50 | __init += __f(__i); |
51 | return __init; |
52 | } |
53 | |
54 | template <typename _Backend, |
55 | typename _Size, |
56 | typename _Tp, |
57 | typename _BinaryOperation, |
58 | typename _UnaryOperation, |
59 | typename _UnaryResult = invoke_result_t<_UnaryOperation, _Size>, |
60 | __enable_if_t<!(__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> && |
61 | is_arithmetic_v<_UnaryResult>), |
62 | int> = 0> |
63 | _LIBCPP_HIDE_FROM_ABI _Tp |
64 | __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept { |
65 | constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size; |
66 | const _Size __block_size = __lane_size / sizeof(_Tp); |
67 | if (__n > 2 * __block_size && __block_size > 1) { |
68 | alignas(__lane_size) char __lane_buffer[__lane_size]; |
69 | _Tp* __lane = reinterpret_cast<_Tp*>(__lane_buffer); |
70 | |
71 | // initializer |
72 | _PSTL_PRAGMA_SIMD |
73 | for (_Size __i = 0; __i < __block_size; ++__i) { |
74 | ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i))); |
75 | } |
76 | // main loop |
77 | _Size __i = 2 * __block_size; |
78 | const _Size __last_iteration = __block_size * (__n / __block_size); |
79 | for (; __i < __last_iteration; __i += __block_size) { |
80 | _PSTL_PRAGMA_SIMD |
81 | for (_Size __j = 0; __j < __block_size; ++__j) { |
82 | __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__i + __j)); |
83 | } |
84 | } |
85 | // remainder |
86 | _PSTL_PRAGMA_SIMD |
87 | for (_Size __j = 0; __j < __n - __last_iteration; ++__j) { |
88 | __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__last_iteration + __j)); |
89 | } |
90 | // combiner |
91 | for (_Size __j = 0; __j < __block_size; ++__j) { |
92 | __init = __binary_op(std::move(__init), std::move(__lane[__j])); |
93 | } |
94 | // destroyer |
95 | _PSTL_PRAGMA_SIMD |
96 | for (_Size __j = 0; __j < __block_size; ++__j) { |
97 | __lane[__j].~_Tp(); |
98 | } |
99 | } else { |
100 | for (_Size __i = 0; __i < __n; ++__i) { |
101 | __init = __binary_op(std::move(__init), __f(__i)); |
102 | } |
103 | } |
104 | return __init; |
105 | } |
106 | |
107 | template <class _Backend, class _RawExecutionPolicy> |
108 | struct __cpu_parallel_transform_reduce_binary { |
109 | template <class _Policy, |
110 | class _ForwardIterator1, |
111 | class _ForwardIterator2, |
112 | class _Tp, |
113 | class _BinaryOperation1, |
114 | class _BinaryOperation2> |
115 | _LIBCPP_HIDE_FROM_ABI optional<_Tp> operator()( |
116 | _Policy&& __policy, |
117 | _ForwardIterator1 __first1, |
118 | _ForwardIterator1 __last1, |
119 | _ForwardIterator2 __first2, |
120 | _Tp __init, |
121 | _BinaryOperation1 __reduce, |
122 | _BinaryOperation2 __transform) const noexcept { |
123 | if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && |
124 | __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && |
125 | __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { |
126 | return __cpu_traits<_Backend>::__transform_reduce( |
127 | __first1, |
128 | std::move(__last1), |
129 | [__first1, __first2, __transform](_ForwardIterator1 __iter) { |
130 | return __transform(*__iter, *(__first2 + (__iter - __first1))); |
131 | }, |
132 | std::move(__init), |
133 | std::move(__reduce), |
134 | [&__policy, __first1, __first2, __reduce, __transform]( |
135 | _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) { |
136 | using _TransformReduceBinaryUnseq = |
137 | __pstl::__transform_reduce_binary<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>; |
138 | return *_TransformReduceBinaryUnseq()( |
139 | std::__remove_parallel_policy(__policy), |
140 | __brick_first, |
141 | std::move(__brick_last), |
142 | __first2 + (__brick_first - __first1), |
143 | std::move(__brick_init), |
144 | std::move(__reduce), |
145 | std::move(__transform)); |
146 | }); |
147 | } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && |
148 | __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && |
149 | __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { |
150 | return __pstl::__simd_transform_reduce<_Backend>( |
151 | __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) { |
152 | return __transform(__first1[__i], __first2[__i]); |
153 | }); |
154 | } else { |
155 | return std::transform_reduce( |
156 | std::move(__first1), |
157 | std::move(__last1), |
158 | std::move(__first2), |
159 | std::move(__init), |
160 | std::move(__reduce), |
161 | std::move(__transform)); |
162 | } |
163 | } |
164 | }; |
165 | |
166 | template <class _Backend, class _RawExecutionPolicy> |
167 | struct __cpu_parallel_transform_reduce { |
168 | template <class _Policy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation> |
169 | _LIBCPP_HIDE_FROM_ABI optional<_Tp> |
170 | operator()(_Policy&& __policy, |
171 | _ForwardIterator __first, |
172 | _ForwardIterator __last, |
173 | _Tp __init, |
174 | _BinaryOperation __reduce, |
175 | _UnaryOperation __transform) const noexcept { |
176 | if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && |
177 | __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { |
178 | return __cpu_traits<_Backend>::__transform_reduce( |
179 | std::move(__first), |
180 | std::move(__last), |
181 | [__transform](_ForwardIterator __iter) { return __transform(*__iter); }, |
182 | std::move(__init), |
183 | __reduce, |
184 | [&__policy, __transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) { |
185 | using _TransformReduceUnseq = |
186 | __pstl::__transform_reduce<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>; |
187 | auto __res = _TransformReduceUnseq()( |
188 | std::__remove_parallel_policy(__policy), |
189 | std::move(__brick_first), |
190 | std::move(__brick_last), |
191 | std::move(__brick_init), |
192 | std::move(__reduce), |
193 | std::move(__transform)); |
194 | _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); |
195 | return *std::move(__res); |
196 | }); |
197 | } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && |
198 | __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { |
199 | return __pstl::__simd_transform_reduce<_Backend>( |
200 | __last - __first, |
201 | std::move(__init), |
202 | std::move(__reduce), |
203 | [=, &__transform](__iter_diff_t<_ForwardIterator> __i) { return __transform(__first[__i]); }); |
204 | } else { |
205 | return std::transform_reduce( |
206 | std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform)); |
207 | } |
208 | } |
209 | }; |
210 | |
211 | } // namespace __pstl |
212 | _LIBCPP_END_NAMESPACE_STD |
213 | |
214 | #endif // _LIBCPP_STD_VER >= 17 |
215 | |
216 | _LIBCPP_POP_MACROS |
217 | |
218 | #endif // _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H |
219 |
Warning: This file is not a C or C++ file. It does not have highlighting.