1 | // -*- C++ -*- |
2 | //===-- numeric_impl.h ----------------------------------------------------===// |
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef _PSTL_NUMERIC_IMPL_H |
11 | #define _PSTL_NUMERIC_IMPL_H |
12 | |
13 | #include <iterator> |
14 | #include <type_traits> |
15 | #include <numeric> |
16 | |
17 | #include "parallel_backend.h" |
18 | #include "pstl_config.h" |
19 | #include "execution_impl.h" |
20 | #include "unseq_backend_simd.h" |
21 | #include "algorithm_fwd.h" |
22 | |
23 | namespace __pstl |
24 | { |
25 | namespace __internal |
26 | { |
27 | |
28 | //------------------------------------------------------------------------ |
29 | // transform_reduce (version with two binary functions, according to draft N4659) |
30 | //------------------------------------------------------------------------ |
31 | |
32 | template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> |
33 | _Tp |
34 | __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, |
35 | _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, |
36 | /*is_vector=*/std::false_type) noexcept |
37 | { |
38 | return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2); |
39 | } |
40 | |
41 | template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> |
42 | _Tp |
43 | __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, |
44 | _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, |
45 | /*is_vector=*/std::true_type) noexcept |
46 | { |
47 | typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; |
48 | return __unseq_backend::__simd_transform_reduce( |
49 | __last1 - __first1, __init, __binary_op1, |
50 | [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); |
51 | } |
52 | |
53 | template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, |
54 | class _BinaryOperation2, class _IsVector> |
55 | _Tp |
56 | __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, |
57 | _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, |
58 | _BinaryOperation2 __binary_op2, _IsVector __is_vector, |
59 | /*is_parallel=*/std::false_type) noexcept |
60 | { |
61 | return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector); |
62 | } |
63 | |
64 | template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp, |
65 | class _BinaryOperation1, class _BinaryOperation2, class _IsVector> |
66 | _Tp |
67 | __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, |
68 | _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, |
69 | _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/std::true_type) |
70 | { |
71 | return __internal::__except_handler([&]() { |
72 | return __par_backend::__parallel_transform_reduce( |
73 | std::forward<_ExecutionPolicy>(__exec), __first1, __last1, |
74 | [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { |
75 | return __binary_op2(*__i, *(__first2 + (__i - __first1))); |
76 | }, |
77 | __init, |
78 | __binary_op1, // Combine |
79 | [__first1, __first2, __binary_op1, __binary_op2, |
80 | __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp { |
81 | return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1, |
82 | __binary_op2, __is_vector); |
83 | }); |
84 | }); |
85 | } |
86 | |
87 | //------------------------------------------------------------------------ |
88 | // transform_reduce (version with unary and binary functions) |
89 | //------------------------------------------------------------------------ |
90 | |
91 | template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation> |
92 | _Tp |
93 | __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, |
94 | _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept |
95 | { |
96 | return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op); |
97 | } |
98 | |
99 | template <class _ForwardIterator, class _Tp, class _UnaryOperation, class _BinaryOperation> |
100 | _Tp |
101 | __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, |
102 | _UnaryOperation __unary_op, /*is_vector=*/std::true_type) noexcept |
103 | { |
104 | typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; |
105 | return __unseq_backend::__simd_transform_reduce( |
106 | __last - __first, __init, __binary_op, |
107 | [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); |
108 | } |
109 | |
110 | template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation, |
111 | class _IsVector> |
112 | _Tp |
113 | __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, |
114 | _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, |
115 | /*is_parallel=*/std::false_type) noexcept |
116 | { |
117 | return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector); |
118 | } |
119 | |
120 | template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation, |
121 | class _IsVector> |
122 | _Tp |
123 | __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, |
124 | _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, |
125 | /*is_parallel=*/std::true_type) |
126 | { |
127 | return __internal::__except_handler([&]() { |
128 | return __par_backend::__parallel_transform_reduce( |
129 | std::forward<_ExecutionPolicy>(__exec), __first, __last, |
130 | [__unary_op](_ForwardIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, |
131 | [__unary_op, __binary_op, __is_vector](_ForwardIterator __i, _ForwardIterator __j, _Tp __init) { |
132 | return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector); |
133 | }); |
134 | }); |
135 | } |
136 | |
137 | //------------------------------------------------------------------------ |
138 | // transform_exclusive_scan |
139 | // |
140 | // walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) |
141 | //------------------------------------------------------------------------ |
142 | |
143 | // Exclusive form |
144 | template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> |
145 | std::pair<_OutputIterator, _Tp> |
146 | __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, |
147 | _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
148 | /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept |
149 | { |
150 | for (; __first != __last; ++__first, ++__result) |
151 | { |
152 | *__result = __init; |
153 | _PSTL_PRAGMA_FORCEINLINE |
154 | __init = __binary_op(__init, __unary_op(*__first)); |
155 | } |
156 | return std::make_pair(__result, __init); |
157 | } |
158 | |
159 | // Inclusive form |
160 | template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> |
161 | std::pair<_OutputIterator, _Tp> |
162 | __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, |
163 | _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
164 | /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept |
165 | { |
166 | for (; __first != __last; ++__first, ++__result) |
167 | { |
168 | _PSTL_PRAGMA_FORCEINLINE |
169 | __init = __binary_op(__init, __unary_op(*__first)); |
170 | *__result = __init; |
171 | } |
172 | return std::make_pair(__result, __init); |
173 | } |
174 | |
175 | // type is arithmetic and binary operation is a user defined operation. |
176 | template <typename _Tp, typename _BinaryOperation> |
177 | using is_arithmetic_udop = std::integral_constant<bool, std::is_arithmetic<_Tp>::value && |
178 | !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>; |
179 | |
180 | // [restriction] - T shall be DefaultConstructible. |
181 | // [violation] - default ctor of T shall set the identity value for binary_op. |
182 | template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, |
183 | class _Inclusive> |
184 | typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type |
185 | __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, |
186 | _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, |
187 | /*is_vector=*/std::true_type) noexcept |
188 | { |
189 | #if (_PSTL_UDS_PRESENT) |
190 | return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op, |
191 | _Inclusive()); |
192 | #else |
193 | // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value |
194 | return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), |
195 | /*is_vector=*/std::false_type()); |
196 | #endif |
197 | } |
198 | |
199 | template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, |
200 | class _Inclusive> |
201 | typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type |
202 | __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, |
203 | _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, |
204 | /*is_vector=*/std::true_type) noexcept |
205 | { |
206 | return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), |
207 | /*is_vector=*/std::false_type()); |
208 | } |
209 | |
210 | template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, |
211 | class _BinaryOperation, class _Inclusive, class _IsVector> |
212 | _OutputIterator |
213 | __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, |
214 | _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
215 | _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept |
216 | { |
217 | return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), |
218 | __is_vector) |
219 | .first; |
220 | } |
221 | |
222 | template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, |
223 | class _BinaryOperation, class _Inclusive, class _IsVector> |
224 | typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type |
225 | __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, |
226 | _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
227 | _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type) |
228 | { |
229 | typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; |
230 | |
231 | return __internal::__except_handler([&]() { |
232 | __par_backend::__parallel_transform_scan( |
233 | std::forward<_ExecutionPolicy>(__exec), __last - __first, |
234 | [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, |
235 | __binary_op, |
236 | [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { |
237 | // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan. |
238 | return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op, |
239 | __unary_op, |
240 | /*__is_vector*/ std::false_type()); |
241 | }, |
242 | [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j, |
243 | _Tp __init) { |
244 | return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, |
245 | __init, __binary_op, _Inclusive(), __is_vector) |
246 | .second; |
247 | }); |
248 | return __result + (__last - __first); |
249 | }); |
250 | } |
251 | |
252 | template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, |
253 | class _BinaryOperation, class _Inclusive, class _IsVector> |
254 | typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type |
255 | __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, |
256 | _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
257 | _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type) |
258 | { |
259 | typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; |
260 | _DifferenceType __n = __last - __first; |
261 | |
262 | if (__n <= 0) |
263 | { |
264 | return __result; |
265 | } |
266 | return __internal::__except_handler([&]() { |
267 | __par_backend::__parallel_strict_scan( |
268 | std::forward<_ExecutionPolicy>(__exec), __n, __init, |
269 | [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { |
270 | return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, |
271 | __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector) |
272 | .second; |
273 | }, |
274 | __binary_op, |
275 | [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) { |
276 | return *(std::transform(__result + __i, __result + __i + __len, __result + __i, |
277 | [&__initial, &__binary_op](const _Tp& __x) { |
278 | _PSTL_PRAGMA_FORCEINLINE |
279 | return __binary_op(__initial, __x); |
280 | }) - |
281 | 1); |
282 | }, |
283 | [](_Tp) {}); |
284 | return __result + (__last - __first); |
285 | }); |
286 | } |
287 | |
288 | //------------------------------------------------------------------------ |
289 | // adjacent_difference |
290 | //------------------------------------------------------------------------ |
291 | |
292 | template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation> |
293 | _OutputIterator |
294 | __brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first, |
295 | _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept |
296 | { |
297 | return std::adjacent_difference(__first, __last, __d_first, __op); |
298 | } |
299 | |
300 | template <class _ForwardIterator1, class _ForwardIterator2, class BinaryOperation> |
301 | _ForwardIterator2 |
302 | __brick_adjacent_difference(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first, |
303 | BinaryOperation __op, /*is_vector=*/std::true_type) noexcept |
304 | { |
305 | _PSTL_ASSERT(__first != __last); |
306 | |
307 | typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; |
308 | typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; |
309 | |
310 | auto __n = __last - __first; |
311 | *__d_first = *__first; |
312 | return __unseq_backend::__simd_walk_3( |
313 | __first + 1, __n - 1, __first, __d_first + 1, |
314 | [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); |
315 | } |
316 | |
317 | template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation, |
318 | class _IsVector> |
319 | _OutputIterator |
320 | __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, |
321 | _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector, |
322 | /*is_parallel*/ std::false_type) noexcept |
323 | { |
324 | return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector); |
325 | } |
326 | |
327 | template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation, |
328 | class _IsVector> |
329 | _ForwardIterator2 |
330 | __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, |
331 | _ForwardIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector, |
332 | /*is_parallel=*/std::true_type) |
333 | { |
334 | _PSTL_ASSERT(__first != __last); |
335 | typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; |
336 | typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; |
337 | |
338 | *__d_first = *__first; |
339 | __par_backend::__parallel_for( |
340 | std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, |
341 | [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __b, _ForwardIterator1 __e) { |
342 | _ForwardIterator2 __d_b = __d_first + (__b - __first); |
343 | __internal::__brick_walk3( |
344 | __b, __e, __b + 1, __d_b + 1, |
345 | [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, |
346 | __is_vector); |
347 | }); |
348 | return __d_first + (__last - __first); |
349 | } |
350 | |
351 | } // namespace __internal |
352 | } // namespace __pstl |
353 | |
354 | #endif /* _PSTL_NUMERIC_IMPL_H */ |
355 | |