| 1 | // -*- C++ -*- |
| 2 | //===-- numeric_impl.h ----------------------------------------------------===// |
| 3 | // |
| 4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | // See https://llvm.org/LICENSE.txt for license information. |
| 6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef _PSTL_NUMERIC_IMPL_H |
| 11 | #define _PSTL_NUMERIC_IMPL_H |
| 12 | |
| 13 | #include <iterator> |
| 14 | #include <type_traits> |
| 15 | #include <numeric> |
| 16 | |
| 17 | #include "parallel_backend.h" |
| 18 | #include "pstl_config.h" |
| 19 | #include "execution_impl.h" |
| 20 | #include "unseq_backend_simd.h" |
| 21 | #include "algorithm_fwd.h" |
| 22 | |
| 23 | namespace __pstl |
| 24 | { |
| 25 | namespace __internal |
| 26 | { |
| 27 | |
| 28 | //------------------------------------------------------------------------ |
| 29 | // transform_reduce (version with two binary functions, according to draft N4659) |
| 30 | //------------------------------------------------------------------------ |
| 31 | |
| 32 | template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> |
| 33 | _Tp |
| 34 | __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, |
| 35 | _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, |
| 36 | /*is_vector=*/std::false_type) noexcept |
| 37 | { |
| 38 | return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2); |
| 39 | } |
| 40 | |
| 41 | template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> |
| 42 | _Tp |
| 43 | __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, |
| 44 | _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, |
| 45 | /*is_vector=*/std::true_type) noexcept |
| 46 | { |
| 47 | typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; |
| 48 | return __unseq_backend::__simd_transform_reduce( |
| 49 | __last1 - __first1, __init, __binary_op1, |
| 50 | [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); |
| 51 | } |
| 52 | |
| 53 | template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, |
| 54 | class _BinaryOperation2, class _IsVector> |
| 55 | _Tp |
| 56 | __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, |
| 57 | _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, |
| 58 | _BinaryOperation2 __binary_op2, _IsVector __is_vector, |
| 59 | /*is_parallel=*/std::false_type) noexcept |
| 60 | { |
| 61 | return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector); |
| 62 | } |
| 63 | |
| 64 | template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp, |
| 65 | class _BinaryOperation1, class _BinaryOperation2, class _IsVector> |
| 66 | _Tp |
| 67 | __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, |
| 68 | _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, |
| 69 | _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/std::true_type) |
| 70 | { |
| 71 | return __internal::__except_handler([&]() { |
| 72 | return __par_backend::__parallel_transform_reduce( |
| 73 | std::forward<_ExecutionPolicy>(__exec), __first1, __last1, |
| 74 | [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { |
| 75 | return __binary_op2(*__i, *(__first2 + (__i - __first1))); |
| 76 | }, |
| 77 | __init, |
| 78 | __binary_op1, // Combine |
| 79 | [__first1, __first2, __binary_op1, __binary_op2, |
| 80 | __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp { |
| 81 | return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1, |
| 82 | __binary_op2, __is_vector); |
| 83 | }); |
| 84 | }); |
| 85 | } |
| 86 | |
| 87 | //------------------------------------------------------------------------ |
| 88 | // transform_reduce (version with unary and binary functions) |
| 89 | //------------------------------------------------------------------------ |
| 90 | |
| 91 | template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation> |
| 92 | _Tp |
| 93 | __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, |
| 94 | _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept |
| 95 | { |
| 96 | return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op); |
| 97 | } |
| 98 | |
| 99 | template <class _ForwardIterator, class _Tp, class _UnaryOperation, class _BinaryOperation> |
| 100 | _Tp |
| 101 | __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, |
| 102 | _UnaryOperation __unary_op, /*is_vector=*/std::true_type) noexcept |
| 103 | { |
| 104 | typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; |
| 105 | return __unseq_backend::__simd_transform_reduce( |
| 106 | __last - __first, __init, __binary_op, |
| 107 | [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); |
| 108 | } |
| 109 | |
| 110 | template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation, |
| 111 | class _IsVector> |
| 112 | _Tp |
| 113 | __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, |
| 114 | _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, |
| 115 | /*is_parallel=*/std::false_type) noexcept |
| 116 | { |
| 117 | return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector); |
| 118 | } |
| 119 | |
| 120 | template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation, |
| 121 | class _IsVector> |
| 122 | _Tp |
| 123 | __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, |
| 124 | _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, |
| 125 | /*is_parallel=*/std::true_type) |
| 126 | { |
| 127 | return __internal::__except_handler([&]() { |
| 128 | return __par_backend::__parallel_transform_reduce( |
| 129 | std::forward<_ExecutionPolicy>(__exec), __first, __last, |
| 130 | [__unary_op](_ForwardIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, |
| 131 | [__unary_op, __binary_op, __is_vector](_ForwardIterator __i, _ForwardIterator __j, _Tp __init) { |
| 132 | return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector); |
| 133 | }); |
| 134 | }); |
| 135 | } |
| 136 | |
| 137 | //------------------------------------------------------------------------ |
| 138 | // transform_exclusive_scan |
| 139 | // |
| 140 | // walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) |
| 141 | //------------------------------------------------------------------------ |
| 142 | |
| 143 | // Exclusive form |
| 144 | template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> |
| 145 | std::pair<_OutputIterator, _Tp> |
| 146 | __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, |
| 147 | _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
| 148 | /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept |
| 149 | { |
| 150 | for (; __first != __last; ++__first, ++__result) |
| 151 | { |
| 152 | *__result = __init; |
| 153 | _PSTL_PRAGMA_FORCEINLINE |
| 154 | __init = __binary_op(__init, __unary_op(*__first)); |
| 155 | } |
| 156 | return std::make_pair(__result, __init); |
| 157 | } |
| 158 | |
| 159 | // Inclusive form |
| 160 | template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> |
| 161 | std::pair<_OutputIterator, _Tp> |
| 162 | __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, |
| 163 | _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
| 164 | /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept |
| 165 | { |
| 166 | for (; __first != __last; ++__first, ++__result) |
| 167 | { |
| 168 | _PSTL_PRAGMA_FORCEINLINE |
| 169 | __init = __binary_op(__init, __unary_op(*__first)); |
| 170 | *__result = __init; |
| 171 | } |
| 172 | return std::make_pair(__result, __init); |
| 173 | } |
| 174 | |
| 175 | // type is arithmetic and binary operation is a user defined operation. |
| 176 | template <typename _Tp, typename _BinaryOperation> |
| 177 | using is_arithmetic_udop = std::integral_constant<bool, std::is_arithmetic<_Tp>::value && |
| 178 | !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>; |
| 179 | |
| 180 | // [restriction] - T shall be DefaultConstructible. |
| 181 | // [violation] - default ctor of T shall set the identity value for binary_op. |
| 182 | template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, |
| 183 | class _Inclusive> |
| 184 | typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type |
| 185 | __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, |
| 186 | _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, |
| 187 | /*is_vector=*/std::true_type) noexcept |
| 188 | { |
| 189 | #if (_PSTL_UDS_PRESENT) |
| 190 | return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op, |
| 191 | _Inclusive()); |
| 192 | #else |
| 193 | // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value |
| 194 | return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), |
| 195 | /*is_vector=*/std::false_type()); |
| 196 | #endif |
| 197 | } |
| 198 | |
| 199 | template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, |
| 200 | class _Inclusive> |
| 201 | typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type |
| 202 | __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, |
| 203 | _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, |
| 204 | /*is_vector=*/std::true_type) noexcept |
| 205 | { |
| 206 | return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), |
| 207 | /*is_vector=*/std::false_type()); |
| 208 | } |
| 209 | |
| 210 | template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, |
| 211 | class _BinaryOperation, class _Inclusive, class _IsVector> |
| 212 | _OutputIterator |
| 213 | __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, |
| 214 | _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
| 215 | _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept |
| 216 | { |
| 217 | return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), |
| 218 | __is_vector) |
| 219 | .first; |
| 220 | } |
| 221 | |
| 222 | template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, |
| 223 | class _BinaryOperation, class _Inclusive, class _IsVector> |
| 224 | typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type |
| 225 | __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, |
| 226 | _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
| 227 | _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type) |
| 228 | { |
| 229 | typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; |
| 230 | |
| 231 | return __internal::__except_handler([&]() { |
| 232 | __par_backend::__parallel_transform_scan( |
| 233 | std::forward<_ExecutionPolicy>(__exec), __last - __first, |
| 234 | [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, |
| 235 | __binary_op, |
| 236 | [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { |
| 237 | // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan. |
| 238 | return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op, |
| 239 | __unary_op, |
| 240 | /*__is_vector*/ std::false_type()); |
| 241 | }, |
| 242 | [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j, |
| 243 | _Tp __init) { |
| 244 | return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, |
| 245 | __init, __binary_op, _Inclusive(), __is_vector) |
| 246 | .second; |
| 247 | }); |
| 248 | return __result + (__last - __first); |
| 249 | }); |
| 250 | } |
| 251 | |
| 252 | template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, |
| 253 | class _BinaryOperation, class _Inclusive, class _IsVector> |
| 254 | typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type |
| 255 | __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, |
| 256 | _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, |
| 257 | _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type) |
| 258 | { |
| 259 | typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; |
| 260 | _DifferenceType __n = __last - __first; |
| 261 | |
| 262 | if (__n <= 0) |
| 263 | { |
| 264 | return __result; |
| 265 | } |
| 266 | return __internal::__except_handler([&]() { |
| 267 | __par_backend::__parallel_strict_scan( |
| 268 | std::forward<_ExecutionPolicy>(__exec), __n, __init, |
| 269 | [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { |
| 270 | return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, |
| 271 | __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector) |
| 272 | .second; |
| 273 | }, |
| 274 | __binary_op, |
| 275 | [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) { |
| 276 | return *(std::transform(__result + __i, __result + __i + __len, __result + __i, |
| 277 | [&__initial, &__binary_op](const _Tp& __x) { |
| 278 | _PSTL_PRAGMA_FORCEINLINE |
| 279 | return __binary_op(__initial, __x); |
| 280 | }) - |
| 281 | 1); |
| 282 | }, |
| 283 | [](_Tp) {}); |
| 284 | return __result + (__last - __first); |
| 285 | }); |
| 286 | } |
| 287 | |
| 288 | //------------------------------------------------------------------------ |
| 289 | // adjacent_difference |
| 290 | //------------------------------------------------------------------------ |
| 291 | |
| 292 | template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation> |
| 293 | _OutputIterator |
| 294 | __brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first, |
| 295 | _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept |
| 296 | { |
| 297 | return std::adjacent_difference(__first, __last, __d_first, __op); |
| 298 | } |
| 299 | |
| 300 | template <class _ForwardIterator1, class _ForwardIterator2, class BinaryOperation> |
| 301 | _ForwardIterator2 |
| 302 | __brick_adjacent_difference(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first, |
| 303 | BinaryOperation __op, /*is_vector=*/std::true_type) noexcept |
| 304 | { |
| 305 | _PSTL_ASSERT(__first != __last); |
| 306 | |
| 307 | typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; |
| 308 | typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; |
| 309 | |
| 310 | auto __n = __last - __first; |
| 311 | *__d_first = *__first; |
| 312 | return __unseq_backend::__simd_walk_3( |
| 313 | __first + 1, __n - 1, __first, __d_first + 1, |
| 314 | [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); |
| 315 | } |
| 316 | |
| 317 | template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation, |
| 318 | class _IsVector> |
| 319 | _OutputIterator |
| 320 | __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, |
| 321 | _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector, |
| 322 | /*is_parallel*/ std::false_type) noexcept |
| 323 | { |
| 324 | return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector); |
| 325 | } |
| 326 | |
| 327 | template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation, |
| 328 | class _IsVector> |
| 329 | _ForwardIterator2 |
| 330 | __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, |
| 331 | _ForwardIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector, |
| 332 | /*is_parallel=*/std::true_type) |
| 333 | { |
| 334 | _PSTL_ASSERT(__first != __last); |
| 335 | typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; |
| 336 | typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; |
| 337 | |
| 338 | *__d_first = *__first; |
| 339 | __par_backend::__parallel_for( |
| 340 | std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, |
| 341 | [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __b, _ForwardIterator1 __e) { |
| 342 | _ForwardIterator2 __d_b = __d_first + (__b - __first); |
| 343 | __internal::__brick_walk3( |
| 344 | __b, __e, __b + 1, __d_b + 1, |
| 345 | [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, |
| 346 | __is_vector); |
| 347 | }); |
| 348 | return __d_first + (__last - __first); |
| 349 | } |
| 350 | |
| 351 | } // namespace __internal |
| 352 | } // namespace __pstl |
| 353 | |
| 354 | #endif /* _PSTL_NUMERIC_IMPL_H */ |
| 355 | |