| 1 | /* |
| 2 | Copyright (c) 2005-2021 Intel Corporation |
| 3 | |
| 4 | Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | you may not use this file except in compliance with the License. |
| 6 | You may obtain a copy of the License at |
| 7 | |
| 8 | http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | |
| 10 | Unless required by applicable law or agreed to in writing, software |
| 11 | distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | See the License for the specific language governing permissions and |
| 14 | limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef __TBB_parallel_reduce_H |
| 18 | #define __TBB_parallel_reduce_H |
| 19 | |
| 20 | #include <new> |
| 21 | #include "detail/_namespace_injection.h" |
| 22 | #include "detail/_task.h" |
| 23 | #include "detail/_aligned_space.h" |
| 24 | #include "detail/_small_object_pool.h" |
| 25 | #include "detail/_range_common.h" |
| 26 | |
| 27 | #include "task_group.h" // task_group_context |
| 28 | #include "partitioner.h" |
| 29 | #include "profiling.h" |
| 30 | |
| 31 | namespace tbb { |
| 32 | namespace detail { |
| 33 | #if __TBB_CPP20_CONCEPTS_PRESENT |
| 34 | inline namespace d0 { |
| 35 | |
| 36 | template <typename Body, typename Range> |
| 37 | concept parallel_reduce_body = splittable<Body> && |
| 38 | requires( Body& body, const Range& range, Body& rhs ) { |
| 39 | body(range); |
| 40 | body.join(rhs); |
| 41 | }; |
| 42 | |
| 43 | template <typename Function, typename Range, typename Value> |
| 44 | concept parallel_reduce_function = requires( const std::remove_reference_t<Function>& func, |
| 45 | const Range& range, |
| 46 | const Value& value ) { |
| 47 | { func(range, value) } -> std::convertible_to<Value>; |
| 48 | }; |
| 49 | |
| 50 | template <typename Combine, typename Value> |
| 51 | concept parallel_reduce_combine = requires( const std::remove_reference_t<Combine>& combine, |
| 52 | const Value& lhs, const Value& rhs ) { |
| 53 | { combine(lhs, rhs) } -> std::convertible_to<Value>; |
| 54 | }; |
| 55 | |
| 56 | } // namespace d0 |
| 57 | #endif // __TBB_CPP20_CONCEPTS_PRESENT |
| 58 | namespace d1 { |
| 59 | |
| 60 | //! Tree node type for parallel_reduce. |
| 61 | /** @ingroup algorithms */ |
| 62 | //TODO: consider folding tree via bypass execution(instead of manual folding) |
| 63 | // for better cancellation and critical tasks handling (performance measurements required). |
| 64 | template<typename Body> |
| 65 | struct reduction_tree_node : public tree_node { |
| 66 | tbb::detail::aligned_space<Body> zombie_space; |
| 67 | Body& left_body; |
| 68 | bool has_right_zombie{false}; |
| 69 | |
| 70 | reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : |
| 71 | tree_node{parent, ref_count, alloc}, |
| 72 | left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ |
| 73 | {} |
| 74 | |
| 75 | void join(task_group_context* context) { |
| 76 | if (has_right_zombie && !context->is_group_execution_cancelled()) |
| 77 | left_body.join(*zombie_space.begin()); |
| 78 | } |
| 79 | |
| 80 | ~reduction_tree_node() { |
| 81 | if( has_right_zombie ) zombie_space.begin()->~Body(); |
| 82 | } |
| 83 | }; |
| 84 | |
| 85 | //! Task type used to split the work of parallel_reduce. |
| 86 | /** @ingroup algorithms */ |
| 87 | template<typename Range, typename Body, typename Partitioner> |
| 88 | struct start_reduce : public task { |
| 89 | Range my_range; |
| 90 | Body* my_body; |
| 91 | node* my_parent; |
| 92 | |
| 93 | typename Partitioner::task_partition_type my_partition; |
| 94 | small_object_allocator my_allocator; |
| 95 | bool is_right_child; |
| 96 | |
| 97 | task* execute(execution_data&) override; |
| 98 | task* cancel(execution_data&) override; |
| 99 | void finalize(const execution_data&); |
| 100 | |
| 101 | using tree_node_type = reduction_tree_node<Body>; |
| 102 | |
| 103 | //! Constructor reduce root task. |
| 104 | start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : |
| 105 | my_range(range), |
| 106 | my_body(&body), |
| 107 | my_partition(partitioner), |
| 108 | my_allocator(alloc), |
| 109 | is_right_child(false) {} |
| 110 | //! Splitting constructor used to generate children. |
| 111 | /** parent_ becomes left child. Newly constructed object is right child. */ |
| 112 | start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : |
| 113 | my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), |
| 114 | my_body(parent_.my_body), |
| 115 | my_partition(parent_.my_partition, split_obj), |
| 116 | my_allocator(alloc), |
| 117 | is_right_child(true) |
| 118 | { |
| 119 | parent_.is_right_child = false; |
| 120 | } |
| 121 | //! Construct right child from the given range as response to the demand. |
| 122 | /** parent_ remains left child. Newly constructed object is right child. */ |
| 123 | start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : |
| 124 | my_range(r), |
| 125 | my_body(parent_.my_body), |
| 126 | my_partition(parent_.my_partition, split()), |
| 127 | my_allocator(alloc), |
| 128 | is_right_child(true) |
| 129 | { |
| 130 | my_partition.align_depth( d ); |
| 131 | parent_.is_right_child = false; |
| 132 | } |
| 133 | static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { |
| 134 | if ( !range.empty() ) { |
| 135 | wait_node wn; |
| 136 | small_object_allocator alloc{}; |
| 137 | auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc); |
| 138 | reduce_task->my_parent = &wn; |
| 139 | execute_and_wait(*reduce_task, context, wn.m_wait, context); |
| 140 | } |
| 141 | } |
| 142 | static void run(const Range& range, Body& body, Partitioner& partitioner) { |
| 143 | // Bound context prevents exceptions from body to affect nesting or sibling algorithms, |
| 144 | // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. |
| 145 | task_group_context context(PARALLEL_REDUCE); |
| 146 | run(range, body, partitioner, context); |
| 147 | } |
| 148 | //! Run body for range, serves as callback for partitioner |
| 149 | void run_body( Range &r ) { |
| 150 | (*my_body)(r); |
| 151 | } |
| 152 | |
| 153 | //! spawn right task, serves as callback for partitioner |
| 154 | void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { |
| 155 | offer_work_impl(ed, *this, split_obj); |
| 156 | } |
| 157 | //! spawn right task, serves as callback for partitioner |
| 158 | void offer_work(const Range& r, depth_t d, execution_data& ed) { |
| 159 | offer_work_impl(ed, *this, r, d); |
| 160 | } |
| 161 | |
| 162 | private: |
| 163 | template <typename... Args> |
| 164 | void offer_work_impl(execution_data& ed, Args&&... args) { |
| 165 | small_object_allocator alloc{}; |
| 166 | // New right child |
| 167 | auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc); |
| 168 | |
| 169 | // New root node as a continuation and ref count. Left and right child attach to the new parent. |
| 170 | right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc); |
| 171 | |
| 172 | // Spawn the right sibling |
| 173 | right_child->spawn_self(ed); |
| 174 | } |
| 175 | |
| 176 | void spawn_self(execution_data& ed) { |
| 177 | my_partition.spawn_task(*this, *context(ed)); |
| 178 | } |
| 179 | }; |
| 180 | |
| 181 | //! fold the tree and deallocate the task |
| 182 | template<typename Range, typename Body, typename Partitioner> |
| 183 | void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { |
| 184 | // Get the current parent and wait object before an object destruction |
| 185 | node* parent = my_parent; |
| 186 | auto allocator = my_allocator; |
| 187 | // Task execution finished - destroy it |
| 188 | this->~start_reduce(); |
| 189 | // Unwind the tree decrementing the parent`s reference count |
| 190 | fold_tree<tree_node_type>(parent, ed); |
| 191 | allocator.deallocate(this, ed); |
| 192 | } |
| 193 | |
| 194 | //! Execute parallel_reduce task |
| 195 | template<typename Range, typename Body, typename Partitioner> |
| 196 | task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { |
| 197 | if (!is_same_affinity(ed)) { |
| 198 | my_partition.note_affinity(execution_slot(ed)); |
| 199 | } |
| 200 | my_partition.check_being_stolen(*this, ed); |
| 201 | |
| 202 | // The acquire barrier synchronizes the data pointed with my_body if the left |
| 203 | // task has already finished. |
| 204 | if( is_right_child && my_parent->m_ref_count.load(m: std::memory_order_acquire) == 2 ) { |
| 205 | tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); |
| 206 | my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); |
| 207 | parent_ptr->has_right_zombie = true; |
| 208 | } |
| 209 | __TBB_ASSERT(my_body != nullptr, "Incorrect body value" ); |
| 210 | |
| 211 | my_partition.execute(*this, my_range, ed); |
| 212 | |
| 213 | finalize(ed); |
| 214 | return nullptr; |
| 215 | } |
| 216 | |
| 217 | //! Cancel parallel_reduce task |
| 218 | template<typename Range, typename Body, typename Partitioner> |
| 219 | task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { |
| 220 | finalize(ed); |
| 221 | return nullptr; |
| 222 | } |
| 223 | |
| 224 | //! Tree node type for parallel_deterministic_reduce. |
| 225 | /** @ingroup algorithms */ |
| 226 | template<typename Body> |
| 227 | struct deterministic_reduction_tree_node : public tree_node { |
| 228 | Body right_body; |
| 229 | Body& left_body; |
| 230 | |
| 231 | deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : |
| 232 | tree_node{parent, ref_count, alloc}, |
| 233 | right_body{input_left_body, detail::split()}, |
| 234 | left_body(input_left_body) |
| 235 | {} |
| 236 | |
| 237 | void join(task_group_context* context) { |
| 238 | if (!context->is_group_execution_cancelled()) |
| 239 | left_body.join(right_body); |
| 240 | } |
| 241 | }; |
| 242 | |
| 243 | //! Task type used to split the work of parallel_deterministic_reduce. |
| 244 | /** @ingroup algorithms */ |
| 245 | template<typename Range, typename Body, typename Partitioner> |
| 246 | struct start_deterministic_reduce : public task { |
| 247 | Range my_range; |
| 248 | Body& my_body; |
| 249 | node* my_parent; |
| 250 | |
| 251 | typename Partitioner::task_partition_type my_partition; |
| 252 | small_object_allocator my_allocator; |
| 253 | |
| 254 | task* execute(execution_data&) override; |
| 255 | task* cancel(execution_data&) override; |
| 256 | void finalize(const execution_data&); |
| 257 | |
| 258 | using tree_node_type = deterministic_reduction_tree_node<Body>; |
| 259 | |
| 260 | //! Constructor deterministic_reduce root task. |
| 261 | start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : |
| 262 | my_range(range), |
| 263 | my_body(body), |
| 264 | my_partition(partitioner), |
| 265 | my_allocator(alloc) {} |
| 266 | //! Splitting constructor used to generate children. |
| 267 | /** parent_ becomes left child. Newly constructed object is right child. */ |
| 268 | start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, |
| 269 | small_object_allocator& alloc ) : |
| 270 | my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), |
| 271 | my_body(body), |
| 272 | my_partition(parent_.my_partition, split_obj), |
| 273 | my_allocator(alloc) {} |
| 274 | static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { |
| 275 | if ( !range.empty() ) { |
| 276 | wait_node wn; |
| 277 | small_object_allocator alloc{}; |
| 278 | auto deterministic_reduce_task = |
| 279 | alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc); |
| 280 | deterministic_reduce_task->my_parent = &wn; |
| 281 | execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); |
| 282 | } |
| 283 | } |
| 284 | static void run(const Range& range, Body& body, Partitioner& partitioner) { |
| 285 | // Bound context prevents exceptions from body to affect nesting or sibling algorithms, |
| 286 | // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce |
| 287 | // in the try-block. |
| 288 | task_group_context context(PARALLEL_REDUCE); |
| 289 | run(range, body, partitioner, context); |
| 290 | } |
| 291 | //! Run body for range, serves as callback for partitioner |
| 292 | void run_body( Range &r ) { |
| 293 | my_body( r ); |
| 294 | } |
| 295 | //! Spawn right task, serves as callback for partitioner |
| 296 | void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { |
| 297 | offer_work_impl(ed, *this, split_obj); |
| 298 | } |
| 299 | private: |
| 300 | template <typename... Args> |
| 301 | void offer_work_impl(execution_data& ed, Args&&... args) { |
| 302 | small_object_allocator alloc{}; |
| 303 | // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. |
| 304 | auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc); |
| 305 | |
| 306 | // New right child |
| 307 | auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc); |
| 308 | |
| 309 | right_child->my_parent = my_parent = new_tree_node; |
| 310 | |
| 311 | // Spawn the right sibling |
| 312 | right_child->spawn_self(ed); |
| 313 | } |
| 314 | |
| 315 | void spawn_self(execution_data& ed) { |
| 316 | my_partition.spawn_task(*this, *context(ed)); |
| 317 | } |
| 318 | }; |
| 319 | |
| 320 | //! Fold the tree and deallocate the task |
| 321 | template<typename Range, typename Body, typename Partitioner> |
| 322 | void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { |
| 323 | // Get the current parent and wait object before an object destruction |
| 324 | node* parent = my_parent; |
| 325 | |
| 326 | auto allocator = my_allocator; |
| 327 | // Task execution finished - destroy it |
| 328 | this->~start_deterministic_reduce(); |
| 329 | // Unwind the tree decrementing the parent`s reference count |
| 330 | fold_tree<tree_node_type>(parent, ed); |
| 331 | allocator.deallocate(this, ed); |
| 332 | } |
| 333 | |
| 334 | //! Execute parallel_deterministic_reduce task |
| 335 | template<typename Range, typename Body, typename Partitioner> |
| 336 | task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { |
| 337 | if (!is_same_affinity(ed)) { |
| 338 | my_partition.note_affinity(execution_slot(ed)); |
| 339 | } |
| 340 | my_partition.check_being_stolen(*this, ed); |
| 341 | |
| 342 | my_partition.execute(*this, my_range, ed); |
| 343 | |
| 344 | finalize(ed); |
| 345 | return NULL; |
| 346 | } |
| 347 | |
| 348 | //! Cancel parallel_deterministic_reduce task |
| 349 | template<typename Range, typename Body, typename Partitioner> |
| 350 | task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { |
| 351 | finalize(ed); |
| 352 | return NULL; |
| 353 | } |
| 354 | |
| 355 | |
| 356 | //! Auxiliary class for parallel_reduce; for internal use only. |
| 357 | /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" |
| 358 | using given \ref parallel_reduce_lambda_req "anonymous function objects". |
| 359 | **/ |
| 360 | /** @ingroup algorithms */ |
| 361 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 362 | class lambda_reduce_body { |
| 363 | //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced |
| 364 | // (might require some performance measurements) |
| 365 | |
| 366 | const Value& my_identity_element; |
| 367 | const RealBody& my_real_body; |
| 368 | const Reduction& my_reduction; |
| 369 | Value my_value; |
| 370 | lambda_reduce_body& operator= ( const lambda_reduce_body& other ); |
| 371 | public: |
| 372 | lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) |
| 373 | : my_identity_element(identity) |
| 374 | , my_real_body(body) |
| 375 | , my_reduction(reduction) |
| 376 | , my_value(identity) |
| 377 | { } |
| 378 | lambda_reduce_body( const lambda_reduce_body& other ) = default; |
| 379 | lambda_reduce_body( lambda_reduce_body& other, tbb::split ) |
| 380 | : my_identity_element(other.my_identity_element) |
| 381 | , my_real_body(other.my_real_body) |
| 382 | , my_reduction(other.my_reduction) |
| 383 | , my_value(other.my_identity_element) |
| 384 | { } |
| 385 | void operator()(Range& range) { |
| 386 | my_value = my_real_body(range, const_cast<const Value&>(my_value)); |
| 387 | } |
| 388 | void join( lambda_reduce_body& rhs ) { |
| 389 | my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); |
| 390 | } |
| 391 | Value result() const { |
| 392 | return my_value; |
| 393 | } |
| 394 | }; |
| 395 | |
| 396 | |
| 397 | // Requirements on Range concept are documented in blocked_range.h |
| 398 | |
| 399 | /** \page parallel_reduce_body_req Requirements on parallel_reduce body |
| 400 | Class \c Body implementing the concept of parallel_reduce body must define: |
| 401 | - \code Body::Body( Body&, split ); \endcode Splitting constructor. |
| 402 | Must be able to run concurrently with operator() and method \c join |
| 403 | - \code Body::~Body(); \endcode Destructor |
| 404 | - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r |
| 405 | and accumulating the result |
| 406 | - \code void Body::join( Body& b ); \endcode Join results. |
| 407 | The result in \c b should be merged into the result of \c this |
| 408 | **/ |
| 409 | |
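/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    The functional form of parallel_reduce takes an identity value and two function objects:
    - \code Value RealBody::operator()( const Range& r, const Value& x ) const; \endcode
        Processes range \c r starting from the partial result \c x and returns the updated result
    - \code Value Reduction::operator()( const Value& x, const Value& y ) const; \endcode
        Combines the partial results \c x and \c y
    Both function objects are invoked through const references, and the values they return
    must be convertible to \c Value.
**/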
| 413 | |
| 414 | /** \name parallel_reduce |
| 415 | See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ |
| 416 | //@{ |
| 417 | |
| 418 | //! Parallel iteration with reduction and default partitioner. |
| 419 | /** @ingroup algorithms **/ |
| 420 | template<typename Range, typename Body> |
| 421 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 422 | void parallel_reduce( const Range& range, Body& body ) { |
| 423 | start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() ); |
| 424 | } |
| 425 | |
| 426 | //! Parallel iteration with reduction and simple_partitioner |
| 427 | /** @ingroup algorithms **/ |
| 428 | template<typename Range, typename Body> |
| 429 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 430 | void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { |
| 431 | start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner ); |
| 432 | } |
| 433 | |
| 434 | //! Parallel iteration with reduction and auto_partitioner |
| 435 | /** @ingroup algorithms **/ |
| 436 | template<typename Range, typename Body> |
| 437 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 438 | void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { |
| 439 | start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner ); |
| 440 | } |
| 441 | |
| 442 | //! Parallel iteration with reduction and static_partitioner |
| 443 | /** @ingroup algorithms **/ |
| 444 | template<typename Range, typename Body> |
| 445 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 446 | void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { |
| 447 | start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner ); |
| 448 | } |
| 449 | |
| 450 | //! Parallel iteration with reduction and affinity_partitioner |
| 451 | /** @ingroup algorithms **/ |
| 452 | template<typename Range, typename Body> |
| 453 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 454 | void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { |
| 455 | start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner ); |
| 456 | } |
| 457 | |
| 458 | //! Parallel iteration with reduction, default partitioner and user-supplied context. |
| 459 | /** @ingroup algorithms **/ |
| 460 | template<typename Range, typename Body> |
| 461 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 462 | void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { |
| 463 | start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); |
| 464 | } |
| 465 | |
| 466 | //! Parallel iteration with reduction, simple partitioner and user-supplied context. |
| 467 | /** @ingroup algorithms **/ |
| 468 | template<typename Range, typename Body> |
| 469 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 470 | void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { |
| 471 | start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context ); |
| 472 | } |
| 473 | |
| 474 | //! Parallel iteration with reduction, auto_partitioner and user-supplied context |
| 475 | /** @ingroup algorithms **/ |
| 476 | template<typename Range, typename Body> |
| 477 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 478 | void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { |
| 479 | start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context ); |
| 480 | } |
| 481 | |
| 482 | //! Parallel iteration with reduction, static_partitioner and user-supplied context |
| 483 | /** @ingroup algorithms **/ |
| 484 | template<typename Range, typename Body> |
| 485 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 486 | void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { |
| 487 | start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context ); |
| 488 | } |
| 489 | |
| 490 | //! Parallel iteration with reduction, affinity_partitioner and user-supplied context |
| 491 | /** @ingroup algorithms **/ |
| 492 | template<typename Range, typename Body> |
| 493 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 494 | void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { |
| 495 | start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context ); |
| 496 | } |
| 497 | /** parallel_reduce overloads that work with anonymous function objects |
| 498 | (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ |
| 499 | |
| 500 | //! Parallel iteration with reduction and default partitioner. |
| 501 | /** @ingroup algorithms **/ |
| 502 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 503 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 504 | parallel_reduce_combine<Reduction, Value>) |
| 505 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { |
| 506 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 507 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> |
| 508 | ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); |
| 509 | return body.result(); |
| 510 | } |
| 511 | |
| 512 | //! Parallel iteration with reduction and simple_partitioner. |
| 513 | /** @ingroup algorithms **/ |
| 514 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 515 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 516 | parallel_reduce_combine<Reduction, Value>) |
| 517 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 518 | const simple_partitioner& partitioner ) { |
| 519 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 520 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> |
| 521 | ::run(range, body, partitioner ); |
| 522 | return body.result(); |
| 523 | } |
| 524 | |
| 525 | //! Parallel iteration with reduction and auto_partitioner |
| 526 | /** @ingroup algorithms **/ |
| 527 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 528 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 529 | parallel_reduce_combine<Reduction, Value>) |
| 530 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 531 | const auto_partitioner& partitioner ) { |
| 532 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 533 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> |
| 534 | ::run( range, body, partitioner ); |
| 535 | return body.result(); |
| 536 | } |
| 537 | |
| 538 | //! Parallel iteration with reduction and static_partitioner |
| 539 | /** @ingroup algorithms **/ |
| 540 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 541 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 542 | parallel_reduce_combine<Reduction, Value>) |
| 543 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 544 | const static_partitioner& partitioner ) { |
| 545 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 546 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> |
| 547 | ::run( range, body, partitioner ); |
| 548 | return body.result(); |
| 549 | } |
| 550 | |
| 551 | //! Parallel iteration with reduction and affinity_partitioner |
| 552 | /** @ingroup algorithms **/ |
| 553 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 554 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 555 | parallel_reduce_combine<Reduction, Value>) |
| 556 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 557 | affinity_partitioner& partitioner ) { |
| 558 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 559 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> |
| 560 | ::run( range, body, partitioner ); |
| 561 | return body.result(); |
| 562 | } |
| 563 | |
| 564 | //! Parallel iteration with reduction, default partitioner and user-supplied context. |
| 565 | /** @ingroup algorithms **/ |
| 566 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 567 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 568 | parallel_reduce_combine<Reduction, Value>) |
| 569 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 570 | task_group_context& context ) { |
| 571 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 572 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> |
| 573 | ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); |
| 574 | return body.result(); |
| 575 | } |
| 576 | |
| 577 | //! Parallel iteration with reduction, simple partitioner and user-supplied context. |
| 578 | /** @ingroup algorithms **/ |
| 579 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 580 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 581 | parallel_reduce_combine<Reduction, Value>) |
| 582 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 583 | const simple_partitioner& partitioner, task_group_context& context ) { |
| 584 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 585 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> |
| 586 | ::run( range, body, partitioner, context ); |
| 587 | return body.result(); |
| 588 | } |
| 589 | |
| 590 | //! Parallel iteration with reduction, auto_partitioner and user-supplied context |
| 591 | /** @ingroup algorithms **/ |
| 592 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 593 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 594 | parallel_reduce_combine<Reduction, Value>) |
| 595 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 596 | const auto_partitioner& partitioner, task_group_context& context ) { |
| 597 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 598 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> |
| 599 | ::run( range, body, partitioner, context ); |
| 600 | return body.result(); |
| 601 | } |
| 602 | |
| 603 | //! Parallel iteration with reduction, static_partitioner and user-supplied context |
| 604 | /** @ingroup algorithms **/ |
| 605 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 606 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 607 | parallel_reduce_combine<Reduction, Value>) |
| 608 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 609 | const static_partitioner& partitioner, task_group_context& context ) { |
| 610 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 611 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> |
| 612 | ::run( range, body, partitioner, context ); |
| 613 | return body.result(); |
| 614 | } |
| 615 | |
| 616 | //! Parallel iteration with reduction, affinity_partitioner and user-supplied context |
| 617 | /** @ingroup algorithms **/ |
| 618 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 619 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 620 | parallel_reduce_combine<Reduction, Value>) |
| 621 | Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 622 | affinity_partitioner& partitioner, task_group_context& context ) { |
| 623 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 624 | start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> |
| 625 | ::run( range, body, partitioner, context ); |
| 626 | return body.result(); |
| 627 | } |
| 628 | |
| 629 | //! Parallel iteration with deterministic reduction and default simple partitioner. |
| 630 | /** @ingroup algorithms **/ |
| 631 | template<typename Range, typename Body> |
| 632 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 633 | void parallel_deterministic_reduce( const Range& range, Body& body ) { |
| 634 | start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner()); |
| 635 | } |
| 636 | |
| 637 | //! Parallel iteration with deterministic reduction and simple partitioner. |
| 638 | /** @ingroup algorithms **/ |
| 639 | template<typename Range, typename Body> |
| 640 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 641 | void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { |
| 642 | start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner); |
| 643 | } |
| 644 | |
| 645 | //! Parallel iteration with deterministic reduction and static partitioner. |
| 646 | /** @ingroup algorithms **/ |
| 647 | template<typename Range, typename Body> |
| 648 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 649 | void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { |
| 650 | start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); |
| 651 | } |
| 652 | |
| 653 | //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. |
| 654 | /** @ingroup algorithms **/ |
| 655 | template<typename Range, typename Body> |
| 656 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 657 | void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { |
| 658 | start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); |
| 659 | } |
| 660 | |
| 661 | //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. |
| 662 | /** @ingroup algorithms **/ |
| 663 | template<typename Range, typename Body> |
| 664 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 665 | void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { |
| 666 | start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); |
| 667 | } |
| 668 | |
| 669 | //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. |
| 670 | /** @ingroup algorithms **/ |
| 671 | template<typename Range, typename Body> |
| 672 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
| 673 | void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { |
| 674 | start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); |
| 675 | } |
| 676 | |
/** parallel_deterministic_reduce overloads that work with anonymous function objects
(see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
| 679 | |
| 680 | //! Parallel iteration with deterministic reduction and default simple partitioner. |
| 681 | // TODO: consider making static_partitioner the default |
| 682 | /** @ingroup algorithms **/ |
| 683 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 684 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 685 | parallel_reduce_combine<Reduction, Value>) |
| 686 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { |
| 687 | return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); |
| 688 | } |
| 689 | |
| 690 | //! Parallel iteration with deterministic reduction and simple partitioner. |
| 691 | /** @ingroup algorithms **/ |
| 692 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 693 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 694 | parallel_reduce_combine<Reduction, Value>) |
| 695 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { |
| 696 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
| 697 | start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> |
| 698 | ::run(range, body, partitioner); |
| 699 | return body.result(); |
| 700 | } |
| 701 | |
| 702 | //! Parallel iteration with deterministic reduction and static partitioner. |
| 703 | /** @ingroup algorithms **/ |
| 704 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 705 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 706 | parallel_reduce_combine<Reduction, Value>) |
| 707 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { |
| 708 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
| 709 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> |
| 710 | ::run(range, body, partitioner); |
| 711 | return body.result(); |
| 712 | } |
| 713 | |
| 714 | //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. |
| 715 | /** @ingroup algorithms **/ |
| 716 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 717 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 718 | parallel_reduce_combine<Reduction, Value>) |
| 719 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 720 | task_group_context& context ) { |
| 721 | return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); |
| 722 | } |
| 723 | |
| 724 | //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. |
| 725 | /** @ingroup algorithms **/ |
| 726 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 727 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 728 | parallel_reduce_combine<Reduction, Value>) |
| 729 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 730 | const simple_partitioner& partitioner, task_group_context& context ) { |
| 731 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
| 732 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> |
| 733 | ::run(range, body, partitioner, context); |
| 734 | return body.result(); |
| 735 | } |
| 736 | |
| 737 | //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. |
| 738 | /** @ingroup algorithms **/ |
| 739 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
| 740 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
| 741 | parallel_reduce_combine<Reduction, Value>) |
| 742 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
| 743 | const static_partitioner& partitioner, task_group_context& context ) { |
| 744 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
| 745 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> |
| 746 | ::run(range, body, partitioner, context); |
| 747 | return body.result(); |
| 748 | } |
| 749 | //@} |
| 750 | |
| 751 | } // namespace d1 |
| 752 | } // namespace detail |
| 753 | |
| 754 | inline namespace v1 { |
| 755 | using detail::d1::parallel_reduce; |
| 756 | using detail::d1::parallel_deterministic_reduce; |
| 757 | // Split types |
| 758 | using detail::split; |
| 759 | using detail::proportional_split; |
| 760 | } // namespace v1 |
| 761 | |
| 762 | } // namespace tbb |
| 763 | #endif /* __TBB_parallel_reduce_H */ |
| 764 | |