1/*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17#ifndef __TBB_parallel_reduce_H
18#define __TBB_parallel_reduce_H
19
20#include <new>
21#include "detail/_namespace_injection.h"
22#include "detail/_task.h"
23#include "detail/_aligned_space.h"
24#include "detail/_small_object_pool.h"
25#include "detail/_range_common.h"
26
27#include "task_group.h" // task_group_context
28#include "partitioner.h"
29#include "profiling.h"
30
31namespace tbb {
32namespace detail {
33#if __TBB_CPP20_CONCEPTS_PRESENT
34inline namespace d0 {
35
//! Concept for the user Body of parallel_reduce: it must be splittable,
//! callable on a subrange, and able to merge another body's result via join().
template <typename Body, typename Range>
concept parallel_reduce_body = splittable<Body> &&
                               requires( Body& body, const Range& range, Body& rhs ) {
                                   body(range);      // accumulate over a subrange
                                   body.join(rhs);   // merge rhs's partial result
                               };
42
//! Concept for the accumulation functor of the functional form of parallel_reduce:
//! invocable as func(subrange, current_value) yielding something convertible to Value.
template <typename Function, typename Range, typename Value>
concept parallel_reduce_function = requires( const std::remove_reference_t<Function>& func,
                                             const Range& range,
                                             const Value& value ) {
    { func(range, value) } -> std::convertible_to<Value>;
};
49
//! Concept for the combining functor of the functional form of parallel_reduce:
//! invocable as combine(lhs, rhs) yielding something convertible to Value.
template <typename Combine, typename Value>
concept parallel_reduce_combine = requires( const std::remove_reference_t<Combine>& combine,
                                            const Value& lhs, const Value& rhs ) {
    { combine(lhs, rhs) } -> std::convertible_to<Value>;
};
55
56} // namespace d0
57#endif // __TBB_CPP20_CONCEPTS_PRESENT
58namespace d1 {
59
//! Tree node type for parallel_reduce.
/** Continuation node that joins the partial results of a left and a right child
    task. The right child's Body copy (the "zombie") is placement-constructed
    lazily in zombie_space by start_reduce::execute, and only when the right
    task actually runs with a split body; has_right_zombie records whether that
    copy exists and therefore needs join() and explicit destruction.
    @ingroup algorithms */
//TODO: consider folding tree via bypass execution(instead of manual folding)
// for better cancellation and critical tasks handling (performance measurements required).
template<typename Body>
struct reduction_tree_node : public tree_node {
    tbb::detail::aligned_space<Body> zombie_space;  // uninitialized storage for the right body
    Body& left_body;                                // body accumulating the left child's result
    bool has_right_zombie{false};                   // true once a Body was constructed in zombie_space

    reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
        tree_node{parent, ref_count, alloc},
        left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */
    {}

    //! Merge the right body's partial result into the left body.
    /** Skipped on cancellation: the right body may not hold a meaningful value. */
    void join(task_group_context* context) {
        if (has_right_zombie && !context->is_group_execution_cancelled())
            left_body.join(*zombie_space.begin());
    }

    ~reduction_tree_node() {
        // The zombie was placement-constructed, so it must be destroyed explicitly.
        if( has_right_zombie ) zombie_space.begin()->~Body();
    }
};
84
//! Task type used to split the work of parallel_reduce.
/** Recursively splits my_range as directed by the partitioner. Every split
    spawns a new right-child task and interposes a reduction_tree_node
    continuation that later joins the two partial results.
    @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_reduce : public task {
    Range my_range;
    Body* my_body;    // not owned; may be retargeted to a freshly split copy in execute()
    node* my_parent;  // continuation node notified when this subtree completes

    typename Partitioner::task_partition_type my_partition;
    small_object_allocator my_allocator;
    bool is_right_child;  // reset to false as soon as this task is split again

    task* execute(execution_data&) override;
    task* cancel(execution_data&) override;
    void finalize(const execution_data&);

    using tree_node_type = reduction_tree_node<Body>;

    //! Constructor of the reduce root task.
    start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
        my_range(range),
        my_body(&body),
        my_partition(partitioner),
        my_allocator(alloc),
        is_right_child(false) {}
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child. Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
        my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split_obj),
        my_allocator(alloc),
        is_right_child(true)
    {
        parent_.is_right_child = false;
    }
    //! Construct right child from the given range as response to the demand.
    /** parent_ remains left child. Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
        my_range(r),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split()),
        my_allocator(alloc),
        is_right_child(true)
    {
        my_partition.align_depth( d );
        parent_.is_right_child = false;
    }
    //! Spawn the root task and block until the whole reduction tree is folded.
    static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
        if ( !range.empty() ) {
            wait_node wn;  // stack-allocated: run() does not return before the tree is folded
            small_object_allocator alloc{};
            auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc);
            reduce_task->my_parent = &wn;
            execute_and_wait(*reduce_task, context, wn.m_wait, context);
        }
    }
    //! Same as above but with an algorithm-local, bound context.
    static void run(const Range& range, Body& body, Partitioner& partitioner) {
        // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
        // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
        task_group_context context(PARALLEL_REDUCE);
        run(range, body, partitioner, context);
    }
    //! Run body for range, serves as callback for partitioner
    void run_body( Range &r ) {
        (*my_body)(r);
    }

    //! Spawn right task, serves as callback for partitioner
    void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
        offer_work_impl(ed, *this, split_obj);
    }
    //! Spawn right task from an explicit range/depth, serves as callback for partitioner
    void offer_work(const Range& r, depth_t d, execution_data& ed) {
        offer_work_impl(ed, *this, r, d);
    }

private:
    //! Common splitting path: allocate the right child and the new parent node.
    template <typename... Args>
    void offer_work_impl(execution_data& ed, Args&&... args) {
        small_object_allocator alloc{};
        // New right child
        auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc);

        // New root node as a continuation and ref count. Left and right child attach to the new parent.
        right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc);

        // Spawn the right sibling
        right_child->spawn_self(ed);
    }

    void spawn_self(execution_data& ed) {
        my_partition.spawn_task(*this, *context(ed));
    }
};
180
//! Fold the tree and deallocate the task.
template<typename Range, typename Body, typename Partitioner>
void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
    // Get the current parent and copy the allocator before the object is destroyed
    node* parent = my_parent;
    auto allocator = my_allocator;
    // Task execution finished - destroy it
    this->~start_reduce();
    // Unwind the tree decrementing the parent`s reference count.
    // NOTE: after the destructor, `this` is used only as a raw address for
    // deallocation, which is valid.
    fold_tree<tree_node_type>(parent, ed);
    allocator.deallocate(this, ed);
}
193
194//! Execute parallel_reduce task
195template<typename Range, typename Body, typename Partitioner>
196task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
197 if (!is_same_affinity(ed)) {
198 my_partition.note_affinity(execution_slot(ed));
199 }
200 my_partition.check_being_stolen(*this, ed);
201
202 // The acquire barrier synchronizes the data pointed with my_body if the left
203 // task has already finished.
204 if( is_right_child && my_parent->m_ref_count.load(m: std::memory_order_acquire) == 2 ) {
205 tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent);
206 my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split());
207 parent_ptr->has_right_zombie = true;
208 }
209 __TBB_ASSERT(my_body != nullptr, "Incorrect body value");
210
211 my_partition.execute(*this, my_range, ed);
212
213 finalize(ed);
214 return nullptr;
215}
216
//! Cancel parallel_reduce task.
/** Called instead of execute() when the group is cancelled; still folds the
    tree so reference counts are decremented and memory is reclaimed. */
template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
    finalize(ed);
    return nullptr;
}
223
//! Tree node type for parallel_deterministic_reduce.
/** Continuation node that joins the partial results of a left and a right
    child task. Unlike reduction_tree_node, the right body is split eagerly
    when the node is constructed, so the pairing of bodies in join() is fixed
    at split time rather than depending on whether the right task is stolen.
    @ingroup algorithms */
template<typename Body>
struct deterministic_reduction_tree_node : public tree_node {
    Body right_body;  // split from the left body at node construction
    Body& left_body;  // accumulates the left child's result

    deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
        tree_node{parent, ref_count, alloc},
        right_body{input_left_body, detail::split()},
        left_body(input_left_body)
    {}

    //! Merge the right partial result into the left body (skipped on cancellation).
    void join(task_group_context* context) {
        if (!context->is_group_execution_cancelled())
            left_body.join(right_body);
    }
};
242
//! Task type used to split the work of parallel_deterministic_reduce.
/** Recursively splits my_range as directed by the partitioner. The body is
    split eagerly inside the parent tree node (see offer_work_impl), which
    fixes the reduction tree's body pairing at split time.
    @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_deterministic_reduce : public task {
    Range my_range;
    Body& my_body;    // body this task accumulates into (root body or a node's right_body)
    node* my_parent;  // continuation node notified when this subtree completes

    typename Partitioner::task_partition_type my_partition;
    small_object_allocator my_allocator;

    task* execute(execution_data&) override;
    task* cancel(execution_data&) override;
    void finalize(const execution_data&);

    using tree_node_type = deterministic_reduction_tree_node<Body>;

    //! Constructor of the deterministic_reduce root task.
    start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) :
        my_range(range),
        my_body(body),
        my_partition(partitioner),
        my_allocator(alloc) {}
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child. Newly constructed object is right child. */
    start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body,
                                small_object_allocator& alloc ) :
        my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
        my_body(body),
        my_partition(parent_.my_partition, split_obj),
        my_allocator(alloc) {}
    //! Spawn the root task and block until the whole reduction tree is folded.
    static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
        if ( !range.empty() ) {
            wait_node wn;  // stack-allocated: run() does not return before the tree is folded
            small_object_allocator alloc{};
            auto deterministic_reduce_task =
                alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc);
            deterministic_reduce_task->my_parent = &wn;
            execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context);
        }
    }
    //! Same as above but with an algorithm-local, bound context.
    static void run(const Range& range, Body& body, Partitioner& partitioner) {
        // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
        // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce
        // in the try-block.
        task_group_context context(PARALLEL_REDUCE);
        run(range, body, partitioner, context);
    }
    //! Run body for range, serves as callback for partitioner
    void run_body( Range &r ) {
        my_body( r );
    }
    //! Spawn right task, serves as callback for partitioner
    void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
        offer_work_impl(ed, *this, split_obj);
    }
private:
    //! Common splitting path: allocate the new parent node (splitting the body), then the right child.
    template <typename... Args>
    void offer_work_impl(execution_data& ed, Args&&... args) {
        small_object_allocator alloc{};
        // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body.
        auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc);

        // New right child accumulates into the node's eagerly split right_body.
        auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc);

        right_child->my_parent = my_parent = new_tree_node;

        // Spawn the right sibling
        right_child->spawn_self(ed);
    }

    void spawn_self(execution_data& ed) {
        my_partition.spawn_task(*this, *context(ed));
    }
};
319
//! Fold the tree and deallocate the task.
template<typename Range, typename Body, typename Partitioner>
void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
    // Get the current parent and copy the allocator before the object is destroyed
    node* parent = my_parent;

    auto allocator = my_allocator;
    // Task execution finished - destroy it
    this->~start_deterministic_reduce();
    // Unwind the tree decrementing the parent`s reference count.
    // NOTE: after the destructor, `this` is used only as a raw address for
    // deallocation, which is valid.
    fold_tree<tree_node_type>(parent, ed);
    allocator.deallocate(this, ed);
}
333
334//! Execute parallel_deterministic_reduce task
335template<typename Range, typename Body, typename Partitioner>
336task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
337 if (!is_same_affinity(ed)) {
338 my_partition.note_affinity(execution_slot(ed));
339 }
340 my_partition.check_being_stolen(*this, ed);
341
342 my_partition.execute(*this, my_range, ed);
343
344 finalize(ed);
345 return NULL;
346}
347
348//! Cancel parallel_deterministic_reduce task
349template<typename Range, typename Body, typename Partitioner>
350task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
351 finalize(ed);
352 return NULL;
353}
354
355
356//! Auxiliary class for parallel_reduce; for internal use only.
357/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
358 using given \ref parallel_reduce_lambda_req "anonymous function objects".
359 **/
360/** @ingroup algorithms */
361template<typename Range, typename Value, typename RealBody, typename Reduction>
362class lambda_reduce_body {
363//TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced
364// (might require some performance measurements)
365
366 const Value& my_identity_element;
367 const RealBody& my_real_body;
368 const Reduction& my_reduction;
369 Value my_value;
370 lambda_reduce_body& operator= ( const lambda_reduce_body& other );
371public:
372 lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
373 : my_identity_element(identity)
374 , my_real_body(body)
375 , my_reduction(reduction)
376 , my_value(identity)
377 { }
378 lambda_reduce_body( const lambda_reduce_body& other ) = default;
379 lambda_reduce_body( lambda_reduce_body& other, tbb::split )
380 : my_identity_element(other.my_identity_element)
381 , my_real_body(other.my_real_body)
382 , my_reduction(other.my_reduction)
383 , my_value(other.my_identity_element)
384 { }
385 void operator()(Range& range) {
386 my_value = my_real_body(range, const_cast<const Value&>(my_value));
387 }
388 void join( lambda_reduce_body& rhs ) {
389 my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
390 }
391 Value result() const {
392 return my_value;
393 }
394};
395
396
397// Requirements on Range concept are documented in blocked_range.h
398
399/** \page parallel_reduce_body_req Requirements on parallel_reduce body
400 Class \c Body implementing the concept of parallel_reduce body must define:
401 - \code Body::Body( Body&, split ); \endcode Splitting constructor.
402 Must be able to run concurrently with operator() and method \c join
403 - \code Body::~Body(); \endcode Destructor
404 - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
405 and accumulating the result
406 - \code void Body::join( Body& b ); \endcode Join results.
407 The result in \c b should be merged into the result of \c this
408**/
409
/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    - \code Value RealBody::operator()( const Range& r, const Value& v ) const; \endcode
      Accumulate the result over subrange \c r, starting from initial value \c v
    - \code Value Reduction::operator()( const Value& x, const Value& y ) const; \endcode
      Combine the two partial results \c x and \c y
**/
413
414/** \name parallel_reduce
415 See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
416//@{
417
//! Parallel iteration with reduction and default partitioner.
/** The result is accumulated into \c body; the call returns only after the
    whole range has been processed. @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body ) {
    start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}

//! Parallel iteration with reduction and simple_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and auto_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
    start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and static_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and affinity_partitioner.
/** Note: the affinity_partitioner is taken by non-const reference, unlike the
    other partitioners. @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
    start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}
457
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** The user-supplied task_group_context governs cancellation and exception
    propagation for the algorithm. @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
    start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
/** parallel_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects").
    Each overload adapts (identity, real_body, reduction) into a lambda_reduce_body
    and returns the final reduced value. **/

//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
    return body.result();
}

//! Parallel iteration with reduction and simple_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run(range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and auto_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and static_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                                        ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and affinity_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                                        ::run( range, body, partitioner );
    return body.result();
}
563
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** The user-supplied task_group_context governs cancellation and exception
    propagation for the algorithm. @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
    return body.result();
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                                        ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                                        ::run( range, body, partitioner, context );
    return body.result();
}
628
//! Parallel iteration with deterministic reduction and default simple partitioner.
/** Deterministic variant: bodies are split eagerly at each partitioner-driven
    split (see deterministic_reduction_tree_node), so the pairing of join
    operations is fixed by the splitting, not by task stealing.
    @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
}

//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
}

//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner);
}

//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
    start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context );
}

//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context);
}

//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context);
}
676
/** parallel_deterministic_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/

//! Parallel iteration with deterministic reduction and default simple partitioner.
/** Delegates to the simple_partitioner overload below. */
// TODO: consider making static_partitioner the default
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner());
}

//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner>
                          ::run(range, body, partitioner);
    return body.result();
}

//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) {
    lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
        ::run(range, body, partitioner);
    return body.result();
}
713
714//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
715/** @ingroup algorithms **/
716template<typename Range, typename Value, typename RealBody, typename Reduction>
717 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
718 parallel_reduce_combine<Reduction, Value>)
719Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
720 task_group_context& context ) {
721 return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context);
722}
723
724//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
725/** @ingroup algorithms **/
726template<typename Range, typename Value, typename RealBody, typename Reduction>
727 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
728 parallel_reduce_combine<Reduction, Value>)
729Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
730 const simple_partitioner& partitioner, task_group_context& context ) {
731 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
732 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner>
733 ::run(range, body, partitioner, context);
734 return body.result();
735}
736
737//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
738/** @ingroup algorithms **/
739template<typename Range, typename Value, typename RealBody, typename Reduction>
740 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
741 parallel_reduce_combine<Reduction, Value>)
742Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
743 const static_partitioner& partitioner, task_group_context& context ) {
744 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
745 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
746 ::run(range, body, partitioner, context);
747 return body.result();
748}
749//@}
750
751} // namespace d1
752} // namespace detail
753
754inline namespace v1 {
755using detail::d1::parallel_reduce;
756using detail::d1::parallel_deterministic_reduce;
757// Split types
758using detail::split;
759using detail::proportional_split;
760} // namespace v1
761
762} // namespace tbb
763#endif /* __TBB_parallel_reduce_H */
764

source code of include/oneapi/tbb/parallel_reduce.h