1 | /* |
2 | Copyright (c) 2005-2021 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #ifndef __TBB_parallel_reduce_H |
18 | #define __TBB_parallel_reduce_H |
19 | |
20 | #include <new> |
21 | #include "detail/_namespace_injection.h" |
22 | #include "detail/_task.h" |
23 | #include "detail/_aligned_space.h" |
24 | #include "detail/_small_object_pool.h" |
25 | #include "detail/_range_common.h" |
26 | |
27 | #include "task_group.h" // task_group_context |
28 | #include "partitioner.h" |
29 | #include "profiling.h" |
30 | |
31 | namespace tbb { |
32 | namespace detail { |
33 | #if __TBB_CPP20_CONCEPTS_PRESENT |
34 | inline namespace d0 { |
35 | |
// A Body is usable with parallel_reduce if it is splittable (has a splitting
// constructor), can be applied to a subrange via operator(), and can absorb
// another body's partial result via join().
template <typename Body, typename Range>
concept parallel_reduce_body = splittable<Body> &&
                               requires( Body& body, const Range& range, Body& rhs ) {
                                   body(range);
                                   body.join(rhs);
                               };
42 | |
// Functional (lambda) form: func(range, value) accumulates over the subrange
// starting from value, and must yield a result convertible back to Value.
template <typename Function, typename Range, typename Value>
concept parallel_reduce_function = requires( const std::remove_reference_t<Function>& func,
                                             const Range& range,
                                             const Value& value ) {
    { func(range, value) } -> std::convertible_to<Value>;
};
49 | |
// Combiner form: combine(lhs, rhs) merges two partial results and must yield
// a result convertible back to Value.
template <typename Combine, typename Value>
concept parallel_reduce_combine = requires( const std::remove_reference_t<Combine>& combine,
                                            const Value& lhs, const Value& rhs ) {
    { combine(lhs, rhs) } -> std::convertible_to<Value>;
};
55 | |
56 | } // namespace d0 |
57 | #endif // __TBB_CPP20_CONCEPTS_PRESENT |
58 | namespace d1 { |
59 | |
//! Tree node type for parallel_reduce.
/** Continuation node that joins the partial results of its two children once
    both complete.  The right child's body (the "zombie") is only constructed
    lazily, in zombie_space, if the right task actually runs with a split body.
    @ingroup algorithms */
//TODO: consider folding tree via bypass execution(instead of manual folding)
// for better cancellation and critical tasks handling (performance measurements required).
template<typename Body>
struct reduction_tree_node : public tree_node {
    tbb::detail::aligned_space<Body> zombie_space;  // raw storage for the right child's split body
    Body& left_body;                                // body accumulating the left child's result
    bool has_right_zombie{false};                   // set once a Body was placement-constructed in zombie_space

    reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
        tree_node{parent, ref_count, alloc},
        left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */
    {}

    //! Merge the right child's result into the left body; skipped if the
    //! right body was never split off or the group was cancelled.
    void join(task_group_context* context) {
        if (has_right_zombie && !context->is_group_execution_cancelled())
            left_body.join(*zombie_space.begin());
    }

    ~reduction_tree_node() {
        // Manually destroy the lazily placement-constructed right body, if any.
        if( has_right_zombie ) zombie_space.begin()->~Body();
    }
};
84 | |
//! Task type used to split the work of parallel_reduce.
/** Recursively splits its range via the partitioner; each split creates a new
    reduction_tree_node continuation that joins the partial results when both
    children finish.
    @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_reduce : public task {
    Range my_range;   // subrange of the iteration space assigned to this task
    Body* my_body;    // body accumulating this task's partial result; repointed to a
                      // freshly split copy in execute() when a right child is stolen
    node* my_parent;  // continuation node (reduction_tree_node or the root wait_node)

    typename Partitioner::task_partition_type my_partition;
    small_object_allocator my_allocator;  // allocator that created this task; used to deallocate it
    bool is_right_child;                  // true while this task is the right-most child of its parent

    task* execute(execution_data&) override;
    task* cancel(execution_data&) override;
    void finalize(const execution_data&);

    using tree_node_type = reduction_tree_node<Body>;

    //! Constructor reduce root task.
    start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
        my_range(range),
        my_body(&body),
        my_partition(partitioner),
        my_allocator(alloc),
        is_right_child(false) {}
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child.  Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
        my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split_obj),
        my_allocator(alloc),
        is_right_child(true)
    {
        // The parent just gained a right child, so it is no longer one itself.
        parent_.is_right_child = false;
    }
    //! Construct right child from the given range as response to the demand.
    /** parent_ remains left child.  Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
        my_range(r),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split()),
        my_allocator(alloc),
        is_right_child(true)
    {
        my_partition.align_depth( d );
        parent_.is_right_child = false;
    }
    //! Launch the reduction in the given context and block until it completes.
    static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
        if ( !range.empty() ) {
            wait_node wn;
            small_object_allocator alloc{};
            auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc);
            reduce_task->my_parent = &wn;  // the root's "parent" is the wait node the caller blocks on
            execute_and_wait(*reduce_task, context, wn.m_wait, context);
        }
    }
    //! Launch the reduction in an internally created, bound context.
    static void run(const Range& range, Body& body, Partitioner& partitioner) {
        // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
        // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
        task_group_context context(PARALLEL_REDUCE);
        run(range, body, partitioner, context);
    }
    //! Run body for range, serves as callback for partitioner
    void run_body( Range &r ) {
        (*my_body)(r);
    }

    //! spawn right task, serves as callback for partitioner
    void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
        offer_work_impl(ed, *this, split_obj);
    }
    //! spawn right task, serves as callback for partitioner
    void offer_work(const Range& r, depth_t d, execution_data& ed) {
        offer_work_impl(ed, *this, r, d);
    }

private:
    //! Common machinery for both offer_work overloads: create the right child
    //! and a fresh tree-node continuation, then spawn the right child.
    template <typename... Args>
    void offer_work_impl(execution_data& ed, Args&&... args) {
        small_object_allocator alloc{};
        // New right child
        auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc);

        // New root node as a continuation and ref count. Left and right child attach to the new parent.
        right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc);

        // Spawn the right sibling
        right_child->spawn_self(ed);
    }

    void spawn_self(execution_data& ed) {
        my_partition.spawn_task(*this, *context(ed));
    }
};
180 | |
//! fold the tree and deallocate the task
template<typename Range, typename Body, typename Partitioner>
void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
    // Get the current parent and wait object before an object destruction
    node* parent = my_parent;
    // Copy the allocator: it is a member of this task, which is destroyed next.
    auto allocator = my_allocator;
    // Task execution finished - destroy it
    this->~start_reduce();
    // Unwind the tree decrementing the parent`s reference count
    fold_tree<tree_node_type>(parent, ed);
    // Deallocation is safe after destruction: `this` is used only as an address.
    allocator.deallocate(this, ed);
}
193 | |
194 | //! Execute parallel_reduce task |
195 | template<typename Range, typename Body, typename Partitioner> |
196 | task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { |
197 | if (!is_same_affinity(ed)) { |
198 | my_partition.note_affinity(execution_slot(ed)); |
199 | } |
200 | my_partition.check_being_stolen(*this, ed); |
201 | |
202 | // The acquire barrier synchronizes the data pointed with my_body if the left |
203 | // task has already finished. |
204 | if( is_right_child && my_parent->m_ref_count.load(m: std::memory_order_acquire) == 2 ) { |
205 | tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); |
206 | my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); |
207 | parent_ptr->has_right_zombie = true; |
208 | } |
209 | __TBB_ASSERT(my_body != nullptr, "Incorrect body value" ); |
210 | |
211 | my_partition.execute(*this, my_range, ed); |
212 | |
213 | finalize(ed); |
214 | return nullptr; |
215 | } |
216 | |
//! Cancel parallel_reduce task
/** Invoked instead of execute() when the group is cancelled; still folds the
    tree so parent reference counts are released and memory is reclaimed. */
template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
    finalize(ed);
    return nullptr;
}
223 | |
//! Tree node type for parallel_deterministic_reduce.
/** Unlike reduction_tree_node, the right body is split from the left body
    eagerly, at node construction, so the reduction tree's shape does not
    depend on run-time scheduling.
    @ingroup algorithms */
template<typename Body>
struct deterministic_reduction_tree_node : public tree_node {
    Body right_body;  // split from left_body at construction; accumulates the right subtree
    Body& left_body;  // body accumulating the left subtree's result

    deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
        tree_node{parent, ref_count, alloc},
        right_body{input_left_body, detail::split()},
        left_body(input_left_body)
    {}

    //! Merge the right subtree's result into the left body (skipped on cancellation).
    void join(task_group_context* context) {
        if (!context->is_group_execution_cancelled())
            left_body.join(right_body);
    }
};
242 | |
//! Task type used to split the work of parallel_deterministic_reduce.
/** The body is split eagerly whenever a tree node is created (see
    deterministic_reduction_tree_node), so each task owns a fixed body
    reference and join order is determined solely by range splitting.
    @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_deterministic_reduce : public task {
    Range my_range;   // subrange of the iteration space assigned to this task
    Body& my_body;    // body accumulating this task's partial result
    node* my_parent;  // continuation node (tree node or the root wait_node)

    typename Partitioner::task_partition_type my_partition;
    small_object_allocator my_allocator;  // allocator that created this task; used to deallocate it

    task* execute(execution_data&) override;
    task* cancel(execution_data&) override;
    void finalize(const execution_data&);

    using tree_node_type = deterministic_reduction_tree_node<Body>;

    //! Constructor deterministic_reduce root task.
    start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) :
        my_range(range),
        my_body(body),
        my_partition(partitioner),
        my_allocator(alloc) {}
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child.  Newly constructed object is right child. */
    start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body,
                                small_object_allocator& alloc ) :
        my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
        my_body(body),
        my_partition(parent_.my_partition, split_obj),
        my_allocator(alloc) {}
    //! Launch the reduction in the given context and block until it completes.
    static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
        if ( !range.empty() ) {
            wait_node wn;
            small_object_allocator alloc{};
            auto deterministic_reduce_task =
                alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc);
            deterministic_reduce_task->my_parent = &wn;  // the root's "parent" is the wait node
            execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context);
        }
    }
    //! Launch the reduction in an internally created, bound context.
    static void run(const Range& range, Body& body, Partitioner& partitioner) {
        // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
        // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce
        // in the try-block.
        task_group_context context(PARALLEL_REDUCE);
        run(range, body, partitioner, context);
    }
    //! Run body for range, serves as callback for partitioner
    void run_body( Range &r ) {
        my_body( r );
    }
    //! Spawn right task, serves as callback for partitioner
    void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
        offer_work_impl(ed, *this, split_obj);
    }
private:
    //! Create the continuation node (which eagerly splits the body), then
    //! create and spawn the right child bound to the node's right body.
    template <typename... Args>
    void offer_work_impl(execution_data& ed, Args&&... args) {
        small_object_allocator alloc{};
        // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body.
        auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc);

        // New right child
        auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc);

        right_child->my_parent = my_parent = new_tree_node;

        // Spawn the right sibling
        right_child->spawn_self(ed);
    }

    void spawn_self(execution_data& ed) {
        my_partition.spawn_task(*this, *context(ed));
    }
};
319 | |
//! Fold the tree and deallocate the task
template<typename Range, typename Body, typename Partitioner>
void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
    // Get the current parent and wait object before an object destruction
    node* parent = my_parent;

    // Copy the allocator: it is a member of this task, which is destroyed next.
    auto allocator = my_allocator;
    // Task execution finished - destroy it
    this->~start_deterministic_reduce();
    // Unwind the tree decrementing the parent`s reference count
    fold_tree<tree_node_type>(parent, ed);
    // Deallocation is safe after destruction: `this` is used only as an address.
    allocator.deallocate(this, ed);
}
333 | |
334 | //! Execute parallel_deterministic_reduce task |
335 | template<typename Range, typename Body, typename Partitioner> |
336 | task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { |
337 | if (!is_same_affinity(ed)) { |
338 | my_partition.note_affinity(execution_slot(ed)); |
339 | } |
340 | my_partition.check_being_stolen(*this, ed); |
341 | |
342 | my_partition.execute(*this, my_range, ed); |
343 | |
344 | finalize(ed); |
345 | return NULL; |
346 | } |
347 | |
348 | //! Cancel parallel_deterministic_reduce task |
349 | template<typename Range, typename Body, typename Partitioner> |
350 | task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { |
351 | finalize(ed); |
352 | return NULL; |
353 | } |
354 | |
355 | |
356 | //! Auxiliary class for parallel_reduce; for internal use only. |
357 | /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" |
358 | using given \ref parallel_reduce_lambda_req "anonymous function objects". |
359 | **/ |
360 | /** @ingroup algorithms */ |
361 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
362 | class lambda_reduce_body { |
363 | //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced |
364 | // (might require some performance measurements) |
365 | |
366 | const Value& my_identity_element; |
367 | const RealBody& my_real_body; |
368 | const Reduction& my_reduction; |
369 | Value my_value; |
370 | lambda_reduce_body& operator= ( const lambda_reduce_body& other ); |
371 | public: |
372 | lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) |
373 | : my_identity_element(identity) |
374 | , my_real_body(body) |
375 | , my_reduction(reduction) |
376 | , my_value(identity) |
377 | { } |
378 | lambda_reduce_body( const lambda_reduce_body& other ) = default; |
379 | lambda_reduce_body( lambda_reduce_body& other, tbb::split ) |
380 | : my_identity_element(other.my_identity_element) |
381 | , my_real_body(other.my_real_body) |
382 | , my_reduction(other.my_reduction) |
383 | , my_value(other.my_identity_element) |
384 | { } |
385 | void operator()(Range& range) { |
386 | my_value = my_real_body(range, const_cast<const Value&>(my_value)); |
387 | } |
388 | void join( lambda_reduce_body& rhs ) { |
389 | my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); |
390 | } |
391 | Value result() const { |
392 | return my_value; |
393 | } |
394 | }; |
395 | |
396 | |
397 | // Requirements on Range concept are documented in blocked_range.h |
398 | |
399 | /** \page parallel_reduce_body_req Requirements on parallel_reduce body |
400 | Class \c Body implementing the concept of parallel_reduce body must define: |
401 | - \code Body::Body( Body&, split ); \endcode Splitting constructor. |
402 | Must be able to run concurrently with operator() and method \c join |
403 | - \code Body::~Body(); \endcode Destructor |
404 | - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r |
405 | and accumulating the result |
406 | - \code void Body::join( Body& b ); \endcode Join results. |
407 | The result in \c b should be merged into the result of \c this |
408 | **/ |
409 | |
/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    - \code Value RealBody::operator()( const Range& r, const Value& v ) const; \endcode
      Accumulate the result over subrange \c r, starting from initial value \c v
    - \code Value Reduction::operator()( const Value& lhs, const Value& rhs ) const; \endcode
      Combine two partial results; \c identity must be the identity element of this operation
**/
413 | |
414 | /** \name parallel_reduce |
415 | See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ |
416 | //@{ |
417 | |
//! Parallel iteration with reduction and default partitioner.
/** Blocks until complete; the result is accumulated in \c body.
    A bound task_group_context is created internally to isolate exceptions.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body ) {
    start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}
425 | |
//! Parallel iteration with reduction and simple_partitioner
/** Blocks until complete; the result is accumulated in \c body.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}
433 | |
//! Parallel iteration with reduction and auto_partitioner
/** Blocks until complete; the result is accumulated in \c body.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
    start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}
441 | |
//! Parallel iteration with reduction and static_partitioner
/** Blocks until complete; the result is accumulated in \c body.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
}
449 | |
//! Parallel iteration with reduction and affinity_partitioner
/** The partitioner is non-const: it records affinity state across invocations.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
    start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}
457 | |
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
    start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
}
465 | |
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}
473 | |
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}
481 | |
//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
}
489 | |
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    The partitioner is non-const: it records affinity state across invocations.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
497 | /** parallel_reduce overloads that work with anonymous function objects |
498 | (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ |
499 | |
//! Parallel iteration with reduction and default partitioner.
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    A bound task_group_context is created internally to isolate exceptions.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
    return body.result();
}
511 | |
//! Parallel iteration with reduction and simple_partitioner.
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run(range, body, partitioner );
    return body.result();
}
524 | |
//! Parallel iteration with reduction and auto_partitioner
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}
537 | |
//! Parallel iteration with reduction and static_partitioner
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}
550 | |
//! Parallel iteration with reduction and affinity_partitioner
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    The partitioner is non-const: it records affinity state across invocations.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}
563 | |
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
    return body.result();
}
576 | |
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
589 | |
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
602 | |
//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
615 | |
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    The partitioner is non-const: it records affinity state across invocations.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
628 | |
//! Parallel iteration with deterministic reduction and default simple partitioner.
/** The body is split at every tree node, so join order depends only on how the
    range splits - not on run-time load balancing.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
}
636 | |
//! Parallel iteration with deterministic reduction and simple partitioner.
/** Join order depends only on how the range splits - not on run-time load balancing.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
}
644 | |
645 | //! Parallel iteration with deterministic reduction and static partitioner. |
646 | /** @ingroup algorithms **/ |
647 | template<typename Range, typename Body> |
648 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
649 | void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { |
650 | start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); |
651 | } |
652 | |
653 | //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. |
654 | /** @ingroup algorithms **/ |
655 | template<typename Range, typename Body> |
656 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
657 | void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { |
658 | start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); |
659 | } |
660 | |
661 | //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. |
662 | /** @ingroup algorithms **/ |
663 | template<typename Range, typename Body> |
664 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
665 | void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { |
666 | start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); |
667 | } |
668 | |
669 | //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. |
670 | /** @ingroup algorithms **/ |
671 | template<typename Range, typename Body> |
672 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
673 | void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { |
674 | start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); |
675 | } |
676 | |
/** parallel_deterministic_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
679 | |
680 | //! Parallel iteration with deterministic reduction and default simple partitioner. |
681 | // TODO: consider making static_partitioner the default |
682 | /** @ingroup algorithms **/ |
683 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
684 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
685 | parallel_reduce_combine<Reduction, Value>) |
686 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { |
687 | return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); |
688 | } |
689 | |
690 | //! Parallel iteration with deterministic reduction and simple partitioner. |
691 | /** @ingroup algorithms **/ |
692 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
693 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
694 | parallel_reduce_combine<Reduction, Value>) |
695 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { |
696 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
697 | start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> |
698 | ::run(range, body, partitioner); |
699 | return body.result(); |
700 | } |
701 | |
702 | //! Parallel iteration with deterministic reduction and static partitioner. |
703 | /** @ingroup algorithms **/ |
704 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
705 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
706 | parallel_reduce_combine<Reduction, Value>) |
707 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { |
708 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
709 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> |
710 | ::run(range, body, partitioner); |
711 | return body.result(); |
712 | } |
713 | |
714 | //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. |
715 | /** @ingroup algorithms **/ |
716 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
717 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
718 | parallel_reduce_combine<Reduction, Value>) |
719 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
720 | task_group_context& context ) { |
721 | return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); |
722 | } |
723 | |
724 | //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. |
725 | /** @ingroup algorithms **/ |
726 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
727 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
728 | parallel_reduce_combine<Reduction, Value>) |
729 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
730 | const simple_partitioner& partitioner, task_group_context& context ) { |
731 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
732 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> |
733 | ::run(range, body, partitioner, context); |
734 | return body.result(); |
735 | } |
736 | |
737 | //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. |
738 | /** @ingroup algorithms **/ |
739 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
740 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
741 | parallel_reduce_combine<Reduction, Value>) |
742 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
743 | const static_partitioner& partitioner, task_group_context& context ) { |
744 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
745 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> |
746 | ::run(range, body, partitioner, context); |
747 | return body.result(); |
748 | } |
749 | //@} |
750 | |
751 | } // namespace d1 |
752 | } // namespace detail |
753 | |
// Public API surface: re-export the implementation-namespace entry points so users
// reference tbb::parallel_reduce etc.; the inline namespace allows future ABI versioning.
inline namespace v1 {
using detail::d1::parallel_reduce;
using detail::d1::parallel_deterministic_reduce;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1
761 | |
762 | } // namespace tbb |
763 | #endif /* __TBB_parallel_reduce_H */ |
764 | |