1 | /* |
2 | Copyright (c) 2005-2021 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #ifndef __TBB_parallel_reduce_H |
18 | #define __TBB_parallel_reduce_H |
19 | |
20 | #include <new> |
21 | #include "detail/_namespace_injection.h" |
22 | #include "detail/_task.h" |
23 | #include "detail/_aligned_space.h" |
24 | #include "detail/_small_object_pool.h" |
25 | #include "detail/_range_common.h" |
26 | |
27 | #include "task_group.h" // task_group_context |
28 | #include "partitioner.h" |
29 | #include "profiling.h" |
30 | |
31 | namespace tbb { |
32 | namespace detail { |
33 | #if __TBB_CPP20_CONCEPTS_PRESENT |
34 | inline namespace d0 { |
35 | |
// A Body is usable with parallel_reduce if it is splittable (has a splitting
// constructor), can be applied to a subrange via operator(), and can absorb
// another body's partial result via join().
template <typename Body, typename Range>
concept parallel_reduce_body = splittable<Body> &&
                               requires( Body& body, const Range& range, Body& rhs ) {
                                   body(range);
                                   body.join(rhs);
                               };
42 | |
// Functional (lambda) form: func(range, value) accumulates over the subrange
// starting from value, and must yield a result convertible back to Value.
template <typename Function, typename Range, typename Value>
concept parallel_reduce_function = requires( const std::remove_reference_t<Function>& func,
                                             const Range& range,
                                             const Value& value ) {
    { func(range, value) } -> std::convertible_to<Value>;
};
49 | |
// Combiner form: combine(lhs, rhs) merges two partial results and must yield
// a result convertible back to Value.
template <typename Combine, typename Value>
concept parallel_reduce_combine = requires( const std::remove_reference_t<Combine>& combine,
                                            const Value& lhs, const Value& rhs ) {
    { combine(lhs, rhs) } -> std::convertible_to<Value>;
};
55 | |
56 | } // namespace d0 |
57 | #endif // __TBB_CPP20_CONCEPTS_PRESENT |
58 | namespace d1 { |
59 | |
//! Tree node type for parallel_reduce.
/** Continuation node that joins the partial results of its two children once
    both complete.  The right child's body (the "zombie") is only constructed
    lazily, in zombie_space, if the right task actually runs with a split body.
    @ingroup algorithms */
//TODO: consider folding tree via bypass execution(instead of manual folding)
// for better cancellation and critical tasks handling (performance measurements required).
template<typename Body>
struct reduction_tree_node : public tree_node {
    tbb::detail::aligned_space<Body> zombie_space;  // raw storage for the right child's split body
    Body& left_body;                                // body accumulating the left child's result
    bool has_right_zombie{false};                   // set once a Body was placement-constructed in zombie_space

    reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
        tree_node{parent, ref_count, alloc},
        left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */
    {}

    //! Merge the right child's result into the left body; skipped if the
    //! right body was never split off or the group was cancelled.
    void join(task_group_context* context) {
        if (has_right_zombie && !context->is_group_execution_cancelled())
            left_body.join(*zombie_space.begin());
    }

    ~reduction_tree_node() {
        // Manually destroy the lazily placement-constructed right body, if any.
        if( has_right_zombie ) zombie_space.begin()->~Body();
    }
};
84 | |
//! Task type used to split the work of parallel_reduce.
/** Recursively splits its range via the partitioner; each split creates a new
    reduction_tree_node continuation that joins the partial results when both
    children finish.
    @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_reduce : public task {
    Range my_range;   // subrange of the iteration space assigned to this task
    Body* my_body;    // body accumulating this task's partial result; repointed to a
                      // freshly split copy in execute() when a right child is stolen
    node* my_parent;  // continuation node (reduction_tree_node or the root wait_node)

    typename Partitioner::task_partition_type my_partition;
    small_object_allocator my_allocator;  // allocator that created this task; used to deallocate it
    bool is_right_child;                  // true while this task is the right-most child of its parent

    task* execute(execution_data&) override;
    task* cancel(execution_data&) override;
    void finalize(const execution_data&);

    using tree_node_type = reduction_tree_node<Body>;

    //! Constructor reduce root task.
    start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
        my_range(range),
        my_body(&body),
        my_partition(partitioner),
        my_allocator(alloc),
        is_right_child(false) {}
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child.  Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
        my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split_obj),
        my_allocator(alloc),
        is_right_child(true)
    {
        // The parent just gained a right child, so it is no longer one itself.
        parent_.is_right_child = false;
    }
    //! Construct right child from the given range as response to the demand.
    /** parent_ remains left child.  Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
        my_range(r),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split()),
        my_allocator(alloc),
        is_right_child(true)
    {
        my_partition.align_depth( d );
        parent_.is_right_child = false;
    }
    //! Launch the reduction in the given context and block until it completes.
    static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
        if ( !range.empty() ) {
            wait_node wn;
            small_object_allocator alloc{};
            auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc);
            reduce_task->my_parent = &wn;  // the root's "parent" is the wait node the caller blocks on
            execute_and_wait(*reduce_task, context, wn.m_wait, context);
        }
    }
    //! Launch the reduction in an internally created, bound context.
    static void run(const Range& range, Body& body, Partitioner& partitioner) {
        // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
        // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
        task_group_context context(PARALLEL_REDUCE);
        run(range, body, partitioner, context);
    }
    //! Run body for range, serves as callback for partitioner
    void run_body( Range &r ) {
        (*my_body)(r);
    }

    //! spawn right task, serves as callback for partitioner
    void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
        offer_work_impl(ed, *this, split_obj);
    }
    //! spawn right task, serves as callback for partitioner
    void offer_work(const Range& r, depth_t d, execution_data& ed) {
        offer_work_impl(ed, *this, r, d);
    }

private:
    //! Common machinery for both offer_work overloads: create the right child
    //! and a fresh tree-node continuation, then spawn the right child.
    template <typename... Args>
    void offer_work_impl(execution_data& ed, Args&&... args) {
        small_object_allocator alloc{};
        // New right child
        auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc);

        // New root node as a continuation and ref count. Left and right child attach to the new parent.
        right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc);

        // Spawn the right sibling
        right_child->spawn_self(ed);
    }

    void spawn_self(execution_data& ed) {
        my_partition.spawn_task(*this, *context(ed));
    }
};
180 | |
//! fold the tree and deallocate the task
template<typename Range, typename Body, typename Partitioner>
void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
    // Get the current parent and wait object before an object destruction
    node* parent = my_parent;
    // Copy the allocator: it is a member of this task, which is destroyed next.
    auto allocator = my_allocator;
    // Task execution finished - destroy it
    this->~start_reduce();
    // Unwind the tree decrementing the parent`s reference count
    fold_tree<tree_node_type>(parent, ed);
    // Deallocation is safe after destruction: `this` is used only as an address.
    allocator.deallocate(this, ed);
}
193 | |
194 | //! Execute parallel_reduce task |
195 | template<typename Range, typename Body, typename Partitioner> |
196 | task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { |
197 | if (!is_same_affinity(ed)) { |
198 | my_partition.note_affinity(execution_slot(ed)); |
199 | } |
200 | my_partition.check_being_stolen(*this, ed); |
201 | |
202 | // The acquire barrier synchronizes the data pointed with my_body if the left |
203 | // task has already finished. |
204 | if( is_right_child && my_parent->m_ref_count.load(m: std::memory_order_acquire) == 2 ) { |
205 | tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); |
206 | my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); |
207 | parent_ptr->has_right_zombie = true; |
208 | } |
209 | __TBB_ASSERT(my_body != nullptr, "Incorrect body value" ); |
210 | |
211 | my_partition.execute(*this, my_range, ed); |
212 | |
213 | finalize(ed); |
214 | return nullptr; |
215 | } |
216 | |
//! Cancel parallel_reduce task
/** Invoked instead of execute() when the group is cancelled; still folds the
    tree so parent reference counts are released and memory is reclaimed. */
template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
    finalize(ed);
    return nullptr;
}
223 | |
//! Tree node type for parallel_deterministic_reduce.
/** Unlike reduction_tree_node, the right body is split from the left body
    eagerly, at node construction, so the reduction tree's shape does not
    depend on run-time scheduling.
    @ingroup algorithms */
template<typename Body>
struct deterministic_reduction_tree_node : public tree_node {
    Body right_body;  // split from left_body at construction; accumulates the right subtree
    Body& left_body;  // body accumulating the left subtree's result

    deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
        tree_node{parent, ref_count, alloc},
        right_body{input_left_body, detail::split()},
        left_body(input_left_body)
    {}

    //! Merge the right subtree's result into the left body (skipped on cancellation).
    void join(task_group_context* context) {
        if (!context->is_group_execution_cancelled())
            left_body.join(right_body);
    }
};
242 | |
//! Task type used to split the work of parallel_deterministic_reduce.
/** The body is split eagerly whenever a tree node is created (see
    deterministic_reduction_tree_node), so each task owns a fixed body
    reference and join order is determined solely by range splitting.
    @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_deterministic_reduce : public task {
    Range my_range;   // subrange of the iteration space assigned to this task
    Body& my_body;    // body accumulating this task's partial result
    node* my_parent;  // continuation node (tree node or the root wait_node)

    typename Partitioner::task_partition_type my_partition;
    small_object_allocator my_allocator;  // allocator that created this task; used to deallocate it

    task* execute(execution_data&) override;
    task* cancel(execution_data&) override;
    void finalize(const execution_data&);

    using tree_node_type = deterministic_reduction_tree_node<Body>;

    //! Constructor deterministic_reduce root task.
    start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) :
        my_range(range),
        my_body(body),
        my_partition(partitioner),
        my_allocator(alloc) {}
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child.  Newly constructed object is right child. */
    start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body,
                                small_object_allocator& alloc ) :
        my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
        my_body(body),
        my_partition(parent_.my_partition, split_obj),
        my_allocator(alloc) {}
    //! Launch the reduction in the given context and block until it completes.
    static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
        if ( !range.empty() ) {
            wait_node wn;
            small_object_allocator alloc{};
            auto deterministic_reduce_task =
                alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc);
            deterministic_reduce_task->my_parent = &wn;  // the root's "parent" is the wait node
            execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context);
        }
    }
    //! Launch the reduction in an internally created, bound context.
    static void run(const Range& range, Body& body, Partitioner& partitioner) {
        // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
        // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce
        // in the try-block.
        task_group_context context(PARALLEL_REDUCE);
        run(range, body, partitioner, context);
    }
    //! Run body for range, serves as callback for partitioner
    void run_body( Range &r ) {
        my_body( r );
    }
    //! Spawn right task, serves as callback for partitioner
    void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
        offer_work_impl(ed, *this, split_obj);
    }
private:
    //! Create the continuation node (which eagerly splits the body), then
    //! create and spawn the right child bound to the node's right body.
    template <typename... Args>
    void offer_work_impl(execution_data& ed, Args&&... args) {
        small_object_allocator alloc{};
        // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body.
        auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc);

        // New right child
        auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc);

        right_child->my_parent = my_parent = new_tree_node;

        // Spawn the right sibling
        right_child->spawn_self(ed);
    }

    void spawn_self(execution_data& ed) {
        my_partition.spawn_task(*this, *context(ed));
    }
};
319 | |
//! Fold the tree and deallocate the task
template<typename Range, typename Body, typename Partitioner>
void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
    // Get the current parent and wait object before an object destruction
    node* parent = my_parent;

    // Copy the allocator: it is a member of this task, which is destroyed next.
    auto allocator = my_allocator;
    // Task execution finished - destroy it
    this->~start_deterministic_reduce();
    // Unwind the tree decrementing the parent`s reference count
    fold_tree<tree_node_type>(parent, ed);
    // Deallocation is safe after destruction: `this` is used only as an address.
    allocator.deallocate(this, ed);
}
333 | |
334 | //! Execute parallel_deterministic_reduce task |
335 | template<typename Range, typename Body, typename Partitioner> |
336 | task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { |
337 | if (!is_same_affinity(ed)) { |
338 | my_partition.note_affinity(execution_slot(ed)); |
339 | } |
340 | my_partition.check_being_stolen(*this, ed); |
341 | |
342 | my_partition.execute(*this, my_range, ed); |
343 | |
344 | finalize(ed); |
345 | return NULL; |
346 | } |
347 | |
348 | //! Cancel parallel_deterministic_reduce task |
349 | template<typename Range, typename Body, typename Partitioner> |
350 | task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { |
351 | finalize(ed); |
352 | return NULL; |
353 | } |
354 | |
355 | |
356 | //! Auxiliary class for parallel_reduce; for internal use only. |
357 | /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" |
358 | using given \ref parallel_reduce_lambda_req "anonymous function objects". |
359 | **/ |
360 | /** @ingroup algorithms */ |
361 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
362 | class lambda_reduce_body { |
363 | //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced |
364 | // (might require some performance measurements) |
365 | |
366 | const Value& my_identity_element; |
367 | const RealBody& my_real_body; |
368 | const Reduction& my_reduction; |
369 | Value my_value; |
370 | lambda_reduce_body& operator= ( const lambda_reduce_body& other ); |
371 | public: |
372 | lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) |
373 | : my_identity_element(identity) |
374 | , my_real_body(body) |
375 | , my_reduction(reduction) |
376 | , my_value(identity) |
377 | { } |
378 | lambda_reduce_body( const lambda_reduce_body& other ) = default; |
379 | lambda_reduce_body( lambda_reduce_body& other, tbb::split ) |
380 | : my_identity_element(other.my_identity_element) |
381 | , my_real_body(other.my_real_body) |
382 | , my_reduction(other.my_reduction) |
383 | , my_value(other.my_identity_element) |
384 | { } |
385 | void operator()(Range& range) { |
386 | my_value = my_real_body(range, const_cast<const Value&>(my_value)); |
387 | } |
388 | void join( lambda_reduce_body& rhs ) { |
389 | my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); |
390 | } |
391 | Value result() const { |
392 | return my_value; |
393 | } |
394 | }; |
395 | |
396 | |
397 | // Requirements on Range concept are documented in blocked_range.h |
398 | |
399 | /** \page parallel_reduce_body_req Requirements on parallel_reduce body |
400 | Class \c Body implementing the concept of parallel_reduce body must define: |
401 | - \code Body::Body( Body&, split ); \endcode Splitting constructor. |
402 | Must be able to run concurrently with operator() and method \c join |
403 | - \code Body::~Body(); \endcode Destructor |
404 | - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r |
405 | and accumulating the result |
406 | - \code void Body::join( Body& b ); \endcode Join results. |
407 | The result in \c b should be merged into the result of \c this |
408 | **/ |
409 | |
/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    - \code Value RealBody::operator()( const Range& r, const Value& v ) const; \endcode
      Accumulate the result over subrange \c r, starting from initial value \c v
    - \code Value Reduction::operator()( const Value& lhs, const Value& rhs ) const; \endcode
      Combine two partial results; \c identity must be the identity element of this operation
**/
413 | |
414 | /** \name parallel_reduce |
415 | See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ |
416 | //@{ |
417 | |
//! Parallel iteration with reduction and default partitioner.
/** Blocks until complete; the result is accumulated in \c body.
    A bound task_group_context is created internally to isolate exceptions.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body ) {
    start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}
425 | |
//! Parallel iteration with reduction and simple_partitioner
/** Blocks until complete; the result is accumulated in \c body.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}
433 | |
//! Parallel iteration with reduction and auto_partitioner
/** Blocks until complete; the result is accumulated in \c body.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
    start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}
441 | |
//! Parallel iteration with reduction and static_partitioner
/** Blocks until complete; the result is accumulated in \c body.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
}
449 | |
//! Parallel iteration with reduction and affinity_partitioner
/** The partitioner is non-const: it records affinity state across invocations.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
    start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}
457 | |
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
    start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
}
465 | |
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}
473 | |
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}
481 | |
//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
}
489 | |
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** Exceptions and cancellation propagate through the caller-supplied \c context.
    The partitioner is non-const: it records affinity state across invocations.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
497 | /** parallel_reduce overloads that work with anonymous function objects |
498 | (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ |
499 | |
//! Parallel iteration with reduction and default partitioner.
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    A bound task_group_context is created internally to isolate exceptions.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
    return body.result();
}
511 | |
//! Parallel iteration with reduction and simple_partitioner.
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run(range, body, partitioner );
    return body.result();
}
524 | |
//! Parallel iteration with reduction and auto_partitioner
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}
537 | |
//! Parallel iteration with reduction and static_partitioner
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}
550 | |
//! Parallel iteration with reduction and affinity_partitioner
/** Returns the reduced value; \c identity must be the identity element of \c reduction.
    The partitioner is non-const: it records affinity state across invocations.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}
563 | |
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
    return body.result();
}
576 | |
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
589 | |
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
602 | |
//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
615 | |
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** Returns the reduced value; exceptions and cancellation propagate through \c context.
    The partitioner is non-const: it records affinity state across invocations.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
               parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
628 | |
//! Parallel iteration with deterministic reduction and default simple partitioner.
/** The body is split at every tree node, so join order depends only on how the
    range splits - not on run-time load balancing.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
}
636 | |
//! Parallel iteration with deterministic reduction and simple partitioner.
/** Join order depends only on how the range splits - not on run-time load balancing.
    @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
}
644 | |
645 | //! Parallel iteration with deterministic reduction and static partitioner. |
646 | /** @ingroup algorithms **/ |
647 | template<typename Range, typename Body> |
648 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
649 | void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { |
650 | start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); |
651 | } |
652 | |
653 | //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. |
654 | /** @ingroup algorithms **/ |
655 | template<typename Range, typename Body> |
656 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
657 | void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { |
658 | start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); |
659 | } |
660 | |
661 | //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. |
662 | /** @ingroup algorithms **/ |
663 | template<typename Range, typename Body> |
664 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
665 | void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { |
666 | start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); |
667 | } |
668 | |
669 | //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. |
670 | /** @ingroup algorithms **/ |
671 | template<typename Range, typename Body> |
672 | __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) |
673 | void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { |
674 | start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); |
675 | } |
676 | |
/** parallel_deterministic_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
679 | |
680 | //! Parallel iteration with deterministic reduction and default simple partitioner. |
681 | // TODO: consider making static_partitioner the default |
682 | /** @ingroup algorithms **/ |
683 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
684 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
685 | parallel_reduce_combine<Reduction, Value>) |
686 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { |
687 | return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); |
688 | } |
689 | |
690 | //! Parallel iteration with deterministic reduction and simple partitioner. |
691 | /** @ingroup algorithms **/ |
692 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
693 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
694 | parallel_reduce_combine<Reduction, Value>) |
695 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { |
696 | lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); |
697 | start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> |
698 | ::run(range, body, partitioner); |
699 | return body.result(); |
700 | } |
701 | |
702 | //! Parallel iteration with deterministic reduction and static partitioner. |
703 | /** @ingroup algorithms **/ |
704 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
705 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
706 | parallel_reduce_combine<Reduction, Value>) |
707 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { |
708 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
709 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> |
710 | ::run(range, body, partitioner); |
711 | return body.result(); |
712 | } |
713 | |
714 | //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. |
715 | /** @ingroup algorithms **/ |
716 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
717 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
718 | parallel_reduce_combine<Reduction, Value>) |
719 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
720 | task_group_context& context ) { |
721 | return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); |
722 | } |
723 | |
724 | //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. |
725 | /** @ingroup algorithms **/ |
726 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
727 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
728 | parallel_reduce_combine<Reduction, Value>) |
729 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
730 | const simple_partitioner& partitioner, task_group_context& context ) { |
731 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
732 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> |
733 | ::run(range, body, partitioner, context); |
734 | return body.result(); |
735 | } |
736 | |
737 | //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. |
738 | /** @ingroup algorithms **/ |
739 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
740 | __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && |
741 | parallel_reduce_combine<Reduction, Value>) |
742 | Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, |
743 | const static_partitioner& partitioner, task_group_context& context ) { |
744 | lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); |
745 | start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> |
746 | ::run(range, body, partitioner, context); |
747 | return body.result(); |
748 | } |
749 | //@} |
750 | |
751 | } // namespace d1 |
752 | } // namespace detail |
753 | |
// Public API surface: re-export the implementation-namespace entry points so users
// reference tbb::parallel_reduce etc.; the inline namespace allows future ABI versioning.
inline namespace v1 {
using detail::d1::parallel_reduce;
using detail::d1::parallel_deterministic_reduce;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1
761 | |
762 | } // namespace tbb |
763 | #endif /* __TBB_parallel_reduce_H */ |
764 | |