/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H

#ifndef __TBB_INITIAL_CHUNKS
// initial task divisions per thread
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
// maximum number of elements in range pool
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
// initial value for depth of range pool
#define __TBB_INIT_DEPTH 5
#endif
#ifndef __TBB_DEMAND_DEPTH_ADD
// when imbalance is detected, the range is split this many additional times
#define __TBB_DEMAND_DEPTH_ADD 1
#endif
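
// All four macros above are tuning knobs guarded by #ifndef, so a translation unit may,
// in principle, override them before this header is first included. A minimal sketch with
// hypothetical values (the defaults defined above are normally sufficient):
//
//     #define __TBB_RANGE_POOL_CAPACITY 16 // larger per-task range pool
//     #define __TBB_INIT_DEPTH 6           // deeper initial range-pool recursion
//     #include "oneapi/tbb/partitioner.h"
//
// Any override must keep the partitioners' internal assumptions valid; for example,
// affinity_partition_type below asserts that its maximum depth is less than
// __TBB_RANGE_POOL_CAPACITY.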

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_aligned_space.h"
#include "detail/_utils.h"
#include "detail/_template_helpers.h"
#include "detail/_range_common.h"
#include "detail/_task.h"
#include "detail/_small_object_pool.h"

#include "cache_aligned_allocator.h"
#include "task_group.h" // task_group_context
#include "task_arena.h"

#include <algorithm>
#include <atomic>
#include <type_traits>

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244)
#endif

namespace tbb {
namespace detail {

namespace d1 {
class auto_partitioner;
class simple_partitioner;
class static_partitioner;
class affinity_partitioner;
class affinity_partition_type;
class affinity_partitioner_base;

inline std::size_t get_initial_auto_partitioner_divisor() {
    const std::size_t factor = 4;
    return factor * max_concurrency();
}

//! Defines entry point for affinity partitioner into oneTBB run-time library.
class affinity_partitioner_base: no_copy {
    friend class affinity_partitioner;
    friend class affinity_partition_type;
    //! Array that remembers affinities of tree positions to affinity_id.
    /** NULL if my_size==0. */
    slot_id* my_array;
    //! Number of elements in my_array.
    std::size_t my_size;
    //! Zeros the fields.
    affinity_partitioner_base() : my_array(nullptr), my_size(0) {}
    //! Deallocates my_array.
    ~affinity_partitioner_base() { resize(0); }
    //! Resize my_array.
    /** Retains values if resulting size is the same. */
    void resize(unsigned factor) {
        // Check factor to avoid asking for number of workers while there might be no arena.
        unsigned max_threads_in_arena = max_concurrency();
        std::size_t new_size = factor ? factor * max_threads_in_arena : 0;
        if (new_size != my_size) {
            if (my_array) {
                r1::cache_aligned_deallocate(my_array);
                // Following two assignments must be done here for sake of exception safety.
                my_array = nullptr;
                my_size = 0;
            }
            if (new_size) {
                my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id)));
                std::fill_n(my_array, new_size, no_slot);
                my_size = new_size;
            }
        }
    }
};

template<typename Range, typename Body, typename Partitioner> struct start_for;
template<typename Range, typename Body, typename Partitioner> struct start_scan;
template<typename Range, typename Body, typename Partitioner> struct start_reduce;
template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce;

struct node {
    node* my_parent{};
    std::atomic<int> m_ref_count{};

    node() = default;
    node(node* parent, int ref_count) :
        my_parent{parent}, m_ref_count{ref_count} {
        __TBB_ASSERT(ref_count > 0, "The ref count must be positive");
    }
};

struct wait_node : node {
    wait_node() : node{ nullptr, 1 } {}
    wait_context m_wait{1};
};

//! Join task node that contains shared flag for stealing feedback
struct tree_node : public node {
    small_object_allocator m_allocator;
    std::atomic<bool> m_child_stolen{false};

    tree_node(node* parent, int ref_count, small_object_allocator& alloc)
        : node{parent, ref_count}
        , m_allocator{alloc} {}

    void join(task_group_context*) {/*dummy, required only for reduction algorithms*/}

    template <typename Task>
    static void mark_task_stolen(Task &t) {
        std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen;
#if TBB_USE_PROFILING_TOOLS
        // Threading tools respect lock prefix but report false-positive data-race via plain store
        flag.exchange(true);
#else
        flag.store(true, std::memory_order_relaxed);
#endif // TBB_USE_PROFILING_TOOLS
    }
    template <typename Task>
    static bool is_peer_stolen(Task &t) {
        return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed);
    }
};

// Context used to check cancellation state during reduction join process
template<typename TreeNodeType>
void fold_tree(node* n, const execution_data& ed) {
    for (;;) {
        __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive.");
        call_itt_task_notify(releasing, n);
        if (--n->m_ref_count > 0) {
            return;
        }
        node* parent = n->my_parent;
        if (!parent) {
            break;
        }

        call_itt_task_notify(acquired, n);
        TreeNodeType* self = static_cast<TreeNodeType*>(n);
        self->join(ed.context);
        self->m_allocator.delete_object(self, ed);
        n = parent;
    }
    // Finish parallel for execution when the root (last node) is reached
    static_cast<wait_node*>(n)->m_wait.release();
}

//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows
//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented
//! by a number that cannot fit into machine word.
typedef unsigned char depth_t;

//! Range pool stores ranges of type T in a circular buffer with MaxCapacity
template <typename T, depth_t MaxCapacity>
class range_vector {
    depth_t my_head;
    depth_t my_tail;
    depth_t my_size;
    depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
    tbb::detail::aligned_space<T, MaxCapacity> my_pool;

public:
    //! initialize via first range in pool
    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
        my_depth[0] = 0;
        new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move?
    }
    ~range_vector() {
        while( !empty() ) pop_back();
    }
    bool empty() const { return my_size == 0; }
    depth_t size() const { return my_size; }
    //! Populates range pool via ranges up to max depth or while divisible
    //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces
    void split_to_fill(depth_t max_depth) {
        while( my_size < MaxCapacity && is_divisible(max_depth) ) {
            depth_t prev = my_head;
            my_head = (my_head + 1) % MaxCapacity;
            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move?
            my_pool.begin()[prev].~T(); // instead of assignment
            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split
            my_depth[my_head] = ++my_depth[prev];
            my_size++;
        }
    }
    void pop_back() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
        my_pool.begin()[my_head].~T();
        my_size--;
        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
    }
    void pop_front() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
        my_pool.begin()[my_tail].~T();
        my_size--;
        my_tail = (my_tail + 1) % MaxCapacity;
    }
    T& back() {
        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
        return my_pool.begin()[my_head];
    }
    T& front() {
        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
        return my_pool.begin()[my_tail];
    }
    //! similarly to front(), returns depth of the first range in the pool
    depth_t front_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
        return my_depth[my_tail];
    }
    depth_t back_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size");
        return my_depth[my_head];
    }
    bool is_divisible(depth_t max_depth) {
        return back_depth() < max_depth && back().is_divisible();
    }
};
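
// Illustrative sketch of how the partitioners below drive a range_vector (for exposition only;
// `Range` stands for any splittable range type such as tbb::blocked_range<int>, and `body` is
// a hypothetical callable):
//
//     Range r(0, 1024);
//     range_vector<Range, __TBB_RANGE_POOL_CAPACITY> pool(r); // one range at relative depth 0
//     pool.split_to_fill(3);       // split back() until depth 3 is reached or the pool is full
//     while( !pool.empty() ) {
//         body(pool.back());       // deepest (smallest) range is taken first...
//         pool.pop_back();         // ...as in dynamic_grainsize_mode::work_balance()
//     }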

//! Provides default methods for partition objects and common algorithm blocks.
template <typename Partition>
struct partition_type_base {
    typedef detail::split split_type;
    // decision makers
    void note_affinity( slot_id ) {}
    template <typename Task>
    bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range()
    template <typename Range> split_type get_split() { return split(); }
    Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper

    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, const execution_data&) {
        start.run_body( range ); // static partitioner goes here
    }

    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        // The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
        // [If this task is stolen, adjust depth and divisions if necessary, set flag].
        // If range is divisible {
        //    Spread the work while [initial divisions left];
        //    Create trap task [if necessary];
        // }
        // If not divisible or [max depth is reached], execute, else do the range pool part
        if ( range.is_divisible() ) {
            if ( self().is_divisible() ) {
                do { // split while the range and the partitioner allow it
                    typename Partition::split_type split_obj = self().template get_split<Range>();
                    start.offer_work( split_obj, ed );
                } while ( range.is_divisible() && self().is_divisible() );
            }
        }
        self().work_balance(start, range, ed);
    }
};

//! Provides default splitting strategy for partition objects.
template <typename Partition>
struct adaptive_mode : partition_type_base<Partition> {
    typedef Partition my_partition;
    std::size_t my_divisor;
    // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
    // A task that has only one index must produce a right split without a reserved index, so that the index
    // is not overwritten in note_affinity() of the created (right) task.
    // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order)
    static const unsigned factor = 1;
    adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {}
    adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {}
    adaptive_mode(adaptive_mode&, const proportional_split&) : my_divisor(0)
    {
        // left blank as my_divisor gets overridden in the successors' constructors
    }
    /*! Override do_split methods in order to specify splitting strategy */
    std::size_t do_split(adaptive_mode &src, split) {
        return src.my_divisor /= 2u;
    }
};

//! Helper type for checking availability of proportional_split constructor
template <typename T> using supports_proportional_splitting = typename std::is_constructible<T, T&, proportional_split&>;

//! A helper class to create a proportional_split object for a given type of Range.
/** If the Range has a proportional_split constructor,
    then the created object splits a provided value in an implementation-defined proportion;
    otherwise it represents an equal-size split. */
// TODO: check if this helper can be a nested class of proportional_mode.
template <typename Range, typename = void>
struct proportion_helper {
    static proportional_split get_split(std::size_t) { return proportional_split(1,1); }
};

template <typename Range>
struct proportion_helper<Range, typename std::enable_if<supports_proportional_splitting<Range>::value>::type> {
    static proportional_split get_split(std::size_t n) {
        std::size_t right = n / 2;
        std::size_t left = n - right;
        return proportional_split(left, right);
    }
};

//! Provides proportional splitting strategy for partition objects
template <typename Partition>
struct proportional_mode : adaptive_mode<Partition> {
    typedef Partition my_partition;
    using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes

    proportional_mode() : adaptive_mode<Partition>() {}
    proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {}
    proportional_mode(proportional_mode &src, const proportional_split& split_obj)
        : adaptive_mode<Partition>(src, split_obj)
    {
        self().my_divisor = do_split(src, split_obj);
    }
    std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) {
        std::size_t portion = split_obj.right() * my_partition::factor;
        portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor);
        src.my_divisor -= portion;
        return portion;
    }
    bool is_divisible() { // part of old should_execute_range()
        return self().my_divisor > my_partition::factor;
    }
    template <typename Range>
    proportional_split get_split() {
        // Create a proportion for the number of threads expected to handle "this" subrange
        return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor );
    }
};

static std::size_t get_initial_partition_head() {
    int current_index = tbb::this_task_arena::current_thread_index();
    if (current_index == tbb::task_arena::not_initialized)
        current_index = 0;
    return size_t(current_index);
}

//! Provides default linear indexing of partitioner's sequence
template <typename Partition>
struct linear_affinity_mode : proportional_mode<Partition> {
    std::size_t my_head;
    std::size_t my_max_affinity;
    using proportional_mode<Partition>::self;
    linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()),
                             my_max_affinity(self().my_divisor) {}
    linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split())
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj)
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    void spawn_task(task& t, task_group_context& ctx) {
        if (self().my_divisor) {
            spawn(t, ctx, slot_id(my_head));
        } else {
            spawn(t, ctx);
        }
    }
};

static bool is_stolen_task(const execution_data& ed) {
    return execution_slot(ed) != original_slot(ed);
}

/*! Determine work-balance phase implementing splitting & stealing actions */
template<class Mode>
struct dynamic_grainsize_mode : Mode {
    using Mode::self;
    enum {
        begin = 0,
        run,
        pass
    } my_delay;
    depth_t my_max_depth;
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
    dynamic_grainsize_mode(): Mode()
        , my_delay(begin)
        , my_max_depth(__TBB_INIT_DEPTH) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, split)
        : Mode(p, split())
        , my_delay(pass)
        , my_max_depth(p.my_max_depth) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj)
        : Mode(p, split_obj)
        , my_delay(begin)
        , my_max_depth(p.my_max_depth) {}
    template <typename Task>
    bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range()
        if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree
            self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
            if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task
#if __TBB_USE_OPTIONAL_RTTI
                // RTTI is available, check whether the cast is valid
                // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast<tree_node*>(t.m_parent), 0);
                // correctness of the cast relies on avoiding the root task for which:
                // - initial value of my_divisor != 0 (protected by separate assertion)
                // - is_stolen_task() always returns false for the root task.
#endif
                tree_node::mark_task_stolen(t);
                if( !my_max_depth ) my_max_depth++;
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        }
        return false;
    }
    depth_t max_depth() { return my_max_depth; }
    void align_depth(depth_t base) {
        __TBB_ASSERT(base <= my_max_depth, 0);
        my_max_depth -= base;
    }
    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, execution_data& ed) {
        if( !range.is_divisible() || !self().max_depth() ) {
            start.run_body( range );
        }
        else { // do range pool
            range_vector<Range, range_pool_size> range_pool(range);
            do {
                range_pool.split_to_fill(self().max_depth()); // fill range pool
                if( self().check_for_demand( start ) ) {
                    if( range_pool.size() > 1 ) {
                        start.offer_work( range_pool.front(), range_pool.front_depth(), ed );
                        range_pool.pop_front();
                        continue;
                    }
                    if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task
                        continue; // note: next split_to_fill() should split the range at least once
                }
                start.run_body( range_pool.back() );
                range_pool.pop_back();
            } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() );
        }
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if ( pass == my_delay ) {
            if ( self().my_divisor > 1 ) // produce affinitized tasks while they have a slot in the array
                return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more
            else if ( self().my_divisor && my_max_depth ) { // make balancing task
                self().my_divisor = 0; // once for each task; depth will be decreased in align_depth()
                return true;
            }
            else if ( tree_node::is_peer_stolen(t) ) {
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        } else if( begin == my_delay ) {
            my_delay = pass;
        }
        return false;
    }
};

class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > {
public:
    auto_partition_type( const auto_partitioner& )
        : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() {
        my_divisor *= __TBB_INITIAL_CHUNKS;
    }
    auto_partition_type( auto_partition_type& src, split)
        : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {}
    bool is_divisible() { // part of old should_execute_range()
        if( my_divisor > 1 ) return true;
        if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead
            // keep same fragmentation while splitting for the local task pool
            my_max_depth--;
            my_divisor = 0; // decrease max_depth once per task
            return true;
        } else return false;
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if (tree_node::is_peer_stolen(t)) {
            my_max_depth += __TBB_DEMAND_DEPTH_ADD;
            return true;
        } else return false;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
    simple_partition_type( const simple_partitioner& ) {}
    simple_partition_type( const simple_partition_type&, split ) {}
    //! simplified algorithm
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        split_type split_obj = split(); // start.offer_work accepts split_type as reference
        while( range.is_divisible() )
            start.offer_work( split_obj, ed );
        start.run_body( range );
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class static_partition_type : public linear_affinity_mode<static_partition_type> {
public:
    typedef detail::proportional_split split_type;
    static_partition_type( const static_partitioner& )
        : linear_affinity_mode<static_partition_type>() {}
    static_partition_type( static_partition_type& p, const proportional_split& split_obj )
        : linear_affinity_mode<static_partition_type>(p, split_obj) {}
};

class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > {
    static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units
    slot_id* my_array;
public:
    static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task
    typedef detail::proportional_split split_type;
    affinity_partition_type( affinity_partitioner_base& ap )
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() {
        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
        ap.resize(factor);
        my_array = ap.my_array;
        my_max_depth = factor_power + 1;
        __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
    }
    affinity_partition_type(affinity_partition_type& p, split)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split())
        , my_array(p.my_array) {}
    affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj)
        , my_array(p.my_array) {}
    void note_affinity(slot_id id) {
        if( my_divisor )
            my_array[my_head] = id;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        if (my_divisor) {
            if (!my_array[my_head]) {
                // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse
                spawn(t, ctx, slot_id(my_head / factor));
            } else {
                spawn(t, ctx, my_array[my_head]);
            }
        } else {
            spawn(t, ctx);
        }
    }
};

//! A simple partitioner
/** Divides the range until the range is not divisible.
    @ingroup algorithms */
class simple_partitioner {
public:
    simple_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef simple_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef simple_partition_type::split_type split_type;

    // for parallel_scan only
    class partition_type {
    public:
        bool should_execute_range(const execution_data& ) {return false;}
        partition_type( const simple_partitioner& ) {}
        partition_type( const partition_type&, split ) {}
    };
};
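
// Typical use of simple_partitioner (an illustrative sketch, not part of this header): the
// blocked_range grainsize is the only chunking control, so the range is split down to pieces
// of at most `grainsize` iterations. `n` and the loop body are placeholders.
//
//     #include "oneapi/tbb/parallel_for.h"
//     #include "oneapi/tbb/blocked_range.h"
//
//     tbb::parallel_for(
//         tbb::blocked_range<std::size_t>(0, n, /*grainsize=*/1000),
//         [](const tbb::blocked_range<std::size_t>& r) {
//             for (std::size_t i = r.begin(); i != r.end(); ++i) { /* ...work for i... */ }
//         },
//         tbb::simple_partitioner{});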

//! An auto partitioner
/** The range is initially divided into several large chunks.
    Chunks are further subdivided into smaller pieces if demand is detected and they are divisible.
    @ingroup algorithms */
class auto_partitioner {
public:
    auto_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef auto_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef auto_partition_type::split_type split_type;

    //! Backward-compatible partition for auto and affinity partition objects.
    class partition_type {
        size_t num_chunks;
        static const size_t VICTIM_CHUNKS = 4;
    public:
        bool should_execute_range(const execution_data& ed) {
            if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) )
                num_chunks = VICTIM_CHUNKS;
            return num_chunks==1;
        }
        partition_type( const auto_partitioner& )
            : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
        partition_type( partition_type& pt, split ) {
            num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
        }
    };
};
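
// auto_partitioner is the default partitioner of the parallel algorithms, so the two calls
// below are equivalent (illustrative sketch; `n` and `body` are placeholders as above):
//
//     tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body);
//     tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body, tbb::auto_partitioner{});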

//! A static partitioner
class static_partitioner {
public:
    static_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef static_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef static_partition_type::split_type split_type;
};
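
// static_partitioner distributes the range proportionally across the available threads up
// front and performs no additional load balancing afterwards. Illustrative sketch, with the
// same placeholders as above:
//
//     tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body, tbb::static_partitioner{});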

//! An affinity partitioner
class affinity_partitioner : affinity_partitioner_base {
public:
    affinity_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef affinity_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef affinity_partition_type::split_type split_type;
};
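
// Unlike the stateless partitioners above, affinity_partitioner carries state (the affinity
// array owned by affinity_partitioner_base), so the same object must outlive the algorithm
// call and should be reused across repeated calls over the same data so that the hints
// recorded by note_affinity() can be replayed. Illustrative sketch with the same placeholders:
//
//     tbb::affinity_partitioner ap; // reused between iterations
//     for (int iter = 0; iter < num_iterations; ++iter)
//         tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body, ap);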

} // namespace d1
} // namespace detail

inline namespace v1 {
// Partitioners
using detail::d1::auto_partitioner;
using detail::d1::simple_partitioner;
using detail::d1::static_partitioner;
using detail::d1::affinity_partitioner;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1

} // namespace tbb

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warning 4244 is back

#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH

#endif /* __TBB_partitioner_H */
