/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H

#ifndef __TBB_INITIAL_CHUNKS
// initial task divisions per thread
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
// maximum number of elements in range pool
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
// initial value for depth of range pool
#define __TBB_INIT_DEPTH 5
#endif
#ifndef __TBB_DEMAND_DEPTH_ADD
// when imbalance is detected, the range is split this many additional times
#define __TBB_DEMAND_DEPTH_ADD 1
#endif

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_aligned_space.h"
#include "detail/_utils.h"
#include "detail/_template_helpers.h"
#include "detail/_range_common.h"
#include "detail/_task.h"
#include "detail/_small_object_pool.h"

#include "cache_aligned_allocator.h"
#include "task_group.h" // task_group_context
#include "task_arena.h"

#include <algorithm>
#include <atomic>
#include <type_traits>

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244)
#endif

namespace tbb {
namespace detail {

namespace d1 {
class auto_partitioner;
class simple_partitioner;
class static_partitioner;
class affinity_partitioner;
class affinity_partition_type;
class affinity_partitioner_base;

inline std::size_t get_initial_auto_partitioner_divisor() {
    const std::size_t factor = 4;
    return factor * max_concurrency();
}

//! Defines entry point for affinity partitioner into oneTBB run-time library.
class affinity_partitioner_base: no_copy {
    friend class affinity_partitioner;
    friend class affinity_partition_type;
    //! Array that remembers affinities of tree positions to affinity_id.
    /** NULL if my_size==0. */
    slot_id* my_array;
    //! Number of elements in my_array.
    std::size_t my_size;
    //! Zeros the fields.
    affinity_partitioner_base() : my_array(nullptr), my_size(0) {}
    //! Deallocates my_array.
    ~affinity_partitioner_base() { resize(0); }
    //! Resize my_array.
    /** Retains values if resulting size is the same. */
    void resize(unsigned factor) {
        // Check factor to avoid asking for number of workers while there might be no arena.
        unsigned max_threads_in_arena = max_concurrency();
        std::size_t new_size = factor ? factor * max_threads_in_arena : 0;
        if (new_size != my_size) {
            if (my_array) {
                r1::cache_aligned_deallocate(my_array);
                // Following two assignments must be done here for sake of exception safety.
                my_array = nullptr;
                my_size = 0;
            }
            if (new_size) {
                my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id)));
                std::fill_n(my_array, new_size, no_slot);
                my_size = new_size;
            }
        }
    }
};

template<typename Range, typename Body, typename Partitioner> struct start_for;
template<typename Range, typename Body, typename Partitioner> struct start_scan;
template<typename Range, typename Body, typename Partitioner> struct start_reduce;
template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce;

struct node {
    node* my_parent{};
    std::atomic<int> m_ref_count{};

    node() = default;
    node(node* parent, int ref_count) :
        my_parent{parent}, m_ref_count{ref_count} {
        __TBB_ASSERT(ref_count > 0, "The ref count must be positive");
    }
};

struct wait_node : node {
    wait_node() : node{ nullptr, 1 } {}
    wait_context m_wait{1};
};

//! Join task node that contains shared flag for stealing feedback
struct tree_node : public node {
    small_object_allocator m_allocator;
    std::atomic<bool> m_child_stolen{false};

    tree_node(node* parent, int ref_count, small_object_allocator& alloc)
        : node{parent, ref_count}
        , m_allocator{alloc} {}

    void join(task_group_context*) { /* dummy, required only for reduction algorithms */ }

    template <typename Task>
    static void mark_task_stolen(Task &t) {
        std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen;
#if TBB_USE_PROFILING_TOOLS
        // Threading tools respect lock prefix but report false-positive data-race via plain store
        flag.exchange(true);
#else
        flag.store(true, std::memory_order_relaxed);
#endif // TBB_USE_PROFILING_TOOLS
    }
    template <typename Task>
    static bool is_peer_stolen(Task &t) {
        return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed);
    }
};

// Folds the node tree bottom-up, joining and deleting completed nodes;
// the context is used to check the cancellation state during the reduction join process.
template<typename TreeNodeType>
void fold_tree(node* n, const execution_data& ed) {
    for (;;) {
        __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive.");
        call_itt_task_notify(releasing, n);
        if (--n->m_ref_count > 0) {
            return;
        }
        node* parent = n->my_parent;
        if (!parent) {
            break;
        }

        call_itt_task_notify(acquired, n);
        TreeNodeType* self = static_cast<TreeNodeType*>(n);
        self->join(ed.context);
        self->m_allocator.delete_object(self, ed);
        n = parent;
    }
    // Finish parallel for execution when the root (last node) is reached
    static_cast<wait_node*>(n)->m_wait.release();
}

//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows
//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented
//! by a number that cannot fit into machine word.
typedef unsigned char depth_t;

//! Range pool stores ranges of type T in a circular buffer with MaxCapacity
template <typename T, depth_t MaxCapacity>
class range_vector {
    depth_t my_head;
    depth_t my_tail;
    depth_t my_size;
    depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
    tbb::detail::aligned_space<T, MaxCapacity> my_pool;

public:
    //! initialize via first range in pool
    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
        my_depth[0] = 0;
        new( static_cast<void *>(my_pool.begin()) ) T(elem); //TODO: std::move?
    }
    ~range_vector() {
        while( !empty() ) pop_back();
    }
    bool empty() const { return my_size == 0; }
    depth_t size() const { return my_size; }
    //! Populates the range pool by splitting ranges until max_depth is reached or they stop being divisible
    //! max_depth starts from 0, e.g. a value of 2 makes up to 3 ranges in the pool, with up to two 1/4 pieces
    void split_to_fill(depth_t max_depth) {
        while( my_size < MaxCapacity && is_divisible(max_depth) ) {
            depth_t prev = my_head;
            my_head = (my_head + 1) % MaxCapacity;
            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move?
            my_pool.begin()[prev].~T(); // instead of assignment
            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split
            my_depth[my_head] = ++my_depth[prev];
            my_size++;
        }
    }
    void pop_back() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
        my_pool.begin()[my_head].~T();
        my_size--;
        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
    }
    void pop_front() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
        my_pool.begin()[my_tail].~T();
        my_size--;
        my_tail = (my_tail + 1) % MaxCapacity;
    }
    T& back() {
        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
        return my_pool.begin()[my_head];
    }
    T& front() {
        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
        return my_pool.begin()[my_tail];
    }
    //! similarly to front(), returns depth of the first range in the pool
    depth_t front_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
        return my_depth[my_tail];
    }
    depth_t back_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size");
        return my_depth[my_head];
    }
    bool is_divisible(depth_t max_depth) {
        return back_depth() < max_depth && back().is_divisible();
    }
};
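
// Illustrative sketch (not part of the library) of how a partitioner typically drives the range
// pool above; the Range type, range and max_depth names are placeholders for whatever the caller
// provides (any type satisfying the Range requirements, e.g. a blocked_range, would do):
//
//     range_vector<Range, __TBB_RANGE_POOL_CAPACITY> pool(range); // pool holds { range } at relative depth 0
//     pool.split_to_fill(max_depth);   // split the back element until max_depth or capacity is reached
//     Range& local  = pool.back();     // deepest (smallest) piece, executed locally (LIFO end)
//     Range& shared = pool.front();    // shallowest (largest) piece, offered to thieves (FIFO end)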

//! Provides default methods for partition objects and common algorithm blocks.
template <typename Partition>
struct partition_type_base {
    typedef detail::split split_type;
    // decision makers
    void note_affinity( slot_id ) {}
    template <typename Task>
    bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range()
    template <typename Range> split_type get_split() { return split(); }
    Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper

    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, const execution_data&) {
        start.run_body( range ); // static partitioner goes here
    }

    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        // The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
        // [If this task is stolen, adjust depth and divisions if necessary, set flag].
        // If range is divisible {
        //    Spread the work while [initial divisions left];
        //    Create trap task [if necessary];
        // }
        // If not divisible or [max depth is reached], execute, else do the range pool part
        if ( range.is_divisible() ) {
            if ( self().is_divisible() ) {
                do { // split while both the range and the partition are divisible
                    typename Partition::split_type split_obj = self().template get_split<Range>();
                    start.offer_work( split_obj, ed );
                } while ( range.is_divisible() && self().is_divisible() );
            }
        }
        self().work_balance(start, range, ed);
    }
};

//! Provides default splitting strategy for partition objects.
template <typename Partition>
struct adaptive_mode : partition_type_base<Partition> {
    typedef Partition my_partition;
    std::size_t my_divisor;
    // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
    // A task that has only one index must produce its right split without a reserved index, so that
    // the index is not overwritten in note_affinity() of the created (right) task.
    // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order)
    static const unsigned factor = 1;
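    // The default constructor below starts with my_divisor == max_concurrency() * Partition::factor:
    // get_initial_auto_partitioner_divisor() returns 4 * max_concurrency(), which is divided by 4 here.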
    adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {}
    adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {}
    adaptive_mode(adaptive_mode&, const proportional_split&) : my_divisor(0)
    {
        // left blank as my_divisor gets overridden in the successors' constructors
    }
    /*! Override do_split methods in order to specify splitting strategy */
    std::size_t do_split(adaptive_mode &src, split) {
        return src.my_divisor /= 2u;
    }
};

//! Helper type for checking availability of proportional_split constructor
template <typename T> using supports_proportional_splitting = typename std::is_constructible<T, T&, proportional_split&>;

//! A helper class to create a proportional_split object for a given type of Range.
/** If the Range has a proportional_split constructor,
    then the created object splits a provided value in an implementation-defined proportion;
    otherwise it represents an equal-size split. */
// TODO: check if this helper can be a nested class of proportional_mode.
template <typename Range, typename = void>
struct proportion_helper {
    static proportional_split get_split(std::size_t) { return proportional_split(1,1); }
};

template <typename Range>
struct proportion_helper<Range, typename std::enable_if<supports_proportional_splitting<Range>::value>::type> {
    static proportional_split get_split(std::size_t n) {
        std::size_t right = n / 2;
        std::size_t left = n - right;
        return proportional_split(left, right);
    }
};

//! Provides proportional splitting strategy for partition objects
template <typename Partition>
struct proportional_mode : adaptive_mode<Partition> {
    typedef Partition my_partition;
    using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes

    proportional_mode() : adaptive_mode<Partition>() {}
    proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {}
    proportional_mode(proportional_mode &src, const proportional_split& split_obj)
        : adaptive_mode<Partition>(src, split_obj)
    {
        self().my_divisor = do_split(src, split_obj);
    }
    std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) {
        std::size_t portion = split_obj.right() * my_partition::factor;
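        // Round the portion to the nearest multiple of my_partition::factor; factor is a power
        // of two, so (0ul - factor) is the mask that clears the low-order bits.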
        portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor);
        src.my_divisor -= portion;
        return portion;
    }
    bool is_divisible() { // part of old should_execute_range()
        return self().my_divisor > my_partition::factor;
    }
    template <typename Range>
    proportional_split get_split() {
        // Create a proportion for the number of threads expected to handle "this" subrange
        return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor );
    }
};

static std::size_t get_initial_partition_head() {
    int current_index = tbb::this_task_arena::current_thread_index();
    if (current_index == tbb::task_arena::not_initialized)
        current_index = 0;
    return size_t(current_index);
}

//! Provides default linear indexing of partitioner's sequence
template <typename Partition>
struct linear_affinity_mode : proportional_mode<Partition> {
    std::size_t my_head;
    std::size_t my_max_affinity;
    using proportional_mode<Partition>::self;
    linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()),
                             my_max_affinity(self().my_divisor) {}
    linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split())
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj)
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    void spawn_task(task& t, task_group_context& ctx) {
        if (self().my_divisor) {
            spawn(t, ctx, slot_id(my_head));
        } else {
            spawn(t, ctx);
        }
    }
};

static bool is_stolen_task(const execution_data& ed) {
    return execution_slot(ed) != original_slot(ed);
}

/*! Determine work-balance phase implementing splitting & stealing actions */
template<class Mode>
struct dynamic_grainsize_mode : Mode {
    using Mode::self;
    enum {
        begin = 0,
        run,
        pass
    } my_delay;
    depth_t my_max_depth;
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
    dynamic_grainsize_mode(): Mode()
        , my_delay(begin)
        , my_max_depth(__TBB_INIT_DEPTH) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, split)
        : Mode(p, split())
        , my_delay(pass)
        , my_max_depth(p.my_max_depth) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj)
        : Mode(p, split_obj)
        , my_delay(begin)
        , my_max_depth(p.my_max_depth) {}
    template <typename Task>
    bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range()
        if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree
            self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
            if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task
#if __TBB_USE_OPTIONAL_RTTI
                // RTTI is available, check whether the cast is valid
                // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast<tree_node*>(t.m_parent), 0);
                // correctness of the cast relies on avoiding the root task for which:
                // - initial value of my_divisor != 0 (protected by separate assertion)
                // - is_stolen_task() always returns false for the root task.
#endif
                tree_node::mark_task_stolen(t);
                if( !my_max_depth ) my_max_depth++;
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        }
        return false;
    }
    depth_t max_depth() { return my_max_depth; }
    void align_depth(depth_t base) {
        __TBB_ASSERT(base <= my_max_depth, 0);
        my_max_depth -= base;
    }
    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, execution_data& ed) {
        if( !range.is_divisible() || !self().max_depth() ) {
            start.run_body( range );
        }
        else { // do range pool
            range_vector<Range, range_pool_size> range_pool(range);
            do {
                range_pool.split_to_fill(self().max_depth()); // fill range pool
                if( self().check_for_demand( start ) ) {
                    if( range_pool.size() > 1 ) {
                        start.offer_work( range_pool.front(), range_pool.front_depth(), ed );
                        range_pool.pop_front();
                        continue;
                    }
                    if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task
                        continue; // note: next split_to_fill() should split range at least once
                }
                start.run_body( range_pool.back() );
                range_pool.pop_back();
            } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() );
        }
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if ( pass == my_delay ) {
            if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array
                return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more
            else if ( self().my_divisor && my_max_depth ) { // make balancing task
                self().my_divisor = 0; // once for each task; depth will be decreased in align_depth()
                return true;
            }
            else if ( tree_node::is_peer_stolen(t) ) {
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        } else if( begin == my_delay ) {
            my_delay = pass;
        }
        return false;
    }
};

class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > {
public:
    auto_partition_type( const auto_partitioner& )
        : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() {
        my_divisor *= __TBB_INITIAL_CHUNKS;
    }
    auto_partition_type( auto_partition_type& src, split)
        : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {}
    bool is_divisible() { // part of old should_execute_range()
        if( my_divisor > 1 ) return true;
        if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead
            // keep same fragmentation while splitting for the local task pool
            my_max_depth--;
            my_divisor = 0; // decrease max_depth once per task
            return true;
        } else return false;
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if (tree_node::is_peer_stolen(t)) {
            my_max_depth += __TBB_DEMAND_DEPTH_ADD;
            return true;
        } else return false;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
    simple_partition_type( const simple_partitioner& ) {}
    simple_partition_type( const simple_partition_type&, split ) {}
    //! simplified algorithm
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        split_type split_obj = split(); // start.offer_work accepts split_type as reference
        while( range.is_divisible() )
            start.offer_work( split_obj, ed );
        start.run_body( range );
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class static_partition_type : public linear_affinity_mode<static_partition_type> {
public:
    typedef detail::proportional_split split_type;
    static_partition_type( const static_partitioner& )
        : linear_affinity_mode<static_partition_type>() {}
    static_partition_type( static_partition_type& p, const proportional_split& split_obj )
        : linear_affinity_mode<static_partition_type>(p, split_obj) {}
};

class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > {
    static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units
    slot_id* my_array;
public:
    static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task
    typedef detail::proportional_split split_type;
    affinity_partition_type( affinity_partitioner_base& ap )
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() {
        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
        ap.resize(factor);
        my_array = ap.my_array;
        my_max_depth = factor_power + 1;
        __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
    }
    affinity_partition_type(affinity_partition_type& p, split)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split())
        , my_array(p.my_array) {}
    affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj)
        , my_array(p.my_array) {}
    void note_affinity(slot_id id) {
        if( my_divisor )
            my_array[my_head] = id;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        if (my_divisor) {
            if (!my_array[my_head]) {
                // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse
                spawn(t, ctx, slot_id(my_head / factor));
            } else {
                spawn(t, ctx, my_array[my_head]);
            }
        } else {
            spawn(t, ctx);
        }
    }
};

//! A simple partitioner
/** Divides the range until it is no longer divisible.
    @ingroup algorithms */
class simple_partitioner {
public:
    simple_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef simple_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef simple_partition_type::split_type split_type;

    // for parallel_scan only
    class partition_type {
    public:
        bool should_execute_range(const execution_data& ) {return false;}
        partition_type( const simple_partitioner& ) {}
        partition_type( const partition_type&, split ) {}
    };
};
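
// Illustrative usage sketch (assumes user code includes the parallel_for and blocked_range headers):
// simple_partitioner splits the range all the way down to its grainsize, so chunk size is
// controlled entirely by the range, e.g.
//
//     tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n, /*grainsize*/ 1024),
//                       [](const tbb::blocked_range<std::size_t>& r) { /* process r */ },
//                       tbb::simple_partitioner());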

//! An auto partitioner
/** The range is initially divided into several large chunks.
    Chunks are further subdivided into smaller pieces if demand is detected and they are divisible.
    @ingroup algorithms */
class auto_partitioner {
public:
    auto_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef auto_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef auto_partition_type::split_type split_type;

    //! Backward-compatible partition for auto and affinity partition objects.
    class partition_type {
        size_t num_chunks;
        static const size_t VICTIM_CHUNKS = 4;
    public:
        bool should_execute_range(const execution_data& ed) {
            if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) )
                num_chunks = VICTIM_CHUNKS;
            return num_chunks==1;
        }
        partition_type( const auto_partitioner& )
            : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
        partition_type( partition_type& pt, split ) {
            num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
        }
    };
};
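
// Illustrative usage sketch (body is a placeholder functor; assumes user code includes the
// parallel_for and blocked_range headers): auto_partitioner is the default partitioner for
// parallel_for, so the two calls below are equivalent.
//
//     tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body);
//     tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body, tbb::auto_partitioner());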

//! A static partitioner
class static_partitioner {
public:
    static_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef static_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef static_partition_type::split_type split_type;
};
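
// Illustrative usage sketch (body is a placeholder functor; assumes user code includes the
// parallel_for and blocked_range headers): static_partitioner distributes the work across the
// threads up front, with no further work stealing, which suits uniform iterations.
//
//     tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body, tbb::static_partitioner());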

//! An affinity partitioner
class affinity_partitioner : affinity_partitioner_base {
public:
    affinity_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef affinity_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef affinity_partition_type::split_type split_type;
};
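
// Illustrative usage sketch (body and n_iters are placeholder names): an affinity_partitioner
// object must be kept alive and passed by reference to repeated calls over the same data, so
// that the recorded slot affinities can be replayed for cache reuse.
//
//     tbb::affinity_partitioner ap;                     // reused across iterations
//     for (int iter = 0; iter < n_iters; ++iter)
//         tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body, ap);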

} // namespace d1
} // namespace detail

inline namespace v1 {
// Partitioners
using detail::d1::auto_partitioner;
using detail::d1::simple_partitioner;
using detail::d1::static_partitioner;
using detail::d1::affinity_partitioner;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1

} // namespace tbb

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warning 4244 is back

#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH

#endif /* __TBB_partitioner_H */