parallel_for_for.h source code [qtquick3d/src/3rdparty/embree/common/algorithms/parallel_for_for.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "parallel_for.h"
7
8	namespace embree
9	{
10	template<typename ArrayArray, typename Func>
11	__forceinline void sequential_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
12	{
13	size_t k=`0`;
14	for (size_t i=`0`; i!=array2.size(); ++i) {
15	const size_t N = array2[i]->size();
16	if (N) func(array2[i],range<size_t>(`0`,N),k);
17	k+=N;
18	}
19	}
20
21	class ParallelForForState
22	{
23	public:
24
25	enum { MAX_TASKS = `64` };
26
27	__forceinline ParallelForForState ()
28	: taskCount(`0`) {}
29
30	template<typename ArrayArray>
31	__forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) {
32	init(array2,minStepSize);
33	}
34
35	template<typename ArrayArray>
36	__forceinline void init ( ArrayArray& array2, const size_t minStepSize )
37	{
38	/ first calculate total number of elements /
39	size_t N = `0`;
40	for (size_t i=`0`; i<array2.size(); i++) {
41	N += array2[i] ? array2[i]->size() : `0`;
42	}
43	this->N = N;
44
45	/ calculate number of tasks to use /
46	const size_t numThreads = TaskScheduler::threadCount();
47	const size_t numBlocks = (N+minStepSize-`1`)/minStepSize;
48	taskCount = max(a: size_t(`1`),b: min(a: numThreads,b: numBlocks,c: size_t(ParallelForForState::MAX_TASKS)));
49
50	/ calculate start (i,j) for each task /
51	size_t taskIndex = `0`;
52	i0[taskIndex] = `0`;
53	j0[taskIndex] = `0`;
54	size_t k0 = (++taskIndex)*N/taskCount;
55	for (size_t i=`0`, k=`0`; taskIndex < taskCount; i++)
56	{
57	assert(i<array2.size());
58	size_t j=`0`, M = array2[i] ? array2[i]->size() : `0`;
59	while (j<M && k+M-j >= k0 && taskIndex < taskCount) {
60	assert(taskIndex<taskCount);
61	i0[taskIndex] = i;
62	j0[taskIndex] = j += k0-k;
63	k=k0;
64	k0 = (++taskIndex)*N/taskCount;
65	}
66	k+=M-j;
67	}
68	}
69
70	__forceinline size_t size() const {
71	return N;
72	}
73
74	public:
75	size_t i0[MAX_TASKS];
76	size_t j0[MAX_TASKS];
77	size_t taskCount;
78	size_t N;
79	};
80
81	template<typename ArrayArray, typename Func>
82	__forceinline void parallel_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
83	{
84	ParallelForForState state(array2,minStepSize);
85
86	parallel_for(state.taskCount, [&](const size_t taskIndex)
87	{
88	/ calculate range /
89	const size_t k0 = (taskIndex+`0`)*state.size()/state.taskCount;
90	const size_t k1 = (taskIndex+`1`)*state.size()/state.taskCount;
91	size_t i0 = state.i0[taskIndex];
92	size_t j0 = state.j0[taskIndex];
93
94	/ iterate over arrays /
95	size_t k=k0;
96	for (size_t i=i0; k<k1; i++) {
97	const size_t N = array2[i] ? array2[i]->size() : `0`;
98	const size_t r0 = j0, r1 = min(a: N,b: r0+k1-k);
99	if (r1 > r0) func(array2[i],range<size_t>(r0,r1),k);
100	k+=r1-r0; j0 = `0`;
101	}
102	});
103	}
104
105	template<typename ArrayArray, typename Func>
106	__forceinline void parallel_for_for( ArrayArray& array2, const Func& func )
107	{
108	parallel_for_for(array2,`1`,func);
109	}
110
111	template<typename ArrayArray, typename Value, typename Func, typename Reduction>
112	__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const size_t minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
113	{
114	ParallelForForState state(array2,minStepSize);
115	Value temp[ParallelForForState::MAX_TASKS];
116
117	for (size_t i=`0`; i<state.taskCount; i++)
118	temp[i] = identity;
119
120	parallel_for(state.taskCount, [&](const size_t taskIndex)
121	{
122	/ calculate range /
123	const size_t k0 = (taskIndex+`0`)*state.size()/state.taskCount;
124	const size_t k1 = (taskIndex+`1`)*state.size()/state.taskCount;
125	size_t i0 = state.i0[taskIndex];
126	size_t j0 = state.j0[taskIndex];
127
128	/ iterate over arrays /
129	size_t k=k0;
130	for (size_t i=i0; k<k1; i++) {
131	const size_t N = array2[i] ? array2[i]->size() : `0`;
132	const size_t r0 = j0, r1 = min(a: N,b: r0+k1-k);
133	if (r1 > r0) temp[taskIndex] = reduction(temp[taskIndex],func(array2[i],range<size_t>(r0,r1),k));
134	k+=r1-r0; j0 = `0`;
135	}
136	});
137
138	Value ret = identity;
139	for (size_t i=`0`; i<state.taskCount; i++)
140	ret = reduction(ret,temp[i]);
141	return ret;
142	}
143
144	template<typename ArrayArray, typename Value, typename Func, typename Reduction>
145	__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction)
146	{
147	return parallel_for_for_reduce(array2,`1`,identity,func,reduction);
148	}
149	}
150

Provided by KDAB

Learn Advanced QML with KDAB

Find out more

Definitions

source code of qtquick3d/src/3rdparty/embree/common/algorithms/parallel_for_for.h