1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "parallel_for.h"
7
8namespace embree
9{
10 template<typename ArrayArray, typename Func>
11 __forceinline void sequential_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
12 {
13 size_t k=0;
14 for (size_t i=0; i!=array2.size(); ++i) {
15 const size_t N = array2[i]->size();
16 if (N) func(array2[i],range<size_t>(0,N),k);
17 k+=N;
18 }
19 }
20
21 class ParallelForForState
22 {
23 public:
24
25 enum { MAX_TASKS = 64 };
26
27 __forceinline ParallelForForState ()
28 : taskCount(0) {}
29
30 template<typename ArrayArray>
31 __forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) {
32 init(array2,minStepSize);
33 }
34
35 template<typename ArrayArray>
36 __forceinline void init ( ArrayArray& array2, const size_t minStepSize )
37 {
38 /* first calculate total number of elements */
39 size_t N = 0;
40 for (size_t i=0; i<array2.size(); i++) {
41 N += array2[i] ? array2[i]->size() : 0;
42 }
43 this->N = N;
44
45 /* calculate number of tasks to use */
46 const size_t numThreads = TaskScheduler::threadCount();
47 const size_t numBlocks = (N+minStepSize-1)/minStepSize;
48 taskCount = max(a: size_t(1),b: min(a: numThreads,b: numBlocks,c: size_t(ParallelForForState::MAX_TASKS)));
49
50 /* calculate start (i,j) for each task */
51 size_t taskIndex = 0;
52 i0[taskIndex] = 0;
53 j0[taskIndex] = 0;
54 size_t k0 = (++taskIndex)*N/taskCount;
55 for (size_t i=0, k=0; taskIndex < taskCount; i++)
56 {
57 assert(i<array2.size());
58 size_t j=0, M = array2[i] ? array2[i]->size() : 0;
59 while (j<M && k+M-j >= k0 && taskIndex < taskCount) {
60 assert(taskIndex<taskCount);
61 i0[taskIndex] = i;
62 j0[taskIndex] = j += k0-k;
63 k=k0;
64 k0 = (++taskIndex)*N/taskCount;
65 }
66 k+=M-j;
67 }
68 }
69
70 __forceinline size_t size() const {
71 return N;
72 }
73
74 public:
75 size_t i0[MAX_TASKS];
76 size_t j0[MAX_TASKS];
77 size_t taskCount;
78 size_t N;
79 };
80
81 template<typename ArrayArray, typename Func>
82 __forceinline void parallel_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
83 {
84 ParallelForForState state(array2,minStepSize);
85
86 parallel_for(state.taskCount, [&](const size_t taskIndex)
87 {
88 /* calculate range */
89 const size_t k0 = (taskIndex+0)*state.size()/state.taskCount;
90 const size_t k1 = (taskIndex+1)*state.size()/state.taskCount;
91 size_t i0 = state.i0[taskIndex];
92 size_t j0 = state.j0[taskIndex];
93
94 /* iterate over arrays */
95 size_t k=k0;
96 for (size_t i=i0; k<k1; i++) {
97 const size_t N = array2[i] ? array2[i]->size() : 0;
98 const size_t r0 = j0, r1 = min(a: N,b: r0+k1-k);
99 if (r1 > r0) func(array2[i],range<size_t>(r0,r1),k);
100 k+=r1-r0; j0 = 0;
101 }
102 });
103 }
104
105 template<typename ArrayArray, typename Func>
106 __forceinline void parallel_for_for( ArrayArray& array2, const Func& func )
107 {
108 parallel_for_for(array2,1,func);
109 }
110
111 template<typename ArrayArray, typename Value, typename Func, typename Reduction>
112 __forceinline Value parallel_for_for_reduce( ArrayArray& array2, const size_t minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
113 {
114 ParallelForForState state(array2,minStepSize);
115 Value temp[ParallelForForState::MAX_TASKS];
116
117 for (size_t i=0; i<state.taskCount; i++)
118 temp[i] = identity;
119
120 parallel_for(state.taskCount, [&](const size_t taskIndex)
121 {
122 /* calculate range */
123 const size_t k0 = (taskIndex+0)*state.size()/state.taskCount;
124 const size_t k1 = (taskIndex+1)*state.size()/state.taskCount;
125 size_t i0 = state.i0[taskIndex];
126 size_t j0 = state.j0[taskIndex];
127
128 /* iterate over arrays */
129 size_t k=k0;
130 for (size_t i=i0; k<k1; i++) {
131 const size_t N = array2[i] ? array2[i]->size() : 0;
132 const size_t r0 = j0, r1 = min(a: N,b: r0+k1-k);
133 if (r1 > r0) temp[taskIndex] = reduction(temp[taskIndex],func(array2[i],range<size_t>(r0,r1),k));
134 k+=r1-r0; j0 = 0;
135 }
136 });
137
138 Value ret = identity;
139 for (size_t i=0; i<state.taskCount; i++)
140 ret = reduction(ret,temp[i]);
141 return ret;
142 }
143
144 template<typename ArrayArray, typename Value, typename Func, typename Reduction>
145 __forceinline Value parallel_for_for_reduce( ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction)
146 {
147 return parallel_for_for_reduce(array2,1,identity,func,reduction);
148 }
149}
150

source code of qtquick3d/src/3rdparty/embree/common/algorithms/parallel_for_for.h