1 | //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implementation of the class that manages parallel work on BinaryFunctions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/ParallelUtilities.h" |
14 | #include "bolt/Core/BinaryContext.h" |
15 | #include "bolt/Core/BinaryFunction.h" |
16 | #include "llvm/Support/RWMutex.h" |
17 | #include "llvm/Support/ThreadPool.h" |
18 | #include "llvm/Support/Timer.h" |
19 | #include <mutex> |
20 | |
21 | #define DEBUG_TYPE "par-utils" |
22 | |
23 | namespace opts { |
24 | extern cl::OptionCategory BoltCategory; |
25 | |
26 | cl::opt<unsigned> |
27 | ThreadCount("thread-count" , |
28 | cl::desc("number of threads" ), |
29 | cl::init(Val: hardware_concurrency().compute_thread_count()), |
30 | cl::cat(BoltCategory)); |
31 | |
32 | cl::opt<bool> |
33 | NoThreads("no-threads" , |
34 | cl::desc("disable multithreading" ), |
35 | cl::init(Val: false), |
36 | cl::cat(BoltCategory)); |
37 | |
38 | cl::opt<unsigned> |
39 | TaskCount("tasks-per-thread" , |
40 | cl::desc("number of tasks to be created per thread" ), |
41 | cl::init(Val: 20), |
42 | cl::cat(BoltCategory)); |
43 | |
44 | } // namespace opts |
45 | |
46 | namespace llvm { |
47 | namespace bolt { |
48 | namespace ParallelUtilities { |
49 | |
50 | namespace { |
51 | /// A single thread pool that is used to run parallel tasks |
52 | std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr; |
53 | |
54 | unsigned computeCostFor(const BinaryFunction &BF, |
55 | const PredicateTy &SkipPredicate, |
56 | const SchedulingPolicy &SchedPolicy) { |
57 | if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) |
58 | return 1; |
59 | |
60 | if (SkipPredicate && SkipPredicate(BF)) |
61 | return 0; |
62 | |
63 | switch (SchedPolicy) { |
64 | case SchedulingPolicy::SP_CONSTANT: |
65 | return 1; |
66 | case SchedulingPolicy::SP_INST_LINEAR: |
67 | return BF.getSize(); |
68 | case SchedulingPolicy::SP_INST_QUADRATIC: |
69 | return BF.getSize() * BF.getSize(); |
70 | case SchedulingPolicy::SP_BB_LINEAR: |
71 | return BF.size(); |
72 | case SchedulingPolicy::SP_BB_QUADRATIC: |
73 | return BF.size() * BF.size(); |
74 | default: |
75 | llvm_unreachable("unsupported scheduling policy" ); |
76 | } |
77 | } |
78 | |
79 | inline unsigned estimateTotalCost(const BinaryContext &BC, |
80 | const PredicateTy &SkipPredicate, |
81 | SchedulingPolicy &SchedPolicy) { |
82 | if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) |
83 | return BC.getBinaryFunctions().size(); |
84 | |
85 | unsigned TotalCost = 0; |
86 | for (auto &BFI : BC.getBinaryFunctions()) { |
87 | const BinaryFunction &BF = BFI.second; |
88 | TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
89 | } |
90 | |
91 | // Switch to trivial scheduling if total estimated work is zero |
92 | if (TotalCost == 0) { |
93 | BC.outs() |
94 | << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " |
95 | "switch to trivial scheduling.\n" ; |
96 | |
97 | SchedPolicy = SP_TRIVIAL; |
98 | TotalCost = BC.getBinaryFunctions().size(); |
99 | } |
100 | return TotalCost; |
101 | } |
102 | |
103 | } // namespace |
104 | |
105 | ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) { |
106 | if (ThreadPoolPtr) |
107 | return *ThreadPoolPtr; |
108 | |
109 | if (ThreadsCount > 1) |
110 | ThreadPoolPtr = std::make_unique<DefaultThreadPool>( |
111 | args: llvm::hardware_concurrency(ThreadCount: ThreadsCount)); |
112 | else |
113 | ThreadPoolPtr = std::make_unique<SingleThreadExecutor>(); |
114 | return *ThreadPoolPtr; |
115 | } |
116 | |
117 | void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, |
118 | WorkFuncTy WorkFunction, PredicateTy SkipPredicate, |
119 | std::string LogName, bool ForceSequential, |
120 | unsigned TasksPerThread) { |
121 | if (BC.getBinaryFunctions().size() == 0) |
122 | return; |
123 | |
124 | auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, |
125 | std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { |
126 | Timer T(LogName, LogName); |
127 | LLVM_DEBUG(T.startTimer()); |
128 | |
129 | for (auto It = BlockBegin; It != BlockEnd; ++It) { |
130 | BinaryFunction &BF = It->second; |
131 | if (SkipPredicate && SkipPredicate(BF)) |
132 | continue; |
133 | |
134 | WorkFunction(BF); |
135 | } |
136 | LLVM_DEBUG(T.stopTimer()); |
137 | }; |
138 | |
139 | if (opts::NoThreads || ForceSequential) { |
140 | runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); |
141 | return; |
142 | } |
143 | |
144 | // Estimate the overall runtime cost using the scheduling policy |
145 | const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); |
146 | const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; |
147 | const unsigned BlockCost = |
148 | TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; |
149 | |
150 | // Divide work into blocks of equal cost |
151 | ThreadPoolInterface &Pool = getThreadPool(); |
152 | auto BlockBegin = BC.getBinaryFunctions().begin(); |
153 | unsigned CurrentCost = 0; |
154 | |
155 | for (auto It = BC.getBinaryFunctions().begin(); |
156 | It != BC.getBinaryFunctions().end(); ++It) { |
157 | BinaryFunction &BF = It->second; |
158 | CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
159 | |
160 | if (CurrentCost >= BlockCost) { |
161 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It)); |
162 | BlockBegin = std::next(x: It); |
163 | CurrentCost = 0; |
164 | } |
165 | } |
166 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end()); |
167 | Pool.wait(); |
168 | } |
169 | |
170 | void runOnEachFunctionWithUniqueAllocId( |
171 | BinaryContext &BC, SchedulingPolicy SchedPolicy, |
172 | WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, |
173 | std::string LogName, bool ForceSequential, unsigned TasksPerThread) { |
174 | if (BC.getBinaryFunctions().size() == 0) |
175 | return; |
176 | |
177 | llvm::sys::RWMutex MainLock; |
178 | auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, |
179 | std::map<uint64_t, BinaryFunction>::iterator BlockEnd, |
180 | MCPlusBuilder::AllocatorIdTy AllocId) { |
181 | Timer T(LogName, LogName); |
182 | LLVM_DEBUG(T.startTimer()); |
183 | std::shared_lock<llvm::sys::RWMutex> Lock(MainLock); |
184 | for (auto It = BlockBegin; It != BlockEnd; ++It) { |
185 | BinaryFunction &BF = It->second; |
186 | if (SkipPredicate && SkipPredicate(BF)) |
187 | continue; |
188 | |
189 | WorkFunction(BF, AllocId); |
190 | } |
191 | LLVM_DEBUG(T.stopTimer()); |
192 | }; |
193 | |
194 | unsigned AllocId = 1; |
195 | auto EnsureAllocatorExists = [&BC](unsigned AllocId) { |
196 | if (!BC.MIB->checkAllocatorExists(AllocatorId: AllocId)) { |
197 | MCPlusBuilder::AllocatorIdTy Id = |
198 | BC.MIB->initializeNewAnnotationAllocator(); |
199 | (void)Id; |
200 | assert(AllocId == Id && "unexpected allocator id created" ); |
201 | } |
202 | }; |
203 | |
204 | if (opts::NoThreads || ForceSequential) { |
205 | EnsureAllocatorExists(AllocId); |
206 | runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), |
207 | AllocId); |
208 | return; |
209 | } |
210 | // This lock is used to postpone task execution |
211 | std::unique_lock<llvm::sys::RWMutex> Lock(MainLock); |
212 | |
213 | // Estimate the overall runtime cost using the scheduling policy |
214 | const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); |
215 | const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; |
216 | const unsigned BlockCost = |
217 | TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; |
218 | |
219 | // Divide work into blocks of equal cost |
220 | ThreadPoolInterface &Pool = getThreadPool(); |
221 | auto BlockBegin = BC.getBinaryFunctions().begin(); |
222 | unsigned CurrentCost = 0; |
223 | for (auto It = BC.getBinaryFunctions().begin(); |
224 | It != BC.getBinaryFunctions().end(); ++It) { |
225 | BinaryFunction &BF = It->second; |
226 | CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
227 | |
228 | if (CurrentCost >= BlockCost) { |
229 | EnsureAllocatorExists(AllocId); |
230 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It), ArgList&: AllocId); |
231 | AllocId++; |
232 | BlockBegin = std::next(x: It); |
233 | CurrentCost = 0; |
234 | } |
235 | } |
236 | |
237 | EnsureAllocatorExists(AllocId); |
238 | |
239 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end(), ArgList&: AllocId); |
240 | Lock.unlock(); |
241 | Pool.wait(); |
242 | } |
243 | |
244 | } // namespace ParallelUtilities |
245 | } // namespace bolt |
246 | } // namespace llvm |
247 | |