1 | //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implementation of the class that manages parallel work on BinaryFunctions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/ParallelUtilities.h" |
14 | #include "bolt/Core/BinaryContext.h" |
15 | #include "bolt/Core/BinaryFunction.h" |
16 | #include "llvm/Support/RWMutex.h" |
17 | #include "llvm/Support/ThreadPool.h" |
18 | #include "llvm/Support/Timer.h" |
19 | #include <mutex> |
20 | |
21 | #define DEBUG_TYPE "par-utils" |
22 | |
23 | namespace opts { |
24 | extern cl::OptionCategory BoltCategory; |
25 | |
26 | cl::opt<unsigned> |
27 | ThreadCount("thread-count" , |
28 | cl::desc("number of threads" ), |
29 | cl::init(Val: hardware_concurrency().compute_thread_count()), |
30 | cl::cat(BoltCategory)); |
31 | |
32 | cl::opt<bool> |
33 | NoThreads("no-threads" , |
34 | cl::desc("disable multithreading" ), |
35 | cl::init(Val: false), |
36 | cl::cat(BoltCategory)); |
37 | |
38 | cl::opt<unsigned> |
39 | TaskCount("tasks-per-thread" , |
40 | cl::desc("number of tasks to be created per thread" ), |
41 | cl::init(Val: 20), |
42 | cl::cat(BoltCategory)); |
43 | |
44 | } // namespace opts |
45 | |
46 | namespace llvm { |
47 | namespace bolt { |
48 | namespace ParallelUtilities { |
49 | |
50 | namespace { |
51 | /// A single thread pool that is used to run parallel tasks |
52 | std::unique_ptr<DefaultThreadPool> ThreadPoolPtr; |
53 | |
54 | unsigned computeCostFor(const BinaryFunction &BF, |
55 | const PredicateTy &SkipPredicate, |
56 | const SchedulingPolicy &SchedPolicy) { |
57 | if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) |
58 | return 1; |
59 | |
60 | if (SkipPredicate && SkipPredicate(BF)) |
61 | return 0; |
62 | |
63 | switch (SchedPolicy) { |
64 | case SchedulingPolicy::SP_CONSTANT: |
65 | return 1; |
66 | case SchedulingPolicy::SP_INST_LINEAR: |
67 | return BF.getSize(); |
68 | case SchedulingPolicy::SP_INST_QUADRATIC: |
69 | return BF.getSize() * BF.getSize(); |
70 | case SchedulingPolicy::SP_BB_LINEAR: |
71 | return BF.size(); |
72 | case SchedulingPolicy::SP_BB_QUADRATIC: |
73 | return BF.size() * BF.size(); |
74 | default: |
75 | llvm_unreachable("unsupported scheduling policy" ); |
76 | } |
77 | } |
78 | |
79 | inline unsigned estimateTotalCost(const BinaryContext &BC, |
80 | const PredicateTy &SkipPredicate, |
81 | SchedulingPolicy &SchedPolicy) { |
82 | if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) |
83 | return BC.getBinaryFunctions().size(); |
84 | |
85 | unsigned TotalCost = 0; |
86 | for (auto &BFI : BC.getBinaryFunctions()) { |
87 | const BinaryFunction &BF = BFI.second; |
88 | TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
89 | } |
90 | |
91 | // Switch to trivial scheduling if total estimated work is zero |
92 | if (TotalCost == 0) { |
93 | BC.outs() |
94 | << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " |
95 | "switch to trivial scheduling.\n" ; |
96 | |
97 | SchedPolicy = SP_TRIVIAL; |
98 | TotalCost = BC.getBinaryFunctions().size(); |
99 | } |
100 | return TotalCost; |
101 | } |
102 | |
103 | } // namespace |
104 | |
105 | ThreadPoolInterface &getThreadPool() { |
106 | if (ThreadPoolPtr.get()) |
107 | return *ThreadPoolPtr; |
108 | |
109 | ThreadPoolPtr = std::make_unique<DefaultThreadPool>( |
110 | args: llvm::hardware_concurrency(ThreadCount: opts::ThreadCount)); |
111 | return *ThreadPoolPtr; |
112 | } |
113 | |
114 | void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, |
115 | WorkFuncTy WorkFunction, PredicateTy SkipPredicate, |
116 | std::string LogName, bool ForceSequential, |
117 | unsigned TasksPerThread) { |
118 | if (BC.getBinaryFunctions().size() == 0) |
119 | return; |
120 | |
121 | auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, |
122 | std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { |
123 | Timer T(LogName, LogName); |
124 | LLVM_DEBUG(T.startTimer()); |
125 | |
126 | for (auto It = BlockBegin; It != BlockEnd; ++It) { |
127 | BinaryFunction &BF = It->second; |
128 | if (SkipPredicate && SkipPredicate(BF)) |
129 | continue; |
130 | |
131 | WorkFunction(BF); |
132 | } |
133 | LLVM_DEBUG(T.stopTimer()); |
134 | }; |
135 | |
136 | if (opts::NoThreads || ForceSequential) { |
137 | runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); |
138 | return; |
139 | } |
140 | |
141 | // Estimate the overall runtime cost using the scheduling policy |
142 | const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); |
143 | const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; |
144 | const unsigned BlockCost = |
145 | TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; |
146 | |
147 | // Divide work into blocks of equal cost |
148 | ThreadPoolInterface &Pool = getThreadPool(); |
149 | auto BlockBegin = BC.getBinaryFunctions().begin(); |
150 | unsigned CurrentCost = 0; |
151 | |
152 | for (auto It = BC.getBinaryFunctions().begin(); |
153 | It != BC.getBinaryFunctions().end(); ++It) { |
154 | BinaryFunction &BF = It->second; |
155 | CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
156 | |
157 | if (CurrentCost >= BlockCost) { |
158 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It)); |
159 | BlockBegin = std::next(x: It); |
160 | CurrentCost = 0; |
161 | } |
162 | } |
163 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end()); |
164 | Pool.wait(); |
165 | } |
166 | |
167 | void runOnEachFunctionWithUniqueAllocId( |
168 | BinaryContext &BC, SchedulingPolicy SchedPolicy, |
169 | WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, |
170 | std::string LogName, bool ForceSequential, unsigned TasksPerThread) { |
171 | if (BC.getBinaryFunctions().size() == 0) |
172 | return; |
173 | |
174 | llvm::sys::RWMutex MainLock; |
175 | auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, |
176 | std::map<uint64_t, BinaryFunction>::iterator BlockEnd, |
177 | MCPlusBuilder::AllocatorIdTy AllocId) { |
178 | Timer T(LogName, LogName); |
179 | LLVM_DEBUG(T.startTimer()); |
180 | std::shared_lock<llvm::sys::RWMutex> Lock(MainLock); |
181 | for (auto It = BlockBegin; It != BlockEnd; ++It) { |
182 | BinaryFunction &BF = It->second; |
183 | if (SkipPredicate && SkipPredicate(BF)) |
184 | continue; |
185 | |
186 | WorkFunction(BF, AllocId); |
187 | } |
188 | LLVM_DEBUG(T.stopTimer()); |
189 | }; |
190 | |
191 | if (opts::NoThreads || ForceSequential) { |
192 | runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0); |
193 | return; |
194 | } |
195 | // This lock is used to postpone task execution |
196 | std::unique_lock<llvm::sys::RWMutex> Lock(MainLock); |
197 | |
198 | // Estimate the overall runtime cost using the scheduling policy |
199 | const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); |
200 | const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; |
201 | const unsigned BlockCost = |
202 | TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; |
203 | |
204 | // Divide work into blocks of equal cost |
205 | ThreadPoolInterface &Pool = getThreadPool(); |
206 | auto BlockBegin = BC.getBinaryFunctions().begin(); |
207 | unsigned CurrentCost = 0; |
208 | unsigned AllocId = 1; |
209 | for (auto It = BC.getBinaryFunctions().begin(); |
210 | It != BC.getBinaryFunctions().end(); ++It) { |
211 | BinaryFunction &BF = It->second; |
212 | CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
213 | |
214 | if (CurrentCost >= BlockCost) { |
215 | if (!BC.MIB->checkAllocatorExists(AllocatorId: AllocId)) { |
216 | MCPlusBuilder::AllocatorIdTy Id = |
217 | BC.MIB->initializeNewAnnotationAllocator(); |
218 | (void)Id; |
219 | assert(AllocId == Id && "unexpected allocator id created" ); |
220 | } |
221 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It), ArgList&: AllocId); |
222 | AllocId++; |
223 | BlockBegin = std::next(x: It); |
224 | CurrentCost = 0; |
225 | } |
226 | } |
227 | |
228 | if (!BC.MIB->checkAllocatorExists(AllocatorId: AllocId)) { |
229 | MCPlusBuilder::AllocatorIdTy Id = |
230 | BC.MIB->initializeNewAnnotationAllocator(); |
231 | (void)Id; |
232 | assert(AllocId == Id && "unexpected allocator id created" ); |
233 | } |
234 | |
235 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end(), ArgList&: AllocId); |
236 | Lock.unlock(); |
237 | Pool.wait(); |
238 | } |
239 | |
240 | } // namespace ParallelUtilities |
241 | } // namespace bolt |
242 | } // namespace llvm |
243 | |