1//===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the class that manages parallel work on BinaryFunctions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/ParallelUtilities.h"
14#include "bolt/Core/BinaryContext.h"
15#include "bolt/Core/BinaryFunction.h"
16#include "llvm/Support/RWMutex.h"
17#include "llvm/Support/ThreadPool.h"
18#include "llvm/Support/Timer.h"
19#include <mutex>
20
21#define DEBUG_TYPE "par-utils"
22
23namespace opts {
24extern cl::OptionCategory BoltCategory;
25
26cl::opt<unsigned>
27ThreadCount("thread-count",
28 cl::desc("number of threads"),
29 cl::init(Val: hardware_concurrency().compute_thread_count()),
30 cl::cat(BoltCategory));
31
32cl::opt<bool>
33NoThreads("no-threads",
34 cl::desc("disable multithreading"),
35 cl::init(Val: false),
36 cl::cat(BoltCategory));
37
38cl::opt<unsigned>
39TaskCount("tasks-per-thread",
40 cl::desc("number of tasks to be created per thread"),
41 cl::init(Val: 20),
42 cl::cat(BoltCategory));
43
44} // namespace opts
45
46namespace llvm {
47namespace bolt {
48namespace ParallelUtilities {
49
50namespace {
51/// A single thread pool that is used to run parallel tasks
52std::unique_ptr<DefaultThreadPool> ThreadPoolPtr;
53
54unsigned computeCostFor(const BinaryFunction &BF,
55 const PredicateTy &SkipPredicate,
56 const SchedulingPolicy &SchedPolicy) {
57 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58 return 1;
59
60 if (SkipPredicate && SkipPredicate(BF))
61 return 0;
62
63 switch (SchedPolicy) {
64 case SchedulingPolicy::SP_CONSTANT:
65 return 1;
66 case SchedulingPolicy::SP_INST_LINEAR:
67 return BF.getSize();
68 case SchedulingPolicy::SP_INST_QUADRATIC:
69 return BF.getSize() * BF.getSize();
70 case SchedulingPolicy::SP_BB_LINEAR:
71 return BF.size();
72 case SchedulingPolicy::SP_BB_QUADRATIC:
73 return BF.size() * BF.size();
74 default:
75 llvm_unreachable("unsupported scheduling policy");
76 }
77}
78
79inline unsigned estimateTotalCost(const BinaryContext &BC,
80 const PredicateTy &SkipPredicate,
81 SchedulingPolicy &SchedPolicy) {
82 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83 return BC.getBinaryFunctions().size();
84
85 unsigned TotalCost = 0;
86 for (auto &BFI : BC.getBinaryFunctions()) {
87 const BinaryFunction &BF = BFI.second;
88 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
89 }
90
91 // Switch to trivial scheduling if total estimated work is zero
92 if (TotalCost == 0) {
93 BC.outs()
94 << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
95 "switch to trivial scheduling.\n";
96
97 SchedPolicy = SP_TRIVIAL;
98 TotalCost = BC.getBinaryFunctions().size();
99 }
100 return TotalCost;
101}
102
103} // namespace
104
105ThreadPoolInterface &getThreadPool() {
106 if (ThreadPoolPtr.get())
107 return *ThreadPoolPtr;
108
109 ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
110 args: llvm::hardware_concurrency(ThreadCount: opts::ThreadCount));
111 return *ThreadPoolPtr;
112}
113
114void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
115 WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
116 std::string LogName, bool ForceSequential,
117 unsigned TasksPerThread) {
118 if (BC.getBinaryFunctions().size() == 0)
119 return;
120
121 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
122 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
123 Timer T(LogName, LogName);
124 LLVM_DEBUG(T.startTimer());
125
126 for (auto It = BlockBegin; It != BlockEnd; ++It) {
127 BinaryFunction &BF = It->second;
128 if (SkipPredicate && SkipPredicate(BF))
129 continue;
130
131 WorkFunction(BF);
132 }
133 LLVM_DEBUG(T.stopTimer());
134 };
135
136 if (opts::NoThreads || ForceSequential) {
137 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
138 return;
139 }
140
141 // Estimate the overall runtime cost using the scheduling policy
142 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
143 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
144 const unsigned BlockCost =
145 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
146
147 // Divide work into blocks of equal cost
148 ThreadPoolInterface &Pool = getThreadPool();
149 auto BlockBegin = BC.getBinaryFunctions().begin();
150 unsigned CurrentCost = 0;
151
152 for (auto It = BC.getBinaryFunctions().begin();
153 It != BC.getBinaryFunctions().end(); ++It) {
154 BinaryFunction &BF = It->second;
155 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
156
157 if (CurrentCost >= BlockCost) {
158 Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It));
159 BlockBegin = std::next(x: It);
160 CurrentCost = 0;
161 }
162 }
163 Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end());
164 Pool.wait();
165}
166
167void runOnEachFunctionWithUniqueAllocId(
168 BinaryContext &BC, SchedulingPolicy SchedPolicy,
169 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
170 std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
171 if (BC.getBinaryFunctions().size() == 0)
172 return;
173
174 llvm::sys::RWMutex MainLock;
175 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
176 std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
177 MCPlusBuilder::AllocatorIdTy AllocId) {
178 Timer T(LogName, LogName);
179 LLVM_DEBUG(T.startTimer());
180 std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
181 for (auto It = BlockBegin; It != BlockEnd; ++It) {
182 BinaryFunction &BF = It->second;
183 if (SkipPredicate && SkipPredicate(BF))
184 continue;
185
186 WorkFunction(BF, AllocId);
187 }
188 LLVM_DEBUG(T.stopTimer());
189 };
190
191 if (opts::NoThreads || ForceSequential) {
192 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0);
193 return;
194 }
195 // This lock is used to postpone task execution
196 std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
197
198 // Estimate the overall runtime cost using the scheduling policy
199 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
200 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
201 const unsigned BlockCost =
202 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
203
204 // Divide work into blocks of equal cost
205 ThreadPoolInterface &Pool = getThreadPool();
206 auto BlockBegin = BC.getBinaryFunctions().begin();
207 unsigned CurrentCost = 0;
208 unsigned AllocId = 1;
209 for (auto It = BC.getBinaryFunctions().begin();
210 It != BC.getBinaryFunctions().end(); ++It) {
211 BinaryFunction &BF = It->second;
212 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
213
214 if (CurrentCost >= BlockCost) {
215 if (!BC.MIB->checkAllocatorExists(AllocatorId: AllocId)) {
216 MCPlusBuilder::AllocatorIdTy Id =
217 BC.MIB->initializeNewAnnotationAllocator();
218 (void)Id;
219 assert(AllocId == Id && "unexpected allocator id created");
220 }
221 Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It), ArgList&: AllocId);
222 AllocId++;
223 BlockBegin = std::next(x: It);
224 CurrentCost = 0;
225 }
226 }
227
228 if (!BC.MIB->checkAllocatorExists(AllocatorId: AllocId)) {
229 MCPlusBuilder::AllocatorIdTy Id =
230 BC.MIB->initializeNewAnnotationAllocator();
231 (void)Id;
232 assert(AllocId == Id && "unexpected allocator id created");
233 }
234
235 Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end(), ArgList&: AllocId);
236 Lock.unlock();
237 Pool.wait();
238}
239
240} // namespace ParallelUtilities
241} // namespace bolt
242} // namespace llvm
243

source code of bolt/lib/Core/ParallelUtilities.cpp