ParallelUtilities.cpp source code [bolt/lib/Core/ParallelUtilities.cpp]

1	//===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Implementation of the class that manages parallel work on BinaryFunctions.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "bolt/Core/ParallelUtilities.h"
14	#include "bolt/Core/BinaryContext.h"
15	#include "bolt/Core/BinaryFunction.h"
16	#include "llvm/Support/RWMutex.h"
17	#include "llvm/Support/ThreadPool.h"
18	#include "llvm/Support/Timer.h"
19	#include <mutex>
20
21	#define DEBUG_TYPE "par-utils"
22
23	namespace opts {
24	extern cl::OptionCategory BoltCategory;
25
26	cl::opt<unsigned>
27	ThreadCount("thread-count",
28	cl::desc ("number of threads"),
29	cl::init(Val: hardware_concurrency().compute_thread_count()),
30	cl::cat (BoltCategory));
31
32	cl::opt<bool>
33	NoThreads("no-threads",
34	cl::desc ("disable multithreading"),
35	cl::init(Val: false),
36	cl::cat (BoltCategory));
37
38	cl::opt<unsigned>
39	TaskCount("tasks-per-thread",
40	cl::desc ("number of tasks to be created per thread"),
41	cl::init(Val: `20`),
42	cl::cat (BoltCategory));
43
44	} // namespace opts
45
46	namespace llvm {
47	namespace bolt {
48	namespace ParallelUtilities {
49
50	namespace {
51	/// A single thread pool that is used to run parallel tasks
52	std::unique_ptr<DefaultThreadPool> ThreadPoolPtr;
53
54	unsigned computeCostFor(const BinaryFunction &BF,
55	const PredicateTy &SkipPredicate,
56	const SchedulingPolicy &SchedPolicy) {
57	if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58	return `1`;
59
60	if (SkipPredicate && SkipPredicate (BF))
61	return `0`;
62
63	switch (SchedPolicy) {
64	case SchedulingPolicy::SP_CONSTANT:
65	return `1`;
66	case SchedulingPolicy::SP_INST_LINEAR:
67	return BF.getSize();
68	case SchedulingPolicy::SP_INST_QUADRATIC:
69	return BF.getSize() * BF.getSize();
70	case SchedulingPolicy::SP_BB_LINEAR:
71	return BF.size();
72	case SchedulingPolicy::SP_BB_QUADRATIC:
73	return BF.size() * BF.size();
74	default:
75	llvm_unreachable("unsupported scheduling policy");
76	}
77	}
78
79	inline unsigned estimateTotalCost(const BinaryContext &BC,
80	const PredicateTy &SkipPredicate,
81	SchedulingPolicy &SchedPolicy) {
82	if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83	return BC.getBinaryFunctions().size();
84
85	unsigned TotalCost = `0`;
86	for (auto &BFI : BC.getBinaryFunctions()) {
87	const BinaryFunction &BF = BFI.second;
88	TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
89	}
90
91	// Switch to trivial scheduling if total estimated work is zero
92	if (TotalCost == `0`) {
93	BC.outs()
94	<< "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
95	"switch to trivial scheduling.\n";
96
97	SchedPolicy = SP_TRIVIAL;
98	TotalCost = BC.getBinaryFunctions().size();
99	}
100	return TotalCost;
101	}
102
103	} // namespace
104
105	ThreadPoolInterface &getThreadPool() {
106	if (ThreadPoolPtr.get())
107	return *ThreadPoolPtr;
108
109	ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
110	args: llvm::hardware_concurrency(ThreadCount: opts::ThreadCount));
111	return *ThreadPoolPtr;
112	}
113
114	void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
115	WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
116	std::string LogName, bool ForceSequential,
117	unsigned TasksPerThread) {
118	if (BC.getBinaryFunctions().size() == `0`)
119	return;
120
121	auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
122	std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
123	Timer T(LogName, LogName);
124	LLVM_DEBUG(T.startTimer());
125
126	for (auto It = BlockBegin; It != BlockEnd; ++It) {
127	BinaryFunction &BF = It ->second;
128	if (SkipPredicate && SkipPredicate (BF))
129	continue;
130
131	WorkFunction (BF);
132	}
133	LLVM_DEBUG(T.stopTimer());
134	};
135
136	if (opts::NoThreads \|\| ForceSequential) {
137	runBlock (BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
138	return;
139	}
140
141	// Estimate the overall runtime cost using the scheduling policy
142	const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
143	const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
144	const unsigned BlockCost =
145	TotalCost > BlocksCount ? TotalCost / BlocksCount : `1`;
146
147	// Divide work into blocks of equal cost
148	ThreadPoolInterface &Pool = getThreadPool();
149	auto BlockBegin = BC.getBinaryFunctions().begin();
150	unsigned CurrentCost = `0`;
151
152	for (auto It = BC.getBinaryFunctions().begin();
153	It != BC.getBinaryFunctions().end(); ++It) {
154	BinaryFunction &BF = It ->second;
155	CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
156
157	if (CurrentCost >= BlockCost) {
158	Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It));
159	BlockBegin = std::next(x: It);
160	CurrentCost = `0`;
161	}
162	}
163	Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end());
164	Pool.wait();
165	}
166
167	void runOnEachFunctionWithUniqueAllocId(
168	BinaryContext &BC, SchedulingPolicy SchedPolicy,
169	WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
170	std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
171	if (BC.getBinaryFunctions().size() == `0`)
172	return;
173
174	llvm::sys::RWMutex MainLock;
175	auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
176	std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
177	MCPlusBuilder::AllocatorIdTy AllocId) {
178	Timer T(LogName, LogName);
179	LLVM_DEBUG(T.startTimer());
180	std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
181	for (auto It = BlockBegin; It != BlockEnd; ++It) {
182	BinaryFunction &BF = It ->second;
183	if (SkipPredicate && SkipPredicate (BF))
184	continue;
185
186	WorkFunction (BF, AllocId);
187	}
188	LLVM_DEBUG(T.stopTimer());
189	};
190
191	if (opts::NoThreads \|\| ForceSequential) {
192	runBlock (BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), `0`);
193	return;
194	}
195	// This lock is used to postpone task execution
196	std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
197
198	// Estimate the overall runtime cost using the scheduling policy
199	const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
200	const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
201	const unsigned BlockCost =
202	TotalCost > BlocksCount ? TotalCost / BlocksCount : `1`;
203
204	// Divide work into blocks of equal cost
205	ThreadPoolInterface &Pool = getThreadPool();
206	auto BlockBegin = BC.getBinaryFunctions().begin();
207	unsigned CurrentCost = `0`;
208	unsigned AllocId = `1`;
209	for (auto It = BC.getBinaryFunctions().begin();
210	It != BC.getBinaryFunctions().end(); ++It) {
211	BinaryFunction &BF = It ->second;
212	CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
213
214	if (CurrentCost >= BlockCost) {
215	if (!BC.MIB ->checkAllocatorExists(AllocatorId: AllocId)) {
216	MCPlusBuilder::AllocatorIdTy Id =
217	BC.MIB ->initializeNewAnnotationAllocator();
218	(void)Id;
219	assert(AllocId == Id && "unexpected allocator id created");
220	}
221	Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It), ArgList&: AllocId);
222	AllocId++;
223	BlockBegin = std::next(x: It);
224	CurrentCost = `0`;
225	}
226	}
227
228	if (!BC.MIB ->checkAllocatorExists(AllocatorId: AllocId)) {
229	MCPlusBuilder::AllocatorIdTy Id =
230	BC.MIB ->initializeNewAnnotationAllocator();
231	(void)Id;
232	assert(AllocId == Id && "unexpected allocator id created");
233	}
234
235	Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end(), ArgList&: AllocId);
236	Lock.unlock();
237	Pool.wait();
238	}
239
240	} // namespace ParallelUtilities
241	} // namespace bolt
242	} // namespace llvm
243

source code of bolt/lib/Core/ParallelUtilities.cpp