HFSort.cpp source code [bolt/lib/Passes/HFSort.cpp]

1	//===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Implementation of HFSort algorithm for function ordering:
10	// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "bolt/Passes/HFSort.h"
15	#include "llvm/Support/CommandLine.h"
16	#include "llvm/Support/Debug.h"
17	#include "llvm/Support/Format.h"
18	#include "llvm/Support/raw_ostream.h"
19	#include <unordered_set>
20
21	#define DEBUG_TYPE "hfsort"
22
23	namespace opts {
24	extern llvm::cl::opt<unsigned> Verbosity;
25	}
26
27	namespace llvm {
28	namespace bolt {
29
30	using NodeId = CallGraph::NodeId;
31	using Arc = CallGraph::Arc;
32	using Node = CallGraph::Node;
33
34	namespace {
35
36	// The number of pages to reserve for the functions with highest
37	// density (samples / size). The functions put in these pages are not
38	// considered for clustering.
39	constexpr uint32_t FrozenPages = `0`;
40
41	// The minimum approximate probability of a callee being called from a
42	// particular arc to consider merging with the caller's cluster.
43	constexpr double MinArcProbability = `0.1`;
44
45	// This is a factor to determine by how much a caller cluster is
46	// willing to degrade it's density by merging a callee.
47	constexpr int CallerDegradeFactor = `8`;
48
49	} // namespace
50
51	////////////////////////////////////////////////////////////////////////////////
52
53	Cluster::Cluster(NodeId Id, const Node &Func)
54	: Samples(Func.samples()), Size(Func.size()),
55	Density((double)Samples / Size) {
56	Targets.push_back(x: Id);
57	}
58
59	Cluster::Cluster(const std::vector<NodeId> &Nodes, const CallGraph &Cg) {
60	Samples = `0`;
61	Size = `0`;
62	for (NodeId TargetId : Nodes) {
63	Targets.push_back(x: TargetId);
64	Samples += Cg.samples(Id: TargetId);
65	Size += Cg.size(Id: TargetId);
66	}
67	Density = (double)Samples / Size;
68	}
69
70	std::string Cluster::toString() const {
71	std::string Str;
72	raw_string_ostream CS(Str);
73	bool PrintComma = false;
74	CS << "funcs = [";
75	for (const NodeId &Target : Targets) {
76	if (PrintComma)
77	CS << ", ";
78	CS << Target;
79	PrintComma = true;
80	}
81	CS << "]";
82	return CS.str();
83	}
84
85	namespace {
86
87	void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) {
88	uint32_t TotalSize = `0`;
89	llvm::sort(C&: Clusters, Comp: compareClustersDensity);
90	for (Cluster &C : Clusters) {
91	uint32_t NewSize = TotalSize + C.size();
92	if (NewSize > FrozenPages * HugePageSize)
93	break;
94	C.freeze();
95	TotalSize = NewSize;
96	LLVM_DEBUG(NodeId Fid = C.target(`0`);
97	dbgs() << format(
98	"freezing cluster for func %d, size = %u, samples = %lu)\n",
99	Fid, Cg.size(Fid), Cg.samples(Fid)););
100	}
101	}
102
103	} // namespace
104
105	void Cluster::reverseTargets() { std::reverse(first: Targets.begin(), last: Targets.end()); }
106
107	void Cluster::merge(const Cluster &Other, const double Aw) {
108	Targets.insert(position: Targets.end(), first: Other.Targets.begin(), last: Other.Targets.end());
109	Size += Other.Size;
110	Samples += Other.Samples;
111	Density = (double)Samples / Size;
112	}
113
114	void Cluster::merge(const Cluster &Other,
115	const std::vector<CallGraph::NodeId> &Targets_) {
116	Targets = Targets_;
117	Size += Other.Size;
118	Samples += Other.Samples;
119	Density = (double)Samples / Size;
120	}
121
122	void Cluster::clear() {
123	Id = -`1u`;
124	Size = `0`;
125	Samples = `0`;
126	Density = `0.0`;
127	Targets.clear();
128	Frozen = false;
129	}
130
131	std::vector<Cluster> clusterize(const CallGraph &Cg) {
132	std::vector<NodeId> SortedFuncs;
133
134	// indexed by NodeId, keeps it's current cluster
135	std::vector<Cluster > FuncCluster(Cg.numNodes(), nullptr*);
136	std::vector<Cluster> Clusters;
137	Clusters.reserve(n: Cg.numNodes());
138
139	for (NodeId F = `0`; F < Cg.numNodes(); F++) {
140	if (Cg.samples(Id: F) == `0`)
141	continue;
142	Clusters.emplace_back(args&: F, args: Cg.getNode(Id: F));
143	SortedFuncs.push_back(x: F);
144	}
145
146	freezeClusters(Cg, Clusters);
147
148	// The size and order of Clusters is fixed until we reshuffle it immediately
149	// before returning.
150	for (Cluster &Cluster : Clusters)
151	FuncCluster [Cluster.targets().front()] = &Cluster;
152
153	llvm::sort(C&: SortedFuncs, Comp: [&](const NodeId F1, const NodeId F2) {
154	const CallGraph::Node &Func1 = Cg.getNode(Id: F1);
155	const CallGraph::Node &Func2 = Cg.getNode(Id: F2);
156	return Func1.samples() * Func2.size() > // TODO: is this correct?
157	Func2.samples() * Func1.size();
158	});
159
160	// Process each function, and consider merging its cluster with the
161	// one containing its most likely predecessor.
162	for (const NodeId Fid : SortedFuncs) {
163	Cluster *Cluster = FuncCluster [Fid];
164	if (Cluster->frozen())
165	continue;
166
167	// Find best predecessor.
168	NodeId BestPred = CallGraph::InvalidId;
169	double BestProb = `0`;
170
171	for (const NodeId Src : Cg.predecessors(Id: Fid)) {
172	const Arc &Arc = *Cg.findArc(Src, Dst: Fid);
173	if (BestPred == CallGraph::InvalidId \|\|
174	Arc.normalizedWeight() > BestProb) {
175	BestPred = Arc.src();
176	BestProb = Arc.normalizedWeight();
177	}
178	}
179
180	// Check if the merge is good for the callee.
181	// Don't merge if the probability of getting to the callee from the
182	// caller is too low.
183	if (BestProb < MinArcProbability)
184	continue;
185
186	assert(BestPred != CallGraph::InvalidId);
187
188	class Cluster *PredCluster = FuncCluster [BestPred];
189
190	// Skip if no predCluster (predecessor w/ no samples), or if same
191	// as cluster, of it's frozen.
192	if (PredCluster == nullptr \|\| PredCluster == Cluster \|\|
193	PredCluster->frozen())
194	continue;
195
196	// Skip if merged cluster would be bigger than the threshold.
197	if (Cluster->size() + PredCluster->size() > MaxClusterSize)
198	continue;
199
200	// Check if the merge is good for the caller.
201	// Don't merge if the caller's density is significantly better
202	// than the density resulting from the merge.
203	const double NewDensity =
204	((double)PredCluster->samples() + Cluster->samples()) /
205	(PredCluster->size() + Cluster->size());
206	if (PredCluster->density() > NewDensity * CallerDegradeFactor) {
207	continue;
208	}
209
210	LLVM_DEBUG(if (opts::Verbosity > `1`) {
211	dbgs() << format("merging %s -> %s: %u\n",
212	PredCluster->toString().c_str(),
213	Cluster->toString().c_str(), Cg.samples(Fid));
214	});
215
216	for (NodeId F : Cluster->targets())
217	FuncCluster [F] = PredCluster;
218
219	PredCluster->merge(Other: *Cluster);
220	Cluster->clear();
221	}
222
223	// Return the set of Clusters that are left, which are the ones that
224	// didn't get merged (so their first func is its original func).
225	std::vector<Cluster> SortedClusters;
226	std::unordered_set<Cluster *> Visited;
227	for (const NodeId Func : SortedFuncs) {
228	Cluster *Cluster = FuncCluster [Func];
229	if (!Cluster \|\| Visited.count(x: Cluster) == `1` \|\| Cluster->target(N: `0`) != Func)
230	continue;
231
232	SortedClusters.emplace_back(args: std::move(*Cluster));
233	Visited.insert(x: Cluster);
234	}
235
236	llvm::sort(C&: SortedClusters, Comp: compareClustersDensity);
237
238	return SortedClusters;
239	}
240
241	std::vector<Cluster> randomClusters(const CallGraph &Cg) {
242	std::vector<Cluster> Clusters;
243	Clusters.reserve(n: Cg.numNodes());
244
245	for (NodeId F = `0`; F < Cg.numNodes(); F++) {
246	if (Cg.samples(Id: F) == `0`)
247	continue;
248	Clusters.emplace_back(args&: F, args: Cg.getNode(Id: F));
249	}
250
251	llvm::sort(C&: Clusters, Comp: [](const Cluster &A, const Cluster &B) {
252	return A.size() < B.size();
253	});
254
255	auto pickMergeCluster = [&Clusters](const size_t Idx) {
256	size_t MaxIdx = Idx + `1`;
257
258	while (MaxIdx < Clusters.size() &&
259	Clusters [Idx].size() + Clusters [MaxIdx].size() <= MaxClusterSize)
260	++MaxIdx;
261
262	if (MaxIdx - Idx > `1`) {
263	size_t MergeIdx = (std::rand() % (MaxIdx - Idx - `1`)) + Idx + `1`;
264	assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <=
265	MaxClusterSize);
266	return MergeIdx;
267	}
268	return Clusters.size();
269	};
270
271	size_t Idx = `0`;
272	while (Idx < Clusters.size()) {
273	size_t MergeIdx = pickMergeCluster (Idx);
274	if (MergeIdx == Clusters.size()) {
275	++Idx;
276	} else {
277	Clusters [Idx].merge(Other: Clusters [MergeIdx]);
278	Clusters.erase(position: Clusters.begin() + MergeIdx);
279	}
280	}
281
282	return Clusters;
283	}
284
285	} // namespace bolt
286	} // namespace llvm
287

Provided by KDAB

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of bolt/lib/Passes/HFSort.cpp