//===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of HFSort algorithm for function ordering:
// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/HFSort.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <unordered_set>

#define DEBUG_TYPE "hfsort"

namespace opts {
extern llvm::cl::opt<unsigned> Verbosity;
}

namespace llvm {
namespace bolt {

using NodeId = CallGraph::NodeId;
using Arc = CallGraph::Arc;
using Node = CallGraph::Node;

namespace {

// The number of pages to reserve for the functions with highest
// density (samples / size). The functions put in these pages are not
// considered for clustering.
constexpr uint32_t FrozenPages = 0;

// The minimum approximate probability of a callee being called from a
// particular arc to consider merging with the caller's cluster.
constexpr double MinArcProbability = 0.1;

// This is a factor to determine by how much a caller cluster is
// willing to degrade its density by merging a callee.
constexpr int CallerDegradeFactor = 8;

} // namespace

////////////////////////////////////////////////////////////////////////////////

Cluster::Cluster(NodeId Id, const Node &Func)
    : Samples(Func.samples()), Size(Func.size()),
      Density((double)Samples / Size) {
  Targets.push_back(Id);
}

Cluster::Cluster(const std::vector<NodeId> &Nodes, const CallGraph &Cg) {
  Samples = 0;
  Size = 0;
  for (NodeId TargetId : Nodes) {
    Targets.push_back(TargetId);
    Samples += Cg.samples(TargetId);
    Size += Cg.size(TargetId);
  }
  Density = (double)Samples / Size;
}

std::string Cluster::toString() const {
  std::string Str;
  raw_string_ostream CS(Str);
  bool PrintComma = false;
  CS << "funcs = [";
  for (const NodeId &Target : Targets) {
    if (PrintComma)
      CS << ", ";
    CS << Target;
    PrintComma = true;
  }
  CS << "]";
  return CS.str();
}

namespace {

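// Freeze the clusters of the highest-density functions that fit within the
// first FrozenPages huge pages so they are excluded from subsequent merging.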
void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) {
  uint32_t TotalSize = 0;
  llvm::sort(Clusters, compareClustersDensity);
  for (Cluster &C : Clusters) {
    uint32_t NewSize = TotalSize + C.size();
    if (NewSize > FrozenPages * HugePageSize)
      break;
    C.freeze();
    TotalSize = NewSize;
    LLVM_DEBUG(NodeId Fid = C.target(0);
               dbgs() << format(
                   "freezing cluster for func %d, size = %u, samples = %lu)\n",
                   Fid, Cg.size(Fid), Cg.samples(Fid)););
  }
}

} // namespace

void Cluster::reverseTargets() { std::reverse(Targets.begin(), Targets.end()); }

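// Append Other's targets to this cluster and update the aggregate size,
// samples, and density. The parameter Aw is currently unused.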
void Cluster::merge(const Cluster &Other, const double Aw) {
  Targets.insert(Targets.end(), Other.Targets.begin(), Other.Targets.end());
  Size += Other.Size;
  Samples += Other.Samples;
  Density = (double)Samples / Size;
}

void Cluster::merge(const Cluster &Other,
                    const std::vector<CallGraph::NodeId> &Targets_) {
  Targets = Targets_;
  Size += Other.Size;
  Samples += Other.Samples;
  Density = (double)Samples / Size;
}

void Cluster::clear() {
  Id = -1u;
  Size = 0;
  Samples = 0;
  Density = 0.0;
  Targets.clear();
  Frozen = false;
}

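// Group functions into clusters: start with one singleton cluster per
// function that has samples, visit functions in decreasing order of density,
// and merge each function's cluster into its most likely caller's cluster
// whenever the arc-probability, cluster-size, and density-degradation
// constraints checked below are satisfied.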
std::vector<Cluster> clusterize(const CallGraph &Cg) {
  std::vector<NodeId> SortedFuncs;

  // Indexed by NodeId; keeps each function's current cluster.
  std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr);
  std::vector<Cluster> Clusters;
  Clusters.reserve(Cg.numNodes());

  for (NodeId F = 0; F < Cg.numNodes(); F++) {
    if (Cg.samples(F) == 0)
      continue;
    Clusters.emplace_back(F, Cg.getNode(F));
    SortedFuncs.push_back(F);
  }

  freezeClusters(Cg, Clusters);

  // The size and order of Clusters is fixed until we reshuffle it immediately
  // before returning.
  for (Cluster &Cluster : Clusters)
    FuncCluster[Cluster.targets().front()] = &Cluster;

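  // Sort functions by decreasing density (samples / size); the
  // cross-multiplication compares the two densities without dividing.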
  llvm::sort(SortedFuncs, [&](const NodeId F1, const NodeId F2) {
    const CallGraph::Node &Func1 = Cg.getNode(F1);
    const CallGraph::Node &Func2 = Cg.getNode(F2);
    return Func1.samples() * Func2.size() > // TODO: is this correct?
           Func2.samples() * Func1.size();
  });

  // Process each function, and consider merging its cluster with the
  // one containing its most likely predecessor.
  for (const NodeId Fid : SortedFuncs) {
    Cluster *Cluster = FuncCluster[Fid];
    if (Cluster->frozen())
      continue;

    // Find best predecessor.
    NodeId BestPred = CallGraph::InvalidId;
    double BestProb = 0;

    for (const NodeId Src : Cg.predecessors(Fid)) {
      const Arc &Arc = *Cg.findArc(Src, Fid);
      if (BestPred == CallGraph::InvalidId ||
          Arc.normalizedWeight() > BestProb) {
        BestPred = Arc.src();
        BestProb = Arc.normalizedWeight();
      }
    }

    // Check if the merge is good for the callee.
    // Don't merge if the probability of getting to the callee from the
    // caller is too low.
    if (BestProb < MinArcProbability)
      continue;

    assert(BestPred != CallGraph::InvalidId);

    class Cluster *PredCluster = FuncCluster[BestPred];

    // Skip if there is no PredCluster (the predecessor has no samples), or if
    // it is the same as Cluster, or if it is frozen.
    if (PredCluster == nullptr || PredCluster == Cluster ||
        PredCluster->frozen())
      continue;

    // Skip if merged cluster would be bigger than the threshold.
    if (Cluster->size() + PredCluster->size() > MaxClusterSize)
      continue;

    // Check if the merge is good for the caller.
    // Don't merge if the caller's density is significantly better
    // than the density resulting from the merge.
    const double NewDensity =
        ((double)PredCluster->samples() + Cluster->samples()) /
        (PredCluster->size() + Cluster->size());
    if (PredCluster->density() > NewDensity * CallerDegradeFactor) {
      continue;
    }

    LLVM_DEBUG(if (opts::Verbosity > 1) {
      dbgs() << format("merging %s -> %s: %u\n",
                       PredCluster->toString().c_str(),
                       Cluster->toString().c_str(), Cg.samples(Fid));
    });

    for (NodeId F : Cluster->targets())
      FuncCluster[F] = PredCluster;

    PredCluster->merge(*Cluster);
    Cluster->clear();
  }

  // Return the set of clusters that are left, which are the ones that
  // didn't get merged (so their first function is still their original one).
  std::vector<Cluster> SortedClusters;
  std::unordered_set<Cluster *> Visited;
  for (const NodeId Func : SortedFuncs) {
    Cluster *Cluster = FuncCluster[Func];
    if (!Cluster || Visited.count(Cluster) == 1 || Cluster->target(0) != Func)
      continue;

    SortedClusters.emplace_back(std::move(*Cluster));
    Visited.insert(Cluster);
  }

  llvm::sort(SortedClusters, compareClustersDensity);

  return SortedClusters;
}

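// Produce a randomized clustering: functions with samples start as singleton
// clusters sorted by size, and each cluster is repeatedly merged with a
// randomly chosen later cluster as long as the combined size stays within
// MaxClusterSize.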
std::vector<Cluster> randomClusters(const CallGraph &Cg) {
  std::vector<NodeId> FuncIds(Cg.numNodes(), 0);
  std::vector<Cluster> Clusters;
  Clusters.reserve(Cg.numNodes());

  for (NodeId F = 0; F < Cg.numNodes(); F++) {
    if (Cg.samples(F) == 0)
      continue;
    Clusters.emplace_back(F, Cg.getNode(F));
  }

  llvm::sort(Clusters, [](const Cluster &A, const Cluster &B) {
    return A.size() < B.size();
  });

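  // Pick a random cluster after Idx that Clusters[Idx] can merge with while
  // keeping the combined size within MaxClusterSize. Returns Clusters.size()
  // when no such candidate exists.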
  auto pickMergeCluster = [&Clusters](const size_t Idx) {
    size_t MaxIdx = Idx + 1;

    while (MaxIdx < Clusters.size() &&
           Clusters[Idx].size() + Clusters[MaxIdx].size() <= MaxClusterSize)
      ++MaxIdx;

    if (MaxIdx - Idx > 1) {
      size_t MergeIdx = (std::rand() % (MaxIdx - Idx - 1)) + Idx + 1;
      assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <=
             MaxClusterSize);
      return MergeIdx;
    }
    return Clusters.size();
  };

  size_t Idx = 0;
  while (Idx < Clusters.size()) {
    size_t MergeIdx = pickMergeCluster(Idx);
    if (MergeIdx == Clusters.size()) {
      ++Idx;
    } else {
      Clusters[Idx].merge(Clusters[MergeIdx]);
      Clusters.erase(Clusters.begin() + MergeIdx);
    }
  }

  return Clusters;
}

} // namespace bolt
} // namespace llvm

