//===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of HFSort algorithm for function ordering:
// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
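//
// At a high level, clusterize() below builds one cluster per function that has
// profile samples, freezes the densest clusters when FrozenPages > 0, and then
// greedily merges each remaining function's cluster into the cluster of its
// most likely caller, subject to the probability, size, and density thresholds
// defined in this file.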
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/HFSort.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <unordered_set>

#define DEBUG_TYPE "hfsort"

namespace opts {
extern llvm::cl::opt<unsigned> Verbosity;
}

namespace llvm {
namespace bolt {

using NodeId = CallGraph::NodeId;
using Arc = CallGraph::Arc;
using Node = CallGraph::Node;

namespace {

// The number of pages to reserve for the functions with highest
// density (samples / size). The functions put in these pages are not
// considered for clustering.
constexpr uint32_t FrozenPages = 0;
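// Note: with FrozenPages == 0 the size budget in freezeClusters() is zero, so
// in practice no cluster gets frozen.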

// The minimum approximate probability of a callee being called from a
// particular arc to consider merging with the caller's cluster.
constexpr double MinArcProbability = 0.1;
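// For example, with MinArcProbability = 0.1 a callee is only considered for
// merging into a caller's cluster if roughly 10% or more of the calls it
// receives come from that caller (see the BestProb check in clusterize()).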

// This is a factor to determine by how much a caller cluster is
// willing to degrade its density by merging a callee.
constexpr int CallerDegradeFactor = 8;
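// For example, with CallerDegradeFactor = 8 a merge is rejected when the
// caller cluster's density is more than 8x the density the combined cluster
// would have (see the NewDensity check in clusterize()).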

} // namespace

////////////////////////////////////////////////////////////////////////////////

Cluster::Cluster(NodeId Id, const Node &Func)
    : Samples(Func.samples()), Size(Func.size()),
      Density((double)Samples / Size) {
  Targets.push_back(Id);
}

Cluster::Cluster(const std::vector<NodeId> &Nodes, const CallGraph &Cg) {
  Samples = 0;
  Size = 0;
  for (NodeId TargetId : Nodes) {
    Targets.push_back(TargetId);
    Samples += Cg.samples(TargetId);
    Size += Cg.size(TargetId);
  }
  Density = (double)Samples / Size;
}

std::string Cluster::toString() const {
  std::string Str;
  raw_string_ostream CS(Str);
  bool PrintComma = false;
  CS << "funcs = [";
  for (const NodeId &Target : Targets) {
    if (PrintComma)
      CS << ", ";
    CS << Target;
    PrintComma = true;
  }
  CS << "]";
  return CS.str();
}

namespace {

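// Freeze the densest clusters whose combined size fits into the first
// FrozenPages huge pages; frozen clusters are skipped during merging.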
void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) {
  uint32_t TotalSize = 0;
  llvm::sort(Clusters, compareClustersDensity);
  for (Cluster &C : Clusters) {
    uint32_t NewSize = TotalSize + C.size();
    if (NewSize > FrozenPages * HugePageSize)
      break;
    C.freeze();
    TotalSize = NewSize;
    LLVM_DEBUG(NodeId Fid = C.target(0);
               dbgs() << format(
                   "freezing cluster for func %d, size = %u, samples = %lu)\n",
                   Fid, Cg.size(Fid), Cg.samples(Fid)););
  }
}

} // namespace

void Cluster::reverseTargets() { std::reverse(Targets.begin(), Targets.end()); }

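// Note: the arc weight parameter Aw is not used by this overload.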
void Cluster::merge(const Cluster &Other, const double Aw) {
  Targets.insert(Targets.end(), Other.Targets.begin(), Other.Targets.end());
  Size += Other.Size;
  Samples += Other.Samples;
  Density = (double)Samples / Size;
}

void Cluster::merge(const Cluster &Other,
                    const std::vector<CallGraph::NodeId> &Targets_) {
  Targets = Targets_;
  Size += Other.Size;
  Samples += Other.Samples;
  Density = (double)Samples / Size;
}

void Cluster::clear() {
  Id = -1u;
  Size = 0;
  Samples = 0;
  Density = 0.0;
  Targets.clear();
  Frozen = false;
}

std::vector<Cluster> clusterize(const CallGraph &Cg) {
  std::vector<NodeId> SortedFuncs;

  // Indexed by NodeId, keeps each function's current cluster.
  std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr);
  std::vector<Cluster> Clusters;
  Clusters.reserve(Cg.numNodes());

  for (NodeId F = 0; F < Cg.numNodes(); F++) {
    if (Cg.samples(F) == 0)
      continue;
    Clusters.emplace_back(F, Cg.getNode(F));
    SortedFuncs.push_back(F);
  }

  freezeClusters(Cg, Clusters);

  // The size and order of Clusters is fixed until we reshuffle it immediately
  // before returning.
  for (Cluster &Cluster : Clusters)
    FuncCluster[Cluster.targets().front()] = &Cluster;

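  // Sort functions by decreasing density (samples / size); cross-multiplying
  // avoids floating-point division in the comparator.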
  llvm::sort(SortedFuncs, [&](const NodeId F1, const NodeId F2) {
    const CallGraph::Node &Func1 = Cg.getNode(F1);
    const CallGraph::Node &Func2 = Cg.getNode(F2);
    return Func1.samples() * Func2.size() > // TODO: is this correct?
           Func2.samples() * Func1.size();
  });

  // Process each function, and consider merging its cluster with the
  // one containing its most likely predecessor.
  for (const NodeId Fid : SortedFuncs) {
    Cluster *Cluster = FuncCluster[Fid];
    if (Cluster->frozen())
      continue;

    // Find best predecessor.
    NodeId BestPred = CallGraph::InvalidId;
    double BestProb = 0;

    for (const NodeId Src : Cg.predecessors(Fid)) {
      const Arc &Arc = *Cg.findArc(Src, Fid);
      if (BestPred == CallGraph::InvalidId ||
          Arc.normalizedWeight() > BestProb) {
        BestPred = Arc.src();
        BestProb = Arc.normalizedWeight();
      }
    }

    // Check if the merge is good for the callee.
    // Don't merge if the probability of getting to the callee from the
    // caller is too low.
    if (BestProb < MinArcProbability)
      continue;

    assert(BestPred != CallGraph::InvalidId);

    class Cluster *PredCluster = FuncCluster[BestPred];

    // Skip if there is no PredCluster (predecessor with no samples), or if it
    // is the same as Cluster, or if it is frozen.
    if (PredCluster == nullptr || PredCluster == Cluster ||
        PredCluster->frozen())
      continue;

    // Skip if merged cluster would be bigger than the threshold.
    if (Cluster->size() + PredCluster->size() > MaxClusterSize)
      continue;

    // Check if the merge is good for the caller.
    // Don't merge if the caller's density is significantly better
    // than the density resulting from the merge.
    const double NewDensity =
        ((double)PredCluster->samples() + Cluster->samples()) /
        (PredCluster->size() + Cluster->size());
    if (PredCluster->density() > NewDensity * CallerDegradeFactor) {
      continue;
    }

    LLVM_DEBUG(if (opts::Verbosity > 1) {
      dbgs() << format("merging %s -> %s: %u\n",
                       PredCluster->toString().c_str(),
                       Cluster->toString().c_str(), Cg.samples(Fid));
    });

    for (NodeId F : Cluster->targets())
      FuncCluster[F] = PredCluster;

    PredCluster->merge(*Cluster);
    Cluster->clear();
  }

  // Return the set of Clusters that are left, which are the ones that
  // didn't get merged (so their first func is its original func).
  std::vector<Cluster> SortedClusters;
  std::unordered_set<Cluster *> Visited;
  for (const NodeId Func : SortedFuncs) {
    Cluster *Cluster = FuncCluster[Func];
    if (!Cluster || Visited.count(Cluster) == 1 || Cluster->target(0) != Func)
      continue;

    SortedClusters.emplace_back(std::move(*Cluster));
    Visited.insert(Cluster);
  }

  llvm::sort(SortedClusters, compareClustersDensity);

  return SortedClusters;
}

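// Build clusters by merging functions at random, subject only to
// MaxClusterSize and ignoring call graph arcs; an alternative to clusterize().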
std::vector<Cluster> randomClusters(const CallGraph &Cg) {
  std::vector<NodeId> FuncIds(Cg.numNodes(), 0);
  std::vector<Cluster> Clusters;
  Clusters.reserve(Cg.numNodes());

  for (NodeId F = 0; F < Cg.numNodes(); F++) {
    if (Cg.samples(F) == 0)
      continue;
    Clusters.emplace_back(F, Cg.getNode(F));
  }

  llvm::sort(Clusters, [](const Cluster &A, const Cluster &B) {
    return A.size() < B.size();
  });

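  // Pick a random cluster after Idx that still fits together with
  // Clusters[Idx] under MaxClusterSize (clusters are sorted by size, so the
  // candidates form a contiguous range); return Clusters.size() if there is
  // no such candidate.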
  auto pickMergeCluster = [&Clusters](const size_t Idx) {
    size_t MaxIdx = Idx + 1;

    while (MaxIdx < Clusters.size() &&
           Clusters[Idx].size() + Clusters[MaxIdx].size() <= MaxClusterSize)
      ++MaxIdx;

    if (MaxIdx - Idx > 1) {
      size_t MergeIdx = (std::rand() % (MaxIdx - Idx - 1)) + Idx + 1;
      assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <=
             MaxClusterSize);
      return MergeIdx;
    }
    return Clusters.size();
  };

  size_t Idx = 0;
  while (Idx < Clusters.size()) {
    size_t MergeIdx = pickMergeCluster(Idx);
    if (MergeIdx == Clusters.size()) {
      ++Idx;
    } else {
      Clusters[Idx].merge(Clusters[MergeIdx]);
      Clusters.erase(Clusters.begin() + MergeIdx);
    }
  }

  return Clusters;
}

} // namespace bolt
} // namespace llvm