| 1 | //===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Implementation of HFSort algorithm for function ordering: |
| 10 | // https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "bolt/Passes/HFSort.h" |
| 15 | #include "llvm/Support/CommandLine.h" |
| 16 | #include "llvm/Support/Debug.h" |
| 17 | #include "llvm/Support/Format.h" |
| 18 | #include "llvm/Support/raw_ostream.h" |
| 19 | #include <unordered_set> |
| 20 | |
| 21 | #define DEBUG_TYPE "hfsort" |
| 22 | |
| 23 | namespace opts { |
| 24 | extern llvm::cl::opt<unsigned> Verbosity; |
| 25 | } |
| 26 | |
| 27 | namespace llvm { |
| 28 | namespace bolt { |
| 29 | |
| 30 | using NodeId = CallGraph::NodeId; |
| 31 | using Arc = CallGraph::Arc; |
| 32 | using Node = CallGraph::Node; |
| 33 | |
| 34 | namespace { |
| 35 | |
| 36 | // The number of pages to reserve for the functions with highest |
| 37 | // density (samples / size). The functions put in these pages are not |
| 38 | // considered for clustering. |
| 39 | constexpr uint32_t FrozenPages = 0; |
| 40 | |
| 41 | // The minimum approximate probability of a callee being called from a |
| 42 | // particular arc to consider merging with the caller's cluster. |
| 43 | constexpr double MinArcProbability = 0.1; |
| 44 | |
| 45 | // This is a factor to determine by how much a caller cluster is |
| 46 | // willing to degrade it's density by merging a callee. |
| 47 | constexpr int CallerDegradeFactor = 8; |
| 48 | |
| 49 | } // namespace |
| 50 | |
| 51 | //////////////////////////////////////////////////////////////////////////////// |
| 52 | |
| 53 | Cluster::Cluster(NodeId Id, const Node &Func) |
| 54 | : Samples(Func.samples()), Size(Func.size()), |
| 55 | Density((double)Samples / Size) { |
| 56 | Targets.push_back(x: Id); |
| 57 | } |
| 58 | |
| 59 | Cluster::Cluster(const std::vector<NodeId> &Nodes, const CallGraph &Cg) { |
| 60 | Samples = 0; |
| 61 | Size = 0; |
| 62 | for (NodeId TargetId : Nodes) { |
| 63 | Targets.push_back(x: TargetId); |
| 64 | Samples += Cg.samples(Id: TargetId); |
| 65 | Size += Cg.size(Id: TargetId); |
| 66 | } |
| 67 | Density = (double)Samples / Size; |
| 68 | } |
| 69 | |
| 70 | std::string Cluster::toString() const { |
| 71 | std::string Str; |
| 72 | raw_string_ostream CS(Str); |
| 73 | bool PrintComma = false; |
| 74 | CS << "funcs = [" ; |
| 75 | for (const NodeId &Target : Targets) { |
| 76 | if (PrintComma) |
| 77 | CS << ", " ; |
| 78 | CS << Target; |
| 79 | PrintComma = true; |
| 80 | } |
| 81 | CS << "]" ; |
| 82 | return CS.str(); |
| 83 | } |
| 84 | |
| 85 | namespace { |
| 86 | |
| 87 | void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) { |
| 88 | uint32_t TotalSize = 0; |
| 89 | llvm::sort(C&: Clusters, Comp: compareClustersDensity); |
| 90 | for (Cluster &C : Clusters) { |
| 91 | uint32_t NewSize = TotalSize + C.size(); |
| 92 | if (NewSize > FrozenPages * HugePageSize) |
| 93 | break; |
| 94 | C.freeze(); |
| 95 | TotalSize = NewSize; |
| 96 | LLVM_DEBUG(NodeId Fid = C.target(0); |
| 97 | dbgs() << format( |
| 98 | "freezing cluster for func %d, size = %u, samples = %lu)\n" , |
| 99 | Fid, Cg.size(Fid), Cg.samples(Fid));); |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | } // namespace |
| 104 | |
| 105 | void Cluster::reverseTargets() { std::reverse(first: Targets.begin(), last: Targets.end()); } |
| 106 | |
| 107 | void Cluster::merge(const Cluster &Other, const double Aw) { |
| 108 | Targets.insert(position: Targets.end(), first: Other.Targets.begin(), last: Other.Targets.end()); |
| 109 | Size += Other.Size; |
| 110 | Samples += Other.Samples; |
| 111 | Density = (double)Samples / Size; |
| 112 | } |
| 113 | |
| 114 | void Cluster::merge(const Cluster &Other, |
| 115 | const std::vector<CallGraph::NodeId> &Targets_) { |
| 116 | Targets = Targets_; |
| 117 | Size += Other.Size; |
| 118 | Samples += Other.Samples; |
| 119 | Density = (double)Samples / Size; |
| 120 | } |
| 121 | |
| 122 | void Cluster::clear() { |
| 123 | Id = -1u; |
| 124 | Size = 0; |
| 125 | Samples = 0; |
| 126 | Density = 0.0; |
| 127 | Targets.clear(); |
| 128 | Frozen = false; |
| 129 | } |
| 130 | |
| 131 | std::vector<Cluster> clusterize(const CallGraph &Cg) { |
| 132 | std::vector<NodeId> SortedFuncs; |
| 133 | |
| 134 | // indexed by NodeId, keeps it's current cluster |
| 135 | std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr); |
| 136 | std::vector<Cluster> Clusters; |
| 137 | Clusters.reserve(n: Cg.numNodes()); |
| 138 | |
| 139 | for (NodeId F = 0; F < Cg.numNodes(); F++) { |
| 140 | if (Cg.samples(Id: F) == 0) |
| 141 | continue; |
| 142 | Clusters.emplace_back(args&: F, args: Cg.getNode(Id: F)); |
| 143 | SortedFuncs.push_back(x: F); |
| 144 | } |
| 145 | |
| 146 | freezeClusters(Cg, Clusters); |
| 147 | |
| 148 | // The size and order of Clusters is fixed until we reshuffle it immediately |
| 149 | // before returning. |
| 150 | for (Cluster &Cluster : Clusters) |
| 151 | FuncCluster[Cluster.targets().front()] = &Cluster; |
| 152 | |
| 153 | llvm::sort(C&: SortedFuncs, Comp: [&](const NodeId F1, const NodeId F2) { |
| 154 | const CallGraph::Node &Func1 = Cg.getNode(Id: F1); |
| 155 | const CallGraph::Node &Func2 = Cg.getNode(Id: F2); |
| 156 | return Func1.samples() * Func2.size() > // TODO: is this correct? |
| 157 | Func2.samples() * Func1.size(); |
| 158 | }); |
| 159 | |
| 160 | // Process each function, and consider merging its cluster with the |
| 161 | // one containing its most likely predecessor. |
| 162 | for (const NodeId Fid : SortedFuncs) { |
| 163 | Cluster *Cluster = FuncCluster[Fid]; |
| 164 | if (Cluster->frozen()) |
| 165 | continue; |
| 166 | |
| 167 | // Find best predecessor. |
| 168 | NodeId BestPred = CallGraph::InvalidId; |
| 169 | double BestProb = 0; |
| 170 | |
| 171 | for (const NodeId Src : Cg.predecessors(Id: Fid)) { |
| 172 | const Arc &Arc = *Cg.findArc(Src, Dst: Fid); |
| 173 | if (BestPred == CallGraph::InvalidId || |
| 174 | Arc.normalizedWeight() > BestProb) { |
| 175 | BestPred = Arc.src(); |
| 176 | BestProb = Arc.normalizedWeight(); |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | // Check if the merge is good for the callee. |
| 181 | // Don't merge if the probability of getting to the callee from the |
| 182 | // caller is too low. |
| 183 | if (BestProb < MinArcProbability) |
| 184 | continue; |
| 185 | |
| 186 | assert(BestPred != CallGraph::InvalidId); |
| 187 | |
| 188 | class Cluster *PredCluster = FuncCluster[BestPred]; |
| 189 | |
| 190 | // Skip if no predCluster (predecessor w/ no samples), or if same |
| 191 | // as cluster, of it's frozen. |
| 192 | if (PredCluster == nullptr || PredCluster == Cluster || |
| 193 | PredCluster->frozen()) |
| 194 | continue; |
| 195 | |
| 196 | // Skip if merged cluster would be bigger than the threshold. |
| 197 | if (Cluster->size() + PredCluster->size() > MaxClusterSize) |
| 198 | continue; |
| 199 | |
| 200 | // Check if the merge is good for the caller. |
| 201 | // Don't merge if the caller's density is significantly better |
| 202 | // than the density resulting from the merge. |
| 203 | const double NewDensity = |
| 204 | ((double)PredCluster->samples() + Cluster->samples()) / |
| 205 | (PredCluster->size() + Cluster->size()); |
| 206 | if (PredCluster->density() > NewDensity * CallerDegradeFactor) { |
| 207 | continue; |
| 208 | } |
| 209 | |
| 210 | LLVM_DEBUG(if (opts::Verbosity > 1) { |
| 211 | dbgs() << format("merging %s -> %s: %u\n" , |
| 212 | PredCluster->toString().c_str(), |
| 213 | Cluster->toString().c_str(), Cg.samples(Fid)); |
| 214 | }); |
| 215 | |
| 216 | for (NodeId F : Cluster->targets()) |
| 217 | FuncCluster[F] = PredCluster; |
| 218 | |
| 219 | PredCluster->merge(Other: *Cluster); |
| 220 | Cluster->clear(); |
| 221 | } |
| 222 | |
| 223 | // Return the set of Clusters that are left, which are the ones that |
| 224 | // didn't get merged (so their first func is its original func). |
| 225 | std::vector<Cluster> SortedClusters; |
| 226 | std::unordered_set<Cluster *> Visited; |
| 227 | for (const NodeId Func : SortedFuncs) { |
| 228 | Cluster *Cluster = FuncCluster[Func]; |
| 229 | if (!Cluster || Visited.count(x: Cluster) == 1 || Cluster->target(N: 0) != Func) |
| 230 | continue; |
| 231 | |
| 232 | SortedClusters.emplace_back(args: std::move(*Cluster)); |
| 233 | Visited.insert(x: Cluster); |
| 234 | } |
| 235 | |
| 236 | llvm::sort(C&: SortedClusters, Comp: compareClustersDensity); |
| 237 | |
| 238 | return SortedClusters; |
| 239 | } |
| 240 | |
| 241 | std::vector<Cluster> randomClusters(const CallGraph &Cg) { |
| 242 | std::vector<Cluster> Clusters; |
| 243 | Clusters.reserve(n: Cg.numNodes()); |
| 244 | |
| 245 | for (NodeId F = 0; F < Cg.numNodes(); F++) { |
| 246 | if (Cg.samples(Id: F) == 0) |
| 247 | continue; |
| 248 | Clusters.emplace_back(args&: F, args: Cg.getNode(Id: F)); |
| 249 | } |
| 250 | |
| 251 | llvm::sort(C&: Clusters, Comp: [](const Cluster &A, const Cluster &B) { |
| 252 | return A.size() < B.size(); |
| 253 | }); |
| 254 | |
| 255 | auto pickMergeCluster = [&Clusters](const size_t Idx) { |
| 256 | size_t MaxIdx = Idx + 1; |
| 257 | |
| 258 | while (MaxIdx < Clusters.size() && |
| 259 | Clusters[Idx].size() + Clusters[MaxIdx].size() <= MaxClusterSize) |
| 260 | ++MaxIdx; |
| 261 | |
| 262 | if (MaxIdx - Idx > 1) { |
| 263 | size_t MergeIdx = (std::rand() % (MaxIdx - Idx - 1)) + Idx + 1; |
| 264 | assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <= |
| 265 | MaxClusterSize); |
| 266 | return MergeIdx; |
| 267 | } |
| 268 | return Clusters.size(); |
| 269 | }; |
| 270 | |
| 271 | size_t Idx = 0; |
| 272 | while (Idx < Clusters.size()) { |
| 273 | size_t MergeIdx = pickMergeCluster(Idx); |
| 274 | if (MergeIdx == Clusters.size()) { |
| 275 | ++Idx; |
| 276 | } else { |
| 277 | Clusters[Idx].merge(Other: Clusters[MergeIdx]); |
| 278 | Clusters.erase(position: Clusters.begin() + MergeIdx); |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | return Clusters; |
| 283 | } |
| 284 | |
| 285 | } // namespace bolt |
| 286 | } // namespace llvm |
| 287 | |