1//===- bolt/Passes/PettisAndHansen.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The file implements Pettis and Hansen code-layout algorithm.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/HFSort.h"
14#include "llvm/Support/Debug.h"
15#include "llvm/Support/Format.h"
16#include "llvm/Support/raw_ostream.h"
17#include <set>
18#include <unordered_map>
19
20#define DEBUG_TYPE "hfsort"
21
22namespace llvm {
23namespace bolt {
24
25using NodeId = CallGraph::NodeId;
26using Arc = CallGraph::Arc;
27using Node = CallGraph::Node;
28
29namespace {
30class ClusterArc {
31public:
32 ClusterArc(Cluster *Ca, Cluster *Cb, double W = 0)
33 : C1(std::min(a: Ca, b: Cb)), C2(std::max(a: Ca, b: Cb)), Weight(W) {}
34
35 friend bool operator==(const ClusterArc &Lhs, const ClusterArc &Rhs) {
36 return Lhs.C1 == Rhs.C1 && Lhs.C2 == Rhs.C2;
37 }
38
39 Cluster *const C1;
40 Cluster *const C2;
41 mutable double Weight;
42};
43
44class ClusterArcHash {
45public:
46 int64_t operator()(const ClusterArc &Arc) const {
47 std::hash<int64_t> Hasher;
48 return hashCombine(Seed: Hasher(int64_t(Arc.C1)), Val: int64_t(Arc.C2));
49 }
50};
51
52using ClusterArcSet = std::unordered_set<ClusterArc, ClusterArcHash>;
53
54void orderFuncs(const CallGraph &Cg, Cluster *C1, Cluster *C2) {
55 NodeId C1head = C1->targets().front();
56 NodeId C1tail = C1->targets().back();
57 NodeId C2head = C2->targets().front();
58 NodeId C2tail = C2->targets().back();
59
60 double C1headC2head = 0;
61 double C1headC2tail = 0;
62 double C1tailC2head = 0;
63 double C1tailC2tail = 0;
64
65 for (const Arc &Arc : Cg.arcs()) {
66 if ((Arc.src() == C1head && Arc.dst() == C2head) ||
67 (Arc.dst() == C1head && Arc.src() == C2head))
68 C1headC2head += Arc.weight();
69 else if ((Arc.src() == C1head && Arc.dst() == C2tail) ||
70 (Arc.dst() == C1head && Arc.src() == C2tail))
71 C1headC2tail += Arc.weight();
72 else if ((Arc.src() == C1tail && Arc.dst() == C2head) ||
73 (Arc.dst() == C1tail && Arc.src() == C2head))
74 C1tailC2head += Arc.weight();
75 else if ((Arc.src() == C1tail && Arc.dst() == C2tail) ||
76 (Arc.dst() == C1tail && Arc.src() == C2tail))
77 C1tailC2tail += Arc.weight();
78 }
79
80 const double Max = std::max(a: std::max(a: C1headC2head, b: C1headC2tail),
81 b: std::max(a: C1tailC2head, b: C1tailC2tail));
82
83 if (C1headC2head == Max) {
84 // flip C1
85 C1->reverseTargets();
86 } else if (C1headC2tail == Max) {
87 // flip C1 C2
88 C1->reverseTargets();
89 C2->reverseTargets();
90 } else if (C1tailC2tail == Max) {
91 // flip C2
92 C2->reverseTargets();
93 }
94}
95} // namespace
96
97std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) {
98 // indexed by NodeId, keeps its current cluster
99 std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr);
100 std::vector<Cluster> Clusters;
101 std::vector<NodeId> Funcs;
102
103 Clusters.reserve(n: Cg.numNodes());
104
105 for (NodeId F = 0; F < Cg.numNodes(); F++) {
106 if (Cg.samples(Id: F) == 0)
107 continue;
108 Clusters.emplace_back(args&: F, args: Cg.getNode(Id: F));
109 FuncCluster[F] = &Clusters.back();
110 Funcs.push_back(x: F);
111 }
112
113 ClusterArcSet Carcs;
114
115 auto insertOrInc = [&](Cluster *C1, Cluster *C2, double Weight) {
116 auto Res = Carcs.emplace(args&: C1, args&: C2, args&: Weight);
117 if (!Res.second)
118 Res.first->Weight += Weight;
119 };
120
121 // Create a std::vector of cluster arcs
122
123 for (const Arc &Arc : Cg.arcs()) {
124 if (Arc.weight() == 0)
125 continue;
126
127 Cluster *const S = FuncCluster[Arc.src()];
128 Cluster *const D = FuncCluster[Arc.dst()];
129
130 // ignore if s or d is nullptr
131
132 if (S == nullptr || D == nullptr)
133 continue;
134
135 // ignore self-edges
136
137 if (S == D)
138 continue;
139
140 insertOrInc(S, D, Arc.weight());
141 }
142
143 // Find an arc with max weight and merge its nodes
144
145 while (!Carcs.empty()) {
146 auto Maxpos =
147 std::max_element(first: Carcs.begin(), last: Carcs.end(),
148 comp: [&](const ClusterArc &Carc1, const ClusterArc &Carc2) {
149 return Carc1.Weight < Carc2.Weight;
150 });
151
152 ClusterArc Max = *Maxpos;
153 Carcs.erase(position: Maxpos);
154
155 Cluster *const C1 = Max.C1;
156 Cluster *const C2 = Max.C2;
157
158 if (C1->size() + C2->size() > MaxClusterSize)
159 continue;
160
161 if (C1->frozen() || C2->frozen())
162 continue;
163
164 // order functions and merge cluster
165
166 orderFuncs(Cg, C1, C2);
167
168 LLVM_DEBUG(dbgs() << format("merging %s -> %s: %.1f\n",
169 C2->toString().c_str(), C1->toString().c_str(),
170 Max.Weight));
171
172 // update carcs: merge C1arcs to C2arcs
173
174 std::unordered_map<ClusterArc, Cluster *, ClusterArcHash> C2arcs;
175 for (const ClusterArc &Carc : Carcs) {
176 if (Carc.C1 == C2)
177 C2arcs.emplace(args: Carc, args: Carc.C2);
178 if (Carc.C2 == C2)
179 C2arcs.emplace(args: Carc, args: Carc.C1);
180 }
181
182 for (auto It : C2arcs) {
183 Cluster *const C = It.second;
184 ClusterArc const C2arc = It.first;
185
186 insertOrInc(C, C1, C2arc.Weight);
187 Carcs.erase(x: C2arc);
188 }
189
190 // update FuncCluster
191
192 for (NodeId F : C2->targets())
193 FuncCluster[F] = C1;
194
195 C1->merge(Other: *C2, Aw: Max.Weight);
196 C2->clear();
197 }
198
199 // Return the set of Clusters that are left, which are the ones that
200 // didn't get merged.
201
202 std::set<Cluster *> LiveClusters;
203 std::vector<Cluster> OutClusters;
204
205 for (NodeId Fid : Funcs)
206 LiveClusters.insert(x: FuncCluster[Fid]);
207 for (Cluster *C : LiveClusters)
208 OutClusters.push_back(x: std::move(*C));
209
210 llvm::sort(C&: OutClusters, Comp: compareClustersDensity);
211
212 return OutClusters;
213}
214
215} // namespace bolt
216} // namespace llvm
217

source code of bolt/lib/Passes/PettisAndHansen.cpp