1 | //===- bolt/Passes/PettisAndHansen.cpp ------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The file implements Pettis and Hansen code-layout algorithm. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/HFSort.h" |
14 | #include "llvm/Support/Debug.h" |
15 | #include "llvm/Support/Format.h" |
16 | #include "llvm/Support/raw_ostream.h" |
17 | #include <set> |
18 | #include <unordered_map> |
19 | |
20 | #define DEBUG_TYPE "hfsort" |
21 | |
22 | namespace llvm { |
23 | namespace bolt { |
24 | |
25 | using NodeId = CallGraph::NodeId; |
26 | using Arc = CallGraph::Arc; |
27 | using Node = CallGraph::Node; |
28 | |
29 | namespace { |
30 | class ClusterArc { |
31 | public: |
32 | ClusterArc(Cluster *Ca, Cluster *Cb, double W = 0) |
33 | : C1(std::min(a: Ca, b: Cb)), C2(std::max(a: Ca, b: Cb)), Weight(W) {} |
34 | |
35 | friend bool operator==(const ClusterArc &Lhs, const ClusterArc &Rhs) { |
36 | return Lhs.C1 == Rhs.C1 && Lhs.C2 == Rhs.C2; |
37 | } |
38 | |
39 | Cluster *const C1; |
40 | Cluster *const C2; |
41 | mutable double Weight; |
42 | }; |
43 | |
44 | class ClusterArcHash { |
45 | public: |
46 | int64_t operator()(const ClusterArc &Arc) const { |
47 | std::hash<int64_t> Hasher; |
48 | return hashCombine(Seed: Hasher(int64_t(Arc.C1)), Val: int64_t(Arc.C2)); |
49 | } |
50 | }; |
51 | |
52 | using ClusterArcSet = std::unordered_set<ClusterArc, ClusterArcHash>; |
53 | |
54 | void orderFuncs(const CallGraph &Cg, Cluster *C1, Cluster *C2) { |
55 | NodeId C1head = C1->targets().front(); |
56 | NodeId C1tail = C1->targets().back(); |
57 | NodeId C2head = C2->targets().front(); |
58 | NodeId C2tail = C2->targets().back(); |
59 | |
60 | double C1headC2head = 0; |
61 | double C1headC2tail = 0; |
62 | double C1tailC2head = 0; |
63 | double C1tailC2tail = 0; |
64 | |
65 | for (const Arc &Arc : Cg.arcs()) { |
66 | if ((Arc.src() == C1head && Arc.dst() == C2head) || |
67 | (Arc.dst() == C1head && Arc.src() == C2head)) |
68 | C1headC2head += Arc.weight(); |
69 | else if ((Arc.src() == C1head && Arc.dst() == C2tail) || |
70 | (Arc.dst() == C1head && Arc.src() == C2tail)) |
71 | C1headC2tail += Arc.weight(); |
72 | else if ((Arc.src() == C1tail && Arc.dst() == C2head) || |
73 | (Arc.dst() == C1tail && Arc.src() == C2head)) |
74 | C1tailC2head += Arc.weight(); |
75 | else if ((Arc.src() == C1tail && Arc.dst() == C2tail) || |
76 | (Arc.dst() == C1tail && Arc.src() == C2tail)) |
77 | C1tailC2tail += Arc.weight(); |
78 | } |
79 | |
80 | const double Max = std::max(a: std::max(a: C1headC2head, b: C1headC2tail), |
81 | b: std::max(a: C1tailC2head, b: C1tailC2tail)); |
82 | |
83 | if (C1headC2head == Max) { |
84 | // flip C1 |
85 | C1->reverseTargets(); |
86 | } else if (C1headC2tail == Max) { |
87 | // flip C1 C2 |
88 | C1->reverseTargets(); |
89 | C2->reverseTargets(); |
90 | } else if (C1tailC2tail == Max) { |
91 | // flip C2 |
92 | C2->reverseTargets(); |
93 | } |
94 | } |
95 | } // namespace |
96 | |
97 | std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) { |
98 | // indexed by NodeId, keeps its current cluster |
99 | std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr); |
100 | std::vector<Cluster> Clusters; |
101 | std::vector<NodeId> Funcs; |
102 | |
103 | Clusters.reserve(n: Cg.numNodes()); |
104 | |
105 | for (NodeId F = 0; F < Cg.numNodes(); F++) { |
106 | if (Cg.samples(Id: F) == 0) |
107 | continue; |
108 | Clusters.emplace_back(args&: F, args: Cg.getNode(Id: F)); |
109 | FuncCluster[F] = &Clusters.back(); |
110 | Funcs.push_back(x: F); |
111 | } |
112 | |
113 | ClusterArcSet Carcs; |
114 | |
115 | auto insertOrInc = [&](Cluster *C1, Cluster *C2, double Weight) { |
116 | auto Res = Carcs.emplace(args&: C1, args&: C2, args&: Weight); |
117 | if (!Res.second) |
118 | Res.first->Weight += Weight; |
119 | }; |
120 | |
121 | // Create a std::vector of cluster arcs |
122 | |
123 | for (const Arc &Arc : Cg.arcs()) { |
124 | if (Arc.weight() == 0) |
125 | continue; |
126 | |
127 | Cluster *const S = FuncCluster[Arc.src()]; |
128 | Cluster *const D = FuncCluster[Arc.dst()]; |
129 | |
130 | // ignore if s or d is nullptr |
131 | |
132 | if (S == nullptr || D == nullptr) |
133 | continue; |
134 | |
135 | // ignore self-edges |
136 | |
137 | if (S == D) |
138 | continue; |
139 | |
140 | insertOrInc(S, D, Arc.weight()); |
141 | } |
142 | |
143 | // Find an arc with max weight and merge its nodes |
144 | |
145 | while (!Carcs.empty()) { |
146 | auto Maxpos = |
147 | std::max_element(first: Carcs.begin(), last: Carcs.end(), |
148 | comp: [&](const ClusterArc &Carc1, const ClusterArc &Carc2) { |
149 | return Carc1.Weight < Carc2.Weight; |
150 | }); |
151 | |
152 | ClusterArc Max = *Maxpos; |
153 | Carcs.erase(position: Maxpos); |
154 | |
155 | Cluster *const C1 = Max.C1; |
156 | Cluster *const C2 = Max.C2; |
157 | |
158 | if (C1->size() + C2->size() > MaxClusterSize) |
159 | continue; |
160 | |
161 | if (C1->frozen() || C2->frozen()) |
162 | continue; |
163 | |
164 | // order functions and merge cluster |
165 | |
166 | orderFuncs(Cg, C1, C2); |
167 | |
168 | LLVM_DEBUG(dbgs() << format("merging %s -> %s: %.1f\n" , |
169 | C2->toString().c_str(), C1->toString().c_str(), |
170 | Max.Weight)); |
171 | |
172 | // update carcs: merge C1arcs to C2arcs |
173 | |
174 | std::unordered_map<ClusterArc, Cluster *, ClusterArcHash> C2arcs; |
175 | for (const ClusterArc &Carc : Carcs) { |
176 | if (Carc.C1 == C2) |
177 | C2arcs.emplace(args: Carc, args: Carc.C2); |
178 | if (Carc.C2 == C2) |
179 | C2arcs.emplace(args: Carc, args: Carc.C1); |
180 | } |
181 | |
182 | for (auto It : C2arcs) { |
183 | Cluster *const C = It.second; |
184 | ClusterArc const C2arc = It.first; |
185 | |
186 | insertOrInc(C, C1, C2arc.Weight); |
187 | Carcs.erase(x: C2arc); |
188 | } |
189 | |
190 | // update FuncCluster |
191 | |
192 | for (NodeId F : C2->targets()) |
193 | FuncCluster[F] = C1; |
194 | |
195 | C1->merge(Other: *C2, Aw: Max.Weight); |
196 | C2->clear(); |
197 | } |
198 | |
199 | // Return the set of Clusters that are left, which are the ones that |
200 | // didn't get merged. |
201 | |
202 | std::set<Cluster *> LiveClusters; |
203 | std::vector<Cluster> OutClusters; |
204 | |
205 | for (NodeId Fid : Funcs) |
206 | LiveClusters.insert(x: FuncCluster[Fid]); |
207 | for (Cluster *C : LiveClusters) |
208 | OutClusters.push_back(x: std::move(*C)); |
209 | |
210 | llvm::sort(C&: OutClusters, Comp: compareClustersDensity); |
211 | |
212 | return OutClusters; |
213 | } |
214 | |
215 | } // namespace bolt |
216 | } // namespace llvm |
217 | |