1 | //===- CallGraphSort.cpp --------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details |
10 | /// about the algorithm. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "CallGraphSort.h" |
15 | #include "COFFLinkerContext.h" |
16 | #include "InputFiles.h" |
17 | #include "SymbolTable.h" |
18 | #include "Symbols.h" |
19 | #include "lld/Common/ErrorHandler.h" |
20 | |
21 | #include <numeric> |
22 | |
23 | using namespace llvm; |
24 | using namespace lld; |
25 | using namespace lld::coff; |
26 | |
27 | namespace { |
28 | struct Edge { |
29 | int from; |
30 | uint64_t weight; |
31 | }; |
32 | |
33 | struct Cluster { |
34 | Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} |
35 | |
36 | double getDensity() const { |
37 | if (size == 0) |
38 | return 0; |
39 | return double(weight) / double(size); |
40 | } |
41 | |
42 | int next; |
43 | int prev; |
44 | uint64_t size; |
45 | uint64_t weight = 0; |
46 | uint64_t initialWeight = 0; |
47 | Edge bestPred = {.from: -1, .weight: 0}; |
48 | }; |
49 | |
50 | class CallGraphSort { |
51 | public: |
52 | CallGraphSort(const COFFLinkerContext &ctx); |
53 | |
54 | DenseMap<const SectionChunk *, int> run(); |
55 | |
56 | private: |
57 | std::vector<Cluster> clusters; |
58 | std::vector<const SectionChunk *> sections; |
59 | |
60 | const COFFLinkerContext &ctx; |
61 | }; |
62 | |
63 | // Maximum amount the combined cluster density can be worse than the original |
64 | // cluster to consider merging. |
65 | constexpr int MAX_DENSITY_DEGRADATION = 8; |
66 | |
67 | // Maximum cluster size in bytes. |
68 | constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024; |
69 | } // end anonymous namespace |
70 | |
71 | using SectionPair = std::pair<const SectionChunk *, const SectionChunk *>; |
72 | |
73 | // Take the edge list in Config->CallGraphProfile, resolve symbol names to |
74 | // Symbols, and generate a graph between InputSections with the provided |
75 | // weights. |
76 | CallGraphSort::CallGraphSort(const COFFLinkerContext &ctx) : ctx(ctx) { |
77 | const MapVector<SectionPair, uint64_t> &profile = ctx.config.callGraphProfile; |
78 | DenseMap<const SectionChunk *, int> secToCluster; |
79 | |
80 | auto getOrCreateNode = [&](const SectionChunk *isec) -> int { |
81 | auto res = secToCluster.try_emplace(Key: isec, Args: clusters.size()); |
82 | if (res.second) { |
83 | sections.push_back(x: isec); |
84 | clusters.emplace_back(args: clusters.size(), args: isec->getSize()); |
85 | } |
86 | return res.first->second; |
87 | }; |
88 | |
89 | // Create the graph. |
90 | for (const std::pair<SectionPair, uint64_t> &c : profile) { |
91 | const auto *fromSec = cast<SectionChunk>(Val: c.first.first->repl); |
92 | const auto *toSec = cast<SectionChunk>(Val: c.first.second->repl); |
93 | uint64_t weight = c.second; |
94 | |
95 | // Ignore edges between input sections belonging to different output |
96 | // sections. This is done because otherwise we would end up with clusters |
97 | // containing input sections that can't actually be placed adjacently in the |
98 | // output. This messes with the cluster size and density calculations. We |
99 | // would also end up moving input sections in other output sections without |
100 | // moving them closer to what calls them. |
101 | if (ctx.getOutputSection(c: fromSec) != ctx.getOutputSection(c: toSec)) |
102 | continue; |
103 | |
104 | int from = getOrCreateNode(fromSec); |
105 | int to = getOrCreateNode(toSec); |
106 | |
107 | clusters[to].weight += weight; |
108 | |
109 | if (from == to) |
110 | continue; |
111 | |
112 | // Remember the best edge. |
113 | Cluster &toC = clusters[to]; |
114 | if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { |
115 | toC.bestPred.from = from; |
116 | toC.bestPred.weight = weight; |
117 | } |
118 | } |
119 | for (Cluster &c : clusters) |
120 | c.initialWeight = c.weight; |
121 | } |
122 | |
123 | // It's bad to merge clusters which would degrade the density too much. |
124 | static bool isNewDensityBad(Cluster &a, Cluster &b) { |
125 | double newDensity = double(a.weight + b.weight) / double(a.size + b.size); |
126 | return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; |
127 | } |
128 | |
129 | // Find the leader of V's belonged cluster (represented as an equivalence |
130 | // class). We apply union-find path-halving technique (simple to implement) in |
131 | // the meantime as it decreases depths and the time complexity. |
132 | static int getLeader(std::vector<int> &leaders, int v) { |
133 | while (leaders[v] != v) { |
134 | leaders[v] = leaders[leaders[v]]; |
135 | v = leaders[v]; |
136 | } |
137 | return v; |
138 | } |
139 | |
140 | static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, |
141 | Cluster &from, int fromIdx) { |
142 | int tail1 = into.prev, tail2 = from.prev; |
143 | into.prev = tail2; |
144 | cs[tail2].next = intoIdx; |
145 | from.prev = tail1; |
146 | cs[tail1].next = fromIdx; |
147 | into.size += from.size; |
148 | into.weight += from.weight; |
149 | from.size = 0; |
150 | from.weight = 0; |
151 | } |
152 | |
153 | // Group InputSections into clusters using the Call-Chain Clustering heuristic |
154 | // then sort the clusters by density. |
155 | DenseMap<const SectionChunk *, int> CallGraphSort::run() { |
156 | std::vector<int> sorted(clusters.size()); |
157 | std::vector<int> leaders(clusters.size()); |
158 | |
159 | std::iota(first: leaders.begin(), last: leaders.end(), value: 0); |
160 | std::iota(first: sorted.begin(), last: sorted.end(), value: 0); |
161 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
162 | return clusters[a].getDensity() > clusters[b].getDensity(); |
163 | }); |
164 | |
165 | for (int l : sorted) { |
166 | // The cluster index is the same as the index of its leader here because |
167 | // clusters[L] has not been merged into another cluster yet. |
168 | Cluster &c = clusters[l]; |
169 | |
170 | // Don't consider merging if the edge is unlikely. |
171 | if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) |
172 | continue; |
173 | |
174 | int predL = getLeader(leaders, v: c.bestPred.from); |
175 | if (l == predL) |
176 | continue; |
177 | |
178 | Cluster *predC = &clusters[predL]; |
179 | if (c.size + predC->size > MAX_CLUSTER_SIZE) |
180 | continue; |
181 | |
182 | if (isNewDensityBad(a&: *predC, b&: c)) |
183 | continue; |
184 | |
185 | leaders[l] = predL; |
186 | mergeClusters(cs&: clusters, into&: *predC, intoIdx: predL, from&: c, fromIdx: l); |
187 | } |
188 | |
189 | // Sort remaining non-empty clusters by density. |
190 | sorted.clear(); |
191 | for (int i = 0, e = (int)clusters.size(); i != e; ++i) |
192 | if (clusters[i].size > 0) |
193 | sorted.push_back(x: i); |
194 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
195 | return clusters[a].getDensity() > clusters[b].getDensity(); |
196 | }); |
197 | |
198 | DenseMap<const SectionChunk *, int> orderMap; |
199 | // Sections will be sorted by increasing order. Absent sections will have |
200 | // priority 0 and be placed at the end of sections. |
201 | int curOrder = INT_MIN; |
202 | for (int leader : sorted) { |
203 | for (int i = leader;;) { |
204 | orderMap[sections[i]] = curOrder++; |
205 | i = clusters[i].next; |
206 | if (i == leader) |
207 | break; |
208 | } |
209 | } |
210 | if (!ctx.config.printSymbolOrder.empty()) { |
211 | std::error_code ec; |
212 | raw_fd_ostream os(ctx.config.printSymbolOrder, ec, sys::fs::OF_None); |
213 | if (ec) { |
214 | error(msg: "cannot open " + ctx.config.printSymbolOrder + ": " + ec.message()); |
215 | return orderMap; |
216 | } |
217 | // Print the symbols ordered by C3, in the order of increasing curOrder |
218 | // Instead of sorting all the orderMap, just repeat the loops above. |
219 | for (int leader : sorted) |
220 | for (int i = leader;;) { |
221 | const SectionChunk *sc = sections[i]; |
222 | |
223 | // Search all the symbols in the file of the section |
224 | // and find out a DefinedCOFF symbol with name that is within the |
225 | // section. |
226 | for (Symbol *sym : sc->file->getSymbols()) |
227 | if (auto *d = dyn_cast_or_null<DefinedCOFF>(Val: sym)) |
228 | // Filter out non-COMDAT symbols and section symbols. |
229 | if (d->isCOMDAT && !d->getCOFFSymbol().isSection() && |
230 | sc == d->getChunk()) |
231 | os << sym->getName() << "\n" ; |
232 | i = clusters[i].next; |
233 | if (i == leader) |
234 | break; |
235 | } |
236 | } |
237 | |
238 | return orderMap; |
239 | } |
240 | |
241 | // Sort sections by the profile data provided by /call-graph-ordering-file |
242 | // |
243 | // This first builds a call graph based on the profile data then merges sections |
244 | // according to the C³ heuristic. All clusters are then sorted by a density |
245 | // metric to further improve locality. |
246 | DenseMap<const SectionChunk *, int> |
247 | coff::computeCallGraphProfileOrder(const COFFLinkerContext &ctx) { |
248 | return CallGraphSort(ctx).run(); |
249 | } |
250 | |