1 | //===- CallGraphSort.cpp --------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details |
10 | /// about the algorithm. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "CallGraphSort.h" |
15 | #include "COFFLinkerContext.h" |
16 | #include "InputFiles.h" |
17 | #include "SymbolTable.h" |
18 | #include "Symbols.h" |
19 | |
20 | #include <numeric> |
21 | |
22 | using namespace llvm; |
23 | using namespace lld; |
24 | using namespace lld::coff; |
25 | |
26 | namespace { |
27 | struct Edge { |
28 | int from; |
29 | uint64_t weight; |
30 | }; |
31 | |
32 | struct Cluster { |
33 | Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} |
34 | |
35 | double getDensity() const { |
36 | if (size == 0) |
37 | return 0; |
38 | return double(weight) / double(size); |
39 | } |
40 | |
41 | int next; |
42 | int prev; |
43 | uint64_t size; |
44 | uint64_t weight = 0; |
45 | uint64_t initialWeight = 0; |
46 | Edge bestPred = {.from: -1, .weight: 0}; |
47 | }; |
48 | |
49 | class CallGraphSort { |
50 | public: |
51 | CallGraphSort(COFFLinkerContext &ctx); |
52 | |
53 | DenseMap<const SectionChunk *, int> run(); |
54 | |
55 | private: |
56 | std::vector<Cluster> clusters; |
57 | std::vector<const SectionChunk *> sections; |
58 | |
59 | COFFLinkerContext &ctx; |
60 | }; |
61 | |
62 | // Maximum amount the combined cluster density can be worse than the original |
63 | // cluster to consider merging. |
64 | constexpr int MAX_DENSITY_DEGRADATION = 8; |
65 | |
66 | // Maximum cluster size in bytes. |
67 | constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024; |
68 | } // end anonymous namespace |
69 | |
70 | using SectionPair = std::pair<const SectionChunk *, const SectionChunk *>; |
71 | |
72 | // Take the edge list in Config->CallGraphProfile, resolve symbol names to |
73 | // Symbols, and generate a graph between InputSections with the provided |
74 | // weights. |
75 | CallGraphSort::CallGraphSort(COFFLinkerContext &ctx) : ctx(ctx) { |
76 | const MapVector<SectionPair, uint64_t> &profile = ctx.config.callGraphProfile; |
77 | DenseMap<const SectionChunk *, int> secToCluster; |
78 | |
79 | auto getOrCreateNode = [&](const SectionChunk *isec) -> int { |
80 | auto res = secToCluster.try_emplace(Key: isec, Args: clusters.size()); |
81 | if (res.second) { |
82 | sections.push_back(x: isec); |
83 | clusters.emplace_back(args: clusters.size(), args: isec->getSize()); |
84 | } |
85 | return res.first->second; |
86 | }; |
87 | |
88 | // Create the graph. |
89 | for (const std::pair<SectionPair, uint64_t> &c : profile) { |
90 | const auto *fromSec = cast<SectionChunk>(Val: c.first.first->repl); |
91 | const auto *toSec = cast<SectionChunk>(Val: c.first.second->repl); |
92 | uint64_t weight = c.second; |
93 | |
94 | // Ignore edges between input sections belonging to different output |
95 | // sections. This is done because otherwise we would end up with clusters |
96 | // containing input sections that can't actually be placed adjacently in the |
97 | // output. This messes with the cluster size and density calculations. We |
98 | // would also end up moving input sections in other output sections without |
99 | // moving them closer to what calls them. |
100 | if (ctx.getOutputSection(c: fromSec) != ctx.getOutputSection(c: toSec)) |
101 | continue; |
102 | |
103 | int from = getOrCreateNode(fromSec); |
104 | int to = getOrCreateNode(toSec); |
105 | |
106 | clusters[to].weight += weight; |
107 | |
108 | if (from == to) |
109 | continue; |
110 | |
111 | // Remember the best edge. |
112 | Cluster &toC = clusters[to]; |
113 | if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { |
114 | toC.bestPred.from = from; |
115 | toC.bestPred.weight = weight; |
116 | } |
117 | } |
118 | for (Cluster &c : clusters) |
119 | c.initialWeight = c.weight; |
120 | } |
121 | |
122 | // It's bad to merge clusters which would degrade the density too much. |
123 | static bool isNewDensityBad(Cluster &a, Cluster &b) { |
124 | double newDensity = double(a.weight + b.weight) / double(a.size + b.size); |
125 | return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; |
126 | } |
127 | |
128 | // Find the leader of V's belonged cluster (represented as an equivalence |
129 | // class). We apply union-find path-halving technique (simple to implement) in |
130 | // the meantime as it decreases depths and the time complexity. |
131 | static int getLeader(std::vector<int> &leaders, int v) { |
132 | while (leaders[v] != v) { |
133 | leaders[v] = leaders[leaders[v]]; |
134 | v = leaders[v]; |
135 | } |
136 | return v; |
137 | } |
138 | |
139 | static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, |
140 | Cluster &from, int fromIdx) { |
141 | int tail1 = into.prev, tail2 = from.prev; |
142 | into.prev = tail2; |
143 | cs[tail2].next = intoIdx; |
144 | from.prev = tail1; |
145 | cs[tail1].next = fromIdx; |
146 | into.size += from.size; |
147 | into.weight += from.weight; |
148 | from.size = 0; |
149 | from.weight = 0; |
150 | } |
151 | |
152 | // Group InputSections into clusters using the Call-Chain Clustering heuristic |
153 | // then sort the clusters by density. |
154 | DenseMap<const SectionChunk *, int> CallGraphSort::run() { |
155 | std::vector<int> sorted(clusters.size()); |
156 | std::vector<int> leaders(clusters.size()); |
157 | |
158 | std::iota(first: leaders.begin(), last: leaders.end(), value: 0); |
159 | std::iota(first: sorted.begin(), last: sorted.end(), value: 0); |
160 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
161 | return clusters[a].getDensity() > clusters[b].getDensity(); |
162 | }); |
163 | |
164 | for (int l : sorted) { |
165 | // The cluster index is the same as the index of its leader here because |
166 | // clusters[L] has not been merged into another cluster yet. |
167 | Cluster &c = clusters[l]; |
168 | |
169 | // Don't consider merging if the edge is unlikely. |
170 | if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) |
171 | continue; |
172 | |
173 | int predL = getLeader(leaders, v: c.bestPred.from); |
174 | if (l == predL) |
175 | continue; |
176 | |
177 | Cluster *predC = &clusters[predL]; |
178 | if (c.size + predC->size > MAX_CLUSTER_SIZE) |
179 | continue; |
180 | |
181 | if (isNewDensityBad(a&: *predC, b&: c)) |
182 | continue; |
183 | |
184 | leaders[l] = predL; |
185 | mergeClusters(cs&: clusters, into&: *predC, intoIdx: predL, from&: c, fromIdx: l); |
186 | } |
187 | |
188 | // Sort remaining non-empty clusters by density. |
189 | sorted.clear(); |
190 | for (int i = 0, e = (int)clusters.size(); i != e; ++i) |
191 | if (clusters[i].size > 0) |
192 | sorted.push_back(x: i); |
193 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
194 | return clusters[a].getDensity() > clusters[b].getDensity(); |
195 | }); |
196 | |
197 | DenseMap<const SectionChunk *, int> orderMap; |
198 | // Sections will be sorted by increasing order. Absent sections will have |
199 | // priority 0 and be placed at the end of sections. |
200 | int curOrder = INT_MIN; |
201 | for (int leader : sorted) { |
202 | for (int i = leader;;) { |
203 | orderMap[sections[i]] = curOrder++; |
204 | i = clusters[i].next; |
205 | if (i == leader) |
206 | break; |
207 | } |
208 | } |
209 | if (!ctx.config.printSymbolOrder.empty()) { |
210 | std::error_code ec; |
211 | raw_fd_ostream os(ctx.config.printSymbolOrder, ec, sys::fs::OF_None); |
212 | if (ec) { |
213 | Err(ctx) << "cannot open " << ctx.config.printSymbolOrder << ": " |
214 | << ec.message(); |
215 | return orderMap; |
216 | } |
217 | // Print the symbols ordered by C3, in the order of increasing curOrder |
218 | // Instead of sorting all the orderMap, just repeat the loops above. |
219 | for (int leader : sorted) |
220 | for (int i = leader;;) { |
221 | const SectionChunk *sc = sections[i]; |
222 | |
223 | // Search all the symbols in the file of the section |
224 | // and find out a DefinedCOFF symbol with name that is within the |
225 | // section. |
226 | for (Symbol *sym : sc->file->getSymbols()) |
227 | if (auto *d = dyn_cast_or_null<DefinedCOFF>(Val: sym)) |
228 | // Filter out non-COMDAT symbols and section symbols. |
229 | if (d->isCOMDAT && !d->getCOFFSymbol().isSection() && |
230 | sc == d->getChunk()) |
231 | os << sym->getName() << "\n" ; |
232 | i = clusters[i].next; |
233 | if (i == leader) |
234 | break; |
235 | } |
236 | } |
237 | |
238 | return orderMap; |
239 | } |
240 | |
241 | // Sort sections by the profile data provided by /call-graph-ordering-file |
242 | // |
243 | // This first builds a call graph based on the profile data then merges sections |
244 | // according to the C³ heuristic. All clusters are then sorted by a density |
245 | // metric to further improve locality. |
246 | DenseMap<const SectionChunk *, int> |
247 | coff::computeCallGraphProfileOrder(COFFLinkerContext &ctx) { |
248 | return CallGraphSort(ctx).run(); |
249 | } |
250 | |