1 | //===- SectionPriorities.cpp ----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details |
10 | /// about the algorithm. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "SectionPriorities.h" |
15 | #include "Config.h" |
16 | #include "InputFiles.h" |
17 | #include "Symbols.h" |
18 | #include "Target.h" |
19 | |
20 | #include "lld/Common/Args.h" |
21 | #include "lld/Common/CommonLinkerContext.h" |
22 | #include "lld/Common/ErrorHandler.h" |
23 | #include "llvm/ADT/DenseMap.h" |
24 | #include "llvm/ADT/MapVector.h" |
25 | #include "llvm/Support/Path.h" |
26 | #include "llvm/Support/TimeProfiler.h" |
27 | #include "llvm/Support/raw_ostream.h" |
28 | |
29 | #include <numeric> |
30 | |
31 | using namespace llvm; |
32 | using namespace llvm::MachO; |
33 | using namespace llvm::sys; |
34 | using namespace lld; |
35 | using namespace lld::macho; |
36 | |
37 | PriorityBuilder macho::priorityBuilder; |
38 | |
39 | namespace { |
40 | |
41 | size_t highestAvailablePriority = std::numeric_limits<size_t>::max(); |
42 | |
43 | struct Edge { |
44 | int from; |
45 | uint64_t weight; |
46 | }; |
47 | |
48 | struct Cluster { |
49 | Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} |
50 | |
51 | double getDensity() const { |
52 | if (size == 0) |
53 | return 0; |
54 | return double(weight) / double(size); |
55 | } |
56 | |
57 | int next; |
58 | int prev; |
59 | uint64_t size; |
60 | uint64_t weight = 0; |
61 | uint64_t initialWeight = 0; |
62 | Edge bestPred = {.from: -1, .weight: 0}; |
63 | }; |
64 | |
65 | class CallGraphSort { |
66 | public: |
67 | CallGraphSort(const MapVector<SectionPair, uint64_t> &profile); |
68 | |
69 | DenseMap<const InputSection *, size_t> run(); |
70 | |
71 | private: |
72 | std::vector<Cluster> clusters; |
73 | std::vector<const InputSection *> sections; |
74 | }; |
75 | // Maximum amount the combined cluster density can be worse than the original |
76 | // cluster to consider merging. |
77 | constexpr int MAX_DENSITY_DEGRADATION = 8; |
78 | } // end anonymous namespace |
79 | |
80 | // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and |
81 | // generate a graph between InputSections with the provided weights. |
82 | CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) { |
83 | DenseMap<const InputSection *, int> secToCluster; |
84 | |
85 | auto getOrCreateCluster = [&](const InputSection *isec) -> int { |
86 | auto res = secToCluster.try_emplace(Key: isec, Args: clusters.size()); |
87 | if (res.second) { |
88 | sections.push_back(x: isec); |
89 | clusters.emplace_back(args: clusters.size(), args: isec->getSize()); |
90 | } |
91 | return res.first->second; |
92 | }; |
93 | |
94 | // Create the graph |
95 | for (const std::pair<SectionPair, uint64_t> &c : profile) { |
96 | const auto fromSec = c.first.first->canonical(); |
97 | const auto toSec = c.first.second->canonical(); |
98 | uint64_t weight = c.second; |
99 | // Ignore edges between input sections belonging to different output |
100 | // sections. This is done because otherwise we would end up with clusters |
101 | // containing input sections that can't actually be placed adjacently in the |
102 | // output. This messes with the cluster size and density calculations. We |
103 | // would also end up moving input sections in other output sections without |
104 | // moving them closer to what calls them. |
105 | if (fromSec->parent != toSec->parent) |
106 | continue; |
107 | |
108 | int from = getOrCreateCluster(fromSec); |
109 | int to = getOrCreateCluster(toSec); |
110 | |
111 | clusters[to].weight += weight; |
112 | |
113 | if (from == to) |
114 | continue; |
115 | |
116 | // Remember the best edge. |
117 | Cluster &toC = clusters[to]; |
118 | if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { |
119 | toC.bestPred.from = from; |
120 | toC.bestPred.weight = weight; |
121 | } |
122 | } |
123 | for (Cluster &c : clusters) |
124 | c.initialWeight = c.weight; |
125 | } |
126 | |
127 | // It's bad to merge clusters which would degrade the density too much. |
128 | static bool isNewDensityBad(Cluster &a, Cluster &b) { |
129 | double newDensity = double(a.weight + b.weight) / double(a.size + b.size); |
130 | return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; |
131 | } |
132 | |
133 | // Find the leader of V's belonged cluster (represented as an equivalence |
134 | // class). We apply union-find path-halving technique (simple to implement) in |
135 | // the meantime as it decreases depths and the time complexity. |
136 | static int getLeader(std::vector<int> &leaders, int v) { |
137 | while (leaders[v] != v) { |
138 | leaders[v] = leaders[leaders[v]]; |
139 | v = leaders[v]; |
140 | } |
141 | return v; |
142 | } |
143 | |
144 | static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, |
145 | Cluster &from, int fromIdx) { |
146 | int tail1 = into.prev, tail2 = from.prev; |
147 | into.prev = tail2; |
148 | cs[tail2].next = intoIdx; |
149 | from.prev = tail1; |
150 | cs[tail1].next = fromIdx; |
151 | into.size += from.size; |
152 | into.weight += from.weight; |
153 | from.size = 0; |
154 | from.weight = 0; |
155 | } |
156 | |
157 | // Group InputSections into clusters using the Call-Chain Clustering heuristic |
158 | // then sort the clusters by density. |
159 | DenseMap<const InputSection *, size_t> CallGraphSort::run() { |
160 | const uint64_t maxClusterSize = target->getPageSize(); |
161 | |
162 | // Cluster indices sorted by density. |
163 | std::vector<int> sorted(clusters.size()); |
164 | // For union-find. |
165 | std::vector<int> leaders(clusters.size()); |
166 | |
167 | std::iota(first: leaders.begin(), last: leaders.end(), value: 0); |
168 | std::iota(first: sorted.begin(), last: sorted.end(), value: 0); |
169 | |
170 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
171 | return clusters[a].getDensity() > clusters[b].getDensity(); |
172 | }); |
173 | |
174 | for (int l : sorted) { |
175 | // The cluster index is the same as the index of its leader here because |
176 | // clusters[L] has not been merged into another cluster yet. |
177 | Cluster &c = clusters[l]; |
178 | |
179 | // Don't consider merging if the edge is unlikely. |
180 | if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) |
181 | continue; |
182 | |
183 | int predL = getLeader(leaders, v: c.bestPred.from); |
184 | // Already in the same cluster. |
185 | if (l == predL) |
186 | continue; |
187 | |
188 | Cluster *predC = &clusters[predL]; |
189 | if (c.size + predC->size > maxClusterSize) |
190 | continue; |
191 | |
192 | if (isNewDensityBad(a&: *predC, b&: c)) |
193 | continue; |
194 | |
195 | leaders[l] = predL; |
196 | mergeClusters(cs&: clusters, into&: *predC, intoIdx: predL, from&: c, fromIdx: l); |
197 | } |
198 | // Sort remaining non-empty clusters by density. |
199 | sorted.clear(); |
200 | for (int i = 0, e = (int)clusters.size(); i != e; ++i) |
201 | if (clusters[i].size > 0) |
202 | sorted.push_back(x: i); |
203 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
204 | return clusters[a].getDensity() > clusters[b].getDensity(); |
205 | }); |
206 | |
207 | DenseMap<const InputSection *, size_t> orderMap; |
208 | |
209 | // Sections will be sorted by decreasing order. Absent sections will have |
210 | // priority 0 and be placed at the end of sections. |
211 | // NB: This is opposite from COFF/ELF to be compatible with the existing |
212 | // order-file code. |
213 | int curOrder = highestAvailablePriority; |
214 | for (int leader : sorted) { |
215 | for (int i = leader;;) { |
216 | orderMap[sections[i]] = curOrder--; |
217 | i = clusters[i].next; |
218 | if (i == leader) |
219 | break; |
220 | } |
221 | } |
222 | if (!config->printSymbolOrder.empty()) { |
223 | std::error_code ec; |
224 | raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); |
225 | if (ec) { |
226 | error(msg: "cannot open " + config->printSymbolOrder + ": " + ec.message()); |
227 | return orderMap; |
228 | } |
229 | // Print the symbols ordered by C3, in the order of decreasing curOrder |
230 | // Instead of sorting all the orderMap, just repeat the loops above. |
231 | for (int leader : sorted) |
232 | for (int i = leader;;) { |
233 | const InputSection *isec = sections[i]; |
234 | // Search all the symbols in the file of the section |
235 | // and find out a Defined symbol with name that is within the |
236 | // section. |
237 | for (Symbol *sym : isec->getFile()->symbols) { |
238 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) { |
239 | if (d->isec() == isec) |
240 | os << sym->getName() << "\n" ; |
241 | } |
242 | } |
243 | i = clusters[i].next; |
244 | if (i == leader) |
245 | break; |
246 | } |
247 | } |
248 | |
249 | return orderMap; |
250 | } |
251 | |
252 | std::optional<size_t> |
253 | macho::PriorityBuilder::getSymbolPriority(const Defined *sym) { |
254 | if (sym->isAbsolute()) |
255 | return std::nullopt; |
256 | |
257 | auto it = priorities.find(Val: sym->getName()); |
258 | if (it == priorities.end()) |
259 | return std::nullopt; |
260 | const SymbolPriorityEntry &entry = it->second; |
261 | const InputFile *f = sym->isec()->getFile(); |
262 | if (!f) |
263 | return entry.anyObjectFile; |
264 | // We don't use toString(InputFile *) here because it returns the full path |
265 | // for object files, and we only want the basename. |
266 | StringRef filename; |
267 | if (f->archiveName.empty()) |
268 | filename = path::filename(path: f->getName()); |
269 | else |
270 | filename = saver().save(S: path::filename(path: f->archiveName) + "(" + |
271 | path::filename(path: f->getName()) + ")" ); |
272 | return std::max(a: entry.objectFiles.lookup(Val: filename), b: entry.anyObjectFile); |
273 | } |
274 | |
275 | void macho::PriorityBuilder::() { |
276 | TimeTraceScope timeScope("Extract call graph profile" ); |
277 | bool hasOrderFile = !priorities.empty(); |
278 | for (const InputFile *file : inputFiles) { |
279 | auto *obj = dyn_cast_or_null<ObjFile>(Val: file); |
280 | if (!obj) |
281 | continue; |
282 | for (const CallGraphEntry &entry : obj->callGraph) { |
283 | assert(entry.fromIndex < obj->symbols.size() && |
284 | entry.toIndex < obj->symbols.size()); |
285 | auto *fromSym = dyn_cast_or_null<Defined>(Val: obj->symbols[entry.fromIndex]); |
286 | auto *toSym = dyn_cast_or_null<Defined>(Val: obj->symbols[entry.toIndex]); |
287 | if (fromSym && toSym && |
288 | (!hasOrderFile || |
289 | (!getSymbolPriority(sym: fromSym) && !getSymbolPriority(sym: toSym)))) |
290 | callGraphProfile[{fromSym->isec(), toSym->isec()}] += entry.count; |
291 | } |
292 | } |
293 | } |
294 | |
295 | void macho::PriorityBuilder::parseOrderFile(StringRef path) { |
296 | assert(callGraphProfile.empty() && |
297 | "Order file must be parsed before call graph profile is processed" ); |
298 | std::optional<MemoryBufferRef> buffer = readFile(path); |
299 | if (!buffer) { |
300 | error(msg: "Could not read order file at " + path); |
301 | return; |
302 | } |
303 | |
304 | MemoryBufferRef mbref = *buffer; |
305 | for (StringRef line : args::getLines(mb: mbref)) { |
306 | StringRef objectFile, symbol; |
307 | line = line.take_until(F: [](char c) { return c == '#'; }); // ignore comments |
308 | line = line.ltrim(); |
309 | |
310 | CPUType cpuType = StringSwitch<CPUType>(line) |
311 | .StartsWith(S: "i386:" , Value: CPU_TYPE_I386) |
312 | .StartsWith(S: "x86_64:" , Value: CPU_TYPE_X86_64) |
313 | .StartsWith(S: "arm:" , Value: CPU_TYPE_ARM) |
314 | .StartsWith(S: "arm64:" , Value: CPU_TYPE_ARM64) |
315 | .StartsWith(S: "ppc:" , Value: CPU_TYPE_POWERPC) |
316 | .StartsWith(S: "ppc64:" , Value: CPU_TYPE_POWERPC64) |
317 | .Default(Value: CPU_TYPE_ANY); |
318 | |
319 | if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType) |
320 | continue; |
321 | |
322 | // Drop the CPU type as well as the colon |
323 | if (cpuType != CPU_TYPE_ANY) |
324 | line = line.drop_until(F: [](char c) { return c == ':'; }).drop_front(); |
325 | |
326 | constexpr std::array<StringRef, 2> fileEnds = {".o:" , ".o):" }; |
327 | for (StringRef fileEnd : fileEnds) { |
328 | size_t pos = line.find(Str: fileEnd); |
329 | if (pos != StringRef::npos) { |
330 | // Split the string around the colon |
331 | objectFile = line.take_front(N: pos + fileEnd.size() - 1); |
332 | line = line.drop_front(N: pos + fileEnd.size()); |
333 | break; |
334 | } |
335 | } |
336 | symbol = line.trim(); |
337 | |
338 | if (!symbol.empty()) { |
339 | SymbolPriorityEntry &entry = priorities[symbol]; |
340 | if (!objectFile.empty()) |
341 | entry.objectFiles.insert( |
342 | KV: std::make_pair(x&: objectFile, y&: highestAvailablePriority)); |
343 | else |
344 | entry.anyObjectFile = |
345 | std::max(a: entry.anyObjectFile, b: highestAvailablePriority); |
346 | } |
347 | |
348 | --highestAvailablePriority; |
349 | } |
350 | } |
351 | |
352 | DenseMap<const InputSection *, size_t> |
353 | macho::PriorityBuilder::buildInputSectionPriorities() { |
354 | DenseMap<const InputSection *, size_t> sectionPriorities; |
355 | if (config->callGraphProfileSort) { |
356 | // Sort sections by the profile data provided by __LLVM,__cg_profile |
357 | // sections. |
358 | // |
359 | // This first builds a call graph based on the profile data then merges |
360 | // sections according to the C³ heuristic. All clusters are then sorted by a |
361 | // density metric to further improve locality. |
362 | TimeTraceScope timeScope("Call graph profile sort" ); |
363 | sectionPriorities = CallGraphSort(callGraphProfile).run(); |
364 | } |
365 | |
366 | if (priorities.empty()) |
367 | return sectionPriorities; |
368 | |
369 | auto addSym = [&](const Defined *sym) { |
370 | std::optional<size_t> symbolPriority = getSymbolPriority(sym); |
371 | if (!symbolPriority) |
372 | return; |
373 | size_t &priority = sectionPriorities[sym->isec()]; |
374 | priority = std::max(a: priority, b: *symbolPriority); |
375 | }; |
376 | |
377 | // TODO: Make sure this handles weak symbols correctly. |
378 | for (const InputFile *file : inputFiles) { |
379 | if (isa<ObjFile>(Val: file)) |
380 | for (Symbol *sym : file->symbols) |
381 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) |
382 | addSym(d); |
383 | } |
384 | |
385 | return sectionPriorities; |
386 | } |
387 | |