1 | //===- SectionPriorities.cpp ----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details |
10 | /// about the algorithm. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "SectionPriorities.h" |
15 | #include "BPSectionOrderer.h" |
16 | #include "Config.h" |
17 | #include "InputFiles.h" |
18 | #include "Symbols.h" |
19 | #include "Target.h" |
20 | |
21 | #include "lld/Common/Args.h" |
22 | #include "lld/Common/CommonLinkerContext.h" |
23 | #include "lld/Common/ErrorHandler.h" |
24 | #include "lld/Common/Utils.h" |
25 | #include "llvm/ADT/DenseMap.h" |
26 | #include "llvm/ADT/MapVector.h" |
27 | #include "llvm/Support/Path.h" |
28 | #include "llvm/Support/TimeProfiler.h" |
29 | #include "llvm/Support/raw_ostream.h" |
30 | |
31 | #include <numeric> |
32 | |
33 | using namespace llvm; |
34 | using namespace llvm::MachO; |
35 | using namespace llvm::sys; |
36 | using namespace lld; |
37 | using namespace lld::macho; |
38 | |
39 | PriorityBuilder macho::priorityBuilder; |
40 | |
41 | namespace { |
42 | struct Edge { |
43 | int from; |
44 | uint64_t weight; |
45 | }; |
46 | |
47 | struct Cluster { |
48 | Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} |
49 | |
50 | double getDensity() const { |
51 | if (size == 0) |
52 | return 0; |
53 | return double(weight) / double(size); |
54 | } |
55 | |
56 | int next; |
57 | int prev; |
58 | uint64_t size; |
59 | uint64_t weight = 0; |
60 | uint64_t initialWeight = 0; |
61 | Edge bestPred = {.from: -1, .weight: 0}; |
62 | }; |
63 | |
64 | class CallGraphSort { |
65 | public: |
66 | CallGraphSort(const MapVector<SectionPair, uint64_t> &profile); |
67 | |
68 | DenseMap<const InputSection *, int> run(); |
69 | |
70 | private: |
71 | std::vector<Cluster> clusters; |
72 | std::vector<const InputSection *> sections; |
73 | }; |
74 | // Maximum amount the combined cluster density can be worse than the original |
75 | // cluster to consider merging. |
76 | constexpr int MAX_DENSITY_DEGRADATION = 8; |
77 | } // end anonymous namespace |
78 | |
79 | // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and |
80 | // generate a graph between InputSections with the provided weights. |
81 | CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) { |
82 | DenseMap<const InputSection *, int> secToCluster; |
83 | |
84 | auto getOrCreateCluster = [&](const InputSection *isec) -> int { |
85 | auto res = secToCluster.try_emplace(Key: isec, Args: clusters.size()); |
86 | if (res.second) { |
87 | sections.push_back(x: isec); |
88 | clusters.emplace_back(args: clusters.size(), args: isec->getSize()); |
89 | } |
90 | return res.first->second; |
91 | }; |
92 | |
93 | // Create the graph |
94 | for (const std::pair<SectionPair, uint64_t> &c : profile) { |
95 | const auto fromSec = c.first.first->canonical(); |
96 | const auto toSec = c.first.second->canonical(); |
97 | uint64_t weight = c.second; |
98 | // Ignore edges between input sections belonging to different output |
99 | // sections. This is done because otherwise we would end up with clusters |
100 | // containing input sections that can't actually be placed adjacently in the |
101 | // output. This messes with the cluster size and density calculations. We |
102 | // would also end up moving input sections in other output sections without |
103 | // moving them closer to what calls them. |
104 | if (fromSec->parent != toSec->parent) |
105 | continue; |
106 | |
107 | int from = getOrCreateCluster(fromSec); |
108 | int to = getOrCreateCluster(toSec); |
109 | |
110 | clusters[to].weight += weight; |
111 | |
112 | if (from == to) |
113 | continue; |
114 | |
115 | // Remember the best edge. |
116 | Cluster &toC = clusters[to]; |
117 | if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { |
118 | toC.bestPred.from = from; |
119 | toC.bestPred.weight = weight; |
120 | } |
121 | } |
122 | for (Cluster &c : clusters) |
123 | c.initialWeight = c.weight; |
124 | } |
125 | |
126 | // It's bad to merge clusters which would degrade the density too much. |
127 | static bool isNewDensityBad(Cluster &a, Cluster &b) { |
128 | double newDensity = double(a.weight + b.weight) / double(a.size + b.size); |
129 | return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; |
130 | } |
131 | |
132 | // Find the leader of V's belonged cluster (represented as an equivalence |
133 | // class). We apply union-find path-halving technique (simple to implement) in |
134 | // the meantime as it decreases depths and the time complexity. |
135 | static int getLeader(std::vector<int> &leaders, int v) { |
136 | while (leaders[v] != v) { |
137 | leaders[v] = leaders[leaders[v]]; |
138 | v = leaders[v]; |
139 | } |
140 | return v; |
141 | } |
142 | |
143 | static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, |
144 | Cluster &from, int fromIdx) { |
145 | int tail1 = into.prev, tail2 = from.prev; |
146 | into.prev = tail2; |
147 | cs[tail2].next = intoIdx; |
148 | from.prev = tail1; |
149 | cs[tail1].next = fromIdx; |
150 | into.size += from.size; |
151 | into.weight += from.weight; |
152 | from.size = 0; |
153 | from.weight = 0; |
154 | } |
155 | |
156 | // Group InputSections into clusters using the Call-Chain Clustering heuristic |
157 | // then sort the clusters by density. |
158 | DenseMap<const InputSection *, int> CallGraphSort::run() { |
159 | const uint64_t maxClusterSize = target->getPageSize(); |
160 | |
161 | // Cluster indices sorted by density. |
162 | std::vector<int> sorted(clusters.size()); |
163 | // For union-find. |
164 | std::vector<int> leaders(clusters.size()); |
165 | |
166 | std::iota(first: leaders.begin(), last: leaders.end(), value: 0); |
167 | std::iota(first: sorted.begin(), last: sorted.end(), value: 0); |
168 | |
169 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
170 | return clusters[a].getDensity() > clusters[b].getDensity(); |
171 | }); |
172 | |
173 | for (int l : sorted) { |
174 | // The cluster index is the same as the index of its leader here because |
175 | // clusters[L] has not been merged into another cluster yet. |
176 | Cluster &c = clusters[l]; |
177 | |
178 | // Don't consider merging if the edge is unlikely. |
179 | if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) |
180 | continue; |
181 | |
182 | int predL = getLeader(leaders, v: c.bestPred.from); |
183 | // Already in the same cluster. |
184 | if (l == predL) |
185 | continue; |
186 | |
187 | Cluster *predC = &clusters[predL]; |
188 | if (c.size + predC->size > maxClusterSize) |
189 | continue; |
190 | |
191 | if (isNewDensityBad(a&: *predC, b&: c)) |
192 | continue; |
193 | |
194 | leaders[l] = predL; |
195 | mergeClusters(cs&: clusters, into&: *predC, intoIdx: predL, from&: c, fromIdx: l); |
196 | } |
197 | // Sort remaining non-empty clusters by density. |
198 | sorted.clear(); |
199 | for (int i = 0, e = (int)clusters.size(); i != e; ++i) |
200 | if (clusters[i].size > 0) |
201 | sorted.push_back(x: i); |
202 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
203 | return clusters[a].getDensity() > clusters[b].getDensity(); |
204 | }); |
205 | |
206 | DenseMap<const InputSection *, int> orderMap; |
207 | |
208 | // Sections will be sorted by decreasing order. Absent sections will have |
209 | // priority 0 and be placed at the end of sections. |
210 | int curOrder = -clusters.size(); |
211 | for (int leader : sorted) { |
212 | for (int i = leader;;) { |
213 | orderMap[sections[i]] = curOrder++; |
214 | i = clusters[i].next; |
215 | if (i == leader) |
216 | break; |
217 | } |
218 | } |
219 | if (!config->printSymbolOrder.empty()) { |
220 | std::error_code ec; |
221 | raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); |
222 | if (ec) { |
223 | error(msg: "cannot open " + config->printSymbolOrder + ": " + ec.message()); |
224 | return orderMap; |
225 | } |
226 | // Print the symbols ordered by C3, in the order of decreasing curOrder |
227 | // Instead of sorting all the orderMap, just repeat the loops above. |
228 | for (int leader : sorted) |
229 | for (int i = leader;;) { |
230 | const InputSection *isec = sections[i]; |
231 | // Search all the symbols in the file of the section |
232 | // and find out a Defined symbol with name that is within the |
233 | // section. |
234 | for (Symbol *sym : isec->getFile()->symbols) { |
235 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) { |
236 | if (d->isec() == isec) |
237 | os << sym->getName() << "\n" ; |
238 | } |
239 | } |
240 | i = clusters[i].next; |
241 | if (i == leader) |
242 | break; |
243 | } |
244 | } |
245 | |
246 | return orderMap; |
247 | } |
248 | |
249 | std::optional<int> |
250 | macho::PriorityBuilder::getSymbolOrCStringPriority(const StringRef key, |
251 | InputFile *f) { |
252 | |
253 | auto it = priorities.find(Val: key); |
254 | if (it == priorities.end()) |
255 | return std::nullopt; |
256 | const SymbolPriorityEntry &entry = it->second; |
257 | if (!f) |
258 | return entry.anyObjectFile; |
259 | // We don't use toString(InputFile *) here because it returns the full path |
260 | // for object files, and we only want the basename. |
261 | StringRef filename; |
262 | if (f->archiveName.empty()) |
263 | filename = path::filename(path: f->getName()); |
264 | else |
265 | filename = saver().save(S: path::filename(path: f->archiveName) + "(" + |
266 | path::filename(path: f->getName()) + ")" ); |
267 | return std::min(a: entry.objectFiles.lookup(Val: filename), b: entry.anyObjectFile); |
268 | } |
269 | |
270 | std::optional<int> |
271 | macho::PriorityBuilder::getSymbolPriority(const Defined *sym) { |
272 | if (sym->isAbsolute()) |
273 | return std::nullopt; |
274 | return getSymbolOrCStringPriority(key: utils::getRootSymbol(Name: sym->getName()), |
275 | f: sym->isec()->getFile()); |
276 | } |
277 | |
278 | void macho::PriorityBuilder::() { |
279 | TimeTraceScope timeScope("Extract call graph profile" ); |
280 | bool hasOrderFile = !priorities.empty(); |
281 | for (const InputFile *file : inputFiles) { |
282 | auto *obj = dyn_cast_or_null<ObjFile>(Val: file); |
283 | if (!obj) |
284 | continue; |
285 | for (const CallGraphEntry &entry : obj->callGraph) { |
286 | assert(entry.fromIndex < obj->symbols.size() && |
287 | entry.toIndex < obj->symbols.size()); |
288 | auto *fromSym = dyn_cast_or_null<Defined>(Val: obj->symbols[entry.fromIndex]); |
289 | auto *toSym = dyn_cast_or_null<Defined>(Val: obj->symbols[entry.toIndex]); |
290 | if (fromSym && toSym && |
291 | (!hasOrderFile || |
292 | (!getSymbolPriority(sym: fromSym) && !getSymbolPriority(sym: toSym)))) |
293 | callGraphProfile[{fromSym->isec(), toSym->isec()}] += entry.count; |
294 | } |
295 | } |
296 | } |
297 | |
298 | void macho::PriorityBuilder::parseOrderFile(StringRef path) { |
299 | assert(callGraphProfile.empty() && |
300 | "Order file must be parsed before call graph profile is processed" ); |
301 | std::optional<MemoryBufferRef> buffer = readFile(path); |
302 | if (!buffer) { |
303 | error(msg: "Could not read order file at " + path); |
304 | return; |
305 | } |
306 | |
307 | int prio = std::numeric_limits<int>::min(); |
308 | MemoryBufferRef mbref = *buffer; |
309 | for (StringRef line : args::getLines(mb: mbref)) { |
310 | StringRef objectFile, symbolOrCStrHash; |
311 | line = line.take_until(F: [](char c) { return c == '#'; }); // ignore comments |
312 | line = line.ltrim(); |
313 | |
314 | CPUType cpuType = StringSwitch<CPUType>(line) |
315 | .StartsWith(S: "i386:" , Value: CPU_TYPE_I386) |
316 | .StartsWith(S: "x86_64:" , Value: CPU_TYPE_X86_64) |
317 | .StartsWith(S: "arm:" , Value: CPU_TYPE_ARM) |
318 | .StartsWith(S: "arm64:" , Value: CPU_TYPE_ARM64) |
319 | .StartsWith(S: "ppc:" , Value: CPU_TYPE_POWERPC) |
320 | .StartsWith(S: "ppc64:" , Value: CPU_TYPE_POWERPC64) |
321 | .Default(Value: CPU_TYPE_ANY); |
322 | |
323 | if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType) |
324 | continue; |
325 | // Drop the CPU type as well as the colon |
326 | if (cpuType != CPU_TYPE_ANY) |
327 | line = line.drop_until(F: [](char c) { return c == ':'; }).drop_front(); |
328 | |
329 | constexpr std::array<StringRef, 2> fileEnds = {".o:" , ".o):" }; |
330 | for (StringRef fileEnd : fileEnds) { |
331 | size_t pos = line.find(Str: fileEnd); |
332 | if (pos != StringRef::npos) { |
333 | // Split the string around the colon |
334 | objectFile = line.take_front(N: pos + fileEnd.size() - 1); |
335 | line = line.drop_front(N: pos + fileEnd.size()); |
336 | break; |
337 | } |
338 | } |
339 | |
340 | // The rest of the line is either <symbol name> or |
341 | // CStringEntryPrefix<cstring hash> |
342 | line = line.trim(); |
343 | if (line.starts_with(Prefix: CStringEntryPrefix)) { |
344 | StringRef possibleHash = line.drop_front(N: CStringEntryPrefix.size()); |
345 | uint32_t hash = 0; |
346 | if (to_integer(S: possibleHash, Num&: hash)) |
347 | symbolOrCStrHash = possibleHash; |
348 | } else |
349 | symbolOrCStrHash = utils::getRootSymbol(Name: line); |
350 | |
351 | if (!symbolOrCStrHash.empty()) { |
352 | SymbolPriorityEntry &entry = priorities[symbolOrCStrHash]; |
353 | if (!objectFile.empty()) |
354 | entry.objectFiles.insert(KV: std::make_pair(x&: objectFile, y&: prio)); |
355 | else |
356 | entry.anyObjectFile = std::min(a: entry.anyObjectFile, b: prio); |
357 | } |
358 | |
359 | ++prio; |
360 | } |
361 | } |
362 | |
363 | DenseMap<const InputSection *, int> |
364 | macho::PriorityBuilder::buildInputSectionPriorities() { |
365 | DenseMap<const InputSection *, int> sectionPriorities; |
366 | if (config->bpStartupFunctionSort || config->bpFunctionOrderForCompression || |
367 | config->bpDataOrderForCompression) { |
368 | TimeTraceScope timeScope("Balanced Partitioning Section Orderer" ); |
369 | sectionPriorities = runBalancedPartitioning( |
370 | profilePath: config->bpStartupFunctionSort ? config->irpgoProfilePath : "" , |
371 | forFunctionCompression: config->bpFunctionOrderForCompression, |
372 | forDataCompression: config->bpDataOrderForCompression, |
373 | compressionSortStartupFunctions: config->bpCompressionSortStartupFunctions, |
374 | verbose: config->bpVerboseSectionOrderer); |
375 | } else if (config->callGraphProfileSort) { |
376 | // Sort sections by the profile data provided by __LLVM,__cg_profile |
377 | // sections. |
378 | // |
379 | // This first builds a call graph based on the profile data then merges |
380 | // sections according to the C³ heuristic. All clusters are then sorted by a |
381 | // density metric to further improve locality. |
382 | TimeTraceScope timeScope("Call graph profile sort" ); |
383 | sectionPriorities = CallGraphSort(callGraphProfile).run(); |
384 | } |
385 | |
386 | if (priorities.empty()) |
387 | return sectionPriorities; |
388 | |
389 | auto addSym = [&](const Defined *sym) { |
390 | std::optional<int> symbolPriority = getSymbolPriority(sym); |
391 | if (!symbolPriority) |
392 | return; |
393 | int &priority = sectionPriorities[sym->isec()]; |
394 | priority = std::min(a: priority, b: *symbolPriority); |
395 | }; |
396 | |
397 | // TODO: Make sure this handles weak symbols correctly. |
398 | for (const InputFile *file : inputFiles) { |
399 | if (isa<ObjFile>(Val: file)) |
400 | for (Symbol *sym : file->symbols) |
401 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) |
402 | addSym(d); |
403 | } |
404 | |
405 | return sectionPriorities; |
406 | } |
407 | |
408 | std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities( |
409 | ArrayRef<CStringInputSection *> inputs) { |
410 | // Split the input strings into hold and cold sets. |
411 | // Order hot set based on -order_file_cstring for performance improvement; |
412 | // TODO: Order cold set of cstrings for compression via BP. |
413 | std::vector<std::pair<int, StringPiecePair>> |
414 | hotStringPrioritiesAndStringPieces; |
415 | std::vector<StringPiecePair> coldStringPieces; |
416 | std::vector<StringPiecePair> orderedStringPieces; |
417 | |
418 | for (CStringInputSection *isec : inputs) { |
419 | for (const auto &[stringPieceIdx, piece] : llvm::enumerate(First&: isec->pieces)) { |
420 | if (!piece.live) |
421 | continue; |
422 | |
423 | std::optional<int> priority = getSymbolOrCStringPriority( |
424 | key: std::to_string(val: piece.hash), f: isec->getFile()); |
425 | if (!priority) |
426 | coldStringPieces.emplace_back(args&: isec, args&: stringPieceIdx); |
427 | else |
428 | hotStringPrioritiesAndStringPieces.emplace_back( |
429 | args&: *priority, args: std::make_pair(x&: isec, y&: stringPieceIdx)); |
430 | } |
431 | } |
432 | |
433 | // Order hot set for perf |
434 | llvm::stable_sort(Range&: hotStringPrioritiesAndStringPieces); |
435 | for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces) |
436 | orderedStringPieces.push_back(x: stringPiecePair); |
437 | |
438 | // TODO: Order cold set for compression |
439 | |
440 | orderedStringPieces.insert(position: orderedStringPieces.end(), |
441 | first: coldStringPieces.begin(), last: coldStringPieces.end()); |
442 | |
443 | return orderedStringPieces; |
444 | } |
445 | |