SectionPriorities.cpp source code [lld/MachO/SectionPriorities.cpp]

1	//===- SectionPriorities.cpp ----------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
10	/// about the algorithm.
11	///
12	//===----------------------------------------------------------------------===//
13
14	#include "SectionPriorities.h"
15	#include "BPSectionOrderer.h"
16	#include "Config.h"
17	#include "InputFiles.h"
18	#include "Symbols.h"
19	#include "Target.h"
20
21	#include "lld/Common/Args.h"
22	#include "lld/Common/CommonLinkerContext.h"
23	#include "lld/Common/ErrorHandler.h"
24	#include "lld/Common/Utils.h"
25	#include "llvm/ADT/DenseMap.h"
26	#include "llvm/ADT/MapVector.h"
27	#include "llvm/Support/Path.h"
28	#include "llvm/Support/TimeProfiler.h"
29	#include "llvm/Support/raw_ostream.h"
30
31	#include <numeric>
32
33	using namespace llvm;
34	using namespace llvm::MachO;
35	using namespace llvm::sys;
36	using namespace lld;
37	using namespace lld::macho;
38
39	PriorityBuilder macho::priorityBuilder;
40
41	namespace {
42	struct Edge {
43	int from;
44	uint64_t weight;
45	};
46
47	struct Cluster {
48	Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
49
50	double getDensity() const {
51	if (size == `0`)
52	return `0`;
53	return double(weight) / double(size);
54	}
55
56	int next;
57	int prev;
58	uint64_t size;
59	uint64_t weight = `0`;
60	uint64_t initialWeight = `0`;
61	Edge bestPred = {.from: -`1`, .weight: `0`};
62	};
63
64	class CallGraphSort {
65	public:
66	CallGraphSort(const MapVector<SectionPair, uint64_t> &profile);
67
68	DenseMap<const InputSection , int*> run();
69
70	private:
71	std::vector<Cluster> clusters;
72	std::vector<const InputSection *> sections;
73	};
74	// Maximum amount the combined cluster density can be worse than the original
75	// cluster to consider merging.
76	constexpr int MAX_DENSITY_DEGRADATION = `8`;
77	} // end anonymous namespace
78
79	// Take the edge list in callGraphProfile, resolve symbol names to Symbols, and
80	// generate a graph between InputSections with the provided weights.
81	CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) {
82	DenseMap<const InputSection , int*> secToCluster;
83
84	auto getOrCreateCluster = [&](const InputSection isec) -> int* {
85	auto res = secToCluster.try_emplace(Key: isec, Args: clusters.size());
86	if (res.second) {
87	sections.push_back(x: isec);
88	clusters.emplace_back(args: clusters.size(), args: isec->getSize());
89	}
90	return res.first ->second;
91	};
92
93	// Create the graph
94	for (const std::pair<SectionPair, uint64_t> &c : profile) {
95	const auto fromSec = c.first.first->canonical();
96	const auto toSec = c.first.second->canonical();
97	uint64_t weight = c.second;
98	// Ignore edges between input sections belonging to different output
99	// sections. This is done because otherwise we would end up with clusters
100	// containing input sections that can't actually be placed adjacently in the
101	// output. This messes with the cluster size and density calculations. We
102	// would also end up moving input sections in other output sections without
103	// moving them closer to what calls them.
104	if (fromSec->parent != toSec->parent)
105	continue;
106
107	int from = getOrCreateCluster (fromSec);
108	int to = getOrCreateCluster (toSec);
109
110	clusters [to].weight += weight;
111
112	if (from == to)
113	continue;
114
115	// Remember the best edge.
116	Cluster &toC = clusters [to];
117	if (toC.bestPred.from == -`1` \|\| toC.bestPred.weight < weight) {
118	toC.bestPred.from = from;
119	toC.bestPred.weight = weight;
120	}
121	}
122	for (Cluster &c : clusters)
123	c.initialWeight = c.weight;
124	}
125
126	// It's bad to merge clusters which would degrade the density too much.
127	static bool isNewDensityBad(Cluster &a, Cluster &b) {
128	double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
129	return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
130	}
131
132	// Find the leader of V's belonged cluster (represented as an equivalence
133	// class). We apply union-find path-halving technique (simple to implement) in
134	// the meantime as it decreases depths and the time complexity.
135	static int getLeader(std::vector<int> &leaders, int v) {
136	while (leaders [v] != v) {
137	leaders [v] = leaders [leaders [v]];
138	v = leaders [v];
139	}
140	return v;
141	}
142
143	static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
144	Cluster &from, int fromIdx) {
145	int tail1 = into.prev, tail2 = from.prev;
146	into.prev = tail2;
147	cs [tail2].next = intoIdx;
148	from.prev = tail1;
149	cs [tail1].next = fromIdx;
150	into.size += from.size;
151	into.weight += from.weight;
152	from.size = `0`;
153	from.weight = `0`;
154	}
155
156	// Group InputSections into clusters using the Call-Chain Clustering heuristic
157	// then sort the clusters by density.
158	DenseMap<const InputSection , int*> CallGraphSort::run() {
159	const uint64_t maxClusterSize = target->getPageSize();
160
161	// Cluster indices sorted by density.
162	std::vector<int> sorted(clusters.size());
163	// For union-find.
164	std::vector<int> leaders(clusters.size());
165
166	std::iota(first: leaders.begin(), last: leaders.end(), value: `0`);
167	std::iota(first: sorted.begin(), last: sorted.end(), value: `0`);
168
169	llvm::stable_sort(Range&: sorted, C: [&](int a, int b) {
170	return clusters [a].getDensity() > clusters [b].getDensity();
171	});
172
173	for (int l : sorted) {
174	// The cluster index is the same as the index of its leader here because
175	// clusters[L] has not been merged into another cluster yet.
176	Cluster &c = clusters [l];
177
178	// Don't consider merging if the edge is unlikely.
179	if (c.bestPred.from == -`1` \|\| c.bestPred.weight * `10` <= c.initialWeight)
180	continue;
181
182	int predL = getLeader(leaders, v: c.bestPred.from);
183	// Already in the same cluster.
184	if (l == predL)
185	continue;
186
187	Cluster *predC = &clusters [predL];
188	if (c.size + predC->size > maxClusterSize)
189	continue;
190
191	if (isNewDensityBad(a&: *predC, b&: c))
192	continue;
193
194	leaders [l] = predL;
195	mergeClusters(cs&: clusters, into&: *predC, intoIdx: predL, from&: c, fromIdx: l);
196	}
197	// Sort remaining non-empty clusters by density.
198	sorted.clear();
199	for (int i = `0`, e = (int)clusters.size(); i != e; ++i)
200	if (clusters [i].size > `0`)
201	sorted.push_back(x: i);
202	llvm::stable_sort(Range&: sorted, C: [&](int a, int b) {
203	return clusters [a].getDensity() > clusters [b].getDensity();
204	});
205
206	DenseMap<const InputSection , int*> orderMap;
207
208	// Sections will be sorted by decreasing order. Absent sections will have
209	// priority 0 and be placed at the end of sections.
210	int curOrder = -clusters.size();
211	for (int leader : sorted) {
212	for (int i = leader;;) {
213	orderMap [sections [i]] = curOrder++;
214	i = clusters [i].next;
215	if (i == leader)
216	break;
217	}
218	}
219	if (!config ->printSymbolOrder.empty()) {
220	std::error_code ec;
221	raw_fd_ostream os(config ->printSymbolOrder, ec, sys::fs::OF_None);
222	if (ec) {
223	error(msg: "cannot open " + config ->printSymbolOrder + ": " + ec.message());
224	return orderMap;
225	}
226	// Print the symbols ordered by C3, in the order of decreasing curOrder
227	// Instead of sorting all the orderMap, just repeat the loops above.
228	for (int leader : sorted)
229	for (int i = leader;;) {
230	const InputSection *isec = sections [i];
231	// Search all the symbols in the file of the section
232	// and find out a Defined symbol with name that is within the
233	// section.
234	for (Symbol *sym : isec->getFile()->symbols) {
235	if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) {
236	if (d->isec() == isec)
237	os << sym->getName() << "\n";
238	}
239	}
240	i = clusters [i].next;
241	if (i == leader)
242	break;
243	}
244	}
245
246	return orderMap;
247	}
248
249	std::optional<int>
250	macho::PriorityBuilder::getSymbolOrCStringPriority(const StringRef key,
251	InputFile *f) {
252
253	auto it = priorities.find(Val: key);
254	if (it == priorities.end())
255	return std::nullopt;
256	const SymbolPriorityEntry &entry = it ->second;
257	if (!f)
258	return entry.anyObjectFile;
259	// We don't use toString(InputFile ) here because it returns the full path*
260	// for object files, and we only want the basename.
261	StringRef filename;
262	if (f->archiveName.empty())
263	filename = path::filename(path: f->getName());
264	else
265	filename = saver().save(S: path::filename(path: f->archiveName) + "(" +
266	path::filename(path: f->getName()) + ")");
267	return std::min(a: entry.objectFiles.lookup(Val: filename), b: entry.anyObjectFile);
268	}
269
270	std::optional<int>
271	macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
272	if (sym->isAbsolute())
273	return std::nullopt;
274	return getSymbolOrCStringPriority(key: utils::getRootSymbol(Name: sym->getName()),
275	f: sym->isec()->getFile());
276	}
277
278	void macho::PriorityBuilder::extractCallGraphProfile() {
279	TimeTraceScope timeScope("Extract call graph profile");
280	bool hasOrderFile = !priorities.empty();
281	for (const InputFile *file : inputFiles) {
282	auto *obj = dyn_cast_or_null<ObjFile>(Val: file);
283	if (!obj)
284	continue;
285	for (const CallGraphEntry &entry : obj->callGraph) {
286	assert(entry.fromIndex < obj->symbols.size() &&
287	entry.toIndex < obj->symbols.size());
288	auto *fromSym = dyn_cast_or_null<Defined>(Val: obj->symbols [entry.fromIndex]);
289	auto *toSym = dyn_cast_or_null<Defined>(Val: obj->symbols [entry.toIndex]);
290	if (fromSym && toSym &&
291	(!hasOrderFile \|\|
292	(!getSymbolPriority(sym: fromSym) && !getSymbolPriority(sym: toSym))))
293	callGraphProfile [{fromSym->isec(), toSym->isec()}] += entry.count;
294	}
295	}
296	}
297
298	void macho::PriorityBuilder::parseOrderFile(StringRef path) {
299	assert(callGraphProfile.empty() &&
300	"Order file must be parsed before call graph profile is processed");
301	std::optional<MemoryBufferRef> buffer = readFile(path);
302	if (!buffer) {
303	error(msg: "Could not read order file at " + path);
304	return;
305	}
306
307	int prio = std::numeric_limits<int>::min();
308	MemoryBufferRef mbref = *buffer;
309	for (StringRef line : args::getLines(mb: mbref)) {
310	StringRef objectFile, symbolOrCStrHash;
311	line = line.take_until(F: [](char c) { return c == `'#'`; }); // ignore comments
312	line = line.ltrim();
313
314	CPUType cpuType = StringSwitch<CPUType>(line)
315	.StartsWith(S: "i386:", Value: CPU_TYPE_I386)
316	.StartsWith(S: "x86_64:", Value: CPU_TYPE_X86_64)
317	.StartsWith(S: "arm:", Value: CPU_TYPE_ARM)
318	.StartsWith(S: "arm64:", Value: CPU_TYPE_ARM64)
319	.StartsWith(S: "ppc:", Value: CPU_TYPE_POWERPC)
320	.StartsWith(S: "ppc64:", Value: CPU_TYPE_POWERPC64)
321	.Default(Value: CPU_TYPE_ANY);
322
323	if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType)
324	continue;
325	// Drop the CPU type as well as the colon
326	if (cpuType != CPU_TYPE_ANY)
327	line = line.drop_until(F: [](char c) { return c == `':'`; }).drop_front();
328
329	constexpr std::array<StringRef, `2`> fileEnds = {".o:", ".o):"};
330	for (StringRef fileEnd : fileEnds) {
331	size_t pos = line.find(Str: fileEnd);
332	if (pos != StringRef::npos) {
333	// Split the string around the colon
334	objectFile = line.take_front(N: pos + fileEnd.size() - `1`);
335	line = line.drop_front(N: pos + fileEnd.size());
336	break;
337	}
338	}
339
340	// The rest of the line is either <symbol name> or
341	// CStringEntryPrefix<cstring hash>
342	line = line.trim();
343	if (line.starts_with(Prefix: CStringEntryPrefix)) {
344	StringRef possibleHash = line.drop_front(N: CStringEntryPrefix.size());
345	uint32_t hash = `0`;
346	if (to_integer(S: possibleHash, Num&: hash))
347	symbolOrCStrHash = possibleHash;
348	} else
349	symbolOrCStrHash = utils::getRootSymbol(Name: line);
350
351	if (!symbolOrCStrHash.empty()) {
352	SymbolPriorityEntry &entry = priorities [symbolOrCStrHash];
353	if (!objectFile.empty())
354	entry.objectFiles.insert(KV: std::make_pair(x&: objectFile, y&: prio));
355	else
356	entry.anyObjectFile = std::min(a: entry.anyObjectFile, b: prio);
357	}
358
359	++prio;
360	}
361	}
362
363	DenseMap<const InputSection , int*>
364	macho::PriorityBuilder::buildInputSectionPriorities() {
365	DenseMap<const InputSection , int*> sectionPriorities;
366	if (config ->bpStartupFunctionSort \|\| config ->bpFunctionOrderForCompression \|\|
367	config ->bpDataOrderForCompression) {
368	TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
369	sectionPriorities = runBalancedPartitioning(
370	profilePath: config ->bpStartupFunctionSort ? config ->irpgoProfilePath : "",
371	forFunctionCompression: config ->bpFunctionOrderForCompression,
372	forDataCompression: config ->bpDataOrderForCompression,
373	compressionSortStartupFunctions: config ->bpCompressionSortStartupFunctions,
374	verbose: config ->bpVerboseSectionOrderer);
375	} else if (config ->callGraphProfileSort) {
376	// Sort sections by the profile data provided by __LLVM,__cg_profile
377	// sections.
378	//
379	// This first builds a call graph based on the profile data then merges
380	// sections according to the C³ heuristic. All clusters are then sorted by a
381	// density metric to further improve locality.
382	TimeTraceScope timeScope("Call graph profile sort");
383	sectionPriorities = CallGraphSort (callGraphProfile).run();
384	}
385
386	if (priorities.empty())
387	return sectionPriorities;
388
389	auto addSym = [&](const Defined *sym) {
390	std::optional<int> symbolPriority = getSymbolPriority(sym);
391	if (!symbolPriority)
392	return;
393	int &priority = sectionPriorities [sym->isec()];
394	priority = std::min(a: priority, b: *symbolPriority);
395	};
396
397	// TODO: Make sure this handles weak symbols correctly.
398	for (const InputFile *file : inputFiles) {
399	if (isa<ObjFile>(Val: file))
400	for (Symbol *sym : file->symbols)
401	if (auto *d = dyn_cast_or_null<Defined>(Val: sym))
402	addSym (d);
403	}
404
405	return sectionPriorities;
406	}
407
408	std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities(
409	ArrayRef<CStringInputSection *> inputs) {
410	// Split the input strings into hold and cold sets.
411	// Order hot set based on -order_file_cstring for performance improvement;
412	// TODO: Order cold set of cstrings for compression via BP.
413	std::vector<std::pair<int, StringPiecePair>>
414	hotStringPrioritiesAndStringPieces;
415	std::vector<StringPiecePair> coldStringPieces;
416	std::vector<StringPiecePair> orderedStringPieces;
417
418	for (CStringInputSection *isec : inputs) {
419	for (const auto &[stringPieceIdx, piece] : llvm::enumerate(First&: isec->pieces)) {
420	if (!piece.live)
421	continue;
422
423	std::optional<int> priority = getSymbolOrCStringPriority(
424	key: std::to_string(val: piece.hash), f: isec->getFile());
425	if (!priority)
426	coldStringPieces.emplace_back(args&: isec, args&: stringPieceIdx);
427	else
428	hotStringPrioritiesAndStringPieces.emplace_back(
429	args&: *priority, args: std::make_pair(x&: isec, y&: stringPieceIdx));
430	}
431	}
432
433	// Order hot set for perf
434	llvm::stable_sort(Range&: hotStringPrioritiesAndStringPieces);
435	for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces)
436	orderedStringPieces.push_back(x: stringPiecePair);
437
438	// TODO: Order cold set for compression
439
440	orderedStringPieces.insert(position: orderedStringPieces.end(),
441	first: coldStringPieces.begin(), last: coldStringPieces.end());
442
443	return orderedStringPieces;
444	}
445

Provided by KDAB

Definitions

priorityBuilder
Edge
Cluster
Cluster
getDensity
CallGraphSort
MAX_DENSITY_DEGRADATION
CallGraphSort
isNewDensityBad
getLeader
mergeClusters
run
getSymbolOrCStringPriority
getSymbolPriority
extractCallGraphProfile
parseOrderFile
buildInputSectionPriorities

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of lld/MachO/SectionPriorities.cpp