CallGraphSort.cpp source code [lld/COFF/CallGraphSort.cpp]

1	//===- CallGraphSort.cpp --------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
10	/// about the algorithm.
11	///
12	//===----------------------------------------------------------------------===//
13
14	#include "CallGraphSort.h"
15	#include "COFFLinkerContext.h"
16	#include "InputFiles.h"
17	#include "SymbolTable.h"
18	#include "Symbols.h"
19	#include "lld/Common/ErrorHandler.h"
20
21	#include <numeric>
22
23	using namespace llvm;
24	using namespace lld;
25	using namespace lld::coff;
26
27	namespace {
28	struct Edge {
29	int from;
30	uint64_t weight;
31	};
32
33	struct Cluster {
34	Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
35
36	double getDensity() const {
37	if (size == `0`)
38	return `0`;
39	return double(weight) / double(size);
40	}
41
42	int next;
43	int prev;
44	uint64_t size;
45	uint64_t weight = `0`;
46	uint64_t initialWeight = `0`;
47	Edge bestPred = {.from: -`1`, .weight: `0`};
48	};
49
50	class CallGraphSort {
51	public:
52	CallGraphSort(const COFFLinkerContext &ctx);
53
54	DenseMap<const SectionChunk , int*> run();
55
56	private:
57	std::vector<Cluster> clusters;
58	std::vector<const SectionChunk *> sections;
59
60	const COFFLinkerContext &ctx;
61	};
62
63	// Maximum amount the combined cluster density can be worse than the original
64	// cluster to consider merging.
65	constexpr int MAX_DENSITY_DEGRADATION = `8`;
66
67	// Maximum cluster size in bytes.
68	constexpr uint64_t MAX_CLUSTER_SIZE = `1024` * `1024`;
69	} // end anonymous namespace
70
71	using SectionPair = std::pair<const SectionChunk , const* SectionChunk *>;
72
73	// Take the edge list in Config->CallGraphProfile, resolve symbol names to
74	// Symbols, and generate a graph between InputSections with the provided
75	// weights.
76	CallGraphSort::CallGraphSort(const COFFLinkerContext &ctx) : ctx(ctx) {
77	const MapVector<SectionPair, uint64_t> &profile = ctx.config.callGraphProfile;
78	DenseMap<const SectionChunk , int*> secToCluster;
79
80	auto getOrCreateNode = [&](const SectionChunk isec) -> int* {
81	auto res = secToCluster.try_emplace(Key: isec, Args: clusters.size());
82	if (res.second) {
83	sections.push_back(x: isec);
84	clusters.emplace_back(args: clusters.size(), args: isec->getSize());
85	}
86	return res.first ->second;
87	};
88
89	// Create the graph.
90	for (const std::pair<SectionPair, uint64_t> &c : profile) {
91	const auto *fromSec = cast<SectionChunk>(Val: c.first.first->repl);
92	const auto *toSec = cast<SectionChunk>(Val: c.first.second->repl);
93	uint64_t weight = c.second;
94
95	// Ignore edges between input sections belonging to different output
96	// sections. This is done because otherwise we would end up with clusters
97	// containing input sections that can't actually be placed adjacently in the
98	// output. This messes with the cluster size and density calculations. We
99	// would also end up moving input sections in other output sections without
100	// moving them closer to what calls them.
101	if (ctx.getOutputSection(c: fromSec) != ctx.getOutputSection(c: toSec))
102	continue;
103
104	int from = getOrCreateNode (fromSec);
105	int to = getOrCreateNode (toSec);
106
107	clusters [to].weight += weight;
108
109	if (from == to)
110	continue;
111
112	// Remember the best edge.
113	Cluster &toC = clusters [to];
114	if (toC.bestPred.from == -`1` \|\| toC.bestPred.weight < weight) {
115	toC.bestPred.from = from;
116	toC.bestPred.weight = weight;
117	}
118	}
119	for (Cluster &c : clusters)
120	c.initialWeight = c.weight;
121	}
122
123	// It's bad to merge clusters which would degrade the density too much.
124	static bool isNewDensityBad(Cluster &a, Cluster &b) {
125	double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
126	return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
127	}
128
129	// Find the leader of V's belonged cluster (represented as an equivalence
130	// class). We apply union-find path-halving technique (simple to implement) in
131	// the meantime as it decreases depths and the time complexity.
132	static int getLeader(std::vector<int> &leaders, int v) {
133	while (leaders [v] != v) {
134	leaders [v] = leaders [leaders [v]];
135	v = leaders [v];
136	}
137	return v;
138	}
139
140	static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
141	Cluster &from, int fromIdx) {
142	int tail1 = into.prev, tail2 = from.prev;
143	into.prev = tail2;
144	cs [tail2].next = intoIdx;
145	from.prev = tail1;
146	cs [tail1].next = fromIdx;
147	into.size += from.size;
148	into.weight += from.weight;
149	from.size = `0`;
150	from.weight = `0`;
151	}
152
153	// Group InputSections into clusters using the Call-Chain Clustering heuristic
154	// then sort the clusters by density.
155	DenseMap<const SectionChunk , int*> CallGraphSort::run() {
156	std::vector<int> sorted(clusters.size());
157	std::vector<int> leaders(clusters.size());
158
159	std::iota(first: leaders.begin(), last: leaders.end(), value: `0`);
160	std::iota(first: sorted.begin(), last: sorted.end(), value: `0`);
161	llvm::stable_sort(Range&: sorted, C: [&](int a, int b) {
162	return clusters [a].getDensity() > clusters [b].getDensity();
163	});
164
165	for (int l : sorted) {
166	// The cluster index is the same as the index of its leader here because
167	// clusters[L] has not been merged into another cluster yet.
168	Cluster &c = clusters [l];
169
170	// Don't consider merging if the edge is unlikely.
171	if (c.bestPred.from == -`1` \|\| c.bestPred.weight * `10` <= c.initialWeight)
172	continue;
173
174	int predL = getLeader(leaders, v: c.bestPred.from);
175	if (l == predL)
176	continue;
177
178	Cluster *predC = &clusters [predL];
179	if (c.size + predC->size > MAX_CLUSTER_SIZE)
180	continue;
181
182	if (isNewDensityBad(a&: *predC, b&: c))
183	continue;
184
185	leaders [l] = predL;
186	mergeClusters(cs&: clusters, into&: *predC, intoIdx: predL, from&: c, fromIdx: l);
187	}
188
189	// Sort remaining non-empty clusters by density.
190	sorted.clear();
191	for (int i = `0`, e = (int)clusters.size(); i != e; ++i)
192	if (clusters [i].size > `0`)
193	sorted.push_back(x: i);
194	llvm::stable_sort(Range&: sorted, C: [&](int a, int b) {
195	return clusters [a].getDensity() > clusters [b].getDensity();
196	});
197
198	DenseMap<const SectionChunk , int*> orderMap;
199	// Sections will be sorted by increasing order. Absent sections will have
200	// priority 0 and be placed at the end of sections.
201	int curOrder = INT_MIN;
202	for (int leader : sorted) {
203	for (int i = leader;;) {
204	orderMap [sections [i]] = curOrder++;
205	i = clusters [i].next;
206	if (i == leader)
207	break;
208	}
209	}
210	if (!ctx.config.printSymbolOrder.empty()) {
211	std::error_code ec;
212	raw_fd_ostream os(ctx.config.printSymbolOrder, ec, sys::fs::OF_None);
213	if (ec) {
214	error(msg: "cannot open " + ctx.config.printSymbolOrder + ": " + ec.message());
215	return orderMap;
216	}
217	// Print the symbols ordered by C3, in the order of increasing curOrder
218	// Instead of sorting all the orderMap, just repeat the loops above.
219	for (int leader : sorted)
220	for (int i = leader;;) {
221	const SectionChunk *sc = sections [i];
222
223	// Search all the symbols in the file of the section
224	// and find out a DefinedCOFF symbol with name that is within the
225	// section.
226	for (Symbol *sym : sc->file->getSymbols())
227	if (auto *d = dyn_cast_or_null<DefinedCOFF>(Val: sym))
228	// Filter out non-COMDAT symbols and section symbols.
229	if (d->isCOMDAT && !d->getCOFFSymbol().isSection() &&
230	sc == d->getChunk())
231	os << sym->getName() << "\n";
232	i = clusters [i].next;
233	if (i == leader)
234	break;
235	}
236	}
237
238	return orderMap;
239	}
240
241	// Sort sections by the profile data provided by /call-graph-ordering-file
242	//
243	// This first builds a call graph based on the profile data then merges sections
244	// according to the C³ heuristic. All clusters are then sorted by a density
245	// metric to further improve locality.
246	DenseMap<const SectionChunk , int*>
247	coff::computeCallGraphProfileOrder(const COFFLinkerContext &ctx) {
248	return CallGraphSort (ctx).run();
249	}
250

source code of lld/COFF/CallGraphSort.cpp