1//===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements ReorderFunctions class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/ReorderFunctions.h"
14#include "bolt/Passes/HFSort.h"
15#include "bolt/Utils/Utils.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Transforms/Utils/CodeLayout.h"
19#include <fstream>
20
21#define DEBUG_TYPE "hfsort"
22
23using namespace llvm;
24
25namespace opts {
26
27extern cl::OptionCategory BoltOptCategory;
28extern cl::opt<unsigned> Verbosity;
29extern cl::opt<uint32_t> RandomSeed;
30
31extern size_t padFunction(const bolt::BinaryFunction &Function);
32
33extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
34cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
35 "reorder-functions",
36 cl::desc("reorder and cluster functions (works only with relocations)"),
37 cl::init(Val: bolt::ReorderFunctions::RT_NONE),
38 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
39 "do not reorder functions"),
40 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
41 "order by execution count"),
42 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
43 "use hfsort algorithm"),
44 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
45 "use cache-directed sort"),
46 clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort",
47 "use cache-directed sort"),
48 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
49 "pettis-hansen", "use Pettis-Hansen algorithm"),
50 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
51 "reorder functions randomly"),
52 clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
53 "use function order specified by -function-order")),
54 cl::ZeroOrMore, cl::cat(BoltOptCategory),
55 cl::callback(CB: [](const bolt::ReorderFunctions::ReorderType &option) {
56 if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) {
57 errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated,"
58 << " please use '-reorder-functions=cdsort' instead\n";
59 ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT;
60 }
61 }));
62
63static cl::opt<bool> ReorderFunctionsUseHotSize(
64 "reorder-functions-use-hot-size",
65 cl::desc("use a function's hot size when doing clustering"), cl::init(Val: true),
66 cl::cat(BoltOptCategory));
67
68static cl::opt<std::string> FunctionOrderFile(
69 "function-order",
70 cl::desc("file containing an ordered list of functions to use for function "
71 "reordering"),
72 cl::cat(BoltOptCategory));
73
74static cl::opt<std::string> GenerateFunctionOrderFile(
75 "generate-function-order",
76 cl::desc("file to dump the ordered list of functions to use for function "
77 "reordering"),
78 cl::cat(BoltOptCategory));
79
80static cl::opt<std::string> LinkSectionsFile(
81 "generate-link-sections",
82 cl::desc("generate a list of function sections in a format suitable for "
83 "inclusion in a linker script"),
84 cl::cat(BoltOptCategory));
85
86static cl::opt<bool>
87 UseEdgeCounts("use-edge-counts",
88 cl::desc("use edge count data when doing clustering"),
89 cl::init(Val: true), cl::cat(BoltOptCategory));
90
91static cl::opt<bool> CgFromPerfData(
92 "cg-from-perf-data",
93 cl::desc("use perf data directly when constructing the call graph"
94 " for stale functions"),
95 cl::init(Val: true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
96
97static cl::opt<bool> CgIgnoreRecursiveCalls(
98 "cg-ignore-recursive-calls",
99 cl::desc("ignore recursive calls when constructing the call graph"),
100 cl::init(Val: true), cl::cat(BoltOptCategory));
101
102static cl::opt<bool> CgUseSplitHotSize(
103 "cg-use-split-hot-size",
104 cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
105 "call graph functions"),
106 cl::init(Val: false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
107
108} // namespace opts
109
110namespace llvm {
111namespace bolt {
112
113using NodeId = CallGraph::NodeId;
114using Arc = CallGraph::Arc;
115using Node = CallGraph::Node;
116
117void ReorderFunctions::reorder(BinaryContext &BC,
118 std::vector<Cluster> &&Clusters,
119 std::map<uint64_t, BinaryFunction> &BFs) {
120 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
121 uint64_t TotalSize = 0;
122 uint32_t Index = 0;
123
124 // Set order of hot functions based on clusters.
125 for (const Cluster &Cluster : Clusters) {
126 for (const NodeId FuncId : Cluster.targets()) {
127 Cg.nodeIdToFunc(Id: FuncId)->setIndex(Index++);
128 FuncAddr[FuncId] = TotalSize;
129 TotalSize += Cg.size(Id: FuncId);
130 }
131 }
132
133 // Assign valid index for functions with valid profile.
134 for (auto &It : BFs) {
135 BinaryFunction &BF = It.second;
136 if (!BF.hasValidIndex() && BF.hasValidProfile())
137 BF.setIndex(Index++);
138 }
139
140 if (opts::ReorderFunctions == RT_NONE)
141 return;
142
143 printStats(BC, Clusters, FuncAddr);
144}
145
146void ReorderFunctions::printStats(BinaryContext &BC,
147 const std::vector<Cluster> &Clusters,
148 const std::vector<uint64_t> &FuncAddr) {
149 if (opts::Verbosity == 0) {
150#ifndef NDEBUG
151 if (!DebugFlag || !isCurrentDebugType(Type: "hfsort"))
152 return;
153#else
154 return;
155#endif
156 }
157
158 bool PrintDetailed = opts::Verbosity > 1;
159#ifndef NDEBUG
160 PrintDetailed |=
161 (DebugFlag && isCurrentDebugType(Type: "hfsort") && opts::Verbosity > 0);
162#endif
163 uint64_t TotalSize = 0;
164 uint64_t CurPage = 0;
165 uint64_t Hotfuncs = 0;
166 double TotalDistance = 0;
167 double TotalCalls = 0;
168 double TotalCalls64B = 0;
169 double TotalCalls4KB = 0;
170 double TotalCalls2MB = 0;
171 if (PrintDetailed)
172 BC.outs() << "BOLT-INFO: Function reordering page layout\n"
173 << "BOLT-INFO: ============== page 0 ==============\n";
174 for (const Cluster &Cluster : Clusters) {
175 if (PrintDetailed)
176 BC.outs() << format(
177 Fmt: "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
178 Vals: Cluster.density(), Vals: Cluster.samples(), Vals: Cluster.size());
179
180 for (NodeId FuncId : Cluster.targets()) {
181 if (Cg.samples(Id: FuncId) > 0) {
182 Hotfuncs++;
183
184 if (PrintDetailed)
185 BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(Id: FuncId)
186 << " (" << Cg.size(Id: FuncId) << ")\n";
187
188 uint64_t Dist = 0;
189 uint64_t Calls = 0;
190 for (NodeId Dst : Cg.successors(Id: FuncId)) {
191 if (FuncId == Dst) // ignore recursive calls in stats
192 continue;
193 const Arc &Arc = *Cg.findArc(Src: FuncId, Dst);
194 const auto D = std::abs(x: FuncAddr[Arc.dst()] -
195 (FuncAddr[FuncId] + Arc.avgCallOffset()));
196 const double W = Arc.weight();
197 if (D < 64 && PrintDetailed && opts::Verbosity > 2)
198 BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n"
199 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(Id: FuncId)
200 << "\n"
201 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Id: Dst) << "\n"
202 << "BOLT-INFO: Weight = " << W << "\n"
203 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset()
204 << "\n";
205 Calls += W;
206 if (D < 64)
207 TotalCalls64B += W;
208 if (D < 4096)
209 TotalCalls4KB += W;
210 if (D < (2 << 20))
211 TotalCalls2MB += W;
212 Dist += Arc.weight() * D;
213 if (PrintDetailed)
214 BC.outs() << format(Fmt: "BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
215 "weight = %.0lf, callDist = %f\n",
216 Vals: Arc.src(), Vals: FuncAddr[Arc.src()],
217 Vals: Arc.avgCallOffset(), Vals: Arc.dst(),
218 Vals: FuncAddr[Arc.dst()], Vals: Arc.weight(), Vals: D);
219 }
220 TotalCalls += Calls;
221 TotalDistance += Dist;
222 TotalSize += Cg.size(Id: FuncId);
223
224 if (PrintDetailed) {
225 BC.outs() << format(Fmt: "BOLT-INFO: start = %6u : avgCallDist = %lu : ",
226 Vals: TotalSize, Vals: Calls ? Dist / Calls : 0)
227 << Cg.nodeIdToFunc(Id: FuncId)->getPrintName() << '\n';
228 const uint64_t NewPage = TotalSize / HugePageSize;
229 if (NewPage != CurPage) {
230 CurPage = NewPage;
231 BC.outs() << format(
232 Fmt: "BOLT-INFO: ============== page %u ==============\n", Vals: CurPage);
233 }
234 }
235 }
236 }
237 }
238 BC.outs() << "BOLT-INFO: Function reordering stats\n"
239 << format(Fmt: "BOLT-INFO: Number of hot functions: %u\n"
240 "BOLT-INFO: Number of clusters: %lu\n",
241 Vals: Hotfuncs, Vals: Clusters.size())
242 << format(Fmt: "BOLT-INFO: Final average call distance = %.1lf "
243 "(%.0lf / %.0lf)\n",
244 Vals: TotalCalls ? TotalDistance / TotalCalls : 0,
245 Vals: TotalDistance, Vals: TotalCalls)
246 << format(Fmt: "BOLT-INFO: Total Calls = %.0lf\n", Vals: TotalCalls);
247 if (TotalCalls)
248 BC.outs()
249 << format(Fmt: "BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n",
250 Vals: TotalCalls64B, Vals: 100 * TotalCalls64B / TotalCalls)
251 << format(Fmt: "BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n",
252 Vals: TotalCalls4KB, Vals: 100 * TotalCalls4KB / TotalCalls)
253 << format(Fmt: "BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n",
254 Vals: TotalCalls2MB, Vals: 100 * TotalCalls2MB / TotalCalls);
255}
256
257Error ReorderFunctions::readFunctionOrderFile(
258 std::vector<std::string> &FunctionNames) {
259 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
260 if (!FuncsFile)
261 return createFatalBOLTError(S: Twine("Ordered functions file \"") +
262 Twine(opts::FunctionOrderFile) +
263 Twine("\" can't be opened."));
264
265 std::string FuncName;
266 while (std::getline(is&: FuncsFile, str&: FuncName))
267 FunctionNames.push_back(x: FuncName);
268 return Error::success();
269}
270
271Error ReorderFunctions::runOnFunctions(BinaryContext &BC) {
272 auto &BFs = BC.getBinaryFunctions();
273 if (opts::ReorderFunctions != RT_NONE &&
274 opts::ReorderFunctions != RT_EXEC_COUNT &&
275 opts::ReorderFunctions != RT_USER) {
276 Cg = buildCallGraph(
277 BC,
278 Filter: [](const BinaryFunction &BF) {
279 if (!BF.hasProfile())
280 return true;
281 if (BF.getState() != BinaryFunction::State::CFG)
282 return true;
283 return false;
284 },
285 CgFromPerfData: opts::CgFromPerfData,
286 /*IncludeSplitCalls=*/false, UseFunctionHotSize: opts::ReorderFunctionsUseHotSize,
287 UseSplitHotSize: opts::CgUseSplitHotSize, UseEdgeCounts: opts::UseEdgeCounts,
288 IgnoreRecursiveCalls: opts::CgIgnoreRecursiveCalls);
289 Cg.normalizeArcWeights();
290 }
291
292 std::vector<Cluster> Clusters;
293
294 switch (opts::ReorderFunctions) {
295 case RT_NONE:
296 break;
297 case RT_EXEC_COUNT: {
298 std::vector<BinaryFunction *> SortedFunctions(BFs.size());
299 llvm::transform(Range: llvm::make_second_range(c&: BFs), d_first: SortedFunctions.begin(),
300 F: [](BinaryFunction &BF) { return &BF; });
301 llvm::stable_sort(Range&: SortedFunctions,
302 C: [&](const BinaryFunction *A, const BinaryFunction *B) {
303 if (A->isIgnored())
304 return false;
305 if (B->isIgnored())
306 return true;
307 const size_t PadA = opts::padFunction(Function: *A);
308 const size_t PadB = opts::padFunction(Function: *B);
309 if (!PadA || !PadB) {
310 if (PadA)
311 return true;
312 if (PadB)
313 return false;
314 }
315 if (!A->hasProfile())
316 return false;
317 if (!B->hasProfile())
318 return true;
319 return A->getExecutionCount() > B->getExecutionCount();
320 });
321 uint32_t Index = 0;
322 for (BinaryFunction *BF : SortedFunctions)
323 if (BF->hasProfile()) {
324 BF->setIndex(Index++);
325 LLVM_DEBUG(if (opts::Verbosity > 1) {
326 dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " ("
327 << BF->getExecutionCount() << ")\n";
328 });
329 }
330 } break;
331 case RT_HFSORT:
332 Clusters = clusterize(Cg);
333 break;
334 case RT_CDSORT: {
335 // It is required that the sum of incoming arc weights is not greater
336 // than the number of samples for every function. Ensuring the call graph
337 // obeys the property before running the algorithm.
338 Cg.adjustArcWeights();
339
340 // Initialize CFG nodes and their data
341 std::vector<uint64_t> FuncSizes;
342 std::vector<uint64_t> FuncCounts;
343 std::vector<codelayout::EdgeCount> CallCounts;
344 std::vector<uint64_t> CallOffsets;
345 for (NodeId F = 0; F < Cg.numNodes(); ++F) {
346 FuncSizes.push_back(x: Cg.size(Id: F));
347 FuncCounts.push_back(x: Cg.samples(Id: F));
348 for (NodeId Succ : Cg.successors(Id: F)) {
349 const Arc &Arc = *Cg.findArc(Src: F, Dst: Succ);
350 CallCounts.push_back(x: {.src: F, .dst: Succ, .count: uint64_t(Arc.weight())});
351 CallOffsets.push_back(x: uint64_t(Arc.avgCallOffset()));
352 }
353 }
354
355 // Run the layout algorithm.
356 std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout(
357 FuncSizes, FuncCounts, CallCounts, CallOffsets);
358
359 // Create a single cluster from the computed order of hot functions.
360 std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());
361 Clusters.emplace_back(args: Cluster(NodeOrder, Cg));
362 } break;
363 case RT_PETTIS_HANSEN:
364 Clusters = pettisAndHansen(Cg);
365 break;
366 case RT_RANDOM:
367 std::srand(seed: opts::RandomSeed);
368 Clusters = randomClusters(Cg);
369 break;
370 case RT_USER: {
371 // Build LTOCommonNameMap
372 StringMap<std::vector<uint64_t>> LTOCommonNameMap;
373 for (const BinaryFunction &BF : llvm::make_second_range(c&: BFs))
374 for (StringRef Name : BF.getNames())
375 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
376 LTOCommonNameMap[*LTOCommonName].push_back(x: BF.getAddress());
377
378 uint32_t Index = 0;
379 uint32_t InvalidEntries = 0;
380 std::vector<std::string> FunctionNames;
381 if (Error E = readFunctionOrderFile(FunctionNames))
382 return Error(std::move(E));
383
384 for (const std::string &Function : FunctionNames) {
385 std::vector<uint64_t> FuncAddrs;
386
387 BinaryData *BD = BC.getBinaryDataByName(Name: Function);
388 if (!BD) {
389 // If we can't find the main symbol name, look for alternates.
390 uint32_t LocalID = 1;
391 while (true) {
392 const std::string FuncName = Function + "/" + std::to_string(val: LocalID);
393 BD = BC.getBinaryDataByName(Name: FuncName);
394 if (BD)
395 FuncAddrs.push_back(x: BD->getAddress());
396 else
397 break;
398 LocalID++;
399 }
400 // Strip LTO suffixes
401 if (std::optional<StringRef> CommonName = getLTOCommonName(Name: Function))
402 if (LTOCommonNameMap.contains(Key: *CommonName))
403 llvm::append_range(C&: FuncAddrs, R&: LTOCommonNameMap[*CommonName]);
404 } else {
405 FuncAddrs.push_back(x: BD->getAddress());
406 }
407
408 if (FuncAddrs.empty()) {
409 if (opts::Verbosity >= 1)
410 BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
411 << "for " << Function << "\n";
412 ++InvalidEntries;
413 continue;
414 }
415
416 for (const uint64_t FuncAddr : FuncAddrs) {
417 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(Address: FuncAddr);
418 assert(FuncBD);
419
420 BinaryFunction *BF = BC.getFunctionForSymbol(Symbol: FuncBD->getSymbol());
421 if (!BF) {
422 if (opts::Verbosity >= 1)
423 BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
424 << "for " << Function << "\n";
425 ++InvalidEntries;
426 break;
427 }
428 if (!BF->hasValidIndex())
429 BF->setIndex(Index++);
430 else if (opts::Verbosity > 0)
431 BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
432 << "\n";
433 }
434 }
435 if (InvalidEntries)
436 BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
437 << InvalidEntries << " entries in -function-order list\n";
438 } break;
439
440 default:
441 llvm_unreachable("unexpected layout type");
442 }
443
444 reorder(BC, Clusters: std::move(Clusters), BFs);
445
446 BC.HasFinalizedFunctionOrder = true;
447
448 std::unique_ptr<std::ofstream> FuncsFile;
449 if (!opts::GenerateFunctionOrderFile.empty()) {
450 FuncsFile = std::make_unique<std::ofstream>(args&: opts::GenerateFunctionOrderFile,
451 args: std::ios::out);
452 if (!FuncsFile) {
453 BC.errs() << "BOLT-ERROR: ordered functions file "
454 << opts::GenerateFunctionOrderFile << " cannot be opened\n";
455 return createFatalBOLTError(S: "");
456 }
457 }
458
459 std::unique_ptr<std::ofstream> LinkSectionsFile;
460 if (!opts::LinkSectionsFile.empty()) {
461 LinkSectionsFile =
462 std::make_unique<std::ofstream>(args&: opts::LinkSectionsFile, args: std::ios::out);
463 if (!LinkSectionsFile) {
464 BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
465 << " cannot be opened\n";
466 return createFatalBOLTError(S: "");
467 }
468 }
469
470 if (FuncsFile || LinkSectionsFile) {
471 std::vector<BinaryFunction *> SortedFunctions(BFs.size());
472 llvm::transform(Range: llvm::make_second_range(c&: BFs), d_first: SortedFunctions.begin(),
473 F: [](BinaryFunction &BF) { return &BF; });
474
475 // Sort functions by index.
476 llvm::stable_sort(Range&: SortedFunctions,
477 C: [](const BinaryFunction *A, const BinaryFunction *B) {
478 if (A->hasValidIndex() && B->hasValidIndex())
479 return A->getIndex() < B->getIndex();
480 if (A->hasValidIndex() && !B->hasValidIndex())
481 return true;
482 if (!A->hasValidIndex() && B->hasValidIndex())
483 return false;
484 return A->getAddress() < B->getAddress();
485 });
486
487 for (const BinaryFunction *Func : SortedFunctions) {
488 if (!Func->hasValidIndex())
489 break;
490 if (Func->isPLTFunction())
491 continue;
492
493 if (FuncsFile)
494 *FuncsFile << Func->getOneName().str() << '\n';
495
496 if (LinkSectionsFile) {
497 const char *Indent = "";
498 std::vector<StringRef> AllNames = Func->getNames();
499 llvm::sort(C&: AllNames);
500 for (StringRef Name : AllNames) {
501 const size_t SlashPos = Name.find(C: '/');
502 if (SlashPos != std::string::npos) {
503 // Avoid duplicates for local functions.
504 if (Name.find(C: '/', From: SlashPos + 1) != std::string::npos)
505 continue;
506 Name = Name.substr(Start: 0, N: SlashPos);
507 }
508 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
509 Indent = " ";
510 }
511 }
512 }
513
514 if (FuncsFile) {
515 FuncsFile->close();
516 BC.outs() << "BOLT-INFO: dumped function order to "
517 << opts::GenerateFunctionOrderFile << '\n';
518 }
519
520 if (LinkSectionsFile) {
521 LinkSectionsFile->close();
522 BC.outs() << "BOLT-INFO: dumped linker section order to "
523 << opts::LinkSectionsFile << '\n';
524 }
525 }
526 return Error::success();
527}
528
529} // namespace bolt
530} // namespace llvm
531

source code of bolt/lib/Passes/ReorderFunctions.cpp