1//===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements ReorderFunctions class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/ReorderFunctions.h"
14#include "bolt/Passes/HFSort.h"
15#include "bolt/Utils/Utils.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Transforms/Utils/CodeLayout.h"
19#include <fstream>
20
21#define DEBUG_TYPE "hfsort"
22
23using namespace llvm;
24
25namespace opts {
26
27extern cl::OptionCategory BoltOptCategory;
28extern cl::opt<unsigned> Verbosity;
29extern cl::opt<uint32_t> RandomSeed;
30
31extern size_t padFunctionBefore(const bolt::BinaryFunction &Function);
32extern size_t padFunctionAfter(const bolt::BinaryFunction &Function);
33
34extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
35cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
36 "reorder-functions",
37 cl::desc("reorder and cluster functions (works only with relocations)"),
38 cl::init(Val: bolt::ReorderFunctions::RT_NONE),
39 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
40 "do not reorder functions"),
41 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
42 "order by execution count"),
43 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
44 "use hfsort algorithm"),
45 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
46 "use cache-directed sort"),
47 clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort",
48 "use cache-directed sort"),
49 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
50 "pettis-hansen", "use Pettis-Hansen algorithm"),
51 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
52 "reorder functions randomly"),
53 clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
54 "use function order specified by -function-order")),
55 cl::ZeroOrMore, cl::cat(BoltOptCategory),
56 cl::callback(CB: [](const bolt::ReorderFunctions::ReorderType &option) {
57 if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) {
58 errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated,"
59 << " please use '-reorder-functions=cdsort' instead\n";
60 ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT;
61 }
62 }));
63
64static cl::opt<bool> ReorderFunctionsUseHotSize(
65 "reorder-functions-use-hot-size",
66 cl::desc("use a function's hot size when doing clustering"), cl::init(Val: true),
67 cl::cat(BoltOptCategory));
68
69static cl::opt<std::string> FunctionOrderFile(
70 "function-order",
71 cl::desc("file containing an ordered list of functions to use for function "
72 "reordering"),
73 cl::cat(BoltOptCategory));
74
75static cl::opt<std::string> GenerateFunctionOrderFile(
76 "generate-function-order",
77 cl::desc("file to dump the ordered list of functions to use for function "
78 "reordering"),
79 cl::cat(BoltOptCategory));
80
81static cl::opt<std::string> LinkSectionsFile(
82 "generate-link-sections",
83 cl::desc("generate a list of function sections in a format suitable for "
84 "inclusion in a linker script"),
85 cl::cat(BoltOptCategory));
86
87static cl::opt<bool>
88 UseEdgeCounts("use-edge-counts",
89 cl::desc("use edge count data when doing clustering"),
90 cl::init(Val: true), cl::cat(BoltOptCategory));
91
92static cl::opt<bool> CgFromPerfData(
93 "cg-from-perf-data",
94 cl::desc("use perf data directly when constructing the call graph"
95 " for stale functions"),
96 cl::init(Val: true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
97
98static cl::opt<bool> CgIgnoreRecursiveCalls(
99 "cg-ignore-recursive-calls",
100 cl::desc("ignore recursive calls when constructing the call graph"),
101 cl::init(Val: true), cl::cat(BoltOptCategory));
102
103static cl::opt<bool> CgUseSplitHotSize(
104 "cg-use-split-hot-size",
105 cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
106 "call graph functions"),
107 cl::init(Val: false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
108
109} // namespace opts
110
111namespace llvm {
112namespace bolt {
113
114using NodeId = CallGraph::NodeId;
115using Arc = CallGraph::Arc;
116using Node = CallGraph::Node;
117
118void ReorderFunctions::reorder(BinaryContext &BC,
119 std::vector<Cluster> &&Clusters,
120 std::map<uint64_t, BinaryFunction> &BFs) {
121 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
122 uint64_t TotalSize = 0;
123 uint32_t Index = 0;
124
125 // Set order of hot functions based on clusters.
126 for (const Cluster &Cluster : Clusters) {
127 for (const NodeId FuncId : Cluster.targets()) {
128 Cg.nodeIdToFunc(Id: FuncId)->setIndex(Index++);
129 FuncAddr[FuncId] = TotalSize;
130 TotalSize += Cg.size(Id: FuncId);
131 }
132 }
133
134 // Assign valid index for functions with valid profile.
135 for (auto &It : BFs) {
136 BinaryFunction &BF = It.second;
137 if (!BF.hasValidIndex() && BF.hasValidProfile())
138 BF.setIndex(Index++);
139 }
140
141 if (opts::ReorderFunctions == RT_NONE)
142 return;
143
144 printStats(BC, Clusters, FuncAddr);
145}
146
147void ReorderFunctions::printStats(BinaryContext &BC,
148 const std::vector<Cluster> &Clusters,
149 const std::vector<uint64_t> &FuncAddr) {
150 if (opts::Verbosity == 0) {
151#ifndef NDEBUG
152 if (!DebugFlag || !isCurrentDebugType(Type: "hfsort"))
153 return;
154#else
155 return;
156#endif
157 }
158
159 bool PrintDetailed = opts::Verbosity > 1;
160#ifndef NDEBUG
161 PrintDetailed |=
162 (DebugFlag && isCurrentDebugType(Type: "hfsort") && opts::Verbosity > 0);
163#endif
164 uint64_t TotalSize = 0;
165 uint64_t CurPage = 0;
166 uint64_t Hotfuncs = 0;
167 double TotalDistance = 0;
168 double TotalCalls = 0;
169 double TotalCalls64B = 0;
170 double TotalCalls4KB = 0;
171 double TotalCalls2MB = 0;
172 if (PrintDetailed)
173 BC.outs() << "BOLT-INFO: Function reordering page layout\n"
174 << "BOLT-INFO: ============== page 0 ==============\n";
175 for (const Cluster &Cluster : Clusters) {
176 if (PrintDetailed)
177 BC.outs() << format(
178 Fmt: "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
179 Vals: Cluster.density(), Vals: Cluster.samples(), Vals: Cluster.size());
180
181 for (NodeId FuncId : Cluster.targets()) {
182 if (Cg.samples(Id: FuncId) > 0) {
183 Hotfuncs++;
184
185 if (PrintDetailed)
186 BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(Id: FuncId)
187 << " (" << Cg.size(Id: FuncId) << ")\n";
188
189 uint64_t Dist = 0;
190 uint64_t Calls = 0;
191 for (NodeId Dst : Cg.successors(Id: FuncId)) {
192 if (FuncId == Dst) // ignore recursive calls in stats
193 continue;
194 const Arc &Arc = *Cg.findArc(Src: FuncId, Dst);
195 const auto D = std::abs(x: FuncAddr[Arc.dst()] -
196 (FuncAddr[FuncId] + Arc.avgCallOffset()));
197 const double W = Arc.weight();
198 if (D < 64 && PrintDetailed && opts::Verbosity > 2)
199 BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n"
200 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(Id: FuncId)
201 << "\n"
202 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Id: Dst) << "\n"
203 << "BOLT-INFO: Weight = " << W << "\n"
204 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset()
205 << "\n";
206 Calls += W;
207 if (D < 64)
208 TotalCalls64B += W;
209 if (D < 4096)
210 TotalCalls4KB += W;
211 if (D < (2 << 20))
212 TotalCalls2MB += W;
213 Dist += Arc.weight() * D;
214 if (PrintDetailed)
215 BC.outs() << format(Fmt: "BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
216 "weight = %.0lf, callDist = %f\n",
217 Vals: Arc.src(), Vals: FuncAddr[Arc.src()],
218 Vals: Arc.avgCallOffset(), Vals: Arc.dst(),
219 Vals: FuncAddr[Arc.dst()], Vals: Arc.weight(), Vals: D);
220 }
221 TotalCalls += Calls;
222 TotalDistance += Dist;
223 TotalSize += Cg.size(Id: FuncId);
224
225 if (PrintDetailed) {
226 BC.outs() << format(Fmt: "BOLT-INFO: start = %6u : avgCallDist = %lu : ",
227 Vals: TotalSize, Vals: Calls ? Dist / Calls : 0)
228 << Cg.nodeIdToFunc(Id: FuncId)->getPrintName() << '\n';
229 const uint64_t NewPage = TotalSize / HugePageSize;
230 if (NewPage != CurPage) {
231 CurPage = NewPage;
232 BC.outs() << format(
233 Fmt: "BOLT-INFO: ============== page %u ==============\n", Vals: CurPage);
234 }
235 }
236 }
237 }
238 }
239 BC.outs() << "BOLT-INFO: Function reordering stats\n"
240 << format(Fmt: "BOLT-INFO: Number of hot functions: %u\n"
241 "BOLT-INFO: Number of clusters: %lu\n",
242 Vals: Hotfuncs, Vals: Clusters.size())
243 << format(Fmt: "BOLT-INFO: Final average call distance = %.1lf "
244 "(%.0lf / %.0lf)\n",
245 Vals: TotalCalls ? TotalDistance / TotalCalls : 0,
246 Vals: TotalDistance, Vals: TotalCalls)
247 << format(Fmt: "BOLT-INFO: Total Calls = %.0lf\n", Vals: TotalCalls);
248 if (TotalCalls)
249 BC.outs()
250 << format(Fmt: "BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n",
251 Vals: TotalCalls64B, Vals: 100 * TotalCalls64B / TotalCalls)
252 << format(Fmt: "BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n",
253 Vals: TotalCalls4KB, Vals: 100 * TotalCalls4KB / TotalCalls)
254 << format(Fmt: "BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n",
255 Vals: TotalCalls2MB, Vals: 100 * TotalCalls2MB / TotalCalls);
256}
257
258Error ReorderFunctions::readFunctionOrderFile(
259 std::vector<std::string> &FunctionNames) {
260 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
261 if (!FuncsFile)
262 return createFatalBOLTError(S: Twine("Ordered functions file \"") +
263 Twine(opts::FunctionOrderFile) +
264 Twine("\" can't be opened."));
265
266 std::string FuncName;
267 while (std::getline(is&: FuncsFile, str&: FuncName))
268 FunctionNames.push_back(x: FuncName);
269 return Error::success();
270}
271
272Error ReorderFunctions::runOnFunctions(BinaryContext &BC) {
273 auto &BFs = BC.getBinaryFunctions();
274 if (opts::ReorderFunctions != RT_NONE &&
275 opts::ReorderFunctions != RT_EXEC_COUNT &&
276 opts::ReorderFunctions != RT_USER) {
277 Cg = buildCallGraph(
278 BC,
279 Filter: [](const BinaryFunction &BF) {
280 if (!BF.hasProfile())
281 return true;
282 if (BF.getState() != BinaryFunction::State::CFG)
283 return true;
284 return false;
285 },
286 CgFromPerfData: opts::CgFromPerfData,
287 /*IncludeSplitCalls=*/false, UseFunctionHotSize: opts::ReorderFunctionsUseHotSize,
288 UseSplitHotSize: opts::CgUseSplitHotSize, UseEdgeCounts: opts::UseEdgeCounts,
289 IgnoreRecursiveCalls: opts::CgIgnoreRecursiveCalls);
290 Cg.normalizeArcWeights();
291 }
292
293 std::vector<Cluster> Clusters;
294
295 switch (opts::ReorderFunctions) {
296 case RT_NONE:
297 break;
298 case RT_EXEC_COUNT: {
299 std::vector<BinaryFunction *> SortedFunctions(BFs.size());
300 llvm::transform(Range: llvm::make_second_range(c&: BFs), d_first: SortedFunctions.begin(),
301 F: [](BinaryFunction &BF) { return &BF; });
302 llvm::stable_sort(Range&: SortedFunctions,
303 C: [&](const BinaryFunction *A, const BinaryFunction *B) {
304 if (A->isIgnored())
305 return false;
306 if (B->isIgnored())
307 return true;
308 const size_t PadA = opts::padFunctionBefore(Function: *A) +
309 opts::padFunctionAfter(Function: *A);
310 const size_t PadB = opts::padFunctionBefore(Function: *B) +
311 opts::padFunctionAfter(Function: *B);
312 if (!PadA || !PadB) {
313 if (PadA)
314 return true;
315 if (PadB)
316 return false;
317 }
318 if (!A->hasProfile())
319 return false;
320 if (!B->hasProfile())
321 return true;
322 return A->getExecutionCount() > B->getExecutionCount();
323 });
324 uint32_t Index = 0;
325 for (BinaryFunction *BF : SortedFunctions)
326 if (BF->hasProfile()) {
327 BF->setIndex(Index++);
328 LLVM_DEBUG(if (opts::Verbosity > 1) {
329 dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " ("
330 << BF->getExecutionCount() << ")\n";
331 });
332 }
333 } break;
334 case RT_HFSORT:
335 Clusters = clusterize(Cg);
336 break;
337 case RT_CDSORT: {
338 // It is required that the sum of incoming arc weights is not greater
339 // than the number of samples for every function. Ensuring the call graph
340 // obeys the property before running the algorithm.
341 Cg.adjustArcWeights();
342
343 // Initialize CFG nodes and their data
344 std::vector<uint64_t> FuncSizes;
345 std::vector<uint64_t> FuncCounts;
346 std::vector<codelayout::EdgeCount> CallCounts;
347 std::vector<uint64_t> CallOffsets;
348 for (NodeId F = 0; F < Cg.numNodes(); ++F) {
349 FuncSizes.push_back(x: Cg.size(Id: F));
350 FuncCounts.push_back(x: Cg.samples(Id: F));
351 for (NodeId Succ : Cg.successors(Id: F)) {
352 const Arc &Arc = *Cg.findArc(Src: F, Dst: Succ);
353 CallCounts.push_back(x: {.src: F, .dst: Succ, .count: uint64_t(Arc.weight())});
354 CallOffsets.push_back(x: uint64_t(Arc.avgCallOffset()));
355 }
356 }
357
358 // Run the layout algorithm.
359 std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout(
360 FuncSizes, FuncCounts, CallCounts, CallOffsets);
361
362 // Create a single cluster from the computed order of hot functions.
363 std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());
364 Clusters.emplace_back(args: Cluster(NodeOrder, Cg));
365 } break;
366 case RT_PETTIS_HANSEN:
367 Clusters = pettisAndHansen(Cg);
368 break;
369 case RT_RANDOM:
370 std::srand(seed: opts::RandomSeed);
371 Clusters = randomClusters(Cg);
372 break;
373 case RT_USER: {
374 // Build LTOCommonNameMap
375 StringMap<std::vector<uint64_t>> LTOCommonNameMap;
376 for (const BinaryFunction &BF : llvm::make_second_range(c&: BFs))
377 for (StringRef Name : BF.getNames())
378 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
379 LTOCommonNameMap[*LTOCommonName].push_back(x: BF.getAddress());
380
381 uint32_t Index = 0;
382 uint32_t InvalidEntries = 0;
383 std::vector<std::string> FunctionNames;
384 if (Error E = readFunctionOrderFile(FunctionNames))
385 return Error(std::move(E));
386
387 for (const std::string &Function : FunctionNames) {
388 std::vector<uint64_t> FuncAddrs;
389
390 BinaryData *BD = BC.getBinaryDataByName(Name: Function);
391 if (!BD) {
392 // If we can't find the main symbol name, look for alternates.
393 uint32_t LocalID = 1;
394 while (true) {
395 const std::string FuncName = Function + "/" + std::to_string(val: LocalID);
396 BD = BC.getBinaryDataByName(Name: FuncName);
397 if (BD)
398 FuncAddrs.push_back(x: BD->getAddress());
399 else
400 break;
401 LocalID++;
402 }
403 // Strip LTO suffixes
404 if (std::optional<StringRef> CommonName = getLTOCommonName(Name: Function))
405 if (LTOCommonNameMap.contains(Key: *CommonName))
406 llvm::append_range(C&: FuncAddrs, R&: LTOCommonNameMap[*CommonName]);
407 } else {
408 FuncAddrs.push_back(x: BD->getAddress());
409 }
410
411 if (FuncAddrs.empty()) {
412 if (opts::Verbosity >= 1)
413 BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
414 << "for " << Function << "\n";
415 ++InvalidEntries;
416 continue;
417 }
418
419 for (const uint64_t FuncAddr : FuncAddrs) {
420 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(Address: FuncAddr);
421 assert(FuncBD);
422
423 BinaryFunction *BF = BC.getFunctionForSymbol(Symbol: FuncBD->getSymbol());
424 if (!BF) {
425 if (opts::Verbosity >= 1)
426 BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
427 << "for " << Function << "\n";
428 ++InvalidEntries;
429 break;
430 }
431 if (!BF->hasValidIndex())
432 BF->setIndex(Index++);
433 else if (opts::Verbosity > 0)
434 BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
435 << "\n";
436 }
437 }
438 if (InvalidEntries)
439 BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
440 << InvalidEntries << " entries in -function-order list\n";
441 } break;
442
443 default:
444 llvm_unreachable("unexpected layout type");
445 }
446
447 reorder(BC, Clusters: std::move(Clusters), BFs);
448
449 BC.HasFinalizedFunctionOrder = true;
450
451 std::unique_ptr<std::ofstream> FuncsFile;
452 if (!opts::GenerateFunctionOrderFile.empty()) {
453 FuncsFile = std::make_unique<std::ofstream>(args&: opts::GenerateFunctionOrderFile,
454 args: std::ios::out);
455 if (!FuncsFile) {
456 BC.errs() << "BOLT-ERROR: ordered functions file "
457 << opts::GenerateFunctionOrderFile << " cannot be opened\n";
458 return createFatalBOLTError(S: "");
459 }
460 }
461
462 std::unique_ptr<std::ofstream> LinkSectionsFile;
463 if (!opts::LinkSectionsFile.empty()) {
464 LinkSectionsFile =
465 std::make_unique<std::ofstream>(args&: opts::LinkSectionsFile, args: std::ios::out);
466 if (!LinkSectionsFile) {
467 BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
468 << " cannot be opened\n";
469 return createFatalBOLTError(S: "");
470 }
471 }
472
473 if (FuncsFile || LinkSectionsFile) {
474 std::vector<BinaryFunction *> SortedFunctions(BFs.size());
475 llvm::transform(Range: llvm::make_second_range(c&: BFs), d_first: SortedFunctions.begin(),
476 F: [](BinaryFunction &BF) { return &BF; });
477
478 // Sort functions by index.
479 llvm::stable_sort(Range&: SortedFunctions, C: compareBinaryFunctionByIndex);
480
481 for (const BinaryFunction *Func : SortedFunctions) {
482 if (!Func->hasValidIndex())
483 break;
484 if (Func->isPLTFunction())
485 continue;
486
487 if (FuncsFile)
488 *FuncsFile << Func->getOneName().str() << '\n';
489
490 if (LinkSectionsFile) {
491 const char *Indent = "";
492 std::vector<StringRef> AllNames = Func->getNames();
493 llvm::sort(C&: AllNames);
494 for (StringRef Name : AllNames) {
495 const size_t SlashPos = Name.find(C: '/');
496 if (SlashPos != std::string::npos) {
497 // Avoid duplicates for local functions.
498 if (Name.find(C: '/', From: SlashPos + 1) != std::string::npos)
499 continue;
500 Name = Name.substr(Start: 0, N: SlashPos);
501 }
502 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
503 Indent = " ";
504 }
505 }
506 }
507
508 if (FuncsFile) {
509 FuncsFile->close();
510 BC.outs() << "BOLT-INFO: dumped function order to "
511 << opts::GenerateFunctionOrderFile << '\n';
512 }
513
514 if (LinkSectionsFile) {
515 LinkSectionsFile->close();
516 BC.outs() << "BOLT-INFO: dumped linker section order to "
517 << opts::LinkSectionsFile << '\n';
518 }
519 }
520 return Error::success();
521}
522
523} // namespace bolt
524} // namespace llvm
525

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of bolt/lib/Passes/ReorderFunctions.cpp