| 1 | //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements ReorderFunctions class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Passes/ReorderFunctions.h" |
| 14 | #include "bolt/Passes/HFSort.h" |
| 15 | #include "bolt/Utils/Utils.h" |
| 16 | #include "llvm/ADT/STLExtras.h" |
| 17 | #include "llvm/Support/CommandLine.h" |
| 18 | #include "llvm/Transforms/Utils/CodeLayout.h" |
| 19 | #include <fstream> |
| 20 | |
| 21 | #define DEBUG_TYPE "hfsort" |
| 22 | |
| 23 | using namespace llvm; |
| 24 | |
| 25 | namespace opts { |
| 26 | |
| 27 | extern cl::OptionCategory BoltOptCategory; |
| 28 | extern cl::opt<unsigned> Verbosity; |
| 29 | extern cl::opt<uint32_t> RandomSeed; |
| 30 | |
| 31 | extern size_t padFunctionBefore(const bolt::BinaryFunction &Function); |
| 32 | extern size_t padFunctionAfter(const bolt::BinaryFunction &Function); |
| 33 | |
| 34 | extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; |
| 35 | cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions( |
| 36 | "reorder-functions" , |
| 37 | cl::desc("reorder and cluster functions (works only with relocations)" ), |
| 38 | cl::init(Val: bolt::ReorderFunctions::RT_NONE), |
| 39 | cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none" , |
| 40 | "do not reorder functions" ), |
| 41 | clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count" , |
| 42 | "order by execution count" ), |
| 43 | clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort" , |
| 44 | "use hfsort algorithm" ), |
| 45 | clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+" , |
| 46 | "use cache-directed sort" ), |
| 47 | clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort" , |
| 48 | "use cache-directed sort" ), |
| 49 | clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, |
| 50 | "pettis-hansen" , "use Pettis-Hansen algorithm" ), |
| 51 | clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random" , |
| 52 | "reorder functions randomly" ), |
| 53 | clEnumValN(bolt::ReorderFunctions::RT_USER, "user" , |
| 54 | "use function order specified by -function-order" )), |
| 55 | cl::ZeroOrMore, cl::cat(BoltOptCategory), |
| 56 | cl::callback(CB: [](const bolt::ReorderFunctions::ReorderType &option) { |
| 57 | if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) { |
| 58 | errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated," |
| 59 | << " please use '-reorder-functions=cdsort' instead\n" ; |
| 60 | ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT; |
| 61 | } |
| 62 | })); |
| 63 | |
| 64 | static cl::opt<bool> ReorderFunctionsUseHotSize( |
| 65 | "reorder-functions-use-hot-size" , |
| 66 | cl::desc("use a function's hot size when doing clustering" ), cl::init(Val: true), |
| 67 | cl::cat(BoltOptCategory)); |
| 68 | |
| 69 | static cl::opt<std::string> FunctionOrderFile( |
| 70 | "function-order" , |
| 71 | cl::desc("file containing an ordered list of functions to use for function " |
| 72 | "reordering" ), |
| 73 | cl::cat(BoltOptCategory)); |
| 74 | |
| 75 | static cl::opt<std::string> GenerateFunctionOrderFile( |
| 76 | "generate-function-order" , |
| 77 | cl::desc("file to dump the ordered list of functions to use for function " |
| 78 | "reordering" ), |
| 79 | cl::cat(BoltOptCategory)); |
| 80 | |
| 81 | static cl::opt<std::string> LinkSectionsFile( |
| 82 | "generate-link-sections" , |
| 83 | cl::desc("generate a list of function sections in a format suitable for " |
| 84 | "inclusion in a linker script" ), |
| 85 | cl::cat(BoltOptCategory)); |
| 86 | |
| 87 | static cl::opt<bool> |
| 88 | UseEdgeCounts("use-edge-counts" , |
| 89 | cl::desc("use edge count data when doing clustering" ), |
| 90 | cl::init(Val: true), cl::cat(BoltOptCategory)); |
| 91 | |
| 92 | static cl::opt<bool> CgFromPerfData( |
| 93 | "cg-from-perf-data" , |
| 94 | cl::desc("use perf data directly when constructing the call graph" |
| 95 | " for stale functions" ), |
| 96 | cl::init(Val: true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); |
| 97 | |
| 98 | static cl::opt<bool> CgIgnoreRecursiveCalls( |
| 99 | "cg-ignore-recursive-calls" , |
| 100 | cl::desc("ignore recursive calls when constructing the call graph" ), |
| 101 | cl::init(Val: true), cl::cat(BoltOptCategory)); |
| 102 | |
| 103 | static cl::opt<bool> CgUseSplitHotSize( |
| 104 | "cg-use-split-hot-size" , |
| 105 | cl::desc("use hot/cold data on basic blocks to determine hot sizes for " |
| 106 | "call graph functions" ), |
| 107 | cl::init(Val: false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); |
| 108 | |
| 109 | } // namespace opts |
| 110 | |
| 111 | namespace llvm { |
| 112 | namespace bolt { |
| 113 | |
| 114 | using NodeId = CallGraph::NodeId; |
| 115 | using Arc = CallGraph::Arc; |
| 116 | using Node = CallGraph::Node; |
| 117 | |
| 118 | void ReorderFunctions::reorder(BinaryContext &BC, |
| 119 | std::vector<Cluster> &&Clusters, |
| 120 | std::map<uint64_t, BinaryFunction> &BFs) { |
| 121 | std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats |
| 122 | uint64_t TotalSize = 0; |
| 123 | uint32_t Index = 0; |
| 124 | |
| 125 | // Set order of hot functions based on clusters. |
| 126 | for (const Cluster &Cluster : Clusters) { |
| 127 | for (const NodeId FuncId : Cluster.targets()) { |
| 128 | Cg.nodeIdToFunc(Id: FuncId)->setIndex(Index++); |
| 129 | FuncAddr[FuncId] = TotalSize; |
| 130 | TotalSize += Cg.size(Id: FuncId); |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | // Assign valid index for functions with valid profile. |
| 135 | for (auto &It : BFs) { |
| 136 | BinaryFunction &BF = It.second; |
| 137 | if (!BF.hasValidIndex() && BF.hasValidProfile()) |
| 138 | BF.setIndex(Index++); |
| 139 | } |
| 140 | |
| 141 | if (opts::ReorderFunctions == RT_NONE) |
| 142 | return; |
| 143 | |
| 144 | printStats(BC, Clusters, FuncAddr); |
| 145 | } |
| 146 | |
| 147 | void ReorderFunctions::printStats(BinaryContext &BC, |
| 148 | const std::vector<Cluster> &Clusters, |
| 149 | const std::vector<uint64_t> &FuncAddr) { |
| 150 | if (opts::Verbosity == 0) { |
| 151 | #ifndef NDEBUG |
| 152 | if (!DebugFlag || !isCurrentDebugType(Type: "hfsort" )) |
| 153 | return; |
| 154 | #else |
| 155 | return; |
| 156 | #endif |
| 157 | } |
| 158 | |
| 159 | bool PrintDetailed = opts::Verbosity > 1; |
| 160 | #ifndef NDEBUG |
| 161 | PrintDetailed |= |
| 162 | (DebugFlag && isCurrentDebugType(Type: "hfsort" ) && opts::Verbosity > 0); |
| 163 | #endif |
| 164 | uint64_t TotalSize = 0; |
| 165 | uint64_t CurPage = 0; |
| 166 | uint64_t Hotfuncs = 0; |
| 167 | double TotalDistance = 0; |
| 168 | double TotalCalls = 0; |
| 169 | double TotalCalls64B = 0; |
| 170 | double TotalCalls4KB = 0; |
| 171 | double TotalCalls2MB = 0; |
| 172 | if (PrintDetailed) |
| 173 | BC.outs() << "BOLT-INFO: Function reordering page layout\n" |
| 174 | << "BOLT-INFO: ============== page 0 ==============\n" ; |
| 175 | for (const Cluster &Cluster : Clusters) { |
| 176 | if (PrintDetailed) |
| 177 | BC.outs() << format( |
| 178 | Fmt: "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n" , |
| 179 | Vals: Cluster.density(), Vals: Cluster.samples(), Vals: Cluster.size()); |
| 180 | |
| 181 | for (NodeId FuncId : Cluster.targets()) { |
| 182 | if (Cg.samples(Id: FuncId) > 0) { |
| 183 | Hotfuncs++; |
| 184 | |
| 185 | if (PrintDetailed) |
| 186 | BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(Id: FuncId) |
| 187 | << " (" << Cg.size(Id: FuncId) << ")\n" ; |
| 188 | |
| 189 | uint64_t Dist = 0; |
| 190 | uint64_t Calls = 0; |
| 191 | for (NodeId Dst : Cg.successors(Id: FuncId)) { |
| 192 | if (FuncId == Dst) // ignore recursive calls in stats |
| 193 | continue; |
| 194 | const Arc &Arc = *Cg.findArc(Src: FuncId, Dst); |
| 195 | const auto D = std::abs(x: FuncAddr[Arc.dst()] - |
| 196 | (FuncAddr[FuncId] + Arc.avgCallOffset())); |
| 197 | const double W = Arc.weight(); |
| 198 | if (D < 64 && PrintDetailed && opts::Verbosity > 2) |
| 199 | BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n" |
| 200 | << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(Id: FuncId) |
| 201 | << "\n" |
| 202 | << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Id: Dst) << "\n" |
| 203 | << "BOLT-INFO: Weight = " << W << "\n" |
| 204 | << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() |
| 205 | << "\n" ; |
| 206 | Calls += W; |
| 207 | if (D < 64) |
| 208 | TotalCalls64B += W; |
| 209 | if (D < 4096) |
| 210 | TotalCalls4KB += W; |
| 211 | if (D < (2 << 20)) |
| 212 | TotalCalls2MB += W; |
| 213 | Dist += Arc.weight() * D; |
| 214 | if (PrintDetailed) |
| 215 | BC.outs() << format(Fmt: "BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " |
| 216 | "weight = %.0lf, callDist = %f\n" , |
| 217 | Vals: Arc.src(), Vals: FuncAddr[Arc.src()], |
| 218 | Vals: Arc.avgCallOffset(), Vals: Arc.dst(), |
| 219 | Vals: FuncAddr[Arc.dst()], Vals: Arc.weight(), Vals: D); |
| 220 | } |
| 221 | TotalCalls += Calls; |
| 222 | TotalDistance += Dist; |
| 223 | TotalSize += Cg.size(Id: FuncId); |
| 224 | |
| 225 | if (PrintDetailed) { |
| 226 | BC.outs() << format(Fmt: "BOLT-INFO: start = %6u : avgCallDist = %lu : " , |
| 227 | Vals: TotalSize, Vals: Calls ? Dist / Calls : 0) |
| 228 | << Cg.nodeIdToFunc(Id: FuncId)->getPrintName() << '\n'; |
| 229 | const uint64_t NewPage = TotalSize / HugePageSize; |
| 230 | if (NewPage != CurPage) { |
| 231 | CurPage = NewPage; |
| 232 | BC.outs() << format( |
| 233 | Fmt: "BOLT-INFO: ============== page %u ==============\n" , Vals: CurPage); |
| 234 | } |
| 235 | } |
| 236 | } |
| 237 | } |
| 238 | } |
| 239 | BC.outs() << "BOLT-INFO: Function reordering stats\n" |
| 240 | << format(Fmt: "BOLT-INFO: Number of hot functions: %u\n" |
| 241 | "BOLT-INFO: Number of clusters: %lu\n" , |
| 242 | Vals: Hotfuncs, Vals: Clusters.size()) |
| 243 | << format(Fmt: "BOLT-INFO: Final average call distance = %.1lf " |
| 244 | "(%.0lf / %.0lf)\n" , |
| 245 | Vals: TotalCalls ? TotalDistance / TotalCalls : 0, |
| 246 | Vals: TotalDistance, Vals: TotalCalls) |
| 247 | << format(Fmt: "BOLT-INFO: Total Calls = %.0lf\n" , Vals: TotalCalls); |
| 248 | if (TotalCalls) |
| 249 | BC.outs() |
| 250 | << format(Fmt: "BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n" , |
| 251 | Vals: TotalCalls64B, Vals: 100 * TotalCalls64B / TotalCalls) |
| 252 | << format(Fmt: "BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n" , |
| 253 | Vals: TotalCalls4KB, Vals: 100 * TotalCalls4KB / TotalCalls) |
| 254 | << format(Fmt: "BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n" , |
| 255 | Vals: TotalCalls2MB, Vals: 100 * TotalCalls2MB / TotalCalls); |
| 256 | } |
| 257 | |
| 258 | Error ReorderFunctions::readFunctionOrderFile( |
| 259 | std::vector<std::string> &FunctionNames) { |
| 260 | std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); |
| 261 | if (!FuncsFile) |
| 262 | return createFatalBOLTError(S: Twine("Ordered functions file \"" ) + |
| 263 | Twine(opts::FunctionOrderFile) + |
| 264 | Twine("\" can't be opened." )); |
| 265 | |
| 266 | std::string FuncName; |
| 267 | while (std::getline(is&: FuncsFile, str&: FuncName)) |
| 268 | FunctionNames.push_back(x: FuncName); |
| 269 | return Error::success(); |
| 270 | } |
| 271 | |
| 272 | Error ReorderFunctions::runOnFunctions(BinaryContext &BC) { |
| 273 | auto &BFs = BC.getBinaryFunctions(); |
| 274 | if (opts::ReorderFunctions != RT_NONE && |
| 275 | opts::ReorderFunctions != RT_EXEC_COUNT && |
| 276 | opts::ReorderFunctions != RT_USER) { |
| 277 | Cg = buildCallGraph( |
| 278 | BC, |
| 279 | Filter: [](const BinaryFunction &BF) { |
| 280 | if (!BF.hasProfile()) |
| 281 | return true; |
| 282 | if (BF.getState() != BinaryFunction::State::CFG) |
| 283 | return true; |
| 284 | return false; |
| 285 | }, |
| 286 | CgFromPerfData: opts::CgFromPerfData, |
| 287 | /*IncludeSplitCalls=*/false, UseFunctionHotSize: opts::ReorderFunctionsUseHotSize, |
| 288 | UseSplitHotSize: opts::CgUseSplitHotSize, UseEdgeCounts: opts::UseEdgeCounts, |
| 289 | IgnoreRecursiveCalls: opts::CgIgnoreRecursiveCalls); |
| 290 | Cg.normalizeArcWeights(); |
| 291 | } |
| 292 | |
| 293 | std::vector<Cluster> Clusters; |
| 294 | |
| 295 | switch (opts::ReorderFunctions) { |
| 296 | case RT_NONE: |
| 297 | break; |
| 298 | case RT_EXEC_COUNT: { |
| 299 | std::vector<BinaryFunction *> SortedFunctions(BFs.size()); |
| 300 | llvm::transform(Range: llvm::make_second_range(c&: BFs), d_first: SortedFunctions.begin(), |
| 301 | F: [](BinaryFunction &BF) { return &BF; }); |
| 302 | llvm::stable_sort(Range&: SortedFunctions, |
| 303 | C: [&](const BinaryFunction *A, const BinaryFunction *B) { |
| 304 | if (A->isIgnored()) |
| 305 | return false; |
| 306 | if (B->isIgnored()) |
| 307 | return true; |
| 308 | const size_t PadA = opts::padFunctionBefore(Function: *A) + |
| 309 | opts::padFunctionAfter(Function: *A); |
| 310 | const size_t PadB = opts::padFunctionBefore(Function: *B) + |
| 311 | opts::padFunctionAfter(Function: *B); |
| 312 | if (!PadA || !PadB) { |
| 313 | if (PadA) |
| 314 | return true; |
| 315 | if (PadB) |
| 316 | return false; |
| 317 | } |
| 318 | if (!A->hasProfile()) |
| 319 | return false; |
| 320 | if (!B->hasProfile()) |
| 321 | return true; |
| 322 | return A->getExecutionCount() > B->getExecutionCount(); |
| 323 | }); |
| 324 | uint32_t Index = 0; |
| 325 | for (BinaryFunction *BF : SortedFunctions) |
| 326 | if (BF->hasProfile()) { |
| 327 | BF->setIndex(Index++); |
| 328 | LLVM_DEBUG(if (opts::Verbosity > 1) { |
| 329 | dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " (" |
| 330 | << BF->getExecutionCount() << ")\n" ; |
| 331 | }); |
| 332 | } |
| 333 | } break; |
| 334 | case RT_HFSORT: |
| 335 | Clusters = clusterize(Cg); |
| 336 | break; |
| 337 | case RT_CDSORT: { |
| 338 | // It is required that the sum of incoming arc weights is not greater |
| 339 | // than the number of samples for every function. Ensuring the call graph |
| 340 | // obeys the property before running the algorithm. |
| 341 | Cg.adjustArcWeights(); |
| 342 | |
| 343 | // Initialize CFG nodes and their data |
| 344 | std::vector<uint64_t> FuncSizes; |
| 345 | std::vector<uint64_t> FuncCounts; |
| 346 | std::vector<codelayout::EdgeCount> CallCounts; |
| 347 | std::vector<uint64_t> CallOffsets; |
| 348 | for (NodeId F = 0; F < Cg.numNodes(); ++F) { |
| 349 | FuncSizes.push_back(x: Cg.size(Id: F)); |
| 350 | FuncCounts.push_back(x: Cg.samples(Id: F)); |
| 351 | for (NodeId Succ : Cg.successors(Id: F)) { |
| 352 | const Arc &Arc = *Cg.findArc(Src: F, Dst: Succ); |
| 353 | CallCounts.push_back(x: {.src: F, .dst: Succ, .count: uint64_t(Arc.weight())}); |
| 354 | CallOffsets.push_back(x: uint64_t(Arc.avgCallOffset())); |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | // Run the layout algorithm. |
| 359 | std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout( |
| 360 | FuncSizes, FuncCounts, CallCounts, CallOffsets); |
| 361 | |
| 362 | // Create a single cluster from the computed order of hot functions. |
| 363 | std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end()); |
| 364 | Clusters.emplace_back(args: Cluster(NodeOrder, Cg)); |
| 365 | } break; |
| 366 | case RT_PETTIS_HANSEN: |
| 367 | Clusters = pettisAndHansen(Cg); |
| 368 | break; |
| 369 | case RT_RANDOM: |
| 370 | std::srand(seed: opts::RandomSeed); |
| 371 | Clusters = randomClusters(Cg); |
| 372 | break; |
| 373 | case RT_USER: { |
| 374 | // Build LTOCommonNameMap |
| 375 | StringMap<std::vector<uint64_t>> LTOCommonNameMap; |
| 376 | for (const BinaryFunction &BF : llvm::make_second_range(c&: BFs)) |
| 377 | for (StringRef Name : BF.getNames()) |
| 378 | if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name)) |
| 379 | LTOCommonNameMap[*LTOCommonName].push_back(x: BF.getAddress()); |
| 380 | |
| 381 | uint32_t Index = 0; |
| 382 | uint32_t InvalidEntries = 0; |
| 383 | std::vector<std::string> FunctionNames; |
| 384 | if (Error E = readFunctionOrderFile(FunctionNames)) |
| 385 | return Error(std::move(E)); |
| 386 | |
| 387 | for (const std::string &Function : FunctionNames) { |
| 388 | std::vector<uint64_t> FuncAddrs; |
| 389 | |
| 390 | BinaryData *BD = BC.getBinaryDataByName(Name: Function); |
| 391 | if (!BD) { |
| 392 | // If we can't find the main symbol name, look for alternates. |
| 393 | uint32_t LocalID = 1; |
| 394 | while (true) { |
| 395 | const std::string FuncName = Function + "/" + std::to_string(val: LocalID); |
| 396 | BD = BC.getBinaryDataByName(Name: FuncName); |
| 397 | if (BD) |
| 398 | FuncAddrs.push_back(x: BD->getAddress()); |
| 399 | else |
| 400 | break; |
| 401 | LocalID++; |
| 402 | } |
| 403 | // Strip LTO suffixes |
| 404 | if (std::optional<StringRef> CommonName = getLTOCommonName(Name: Function)) |
| 405 | if (LTOCommonNameMap.contains(Key: *CommonName)) |
| 406 | llvm::append_range(C&: FuncAddrs, R&: LTOCommonNameMap[*CommonName]); |
| 407 | } else { |
| 408 | FuncAddrs.push_back(x: BD->getAddress()); |
| 409 | } |
| 410 | |
| 411 | if (FuncAddrs.empty()) { |
| 412 | if (opts::Verbosity >= 1) |
| 413 | BC.errs() << "BOLT-WARNING: Reorder functions: can't find function " |
| 414 | << "for " << Function << "\n" ; |
| 415 | ++InvalidEntries; |
| 416 | continue; |
| 417 | } |
| 418 | |
| 419 | for (const uint64_t FuncAddr : FuncAddrs) { |
| 420 | const BinaryData *FuncBD = BC.getBinaryDataAtAddress(Address: FuncAddr); |
| 421 | assert(FuncBD); |
| 422 | |
| 423 | BinaryFunction *BF = BC.getFunctionForSymbol(Symbol: FuncBD->getSymbol()); |
| 424 | if (!BF) { |
| 425 | if (opts::Verbosity >= 1) |
| 426 | BC.errs() << "BOLT-WARNING: Reorder functions: can't find function " |
| 427 | << "for " << Function << "\n" ; |
| 428 | ++InvalidEntries; |
| 429 | break; |
| 430 | } |
| 431 | if (!BF->hasValidIndex()) |
| 432 | BF->setIndex(Index++); |
| 433 | else if (opts::Verbosity > 0) |
| 434 | BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function |
| 435 | << "\n" ; |
| 436 | } |
| 437 | } |
| 438 | if (InvalidEntries) |
| 439 | BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for " |
| 440 | << InvalidEntries << " entries in -function-order list\n" ; |
| 441 | } break; |
| 442 | |
| 443 | default: |
| 444 | llvm_unreachable("unexpected layout type" ); |
| 445 | } |
| 446 | |
| 447 | reorder(BC, Clusters: std::move(Clusters), BFs); |
| 448 | |
| 449 | BC.HasFinalizedFunctionOrder = true; |
| 450 | |
| 451 | std::unique_ptr<std::ofstream> FuncsFile; |
| 452 | if (!opts::GenerateFunctionOrderFile.empty()) { |
| 453 | FuncsFile = std::make_unique<std::ofstream>(args&: opts::GenerateFunctionOrderFile, |
| 454 | args: std::ios::out); |
| 455 | if (!FuncsFile) { |
| 456 | BC.errs() << "BOLT-ERROR: ordered functions file " |
| 457 | << opts::GenerateFunctionOrderFile << " cannot be opened\n" ; |
| 458 | return createFatalBOLTError(S: "" ); |
| 459 | } |
| 460 | } |
| 461 | |
| 462 | std::unique_ptr<std::ofstream> LinkSectionsFile; |
| 463 | if (!opts::LinkSectionsFile.empty()) { |
| 464 | LinkSectionsFile = |
| 465 | std::make_unique<std::ofstream>(args&: opts::LinkSectionsFile, args: std::ios::out); |
| 466 | if (!LinkSectionsFile) { |
| 467 | BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile |
| 468 | << " cannot be opened\n" ; |
| 469 | return createFatalBOLTError(S: "" ); |
| 470 | } |
| 471 | } |
| 472 | |
| 473 | if (FuncsFile || LinkSectionsFile) { |
| 474 | std::vector<BinaryFunction *> SortedFunctions(BFs.size()); |
| 475 | llvm::transform(Range: llvm::make_second_range(c&: BFs), d_first: SortedFunctions.begin(), |
| 476 | F: [](BinaryFunction &BF) { return &BF; }); |
| 477 | |
| 478 | // Sort functions by index. |
| 479 | llvm::stable_sort(Range&: SortedFunctions, C: compareBinaryFunctionByIndex); |
| 480 | |
| 481 | for (const BinaryFunction *Func : SortedFunctions) { |
| 482 | if (!Func->hasValidIndex()) |
| 483 | break; |
| 484 | if (Func->isPLTFunction()) |
| 485 | continue; |
| 486 | |
| 487 | if (FuncsFile) |
| 488 | *FuncsFile << Func->getOneName().str() << '\n'; |
| 489 | |
| 490 | if (LinkSectionsFile) { |
| 491 | const char *Indent = "" ; |
| 492 | std::vector<StringRef> AllNames = Func->getNames(); |
| 493 | llvm::sort(C&: AllNames); |
| 494 | for (StringRef Name : AllNames) { |
| 495 | const size_t SlashPos = Name.find(C: '/'); |
| 496 | if (SlashPos != std::string::npos) { |
| 497 | // Avoid duplicates for local functions. |
| 498 | if (Name.find(C: '/', From: SlashPos + 1) != std::string::npos) |
| 499 | continue; |
| 500 | Name = Name.substr(Start: 0, N: SlashPos); |
| 501 | } |
| 502 | *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; |
| 503 | Indent = " " ; |
| 504 | } |
| 505 | } |
| 506 | } |
| 507 | |
| 508 | if (FuncsFile) { |
| 509 | FuncsFile->close(); |
| 510 | BC.outs() << "BOLT-INFO: dumped function order to " |
| 511 | << opts::GenerateFunctionOrderFile << '\n'; |
| 512 | } |
| 513 | |
| 514 | if (LinkSectionsFile) { |
| 515 | LinkSectionsFile->close(); |
| 516 | BC.outs() << "BOLT-INFO: dumped linker section order to " |
| 517 | << opts::LinkSectionsFile << '\n'; |
| 518 | } |
| 519 | } |
| 520 | return Error::success(); |
| 521 | } |
| 522 | |
| 523 | } // namespace bolt |
| 524 | } // namespace llvm |
| 525 | |