1//===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "bolt/Profile/YAMLProfileReader.h"
10#include "bolt/Core/BinaryBasicBlock.h"
11#include "bolt/Core/BinaryFunction.h"
12#include "bolt/Passes/MCF.h"
13#include "bolt/Profile/ProfileYAMLMapping.h"
14#include "bolt/Utils/NameResolver.h"
15#include "bolt/Utils/Utils.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/edit_distance.h"
18#include "llvm/Demangle/Demangle.h"
19#include "llvm/MC/MCPseudoProbe.h"
20#include "llvm/Support/CommandLine.h"
21
22using namespace llvm;
23
24namespace opts {
25
26extern cl::opt<unsigned> Verbosity;
27extern cl::OptionCategory BoltOptCategory;
28extern cl::opt<bool> InferStaleProfile;
29extern cl::opt<bool> Lite;
30
31static cl::opt<unsigned> NameSimilarityFunctionMatchingThreshold(
32 "name-similarity-function-matching-threshold",
33 cl::desc("Match functions using namespace and edit distance"), cl::init(Val: 0),
34 cl::Hidden, cl::cat(BoltOptCategory));
35
36static llvm::cl::opt<bool>
37 IgnoreHash("profile-ignore-hash",
38 cl::desc("ignore hash while reading function profile"),
39 cl::Hidden, cl::cat(BoltOptCategory));
40
41static llvm::cl::opt<bool>
42 MatchProfileWithFunctionHash("match-profile-with-function-hash",
43 cl::desc("Match profile with function hash"),
44 cl::Hidden, cl::cat(BoltOptCategory));
45static llvm::cl::opt<bool>
46 MatchWithCallGraph("match-with-call-graph",
47 cl::desc("Match functions with call graph"), cl::Hidden,
48 cl::cat(BoltOptCategory));
49
50llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
51 cl::desc("use DFS order for YAML profile"),
52 cl::Hidden, cl::cat(BoltOptCategory));
53
54extern llvm::cl::opt<bool> StaleMatchingWithPseudoProbes;
55} // namespace opts
56
57namespace llvm {
58namespace bolt {
59
60YAMLProfileReader::CallGraphMatcher::CallGraphMatcher(
61 BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
62 ProfileLookupMap &IdToYAMLBF) {
63 constructBFCG(BC, YamlBP);
64 constructYAMLFCG(YamlBP, IdToYAMLBF);
65 computeBFNeighborHashes(BC);
66}
67
68void YAMLProfileReader::CallGraphMatcher::constructBFCG(
69 BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP) {
70 for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
71 for (const BinaryBasicBlock &BB : BF->blocks()) {
72 for (const MCInst &Instr : BB) {
73 if (!BC.MIB->isCall(Inst: Instr))
74 continue;
75 const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Inst: Instr);
76 if (!CallSymbol)
77 continue;
78 BinaryData *BD = BC.getBinaryDataByName(Name: CallSymbol->getName());
79 if (!BD)
80 continue;
81 BinaryFunction *CalleeBF = BC.getFunctionForSymbol(Symbol: BD->getSymbol());
82 if (!CalleeBF)
83 continue;
84
85 BFAdjacencyMap[CalleeBF].insert(x: BF);
86 BFAdjacencyMap[BF].insert(x: CalleeBF);
87 }
88 }
89 }
90}
91
92void YAMLProfileReader::CallGraphMatcher::computeBFNeighborHashes(
93 BinaryContext &BC) {
94 for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
95 auto It = BFAdjacencyMap.find(Val: BF);
96 if (It == BFAdjacencyMap.end())
97 continue;
98 auto &AdjacentBFs = It->second;
99 std::string HashStr;
100 for (BinaryFunction *BF : AdjacentBFs)
101 HashStr += BF->getOneName();
102 uint64_t Hash = std::hash<std::string>{}(HashStr);
103 NeighborHashToBFs[Hash].push_back(x: BF);
104 }
105}
106
107void YAMLProfileReader::CallGraphMatcher::constructYAMLFCG(
108 yaml::bolt::BinaryProfile &YamlBP, ProfileLookupMap &IdToYAMLBF) {
109
110 for (auto &CallerYamlBF : YamlBP.Functions) {
111 for (auto &YamlBB : CallerYamlBF.Blocks) {
112 for (auto &CallSite : YamlBB.CallSites) {
113 auto IdToYAMLBFIt = IdToYAMLBF.find(Val: CallSite.DestId);
114 if (IdToYAMLBFIt == IdToYAMLBF.end())
115 continue;
116 YamlBFAdjacencyMap[&CallerYamlBF].insert(x: IdToYAMLBFIt->second);
117 YamlBFAdjacencyMap[IdToYAMLBFIt->second].insert(x: &CallerYamlBF);
118 }
119 }
120 }
121}
122
123bool YAMLProfileReader::isYAML(const StringRef Filename) {
124 if (auto MB = MemoryBuffer::getFileOrSTDIN(Filename)) {
125 StringRef Buffer = (*MB)->getBuffer();
126 return Buffer.starts_with(Prefix: "---\n");
127 } else {
128 report_error(Message: Filename, EC: MB.getError());
129 }
130 return false;
131}
132
133void YAMLProfileReader::buildNameMaps(BinaryContext &BC) {
134 auto lookupFunction = [&](StringRef Name) -> BinaryFunction * {
135 if (BinaryData *BD = BC.getBinaryDataByName(Name))
136 return BC.getFunctionForSymbol(Symbol: BD->getSymbol());
137 return nullptr;
138 };
139
140 ProfileBFs.reserve(n: YamlBP.Functions.size());
141
142 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
143 StringRef Name = YamlBF.Name;
144 const size_t Pos = Name.find(Str: "(*");
145 if (Pos != StringRef::npos)
146 Name = Name.substr(Start: 0, N: Pos);
147 ProfileFunctionNames.insert(key: Name);
148 ProfileBFs.push_back(x: lookupFunction(Name));
149 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
150 LTOCommonNameMap[*CommonName].push_back(x: &YamlBF);
151 }
152 for (auto &[Symbol, BF] : BC.SymbolToFunctionMap) {
153 StringRef Name = Symbol->getName();
154 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
155 LTOCommonNameFunctionMap[*CommonName].insert(x: BF);
156 }
157}
158
159bool YAMLProfileReader::hasLocalsWithFileName() const {
160 return llvm::any_of(Range: ProfileFunctionNames.keys(), P: [](StringRef FuncName) {
161 return FuncName.count(C: '/') == 2 && FuncName[0] != '/';
162 });
163}
164
165bool YAMLProfileReader::parseFunctionProfile(
166 BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
167 BinaryContext &BC = BF.getBinaryContext();
168
169 const bool IsDFSOrder = YamlBP.Header.IsDFSOrder;
170 const HashFunction HashFunction = YamlBP.Header.HashFunction;
171 bool ProfileMatched = true;
172 uint64_t MismatchedBlocks = 0;
173 uint64_t MismatchedCalls = 0;
174 uint64_t MismatchedEdges = 0;
175
176 uint64_t FunctionExecutionCount = 0;
177
178 BF.setExecutionCount(YamlBF.ExecCount);
179 BF.setExternEntryCount(YamlBF.ExternEntryCount);
180
181 uint64_t FuncRawBranchCount = 0;
182 for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
183 for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
184 FuncRawBranchCount += YamlSI.Count;
185 BF.setRawSampleCount(FuncRawBranchCount);
186
187 if (BF.empty())
188 return true;
189
190 if (!opts::IgnoreHash) {
191 if (!BF.getHash())
192 BF.computeHash(UseDFS: IsDFSOrder, HashFunction);
193 if (YamlBF.Hash != BF.getHash()) {
194 if (opts::Verbosity >= 1)
195 errs() << "BOLT-WARNING: function hash mismatch\n";
196 ProfileMatched = false;
197 }
198 }
199
200 if (YamlBF.NumBasicBlocks != BF.size()) {
201 if (opts::Verbosity >= 1)
202 errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
203 ProfileMatched = false;
204 }
205
206 BinaryFunction::BasicBlockOrderType Order;
207 if (IsDFSOrder)
208 llvm::copy(Range: BF.dfs(), Out: std::back_inserter(x&: Order));
209 else
210 llvm::copy(Range: BF.getLayout().blocks(), Out: std::back_inserter(x&: Order));
211
212 for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
213 if (YamlBB.Index >= Order.size()) {
214 if (opts::Verbosity >= 2)
215 errs() << "BOLT-WARNING: index " << YamlBB.Index
216 << " is out of bounds\n";
217 ++MismatchedBlocks;
218 continue;
219 }
220
221 BinaryBasicBlock &BB = *Order[YamlBB.Index];
222
223 // Basic samples profile (without LBR) does not have branches information
224 // and needs a special processing.
225 if (YamlBP.Header.Flags & BinaryFunction::PF_BASIC) {
226 if (!YamlBB.EventCount) {
227 BB.setExecutionCount(0);
228 continue;
229 }
230 uint64_t NumSamples = YamlBB.EventCount * 1000;
231 if (NormalizeByInsnCount && BB.getNumNonPseudos())
232 NumSamples /= BB.getNumNonPseudos();
233 else if (NormalizeByCalls)
234 NumSamples /= BB.getNumCalls() + 1;
235
236 BB.setExecutionCount(NumSamples);
237 if (BB.isEntryPoint())
238 FunctionExecutionCount += NumSamples;
239 continue;
240 }
241
242 BB.setExecutionCount(YamlBB.ExecCount);
243
244 for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
245 BinaryFunction *Callee = YamlProfileToFunction.lookup(Val: YamlCSI.DestId);
246 bool IsFunction = Callee ? true : false;
247 MCSymbol *CalleeSymbol = nullptr;
248 if (IsFunction)
249 CalleeSymbol = Callee->getSymbolForEntryID(EntryNum: YamlCSI.EntryDiscriminator);
250
251 BF.getAllCallSites().emplace_back(Args&: CalleeSymbol, Args: YamlCSI.Count,
252 Args: YamlCSI.Mispreds, Args: YamlCSI.Offset);
253
254 if (YamlCSI.Offset >= BB.getOriginalSize()) {
255 if (opts::Verbosity >= 2)
256 errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
257 << " out of bounds in block " << BB.getName() << '\n';
258 ++MismatchedCalls;
259 continue;
260 }
261
262 MCInst *Instr =
263 BF.getInstructionAtOffset(Offset: BB.getInputOffset() + YamlCSI.Offset);
264 if (!Instr) {
265 if (opts::Verbosity >= 2)
266 errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
267 << " in block " << BB.getName() << '\n';
268 ++MismatchedCalls;
269 continue;
270 }
271 if (!BC.MIB->isCall(Inst: *Instr) && !BC.MIB->isIndirectBranch(Inst: *Instr)) {
272 if (opts::Verbosity >= 2)
273 errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
274 << " in block " << BB.getName() << '\n';
275 ++MismatchedCalls;
276 continue;
277 }
278
279 auto setAnnotation = [&](StringRef Name, uint64_t Count) {
280 if (BC.MIB->hasAnnotation(Inst: *Instr, Name)) {
281 if (opts::Verbosity >= 1)
282 errs() << "BOLT-WARNING: ignoring duplicate " << Name
283 << " info for offset 0x" << Twine::utohexstr(Val: YamlCSI.Offset)
284 << " in function " << BF << '\n';
285 return;
286 }
287 BC.MIB->addAnnotation(Inst&: *Instr, Name, Val: Count);
288 };
289
290 if (BC.MIB->isIndirectCall(Inst: *Instr) || BC.MIB->isIndirectBranch(Inst: *Instr)) {
291 auto &CSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
292 Inst&: *Instr, Name: "CallProfile");
293 CSP.emplace_back(Args&: CalleeSymbol, Args: YamlCSI.Count, Args: YamlCSI.Mispreds);
294 } else if (BC.MIB->getConditionalTailCall(Inst: *Instr)) {
295 setAnnotation("CTCTakenCount", YamlCSI.Count);
296 setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
297 } else {
298 setAnnotation("Count", YamlCSI.Count);
299 }
300 }
301
302 for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
303 if (YamlSI.Index >= Order.size()) {
304 if (opts::Verbosity >= 1)
305 errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
306 ++MismatchedEdges;
307 continue;
308 }
309
310 BinaryBasicBlock *ToBB = Order[YamlSI.Index];
311 if (!BB.getSuccessor(Label: ToBB->getLabel())) {
312 // Allow passthrough blocks.
313 BinaryBasicBlock *FTSuccessor = BB.getConditionalSuccessor(Condition: false);
314 if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
315 FTSuccessor->getSuccessor(Label: ToBB->getLabel())) {
316 BinaryBasicBlock::BinaryBranchInfo &FTBI =
317 FTSuccessor->getBranchInfo(Succ: *ToBB);
318 FTBI.Count += YamlSI.Count;
319 FTBI.MispredictedCount += YamlSI.Mispreds;
320 ToBB = FTSuccessor;
321 } else {
322 if (opts::Verbosity >= 1)
323 errs() << "BOLT-WARNING: no successor for block " << BB.getName()
324 << " that matches index " << YamlSI.Index << " or block "
325 << ToBB->getName() << '\n';
326 ++MismatchedEdges;
327 continue;
328 }
329 }
330
331 BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(Succ: *ToBB);
332 BI.Count += YamlSI.Count;
333 BI.MispredictedCount += YamlSI.Mispreds;
334 }
335 }
336
337 // If basic block profile wasn't read it should be 0.
338 for (BinaryBasicBlock &BB : BF)
339 if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
340 BB.setExecutionCount(0);
341
342 if (YamlBP.Header.Flags & BinaryFunction::PF_BASIC)
343 BF.setExecutionCount(FunctionExecutionCount);
344
345 ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
346
347 if (!ProfileMatched) {
348 if (opts::Verbosity >= 1)
349 errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
350 << MismatchedCalls << " calls, and " << MismatchedEdges
351 << " edges in profile did not match function " << BF << '\n';
352
353 if (YamlBF.NumBasicBlocks != BF.size())
354 ++BC.Stats.NumStaleFuncsWithEqualBlockCount;
355
356 if (!opts::InferStaleProfile)
357 return false;
358 ArrayRef<ProbeMatchSpec> ProbeMatchSpecs;
359 auto BFIt = BFToProbeMatchSpecs.find(Val: &BF);
360 if (BFIt != BFToProbeMatchSpecs.end())
361 ProbeMatchSpecs = BFIt->second;
362 ProfileMatched = inferStaleProfile(Function&: BF, YamlBF, ProbeMatchSpecs);
363 }
364 if (ProfileMatched)
365 BF.markProfiled(Flags: YamlBP.Header.Flags);
366
367 return ProfileMatched;
368}
369
370Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
371 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
372 MemoryBuffer::getFileOrSTDIN(Filename);
373 if (std::error_code EC = MB.getError()) {
374 errs() << "ERROR: cannot open " << Filename << ": " << EC.message() << "\n";
375 return errorCodeToError(EC);
376 }
377 yaml::Input YamlInput(MB.get()->getBuffer());
378 YamlInput.setAllowUnknownKeys(true);
379
380 // Consume YAML file.
381 YamlInput >> YamlBP;
382 if (YamlInput.error()) {
383 errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
384 << " : " << YamlInput.error().message() << '\n';
385 return errorCodeToError(EC: YamlInput.error());
386 }
387
388 // Sanity check.
389 if (YamlBP.Header.Version != 1)
390 return make_error<StringError>(
391 Args: Twine("cannot read profile : unsupported version"),
392 Args: inconvertibleErrorCode());
393
394 if (YamlBP.Header.EventNames.find(c: ',') != StringRef::npos)
395 return make_error<StringError>(
396 Args: Twine("multiple events in profile are not supported"),
397 Args: inconvertibleErrorCode());
398
399 // Match profile to function based on a function name.
400 buildNameMaps(BC);
401
402 // Preliminary assign function execution count.
403 for (auto [YamlBF, BF] : llvm::zip_equal(t&: YamlBP.Functions, u&: ProfileBFs)) {
404 if (!BF)
405 continue;
406 if (!BF->hasProfile()) {
407 BF->setExecutionCount(YamlBF.ExecCount);
408 } else {
409 if (opts::Verbosity >= 1) {
410 errs() << "BOLT-WARNING: dropping duplicate profile for " << YamlBF.Name
411 << '\n';
412 }
413 BF = nullptr;
414 }
415 }
416
417 return Error::success();
418}
419
420bool YAMLProfileReader::profileMatches(
421 const yaml::bolt::BinaryFunctionProfile &Profile, const BinaryFunction &BF) {
422 if (opts::IgnoreHash)
423 return Profile.NumBasicBlocks == BF.size();
424 return Profile.Hash == static_cast<uint64_t>(BF.getHash());
425}
426
427bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
428 if (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)
429 return true;
430 for (StringRef Name : BF.getNames())
431 if (ProfileFunctionNames.contains(key: Name))
432 return true;
433 for (StringRef Name : BF.getNames()) {
434 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name)) {
435 if (LTOCommonNameMap.contains(Key: *CommonName))
436 return true;
437 }
438 }
439
440 return false;
441}
442
443size_t YAMLProfileReader::matchWithExactName() {
444 size_t MatchedWithExactName = 0;
445 // This first pass assigns profiles that match 100% by name and by hash.
446 for (auto [YamlBF, BF] : llvm::zip_equal(t&: YamlBP.Functions, u&: ProfileBFs)) {
447 if (!BF)
448 continue;
449 BinaryFunction &Function = *BF;
450 // Clear function call count that may have been set while pre-processing
451 // the profile.
452 Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
453
454 if (profileMatches(Profile: YamlBF, BF: Function)) {
455 matchProfileToFunction(YamlBF, BF&: Function);
456 ++MatchedWithExactName;
457 }
458 }
459 return MatchedWithExactName;
460}
461
462size_t YAMLProfileReader::matchWithHash(BinaryContext &BC) {
463 // Iterates through profiled functions to match the first binary function with
464 // the same exact hash. Serves to match identical, renamed functions.
465 // Collisions are possible where multiple functions share the same exact hash.
466 size_t MatchedWithHash = 0;
467 if (opts::MatchProfileWithFunctionHash) {
468 DenseMap<size_t, BinaryFunction *> StrictHashToBF;
469 StrictHashToBF.reserve(NumEntries: BC.getBinaryFunctions().size());
470
471 for (auto &[_, BF] : BC.getBinaryFunctions())
472 StrictHashToBF[BF.getHash()] = &BF;
473
474 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
475 if (YamlBF.Used)
476 continue;
477 auto It = StrictHashToBF.find(Val: YamlBF.Hash);
478 if (It != StrictHashToBF.end() && !ProfiledFunctions.count(x: It->second)) {
479 BinaryFunction *BF = It->second;
480 matchProfileToFunction(YamlBF, BF&: *BF);
481 ++MatchedWithHash;
482 }
483 }
484 }
485 return MatchedWithHash;
486}
487
488size_t YAMLProfileReader::matchWithLTOCommonName() {
489 // This second pass allows name ambiguity for LTO private functions.
490 size_t MatchedWithLTOCommonName = 0;
491 for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
492 if (!LTOCommonNameFunctionMap.contains(Key: CommonName))
493 continue;
494 std::unordered_set<BinaryFunction *> &Functions =
495 LTOCommonNameFunctionMap[CommonName];
496 // Return true if a given profile is matched to one of BinaryFunctions with
497 // matching LTO common name.
498 auto matchProfile = [&](yaml::bolt::BinaryFunctionProfile *YamlBF) {
499 if (YamlBF->Used)
500 return false;
501 for (BinaryFunction *BF : Functions) {
502 if (!ProfiledFunctions.count(x: BF) && profileMatches(Profile: *YamlBF, BF: *BF)) {
503 matchProfileToFunction(YamlBF&: *YamlBF, BF&: *BF);
504 ++MatchedWithLTOCommonName;
505 return true;
506 }
507 }
508 return false;
509 };
510 bool ProfileMatched = llvm::any_of(Range: LTOProfiles, P: matchProfile);
511
512 // If there's only one function with a given name, try to match it
513 // partially.
514 if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
515 !LTOProfiles.front()->Used &&
516 !ProfiledFunctions.count(x: *Functions.begin())) {
517 matchProfileToFunction(YamlBF&: *LTOProfiles.front(), BF&: **Functions.begin());
518 ++MatchedWithLTOCommonName;
519 }
520 }
521 return MatchedWithLTOCommonName;
522}
523
524size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
525 if (!opts::MatchWithCallGraph)
526 return 0;
527
528 size_t MatchedWithCallGraph = 0;
529 CallGraphMatcher CGMatcher(BC, YamlBP, IdToYamLBF);
530
531 ItaniumPartialDemangler Demangler;
532 auto GetBaseName = [&](std::string &FunctionName) {
533 if (Demangler.partialDemangle(MangledName: FunctionName.c_str()))
534 return std::string("");
535 size_t BufferSize = 1;
536 char *Buffer = static_cast<char *>(std::malloc(size: BufferSize));
537 char *BaseName = Demangler.getFunctionBaseName(Buf: Buffer, N: &BufferSize);
538 if (!BaseName) {
539 std::free(ptr: Buffer);
540 return std::string("");
541 }
542 if (Buffer != BaseName)
543 Buffer = BaseName;
544 std::string BaseNameStr(Buffer, BufferSize);
545 std::free(ptr: Buffer);
546 return BaseNameStr;
547 };
548
549 // Matches YAMLBF to BFs with neighbor hashes.
550 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
551 if (YamlBF.Used)
552 continue;
553 auto AdjacentYamlBFsOpt = CGMatcher.getAdjacentYamlBFs(YamlBF);
554 if (!AdjacentYamlBFsOpt)
555 continue;
556 std::set<yaml::bolt::BinaryFunctionProfile *> AdjacentYamlBFs =
557 AdjacentYamlBFsOpt.value();
558 std::string AdjacentYamlBFsHashStr;
559 for (auto *AdjacentYamlBF : AdjacentYamlBFs)
560 AdjacentYamlBFsHashStr += AdjacentYamlBF->Name;
561 uint64_t Hash = std::hash<std::string>{}(AdjacentYamlBFsHashStr);
562 auto BFsWithSameHashOpt = CGMatcher.getBFsWithNeighborHash(NeighborHash: Hash);
563 if (!BFsWithSameHashOpt)
564 continue;
565 std::vector<BinaryFunction *> BFsWithSameHash = BFsWithSameHashOpt.value();
566 // Finds the binary function with the longest common prefix to the profiled
567 // function and matches.
568 BinaryFunction *ClosestBF = nullptr;
569 size_t LCP = 0;
570 std::string YamlBFBaseName = GetBaseName(YamlBF.Name);
571 for (BinaryFunction *BF : BFsWithSameHash) {
572 if (ProfiledFunctions.count(x: BF))
573 continue;
574 std::string BFName = std::string(BF->getOneName());
575 std::string BFBaseName = GetBaseName(BFName);
576 size_t PrefixLength = 0;
577 size_t N = std::min(a: YamlBFBaseName.size(), b: BFBaseName.size());
578 for (size_t I = 0; I < N; ++I) {
579 if (YamlBFBaseName[I] != BFBaseName[I])
580 break;
581 ++PrefixLength;
582 }
583 if (PrefixLength >= LCP) {
584 LCP = PrefixLength;
585 ClosestBF = BF;
586 }
587 }
588 if (ClosestBF) {
589 matchProfileToFunction(YamlBF, BF&: *ClosestBF);
590 ++MatchedWithCallGraph;
591 }
592 }
593
594 return MatchedWithCallGraph;
595}
596
597size_t YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
598 const MCPseudoProbeDecoder &Decoder,
599 const std::vector<yaml::bolt::InlineTreeNode> &DecodedInlineTree,
600 const MCDecodedPseudoProbeInlineTree *Root) {
601 // Match inline tree nodes by GUID, checksum, parent, and call site.
602 for (const auto &[InlineTreeNodeId, InlineTreeNode] :
603 llvm::enumerate(First: DecodedInlineTree)) {
604 uint64_t GUID = InlineTreeNode.GUID;
605 uint64_t Hash = InlineTreeNode.Hash;
606 uint32_t ParentId = InlineTreeNode.ParentIndexDelta;
607 uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
608 const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
609 if (!InlineTreeNodeId) {
610 Cur = Root;
611 } else if (const MCDecodedPseudoProbeInlineTree *Parent =
612 getInlineTreeNode(ProfileInlineTreeNodeId: ParentId)) {
613 for (const MCDecodedPseudoProbeInlineTree &Child :
614 Parent->getChildren()) {
615 if (Child.Guid == GUID) {
616 if (std::get<1>(t: Child.getInlineSite()) == CallSiteProbe)
617 Cur = &Child;
618 break;
619 }
620 }
621 }
622 // Don't match nodes if the profile is stale (mismatching binary FuncHash
623 // and YAML Hash)
624 if (Cur && Decoder.getFuncDescForGUID(GUID: Cur->Guid)->FuncHash == Hash)
625 mapInlineTreeNode(ProfileNodeIdx: InlineTreeNodeId, BinaryNode: Cur);
626 }
627 return Map.size();
628}
629
630// Decode index deltas and indirection through \p YamlPD. Return modified copy
631// of \p YamlInlineTree with populated decoded fields (GUID, Hash, ParentIndex).
632static std::vector<yaml::bolt::InlineTreeNode>
633decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD,
634 std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree) {
635 uint32_t ParentId = 0;
636 uint32_t PrevGUIDIdx = 0;
637 for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
638 uint32_t GUIDIdx = InlineTreeNode.GUIDIndex;
639 if (GUIDIdx != UINT32_MAX)
640 PrevGUIDIdx = GUIDIdx;
641 else
642 GUIDIdx = PrevGUIDIdx;
643 uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
644 ParentId += InlineTreeNode.ParentIndexDelta;
645 InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx];
646 InlineTreeNode.Hash = YamlPD.Hash[HashIdx];
647 InlineTreeNode.ParentIndexDelta = ParentId;
648 }
649 return YamlInlineTree;
650}
651
652size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) {
653 if (!opts::StaleMatchingWithPseudoProbes)
654 return 0;
655
656 const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
657 const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc;
658
659 // Set existing BF->YamlBF match into ProbeMatchSpecs for (local) probe
660 // matching.
661 assert(Decoder &&
662 "If pseudo probes are in use, pseudo probe decoder should exist");
663 for (auto [YamlBF, BF] : llvm::zip_equal(t&: YamlBP.Functions, u&: ProfileBFs)) {
664 // BF is preliminary name-matched function to YamlBF
665 // MatchedBF is final matched function
666 BinaryFunction *MatchedBF = YamlProfileToFunction.lookup(Val: YamlBF.Id);
667 if (!BF)
668 BF = MatchedBF;
669 if (!BF)
670 continue;
671 uint64_t GUID = BF->getGUID();
672 if (!GUID)
673 continue;
674 auto It = TopLevelGUIDToInlineTree.find(x: GUID);
675 if (It == TopLevelGUIDToInlineTree.end())
676 continue;
677 const MCDecodedPseudoProbeInlineTree *Node = It->second;
678 assert(Node && "Malformed TopLevelGUIDToInlineTree");
679 auto &MatchSpecs = BFToProbeMatchSpecs[BF];
680 auto &InlineTreeMap =
681 MatchSpecs.emplace_back(args: InlineTreeNodeMapTy(), args&: YamlBF).first;
682 std::vector<yaml::bolt::InlineTreeNode> ProfileInlineTree =
683 decodeYamlInlineTree(YamlPD, YamlInlineTree: YamlBF.InlineTree);
684 // Erase unsuccessful match
685 if (!InlineTreeMap.matchInlineTrees(Decoder: *Decoder, DecodedInlineTree: ProfileInlineTree, Root: Node))
686 MatchSpecs.pop_back();
687 }
688
689 return 0;
690}
691
692size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
693 if (opts::NameSimilarityFunctionMatchingThreshold == 0)
694 return 0;
695
696 size_t MatchedWithNameSimilarity = 0;
697 ItaniumPartialDemangler Demangler;
698
699 // Demangle and derive namespace from function name.
700 auto DemangleName = [&](std::string &FunctionName) {
701 StringRef RestoredName = NameResolver::restore(Name: FunctionName);
702 return demangle(MangledName: RestoredName);
703 };
704 auto DeriveNameSpace = [&](std::string &DemangledName) {
705 if (Demangler.partialDemangle(MangledName: DemangledName.c_str()))
706 return std::string("");
707 std::vector<char> Buffer(DemangledName.begin(), DemangledName.end());
708 size_t BufferSize;
709 char *NameSpace =
710 Demangler.getFunctionDeclContextName(Buf: &Buffer[0], N: &BufferSize);
711 return std::string(NameSpace, BufferSize);
712 };
713
714 // Maps namespaces to associated function block counts and gets profile
715 // function names and namespaces to minimize the number of BFs to process and
716 // avoid repeated name demangling/namespace derivation.
717 StringMap<std::set<uint32_t>> NamespaceToProfiledBFSizes;
718 std::vector<std::string> ProfileBFDemangledNames;
719 ProfileBFDemangledNames.reserve(n: YamlBP.Functions.size());
720 std::vector<std::string> ProfiledBFNamespaces;
721 ProfiledBFNamespaces.reserve(n: YamlBP.Functions.size());
722
723 for (auto &YamlBF : YamlBP.Functions) {
724 std::string YamlBFDemangledName = DemangleName(YamlBF.Name);
725 ProfileBFDemangledNames.push_back(x: YamlBFDemangledName);
726 std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName);
727 ProfiledBFNamespaces.push_back(x: YamlBFNamespace);
728 NamespaceToProfiledBFSizes[YamlBFNamespace].insert(x: YamlBF.NumBasicBlocks);
729 }
730
731 StringMap<std::vector<BinaryFunction *>> NamespaceToBFs;
732
733 // Maps namespaces to BFs excluding binary functions with no equal sized
734 // profiled functions belonging to the same namespace.
735 for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
736 std::string DemangledName = BF->getDemangledName();
737 std::string Namespace = DeriveNameSpace(DemangledName);
738
739 auto NamespaceToProfiledBFSizesIt =
740 NamespaceToProfiledBFSizes.find(Key: Namespace);
741 // Skip if there are no ProfileBFs with a given \p Namespace.
742 if (NamespaceToProfiledBFSizesIt == NamespaceToProfiledBFSizes.end())
743 continue;
744 // Skip if there are no ProfileBFs in a given \p Namespace with
745 // equal number of blocks.
746 if (NamespaceToProfiledBFSizesIt->second.count(x: BF->size()) == 0)
747 continue;
748 NamespaceToBFs[Namespace].push_back(x: BF);
749 }
750
751 // Iterates through all profiled functions and binary functions belonging to
752 // the same namespace and matches based on edit distance threshold.
753 assert(YamlBP.Functions.size() == ProfiledBFNamespaces.size() &&
754 ProfiledBFNamespaces.size() == ProfileBFDemangledNames.size());
755 for (size_t I = 0; I < YamlBP.Functions.size(); ++I) {
756 yaml::bolt::BinaryFunctionProfile &YamlBF = YamlBP.Functions[I];
757 std::string &YamlBFNamespace = ProfiledBFNamespaces[I];
758 if (YamlBF.Used)
759 continue;
760 // Skip if there are no BFs in a given \p Namespace.
761 auto It = NamespaceToBFs.find(Key: YamlBFNamespace);
762 if (It == NamespaceToBFs.end())
763 continue;
764
765 std::string &YamlBFDemangledName = ProfileBFDemangledNames[I];
766 std::vector<BinaryFunction *> BFs = It->second;
767 unsigned MinEditDistance = UINT_MAX;
768 BinaryFunction *ClosestNameBF = nullptr;
769
770 // Determines BF the closest to the profiled function, in the
771 // same namespace.
772 for (BinaryFunction *BF : BFs) {
773 if (ProfiledFunctions.count(x: BF))
774 continue;
775 if (BF->size() != YamlBF.NumBasicBlocks)
776 continue;
777 std::string BFDemangledName = BF->getDemangledName();
778 unsigned BFEditDistance =
779 StringRef(BFDemangledName).edit_distance(Other: YamlBFDemangledName);
780 if (BFEditDistance < MinEditDistance) {
781 MinEditDistance = BFEditDistance;
782 ClosestNameBF = BF;
783 }
784 }
785
786 if (ClosestNameBF &&
787 MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
788 matchProfileToFunction(YamlBF, BF&: *ClosestNameBF);
789 ++MatchedWithNameSimilarity;
790 }
791 }
792
793 return MatchedWithNameSimilarity;
794}
795
796Error YAMLProfileReader::readProfile(BinaryContext &BC) {
797 if (opts::Verbosity >= 1) {
798 outs() << "BOLT-INFO: YAML profile with hash: ";
799 switch (YamlBP.Header.HashFunction) {
800 case HashFunction::StdHash:
801 outs() << "std::hash\n";
802 break;
803 case HashFunction::XXH3:
804 outs() << "xxh3\n";
805 break;
806 }
807 }
808 YamlProfileToFunction.reserve(NumEntries: YamlBP.Functions.size());
809
810 // Computes hash for binary functions.
811 if (opts::MatchProfileWithFunctionHash) {
812 for (auto &[_, BF] : BC.getBinaryFunctions()) {
813 BF.computeHash(UseDFS: YamlBP.Header.IsDFSOrder, HashFunction: YamlBP.Header.HashFunction);
814 }
815 } else if (!opts::IgnoreHash) {
816 for (BinaryFunction *BF : ProfileBFs) {
817 if (!BF)
818 continue;
819 BF->computeHash(UseDFS: YamlBP.Header.IsDFSOrder, HashFunction: YamlBP.Header.HashFunction);
820 }
821 }
822
823 if (opts::StaleMatchingWithPseudoProbes) {
824 const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
825 assert(Decoder &&
826 "If pseudo probes are in use, pseudo probe decoder should exist");
827 for (const MCDecodedPseudoProbeInlineTree &TopLev :
828 Decoder->getDummyInlineRoot().getChildren())
829 TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
830 }
831
832 // Map profiled function ids to names.
833 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
834 IdToYamLBF[YamlBF.Id] = &YamlBF;
835
836 const size_t MatchedWithExactName = matchWithExactName();
837 const size_t MatchedWithHash = matchWithHash(BC);
838 const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
839 const size_t MatchedWithCallGraph = matchWithCallGraph(BC);
840 const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);
841 [[maybe_unused]] const size_t MatchedWithPseudoProbes =
842 matchWithPseudoProbes(BC);
843
844 for (auto [YamlBF, BF] : llvm::zip_equal(t&: YamlBP.Functions, u&: ProfileBFs))
845 if (!YamlBF.Used && BF && !ProfiledFunctions.count(x: BF))
846 matchProfileToFunction(YamlBF, BF&: *BF);
847
848
849 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
850 if (!YamlBF.Used && opts::Verbosity >= 1)
851 errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
852 << '\n';
853
854 if (opts::Verbosity >= 1) {
855 outs() << "BOLT-INFO: matched " << MatchedWithExactName
856 << " functions with identical names\n";
857 outs() << "BOLT-INFO: matched " << MatchedWithHash
858 << " functions with hash\n";
859 outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
860 << " functions with matching LTO common names\n";
861 outs() << "BOLT-INFO: matched " << MatchedWithCallGraph
862 << " functions with call graph\n";
863 outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
864 << " functions with similar names\n";
865 }
866
867 // Set for parseFunctionProfile().
868 NormalizeByInsnCount = usesEvent(Name: "cycles") || usesEvent(Name: "instructions");
869 NormalizeByCalls = usesEvent(Name: "branches");
870 uint64_t NumUnused = 0;
871 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
872 if (BinaryFunction *BF = YamlProfileToFunction.lookup(Val: YamlBF.Id))
873 parseFunctionProfile(BF&: *BF, YamlBF);
874 else
875 ++NumUnused;
876 }
877
878 BC.setNumUnusedProfiledObjects(NumUnused);
879
880 if (opts::Lite &&
881 (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)) {
882 for (BinaryFunction *BF : BC.getAllBinaryFunctions())
883 if (!BF->hasProfile())
884 BF->setIgnored();
885 }
886
887 return Error::success();
888}
889
890bool YAMLProfileReader::usesEvent(StringRef Name) const {
891 return StringRef(YamlBP.Header.EventNames).contains(Other: Name);
892}
893
894} // end namespace bolt
895} // end namespace llvm
896

source code of bolt/lib/Profile/YAMLProfileReader.cpp