1//===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "bolt/Profile/YAMLProfileReader.h"
10#include "bolt/Core/BinaryBasicBlock.h"
11#include "bolt/Core/BinaryFunction.h"
12#include "bolt/Passes/MCF.h"
13#include "bolt/Profile/ProfileYAMLMapping.h"
14#include "bolt/Utils/Utils.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/Support/CommandLine.h"
17
18using namespace llvm;
19
20namespace opts {
21
22extern cl::opt<unsigned> Verbosity;
23extern cl::OptionCategory BoltOptCategory;
24extern cl::opt<bool> InferStaleProfile;
25
26static llvm::cl::opt<bool>
27 IgnoreHash("profile-ignore-hash",
28 cl::desc("ignore hash while reading function profile"),
29 cl::Hidden, cl::cat(BoltOptCategory));
30
31llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
32 cl::desc("use DFS order for YAML profile"),
33 cl::Hidden, cl::cat(BoltOptCategory));
34} // namespace opts
35
36namespace llvm {
37namespace bolt {
38
39bool YAMLProfileReader::isYAML(const StringRef Filename) {
40 if (auto MB = MemoryBuffer::getFileOrSTDIN(Filename)) {
41 StringRef Buffer = (*MB)->getBuffer();
42 return Buffer.starts_with(Prefix: "---\n");
43 } else {
44 report_error(Message: Filename, EC: MB.getError());
45 }
46 return false;
47}
48
49void YAMLProfileReader::buildNameMaps(BinaryContext &BC) {
50 auto lookupFunction = [&](StringRef Name) -> BinaryFunction * {
51 if (BinaryData *BD = BC.getBinaryDataByName(Name))
52 return BC.getFunctionForSymbol(Symbol: BD->getSymbol());
53 return nullptr;
54 };
55
56 ProfileBFs.reserve(n: YamlBP.Functions.size());
57
58 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
59 StringRef Name = YamlBF.Name;
60 const size_t Pos = Name.find(Str: "(*");
61 if (Pos != StringRef::npos)
62 Name = Name.substr(Start: 0, N: Pos);
63 ProfileFunctionNames.insert(key: Name);
64 ProfileBFs.push_back(x: lookupFunction(Name));
65 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
66 LTOCommonNameMap[*CommonName].push_back(x: &YamlBF);
67 }
68 for (auto &[Symbol, BF] : BC.SymbolToFunctionMap) {
69 StringRef Name = Symbol->getName();
70 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
71 LTOCommonNameFunctionMap[*CommonName].insert(x: BF);
72 }
73}
74
75bool YAMLProfileReader::hasLocalsWithFileName() const {
76 return llvm::any_of(Range: ProfileFunctionNames.keys(), P: [](StringRef FuncName) {
77 return FuncName.count(C: '/') == 2 && FuncName[0] != '/';
78 });
79}
80
81bool YAMLProfileReader::parseFunctionProfile(
82 BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
83 BinaryContext &BC = BF.getBinaryContext();
84
85 const bool IsDFSOrder = YamlBP.Header.IsDFSOrder;
86 const HashFunction HashFunction = YamlBP.Header.HashFunction;
87 bool ProfileMatched = true;
88 uint64_t MismatchedBlocks = 0;
89 uint64_t MismatchedCalls = 0;
90 uint64_t MismatchedEdges = 0;
91
92 uint64_t FunctionExecutionCount = 0;
93
94 BF.setExecutionCount(YamlBF.ExecCount);
95
96 uint64_t FuncRawBranchCount = 0;
97 for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
98 for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
99 FuncRawBranchCount += YamlSI.Count;
100 BF.setRawBranchCount(FuncRawBranchCount);
101
102 if (!opts::IgnoreHash &&
103 YamlBF.Hash != BF.computeHash(UseDFS: IsDFSOrder, HashFunction)) {
104 if (opts::Verbosity >= 1)
105 errs() << "BOLT-WARNING: function hash mismatch\n";
106 ProfileMatched = false;
107 }
108
109 if (YamlBF.NumBasicBlocks != BF.size()) {
110 if (opts::Verbosity >= 1)
111 errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
112 ProfileMatched = false;
113 }
114
115 BinaryFunction::BasicBlockOrderType Order;
116 if (IsDFSOrder)
117 llvm::copy(Range: BF.dfs(), Out: std::back_inserter(x&: Order));
118 else
119 llvm::copy(Range: BF.getLayout().blocks(), Out: std::back_inserter(x&: Order));
120
121 for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
122 if (YamlBB.Index >= Order.size()) {
123 if (opts::Verbosity >= 2)
124 errs() << "BOLT-WARNING: index " << YamlBB.Index
125 << " is out of bounds\n";
126 ++MismatchedBlocks;
127 continue;
128 }
129
130 BinaryBasicBlock &BB = *Order[YamlBB.Index];
131
132 // Basic samples profile (without LBR) does not have branches information
133 // and needs a special processing.
134 if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
135 if (!YamlBB.EventCount) {
136 BB.setExecutionCount(0);
137 continue;
138 }
139 uint64_t NumSamples = YamlBB.EventCount * 1000;
140 if (NormalizeByInsnCount && BB.getNumNonPseudos())
141 NumSamples /= BB.getNumNonPseudos();
142 else if (NormalizeByCalls)
143 NumSamples /= BB.getNumCalls() + 1;
144
145 BB.setExecutionCount(NumSamples);
146 if (BB.isEntryPoint())
147 FunctionExecutionCount += NumSamples;
148 continue;
149 }
150
151 BB.setExecutionCount(YamlBB.ExecCount);
152
153 for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
154 BinaryFunction *Callee = YamlCSI.DestId < YamlProfileToFunction.size()
155 ? YamlProfileToFunction[YamlCSI.DestId]
156 : nullptr;
157 bool IsFunction = Callee ? true : false;
158 MCSymbol *CalleeSymbol = nullptr;
159 if (IsFunction)
160 CalleeSymbol = Callee->getSymbolForEntryID(EntryNum: YamlCSI.EntryDiscriminator);
161
162 BF.getAllCallSites().emplace_back(Args&: CalleeSymbol, Args: YamlCSI.Count,
163 Args: YamlCSI.Mispreds, Args: YamlCSI.Offset);
164
165 if (YamlCSI.Offset >= BB.getOriginalSize()) {
166 if (opts::Verbosity >= 2)
167 errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
168 << " out of bounds in block " << BB.getName() << '\n';
169 ++MismatchedCalls;
170 continue;
171 }
172
173 MCInst *Instr =
174 BF.getInstructionAtOffset(Offset: BB.getInputOffset() + YamlCSI.Offset);
175 if (!Instr) {
176 if (opts::Verbosity >= 2)
177 errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
178 << " in block " << BB.getName() << '\n';
179 ++MismatchedCalls;
180 continue;
181 }
182 if (!BC.MIB->isCall(Inst: *Instr) && !BC.MIB->isIndirectBranch(Inst: *Instr)) {
183 if (opts::Verbosity >= 2)
184 errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
185 << " in block " << BB.getName() << '\n';
186 ++MismatchedCalls;
187 continue;
188 }
189
190 auto setAnnotation = [&](StringRef Name, uint64_t Count) {
191 if (BC.MIB->hasAnnotation(Inst: *Instr, Name)) {
192 if (opts::Verbosity >= 1)
193 errs() << "BOLT-WARNING: ignoring duplicate " << Name
194 << " info for offset 0x" << Twine::utohexstr(Val: YamlCSI.Offset)
195 << " in function " << BF << '\n';
196 return;
197 }
198 BC.MIB->addAnnotation(Inst&: *Instr, Name, Val: Count);
199 };
200
201 if (BC.MIB->isIndirectCall(Inst: *Instr) || BC.MIB->isIndirectBranch(Inst: *Instr)) {
202 auto &CSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
203 Inst&: *Instr, Name: "CallProfile");
204 CSP.emplace_back(Args&: CalleeSymbol, Args: YamlCSI.Count, Args: YamlCSI.Mispreds);
205 } else if (BC.MIB->getConditionalTailCall(Inst: *Instr)) {
206 setAnnotation("CTCTakenCount", YamlCSI.Count);
207 setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
208 } else {
209 setAnnotation("Count", YamlCSI.Count);
210 }
211 }
212
213 for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
214 if (YamlSI.Index >= Order.size()) {
215 if (opts::Verbosity >= 1)
216 errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
217 ++MismatchedEdges;
218 continue;
219 }
220
221 BinaryBasicBlock &SuccessorBB = *Order[YamlSI.Index];
222 if (!BB.getSuccessor(Label: SuccessorBB.getLabel())) {
223 if (opts::Verbosity >= 1)
224 errs() << "BOLT-WARNING: no successor for block " << BB.getName()
225 << " that matches index " << YamlSI.Index << " or block "
226 << SuccessorBB.getName() << '\n';
227 ++MismatchedEdges;
228 continue;
229 }
230
231 BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(Succ: SuccessorBB);
232 BI.Count += YamlSI.Count;
233 BI.MispredictedCount += YamlSI.Mispreds;
234 }
235 }
236
237 // If basic block profile wasn't read it should be 0.
238 for (BinaryBasicBlock &BB : BF)
239 if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
240 BB.setExecutionCount(0);
241
242 if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
243 BF.setExecutionCount(FunctionExecutionCount);
244 estimateEdgeCounts(BF);
245 }
246
247 ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
248
249 if (!ProfileMatched) {
250 if (opts::Verbosity >= 1)
251 errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
252 << MismatchedCalls << " calls, and " << MismatchedEdges
253 << " edges in profile did not match function " << BF << '\n';
254
255 if (YamlBF.NumBasicBlocks != BF.size())
256 ++BC.Stats.NumStaleFuncsWithEqualBlockCount;
257
258 if (opts::InferStaleProfile && inferStaleProfile(Function&: BF, YamlBF))
259 ProfileMatched = true;
260 }
261 if (ProfileMatched)
262 BF.markProfiled(Flags: YamlBP.Header.Flags);
263
264 return ProfileMatched;
265}
266
267Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
268 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
269 MemoryBuffer::getFileOrSTDIN(Filename);
270 if (std::error_code EC = MB.getError()) {
271 errs() << "ERROR: cannot open " << Filename << ": " << EC.message() << "\n";
272 return errorCodeToError(EC);
273 }
274 yaml::Input YamlInput(MB.get()->getBuffer());
275
276 // Consume YAML file.
277 YamlInput >> YamlBP;
278 if (YamlInput.error()) {
279 errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
280 << " : " << YamlInput.error().message() << '\n';
281 return errorCodeToError(EC: YamlInput.error());
282 }
283
284 // Sanity check.
285 if (YamlBP.Header.Version != 1)
286 return make_error<StringError>(
287 Args: Twine("cannot read profile : unsupported version"),
288 Args: inconvertibleErrorCode());
289
290 if (YamlBP.Header.EventNames.find(c: ',') != StringRef::npos)
291 return make_error<StringError>(
292 Args: Twine("multiple events in profile are not supported"),
293 Args: inconvertibleErrorCode());
294
295 // Match profile to function based on a function name.
296 buildNameMaps(BC);
297
298 // Preliminary assign function execution count.
299 for (auto [YamlBF, BF] : llvm::zip_equal(t&: YamlBP.Functions, u&: ProfileBFs)) {
300 if (!BF)
301 continue;
302 if (!BF->hasProfile()) {
303 BF->setExecutionCount(YamlBF.ExecCount);
304 } else {
305 if (opts::Verbosity >= 1) {
306 errs() << "BOLT-WARNING: dropping duplicate profile for " << YamlBF.Name
307 << '\n';
308 }
309 BF = nullptr;
310 }
311 }
312
313 return Error::success();
314}
315
316bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
317 for (StringRef Name : BF.getNames())
318 if (ProfileFunctionNames.contains(key: Name))
319 return true;
320 for (StringRef Name : BF.getNames()) {
321 if (const std::optional<StringRef> CommonName = getLTOCommonName(Name)) {
322 if (LTOCommonNameMap.contains(Key: *CommonName))
323 return true;
324 }
325 }
326
327 return false;
328}
329
330Error YAMLProfileReader::readProfile(BinaryContext &BC) {
331 if (opts::Verbosity >= 1) {
332 outs() << "BOLT-INFO: YAML profile with hash: ";
333 switch (YamlBP.Header.HashFunction) {
334 case HashFunction::StdHash:
335 outs() << "std::hash\n";
336 break;
337 case HashFunction::XXH3:
338 outs() << "xxh3\n";
339 break;
340 }
341 }
342 YamlProfileToFunction.resize(new_size: YamlBP.Functions.size() + 1);
343
344 auto profileMatches = [](const yaml::bolt::BinaryFunctionProfile &Profile,
345 BinaryFunction &BF) {
346 if (opts::IgnoreHash)
347 return Profile.NumBasicBlocks == BF.size();
348 return Profile.Hash == static_cast<uint64_t>(BF.getHash());
349 };
350
351 // We have to do 2 passes since LTO introduces an ambiguity in function
352 // names. The first pass assigns profiles that match 100% by name and
353 // by hash. The second pass allows name ambiguity for LTO private functions.
354 for (auto [YamlBF, BF] : llvm::zip_equal(t&: YamlBP.Functions, u&: ProfileBFs)) {
355 if (!BF)
356 continue;
357 BinaryFunction &Function = *BF;
358 // Clear function call count that may have been set while pre-processing
359 // the profile.
360 Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
361
362 // Recompute hash once per function.
363 if (!opts::IgnoreHash)
364 Function.computeHash(UseDFS: YamlBP.Header.IsDFSOrder,
365 HashFunction: YamlBP.Header.HashFunction);
366
367 if (profileMatches(YamlBF, Function))
368 matchProfileToFunction(YamlBF, BF&: Function);
369 }
370
371 for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
372 if (!LTOCommonNameFunctionMap.contains(Key: CommonName))
373 continue;
374 std::unordered_set<BinaryFunction *> &Functions =
375 LTOCommonNameFunctionMap[CommonName];
376 // Return true if a given profile is matched to one of BinaryFunctions with
377 // matching LTO common name.
378 auto matchProfile = [&](yaml::bolt::BinaryFunctionProfile *YamlBF) {
379 if (YamlBF->Used)
380 return false;
381 for (BinaryFunction *BF : Functions) {
382 if (!ProfiledFunctions.count(x: BF) && profileMatches(*YamlBF, *BF)) {
383 matchProfileToFunction(YamlBF&: *YamlBF, BF&: *BF);
384 return true;
385 }
386 }
387 return false;
388 };
389 bool ProfileMatched = llvm::any_of(Range: LTOProfiles, P: matchProfile);
390
391 // If there's only one function with a given name, try to match it
392 // partially.
393 if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
394 !LTOProfiles.front()->Used &&
395 !ProfiledFunctions.count(x: *Functions.begin()))
396 matchProfileToFunction(YamlBF&: *LTOProfiles.front(), BF&: **Functions.begin());
397 }
398
399 for (auto [YamlBF, BF] : llvm::zip_equal(t&: YamlBP.Functions, u&: ProfileBFs))
400 if (!YamlBF.Used && BF && !ProfiledFunctions.count(x: BF))
401 matchProfileToFunction(YamlBF, BF&: *BF);
402
403 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
404 if (!YamlBF.Used && opts::Verbosity >= 1)
405 errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
406 << '\n';
407
408 // Set for parseFunctionProfile().
409 NormalizeByInsnCount = usesEvent(Name: "cycles") || usesEvent(Name: "instructions");
410 NormalizeByCalls = usesEvent(Name: "branches");
411
412 uint64_t NumUnused = 0;
413 for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
414 if (YamlBF.Id >= YamlProfileToFunction.size()) {
415 // Such profile was ignored.
416 ++NumUnused;
417 continue;
418 }
419 if (BinaryFunction *BF = YamlProfileToFunction[YamlBF.Id])
420 parseFunctionProfile(BF&: *BF, YamlBF);
421 else
422 ++NumUnused;
423 }
424
425 BC.setNumUnusedProfiledObjects(NumUnused);
426
427 return Error::success();
428}
429
430bool YAMLProfileReader::usesEvent(StringRef Name) const {
431 return YamlBP.Header.EventNames.find(str: std::string(Name)) != StringRef::npos;
432}
433
434} // end namespace bolt
435} // end namespace llvm
436

// Source: bolt/lib/Profile/YAMLProfileReader.cpp