DataReader.cpp source code [bolt/lib/Profile/DataReader.cpp]

1	//===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This family of functions reads profile data written by the perf2bolt
10	// utility and stores it in memory for llvm-bolt consumption.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "bolt/Profile/DataReader.h"
15	#include "bolt/Core/BinaryFunction.h"
16	#include "bolt/Passes/MCF.h"
17	#include "bolt/Utils/Utils.h"
18	#include "llvm/Support/CommandLine.h"
19	#include "llvm/Support/Debug.h"
20	#include "llvm/Support/Errc.h"
21
22	#undef DEBUG_TYPE
23	#define DEBUG_TYPE "bolt-prof"
24
25	using namespace llvm;
26
27	namespace opts {
28
29	extern cl::OptionCategory BoltCategory;
30	extern llvm::cl::opt<unsigned> Verbosity;
31
32	static cl::opt<bool>
33	DumpData("dump-data",
34	cl::desc ("dump parsed bolt data for debugging"),
35	cl::Hidden,
36	cl::cat (BoltCategory));
37
38	} // namespace opts
39
40	namespace llvm {
41	namespace bolt {
42
43	namespace {
44
45	/// Return true if the function name can change across compilations.
46	bool hasVolatileName(const BinaryFunction &BF) {
47	for (const StringRef &Name : BF.getNames())
48	if (getLTOCommonName(Name))
49	return true;
50
51	return false;
52	}
53
54	/// Return standard escaped name of the function possibly renamed by BOLT.
55	std::string normalizeName(StringRef NameRef) {
56	// Strip "PG." prefix used for globalized locals.
57	NameRef = NameRef.starts_with(Prefix: "PG.") ? NameRef.substr(Start: `2`) : NameRef;
58	return getEscapedName(Name: NameRef);
59	}
60
61	} // anonymous namespace
62
63	raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
64	if (Loc.IsSymbol) {
65	OS << Loc.Name;
66	if (Loc.Offset)
67	OS << "+" << Twine::utohexstr(Val: Loc.Offset);
68	} else {
69	OS << Twine::utohexstr(Val: Loc.Offset);
70	}
71	return OS;
72	}
73
74	void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
75	Data.insert(position: Data.end(), first: FBD.Data.begin(), last: FBD.Data.end());
76	for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
77	if (I ->From.Name == FBD.Name) {
78	I ->From.Name = this->Name;
79	I ->From.Offset += Offset;
80	}
81	if (I ->To.Name == FBD.Name) {
82	I ->To.Name = this->Name;
83	I ->To.Offset += Offset;
84	}
85	}
86	llvm::stable_sort(Range&: Data);
87	ExecutionCount += FBD.ExecutionCount;
88	for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
89	assert(I ->To.Name == FBD.Name);
90	auto NewElmt = EntryData.insert(position: EntryData.end(), x: *I);
91	NewElmt ->To.Name = this->Name;
92	NewElmt ->To.Offset += Offset;
93	}
94	}
95
96	uint64_t FuncBranchData::getNumExecutedBranches() const {
97	uint64_t ExecutedBranches = `0`;
98	for (const BranchInfo &BI : Data) {
99	int64_t BranchCount = BI.Branches;
100	assert(BranchCount >= `0` && "branch execution count should not be negative");
101	ExecutedBranches += BranchCount;
102	}
103	return ExecutedBranches;
104	}
105
106	void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
107
108	void SampleInfo::print(raw_ostream &OS) const {
109	OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Val: Loc.Offset)
110	<< " " << Hits << "\n";
111	}
112
113	uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
114	assert(llvm::is_sorted(Data));
115	struct Compare {
116	bool operator()(const SampleInfo &SI, const uint64_t Val) const {
117	return SI.Loc.Offset < Val;
118	}
119	bool operator()(const uint64_t Val, const SampleInfo &SI) const {
120	return Val < SI.Loc.Offset;
121	}
122	};
123	uint64_t Result = `0`;
124	for (auto I = llvm::lower_bound(Range: Data, Value&: Start, C: Compare()),
125	E = llvm::lower_bound(Range: Data, Value&: End, C: Compare());
126	I != E; ++I)
127	Result += I ->Hits;
128	return Result;
129	}
130
131	void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
132	auto Iter = Index.find(Val: Offset);
133	if (Iter == Index.end()) {
134	Data.emplace_back(args: Location (true, Name, Offset), args&: Count);
135	Index [Offset] = Data.size() - `1`;
136	return;
137	}
138	SampleInfo &SI = Data [Iter ->second];
139	SI.Hits += Count;
140	}
141
142	void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
143	uint64_t Count, uint64_t Mispreds) {
144	auto Iter = IntraIndex [OffsetFrom].find(Val: OffsetTo);
145	if (Iter == IntraIndex [OffsetFrom].end()) {
146	Data.emplace_back(args: Location (true, Name, OffsetFrom),
147	args: Location (true, Name, OffsetTo), args&: Mispreds, args&: Count);
148	IntraIndex [OffsetFrom][OffsetTo] = Data.size() - `1`;
149	return;
150	}
151	BranchInfo &BI = Data [Iter ->second];
152	BI.Branches += Count;
153	BI.Mispreds += Mispreds;
154	}
155
156	void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
157	uint64_t Count, uint64_t Mispreds) {
158	auto Iter = InterIndex [OffsetFrom].find(Val: To);
159	if (Iter == InterIndex [OffsetFrom].end()) {
160	Data.emplace_back(args: Location (true, Name, OffsetFrom), args: To, args&: Mispreds, args&: Count);
161	InterIndex [OffsetFrom][To] = Data.size() - `1`;
162	return;
163	}
164	BranchInfo &BI = Data [Iter ->second];
165	BI.Branches += Count;
166	BI.Mispreds += Mispreds;
167	}
168
169	void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
170	uint64_t Count, uint64_t Mispreds) {
171	auto Iter = EntryIndex [OffsetTo].find(Val: From);
172	if (Iter == EntryIndex [OffsetTo].end()) {
173	EntryData.emplace_back(args: From, args: Location (true, Name, OffsetTo), args&: Mispreds,
174	args&: Count);
175	EntryIndex [OffsetTo][From] = EntryData.size() - `1`;
176	return;
177	}
178	BranchInfo &BI = EntryData [Iter ->second];
179	BI.Branches += Count;
180	BI.Mispreds += Mispreds;
181	}
182
183	void BranchInfo::mergeWith(const BranchInfo &BI) {
184	Branches += BI.Branches;
185	Mispreds += BI.Mispreds;
186	}
187
188	void BranchInfo::print(raw_ostream &OS) const {
189	OS << From.IsSymbol << " " << From.Name << " "
190	<< Twine::utohexstr(Val: From.Offset) << " " << To.IsSymbol << " " << To.Name
191	<< " " << Twine::utohexstr(Val: To.Offset) << " " << Mispreds << " " << Branches
192	<< `'\n'`;
193	}
194
195	ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
196	uint64_t To) const {
197	for (const BranchInfo &I : Data)
198	if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
199	return I;
200
201	return make_error_code(E: llvm::errc::invalid_argument);
202	}
203
204	ErrorOr<const BranchInfo &>
205	FuncBranchData::getDirectCallBranch(uint64_t From) const {
206	// Commented out because it can be expensive.
207	// assert(std::is_sorted(Data.begin(), Data.end()));
208	struct Compare {
209	bool operator()(const BranchInfo &BI, const uint64_t Val) const {
210	return BI.From.Offset < Val;
211	}
212	bool operator()(const uint64_t Val, const BranchInfo &BI) const {
213	return Val < BI.From.Offset;
214	}
215	};
216	auto Range = std::equal_range(first: Data.begin(), last: Data.end(), val: From, comp: Compare());
217	for (const auto &RI : llvm::make_range(p: Range))
218	if (RI.From.Name != RI.To.Name)
219	return RI;
220
221	return make_error_code(E: llvm::errc::invalid_argument);
222	}
223
224	void MemInfo::print(raw_ostream &OS) const {
225	OS << (Offset.IsSymbol + `3`) << " " << Offset.Name << " "
226	<< Twine::utohexstr(Val: Offset.Offset) << " " << (Addr.IsSymbol + `3`) << " "
227	<< Addr.Name << " " << Twine::utohexstr(Val: Addr.Offset) << " " << Count
228	<< "\n";
229	}
230
231	void MemInfo::prettyPrint(raw_ostream &OS) const {
232	OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
233	}
234
235	void FuncMemData::update(const Location &Offset, const Location &Addr) {
236	auto Iter = EventIndex [Offset.Offset].find(Val: Addr);
237	if (Iter == EventIndex [Offset.Offset].end()) {
238	Data.emplace_back(args: MemInfo (Offset, Addr, `1`));
239	EventIndex [Offset.Offset][Addr] = Data.size() - `1`;
240	return;
241	}
242	++Data [Iter ->second].Count;
243	}
244
245	Error DataReader::preprocessProfile(BinaryContext &BC) {
246	if (std::error_code EC = parseInput())
247	return errorCodeToError(EC);
248
249	if (opts::DumpData)
250	dump();
251
252	if (collectedInBoltedBinary())
253	outs() << "BOLT-INFO: profile collection done on a binary already "
254	"processed by BOLT\n";
255
256	for (auto &BFI : BC.getBinaryFunctions()) {
257	BinaryFunction &Function = BFI.second;
258	if (FuncMemData *MemData = getMemDataForNames(FuncNames: Function.getNames())) {
259	setMemData(BF: Function, FMD: MemData);
260	MemData->Used = true;
261	}
262	if (FuncBranchData *FuncData = getBranchDataForNames(FuncNames: Function.getNames())) {
263	setBranchData(BF: Function, FBD: FuncData);
264	Function.ExecutionCount = FuncData->ExecutionCount;
265	FuncData->Used = true;
266	}
267	}
268
269	for (auto &BFI : BC.getBinaryFunctions()) {
270	BinaryFunction &Function = BFI.second;
271	matchProfileMemData(BF&: Function);
272	}
273
274	return Error::success();
275	}
276
277	Error DataReader::readProfilePreCFG(BinaryContext &BC) {
278	for (auto &BFI : BC.getBinaryFunctions()) {
279	BinaryFunction &Function = BFI.second;
280	FuncMemData *MemoryData = getMemData(BF: Function);
281	if (!MemoryData)
282	continue;
283
284	for (MemInfo &MI : MemoryData->Data) {
285	const uint64_t Offset = MI.Offset.Offset;
286	auto II = Function.Instructions.find(x: Offset);
287	if (II == Function.Instructions.end()) {
288	// Ignore bad instruction address.
289	continue;
290	}
291
292	auto &MemAccessProfile =
293	BC.MIB ->getOrCreateAnnotationAs<MemoryAccessProfile>(
294	Inst&: II ->second, Name: "MemoryAccessProfile");
295	BinaryData BD = nullptr*;
296	if (MI.Addr.IsSymbol)
297	BD = BC.getBinaryDataByName(Name: MI.Addr.Name);
298	MemAccessProfile.AddressAccessInfo.push_back(
299	Elt: {.MemoryObject: BD, .Offset: MI.Addr.Offset, .Count: MI.Count});
300	auto NextII = std::next(x: II);
301	if (NextII == Function.Instructions.end())
302	MemAccessProfile.NextInstrOffset = Function.getSize();
303	else
304	MemAccessProfile.NextInstrOffset = II ->first;
305	}
306	Function.HasMemoryProfile = true;
307	}
308
309	return Error::success();
310	}
311
312	Error DataReader::readProfile(BinaryContext &BC) {
313	for (auto &BFI : BC.getBinaryFunctions()) {
314	BinaryFunction &Function = BFI.second;
315	readProfile(BF&: Function);
316	}
317
318	uint64_t NumUnused = `0`;
319	for (const auto &KV : NamesToBranches) {
320	const FuncBranchData &FBD = KV.second;
321	if (!FBD.Used)
322	++NumUnused;
323	}
324	BC.setNumUnusedProfiledObjects(NumUnused);
325
326	return Error::success();
327	}
328
329	std::error_code DataReader::parseInput() {
330	ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
331	MemoryBuffer::getFileOrSTDIN(Filename);
332	if (std::error_code EC = MB.getError()) {
333	Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
334	return EC;
335	}
336	FileBuf = std::move(MB.get());
337	ParsingBuf = FileBuf ->getBuffer();
338	if (std::error_code EC = parse())
339	return EC;
340	if (!ParsingBuf.empty())
341	Diag << "WARNING: invalid profile data detected at line " << Line
342	<< ". Possibly corrupted profile.\n";
343
344	buildLTONameMaps();
345
346	return std::error_code ();
347	}
348
349	void DataReader::readProfile(BinaryFunction &BF) {
350	if (BF.empty())
351	return;
352
353	if (!hasLBR()) {
354	BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
355	readSampleData(BF);
356	return;
357	}
358
359	BF.ProfileFlags = BinaryFunction::PF_LBR;
360
361	// Possibly assign/re-assign branch profile data.
362	matchProfileData(BF);
363
364	FuncBranchData *FBD = getBranchData(BF);
365	if (!FBD)
366	return;
367
368	// Assign basic block counts to function entry points. These only include
369	// counts for outside entries.
370	//
371	// There is a slight skew introduced here as branches originated from RETs
372	// may be accounted for in the execution count of an entry block if the last
373	// instruction in a predecessor fall-through block is a call. This situation
374	// should rarely happen because there are few multiple-entry functions.
375	for (const BranchInfo &BI : FBD->EntryData) {
376	BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(Offset: BI.To.Offset);
377	if (BB && (BB->isEntryPoint() \|\| BB->isLandingPad())) {
378	uint64_t Count = BB->getExecutionCount();
379	if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
380	Count = `0`;
381	BB->setExecutionCount(Count + BI.Branches);
382	}
383	}
384
385	for (const BranchInfo &BI : FBD->Data) {
386	if (BI.From.Name != BI.To.Name)
387	continue;
388
389	if (!recordBranch(BF, From: BI.From.Offset, To: BI.To.Offset, Count: BI.Branches,
390	Mispreds: BI.Mispreds)) {
391	LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
392	<< BI.To.Offset << `'\n'`);
393	}
394	}
395
396	// Convert branch data into annotations.
397	convertBranchData(BF);
398	}
399
400	void DataReader::matchProfileData(BinaryFunction &BF) {
401	// This functionality is available for LBR-mode only
402	// TODO: Implement evaluateProfileData() for samples, checking whether
403	// sample addresses match instruction addresses in the function
404	if (!hasLBR())
405	return;
406
407	FuncBranchData *FBD = getBranchData(BF);
408	if (FBD) {
409	BF.ProfileMatchRatio = evaluateProfileData(BF, BranchData: *FBD);
410	BF.RawBranchCount = FBD->getNumExecutedBranches();
411	if (BF.ProfileMatchRatio == `1.0f`) {
412	if (fetchProfileForOtherEntryPoints(BF)) {
413	BF.ProfileMatchRatio = evaluateProfileData(BF, BranchData: *FBD);
414	BF.ExecutionCount = FBD->ExecutionCount;
415	BF.RawBranchCount = FBD->getNumExecutedBranches();
416	}
417	return;
418	}
419	}
420
421	// Check if the function name can fluctuate between several compilations
422	// possibly triggered by minor unrelated code changes in the source code
423	// of the input binary.
424	if (!hasVolatileName(BF))
425	return;
426
427	// Check for a profile that matches with 100% confidence.
428	const std::vector<FuncBranchData *> AllBranchData =
429	getBranchDataForNamesRegex(FuncNames: BF.getNames());
430	for (FuncBranchData *NewBranchData : AllBranchData) {
431	// Prevent functions from sharing the same profile.
432	if (NewBranchData->Used)
433	continue;
434
435	if (evaluateProfileData(BF, BranchData: *NewBranchData) != `1.0f`)
436	continue;
437
438	if (FBD)
439	FBD->Used = false;
440
441	// Update function profile data with the new set.
442	setBranchData(BF, FBD: NewBranchData);
443	NewBranchData->Used = true;
444	BF.ExecutionCount = NewBranchData->ExecutionCount;
445	BF.ProfileMatchRatio = `1.0f`;
446	break;
447	}
448	}
449
450	void DataReader::matchProfileMemData(BinaryFunction &BF) {
451	const std::vector<FuncMemData *> AllMemData =
452	getMemDataForNamesRegex(FuncNames: BF.getNames());
453	for (FuncMemData *NewMemData : AllMemData) {
454	// Prevent functions from sharing the same profile.
455	if (NewMemData->Used)
456	continue;
457
458	if (FuncMemData *MD = getMemData(BF))
459	MD->Used = false;
460
461	// Update function profile data with the new set.
462	setMemData(BF, FMD: NewMemData);
463	NewMemData->Used = true;
464	break;
465	}
466	}
467
468	bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
469	BinaryContext &BC = BF.getBinaryContext();
470
471	FuncBranchData *FBD = getBranchData(BF);
472	if (!FBD)
473	return false;
474
475	// Check if we are missing profiling data for secondary entry points
476	bool First = true;
477	bool Updated = false;
478	for (BinaryBasicBlock *BB : BF.BasicBlocks) {
479	if (First) {
480	First = false;
481	continue;
482	}
483	if (BB->isEntryPoint()) {
484	uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
485	// Look for branch data associated with this entry point
486	if (BinaryData *BD = BC.getBinaryDataAtAddress(Address: EntryAddress)) {
487	if (FuncBranchData *Data = getBranchDataForSymbols(Symbols: BD->getSymbols())) {
488	FBD->appendFrom(FBD: *Data, Offset: BB->getOffset());
489	Data->Used = true;
490	Updated = true;
491	}
492	}
493	}
494	}
495
496	return Updated;
497	}
498
499	float DataReader::evaluateProfileData(BinaryFunction &BF,
500	const FuncBranchData &BranchData) const {
501	BinaryContext &BC = BF.getBinaryContext();
502
503	// Until we define a minimal profile, we consider an empty branch data to be
504	// a valid profile. It could happen to a function without branches when we
505	// still have an EntryData for the execution count.
506	if (BranchData.Data.empty())
507	return `1.0f`;
508
509	uint64_t NumMatchedBranches = `0`;
510	for (const BranchInfo &BI : BranchData.Data) {
511	bool IsValid = false;
512	if (BI.From.Name == BI.To.Name) {
513	// Try to record information with 0 count.
514	IsValid = recordBranch(BF, From: BI.From.Offset, To: BI.To.Offset, Count: `0`);
515	} else if (collectedInBoltedBinary()) {
516	// We can't check branch source for collections in bolted binaries because
517	// the source of the branch may be mapped to the first instruction in a BB
518	// instead of the original branch (which may not exist in the source bin).
519	IsValid = true;
520	} else {
521	// The branch has to originate from this function.
522	// Check for calls, tail calls, rets and indirect branches.
523	// When matching profiling info, we did not reach the stage
524	// when we identify tail calls, so they are still represented
525	// by regular branch instructions and we need isBranch() here.
526	MCInst *Instr = BF.getInstructionAtOffset(Offset: BI.From.Offset);
527	// If it's a prefix - skip it.
528	if (Instr && BC.MIB ->isPrefix(Inst: *Instr))
529	Instr = BF.getInstructionAtOffset(Offset: BI.From.Offset + `1`);
530	if (Instr && (BC.MIB ->isCall(Inst: Instr) \|\| BC.MIB ->isBranch(Inst: Instr) \|\|
531	BC.MIB ->isReturn(Inst: *Instr)))
532	IsValid = true;
533	}
534
535	if (IsValid) {
536	++NumMatchedBranches;
537	continue;
538	}
539
540	LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
541	<< Twine::utohexstr(BI.From.Offset) << " -> ";
542	if (BI.From.Name == BI.To.Name) dbgs()
543	<< "0x" << Twine::utohexstr(BI.To.Offset) << `'\n'`;
544	else dbgs() << "<outbounds>\n";);
545	}
546
547	const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
548	if (opts::Verbosity >= `2` && NumMatchedBranches < BranchData.Data.size())
549	errs() << "BOLT-WARNING: profile branches match only "
550	<< format(Fmt: "%.1f%%", Vals: MatchRatio * `100.0f`) << " ("
551	<< NumMatchedBranches << `'/'` << BranchData.Data.size()
552	<< ") for function " << BF << `'\n'`;
553
554	return MatchRatio;
555	}
556
557	void DataReader::readSampleData(BinaryFunction &BF) {
558	FuncSampleData *SampleDataOrErr = getFuncSampleData(FuncNames: BF.getNames());
559	if (!SampleDataOrErr)
560	return;
561
562	// Basic samples mode territory (without LBR info)
563	// First step is to assign BB execution count based on samples from perf
564	BF.ProfileMatchRatio = `1.0f`;
565	BF.removeTagsFromProfile();
566	bool NormalizeByInsnCount = usesEvent(Name: "cycles") \|\| usesEvent(Name: "instructions");
567	bool NormalizeByCalls = usesEvent(Name: "branches");
568	static bool NagUser = true;
569	if (NagUser) {
570	outs()
571	<< "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
572	if (NormalizeByInsnCount)
573	outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
574	else if (NormalizeByCalls)
575	outs() << "BOLT-INFO: normalizing samples by branches.\n";
576
577	NagUser = false;
578	}
579	uint64_t LastOffset = BF.getSize();
580	uint64_t TotalEntryCount = `0`;
581	for (BinaryFunction::BasicBlockOffset &BBOffset :
582	llvm::reverse(C&: BF.BasicBlockOffsets)) {
583	uint64_t CurOffset = BBOffset.first;
584	// Always work with samples multiplied by 1000 to avoid losing them if we
585	// later need to normalize numbers
586	uint64_t NumSamples =
587	SampleDataOrErr->getSamples(Start: CurOffset, End: LastOffset) * `1000`;
588	if (NormalizeByInsnCount && BBOffset.second->getNumNonPseudos()) {
589	NumSamples /= BBOffset.second->getNumNonPseudos();
590	} else if (NormalizeByCalls) {
591	uint32_t NumCalls = BBOffset.second->getNumCalls();
592	NumSamples /= NumCalls + `1`;
593	}
594	BBOffset.second->setExecutionCount(NumSamples);
595	if (BBOffset.second->isEntryPoint())
596	TotalEntryCount += NumSamples;
597	LastOffset = CurOffset;
598	}
599
600	BF.ExecutionCount = TotalEntryCount;
601
602	estimateEdgeCounts(BF);
603	}
604
605	void DataReader::convertBranchData(BinaryFunction &BF) const {
606	BinaryContext &BC = BF.getBinaryContext();
607
608	if (BF.empty())
609	return;
610
611	FuncBranchData *FBD = getBranchData(BF);
612	if (!FBD)
613	return;
614
615	// Profile information for calls.
616	//
617	// There are 3 cases that we annotate differently:
618	// 1) Conditional tail calls that could be mispredicted.
619	// 2) Indirect calls to multiple destinations with mispredictions.
620	// Before we validate CFG we have to handle indirect branches here too.
621	// 3) Regular direct calls. The count could be different from containing
622	// basic block count. Keep this data in case we find it useful.
623	//
624	for (BranchInfo &BI : FBD->Data) {
625	// Ignore internal branches.
626	if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != `0`)
627	continue;
628
629	MCInst *Instr = BF.getInstructionAtOffset(Offset: BI.From.Offset);
630	if (!Instr \|\|
631	(!BC.MIB ->isCall(Inst: Instr) && !BC.MIB ->isIndirectBranch(Inst: Instr)))
632	continue;
633
634	auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
635	if (opts::Verbosity >= `1` && BC.MIB ->hasAnnotation(Inst: *Instr, Name))
636	errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
637	<< Twine::utohexstr(Val: BI.From.Offset) << " in function " << BF
638	<< `'\n'`;
639	auto &Value = BC.MIB ->getOrCreateAnnotationAs<uint64_t>(Inst&: *Instr, Name);
640	Value += Count;
641	};
642
643	if (BC.MIB ->isIndirectCall(Inst: Instr) \|\| BC.MIB ->isIndirectBranch(Inst: Instr)) {
644	IndirectCallSiteProfile &CSP =
645	BC.MIB ->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
646	Inst&: *Instr, Name: "CallProfile");
647	MCSymbol CalleeSymbol = nullptr*;
648	if (BI.To.IsSymbol) {
649	if (BinaryData *BD = BC.getBinaryDataByName(Name: BI.To.Name))
650	CalleeSymbol = BD->getSymbol();
651	}
652	CSP.emplace_back(Args&: CalleeSymbol, Args&: BI.Branches, Args&: BI.Mispreds);
653	} else if (BC.MIB ->getConditionalTailCall(Inst: *Instr)) {
654	setOrUpdateAnnotation ("CTCTakenCount", BI.Branches);
655	setOrUpdateAnnotation ("CTCMispredCount", BI.Mispreds);
656	} else {
657	setOrUpdateAnnotation ("Count", BI.Branches);
658	}
659	}
660	}
661
662	bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
663	uint64_t Count, uint64_t Mispreds) const {
664	BinaryContext &BC = BF.getBinaryContext();
665
666	BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: From);
667	const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(Offset: To);
668
669	if (!FromBB \|\| !ToBB) {
670	LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
671	return false;
672	}
673
674	// Could be bad LBR data; ignore the branch. In the case of data collected
675	// in binaries optimized by BOLT, a source BB may be mapped to two output
676	// BBs as a result of optimizations. In that case, a branch between these
677	// two will be recorded as a branch from A going to A in the source address
678	// space. Keep processing.
679	if (From == To)
680	return true;
681
682	// Return from a tail call.
683	if (FromBB->succ_size() == `0`)
684	return true;
685
686	// Very rarely we will see ignored branches. Do a linear check.
687	for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches)
688	if (Branch ==
689	std::make_pair(x: static_cast<uint32_t>(From), y: static_cast<uint32_t>(To)))
690	return true;
691
692	bool OffsetMatches = !!(To == ToBB->getOffset());
693	if (!OffsetMatches) {
694	// Skip the nops to support old .fdata
695	uint64_t Offset = ToBB->getOffset();
696	for (const MCInst &Instr : *ToBB) {
697	if (!BC.MIB ->isNoop(Inst: Instr))
698	break;
699
700	if (std::optional<uint32_t> Size = BC.MIB ->getSize(Inst: Instr))
701	Offset += *Size;
702	}
703
704	if (To == Offset)
705	OffsetMatches = true;
706	}
707
708	if (!OffsetMatches) {
709	// "To" could be referring to nop instructions in between 2 basic blocks.
710	// While building the CFG we make sure these nops are attributed to the
711	// previous basic block, thus we check if the destination belongs to the
712	// gap past the last instruction.
713	const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
714	if (LastInstr) {
715	const uint32_t LastInstrOffset =
716	BC.MIB ->getOffsetWithDefault(Inst: *LastInstr, Default: `0`);
717
718	// With old .fdata we are getting FT branches for "jcc,jmp" sequences.
719	if (To == LastInstrOffset && BC.MIB ->isUnconditionalBranch(Inst: *LastInstr))
720	return true;
721
722	if (To <= LastInstrOffset) {
723	LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
724	<< " in " << BF << " : " << From << " -> " << To
725	<< `'\n'`);
726	return false;
727	}
728	}
729
730	// The real destination is the layout successor of the detected ToBB.
731	if (ToBB == BF.getLayout().block_back())
732	return false;
733	const BinaryBasicBlock *NextBB =
734	BF.getLayout().getBlock(Index: ToBB->getIndex() + `1`);
735	assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
736	ToBB = NextBB;
737	}
738
739	// If there's no corresponding instruction for 'From', we have probably
740	// discarded it as a FT from __builtin_unreachable.
741	MCInst *FromInstruction = BF.getInstructionAtOffset(Offset: From);
742	if (!FromInstruction) {
743	// If the data was collected in a bolted binary, the From addresses may be
744	// translated to the first instruction of the source BB if BOLT inserted
745	// a new branch that did not exist in the source (we can't map it to the
746	// source instruction, so we map it to the first instr of source BB).
747	// We do not keep offsets for random instructions. So the check above will
748	// evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
749	if (collectedInBoltedBinary()) {
750	if (FromBB->getInputOffset() != From) {
751	LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
752	<< BF << `'\n'`);
753	return false;
754	}
755	FromInstruction = nullptr;
756	} else {
757	LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
758	<< `'\n'`);
759	return false;
760	}
761	}
762
763	if (!FromBB->getSuccessor(Label: ToBB->getLabel())) {
764	// Check if this is a recursive call or a return from a recursive call.
765	if (FromInstruction && ToBB->isEntryPoint() &&
766	(BC.MIB ->isCall(Inst: *FromInstruction) \|\|
767	BC.MIB ->isIndirectBranch(Inst: *FromInstruction))) {
768	// Execution count is already accounted for.
769	return true;
770	}
771	// For data collected in a bolted binary, we may have created two output BBs
772	// that map to one original block. Branches between these two blocks will
773	// appear here as one BB jumping to itself, even though it has no loop
774	// edges. Ignore these.
775	if (collectedInBoltedBinary() && FromBB == ToBB)
776	return true;
777
778	BinaryBasicBlock FTSuccessor = FromBB->getConditionalSuccessor(Condition: false*);
779	if (FTSuccessor && FTSuccessor->succ_size() == `1` &&
780	FTSuccessor->getSuccessor(Label: ToBB->getLabel())) {
781	BinaryBasicBlock::BinaryBranchInfo &FTBI =
782	FTSuccessor->getBranchInfo(Succ: *ToBB);
783	FTBI.Count += Count;
784	if (Count)
785	FTBI.MispredictedCount += Mispreds;
786	ToBB = FTSuccessor;
787	} else {
788	LLVM_DEBUG(dbgs() << "invalid branch in " << BF
789	<< formatv(": {0:x} -> {1:x}\n", From, To));
790	return false;
791	}
792	}
793
794	BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(Succ: *ToBB);
795	BI.Count += Count;
796	// Only update mispredicted count if it the count was real.
797	if (Count) {
798	BI.MispredictedCount += Mispreds;
799	}
800
801	return true;
802	}
803
804	void DataReader::reportError(StringRef ErrorMsg) {
805	Diag << "Error reading BOLT data input file: line " << Line << ", column "
806	<< Col << ": " << ErrorMsg << `'\n'`;
807	}
808
809	bool DataReader::expectAndConsumeFS() {
810	if (ParsingBuf [`0`] != FieldSeparator) {
811	reportError(ErrorMsg: "expected field separator");
812	return false;
813	}
814	ParsingBuf = ParsingBuf.drop_front(N: `1`);
815	Col += `1`;
816	return true;
817	}
818
819	void DataReader::consumeAllRemainingFS() {
820	while (ParsingBuf [`0`] == FieldSeparator) {
821	ParsingBuf = ParsingBuf.drop_front(N: `1`);
822	Col += `1`;
823	}
824	}
825
826	bool DataReader::checkAndConsumeNewLine() {
827	if (ParsingBuf [`0`] != `'\n'`)
828	return false;
829
830	ParsingBuf = ParsingBuf.drop_front(N: `1`);
831	Col = `0`;
832	Line += `1`;
833	return true;
834	}
835
836	ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
837	if (EndChar == `'\\'`) {
838	reportError(ErrorMsg: "EndChar could not be backslash");
839	return make_error_code(E: llvm::errc::io_error);
840	}
841
842	std::string EndChars(`1`, EndChar);
843	EndChars.push_back(c: `'\\'`);
844	if (EndNl)
845	EndChars.push_back(c: `'\n'`);
846
847	size_t StringEnd = `0`;
848	do {
849	StringEnd = ParsingBuf.find_first_of(Chars: EndChars, From: StringEnd);
850	if (StringEnd == StringRef::npos \|\|
851	(StringEnd == `0` && ParsingBuf [StringEnd] != `'\\'`)) {
852	reportError(ErrorMsg: "malformed field");
853	return make_error_code(E: llvm::errc::io_error);
854	}
855
856	if (ParsingBuf [StringEnd] != `'\\'`)
857	break;
858
859	StringEnd += `2`;
860	} while (true);
861
862	StringRef Str = ParsingBuf.substr(Start: `0`, N: StringEnd);
863
864	// If EndNl was set and nl was found instead of EndChar, do not consume the
865	// new line.
866	bool EndNlInsteadOfEndChar = ParsingBuf [StringEnd] == `'\n'` && EndChar != `'\n'`;
867	unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + `1`;
868
869	ParsingBuf = ParsingBuf.drop_front(N: End);
870	if (EndChar == `'\n'`) {
871	Col = `0`;
872	Line += `1`;
873	} else {
874	Col += End;
875	}
876	return Str;
877	}
878
879	ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
880	ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
881	if (std::error_code EC = NumStrRes.getError())
882	return EC;
883	StringRef NumStr = NumStrRes.get();
884	int64_t Num;
885	if (NumStr.getAsInteger(Radix: `10`, Result&: Num)) {
886	reportError(ErrorMsg: "expected decimal number");
887	Diag << "Found: " << NumStr << "\n";
888	return make_error_code(E: llvm::errc::io_error);
889	}
890	return Num;
891	}
892
893	ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
894	ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
895	if (std::error_code EC = NumStrRes.getError())
896	return EC;
897	StringRef NumStr = NumStrRes.get();
898	uint64_t Num;
899	if (NumStr.getAsInteger(Radix: `16`, Result&: Num)) {
900	reportError(ErrorMsg: "expected hexidecimal number");
901	Diag << "Found: " << NumStr << "\n";
902	return make_error_code(E: llvm::errc::io_error);
903	}
904	return Num;
905	}
906
907	ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
908	bool ExpectMemLoc) {
909	// Read whether the location of the branch should be DSO or a symbol
910	// 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
911	// symbol.
912	// The symbol flag is also used to tag memory load events by adding 3 to the
913	// base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
914	if (!ExpectMemLoc && ParsingBuf [`0`] != `'0'` && ParsingBuf [`0`] != `'1'` &&
915	ParsingBuf [`0`] != `'2'`) {
916	reportError(ErrorMsg: "expected 0, 1 or 2");
917	return make_error_code(E: llvm::errc::io_error);
918	}
919
920	if (ExpectMemLoc && ParsingBuf [`0`] != `'3'` && ParsingBuf [`0`] != `'4'` &&
921	ParsingBuf [`0`] != `'5'`) {
922	reportError(ErrorMsg: "expected 3, 4 or 5");
923	return make_error_code(E: llvm::errc::io_error);
924	}
925
926	bool IsSymbol =
927	(!ExpectMemLoc && (ParsingBuf [`0`] == `'1'` \|\| ParsingBuf [`0`] == `'2'`)) \|\|
928	(ExpectMemLoc && (ParsingBuf [`0`] == `'4'` \|\| ParsingBuf [`0`] == `'5'`));
929	ParsingBuf = ParsingBuf.drop_front(N: `1`);
930	Col += `1`;
931
932	if (!expectAndConsumeFS())
933	return make_error_code(E: llvm::errc::io_error);
934	consumeAllRemainingFS();
935
936	// Read the string containing the symbol or the DSO name
937	ErrorOr<StringRef> NameRes = parseString(EndChar: FieldSeparator);
938	if (std::error_code EC = NameRes.getError())
939	return EC;
940	StringRef Name = NameRes.get();
941	consumeAllRemainingFS();
942
943	// Read the offset
944	ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
945	if (std::error_code EC = Offset.getError())
946	return EC;
947
948	return Location (IsSymbol, Name, Offset.get());
949	}
950
951	ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
952	ErrorOr<Location> Res = parseLocation(EndChar: FieldSeparator);
953	if (std::error_code EC = Res.getError())
954	return EC;
955	Location From = Res.get();
956
957	consumeAllRemainingFS();
958	Res = parseLocation(EndChar: FieldSeparator);
959	if (std::error_code EC = Res.getError())
960	return EC;
961	Location To = Res.get();
962
963	consumeAllRemainingFS();
964	ErrorOr<int64_t> MRes = parseNumberField(EndChar: FieldSeparator);
965	if (std::error_code EC = MRes.getError())
966	return EC;
967	int64_t NumMispreds = MRes.get();
968
969	consumeAllRemainingFS();
970	ErrorOr<int64_t> BRes = parseNumberField(EndChar: FieldSeparator, / EndNl = / true);
971	if (std::error_code EC = BRes.getError())
972	return EC;
973	int64_t NumBranches = BRes.get();
974
975	consumeAllRemainingFS();
976	if (!checkAndConsumeNewLine()) {
977	reportError(ErrorMsg: "expected end of line");
978	return make_error_code(E: llvm::errc::io_error);
979	}
980
981	return BranchInfo (std::move(From), std::move(To), NumMispreds, NumBranches);
982	}
983
984	ErrorOr<MemInfo> DataReader::parseMemInfo() {
985	ErrorOr<Location> Res = parseMemLocation(EndChar: FieldSeparator);
986	if (std::error_code EC = Res.getError())
987	return EC;
988	Location Offset = Res.get();
989
990	consumeAllRemainingFS();
991	Res = parseMemLocation(EndChar: FieldSeparator);
992	if (std::error_code EC = Res.getError())
993	return EC;
994	Location Addr = Res.get();
995
996	consumeAllRemainingFS();
997	ErrorOr<int64_t> CountRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
998	if (std::error_code EC = CountRes.getError())
999	return EC;
1000
1001	consumeAllRemainingFS();
1002	if (!checkAndConsumeNewLine()) {
1003	reportError(ErrorMsg: "expected end of line");
1004	return make_error_code(E: llvm::errc::io_error);
1005	}
1006
1007	return MemInfo (Offset, Addr, CountRes.get());
1008	}
1009
1010	ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1011	ErrorOr<Location> Res = parseLocation(EndChar: FieldSeparator);
1012	if (std::error_code EC = Res.getError())
1013	return EC;
1014	Location Address = Res.get();
1015
1016	consumeAllRemainingFS();
1017	ErrorOr<int64_t> BRes = parseNumberField(EndChar: FieldSeparator, / EndNl = / true);
1018	if (std::error_code EC = BRes.getError())
1019	return EC;
1020	int64_t Occurrences = BRes.get();
1021
1022	consumeAllRemainingFS();
1023	if (!checkAndConsumeNewLine()) {
1024	reportError(ErrorMsg: "expected end of line");
1025	return make_error_code(E: llvm::errc::io_error);
1026	}
1027
1028	return SampleInfo (std::move(Address), Occurrences);
1029	}
1030
1031	ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1032	if (ParsingBuf.size() < `6` \|\| ParsingBuf.substr(Start: `0`, N: `6`) != "no_lbr")
1033	return false;
1034	ParsingBuf = ParsingBuf.drop_front(N: `6`);
1035	Col += `6`;
1036
1037	if (ParsingBuf.size() > `0` && ParsingBuf [`0`] == `' '`)
1038	ParsingBuf = ParsingBuf.drop_front(N: `1`);
1039
1040	while (ParsingBuf.size() > `0` && ParsingBuf [`0`] != `'\n'`) {
1041	ErrorOr<StringRef> EventName = parseString(EndChar: `' '`, EndNl: true);
1042	if (!EventName)
1043	return make_error_code(E: llvm::errc::io_error);
1044	EventNames.insert(key: EventName.get());
1045	}
1046
1047	if (!checkAndConsumeNewLine()) {
1048	reportError(ErrorMsg: "malformed no_lbr line");
1049	return make_error_code(E: llvm::errc::io_error);
1050	}
1051	return true;
1052	}
1053
1054	ErrorOr<bool> DataReader::maybeParseBATFlag() {
1055	if (ParsingBuf.size() < `16` \|\| ParsingBuf.substr(Start: `0`, N: `16`) != "boltedcollection")
1056	return false;
1057	ParsingBuf = ParsingBuf.drop_front(N: `16`);
1058	Col += `16`;
1059
1060	if (!checkAndConsumeNewLine()) {
1061	reportError(ErrorMsg: "malformed boltedcollection line");
1062	return make_error_code(E: llvm::errc::io_error);
1063	}
1064	return true;
1065	}
1066
1067	bool DataReader::hasBranchData() {
1068	if (ParsingBuf.size() == `0`)
1069	return false;
1070
1071	if (ParsingBuf [`0`] == `'0'` \|\| ParsingBuf [`0`] == `'1'` \|\| ParsingBuf [`0`] == `'2'`)
1072	return true;
1073	return false;
1074	}
1075
1076	bool DataReader::hasMemData() {
1077	if (ParsingBuf.size() == `0`)
1078	return false;
1079
1080	if (ParsingBuf [`0`] == `'3'` \|\| ParsingBuf [`0`] == `'4'` \|\| ParsingBuf [`0`] == `'5'`)
1081	return true;
1082	return false;
1083	}
1084
1085	std::error_code DataReader::parseInNoLBRMode() {
1086	auto GetOrCreateFuncEntry = [&](StringRef Name) {
1087	auto I = NamesToSamples.find(x: Name);
1088	if (I == NamesToSamples.end()) {
1089	bool Success;
1090	std::tie(args&: I, args&: Success) = NamesToSamples.insert(x: std::make_pair(
1091	x&: Name, y: FuncSampleData (Name, FuncSampleData::ContainerTy ())));
1092
1093	assert(Success && "unexpected result of insert");
1094	}
1095	return I;
1096	};
1097
1098	auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1099	auto I = NamesToMemEvents.find(x: Name);
1100	if (I == NamesToMemEvents.end()) {
1101	bool Success;
1102	std::tie(args&: I, args&: Success) = NamesToMemEvents.insert(
1103	x: std::make_pair(x&: Name, y: FuncMemData (Name, FuncMemData::ContainerTy ())));
1104	assert(Success && "unexpected result of insert");
1105	}
1106	return I;
1107	};
1108
1109	while (hasBranchData()) {
1110	ErrorOr<SampleInfo> Res = parseSampleInfo();
1111	if (std::error_code EC = Res.getError())
1112	return EC;
1113
1114	SampleInfo SI = Res.get();
1115
1116	// Ignore samples not involving known locations
1117	if (!SI.Loc.IsSymbol)
1118	continue;
1119
1120	auto I = GetOrCreateFuncEntry (SI.Loc.Name);
1121	I ->second.Data.emplace_back(args: std::move(SI));
1122	}
1123
1124	while (hasMemData()) {
1125	ErrorOr<MemInfo> Res = parseMemInfo();
1126	if (std::error_code EC = Res.getError())
1127	return EC;
1128
1129	MemInfo MI = Res.get();
1130
1131	// Ignore memory events not involving known pc.
1132	if (!MI.Offset.IsSymbol)
1133	continue;
1134
1135	auto I = GetOrCreateFuncMemEntry (MI.Offset.Name);
1136	I ->second.Data.emplace_back(args: std::move(MI));
1137	}
1138
1139	for (auto &FuncSamples : NamesToSamples)
1140	llvm::stable_sort(Range&: FuncSamples.second.Data);
1141
1142	for (auto &MemEvents : NamesToMemEvents)
1143	llvm::stable_sort(Range&: MemEvents.second.Data);
1144
1145	return std::error_code ();
1146	}
1147
1148	std::error_code DataReader::parse() {
1149	auto GetOrCreateFuncEntry = [&](StringRef Name) {
1150	auto I = NamesToBranches.find(x: Name);
1151	if (I == NamesToBranches.end()) {
1152	bool Success;
1153	std::tie(args&: I, args&: Success) = NamesToBranches.insert(x: std::make_pair(
1154	x&: Name, y: FuncBranchData (Name, FuncBranchData::ContainerTy (),
1155	FuncBranchData::ContainerTy ())));
1156	assert(Success && "unexpected result of insert");
1157	}
1158	return I;
1159	};
1160
1161	auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1162	auto I = NamesToMemEvents.find(x: Name);
1163	if (I == NamesToMemEvents.end()) {
1164	bool Success;
1165	std::tie(args&: I, args&: Success) = NamesToMemEvents.insert(
1166	x: std::make_pair(x&: Name, y: FuncMemData (Name, FuncMemData::ContainerTy ())));
1167	assert(Success && "unexpected result of insert");
1168	}
1169	return I;
1170	};
1171
1172	Col = `0`;
1173	Line = `1`;
1174	ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1175	if (!FlagOrErr)
1176	return FlagOrErr.getError();
1177	NoLBRMode = *FlagOrErr;
1178
1179	ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1180	if (!BATFlagOrErr)
1181	return BATFlagOrErr.getError();
1182	BATMode = *BATFlagOrErr;
1183
1184	if (!hasBranchData() && !hasMemData()) {
1185	Diag << "ERROR: no valid profile data found\n";
1186	return make_error_code(E: llvm::errc::io_error);
1187	}
1188
1189	if (NoLBRMode)
1190	return parseInNoLBRMode();
1191
1192	while (hasBranchData()) {
1193	ErrorOr<BranchInfo> Res = parseBranchInfo();
1194	if (std::error_code EC = Res.getError())
1195	return EC;
1196
1197	BranchInfo BI = Res.get();
1198
1199	// Ignore branches not involving known location.
1200	if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1201	continue;
1202
1203	auto I = GetOrCreateFuncEntry (BI.From.Name);
1204	I ->second.Data.emplace_back(args: std::move(BI));
1205
1206	// Add entry data for branches to another function or branches
1207	// to entry points (including recursive calls)
1208	if (BI.To.IsSymbol &&
1209	(!BI.From.Name.equals(RHS: BI.To.Name) \|\| BI.To.Offset == `0`)) {
1210	I = GetOrCreateFuncEntry (BI.To.Name);
1211	I ->second.EntryData.emplace_back(args: std::move(BI));
1212	}
1213
1214	// If destination is the function start - update execution count.
1215	// NB: the data is skewed since we cannot tell tail recursion from
1216	// branches to the function start.
1217	if (BI.To.IsSymbol && BI.To.Offset == `0`) {
1218	I = GetOrCreateFuncEntry (BI.To.Name);
1219	I ->second.ExecutionCount += BI.Branches;
1220	}
1221	}
1222
1223	while (hasMemData()) {
1224	ErrorOr<MemInfo> Res = parseMemInfo();
1225	if (std::error_code EC = Res.getError())
1226	return EC;
1227
1228	MemInfo MI = Res.get();
1229
1230	// Ignore memory events not involving known pc.
1231	if (!MI.Offset.IsSymbol)
1232	continue;
1233
1234	auto I = GetOrCreateFuncMemEntry (MI.Offset.Name);
1235	I ->second.Data.emplace_back(args: std::move(MI));
1236	}
1237
1238	for (auto &FuncBranches : NamesToBranches)
1239	llvm::stable_sort(Range&: FuncBranches.second.Data);
1240
1241	for (auto &MemEvents : NamesToMemEvents)
1242	llvm::stable_sort(Range&: MemEvents.second.Data);
1243
1244	return std::error_code ();
1245	}
1246
1247	void DataReader::buildLTONameMaps() {
1248	for (auto &FuncData : NamesToBranches) {
1249	const StringRef FuncName = FuncData.first;
1250	const std::optional<StringRef> CommonName = getLTOCommonName(Name: FuncName);
1251	if (CommonName)
1252	LTOCommonNameMap [*CommonName].push_back(x: &FuncData.second);
1253	}
1254
1255	for (auto &FuncData : NamesToMemEvents) {
1256	const StringRef FuncName = FuncData.first;
1257	const std::optional<StringRef> CommonName = getLTOCommonName(Name: FuncName);
1258	if (CommonName)
1259	LTOCommonNameMemMap [*CommonName].push_back(x: &FuncData.second);
1260	}
1261	}
1262
1263	template <typename MapTy>
1264	static typename MapTy::mapped_type *
1265	fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1266	// Do a reverse order iteration since the name in profile has a higher chance
1267	// of matching a name at the end of the list.
1268	for (const MCSymbol *Symbol : llvm::reverse(C: Symbols)) {
1269	auto I = Map.find(normalizeName(NameRef: Symbol->getName()));
1270	if (I != Map.end())
1271	return &I->second;
1272	}
1273	return nullptr;
1274	}
1275
1276	template <typename MapTy>
1277	static typename MapTy::mapped_type *
1278	fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1279	// Do a reverse order iteration since the name in profile has a higher chance
1280	// of matching a name at the end of the list.
1281	for (StringRef Name : llvm::reverse(C: FuncNames)) {
1282	auto I = Map.find(normalizeName(NameRef: Name));
1283	if (I != Map.end())
1284	return &I->second;
1285	}
1286	return nullptr;
1287	}
1288
1289	template <typename MapTy>
1290	static std::vector<typename MapTy::mapped_type *>
1291	fetchMapEntriesRegex(MapTy &Map,
1292	const StringMap<std::vector<typename MapTy::mapped_type *>>
1293	&LTOCommonNameMap,
1294	const std::vector<StringRef> &FuncNames) {
1295	std::vector<typename MapTy::mapped_type *> AllData;
1296	// Do a reverse order iteration since the name in profile has a higher chance
1297	// of matching a name at the end of the list.
1298	for (StringRef FuncName : llvm::reverse(C: FuncNames)) {
1299	std::string Name = normalizeName(NameRef: FuncName);
1300	const std::optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1301	if (LTOCommonName) {
1302	auto I = LTOCommonNameMap.find(*LTOCommonName);
1303	if (I != LTOCommonNameMap.end()) {
1304	const std::vector<typename MapTy::mapped_type *> &CommonData =
1305	I->second;
1306	AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1307	}
1308	} else {
1309	auto I = Map.find(Name);
1310	if (I != Map.end())
1311	return {&I->second};
1312	}
1313	}
1314	return AllData;
1315	}
1316
1317	bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1318	if (getBranchData(BF: Function) \|\| getMemData(BF: Function))
1319	return true;
1320
1321	if (getFuncSampleData(FuncNames: Function.getNames()) \|\|
1322	getBranchDataForNames(FuncNames: Function.getNames()) \|\|
1323	getMemDataForNames(FuncNames: Function.getNames()))
1324	return true;
1325
1326	if (!hasVolatileName(BF: Function))
1327	return false;
1328
1329	const std::vector<FuncBranchData *> AllBranchData =
1330	getBranchDataForNamesRegex(FuncNames: Function.getNames());
1331	if (!AllBranchData.empty())
1332	return true;
1333
1334	const std::vector<FuncMemData *> AllMemData =
1335	getMemDataForNamesRegex(FuncNames: Function.getNames());
1336	if (!AllMemData.empty())
1337	return true;
1338
1339	return false;
1340	}
1341
1342	FuncBranchData *
1343	DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1344	return fetchMapEntry<NamesToBranchesMapTy>(Map&: NamesToBranches, FuncNames);
1345	}
1346
1347	FuncBranchData *
1348	DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1349	return fetchMapEntry<NamesToBranchesMapTy>(Map&: NamesToBranches, Symbols);
1350	}
1351
1352	FuncMemData *
1353	DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1354	return fetchMapEntry<NamesToMemEventsMapTy>(Map&: NamesToMemEvents, FuncNames);
1355	}
1356
1357	FuncSampleData *
1358	DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1359	return fetchMapEntry<NamesToSamplesMapTy>(Map&: NamesToSamples, FuncNames);
1360	}
1361
1362	std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1363	const std::vector<StringRef> &FuncNames) {
1364	return fetchMapEntriesRegex(Map&: NamesToBranches, LTOCommonNameMap, FuncNames);
1365	}
1366
1367	std::vector<FuncMemData *>
1368	DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1369	return fetchMapEntriesRegex(Map&: NamesToMemEvents, LTOCommonNameMap: LTOCommonNameMemMap, FuncNames);
1370	}
1371
1372	bool DataReader::hasLocalsWithFileName() const {
1373	for (const auto &Func : NamesToBranches) {
1374	const StringRef &FuncName = Func.first;
1375	if (FuncName.count(C: `'/'`) == `2` && FuncName [`0`] != `'/'`)
1376	return true;
1377	}
1378	return false;
1379	}
1380
1381	void DataReader::dump() const {
1382	for (const auto &KV : NamesToBranches) {
1383	const StringRef Name = KV.first;
1384	const FuncBranchData &FBD = KV.second;
1385	Diag << Name << " branches:\n";
1386	for (const BranchInfo &BI : FBD.Data)
1387	Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1388	<< BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1389	Diag << Name << " entry points:\n";
1390	for (const BranchInfo &BI : FBD.EntryData)
1391	Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1392	<< BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1393	}
1394
1395	for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1396	StringRef Event = I ->getKey();
1397	Diag << "Data was collected with event: " << Event << "\n";
1398	}
1399	for (const auto &KV : NamesToSamples) {
1400	const StringRef Name = KV.first;
1401	const FuncSampleData &FSD = KV.second;
1402	Diag << Name << " samples:\n";
1403	for (const SampleInfo &SI : FSD.Data)
1404	Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1405	}
1406
1407	for (const auto &KV : NamesToMemEvents) {
1408	const StringRef Name = KV.first;
1409	const FuncMemData &FMD = KV.second;
1410	Diag << "Memory events for " << Name;
1411	Location LastOffset(`0`);
1412	for (const MemInfo &MI : FMD.Data) {
1413	if (MI.Offset == LastOffset)
1414	Diag << ", " << MI.Addr << "/" << MI.Count;
1415	else
1416	Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1417	LastOffset = MI.Offset;
1418	}
1419	Diag << "\n";
1420	}
1421	}
1422
1423	} // namespace bolt
1424	} // namespace llvm
1425

source code of bolt/lib/Profile/DataReader.cpp