1//===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11//
12//===----------------------------------------------------------------------===//
13
14#include "bolt/Profile/DataAggregator.h"
15#include "bolt/Core/BinaryContext.h"
16#include "bolt/Core/BinaryFunction.h"
17#include "bolt/Passes/BinaryPasses.h"
18#include "bolt/Profile/BoltAddressTranslation.h"
19#include "bolt/Profile/Heatmap.h"
20#include "bolt/Profile/YAMLProfileWriter.h"
21#include "bolt/Utils/CommandLineOpts.h"
22#include "bolt/Utils/Utils.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/ScopeExit.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Compiler.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Support/Errc.h"
29#include "llvm/Support/FileSystem.h"
30#include "llvm/Support/Process.h"
31#include "llvm/Support/Program.h"
32#include "llvm/Support/Regex.h"
33#include "llvm/Support/Timer.h"
34#include "llvm/Support/raw_ostream.h"
35#include <map>
36#include <optional>
37#include <unordered_map>
38#include <utility>
39
40#define DEBUG_TYPE "aggregator"
41
42using namespace llvm;
43using namespace bolt;
44
45namespace opts {
46
47static cl::opt<bool>
48 BasicAggregation("nl",
49 cl::desc("aggregate basic samples (without LBR info)"),
50 cl::cat(AggregatorCategory));
51
52static cl::opt<std::string>
53 ITraceAggregation("itrace",
54 cl::desc("Generate LBR info with perf itrace argument"),
55 cl::cat(AggregatorCategory));
56
57static cl::opt<bool>
58FilterMemProfile("filter-mem-profile",
59 cl::desc("if processing a memory profile, filter out stack or heap accesses "
60 "that won't be useful for BOLT to reduce profile file size"),
61 cl::init(Val: true),
62 cl::cat(AggregatorCategory));
63
64static cl::opt<unsigned long long>
65FilterPID("pid",
66 cl::desc("only use samples from process with specified PID"),
67 cl::init(Val: 0),
68 cl::Optional,
69 cl::cat(AggregatorCategory));
70
71static cl::opt<bool>
72IgnoreBuildID("ignore-build-id",
73 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
74 cl::init(Val: false),
75 cl::cat(AggregatorCategory));
76
77static cl::opt<bool> IgnoreInterruptLBR(
78 "ignore-interrupt-lbr",
79 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
80 cl::init(Val: true), cl::cat(AggregatorCategory));
81
82static cl::opt<unsigned long long>
83MaxSamples("max-samples",
84 cl::init(Val: -1ULL),
85 cl::desc("maximum number of samples to read from LBR profile"),
86 cl::Optional,
87 cl::Hidden,
88 cl::cat(AggregatorCategory));
89
90extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
91extern cl::opt<bool> ProfileWritePseudoProbes;
92extern cl::opt<std::string> SaveProfile;
93
94cl::opt<bool> ReadPreAggregated(
95 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
96 cl::cat(AggregatorCategory));
97
98cl::opt<std::string>
99 ReadPerfEvents("perf-script-events",
100 cl::desc("skip perf event collection by supplying a "
101 "perf-script output in a textual format"),
102 cl::ReallyHidden, cl::init(Val: ""), cl::cat(AggregatorCategory));
103
104static cl::opt<bool>
105TimeAggregator("time-aggr",
106 cl::desc("time BOLT aggregator"),
107 cl::init(Val: false),
108 cl::ZeroOrMore,
109 cl::cat(AggregatorCategory));
110
111} // namespace opts
112
113namespace {
114
115const char TimerGroupName[] = "aggregator";
116const char TimerGroupDesc[] = "Aggregator";
117
118std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
119 std::vector<SectionNameAndRange> sections;
120 for (BinarySection &Section : BC->sections()) {
121 if (!Section.isText())
122 continue;
123 if (Section.getSize() == 0)
124 continue;
125 sections.push_back(
126 x: {.Name: Section.getName(), .BeginAddress: Section.getAddress(), .EndAddress: Section.getEndAddress()});
127 }
128 llvm::sort(C&: sections,
129 Comp: [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
130 return A.BeginAddress < B.BeginAddress;
131 });
132 return sections;
133}
134}
135
136constexpr uint64_t DataAggregator::KernelBaseAddr;
137
138DataAggregator::~DataAggregator() { deleteTempFiles(); }
139
140namespace {
141void deleteTempFile(const std::string &FileName) {
142 if (std::error_code Errc = sys::fs::remove(path: FileName.c_str()))
143 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
144 << " with error " << Errc.message() << "\n";
145}
146}
147
148void DataAggregator::deleteTempFiles() {
149 for (std::string &FileName : TempFiles)
150 deleteTempFile(FileName);
151 TempFiles.clear();
152}
153
154void DataAggregator::findPerfExecutable() {
155 std::optional<std::string> PerfExecutable =
156 sys::Process::FindInEnvPath(EnvName: "PATH", FileName: "perf");
157 if (!PerfExecutable) {
158 outs() << "PERF2BOLT: No perf executable found!\n";
159 exit(status: 1);
160 }
161 PerfPath = *PerfExecutable;
162}
163
164void DataAggregator::start() {
165 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
166
167 // Turn on heatmap building if requested by --heatmap flag.
168 if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
169 opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;
170
171 // Don't launch perf for pre-aggregated files or when perf input is specified
172 // by the user.
173 if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
174 return;
175
176 findPerfExecutable();
177
178 if (opts::BasicAggregation) {
179 launchPerfProcess(Name: "events without LBR",
180 PPI&: MainEventsPPI,
181 ArgsString: "script -F pid,event,ip",
182 /*Wait = */false);
183 } else if (!opts::ITraceAggregation.empty()) {
184 std::string ItracePerfScriptArgs = llvm::formatv(
185 Fmt: "script -F pid,brstack --itrace={0}", Vals&: opts::ITraceAggregation);
186 launchPerfProcess(Name: "branch events with itrace", PPI&: MainEventsPPI,
187 ArgsString: ItracePerfScriptArgs.c_str(),
188 /*Wait = */ false);
189 } else {
190 launchPerfProcess(Name: "branch events", PPI&: MainEventsPPI, ArgsString: "script -F pid,brstack",
191 /*Wait = */ false);
192 }
193
194 // Note: we launch script for mem events regardless of the option, as the
195 // command fails fairly fast if mem events were not collected.
196 launchPerfProcess(Name: "mem events",
197 PPI&: MemEventsPPI,
198 ArgsString: "script -F pid,event,addr,ip",
199 /*Wait = */false);
200
201 launchPerfProcess(Name: "process events", PPI&: MMapEventsPPI,
202 ArgsString: "script --show-mmap-events --no-itrace",
203 /*Wait = */ false);
204
205 launchPerfProcess(Name: "task events", PPI&: TaskEventsPPI,
206 ArgsString: "script --show-task-events --no-itrace",
207 /*Wait = */ false);
208}
209
210void DataAggregator::abort() {
211 if (opts::ReadPreAggregated)
212 return;
213
214 std::string Error;
215
216 // Kill subprocesses in case they are not finished
217 sys::Wait(PI: TaskEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
218 sys::Wait(PI: MMapEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
219 sys::Wait(PI: MainEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
220 sys::Wait(PI: MemEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
221
222 deleteTempFiles();
223
224 exit(status: 1);
225}
226
227void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
228 const char *ArgsString, bool Wait) {
229 SmallVector<StringRef, 4> Argv;
230
231 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
232 Argv.push_back(Elt: PerfPath.data());
233
234 StringRef(ArgsString).split(A&: Argv, Separator: ' ');
235 Argv.push_back(Elt: "-f");
236 Argv.push_back(Elt: "-i");
237 Argv.push_back(Elt: Filename.c_str());
238
239 if (std::error_code Errc =
240 sys::fs::createTemporaryFile(Prefix: "perf.script", Suffix: "out", ResultPath&: PPI.StdoutPath)) {
241 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
242 << " with error " << Errc.message() << "\n";
243 exit(status: 1);
244 }
245 TempFiles.push_back(x: PPI.StdoutPath.data());
246
247 if (std::error_code Errc =
248 sys::fs::createTemporaryFile(Prefix: "perf.script", Suffix: "err", ResultPath&: PPI.StderrPath)) {
249 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
250 << " with error " << Errc.message() << "\n";
251 exit(status: 1);
252 }
253 TempFiles.push_back(x: PPI.StderrPath.data());
254
255 std::optional<StringRef> Redirects[] = {
256 std::nullopt, // Stdin
257 StringRef(PPI.StdoutPath.data()), // Stdout
258 StringRef(PPI.StderrPath.data())}; // Stderr
259
260 LLVM_DEBUG({
261 dbgs() << "Launching perf: ";
262 for (StringRef Arg : Argv)
263 dbgs() << Arg << " ";
264 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
265 << "\n";
266 });
267
268 if (Wait)
269 PPI.PI.ReturnCode = sys::ExecuteAndWait(Program: PerfPath.data(), Args: Argv,
270 /*envp*/ Env: std::nullopt, Redirects);
271 else
272 PPI.PI = sys::ExecuteNoWait(Program: PerfPath.data(), Args: Argv, /*envp*/ Env: std::nullopt,
273 Redirects);
274}
275
276void DataAggregator::processFileBuildID(StringRef FileBuildID) {
277 PerfProcessInfo BuildIDProcessInfo;
278 launchPerfProcess(Name: "buildid list",
279 PPI&: BuildIDProcessInfo,
280 ArgsString: "buildid-list",
281 /*Wait = */true);
282
283 if (BuildIDProcessInfo.PI.ReturnCode != 0) {
284 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
285 MemoryBuffer::getFileOrSTDIN(Filename: BuildIDProcessInfo.StderrPath.data());
286 StringRef ErrBuf = (*MB)->getBuffer();
287
288 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
289 << '\n';
290 errs() << ErrBuf;
291 return;
292 }
293
294 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
295 MemoryBuffer::getFileOrSTDIN(Filename: BuildIDProcessInfo.StdoutPath.data());
296 if (std::error_code EC = MB.getError()) {
297 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
298 << EC.message() << "\n";
299 return;
300 }
301
302 FileBuf = std::move(*MB);
303 ParsingBuf = FileBuf->getBuffer();
304
305 std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
306 if (!FileName) {
307 if (hasAllBuildIDs()) {
308 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
309 "This indicates the input binary supplied for data aggregation "
310 "is not the same recorded by perf when collecting profiling "
311 "data, or there were no samples recorded for the binary. "
312 "Use -ignore-build-id option to override.\n";
313 if (!opts::IgnoreBuildID)
314 abort();
315 } else {
316 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
317 "data was recorded without it\n";
318 return;
319 }
320 } else if (*FileName != llvm::sys::path::filename(path: BC->getFilename())) {
321 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
322 BuildIDBinaryName = std::string(*FileName);
323 } else {
324 outs() << "PERF2BOLT: matched build-id and file name\n";
325 }
326}
327
328bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
329 if (opts::ReadPreAggregated)
330 return true;
331
332 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(Name: FileName);
333 if (!FD) {
334 consumeError(Err: FD.takeError());
335 return false;
336 }
337
338 char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
339
340 auto Close = make_scope_exit(F: [&] { sys::fs::closeFile(F&: *FD); });
341 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
342 FileHandle: *FD, Buf: MutableArrayRef(Buf, sizeof(Buf)), Offset: 0);
343 if (!BytesRead) {
344 consumeError(Err: BytesRead.takeError());
345 return false;
346 }
347
348 if (*BytesRead != 7)
349 return false;
350
351 if (strncmp(s1: Buf, s2: "PERFILE", n: 7) == 0)
352 return true;
353 return false;
354}
355
356void DataAggregator::parsePreAggregated() {
357 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
358 MemoryBuffer::getFileOrSTDIN(Filename);
359 if (std::error_code EC = MB.getError()) {
360 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
361 << EC.message() << "\n";
362 exit(status: 1);
363 }
364
365 FileBuf = std::move(*MB);
366 ParsingBuf = FileBuf->getBuffer();
367 Col = 0;
368 Line = 1;
369 if (parsePreAggregatedLBRSamples()) {
370 errs() << "PERF2BOLT: failed to parse samples\n";
371 exit(status: 1);
372 }
373}
374
375void DataAggregator::filterBinaryMMapInfo() {
376 if (opts::FilterPID) {
377 auto MMapInfoIter = BinaryMMapInfo.find(x: opts::FilterPID);
378 if (MMapInfoIter != BinaryMMapInfo.end()) {
379 MMapInfo MMap = MMapInfoIter->second;
380 BinaryMMapInfo.clear();
381 BinaryMMapInfo.insert(x: std::make_pair(x&: MMap.PID, y&: MMap));
382 } else {
383 if (errs().has_colors())
384 errs().changeColor(Color: raw_ostream::RED);
385 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
386 << opts::FilterPID << "\""
387 << " for binary \"" << BC->getFilename() << "\".";
388 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
389 errs() << " Profile for the following process is available:\n";
390 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
391 outs() << " " << MMI.second.PID
392 << (MMI.second.Forked ? " (forked)\n" : "\n");
393
394 if (errs().has_colors())
395 errs().resetColor();
396
397 exit(status: 1);
398 }
399 }
400}
401
402int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
403 PerfProcessErrorCallbackTy Callback) {
404 if (!opts::ReadPerfEvents.empty()) {
405 outs() << "PERF2BOLT: using pre-processed perf events for '" << Name
406 << "' (perf-script-events)\n";
407 ParsingBuf = opts::ReadPerfEvents;
408 return 0;
409 }
410
411 std::string Error;
412 outs() << "PERF2BOLT: waiting for perf " << Name
413 << " collection to finish...\n";
414 sys::ProcessInfo PI = sys::Wait(PI: Process.PI, SecondsToWait: std::nullopt, ErrMsg: &Error);
415
416 if (!Error.empty()) {
417 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
418 deleteTempFiles();
419 exit(status: 1);
420 }
421
422 if (PI.ReturnCode != 0) {
423 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
424 MemoryBuffer::getFileOrSTDIN(Filename: Process.StderrPath.data());
425 StringRef ErrBuf = (*ErrorMB)->getBuffer();
426
427 deleteTempFiles();
428 Callback(PI.ReturnCode, ErrBuf);
429 return PI.ReturnCode;
430 }
431
432 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
433 MemoryBuffer::getFileOrSTDIN(Filename: Process.StdoutPath.data());
434 if (std::error_code EC = MB.getError()) {
435 errs() << "Cannot open " << Process.StdoutPath.data() << ": "
436 << EC.message() << "\n";
437 deleteTempFiles();
438 exit(status: 1);
439 }
440
441 FileBuf = std::move(*MB);
442 ParsingBuf = FileBuf->getBuffer();
443 Col = 0;
444 Line = 1;
445 return PI.ReturnCode;
446}
447
448Error DataAggregator::preprocessProfile(BinaryContext &BC) {
449 this->BC = &BC;
450
451 auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
452 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
453 exit(status: 1);
454 };
455
456 auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
457 Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
458 "Cannot print 'addr' field.");
459 if (!NoData.match(String: ErrBuf))
460 ErrorCallback(ReturnCode, ErrBuf);
461 };
462
463 if (opts::ReadPreAggregated) {
464 parsePreAggregated();
465 goto heatmap;
466 }
467
468 if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
469 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
470 processFileBuildID(FileBuildID: *FileBuildID);
471 } else {
472 errs() << "BOLT-WARNING: build-id will not be checked because we could "
473 "not read one from input binary\n";
474 }
475
476 if (BC.IsLinuxKernel) {
477 // Current MMap parsing logic does not work with linux kernel.
478 // MMap entries for linux kernel uses PERF_RECORD_MMAP
479 // format instead of typical PERF_RECORD_MMAP2 format.
480 // Since linux kernel address mapping is absolute (same as
481 // in the ELF file), we avoid parsing MMap in linux kernel mode.
482 // While generating optimized linux kernel binary, we may need
483 // to parse MMap entries.
484
485 // In linux kernel mode, we analyze and optimize
486 // all linux kernel binary instructions, irrespective
487 // of whether they are due to system calls or due to
488 // interrupts. Therefore, we cannot ignore interrupt
489 // in Linux kernel mode.
490 opts::IgnoreInterruptLBR = false;
491 } else {
492 prepareToParse(Name: "mmap events", Process&: MMapEventsPPI, Callback: ErrorCallback);
493 if (parseMMapEvents())
494 errs() << "PERF2BOLT: failed to parse mmap events\n";
495 }
496
497 prepareToParse(Name: "task events", Process&: TaskEventsPPI, Callback: ErrorCallback);
498 if (parseTaskEvents())
499 errs() << "PERF2BOLT: failed to parse task events\n";
500
501 filterBinaryMMapInfo();
502 prepareToParse(Name: "events", Process&: MainEventsPPI, Callback: ErrorCallback);
503
504 if ((!opts::BasicAggregation && parseBranchEvents()) ||
505 (opts::BasicAggregation && parseBasicEvents()))
506 errs() << "PERF2BOLT: failed to parse samples\n";
507
508 // Special handling for memory events
509 if (!prepareToParse(Name: "mem events", Process&: MemEventsPPI, Callback: MemEventsErrorCallback))
510 if (const std::error_code EC = parseMemEvents())
511 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
512 << '\n';
513
514 deleteTempFiles();
515
516heatmap:
517 if (!opts::HeatmapMode)
518 return Error::success();
519
520 if (std::error_code EC = printLBRHeatMap())
521 return errorCodeToError(EC);
522
523 if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Optional)
524 return Error::success();
525
526 assert(opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive);
527 exit(status: 0);
528}
529
530Error DataAggregator::readProfile(BinaryContext &BC) {
531 processProfile(BC);
532
533 for (auto &BFI : BC.getBinaryFunctions()) {
534 BinaryFunction &Function = BFI.second;
535 convertBranchData(BF&: Function);
536 }
537
538 if (opts::AggregateOnly) {
539 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
540 if (std::error_code EC = writeAggregatedFile(OutputFilename: opts::OutputFilename))
541 report_error(Message: "cannot create output data file", EC);
542
543 // BAT YAML is handled by DataAggregator since normal YAML output requires
544 // CFG which is not available in BAT mode.
545 if (usesBAT()) {
546 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
547 if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::OutputFilename))
548 report_error(Message: "cannot create output data file", EC);
549 if (!opts::SaveProfile.empty())
550 if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::SaveProfile))
551 report_error(Message: "cannot create output data file", EC);
552 }
553 }
554
555 return Error::success();
556}
557
558bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
559 return Function.hasProfileAvailable();
560}
561
562void DataAggregator::processProfile(BinaryContext &BC) {
563 if (opts::BasicAggregation)
564 processBasicEvents();
565 else
566 processBranchEvents();
567
568 processMemEvents();
569
570 // Mark all functions with registered events as having a valid profile.
571 for (auto &BFI : BC.getBinaryFunctions()) {
572 BinaryFunction &BF = BFI.second;
573 if (FuncBranchData *FBD = getBranchData(BF)) {
574 BF.markProfiled(Flags: BinaryFunction::PF_BRANCH);
575 BF.RawSampleCount = FBD->getNumExecutedBranches();
576 } else if (FuncBasicSampleData *FSD =
577 getFuncBasicSampleData(FuncNames: BF.getNames())) {
578 BF.markProfiled(Flags: BinaryFunction::PF_BASIC);
579 BF.RawSampleCount = FSD->getSamples();
580 }
581 }
582
583 for (auto &FuncBranches : NamesToBranches) {
584 llvm::stable_sort(Range&: FuncBranches.second.Data);
585 llvm::stable_sort(Range&: FuncBranches.second.EntryData);
586 }
587
588 for (auto &MemEvents : NamesToMemEvents)
589 llvm::stable_sort(Range&: MemEvents.second.Data);
590
591 // Release intermediate storage.
592 clear(Container&: BranchLBRs);
593 clear(Container&: FallthroughLBRs);
594 clear(Container&: BasicSamples);
595 clear(Container&: MemSamples);
596}
597
598BinaryFunction *
599DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
600 if (!BC->containsAddress(Address))
601 return nullptr;
602
603 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
604 /*UseMaxSize=*/true);
605}
606
607BinaryFunction *
608DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
609 if (BAT)
610 if (const uint64_t HotAddr = BAT->fetchParentAddress(Address: Func.getAddress()))
611 return getBinaryFunctionContainingAddress(Address: HotAddr);
612 return nullptr;
613}
614
615StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
616 bool BAT) {
617 if (!BAT)
618 return Func.getOneName();
619
620 const BinaryFunction *OrigFunc = &Func;
621 // If it is a local function, prefer the name containing the file name where
622 // the local function was declared
623 for (StringRef AlternativeName : OrigFunc->getNames()) {
624 size_t FileNameIdx = AlternativeName.find(C: '/');
625 // Confirm the alternative name has the pattern Symbol/FileName/1 before
626 // using it
627 if (FileNameIdx == StringRef::npos ||
628 AlternativeName.find(C: '/', From: FileNameIdx + 1) == StringRef::npos)
629 continue;
630 return AlternativeName;
631 }
632 return OrigFunc->getOneName();
633}
634
635bool DataAggregator::doBasicSample(BinaryFunction &OrigFunc, uint64_t Address,
636 uint64_t Count) {
637 // To record executed bytes, use basic block size as is regardless of BAT.
638 uint64_t BlockSize = 0;
639 if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
640 Offset: Address - OrigFunc.getAddress()))
641 BlockSize = BB->getOriginalSize();
642
643 BinaryFunction *ParentFunc = getBATParentFunction(Func: OrigFunc);
644 BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
645 // Attach executed bytes to parent function in case of cold fragment.
646 Func.SampleCountInBytes += Count * BlockSize;
647
648 auto I = NamesToBasicSamples.find(x: Func.getOneName());
649 if (I == NamesToBasicSamples.end()) {
650 bool Success;
651 StringRef LocName = getLocationName(Func, BAT);
652 std::tie(args&: I, args&: Success) = NamesToBasicSamples.insert(x: std::make_pair(
653 x: Func.getOneName(),
654 y: FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy())));
655 }
656
657 Address -= Func.getAddress();
658 if (BAT)
659 Address = BAT->translate(FuncAddress: Func.getAddress(), Offset: Address, /*IsBranchSrc=*/false);
660
661 I->second.bumpCount(Offset: Address, Count);
662 return true;
663}
664
665bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
666 uint64_t To, uint64_t Count,
667 uint64_t Mispreds) {
668 FuncBranchData *AggrData = getBranchData(BF: Func);
669 if (!AggrData) {
670 AggrData = &NamesToBranches[Func.getOneName()];
671 AggrData->Name = getLocationName(Func, BAT);
672 setBranchData(BF: Func, FBD: AggrData);
673 }
674
675 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
676 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
677 AggrData->bumpBranchCount(OffsetFrom: From, OffsetTo: To, Count, Mispreds);
678 return true;
679}
680
681bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
682 BinaryFunction *ToFunc, uint64_t From,
683 uint64_t To, uint64_t Count,
684 uint64_t Mispreds) {
685 FuncBranchData *FromAggrData = nullptr;
686 FuncBranchData *ToAggrData = nullptr;
687 StringRef SrcFunc;
688 StringRef DstFunc;
689 if (FromFunc) {
690 SrcFunc = getLocationName(Func: *FromFunc, BAT);
691 FromAggrData = getBranchData(BF: *FromFunc);
692 if (!FromAggrData) {
693 FromAggrData = &NamesToBranches[FromFunc->getOneName()];
694 FromAggrData->Name = SrcFunc;
695 setBranchData(BF: *FromFunc, FBD: FromAggrData);
696 }
697
698 recordExit(BF&: *FromFunc, From, Mispred: Mispreds, Count);
699 }
700 if (ToFunc) {
701 DstFunc = getLocationName(Func: *ToFunc, BAT);
702 ToAggrData = getBranchData(BF: *ToFunc);
703 if (!ToAggrData) {
704 ToAggrData = &NamesToBranches[ToFunc->getOneName()];
705 ToAggrData->Name = DstFunc;
706 setBranchData(BF: *ToFunc, FBD: ToAggrData);
707 }
708
709 recordEntry(BF&: *ToFunc, To, Mispred: Mispreds, Count);
710 }
711
712 if (FromAggrData)
713 FromAggrData->bumpCallCount(OffsetFrom: From, To: Location(!DstFunc.empty(), DstFunc, To),
714 Count, Mispreds);
715 if (ToAggrData)
716 ToAggrData->bumpEntryCount(From: Location(!SrcFunc.empty(), SrcFunc, From), OffsetTo: To,
717 Count, Mispreds);
718 return true;
719}
720
721bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
722 uint64_t Mispreds) {
723 // Returns whether \p Offset in \p Func contains a return instruction.
724 auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
725 auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(Inst: *MI); };
726 return Func.hasInstructions()
727 ? isReturn(Func.getInstructionAtOffset(Offset))
728 : isReturn(Func.disassembleInstructionAtOffset(Offset));
729 };
730
731 // Mutates \p Addr to an offset into the containing function, performing BAT
732 // offset translation and parent lookup.
733 //
734 // Returns the containing function (or BAT parent) and whether the address
735 // corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
736 auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
737 BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: Addr);
738 if (!Func) {
739 Addr = 0;
740 return std::pair{Func, false};
741 }
742
743 Addr -= Func->getAddress();
744
745 bool IsRet = IsFrom && checkReturn(*Func, Addr);
746
747 if (BAT)
748 Addr = BAT->translate(FuncAddress: Func->getAddress(), Offset: Addr, IsBranchSrc: IsFrom);
749
750 if (BinaryFunction *ParentFunc = getBATParentFunction(Func: *Func))
751 Func = ParentFunc;
752
753 return std::pair{Func, IsRet};
754 };
755
756 auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true);
757 auto [ToFunc, _] = handleAddress(To, /*IsFrom*/ false);
758 if (!FromFunc && !ToFunc)
759 return false;
760
761 // Ignore returns.
762 if (IsReturn)
763 return true;
764
765 // Treat recursive control transfers as inter-branches.
766 if (FromFunc == ToFunc && To != 0) {
767 recordBranch(BF&: *FromFunc, From, To, Count, Mispreds);
768 return doIntraBranch(Func&: *FromFunc, From, To, Count, Mispreds);
769 }
770
771 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
772}
773
774bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
775 uint64_t Count) {
776 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(Address: First.To);
777 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Address: Second.From);
778 if (!FromFunc || !ToFunc) {
779 LLVM_DEBUG({
780 dbgs() << "Out of range trace starting in ";
781 if (FromFunc)
782 dbgs() << formatv("{0} @ {1:x}", *FromFunc,
783 First.To - FromFunc->getAddress());
784 else
785 dbgs() << Twine::utohexstr(First.To);
786 dbgs() << " and ending in ";
787 if (ToFunc)
788 dbgs() << formatv("{0} @ {1:x}", *ToFunc,
789 Second.From - ToFunc->getAddress());
790 else
791 dbgs() << Twine::utohexstr(Second.From);
792 dbgs() << '\n';
793 });
794 NumLongRangeTraces += Count;
795 return false;
796 }
797 if (FromFunc != ToFunc) {
798 NumInvalidTraces += Count;
799 LLVM_DEBUG({
800 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
801 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
802 << " and ending in " << ToFunc->getPrintName()
803 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
804 });
805 return false;
806 }
807
808 // Set ParentFunc to BAT parent function or FromFunc itself.
809 BinaryFunction *ParentFunc = getBATParentFunction(Func: *FromFunc);
810 if (!ParentFunc)
811 ParentFunc = FromFunc;
812 ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);
813
814 const uint64_t FuncAddress = FromFunc->getAddress();
815 std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
816 BAT && BAT->isBATFunction(Address: FuncAddress)
817 ? BAT->getFallthroughsInTrace(FuncAddress, From: First.To, To: Second.From)
818 : getFallthroughsInTrace(BF&: *FromFunc, First, Second, Count);
819 if (!FTs) {
820 LLVM_DEBUG(
821 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
822 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
823 << " and ending in " << ToFunc->getPrintName() << " @ "
824 << ToFunc->getPrintName() << " @ "
825 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
826 NumInvalidTraces += Count;
827 return false;
828 }
829
830 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
831 << FromFunc->getPrintName() << ":"
832 << Twine::utohexstr(First.To) << " to "
833 << Twine::utohexstr(Second.From) << ".\n");
834 for (auto [From, To] : *FTs) {
835 if (BAT) {
836 From = BAT->translate(FuncAddress: FromFunc->getAddress(), Offset: From, /*IsBranchSrc=*/true);
837 To = BAT->translate(FuncAddress: FromFunc->getAddress(), Offset: To, /*IsBranchSrc=*/false);
838 }
839 doIntraBranch(Func&: *ParentFunc, From, To, Count, Mispreds: false);
840 }
841
842 return true;
843}
844
/// Compute the fall-through edges taken between two consecutive LBR entries
/// inside \p BF and add \p Count to the branch info of each such edge.
///
/// The trace runs from FirstLBR.To to SecondLBR.From; both are converted to
/// offsets relative to the address of \p BF.
///
/// \returns std::nullopt when the trace ends before it starts, when \p BF is
/// not simple, when either end is not inside a basic block, or when two
/// consecutive layout blocks on the path are not connected by a successor
/// edge. Returns an empty list for pseudo functions and for traces that stay
/// within one basic block. Otherwise returns one (source offset, target
/// offset) pair per fall-through, in layout order.
845std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
846DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
847 const LBREntry &FirstLBR,
848 const LBREntry &SecondLBR,
849 uint64_t Count) const {
850 SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches;
851
852 BinaryContext &BC = BF.getBinaryContext();
853
854 // Offsets of the trace within this function.
855 const uint64_t From = FirstLBR.To - BF.getAddress();
856 const uint64_t To = SecondLBR.From - BF.getAddress();
857
// A trace that ends before it starts cannot be a fall-through path.
858 if (From > To)
859 return std::nullopt;
860
861 // Accept fall-throughs inside pseudo functions (PLT/thunks).
862 // This check has to be above BF.empty as pseudo functions would pass it:
863 // pseudo => ignored => CFG not built => empty.
864 // If we return nullopt, trace would be reported as mismatching disassembled
865 // function contents which it is not. To avoid this, return an empty
866 // fall-through list instead.
867 if (BF.isPseudo())
868 return Branches;
869
870 if (!BF.isSimple())
871 return std::nullopt;
872
873 assert(BF.hasCFG() && "can only record traces in CFG state");
874
875 const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: From);
876 const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(Offset: To);
877
878 if (!FromBB || !ToBB)
879 return std::nullopt;
880
881 // Adjust FromBB if the first LBR is a return from the last instruction in
882 // the previous block (that instruction should be a call).
// The adjustment only applies when the trace starts exactly at the beginning
// of a block that is neither an entry point nor a landing pad, and the
// incoming branch originated outside this function.
883 if (From == FromBB->getOffset() && !BF.containsAddress(PC: FirstLBR.From) &&
884 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
885 const BinaryBasicBlock *PrevBB =
886 BF.getLayout().getBlock(Index: FromBB->getIndex() - 1);
887 if (PrevBB->getSuccessor(Label: FromBB->getLabel())) {
888 const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
889 if (Instr && BC.MIB->isCall(Inst: *Instr))
890 FromBB = PrevBB;
891 else
892 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
893 << '\n');
894 } else {
895 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
896 }
897 }
898
899 // Fill out information for fall-through edges. The From and To could be
900 // within the same basic block, e.g. when two call instructions are in the
901 // same block. In this case we skip the processing.
902 if (FromBB == ToBB)
903 return Branches;
904
905 // Process blocks in the original layout order.
906 BinaryBasicBlock *BB = BF.getLayout().getBlock(Index: FromBB->getIndex());
907 assert(BB == FromBB && "index mismatch");
908 while (BB != ToBB) {
909 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(Index: BB->getIndex() + 1);
910 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
911
912 // Check for bad LBRs.
913 if (!BB->getSuccessor(Label: NextBB->getLabel())) {
914 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
915 << " " << FirstLBR << '\n'
916 << " " << SecondLBR << '\n');
917 return std::nullopt;
918 }
919
// The source offset of the edge is taken from the block's last non-pseudo
// instruction (0 if that instruction carries no offset); a block without
// such an instruction contributes its own offset.
920 const MCInst *Instr = BB->getLastNonPseudoInstr();
921 uint64_t Offset = 0;
922 if (Instr)
923 Offset = BC.MIB->getOffsetWithDefault(Inst: *Instr, Default: 0);
924 else
925 Offset = BB->getOffset();
926
927 Branches.emplace_back(Args&: Offset, Args: NextBB->getOffset());
928
929 BB = NextBB;
930 }
931
932 // Record fall-through jumps
// Only reached once the whole path has been validated, so a rejected trace
// leaves the branch counts untouched.
933 for (const auto &[FromOffset, ToOffset] : Branches) {
934 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: FromOffset);
935 BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(Offset: ToOffset);
936 assert(FromBB && ToBB);
937 BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(Succ: *ToBB);
938 BI.Count += Count;
939 }
940
941 return Branches;
942}
943
944bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
945 uint64_t Count) const {
946 if (To > BF.getSize())
947 return false;
948
949 if (!BF.hasProfile())
950 BF.ExecutionCount = 0;
951
952 BinaryBasicBlock *EntryBB = nullptr;
953 if (To == 0) {
954 BF.ExecutionCount += Count;
955 if (!BF.empty())
956 EntryBB = &BF.front();
957 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(Offset: To)) {
958 if (BB->isEntryPoint())
959 EntryBB = BB;
960 }
961
962 if (EntryBB)
963 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
964
965 return true;
966}
967
968bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
969 uint64_t Count) const {
970 if (!BF.isSimple() || From > BF.getSize())
971 return false;
972
973 if (!BF.hasProfile())
974 BF.ExecutionCount = 0;
975
976 return true;
977}
978
979ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
980 LBREntry Res;
981 ErrorOr<StringRef> FromStrRes = parseString(EndChar: '/');
982 if (std::error_code EC = FromStrRes.getError())
983 return EC;
984 StringRef OffsetStr = FromStrRes.get();
985 if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.From)) {
986 reportError(ErrorMsg: "expected hexadecimal number with From address");
987 Diag << "Found: " << OffsetStr << "\n";
988 return make_error_code(E: llvm::errc::io_error);
989 }
990
991 ErrorOr<StringRef> ToStrRes = parseString(EndChar: '/');
992 if (std::error_code EC = ToStrRes.getError())
993 return EC;
994 OffsetStr = ToStrRes.get();
995 if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.To)) {
996 reportError(ErrorMsg: "expected hexadecimal number with To address");
997 Diag << "Found: " << OffsetStr << "\n";
998 return make_error_code(E: llvm::errc::io_error);
999 }
1000
1001 ErrorOr<StringRef> MispredStrRes = parseString(EndChar: '/');
1002 if (std::error_code EC = MispredStrRes.getError())
1003 return EC;
1004 StringRef MispredStr = MispredStrRes.get();
1005 if (MispredStr.size() != 1 ||
1006 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1007 reportError(ErrorMsg: "expected single char for mispred bit");
1008 Diag << "Found: " << MispredStr << "\n";
1009 return make_error_code(E: llvm::errc::io_error);
1010 }
1011 Res.Mispred = MispredStr[0] == 'M';
1012
1013 static bool MispredWarning = true;
1014 if (MispredStr[0] == '-' && MispredWarning) {
1015 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1016 MispredWarning = false;
1017 }
1018
1019 ErrorOr<StringRef> Rest = parseString(EndChar: FieldSeparator, EndNl: true);
1020 if (std::error_code EC = Rest.getError())
1021 return EC;
1022 if (Rest.get().size() < 5) {
1023 reportError(ErrorMsg: "expected rest of LBR entry");
1024 Diag << "Found: " << Rest.get() << "\n";
1025 return make_error_code(E: llvm::errc::io_error);
1026 }
1027 return Res;
1028}
1029
1030bool DataAggregator::checkAndConsumeFS() {
1031 if (ParsingBuf[0] != FieldSeparator)
1032 return false;
1033
1034 ParsingBuf = ParsingBuf.drop_front(N: 1);
1035 Col += 1;
1036 return true;
1037}
1038
1039void DataAggregator::consumeRestOfLine() {
1040 size_t LineEnd = ParsingBuf.find_first_of(C: '\n');
1041 if (LineEnd == StringRef::npos) {
1042 ParsingBuf = StringRef();
1043 Col = 0;
1044 Line += 1;
1045 return;
1046 }
1047 ParsingBuf = ParsingBuf.drop_front(N: LineEnd + 1);
1048 Col = 0;
1049 Line += 1;
1050}
1051
1052bool DataAggregator::checkNewLine() {
1053 return ParsingBuf[0] == '\n';
1054}
1055
1056ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1057 PerfBranchSample Res;
1058
1059 while (checkAndConsumeFS()) {
1060 }
1061
1062 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1063 if (std::error_code EC = PIDRes.getError())
1064 return EC;
1065 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1066 if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) {
1067 consumeRestOfLine();
1068 return make_error_code(E: errc::no_such_process);
1069 }
1070
1071 if (checkAndConsumeNewLine())
1072 return Res;
1073
1074 while (!checkAndConsumeNewLine()) {
1075 checkAndConsumeFS();
1076
1077 ErrorOr<LBREntry> LBRRes = parseLBREntry();
1078 if (std::error_code EC = LBRRes.getError())
1079 return EC;
1080 LBREntry LBR = LBRRes.get();
1081 if (ignoreKernelInterrupt(LBR))
1082 continue;
1083 if (!BC->HasFixedLoadAddress)
1084 adjustLBR(LBR, MMI: MMapInfoIter->second);
1085 Res.LBR.push_back(Elt: LBR);
1086 }
1087
1088 return Res;
1089}
1090
1091ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1092 while (checkAndConsumeFS()) {
1093 }
1094
1095 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1096 if (std::error_code EC = PIDRes.getError())
1097 return EC;
1098
1099 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1100 if (MMapInfoIter == BinaryMMapInfo.end()) {
1101 consumeRestOfLine();
1102 return PerfBasicSample{.EventName: StringRef(), .PC: 0};
1103 }
1104
1105 while (checkAndConsumeFS()) {
1106 }
1107
1108 ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator);
1109 if (std::error_code EC = Event.getError())
1110 return EC;
1111
1112 while (checkAndConsumeFS()) {
1113 }
1114
1115 ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator, EndNl: true);
1116 if (std::error_code EC = AddrRes.getError())
1117 return EC;
1118
1119 if (!checkAndConsumeNewLine()) {
1120 reportError(ErrorMsg: "expected end of line");
1121 return make_error_code(E: llvm::errc::io_error);
1122 }
1123
1124 uint64_t Address = *AddrRes;
1125 if (!BC->HasFixedLoadAddress)
1126 adjustAddress(Address, MMI: MMapInfoIter->second);
1127
1128 return PerfBasicSample{.EventName: Event.get(), .PC: Address};
1129}
1130
1131ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1132 PerfMemSample Res{.PC: 0, .Addr: 0};
1133
1134 while (checkAndConsumeFS()) {
1135 }
1136
1137 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1138 if (std::error_code EC = PIDRes.getError())
1139 return EC;
1140
1141 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1142 if (MMapInfoIter == BinaryMMapInfo.end()) {
1143 consumeRestOfLine();
1144 return Res;
1145 }
1146
1147 while (checkAndConsumeFS()) {
1148 }
1149
1150 ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator);
1151 if (std::error_code EC = Event.getError())
1152 return EC;
1153 if (!Event.get().contains(Other: "mem-loads")) {
1154 consumeRestOfLine();
1155 return Res;
1156 }
1157
1158 while (checkAndConsumeFS()) {
1159 }
1160
1161 ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator);
1162 if (std::error_code EC = AddrRes.getError())
1163 return EC;
1164
1165 while (checkAndConsumeFS()) {
1166 }
1167
1168 ErrorOr<uint64_t> PCRes = parseHexField(EndChar: FieldSeparator, EndNl: true);
1169 if (std::error_code EC = PCRes.getError()) {
1170 consumeRestOfLine();
1171 return EC;
1172 }
1173
1174 if (!checkAndConsumeNewLine()) {
1175 reportError(ErrorMsg: "expected end of line");
1176 return make_error_code(E: llvm::errc::io_error);
1177 }
1178
1179 uint64_t Address = *AddrRes;
1180 if (!BC->HasFixedLoadAddress)
1181 adjustAddress(Address, MMI: MMapInfoIter->second);
1182
1183 return PerfMemSample{.PC: PCRes.get(), .Addr: Address};
1184}
1185
1186ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1187 auto parseOffset = [this]() -> ErrorOr<Location> {
1188 ErrorOr<uint64_t> Res = parseHexField(EndChar: FieldSeparator);
1189 if (std::error_code EC = Res.getError())
1190 return EC;
1191 return Location(Res.get());
1192 };
1193
1194 size_t Sep = ParsingBuf.find_first_of(Chars: " \n");
1195 if (Sep == StringRef::npos)
1196 return parseOffset();
1197 StringRef LookAhead = ParsingBuf.substr(Start: 0, N: Sep);
1198 if (!LookAhead.contains(C: ':'))
1199 return parseOffset();
1200
1201 ErrorOr<StringRef> BuildID = parseString(EndChar: ':');
1202 if (std::error_code EC = BuildID.getError())
1203 return EC;
1204 ErrorOr<uint64_t> Offset = parseHexField(EndChar: FieldSeparator);
1205 if (std::error_code EC = Offset.getError())
1206 return EC;
1207 return Location(true, BuildID.get(), Offset.get());
1208}
1209
1210std::error_code DataAggregator::parseAggregatedLBREntry() {
1211 enum AggregatedLBREntry : char {
1212 INVALID = 0,
1213 EVENT_NAME, // E
1214 TRACE, // T
1215 SAMPLE, // S
1216 BRANCH, // B
1217 FT, // F
1218 FT_EXTERNAL_ORIGIN // f
1219 } Type = INVALID;
1220
1221 // The number of fields to parse, set based on Type.
1222 int AddrNum = 0;
1223 int CounterNum = 0;
1224 // Storage for parsed fields.
1225 StringRef EventName;
1226 std::optional<Location> Addr[3];
1227 int64_t Counters[2] = {0};
1228
1229 while (Type == INVALID || Type == EVENT_NAME) {
1230 while (checkAndConsumeFS()) {
1231 }
1232 ErrorOr<StringRef> StrOrErr =
1233 parseString(EndChar: FieldSeparator, EndNl: Type == EVENT_NAME);
1234 if (std::error_code EC = StrOrErr.getError())
1235 return EC;
1236 StringRef Str = StrOrErr.get();
1237
1238 if (Type == EVENT_NAME) {
1239 EventName = Str;
1240 break;
1241 }
1242
1243 Type = StringSwitch<AggregatedLBREntry>(Str)
1244 .Case(S: "T", Value: TRACE)
1245 .Case(S: "S", Value: SAMPLE)
1246 .Case(S: "E", Value: EVENT_NAME)
1247 .Case(S: "B", Value: BRANCH)
1248 .Case(S: "F", Value: FT)
1249 .Case(S: "f", Value: FT_EXTERNAL_ORIGIN)
1250 .Default(Value: INVALID);
1251
1252 if (Type == INVALID) {
1253 reportError(ErrorMsg: "expected T, S, E, B, F or f");
1254 return make_error_code(E: llvm::errc::io_error);
1255 }
1256
1257 using SSI = StringSwitch<int>;
1258 AddrNum = SSI(Str).Case(S: "T", Value: 3).Case(S: "S", Value: 1).Case(S: "E", Value: 0).Default(Value: 2);
1259 CounterNum = SSI(Str).Case(S: "B", Value: 2).Case(S: "E", Value: 0).Default(Value: 1);
1260 }
1261
1262 for (int I = 0; I < AddrNum; ++I) {
1263 while (checkAndConsumeFS()) {
1264 }
1265 ErrorOr<Location> AddrOrErr = parseLocationOrOffset();
1266 if (std::error_code EC = AddrOrErr.getError())
1267 return EC;
1268 Addr[I] = AddrOrErr.get();
1269 }
1270
1271 for (int I = 0; I < CounterNum; ++I) {
1272 while (checkAndConsumeFS()) {
1273 }
1274 ErrorOr<int64_t> CountOrErr =
1275 parseNumberField(EndChar: FieldSeparator, EndNl: I + 1 == CounterNum);
1276 if (std::error_code EC = CountOrErr.getError())
1277 return EC;
1278 Counters[I] = CountOrErr.get();
1279 }
1280
1281 if (!checkAndConsumeNewLine()) {
1282 reportError(ErrorMsg: "expected end of line");
1283 return make_error_code(E: llvm::errc::io_error);
1284 }
1285
1286 if (Type == EVENT_NAME) {
1287 EventNames.insert(key: EventName);
1288 return std::error_code();
1289 }
1290
1291 const uint64_t FromOffset = Addr[0]->Offset;
1292 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(Address: FromOffset);
1293 if (FromFunc)
1294 FromFunc->setHasProfileAvailable();
1295
1296 int64_t Count = Counters[0];
1297 int64_t Mispreds = Counters[1];
1298
1299 if (Type == SAMPLE) {
1300 BasicSamples[FromOffset] += Count;
1301 NumTotalSamples += Count;
1302 return std::error_code();
1303 }
1304
1305 const uint64_t ToOffset = Addr[1]->Offset;
1306 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Address: ToOffset);
1307 if (ToFunc)
1308 ToFunc->setHasProfileAvailable();
1309
1310 Trace Trace(FromOffset, ToOffset);
1311 // Taken trace
1312 if (Type == TRACE || Type == BRANCH) {
1313 TakenBranchInfo &Info = BranchLBRs[Trace];
1314 Info.TakenCount += Count;
1315 Info.MispredCount += Mispreds;
1316
1317 NumTotalSamples += Count;
1318 }
1319 // Construct fallthrough part of the trace
1320 if (Type == TRACE) {
1321 const uint64_t TraceFtEndOffset = Addr[2]->Offset;
1322 Trace.From = ToOffset;
1323 Trace.To = TraceFtEndOffset;
1324 Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
1325 }
1326 // Add fallthrough trace
1327 if (Type != BRANCH) {
1328 FTInfo &Info = FallthroughLBRs[Trace];
1329 (Type == FT ? Info.InternCount : Info.ExternCount) += Count;
1330
1331 NumTraces += Count;
1332 }
1333
1334 return std::error_code();
1335}
1336
1337bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1338 return opts::IgnoreInterruptLBR &&
1339 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1340}
1341
1342std::error_code DataAggregator::printLBRHeatMap() {
1343 outs() << "PERF2BOLT: parse branch events...\n";
1344 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1345 TimerGroupDesc, opts::TimeAggregator);
1346
1347 if (BC->IsLinuxKernel) {
1348 opts::HeatmapMaxAddress = 0xffffffffffffffff;
1349 opts::HeatmapMinAddress = KernelBaseAddr;
1350 }
1351 opts::HeatmapBlockSizes &HMBS = opts::HeatmapBlock;
1352 Heatmap HM(HMBS[0], opts::HeatmapMinAddress, opts::HeatmapMaxAddress,
1353 getTextSections(BC));
1354 auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t {
1355 if (Symbol)
1356 if (ErrorOr<uint64_t> SymValue = BC->getSymbolValue(Symbol: *Symbol))
1357 return SymValue.get();
1358 return 0;
1359 };
1360 HM.HotStart = getSymbolValue(BC->getHotTextStartSymbol());
1361 HM.HotEnd = getSymbolValue(BC->getHotTextEndSymbol());
1362
1363 if (!NumTotalSamples) {
1364 if (opts::BasicAggregation) {
1365 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1366 "Cannot build heatmap.";
1367 } else {
1368 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1369 "Cannot build heatmap. Use -nl for building heatmap from "
1370 "basic events.\n";
1371 }
1372 exit(status: 1);
1373 }
1374
1375 outs() << "HEATMAP: building heat map...\n";
1376
1377 // Register basic samples and perf LBR addresses not covered by fallthroughs.
1378 for (const auto &[PC, Hits] : BasicSamples)
1379 HM.registerAddress(Address: PC, Count: Hits);
1380 for (const auto &LBR : FallthroughLBRs) {
1381 const Trace &Trace = LBR.first;
1382 const FTInfo &Info = LBR.second;
1383 HM.registerAddressRange(StartAddress: Trace.From, EndAddress: Trace.To,
1384 Count: Info.InternCount + Info.ExternCount);
1385 }
1386
1387 if (HM.getNumInvalidRanges())
1388 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1389
1390 if (!HM.size()) {
1391 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1392 exit(status: 1);
1393 }
1394
1395 HM.print(FileName: opts::HeatmapOutput);
1396 if (opts::HeatmapOutput == "-") {
1397 HM.printCDF(FileName: opts::HeatmapOutput);
1398 HM.printSectionHotness(Filename: opts::HeatmapOutput);
1399 } else {
1400 HM.printCDF(FileName: opts::HeatmapOutput + ".csv");
1401 HM.printSectionHotness(Filename: opts::HeatmapOutput + "-section-hotness.csv");
1402 }
1403 // Provide coarse-grained heatmaps if requested via zoom-out scales
1404 for (const uint64_t NewBucketSize : ArrayRef(HMBS).drop_front()) {
1405 HM.resizeBucket(NewSize: NewBucketSize);
1406 if (opts::HeatmapOutput == "-")
1407 HM.print(FileName: opts::HeatmapOutput);
1408 else
1409 HM.print(FileName: formatv(Fmt: "{0}-{1}", Vals&: opts::HeatmapOutput, Vals: NewBucketSize).str());
1410 }
1411
1412 return std::error_code();
1413}
1414
1415void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1416 bool NeedsSkylakeFix) {
1417 // LBRs are stored in reverse execution order. NextLBR refers to the next
1418 // executed branch record.
1419 const LBREntry *NextLBR = nullptr;
1420 uint32_t NumEntry = 0;
1421 for (const LBREntry &LBR : Sample.LBR) {
1422 ++NumEntry;
1423 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1424 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1425 // us to likely record an invalid trace and generate a stale function for
1426 // BAT mode (non BAT disassembles the function and is able to ignore this
1427 // trace at aggregation time). Drop first 2 entries (last two, in
1428 // chronological order)
1429 if (NeedsSkylakeFix && NumEntry <= 2)
1430 continue;
1431 if (NextLBR) {
1432 // Record fall-through trace.
1433 const uint64_t TraceFrom = LBR.To;
1434 const uint64_t TraceTo = NextLBR->From;
1435 const BinaryFunction *TraceBF =
1436 getBinaryFunctionContainingAddress(Address: TraceFrom);
1437 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1438 if (TraceBF && TraceBF->containsAddress(PC: LBR.From))
1439 ++Info.InternCount;
1440 else
1441 ++Info.ExternCount;
1442 ++NumTraces;
1443 }
1444 NextLBR = &LBR;
1445
1446 TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
1447 ++Info.TakenCount;
1448 Info.MispredCount += LBR.Mispred;
1449 }
1450 // Record LBR addresses not covered by fallthroughs (bottom-of-stack source
1451 // and top-of-stack target) as basic samples for heatmap.
1452 if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive &&
1453 !Sample.LBR.empty()) {
1454 ++BasicSamples[Sample.LBR.front().To];
1455 ++BasicSamples[Sample.LBR.back().From];
1456 }
1457}
1458
1459void DataAggregator::printLongRangeTracesDiagnostic() const {
1460 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1461 << NumLongRangeTraces;
1462 if (NumTraces > 0)
1463 outs() << format(Fmt: " (%.1f%%)", Vals: NumLongRangeTraces * 100.0f / NumTraces);
1464 outs() << "\n";
1465}
1466
1467static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
1468 float T2) {
1469 if (Denominator == 0) {
1470 outs() << "\n";
1471 return 0;
1472 }
1473 float Percent = Numerator * 100.0f / Denominator;
1474 outs() << " (";
1475 if (outs().has_colors()) {
1476 if (Percent > T2)
1477 outs().changeColor(Color: raw_ostream::RED);
1478 else if (Percent > T1)
1479 outs().changeColor(Color: raw_ostream::YELLOW);
1480 else
1481 outs().changeColor(Color: raw_ostream::GREEN);
1482 }
1483 outs() << format(Fmt: "%.1f%%", Vals: Percent);
1484 if (outs().has_colors())
1485 outs().resetColor();
1486 outs() << ")\n";
1487 return Percent;
1488}
1489
1490void DataAggregator::printBranchSamplesDiagnostics() const {
1491 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1492 << NumInvalidTraces;
1493 if (printColoredPct(Numerator: NumInvalidTraces, Denominator: NumTraces, T1: 5, T2: 10) > 10)
1494 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1495 "binary is probably not the same binary used during profiling "
1496 "collection. The generated data may be ineffective for improving "
1497 "performance\n\n";
1498 printLongRangeTracesDiagnostic();
1499}
1500
1501void DataAggregator::printBasicSamplesDiagnostics(
1502 uint64_t OutOfRangeSamples) const {
1503 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1504 << OutOfRangeSamples;
1505 if (printColoredPct(Numerator: OutOfRangeSamples, Denominator: NumTotalSamples, T1: 40, T2: 60) > 80)
1506 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1507 "binary is probably not the same binary used during profiling "
1508 "collection. The generated data may be ineffective for improving "
1509 "performance\n\n";
1510}
1511
1512void DataAggregator::printBranchStacksDiagnostics(
1513 uint64_t IgnoredSamples) const {
1514 outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
1515 if (printColoredPct(Numerator: IgnoredSamples, Denominator: NumTotalSamples, T1: 20, T2: 50) > 50)
1516 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1517 "were attributed to the input binary\n";
1518}
1519
1520std::error_code DataAggregator::parseBranchEvents() {
1521 outs() << "PERF2BOLT: parse branch events...\n";
1522 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1523 TimerGroupDesc, opts::TimeAggregator);
1524
1525 uint64_t NumEntries = 0;
1526 uint64_t NumSamples = 0;
1527 uint64_t NumSamplesNoLBR = 0;
1528 bool NeedsSkylakeFix = false;
1529
1530 while (hasData() && NumTotalSamples < opts::MaxSamples) {
1531 ++NumTotalSamples;
1532
1533 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1534 if (std::error_code EC = SampleRes.getError()) {
1535 if (EC == errc::no_such_process)
1536 continue;
1537 return EC;
1538 }
1539 ++NumSamples;
1540
1541 PerfBranchSample &Sample = SampleRes.get();
1542
1543 if (Sample.LBR.empty()) {
1544 ++NumSamplesNoLBR;
1545 continue;
1546 }
1547
1548 NumEntries += Sample.LBR.size();
1549 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1550 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1551 NeedsSkylakeFix = true;
1552 }
1553
1554 parseLBRSample(Sample, NeedsSkylakeFix);
1555 }
1556
1557 for (const Trace &Trace : llvm::make_first_range(c&: BranchLBRs))
1558 for (const uint64_t Addr : {Trace.From, Trace.To})
1559 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Addr))
1560 BF->setHasProfileAvailable();
1561
1562 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1563 << " LBR entries\n";
1564 if (NumTotalSamples) {
1565 if (NumSamples && NumSamplesNoLBR == NumSamples) {
1566 // Note: we don't know if perf2bolt is being used to parse memory samples
1567 // at this point. In this case, it is OK to parse zero LBRs.
1568 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1569 "LBR. Record profile with perf record -j any or run perf2bolt "
1570 "in no-LBR mode with -nl (the performance improvement in -nl "
1571 "mode may be limited)\n";
1572 } else {
1573 printBranchStacksDiagnostics(IgnoredSamples: NumTotalSamples - NumSamples);
1574 }
1575 }
1576
1577 return std::error_code();
1578}
1579
1580void DataAggregator::processBranchEvents() {
1581 outs() << "PERF2BOLT: processing branch events...\n";
1582 NamedRegionTimer T("processBranch", "Processing branch events",
1583 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1584
1585 for (const auto &AggrLBR : FallthroughLBRs) {
1586 const Trace &Loc = AggrLBR.first;
1587 const FTInfo &Info = AggrLBR.second;
1588 LBREntry First{.From: Loc.From, .To: Loc.From, .Mispred: false};
1589 LBREntry Second{.From: Loc.To, .To: Loc.To, .Mispred: false};
1590 if (Info.InternCount)
1591 doTrace(First, Second, Count: Info.InternCount);
1592 if (Info.ExternCount) {
1593 First.From = 0;
1594 doTrace(First, Second, Count: Info.ExternCount);
1595 }
1596 }
1597
1598 for (const auto &AggrLBR : BranchLBRs) {
1599 const Trace &Loc = AggrLBR.first;
1600 const TakenBranchInfo &Info = AggrLBR.second;
1601 doBranch(From: Loc.From, To: Loc.To, Count: Info.TakenCount, Mispreds: Info.MispredCount);
1602 }
1603 printBranchSamplesDiagnostics();
1604}
1605
1606std::error_code DataAggregator::parseBasicEvents() {
1607 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1608 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1609 TimerGroupDesc, opts::TimeAggregator);
1610 while (hasData()) {
1611 ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1612 if (std::error_code EC = Sample.getError())
1613 return EC;
1614
1615 if (!Sample->PC)
1616 continue;
1617 ++NumTotalSamples;
1618
1619 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC))
1620 BF->setHasProfileAvailable();
1621
1622 ++BasicSamples[Sample->PC];
1623 EventNames.insert(key: Sample->EventName);
1624 }
1625 outs() << "PERF2BOLT: read " << NumTotalSamples << " basic samples\n";
1626
1627 return std::error_code();
1628}
1629
1630void DataAggregator::processBasicEvents() {
1631 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1632 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1633 TimerGroupDesc, opts::TimeAggregator);
1634 uint64_t OutOfRangeSamples = 0;
1635 for (auto &Sample : BasicSamples) {
1636 const uint64_t PC = Sample.first;
1637 const uint64_t HitCount = Sample.second;
1638 BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC);
1639 if (!Func) {
1640 OutOfRangeSamples += HitCount;
1641 continue;
1642 }
1643
1644 doBasicSample(OrigFunc&: *Func, Address: PC, Count: HitCount);
1645 }
1646
1647 printBasicSamplesDiagnostics(OutOfRangeSamples);
1648}
1649
1650std::error_code DataAggregator::parseMemEvents() {
1651 outs() << "PERF2BOLT: parsing memory events...\n";
1652 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1653 TimerGroupDesc, opts::TimeAggregator);
1654 while (hasData()) {
1655 ErrorOr<PerfMemSample> Sample = parseMemSample();
1656 if (std::error_code EC = Sample.getError())
1657 return EC;
1658
1659 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC))
1660 BF->setHasProfileAvailable();
1661
1662 MemSamples.emplace_back(args: std::move(Sample.get()));
1663 }
1664
1665 return std::error_code();
1666}
1667
1668void DataAggregator::processMemEvents() {
1669 NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1670 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1671 for (const PerfMemSample &Sample : MemSamples) {
1672 uint64_t PC = Sample.PC;
1673 uint64_t Addr = Sample.Addr;
1674 StringRef FuncName;
1675 StringRef MemName;
1676
1677 // Try to resolve symbol for PC
1678 BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC);
1679 if (!Func) {
1680 LLVM_DEBUG(if (PC != 0) {
1681 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr);
1682 });
1683 continue;
1684 }
1685
1686 FuncName = Func->getOneName();
1687 PC -= Func->getAddress();
1688
1689 // Try to resolve symbol for memory load
1690 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Address: Addr)) {
1691 MemName = BD->getName();
1692 Addr -= BD->getAddress();
1693 } else if (opts::FilterMemProfile) {
1694 // Filter out heap/stack accesses
1695 continue;
1696 }
1697
1698 const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1699 const Location AddrLoc(!MemName.empty(), MemName, Addr);
1700
1701 FuncMemData *MemData = &NamesToMemEvents[FuncName];
1702 MemData->Name = FuncName;
1703 setMemData(BF: *Func, FMD: MemData);
1704 MemData->update(Offset: FuncLoc, Addr: AddrLoc);
1705 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1706 }
1707}
1708
1709std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1710 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1711 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1712 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1713 size_t AggregatedLBRs = 0;
1714 while (hasData()) {
1715 if (std::error_code EC = parseAggregatedLBREntry())
1716 return EC;
1717 ++AggregatedLBRs;
1718 }
1719
1720 outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n";
1721
1722 return std::error_code();
1723}
1724
1725std::optional<int32_t> DataAggregator::parseCommExecEvent() {
1726 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1727 if (LineEnd == StringRef::npos) {
1728 reportError(ErrorMsg: "expected rest of line");
1729 Diag << "Found: " << ParsingBuf << "\n";
1730 return std::nullopt;
1731 }
1732 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1733
1734 size_t Pos = Line.find(Str: "PERF_RECORD_COMM exec");
1735 if (Pos == StringRef::npos)
1736 return std::nullopt;
1737 Line = Line.drop_front(N: Pos);
1738
1739 // Line:
1740 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1741 StringRef PIDStr = Line.rsplit(Separator: ':').second.split(Separator: '/').first;
1742 int32_t PID;
1743 if (PIDStr.getAsInteger(Radix: 10, Result&: PID)) {
1744 reportError(ErrorMsg: "expected PID");
1745 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1746 return std::nullopt;
1747 }
1748
1749 return PID;
1750}
1751
1752namespace {
1753std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1754 const StringRef SecTimeStr = TimeStr.split(Separator: '.').first;
1755 const StringRef USecTimeStr = TimeStr.split(Separator: '.').second;
1756 uint64_t SecTime;
1757 uint64_t USecTime;
1758 if (SecTimeStr.getAsInteger(Radix: 10, Result&: SecTime) ||
1759 USecTimeStr.getAsInteger(Radix: 10, Result&: USecTime))
1760 return std::nullopt;
1761 return SecTime * 1000000ULL + USecTime;
1762}
1763}
1764
1765std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1766 while (checkAndConsumeFS()) {
1767 }
1768
1769 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1770 if (LineEnd == StringRef::npos) {
1771 reportError(ErrorMsg: "expected rest of line");
1772 Diag << "Found: " << ParsingBuf << "\n";
1773 return std::nullopt;
1774 }
1775 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1776
1777 size_t Pos = Line.find(Str: "PERF_RECORD_FORK");
1778 if (Pos == StringRef::npos) {
1779 consumeRestOfLine();
1780 return std::nullopt;
1781 }
1782
1783 ForkInfo FI;
1784
1785 const StringRef TimeStr =
1786 Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second;
1787 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1788 FI.Time = *TimeRes;
1789 }
1790
1791 Line = Line.drop_front(N: Pos);
1792
1793 // Line:
1794 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1795 const StringRef ChildPIDStr = Line.split(Separator: '(').second.split(Separator: ':').first;
1796 if (ChildPIDStr.getAsInteger(Radix: 10, Result&: FI.ChildPID)) {
1797 reportError(ErrorMsg: "expected PID");
1798 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1799 return std::nullopt;
1800 }
1801
1802 const StringRef ParentPIDStr = Line.rsplit(Separator: '(').second.split(Separator: ':').first;
1803 if (ParentPIDStr.getAsInteger(Radix: 10, Result&: FI.ParentPID)) {
1804 reportError(ErrorMsg: "expected PID");
1805 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1806 return std::nullopt;
1807 }
1808
1809 consumeRestOfLine();
1810
1811 return FI;
1812}
1813
1814ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1815DataAggregator::parseMMapEvent() {
1816 while (checkAndConsumeFS()) {
1817 }
1818
1819 MMapInfo ParsedInfo;
1820
1821 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1822 if (LineEnd == StringRef::npos) {
1823 reportError(ErrorMsg: "expected rest of line");
1824 Diag << "Found: " << ParsingBuf << "\n";
1825 return make_error_code(E: llvm::errc::io_error);
1826 }
1827 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1828
1829 size_t Pos = Line.find(Str: "PERF_RECORD_MMAP2");
1830 if (Pos == StringRef::npos) {
1831 consumeRestOfLine();
1832 return std::make_pair(x: StringRef(), y&: ParsedInfo);
1833 }
1834
1835 // Line:
1836 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1837
1838 const StringRef TimeStr =
1839 Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second;
1840 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1841 ParsedInfo.Time = *TimeRes;
1842
1843 Line = Line.drop_front(N: Pos);
1844
1845 // Line:
1846 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1847
1848 StringRef FileName = Line.rsplit(Separator: FieldSeparator).second;
1849 if (FileName.starts_with(Prefix: "//") || FileName.starts_with(Prefix: "[")) {
1850 consumeRestOfLine();
1851 return std::make_pair(x: StringRef(), y&: ParsedInfo);
1852 }
1853 FileName = sys::path::filename(path: FileName);
1854
1855 const StringRef PIDStr = Line.split(Separator: FieldSeparator).second.split(Separator: '/').first;
1856 if (PIDStr.getAsInteger(Radix: 10, Result&: ParsedInfo.PID)) {
1857 reportError(ErrorMsg: "expected PID");
1858 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1859 return make_error_code(E: llvm::errc::io_error);
1860 }
1861
1862 const StringRef BaseAddressStr = Line.split(Separator: '[').second.split(Separator: '(').first;
1863 if (BaseAddressStr.getAsInteger(Radix: 0, Result&: ParsedInfo.MMapAddress)) {
1864 reportError(ErrorMsg: "expected base address");
1865 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1866 return make_error_code(E: llvm::errc::io_error);
1867 }
1868
1869 const StringRef SizeStr = Line.split(Separator: '(').second.split(Separator: ')').first;
1870 if (SizeStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Size)) {
1871 reportError(ErrorMsg: "expected mmaped size");
1872 Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1873 return make_error_code(E: llvm::errc::io_error);
1874 }
1875
1876 const StringRef OffsetStr =
1877 Line.split(Separator: '@').second.ltrim().split(Separator: FieldSeparator).first;
1878 if (OffsetStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Offset)) {
1879 reportError(ErrorMsg: "expected mmaped page-aligned offset");
1880 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1881 return make_error_code(E: llvm::errc::io_error);
1882 }
1883
1884 consumeRestOfLine();
1885
1886 return std::make_pair(x&: FileName, y&: ParsedInfo);
1887}
1888
1889std::error_code DataAggregator::parseMMapEvents() {
1890 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1891 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1892 TimerGroupDesc, opts::TimeAggregator);
1893
1894 std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1895 while (hasData()) {
1896 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1897 if (std::error_code EC = FileMMapInfoRes.getError())
1898 return EC;
1899
1900 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1901 if (FileMMapInfo.second.PID == -1)
1902 continue;
1903 if (FileMMapInfo.first == "(deleted)")
1904 continue;
1905
1906 GlobalMMapInfo.insert(x&: FileMMapInfo);
1907 }
1908
1909 LLVM_DEBUG({
1910 dbgs() << "FileName -> mmap info:\n"
1911 << " Filename : PID [MMapAddr, Size, Offset]\n";
1912 for (const auto &[Name, MMap] : GlobalMMapInfo)
1913 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID,
1914 MMap.MMapAddress, MMap.Size, MMap.Offset);
1915 });
1916
1917 StringRef NameToUse = llvm::sys::path::filename(path: BC->getFilename());
1918 if (GlobalMMapInfo.count(x: NameToUse) == 0 && !BuildIDBinaryName.empty()) {
1919 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1920 << "\" for profile matching\n";
1921 NameToUse = BuildIDBinaryName;
1922 }
1923
1924 auto Range = GlobalMMapInfo.equal_range(x: NameToUse);
1925 for (MMapInfo &MMapInfo : llvm::make_second_range(c: make_range(p: Range))) {
1926 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
1927 // Check that the binary mapping matches one of the segments.
1928 bool MatchFound = llvm::any_of(
1929 Range: llvm::make_second_range(c&: BC->SegmentMapInfo),
1930 P: [&](SegmentInfo &SegInfo) {
1931 // The mapping is page-aligned and hence the MMapAddress could be
1932 // different from the segment start address. We cannot know the page
1933 // size of the mapping, but we know it should not exceed the segment
1934 // alignment value. Hence we are performing an approximate check.
1935 return SegInfo.Address >= MMapInfo.MMapAddress &&
1936 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment &&
1937 SegInfo.IsExecutable;
1938 });
1939 if (!MatchFound) {
1940 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
1941 << " at 0x" << Twine::utohexstr(Val: MMapInfo.MMapAddress) << '\n';
1942 continue;
1943 }
1944 }
1945
1946 // Set base address for shared objects.
1947 if (!BC->HasFixedLoadAddress) {
1948 std::optional<uint64_t> BaseAddress =
1949 BC->getBaseAddressForMapping(MMapAddress: MMapInfo.MMapAddress, FileOffset: MMapInfo.Offset);
1950 if (!BaseAddress) {
1951 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
1952 "binary when memory mapped at 0x"
1953 << Twine::utohexstr(Val: MMapInfo.MMapAddress)
1954 << " using file offset 0x" << Twine::utohexstr(Val: MMapInfo.Offset)
1955 << ". Ignoring profile data for this mapping\n";
1956 continue;
1957 }
1958 MMapInfo.BaseAddress = *BaseAddress;
1959 }
1960
1961 // Try to add MMapInfo to the map and update its size. Large binaries may
1962 // span to multiple text segments, so the mapping is inserted only on the
1963 // first occurrence.
1964 if (!BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo)).second)
1965 assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress &&
1966 "Base address on multiple segment mappings should match");
1967
1968 // Update mapping size.
1969 const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size;
1970 const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress;
1971 if (Size > BinaryMMapInfo[MMapInfo.PID].Size)
1972 BinaryMMapInfo[MMapInfo.PID].Size = Size;
1973 }
1974
1975 if (BinaryMMapInfo.empty()) {
1976 if (errs().has_colors())
1977 errs().changeColor(Color: raw_ostream::RED);
1978 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
1979 << BC->getFilename() << "\".";
1980 if (!GlobalMMapInfo.empty()) {
1981 errs() << " Profile for the following binary name(s) is available:\n";
1982 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
1983 I = GlobalMMapInfo.upper_bound(x: I->first))
1984 errs() << " " << I->first << '\n';
1985 errs() << "Please rename the input binary.\n";
1986 } else {
1987 errs() << " Failed to extract any binary name from a profile.\n";
1988 }
1989 if (errs().has_colors())
1990 errs().resetColor();
1991
1992 exit(status: 1);
1993 }
1994
1995 return std::error_code();
1996}
1997
1998std::error_code DataAggregator::parseTaskEvents() {
1999 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2000 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2001 TimerGroupDesc, opts::TimeAggregator);
2002
2003 while (hasData()) {
2004 if (std::optional<int32_t> CommInfo = parseCommExecEvent()) {
2005 // Remove forked child that ran execve
2006 auto MMapInfoIter = BinaryMMapInfo.find(x: *CommInfo);
2007 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2008 BinaryMMapInfo.erase(position: MMapInfoIter);
2009 consumeRestOfLine();
2010 continue;
2011 }
2012
2013 std::optional<ForkInfo> ForkInfo = parseForkEvent();
2014 if (!ForkInfo)
2015 continue;
2016
2017 if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2018 continue;
2019
2020 if (ForkInfo->Time == 0) {
2021 // Process was forked and mmaped before perf ran. In this case the child
2022 // should have its own mmap entry unless it was execve'd.
2023 continue;
2024 }
2025
2026 auto MMapInfoIter = BinaryMMapInfo.find(x: ForkInfo->ParentPID);
2027 if (MMapInfoIter == BinaryMMapInfo.end())
2028 continue;
2029
2030 MMapInfo MMapInfo = MMapInfoIter->second;
2031 MMapInfo.PID = ForkInfo->ChildPID;
2032 MMapInfo.Forked = true;
2033 BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo));
2034 }
2035
2036 outs() << "PERF2BOLT: input binary is associated with "
2037 << BinaryMMapInfo.size() << " PID(s)\n";
2038
2039 LLVM_DEBUG({
2040 for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo))
2041 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID,
2042 (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress,
2043 MMI.Size);
2044 });
2045
2046 return std::error_code();
2047}
2048
2049std::optional<std::pair<StringRef, StringRef>>
2050DataAggregator::parseNameBuildIDPair() {
2051 while (checkAndConsumeFS()) {
2052 }
2053
2054 ErrorOr<StringRef> BuildIDStr = parseString(EndChar: FieldSeparator, EndNl: true);
2055 if (std::error_code EC = BuildIDStr.getError())
2056 return std::nullopt;
2057
2058 // If one of the strings is missing, don't issue a parsing error, but still
2059 // do not return a value.
2060 consumeAllRemainingFS();
2061 if (checkNewLine())
2062 return std::nullopt;
2063
2064 ErrorOr<StringRef> NameStr = parseString(EndChar: FieldSeparator, EndNl: true);
2065 if (std::error_code EC = NameStr.getError())
2066 return std::nullopt;
2067
2068 consumeRestOfLine();
2069 return std::make_pair(x&: NameStr.get(), y&: BuildIDStr.get());
2070}
2071
2072bool DataAggregator::hasAllBuildIDs() {
2073 const StringRef SavedParsingBuf = ParsingBuf;
2074
2075 if (!hasData())
2076 return false;
2077
2078 bool HasInvalidEntries = false;
2079 while (hasData()) {
2080 if (!parseNameBuildIDPair()) {
2081 HasInvalidEntries = true;
2082 break;
2083 }
2084 }
2085
2086 ParsingBuf = SavedParsingBuf;
2087
2088 return !HasInvalidEntries;
2089}
2090
2091std::optional<StringRef>
2092DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2093 const StringRef SavedParsingBuf = ParsingBuf;
2094
2095 StringRef FileName;
2096 while (hasData()) {
2097 std::optional<std::pair<StringRef, StringRef>> IDPair =
2098 parseNameBuildIDPair();
2099 if (!IDPair) {
2100 consumeRestOfLine();
2101 continue;
2102 }
2103
2104 if (IDPair->second.starts_with(Prefix: FileBuildID)) {
2105 FileName = sys::path::filename(path: IDPair->first);
2106 break;
2107 }
2108 }
2109
2110 ParsingBuf = SavedParsingBuf;
2111
2112 if (!FileName.empty())
2113 return FileName;
2114
2115 return std::nullopt;
2116}
2117
2118std::error_code
2119DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2120 std::error_code EC;
2121 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2122 if (EC)
2123 return EC;
2124
2125 bool WriteMemLocs = false;
2126
2127 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2128 if (WriteMemLocs)
2129 OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2130 else
2131 OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2132 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Name: Loc.Name))
2133 << " " << Twine::utohexstr(Val: Loc.Offset) << FieldSeparator;
2134 };
2135
2136 uint64_t BranchValues = 0;
2137 uint64_t MemValues = 0;
2138
2139 if (BAT)
2140 OutFile << "boltedcollection\n";
2141 if (opts::BasicAggregation) {
2142 OutFile << "no_lbr";
2143 for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
2144 OutFile << " " << Entry.getKey();
2145 OutFile << "\n";
2146
2147 for (const auto &KV : NamesToBasicSamples) {
2148 const FuncBasicSampleData &FSD = KV.second;
2149 for (const BasicSampleInfo &SI : FSD.Data) {
2150 writeLocation(SI.Loc);
2151 OutFile << SI.Hits << "\n";
2152 ++BranchValues;
2153 }
2154 }
2155 } else {
2156 for (const auto &KV : NamesToBranches) {
2157 const FuncBranchData &FBD = KV.second;
2158 for (const BranchInfo &BI : FBD.Data) {
2159 writeLocation(BI.From);
2160 writeLocation(BI.To);
2161 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2162 ++BranchValues;
2163 }
2164 for (const BranchInfo &BI : FBD.EntryData) {
2165 // Do not output if source is a known symbol, since this was already
2166 // accounted for in the source function
2167 if (BI.From.IsSymbol)
2168 continue;
2169 writeLocation(BI.From);
2170 writeLocation(BI.To);
2171 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2172 ++BranchValues;
2173 }
2174 }
2175
2176 WriteMemLocs = true;
2177 for (const auto &KV : NamesToMemEvents) {
2178 const FuncMemData &FMD = KV.second;
2179 for (const MemInfo &MemEvent : FMD.Data) {
2180 writeLocation(MemEvent.Offset);
2181 writeLocation(MemEvent.Addr);
2182 OutFile << MemEvent.Count << "\n";
2183 ++MemValues;
2184 }
2185 }
2186 }
2187
2188 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2189 << " memory objects to " << OutputFilename << "\n";
2190
2191 return std::error_code();
2192}
2193
2194std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
2195 StringRef OutputFilename) const {
2196 std::error_code EC;
2197 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2198 if (EC)
2199 return EC;
2200
2201 yaml::bolt::BinaryProfile BP;
2202
2203 const MCPseudoProbeDecoder *PseudoProbeDecoder =
2204 opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
2205
2206 // Fill out the header info.
2207 BP.Header.Version = 1;
2208 BP.Header.FileName = std::string(BC.getFilename());
2209 std::optional<StringRef> BuildID = BC.getFileBuildID();
2210 BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
2211 BP.Header.Origin = std::string(getReaderName());
2212 // Only the input binary layout order is supported.
2213 BP.Header.IsDFSOrder = false;
2214 // FIXME: Need to match hash function used to produce BAT hashes.
2215 BP.Header.HashFunction = HashFunction::Default;
2216
2217 ListSeparator LS(",");
2218 raw_string_ostream EventNamesOS(BP.Header.EventNames);
2219 for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
2220 EventNamesOS << LS << EventEntry.first().str();
2221
2222 BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_BASIC
2223 : BinaryFunction::PF_BRANCH;
2224
2225 // Add probe inline tree nodes.
2226 YAMLProfileWriter::InlineTreeDesc InlineTree;
2227 if (PseudoProbeDecoder)
2228 std::tie(args&: BP.PseudoProbeDesc, args&: InlineTree) =
2229 YAMLProfileWriter::convertPseudoProbeDesc(PseudoProbeDecoder: *PseudoProbeDecoder);
2230
2231 if (!opts::BasicAggregation) {
2232 // Convert profile for functions not covered by BAT
2233 for (auto &BFI : BC.getBinaryFunctions()) {
2234 BinaryFunction &Function = BFI.second;
2235 if (!Function.hasProfile())
2236 continue;
2237 if (BAT->isBATFunction(Address: Function.getAddress()))
2238 continue;
2239 BP.Functions.emplace_back(args: YAMLProfileWriter::convert(
2240 BF: Function, /*UseDFS=*/false, InlineTree, BAT));
2241 }
2242
2243 for (const auto &KV : NamesToBranches) {
2244 const StringRef FuncName = KV.first;
2245 const FuncBranchData &Branches = KV.second;
2246 yaml::bolt::BinaryFunctionProfile YamlBF;
2247 BinaryData *BD = BC.getBinaryDataByName(Name: FuncName);
2248 assert(BD);
2249 uint64_t FuncAddress = BD->getAddress();
2250 if (!BAT->isBATFunction(Address: FuncAddress))
2251 continue;
2252 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: FuncAddress);
2253 assert(BF);
2254 YamlBF.Name = getLocationName(Func: *BF, BAT);
2255 YamlBF.Id = BF->getFunctionNumber();
2256 YamlBF.Hash = BAT->getBFHash(FuncOutputAddress: FuncAddress);
2257 YamlBF.ExecCount = BF->getKnownExecutionCount();
2258 YamlBF.ExternEntryCount = BF->getExternEntryCount();
2259 YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(OutputAddress: FuncAddress);
2260 const BoltAddressTranslation::BBHashMapTy &BlockMap =
2261 BAT->getBBHashMap(FuncOutputAddress: FuncAddress);
2262 YamlBF.Blocks.resize(new_size: YamlBF.NumBasicBlocks);
2263
2264 for (auto &&[Entry, YamlBB] : llvm::zip(t: BlockMap, u&: YamlBF.Blocks)) {
2265 const auto &Block = Entry.second;
2266 YamlBB.Hash = Block.Hash;
2267 YamlBB.Index = Block.Index;
2268 }
2269
2270 // Lookup containing basic block offset and index
2271 auto getBlock = [&BlockMap](uint32_t Offset) {
2272 auto BlockIt = BlockMap.upper_bound(Offset);
2273 if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) {
2274 errs() << "BOLT-ERROR: invalid BAT section\n";
2275 exit(status: 1);
2276 }
2277 --BlockIt;
2278 return std::pair(BlockIt->first, BlockIt->second.Index);
2279 };
2280
2281 for (const BranchInfo &BI : Branches.Data) {
2282 using namespace yaml::bolt;
2283 const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset);
2284 BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
2285 if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) {
2286 // Internal branch
2287 const unsigned SuccIndex = getBlock(BI.To.Offset).second;
2288 auto &SI = YamlBB.Successors.emplace_back(args: SuccessorInfo{.Index: SuccIndex});
2289 SI.Count = BI.Branches;
2290 SI.Mispreds = BI.Mispreds;
2291 } else {
2292 // Call
2293 const uint32_t Offset = BI.From.Offset - BlockOffset;
2294 auto &CSI = YamlBB.CallSites.emplace_back(args: CallSiteInfo{.Offset: Offset});
2295 CSI.Count = BI.Branches;
2296 CSI.Mispreds = BI.Mispreds;
2297 if (const BinaryData *BD = BC.getBinaryDataByName(Name: BI.To.Name))
2298 YAMLProfileWriter::setCSIDestination(BC, CSI, Symbol: BD->getSymbol(), BAT,
2299 Offset: BI.To.Offset);
2300 }
2301 }
2302 // Set entry counts, similar to DataReader::readProfile.
2303 for (const BranchInfo &BI : Branches.EntryData) {
2304 if (!BlockMap.isInputBlock(InputOffset: BI.To.Offset)) {
2305 if (opts::Verbosity >= 1)
2306 errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName
2307 << " at 0x" << Twine::utohexstr(Val: BI.To.Offset) << '\n';
2308 continue;
2309 }
2310 const unsigned BlockIndex = BlockMap.getBBIndex(BBInputOffset: BI.To.Offset);
2311 YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
2312 }
2313 if (PseudoProbeDecoder) {
2314 DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
2315 InlineTreeNodeId;
2316 if (BF->getGUID()) {
2317 std::tie(args&: YamlBF.InlineTree, args&: InlineTreeNodeId) =
2318 YAMLProfileWriter::convertBFInlineTree(Decoder: *PseudoProbeDecoder,
2319 InlineTree, GUID: BF->getGUID());
2320 }
2321 // Fetch probes belonging to all fragments
2322 const AddressProbesMap &ProbeMap =
2323 PseudoProbeDecoder->getAddress2ProbesMap();
2324 BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
2325 Fragments.insert(Ptr: BF);
2326 DenseMap<
2327 uint32_t,
2328 std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
2329 BlockProbes;
2330 for (const BinaryFunction *F : Fragments) {
2331 const uint64_t FuncAddr = F->getAddress();
2332 for (const MCDecodedPseudoProbe &Probe :
2333 ProbeMap.find(From: FuncAddr, To: FuncAddr + F->getSize())) {
2334 const uint32_t OutputAddress = Probe.getAddress();
2335 const uint32_t InputOffset = BAT->translate(
2336 FuncAddress: FuncAddr, Offset: OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
2337 const unsigned BlockIndex = getBlock(InputOffset).second;
2338 BlockProbes[BlockIndex].emplace_back(args: Probe);
2339 }
2340 }
2341
2342 for (auto &[Block, Probes] : BlockProbes) {
2343 YamlBF.Blocks[Block].PseudoProbes =
2344 YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
2345 }
2346 }
2347 // Skip printing if there's no profile data
2348 llvm::erase_if(
2349 C&: YamlBF.Blocks, P: [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2350 auto HasCount = [](const auto &SI) { return SI.Count; };
2351 bool HasAnyCount = YamlBB.ExecCount ||
2352 llvm::any_of(Range: YamlBB.Successors, P: HasCount) ||
2353 llvm::any_of(Range: YamlBB.CallSites, P: HasCount);
2354 return !HasAnyCount;
2355 });
2356 BP.Functions.emplace_back(args&: YamlBF);
2357 }
2358 }
2359
2360 // Write the profile.
2361 yaml::Output Out(OutFile, nullptr, 0);
2362 Out << BP;
2363 return std::error_code();
2364}
2365
2366void DataAggregator::dump() const { DataReader::dump(); }
2367
2368void DataAggregator::dump(const PerfBranchSample &Sample) const {
2369 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2370 for (const LBREntry &LBR : Sample.LBR)
2371 Diag << LBR << '\n';
2372}
2373
2374void DataAggregator::dump(const PerfMemSample &Sample) const {
2375 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2376}
2377

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of bolt/lib/Profile/DataAggregator.cpp