1//===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11//
12//===----------------------------------------------------------------------===//
13
14#include "bolt/Profile/DataAggregator.h"
15#include "bolt/Core/BinaryContext.h"
16#include "bolt/Core/BinaryFunction.h"
17#include "bolt/Passes/BinaryPasses.h"
18#include "bolt/Profile/BoltAddressTranslation.h"
19#include "bolt/Profile/Heatmap.h"
20#include "bolt/Profile/YAMLProfileWriter.h"
21#include "bolt/Utils/CommandLineOpts.h"
22#include "bolt/Utils/Utils.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/ScopeExit.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Compiler.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Support/Errc.h"
29#include "llvm/Support/FileSystem.h"
30#include "llvm/Support/Process.h"
31#include "llvm/Support/Program.h"
32#include "llvm/Support/Regex.h"
33#include "llvm/Support/Timer.h"
34#include "llvm/Support/raw_ostream.h"
35#include <map>
36#include <optional>
37#include <unordered_map>
38#include <utility>
39
40#define DEBUG_TYPE "aggregator"
41
42using namespace llvm;
43using namespace bolt;
44
45namespace opts {
46
47static cl::opt<bool>
48 BasicAggregation("nl",
49 cl::desc("aggregate basic samples (without LBR info)"),
50 cl::cat(AggregatorCategory));
51
52cl::opt<bool> ArmSPE("spe", cl::desc("Enable Arm SPE mode."),
53 cl::cat(AggregatorCategory));
54
55static cl::opt<std::string>
56 ITraceAggregation("itrace",
57 cl::desc("Generate LBR info with perf itrace argument"),
58 cl::cat(AggregatorCategory));
59
60static cl::opt<bool>
61FilterMemProfile("filter-mem-profile",
62 cl::desc("if processing a memory profile, filter out stack or heap accesses "
63 "that won't be useful for BOLT to reduce profile file size"),
64 cl::init(Val: true),
65 cl::cat(AggregatorCategory));
66
67static cl::opt<bool> ParseMemProfile(
68 "parse-mem-profile",
69 cl::desc("enable memory profile parsing if it's present in the input data, "
70 "on by default unless `--itrace` is set."),
71 cl::init(Val: true), cl::cat(AggregatorCategory));
72
73static cl::opt<unsigned long long>
74FilterPID("pid",
75 cl::desc("only use samples from process with specified PID"),
76 cl::init(Val: 0),
77 cl::Optional,
78 cl::cat(AggregatorCategory));
79
80static cl::opt<bool> ImputeTraceFallthrough(
81 "impute-trace-fall-through",
82 cl::desc("impute missing fall-throughs for branch-only traces"),
83 cl::Optional, cl::cat(AggregatorCategory));
84
85static cl::opt<bool>
86IgnoreBuildID("ignore-build-id",
87 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
88 cl::init(Val: false),
89 cl::cat(AggregatorCategory));
90
91static cl::opt<bool> IgnoreInterruptLBR(
92 "ignore-interrupt-lbr",
93 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
94 cl::init(Val: true), cl::cat(AggregatorCategory));
95
96static cl::opt<unsigned long long>
97MaxSamples("max-samples",
98 cl::init(Val: -1ULL),
99 cl::desc("maximum number of samples to read from LBR profile"),
100 cl::Optional,
101 cl::Hidden,
102 cl::cat(AggregatorCategory));
103
104extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
105extern cl::opt<bool> ProfileWritePseudoProbes;
106extern cl::opt<std::string> SaveProfile;
107
108cl::opt<bool> ReadPreAggregated(
109 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
110 cl::cat(AggregatorCategory));
111
112cl::opt<std::string>
113 ReadPerfEvents("perf-script-events",
114 cl::desc("skip perf event collection by supplying a "
115 "perf-script output in a textual format"),
116 cl::ReallyHidden, cl::init(Val: ""), cl::cat(AggregatorCategory));
117
118static cl::opt<bool>
119TimeAggregator("time-aggr",
120 cl::desc("time BOLT aggregator"),
121 cl::init(Val: false),
122 cl::ZeroOrMore,
123 cl::cat(AggregatorCategory));
124
125} // namespace opts
126
127namespace {
128
129const char TimerGroupName[] = "aggregator";
130const char TimerGroupDesc[] = "Aggregator";
131
132std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
133 std::vector<SectionNameAndRange> sections;
134 for (BinarySection &Section : BC->sections()) {
135 if (!Section.isText())
136 continue;
137 if (Section.getSize() == 0)
138 continue;
139 sections.push_back(
140 x: {.Name: Section.getName(), .BeginAddress: Section.getAddress(), .EndAddress: Section.getEndAddress()});
141 }
142 llvm::sort(C&: sections,
143 Comp: [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
144 return A.BeginAddress < B.BeginAddress;
145 });
146 return sections;
147}
148}
149
150constexpr uint64_t DataAggregator::KernelBaseAddr;
151
152DataAggregator::~DataAggregator() { deleteTempFiles(); }
153
154namespace {
155void deleteTempFile(const std::string &FileName) {
156 if (std::error_code Errc = sys::fs::remove(path: FileName.c_str()))
157 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
158 << " with error " << Errc.message() << "\n";
159}
160}
161
162void DataAggregator::deleteTempFiles() {
163 for (std::string &FileName : TempFiles)
164 deleteTempFile(FileName);
165 TempFiles.clear();
166}
167
168void DataAggregator::findPerfExecutable() {
169 std::optional<std::string> PerfExecutable =
170 sys::Process::FindInEnvPath(EnvName: "PATH", FileName: "perf");
171 if (!PerfExecutable) {
172 outs() << "PERF2BOLT: No perf executable found!\n";
173 exit(status: 1);
174 }
175 PerfPath = *PerfExecutable;
176}
177
178void DataAggregator::start() {
179 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
180
181 // Turn on heatmap building if requested by --heatmap flag.
182 if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
183 opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;
184
185 // Don't launch perf for pre-aggregated files or when perf input is specified
186 // by the user.
187 if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
188 return;
189
190 findPerfExecutable();
191
192 if (opts::ArmSPE) {
193 // pid from_ip to_ip flags
194 // where flags could be:
195 // P/M: whether branch was Predicted or Mispredicted.
196 // N: optionally appears when the branch was Not-Taken (ie fall-through)
197 // 12345 0x123/0x456/PN/-/-/8/RET/-
198 opts::ITraceAggregation = "bl";
199 opts::ParseMemProfile = true;
200 opts::BasicAggregation = false;
201 }
202
203 if (opts::BasicAggregation) {
204 launchPerfProcess(Name: "events without LBR", PPI&: MainEventsPPI,
205 Args: "script -F pid,event,ip");
206 } else if (!opts::ITraceAggregation.empty()) {
207 // Disable parsing memory profile from trace data, unless requested by user.
208 if (!opts::ParseMemProfile.getNumOccurrences())
209 opts::ParseMemProfile = false;
210 launchPerfProcess(Name: "branch events with itrace", PPI&: MainEventsPPI,
211 Args: "script -F pid,brstack --itrace=" +
212 opts::ITraceAggregation);
213 } else {
214 launchPerfProcess(Name: "branch events", PPI&: MainEventsPPI, Args: "script -F pid,brstack");
215 }
216
217 if (opts::ParseMemProfile)
218 launchPerfProcess(Name: "mem events", PPI&: MemEventsPPI,
219 Args: "script -F pid,event,addr,ip");
220
221 launchPerfProcess(Name: "process events", PPI&: MMapEventsPPI,
222 Args: "script --show-mmap-events --no-itrace");
223
224 launchPerfProcess(Name: "task events", PPI&: TaskEventsPPI,
225 Args: "script --show-task-events --no-itrace");
226}
227
228void DataAggregator::abort() {
229 if (opts::ReadPreAggregated)
230 return;
231
232 std::string Error;
233
234 // Kill subprocesses in case they are not finished
235 sys::Wait(PI: TaskEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
236 sys::Wait(PI: MMapEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
237 sys::Wait(PI: MainEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
238 if (opts::ParseMemProfile)
239 sys::Wait(PI: MemEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
240
241 deleteTempFiles();
242
243 exit(status: 1);
244}
245
246void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
247 StringRef Args) {
248 SmallVector<StringRef, 4> Argv;
249
250 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
251 Argv.push_back(Elt: PerfPath.data());
252
253 Args.split(A&: Argv, Separator: ' ');
254 Argv.push_back(Elt: "-f");
255 Argv.push_back(Elt: "-i");
256 Argv.push_back(Elt: Filename.c_str());
257
258 if (std::error_code Errc =
259 sys::fs::createTemporaryFile(Prefix: "perf.script", Suffix: "out", ResultPath&: PPI.StdoutPath)) {
260 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
261 << " with error " << Errc.message() << "\n";
262 exit(status: 1);
263 }
264 TempFiles.push_back(x: PPI.StdoutPath.data());
265
266 if (std::error_code Errc =
267 sys::fs::createTemporaryFile(Prefix: "perf.script", Suffix: "err", ResultPath&: PPI.StderrPath)) {
268 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
269 << " with error " << Errc.message() << "\n";
270 exit(status: 1);
271 }
272 TempFiles.push_back(x: PPI.StderrPath.data());
273
274 std::optional<StringRef> Redirects[] = {
275 std::nullopt, // Stdin
276 StringRef(PPI.StdoutPath.data()), // Stdout
277 StringRef(PPI.StderrPath.data())}; // Stderr
278
279 LLVM_DEBUG({
280 dbgs() << "Launching perf: ";
281 for (StringRef Arg : Argv)
282 dbgs() << Arg << " ";
283 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
284 << "\n";
285 });
286
287 PPI.PI = sys::ExecuteNoWait(Program: PerfPath.data(), Args: Argv, /*envp*/ Env: std::nullopt,
288 Redirects);
289}
290
291void DataAggregator::processFileBuildID(StringRef FileBuildID) {
292 auto WarningCallback = [](int ReturnCode, StringRef ErrBuf) {
293 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
294 };
295
296 PerfProcessInfo BuildIDProcessInfo;
297 launchPerfProcess(Name: "buildid list", PPI&: BuildIDProcessInfo, Args: "buildid-list");
298 if (prepareToParse(Name: "buildid", Process&: BuildIDProcessInfo, Callback: WarningCallback))
299 return;
300
301 std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
302 if (FileName && *FileName == sys::path::filename(path: BC->getFilename())) {
303 outs() << "PERF2BOLT: matched build-id and file name\n";
304 return;
305 }
306
307 if (FileName) {
308 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
309 BuildIDBinaryName = std::string(*FileName);
310 return;
311 }
312
313 if (!hasAllBuildIDs()) {
314 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
315 "data was recorded without it\n";
316 return;
317 }
318
319 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
320 "This indicates the input binary supplied for data aggregation "
321 "is not the same recorded by perf when collecting profiling "
322 "data, or there were no samples recorded for the binary. "
323 "Use -ignore-build-id option to override.\n";
324 if (!opts::IgnoreBuildID)
325 abort();
326}
327
328bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
329 if (opts::ReadPreAggregated)
330 return true;
331
332 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(Name: FileName);
333 if (!FD) {
334 consumeError(Err: FD.takeError());
335 return false;
336 }
337
338 char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
339
340 auto Close = make_scope_exit(F: [&] { sys::fs::closeFile(F&: *FD); });
341 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
342 FileHandle: *FD, Buf: MutableArrayRef(Buf, sizeof(Buf)), Offset: 0);
343 if (!BytesRead) {
344 consumeError(Err: BytesRead.takeError());
345 return false;
346 }
347
348 if (*BytesRead != 7)
349 return false;
350
351 if (strncmp(s1: Buf, s2: "PERFILE", n: 7) == 0)
352 return true;
353 return false;
354}
355
356void DataAggregator::parsePreAggregated() {
357 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
358 MemoryBuffer::getFileOrSTDIN(Filename);
359 if (std::error_code EC = MB.getError()) {
360 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
361 << EC.message() << "\n";
362 exit(status: 1);
363 }
364
365 FileBuf = std::move(*MB);
366 ParsingBuf = FileBuf->getBuffer();
367 Col = 0;
368 Line = 1;
369 if (parsePreAggregatedLBRSamples()) {
370 errs() << "PERF2BOLT: failed to parse samples\n";
371 exit(status: 1);
372 }
373}
374
375void DataAggregator::filterBinaryMMapInfo() {
376 if (opts::FilterPID) {
377 auto MMapInfoIter = BinaryMMapInfo.find(x: opts::FilterPID);
378 if (MMapInfoIter != BinaryMMapInfo.end()) {
379 MMapInfo MMap = MMapInfoIter->second;
380 BinaryMMapInfo.clear();
381 BinaryMMapInfo.insert(x: std::make_pair(x&: MMap.PID, y&: MMap));
382 } else {
383 if (errs().has_colors())
384 errs().changeColor(Color: raw_ostream::RED);
385 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
386 << opts::FilterPID << "\""
387 << " for binary \"" << BC->getFilename() << "\".";
388 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
389 errs() << " Profile for the following process is available:\n";
390 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
391 outs() << " " << MMI.second.PID
392 << (MMI.second.Forked ? " (forked)\n" : "\n");
393
394 if (errs().has_colors())
395 errs().resetColor();
396
397 exit(status: 1);
398 }
399 }
400}
401
402int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
403 PerfProcessErrorCallbackTy Callback) {
404 if (!opts::ReadPerfEvents.empty()) {
405 outs() << "PERF2BOLT: using pre-processed perf events for '" << Name
406 << "' (perf-script-events)\n";
407 ParsingBuf = opts::ReadPerfEvents;
408 return 0;
409 }
410
411 std::string Error;
412 outs() << "PERF2BOLT: waiting for perf " << Name
413 << " collection to finish...\n";
414 std::optional<sys::ProcessStatistics> PS;
415 sys::ProcessInfo PI = sys::Wait(PI: Process.PI, SecondsToWait: std::nullopt, ErrMsg: &Error, ProcStat: &PS);
416
417 if (!Error.empty()) {
418 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
419 deleteTempFiles();
420 exit(status: 1);
421 }
422
423 LLVM_DEBUG({
424 const float UserSec = 1.f * PS->UserTime.count() / 1e6;
425 const float TotalSec = 1.f * PS->TotalTime.count() / 1e6;
426 const float PeakGiB = 1.f * PS->PeakMemory / (1 << 20);
427 dbgs() << formatv("Finished in {0:f2}s user time, {1:f2}s total time, "
428 "{2:f2} GiB peak RSS\n",
429 UserSec, TotalSec, PeakGiB);
430 });
431
432 if (PI.ReturnCode != 0) {
433 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
434 MemoryBuffer::getFileOrSTDIN(Filename: Process.StderrPath.data());
435 StringRef ErrBuf = (*ErrorMB)->getBuffer();
436
437 deleteTempFiles();
438 Callback(PI.ReturnCode, ErrBuf);
439 return PI.ReturnCode;
440 }
441
442 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
443 MemoryBuffer::getFileOrSTDIN(Filename: Process.StdoutPath.data());
444 if (std::error_code EC = MB.getError()) {
445 errs() << "Cannot open " << Process.StdoutPath.data() << ": "
446 << EC.message() << "\n";
447 deleteTempFiles();
448 exit(status: 1);
449 }
450
451 FileBuf = std::move(*MB);
452 ParsingBuf = FileBuf->getBuffer();
453 Col = 0;
454 Line = 1;
455 return PI.ReturnCode;
456}
457
458void DataAggregator::parsePerfData(BinaryContext &BC) {
459 auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
460 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
461 exit(status: 1);
462 };
463
464 auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
465 Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
466 "Cannot print 'addr' field.");
467 if (!NoData.match(String: ErrBuf))
468 ErrorCallback(ReturnCode, ErrBuf);
469 };
470
471 if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
472 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
473 processFileBuildID(FileBuildID: *FileBuildID);
474 } else {
475 errs() << "BOLT-WARNING: build-id will not be checked because we could "
476 "not read one from input binary\n";
477 }
478
479 if (BC.IsLinuxKernel) {
480 // Current MMap parsing logic does not work with linux kernel.
481 // MMap entries for linux kernel uses PERF_RECORD_MMAP
482 // format instead of typical PERF_RECORD_MMAP2 format.
483 // Since linux kernel address mapping is absolute (same as
484 // in the ELF file), we avoid parsing MMap in linux kernel mode.
485 // While generating optimized linux kernel binary, we may need
486 // to parse MMap entries.
487
488 // In linux kernel mode, we analyze and optimize
489 // all linux kernel binary instructions, irrespective
490 // of whether they are due to system calls or due to
491 // interrupts. Therefore, we cannot ignore interrupt
492 // in Linux kernel mode.
493 opts::IgnoreInterruptLBR = false;
494 } else {
495 prepareToParse(Name: "mmap events", Process&: MMapEventsPPI, Callback: ErrorCallback);
496 if (parseMMapEvents())
497 errs() << "PERF2BOLT: failed to parse mmap events\n";
498 }
499
500 prepareToParse(Name: "task events", Process&: TaskEventsPPI, Callback: ErrorCallback);
501 if (parseTaskEvents())
502 errs() << "PERF2BOLT: failed to parse task events\n";
503
504 filterBinaryMMapInfo();
505 prepareToParse(Name: "events", Process&: MainEventsPPI, Callback: ErrorCallback);
506
507 if ((!opts::BasicAggregation && parseBranchEvents()) ||
508 (opts::BasicAggregation && parseBasicEvents()))
509 errs() << "PERF2BOLT: failed to parse samples\n";
510
511 // Special handling for memory events
512 if (opts::ParseMemProfile &&
513 !prepareToParse(Name: "mem events", Process&: MemEventsPPI, Callback: MemEventsErrorCallback))
514 if (const std::error_code EC = parseMemEvents())
515 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
516 << '\n';
517
518 deleteTempFiles();
519}
520
521void DataAggregator::imputeFallThroughs() {
522 if (Traces.empty())
523 return;
524
525 std::pair PrevBranch(Trace::EXTERNAL, Trace::EXTERNAL);
526 uint64_t AggregateCount = 0;
527 uint64_t AggregateFallthroughSize = 0;
528 uint64_t InferredTraces = 0;
529
530 // Helper map with whether the instruction is a call/ret/unconditional branch
531 std::unordered_map<uint64_t, bool> IsUncondCTMap;
532 auto checkUnconditionalControlTransfer = [&](const uint64_t Addr) {
533 auto isUncondCT = [&](const MCInst &MI) -> bool {
534 return BC->MIB->isUnconditionalControlTransfer(Inst: MI);
535 };
536 return testAndSet<bool>(Addr, Callback: isUncondCT, Map&: IsUncondCTMap).value_or(u: true);
537 };
538
539 // Traces are sorted by their component addresses (Branch, From, To).
540 // assert(is_sorted(Traces));
541
542 // Traces corresponding to the top-of-stack branch entry with a missing
543 // fall-through have BR_ONLY(-1ULL/UINT64_MAX) in To field, meaning that for
544 // fixed values of Branch and From branch-only traces are stored after all
545 // traces with valid fall-through.
546 //
547 // Group traces by (Branch, From) and compute weighted average fall-through
548 // length for the top-of-stack trace (closing the group) by accumulating the
549 // fall-through lengths of traces with valid fall-throughs earlier in the
550 // group.
551 for (auto &[Trace, Info] : Traces) {
552 // Skip fall-throughs in external code.
553 if (Trace.From == Trace::EXTERNAL)
554 continue;
555 std::pair CurrentBranch(Trace.Branch, Trace.From);
556 // BR_ONLY must be the last trace in the group
557 if (Trace.To == Trace::BR_ONLY) {
558 // If the group is not empty, use aggregate values, otherwise 0-length
559 // for unconditional jumps (call/ret/uncond branch) or 1-length for others
560 uint64_t InferredBytes =
561 PrevBranch == CurrentBranch
562 ? AggregateFallthroughSize / AggregateCount
563 : !checkUnconditionalControlTransfer(Trace.From);
564 Trace.To = Trace.From + InferredBytes;
565 LLVM_DEBUG(dbgs() << "imputed " << Trace << " (" << InferredBytes
566 << " bytes)\n");
567 ++InferredTraces;
568 } else {
569 // Trace with a valid fall-through
570 // New group: reset aggregates.
571 if (CurrentBranch != PrevBranch)
572 AggregateCount = AggregateFallthroughSize = 0;
573 // Only use valid fall-through lengths
574 if (Trace.To != Trace::EXTERNAL)
575 AggregateFallthroughSize += (Trace.To - Trace.From) * Info.TakenCount;
576 AggregateCount += Info.TakenCount;
577 }
578 PrevBranch = CurrentBranch;
579 }
580 if (opts::Verbosity >= 1)
581 outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
582}
583
584Error DataAggregator::preprocessProfile(BinaryContext &BC) {
585 this->BC = &BC;
586
587 if (opts::ReadPreAggregated) {
588 parsePreAggregated();
589 } else {
590 parsePerfData(BC);
591 }
592
593 // Sort parsed traces for faster processing.
594 llvm::sort(C&: Traces, Comp: llvm::less_first());
595
596 if (opts::ImputeTraceFallthrough)
597 imputeFallThroughs();
598
599 if (opts::HeatmapMode) {
600 if (std::error_code EC = printLBRHeatMap())
601 return errorCodeToError(EC);
602 if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive)
603 exit(status: 0);
604 }
605
606 return Error::success();
607}
608
609Error DataAggregator::readProfile(BinaryContext &BC) {
610 processProfile(BC);
611
612 for (auto &BFI : BC.getBinaryFunctions()) {
613 BinaryFunction &Function = BFI.second;
614 convertBranchData(BF&: Function);
615 }
616
617 if (opts::AggregateOnly) {
618 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
619 if (std::error_code EC = writeAggregatedFile(OutputFilename: opts::OutputFilename))
620 report_error(Message: "cannot create output data file", EC);
621
622 // BAT YAML is handled by DataAggregator since normal YAML output requires
623 // CFG which is not available in BAT mode.
624 if (usesBAT()) {
625 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
626 if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::OutputFilename))
627 report_error(Message: "cannot create output data file", EC);
628 if (!opts::SaveProfile.empty())
629 if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::SaveProfile))
630 report_error(Message: "cannot create output data file", EC);
631 }
632 }
633
634 return Error::success();
635}
636
637bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
638 return Function.hasProfileAvailable();
639}
640
641void DataAggregator::processProfile(BinaryContext &BC) {
642 if (opts::BasicAggregation)
643 processBasicEvents();
644 else
645 processBranchEvents();
646
647 processMemEvents();
648
649 // Mark all functions with registered events as having a valid profile.
650 for (auto &BFI : BC.getBinaryFunctions()) {
651 BinaryFunction &BF = BFI.second;
652 if (FuncBranchData *FBD = getBranchData(BF)) {
653 BF.markProfiled(Flags: BinaryFunction::PF_BRANCH);
654 BF.RawSampleCount = FBD->getNumExecutedBranches();
655 } else if (FuncBasicSampleData *FSD =
656 getFuncBasicSampleData(FuncNames: BF.getNames())) {
657 BF.markProfiled(Flags: BinaryFunction::PF_BASIC);
658 BF.RawSampleCount = FSD->getSamples();
659 }
660 }
661
662 for (auto &FuncBranches : NamesToBranches) {
663 llvm::stable_sort(Range&: FuncBranches.second.Data);
664 llvm::stable_sort(Range&: FuncBranches.second.EntryData);
665 }
666
667 for (auto &MemEvents : NamesToMemEvents)
668 llvm::stable_sort(Range&: MemEvents.second.Data);
669
670 // Release intermediate storage.
671 clear(Container&: Traces);
672 clear(Container&: BasicSamples);
673 clear(Container&: MemSamples);
674}
675
676BinaryFunction *
677DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
678 if (!BC->containsAddress(Address))
679 return nullptr;
680
681 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
682 /*UseMaxSize=*/true);
683}
684
685BinaryFunction *
686DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
687 if (BAT)
688 if (const uint64_t HotAddr = BAT->fetchParentAddress(Address: Func.getAddress()))
689 return getBinaryFunctionContainingAddress(Address: HotAddr);
690 return nullptr;
691}
692
693StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
694 bool BAT) {
695 if (!BAT)
696 return Func.getOneName();
697
698 const BinaryFunction *OrigFunc = &Func;
699 // If it is a local function, prefer the name containing the file name where
700 // the local function was declared
701 for (StringRef AlternativeName : OrigFunc->getNames()) {
702 size_t FileNameIdx = AlternativeName.find(C: '/');
703 // Confirm the alternative name has the pattern Symbol/FileName/1 before
704 // using it
705 if (FileNameIdx == StringRef::npos ||
706 AlternativeName.find(C: '/', From: FileNameIdx + 1) == StringRef::npos)
707 continue;
708 return AlternativeName;
709 }
710 return OrigFunc->getOneName();
711}
712
713bool DataAggregator::doBasicSample(BinaryFunction &OrigFunc, uint64_t Address,
714 uint64_t Count) {
715 // To record executed bytes, use basic block size as is regardless of BAT.
716 uint64_t BlockSize = 0;
717 if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
718 Offset: Address - OrigFunc.getAddress()))
719 BlockSize = BB->getOriginalSize();
720
721 BinaryFunction *ParentFunc = getBATParentFunction(Func: OrigFunc);
722 BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
723 // Attach executed bytes to parent function in case of cold fragment.
724 Func.SampleCountInBytes += Count * BlockSize;
725
726 auto I = NamesToBasicSamples.find(x: Func.getOneName());
727 if (I == NamesToBasicSamples.end()) {
728 bool Success;
729 StringRef LocName = getLocationName(Func, BAT);
730 std::tie(args&: I, args&: Success) = NamesToBasicSamples.insert(x: std::make_pair(
731 x: Func.getOneName(),
732 y: FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy())));
733 }
734
735 Address -= Func.getAddress();
736 if (BAT)
737 Address = BAT->translate(FuncAddress: Func.getAddress(), Offset: Address, /*IsBranchSrc=*/false);
738
739 I->second.bumpCount(Offset: Address, Count);
740 return true;
741}
742
743bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
744 uint64_t To, uint64_t Count,
745 uint64_t Mispreds) {
746 FuncBranchData *AggrData = getBranchData(BF: Func);
747 if (!AggrData) {
748 AggrData = &NamesToBranches[Func.getOneName()];
749 AggrData->Name = getLocationName(Func, BAT);
750 setBranchData(BF: Func, FBD: AggrData);
751 }
752
753 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
754 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
755 AggrData->bumpBranchCount(OffsetFrom: From, OffsetTo: To, Count, Mispreds);
756 return true;
757}
758
759bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
760 BinaryFunction *ToFunc, uint64_t From,
761 uint64_t To, uint64_t Count,
762 uint64_t Mispreds) {
763 FuncBranchData *FromAggrData = nullptr;
764 FuncBranchData *ToAggrData = nullptr;
765 StringRef SrcFunc;
766 StringRef DstFunc;
767 if (FromFunc) {
768 SrcFunc = getLocationName(Func: *FromFunc, BAT);
769 FromAggrData = getBranchData(BF: *FromFunc);
770 if (!FromAggrData) {
771 FromAggrData = &NamesToBranches[FromFunc->getOneName()];
772 FromAggrData->Name = SrcFunc;
773 setBranchData(BF: *FromFunc, FBD: FromAggrData);
774 }
775
776 recordExit(BF&: *FromFunc, From, Mispred: Mispreds, Count);
777 }
778 if (ToFunc) {
779 DstFunc = getLocationName(Func: *ToFunc, BAT);
780 ToAggrData = getBranchData(BF: *ToFunc);
781 if (!ToAggrData) {
782 ToAggrData = &NamesToBranches[ToFunc->getOneName()];
783 ToAggrData->Name = DstFunc;
784 setBranchData(BF: *ToFunc, FBD: ToAggrData);
785 }
786
787 recordEntry(BF&: *ToFunc, To, Mispred: Mispreds, Count);
788 }
789
790 if (FromAggrData)
791 FromAggrData->bumpCallCount(OffsetFrom: From, To: Location(!DstFunc.empty(), DstFunc, To),
792 Count, Mispreds);
793 if (ToAggrData)
794 ToAggrData->bumpEntryCount(From: Location(!SrcFunc.empty(), SrcFunc, From), OffsetTo: To,
795 Count, Mispreds);
796 return true;
797}
798
799bool DataAggregator::checkReturn(uint64_t Addr) {
800 auto isReturn = [&](const MCInst &MI) -> bool {
801 return BC->MIB->isReturn(Inst: MI);
802 };
803 return testAndSet<bool>(Addr, Callback: isReturn, Map&: Returns).value_or(u: false);
804}
805
806bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
807 uint64_t Mispreds) {
808 // Mutates \p Addr to an offset into the containing function, performing BAT
809 // offset translation and parent lookup.
810 //
811 // Returns the containing function (or BAT parent).
812 auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
813 BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: Addr);
814 if (!Func) {
815 Addr = 0;
816 return Func;
817 }
818
819 Addr -= Func->getAddress();
820
821 if (BAT)
822 Addr = BAT->translate(FuncAddress: Func->getAddress(), Offset: Addr, IsBranchSrc: IsFrom);
823
824 if (BinaryFunction *ParentFunc = getBATParentFunction(Func: *Func))
825 return ParentFunc;
826
827 return Func;
828 };
829
830 BinaryFunction *FromFunc = handleAddress(From, /*IsFrom*/ true);
831 BinaryFunction *ToFunc = handleAddress(To, /*IsFrom*/ false);
832 if (!FromFunc && !ToFunc)
833 return false;
834
835 // Treat recursive control transfers as inter-branches.
836 if (FromFunc == ToFunc && To != 0) {
837 recordBranch(BF&: *FromFunc, From, To, Count, Mispreds);
838 return doIntraBranch(Func&: *FromFunc, From, To, Count, Mispreds);
839 }
840
841 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
842}
843
844bool DataAggregator::doTrace(const Trace &Trace, uint64_t Count,
845 bool IsReturn) {
846 const uint64_t From = Trace.From, To = Trace.To;
847 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(Address: From);
848 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Address: To);
849 NumTraces += Count;
850 if (!FromFunc || !ToFunc) {
851 LLVM_DEBUG(dbgs() << "Out of range trace " << Trace << '\n');
852 NumLongRangeTraces += Count;
853 return false;
854 }
855 if (FromFunc != ToFunc) {
856 LLVM_DEBUG(dbgs() << "Invalid trace " << Trace << '\n');
857 NumInvalidTraces += Count;
858 return false;
859 }
860
861 // Set ParentFunc to BAT parent function or FromFunc itself.
862 BinaryFunction *ParentFunc = getBATParentFunction(Func: *FromFunc);
863 if (!ParentFunc)
864 ParentFunc = FromFunc;
865 ParentFunc->SampleCountInBytes += Count * (To - From);
866
867 const uint64_t FuncAddress = FromFunc->getAddress();
868 std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
869 BAT && BAT->isBATFunction(Address: FuncAddress)
870 ? BAT->getFallthroughsInTrace(FuncAddress, From: From - IsReturn, To)
871 : getFallthroughsInTrace(BF&: *FromFunc, Trace, Count, IsReturn);
872 if (!FTs) {
873 LLVM_DEBUG(dbgs() << "Invalid trace " << Trace << '\n');
874 NumInvalidTraces += Count;
875 return false;
876 }
877
878 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
879 << FromFunc->getPrintName() << ":" << Trace << '\n');
880 for (const auto &[From, To] : *FTs)
881 doIntraBranch(Func&: *ParentFunc, From, To, Count, Mispreds: false);
882
883 return true;
884}
885
886std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
887DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace,
888 uint64_t Count, bool IsReturn) const {
889 SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches;
890
891 BinaryContext &BC = BF.getBinaryContext();
892
893 // Offsets of the trace within this function.
894 const uint64_t From = Trace.From - BF.getAddress();
895 const uint64_t To = Trace.To - BF.getAddress();
896
897 if (From > To)
898 return std::nullopt;
899
900 // Accept fall-throughs inside pseudo functions (PLT/thunks).
901 // This check has to be above BF.empty as pseudo functions would pass it:
902 // pseudo => ignored => CFG not built => empty.
903 // If we return nullopt, trace would be reported as mismatching disassembled
904 // function contents which it is not. To avoid this, return an empty
905 // fall-through list instead.
906 if (BF.isPseudo())
907 return Branches;
908
909 if (!BF.isSimple())
910 return std::nullopt;
911
912 assert(BF.hasCFG() && "can only record traces in CFG state");
913
914 const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: From);
915 const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(Offset: To);
916
917 if (!FromBB || !ToBB)
918 return std::nullopt;
919
920 // Adjust FromBB if the first LBR is a return from the last instruction in
921 // the previous block (that instruction should be a call).
922 if (Trace.Branch != Trace::FT_ONLY && !BF.containsAddress(PC: Trace.Branch) &&
923 From == FromBB->getOffset() &&
924 (IsReturn ? From : !(FromBB->isEntryPoint() || FromBB->isLandingPad()))) {
925 const BinaryBasicBlock *PrevBB =
926 BF.getLayout().getBlock(Index: FromBB->getIndex() - 1);
927 if (PrevBB->getSuccessor(Label: FromBB->getLabel())) {
928 const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
929 if (Instr && BC.MIB->isCall(Inst: *Instr))
930 FromBB = PrevBB;
931 else
932 LLVM_DEBUG(dbgs() << "invalid trace (no call): " << Trace << '\n');
933 } else {
934 LLVM_DEBUG(dbgs() << "invalid trace: " << Trace << '\n');
935 }
936 }
937
938 // Fill out information for fall-through edges. The From and To could be
939 // within the same basic block, e.g. when two call instructions are in the
940 // same block. In this case we skip the processing.
941 if (FromBB == ToBB)
942 return Branches;
943
944 // Process blocks in the original layout order.
945 BinaryBasicBlock *BB = BF.getLayout().getBlock(Index: FromBB->getIndex());
946 assert(BB == FromBB && "index mismatch");
947 while (BB != ToBB) {
948 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(Index: BB->getIndex() + 1);
949 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
950
951 // Check for bad LBRs.
952 if (!BB->getSuccessor(Label: NextBB->getLabel())) {
953 LLVM_DEBUG(dbgs() << "no fall-through for the trace: " << Trace << '\n');
954 return std::nullopt;
955 }
956
957 const MCInst *Instr = BB->getLastNonPseudoInstr();
958 uint64_t Offset = 0;
959 if (Instr)
960 Offset = BC.MIB->getOffsetWithDefault(Inst: *Instr, Default: 0);
961 else
962 Offset = BB->getOffset();
963
964 Branches.emplace_back(Args&: Offset, Args: NextBB->getOffset());
965
966 BB = NextBB;
967 }
968
969 // Record fall-through jumps
970 for (const auto &[FromOffset, ToOffset] : Branches) {
971 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: FromOffset);
972 BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(Offset: ToOffset);
973 assert(FromBB && ToBB);
974 BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(Succ: *ToBB);
975 BI.Count += Count;
976 }
977
978 return Branches;
979}
980
981bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
982 uint64_t Count) const {
983 if (To > BF.getSize())
984 return false;
985
986 if (!BF.hasProfile())
987 BF.ExecutionCount = 0;
988
989 BinaryBasicBlock *EntryBB = nullptr;
990 if (To == 0) {
991 BF.ExecutionCount += Count;
992 if (!BF.empty())
993 EntryBB = &BF.front();
994 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(Offset: To)) {
995 if (BB->isEntryPoint())
996 EntryBB = BB;
997 }
998
999 if (EntryBB)
1000 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
1001
1002 return true;
1003}
1004
1005bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
1006 uint64_t Count) const {
1007 if (!BF.isSimple() || From > BF.getSize())
1008 return false;
1009
1010 if (!BF.hasProfile())
1011 BF.ExecutionCount = 0;
1012
1013 return true;
1014}
1015
1016ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
1017 LBREntry Res;
1018 ErrorOr<StringRef> FromStrRes = parseString(EndChar: '/');
1019 if (std::error_code EC = FromStrRes.getError())
1020 return EC;
1021 StringRef OffsetStr = FromStrRes.get();
1022 if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.From)) {
1023 reportError(ErrorMsg: "expected hexadecimal number with From address");
1024 Diag << "Found: " << OffsetStr << "\n";
1025 return make_error_code(E: llvm::errc::io_error);
1026 }
1027
1028 ErrorOr<StringRef> ToStrRes = parseString(EndChar: '/');
1029 if (std::error_code EC = ToStrRes.getError())
1030 return EC;
1031 OffsetStr = ToStrRes.get();
1032 if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.To)) {
1033 reportError(ErrorMsg: "expected hexadecimal number with To address");
1034 Diag << "Found: " << OffsetStr << "\n";
1035 return make_error_code(E: llvm::errc::io_error);
1036 }
1037
1038 ErrorOr<StringRef> MispredStrRes = parseString(EndChar: '/');
1039 if (std::error_code EC = MispredStrRes.getError())
1040 return EC;
1041 StringRef MispredStr = MispredStrRes.get();
1042 // SPE brstack mispredicted flags might be up to two characters long:
1043 // 'PN' or 'MN'. Where 'N' optionally appears.
1044 bool ValidStrSize = opts::ArmSPE
1045 ? MispredStr.size() >= 1 && MispredStr.size() <= 2
1046 : MispredStr.size() == 1;
1047 bool SpeTakenBitErr =
1048 (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
1049 bool PredictionBitErr =
1050 !ValidStrSize ||
1051 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
1052 if (SpeTakenBitErr)
1053 reportError(ErrorMsg: "expected 'N' as SPE prediction bit for a not-taken branch");
1054 if (PredictionBitErr)
1055 reportError(ErrorMsg: "expected 'P', 'M' or '-' char as a prediction bit");
1056
1057 if (SpeTakenBitErr || PredictionBitErr) {
1058 Diag << "Found: " << MispredStr << "\n";
1059 return make_error_code(E: llvm::errc::io_error);
1060 }
1061 Res.Mispred = MispredStr[0] == 'M';
1062
1063 static bool MispredWarning = true;
1064 if (MispredStr[0] == '-' && MispredWarning) {
1065 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1066 MispredWarning = false;
1067 }
1068
1069 ErrorOr<StringRef> Rest = parseString(EndChar: FieldSeparator, EndNl: true);
1070 if (std::error_code EC = Rest.getError())
1071 return EC;
1072 if (Rest.get().size() < 5) {
1073 reportError(ErrorMsg: "expected rest of LBR entry");
1074 Diag << "Found: " << Rest.get() << "\n";
1075 return make_error_code(E: llvm::errc::io_error);
1076 }
1077 return Res;
1078}
1079
1080bool DataAggregator::checkAndConsumeFS() {
1081 if (ParsingBuf[0] != FieldSeparator)
1082 return false;
1083
1084 ParsingBuf = ParsingBuf.drop_front(N: 1);
1085 Col += 1;
1086 return true;
1087}
1088
1089void DataAggregator::consumeRestOfLine() {
1090 size_t LineEnd = ParsingBuf.find_first_of(C: '\n');
1091 if (LineEnd == StringRef::npos) {
1092 ParsingBuf = StringRef();
1093 Col = 0;
1094 Line += 1;
1095 return;
1096 }
1097 ParsingBuf = ParsingBuf.drop_front(N: LineEnd + 1);
1098 Col = 0;
1099 Line += 1;
1100}
1101
1102bool DataAggregator::checkNewLine() {
1103 return ParsingBuf[0] == '\n';
1104}
1105
1106ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1107 PerfBranchSample Res;
1108
1109 while (checkAndConsumeFS()) {
1110 }
1111
1112 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1113 if (std::error_code EC = PIDRes.getError())
1114 return EC;
1115 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1116 if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) {
1117 consumeRestOfLine();
1118 return make_error_code(E: errc::no_such_process);
1119 }
1120
1121 if (checkAndConsumeNewLine())
1122 return Res;
1123
1124 while (!checkAndConsumeNewLine()) {
1125 checkAndConsumeFS();
1126
1127 ErrorOr<LBREntry> LBRRes = parseLBREntry();
1128 if (std::error_code EC = LBRRes.getError())
1129 return EC;
1130 LBREntry LBR = LBRRes.get();
1131 if (ignoreKernelInterrupt(LBR))
1132 continue;
1133 if (!BC->HasFixedLoadAddress)
1134 adjustLBR(LBR, MMI: MMapInfoIter->second);
1135 Res.LBR.push_back(Elt: LBR);
1136 }
1137
1138 return Res;
1139}
1140
1141ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1142 while (checkAndConsumeFS()) {
1143 }
1144
1145 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1146 if (std::error_code EC = PIDRes.getError())
1147 return EC;
1148
1149 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1150 if (MMapInfoIter == BinaryMMapInfo.end()) {
1151 consumeRestOfLine();
1152 return PerfBasicSample{.EventName: StringRef(), .PC: 0};
1153 }
1154
1155 while (checkAndConsumeFS()) {
1156 }
1157
1158 ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator);
1159 if (std::error_code EC = Event.getError())
1160 return EC;
1161
1162 while (checkAndConsumeFS()) {
1163 }
1164
1165 ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator, EndNl: true);
1166 if (std::error_code EC = AddrRes.getError())
1167 return EC;
1168
1169 if (!checkAndConsumeNewLine()) {
1170 reportError(ErrorMsg: "expected end of line");
1171 return make_error_code(E: llvm::errc::io_error);
1172 }
1173
1174 uint64_t Address = *AddrRes;
1175 if (!BC->HasFixedLoadAddress)
1176 adjustAddress(Address, MMI: MMapInfoIter->second);
1177
1178 return PerfBasicSample{.EventName: Event.get(), .PC: Address};
1179}
1180
1181ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1182 PerfMemSample Res{.PC: 0, .Addr: 0};
1183
1184 while (checkAndConsumeFS()) {
1185 }
1186
1187 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1188 if (std::error_code EC = PIDRes.getError())
1189 return EC;
1190
1191 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1192 if (MMapInfoIter == BinaryMMapInfo.end()) {
1193 consumeRestOfLine();
1194 return Res;
1195 }
1196
1197 while (checkAndConsumeFS()) {
1198 }
1199
1200 ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator);
1201 if (std::error_code EC = Event.getError())
1202 return EC;
1203 if (!Event.get().contains(Other: "mem-loads")) {
1204 consumeRestOfLine();
1205 return Res;
1206 }
1207
1208 while (checkAndConsumeFS()) {
1209 }
1210
1211 ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator);
1212 if (std::error_code EC = AddrRes.getError())
1213 return EC;
1214
1215 while (checkAndConsumeFS()) {
1216 }
1217
1218 ErrorOr<uint64_t> PCRes = parseHexField(EndChar: FieldSeparator, EndNl: true);
1219 if (std::error_code EC = PCRes.getError()) {
1220 consumeRestOfLine();
1221 return EC;
1222 }
1223
1224 if (!checkAndConsumeNewLine()) {
1225 reportError(ErrorMsg: "expected end of line");
1226 return make_error_code(E: llvm::errc::io_error);
1227 }
1228
1229 uint64_t Address = *AddrRes;
1230 if (!BC->HasFixedLoadAddress)
1231 adjustAddress(Address, MMI: MMapInfoIter->second);
1232
1233 return PerfMemSample{.PC: PCRes.get(), .Addr: Address};
1234}
1235
1236ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1237 auto parseOffset = [this]() -> ErrorOr<Location> {
1238 ErrorOr<uint64_t> Res = parseHexField(EndChar: FieldSeparator);
1239 if (std::error_code EC = Res.getError())
1240 return EC;
1241 return Location(Res.get());
1242 };
1243
1244 size_t Sep = ParsingBuf.find_first_of(Chars: " \n");
1245 if (Sep == StringRef::npos)
1246 return parseOffset();
1247 StringRef LookAhead = ParsingBuf.substr(Start: 0, N: Sep);
1248 if (!LookAhead.contains(C: ':'))
1249 return parseOffset();
1250
1251 ErrorOr<StringRef> BuildID = parseString(EndChar: ':');
1252 if (std::error_code EC = BuildID.getError())
1253 return EC;
1254 ErrorOr<uint64_t> Offset = parseHexField(EndChar: FieldSeparator);
1255 if (std::error_code EC = Offset.getError())
1256 return EC;
1257 return Location(true, BuildID.get(), Offset.get());
1258}
1259
1260std::error_code DataAggregator::parseAggregatedLBREntry() {
1261 enum AggregatedLBREntry : char {
1262 INVALID = 0,
1263 EVENT_NAME, // E
1264 TRACE, // T
1265 RETURN, // R
1266 SAMPLE, // S
1267 BRANCH, // B
1268 FT, // F
1269 FT_EXTERNAL_ORIGIN, // f
1270 FT_EXTERNAL_RETURN // r
1271 } Type = INVALID;
1272
1273 /// The number of fields to parse, set based on \p Type.
1274 int AddrNum = 0;
1275 int CounterNum = 0;
1276 /// Storage for parsed fields.
1277 StringRef EventName;
1278 std::optional<Location> Addr[3];
1279 int64_t Counters[2] = {0};
1280
1281 /// Parse strings: record type and optionally an event name.
1282 while (Type == INVALID || Type == EVENT_NAME) {
1283 while (checkAndConsumeFS()) {
1284 }
1285 ErrorOr<StringRef> StrOrErr =
1286 parseString(EndChar: FieldSeparator, EndNl: Type == EVENT_NAME);
1287 if (std::error_code EC = StrOrErr.getError())
1288 return EC;
1289 StringRef Str = StrOrErr.get();
1290
1291 if (Type == EVENT_NAME) {
1292 EventName = Str;
1293 break;
1294 }
1295
1296 Type = StringSwitch<AggregatedLBREntry>(Str)
1297 .Case(S: "T", Value: TRACE)
1298 .Case(S: "R", Value: RETURN)
1299 .Case(S: "S", Value: SAMPLE)
1300 .Case(S: "E", Value: EVENT_NAME)
1301 .Case(S: "B", Value: BRANCH)
1302 .Case(S: "F", Value: FT)
1303 .Case(S: "f", Value: FT_EXTERNAL_ORIGIN)
1304 .Case(S: "r", Value: FT_EXTERNAL_RETURN)
1305 .Default(Value: INVALID);
1306
1307 if (Type == INVALID) {
1308 reportError(ErrorMsg: "expected T, R, S, E, B, F, f or r");
1309 return make_error_code(E: llvm::errc::io_error);
1310 }
1311
1312 using SSI = StringSwitch<int>;
1313 AddrNum = SSI(Str).Cases(S0: "T", S1: "R", Value: 3).Case(S: "S", Value: 1).Case(S: "E", Value: 0).Default(Value: 2);
1314 CounterNum = SSI(Str).Case(S: "B", Value: 2).Case(S: "E", Value: 0).Default(Value: 1);
1315 }
1316
1317 /// Parse locations depending on entry type, recording them in \p Addr array.
1318 for (int I = 0; I < AddrNum; ++I) {
1319 while (checkAndConsumeFS()) {
1320 }
1321 ErrorOr<Location> AddrOrErr = parseLocationOrOffset();
1322 if (std::error_code EC = AddrOrErr.getError())
1323 return EC;
1324 Addr[I] = AddrOrErr.get();
1325 }
1326
1327 /// Parse counters depending on entry type.
1328 for (int I = 0; I < CounterNum; ++I) {
1329 while (checkAndConsumeFS()) {
1330 }
1331 ErrorOr<int64_t> CountOrErr =
1332 parseNumberField(EndChar: FieldSeparator, EndNl: I + 1 == CounterNum);
1333 if (std::error_code EC = CountOrErr.getError())
1334 return EC;
1335 Counters[I] = CountOrErr.get();
1336 }
1337
1338 /// Expect end of line here.
1339 if (!checkAndConsumeNewLine()) {
1340 reportError(ErrorMsg: "expected end of line");
1341 return make_error_code(E: llvm::errc::io_error);
1342 }
1343
1344 /// Record event name into \p EventNames and return.
1345 if (Type == EVENT_NAME) {
1346 EventNames.insert(key: EventName);
1347 return std::error_code();
1348 }
1349
1350 const uint64_t FromOffset = Addr[0]->Offset;
1351 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(Address: FromOffset);
1352 if (FromFunc)
1353 FromFunc->setHasProfileAvailable();
1354
1355 int64_t Count = Counters[0];
1356 int64_t Mispreds = Counters[1];
1357
1358 /// Record basic IP sample into \p BasicSamples and return.
1359 if (Type == SAMPLE) {
1360 BasicSamples[FromOffset] += Count;
1361 NumTotalSamples += Count;
1362 return std::error_code();
1363 }
1364
1365 const uint64_t ToOffset = Addr[1]->Offset;
1366 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Address: ToOffset);
1367 if (ToFunc)
1368 ToFunc->setHasProfileAvailable();
1369
1370 /// For fall-through types, adjust locations to match Trace container.
1371 if (Type == FT || Type == FT_EXTERNAL_ORIGIN || Type == FT_EXTERNAL_RETURN) {
1372 Addr[2] = Location(Addr[1]->Offset); // Trace To
1373 Addr[1] = Location(Addr[0]->Offset); // Trace From
1374 // Put a magic value into Trace Branch to differentiate from a full trace:
1375 if (Type == FT)
1376 Addr[0] = Location(Trace::FT_ONLY);
1377 else if (Type == FT_EXTERNAL_ORIGIN)
1378 Addr[0] = Location(Trace::FT_EXTERNAL_ORIGIN);
1379 else if (Type == FT_EXTERNAL_RETURN)
1380 Addr[0] = Location(Trace::FT_EXTERNAL_RETURN);
1381 else
1382 llvm_unreachable("Unexpected fall-through type");
1383 }
1384
1385 /// For branch type, mark Trace To to differentiate from a full trace.
1386 if (Type == BRANCH)
1387 Addr[2] = Location(Trace::BR_ONLY);
1388
1389 if (Type == RETURN) {
1390 if (!Addr[0]->Offset)
1391 Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN;
1392 else
1393 Returns.emplace(args&: Addr[0]->Offset, args: true);
1394 }
1395
1396 /// Record a trace.
1397 Trace T{.Branch: Addr[0]->Offset, .From: Addr[1]->Offset, .To: Addr[2]->Offset};
1398 TakenBranchInfo TI{.TakenCount: (uint64_t)Count, .MispredCount: (uint64_t)Mispreds};
1399 Traces.emplace_back(args&: T, args&: TI);
1400
1401 NumTotalSamples += Count;
1402
1403 return std::error_code();
1404}
1405
1406bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1407 return opts::IgnoreInterruptLBR &&
1408 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1409}
1410
1411std::error_code DataAggregator::printLBRHeatMap() {
1412 outs() << "PERF2BOLT: parse branch events...\n";
1413 NamedRegionTimer T("buildHeatmap", "Building heatmap", TimerGroupName,
1414 TimerGroupDesc, opts::TimeAggregator);
1415
1416 if (BC->IsLinuxKernel) {
1417 opts::HeatmapMaxAddress = 0xffffffffffffffff;
1418 opts::HeatmapMinAddress = KernelBaseAddr;
1419 }
1420 opts::HeatmapBlockSizes &HMBS = opts::HeatmapBlock;
1421 Heatmap HM(HMBS[0], opts::HeatmapMinAddress, opts::HeatmapMaxAddress,
1422 getTextSections(BC));
1423 auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t {
1424 if (Symbol)
1425 if (ErrorOr<uint64_t> SymValue = BC->getSymbolValue(Symbol: *Symbol))
1426 return SymValue.get();
1427 return 0;
1428 };
1429 HM.HotStart = getSymbolValue(BC->getHotTextStartSymbol());
1430 HM.HotEnd = getSymbolValue(BC->getHotTextEndSymbol());
1431
1432 if (!NumTotalSamples) {
1433 if (opts::BasicAggregation) {
1434 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1435 "Cannot build heatmap.";
1436 } else {
1437 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1438 "Cannot build heatmap. Use -nl for building heatmap from "
1439 "basic events.\n";
1440 }
1441 exit(status: 1);
1442 }
1443
1444 outs() << "HEATMAP: building heat map...\n";
1445
1446 // Register basic samples and perf LBR addresses not covered by fallthroughs.
1447 for (const auto &[PC, Hits] : BasicSamples)
1448 HM.registerAddress(Address: PC, Count: Hits);
1449 for (const auto &[Trace, Info] : Traces)
1450 if (Trace.To != Trace::BR_ONLY)
1451 HM.registerAddressRange(StartAddress: Trace.From, EndAddress: Trace.To, Count: Info.TakenCount);
1452
1453 if (HM.getNumInvalidRanges())
1454 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1455
1456 if (!HM.size()) {
1457 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1458 exit(status: 1);
1459 }
1460
1461 HM.print(FileName: opts::HeatmapOutput);
1462 if (opts::HeatmapOutput == "-") {
1463 HM.printCDF(FileName: opts::HeatmapOutput);
1464 HM.printSectionHotness(Filename: opts::HeatmapOutput);
1465 } else {
1466 HM.printCDF(FileName: opts::HeatmapOutput + ".csv");
1467 HM.printSectionHotness(Filename: opts::HeatmapOutput + "-section-hotness.csv");
1468 }
1469 // Provide coarse-grained heatmaps if requested via zoom-out scales
1470 for (const uint64_t NewBucketSize : ArrayRef(HMBS).drop_front()) {
1471 HM.resizeBucket(NewSize: NewBucketSize);
1472 if (opts::HeatmapOutput == "-")
1473 HM.print(FileName: opts::HeatmapOutput);
1474 else
1475 HM.print(FileName: formatv(Fmt: "{0}-{1}", Vals&: opts::HeatmapOutput, Vals: NewBucketSize).str());
1476 }
1477
1478 return std::error_code();
1479}
1480
1481void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1482 bool NeedsSkylakeFix) {
1483 // LBRs are stored in reverse execution order. NextLBR refers to the next
1484 // executed branch record.
1485 const LBREntry *NextLBR = nullptr;
1486 uint32_t NumEntry = 0;
1487 for (const LBREntry &LBR : Sample.LBR) {
1488 ++NumEntry;
1489 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1490 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1491 // us to likely record an invalid trace and generate a stale function for
1492 // BAT mode (non BAT disassembles the function and is able to ignore this
1493 // trace at aggregation time). Drop first 2 entries (last two, in
1494 // chronological order)
1495 if (NeedsSkylakeFix && NumEntry <= 2)
1496 continue;
1497 uint64_t TraceTo = NextLBR ? NextLBR->From : Trace::BR_ONLY;
1498 NextLBR = &LBR;
1499
1500 TakenBranchInfo &Info = TraceMap[Trace{.Branch: LBR.From, .From: LBR.To, .To: TraceTo}];
1501 ++Info.TakenCount;
1502 Info.MispredCount += LBR.Mispred;
1503 }
1504 // Record LBR addresses not covered by fallthroughs (bottom-of-stack source
1505 // and top-of-stack target) as basic samples for heatmap.
1506 if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive &&
1507 !Sample.LBR.empty()) {
1508 ++BasicSamples[Sample.LBR.front().To];
1509 ++BasicSamples[Sample.LBR.back().From];
1510 }
1511}
1512
1513void DataAggregator::printLongRangeTracesDiagnostic() const {
1514 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1515 << NumLongRangeTraces;
1516 if (NumTraces > 0)
1517 outs() << format(Fmt: " (%.1f%%)", Vals: NumLongRangeTraces * 100.0f / NumTraces);
1518 outs() << "\n";
1519}
1520
1521static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
1522 float T2) {
1523 if (Denominator == 0) {
1524 outs() << "\n";
1525 return 0;
1526 }
1527 float Percent = Numerator * 100.0f / Denominator;
1528 outs() << " (";
1529 if (outs().has_colors()) {
1530 if (Percent > T2)
1531 outs().changeColor(Color: raw_ostream::RED);
1532 else if (Percent > T1)
1533 outs().changeColor(Color: raw_ostream::YELLOW);
1534 else
1535 outs().changeColor(Color: raw_ostream::GREEN);
1536 }
1537 outs() << format(Fmt: "%.1f%%", Vals: Percent);
1538 if (outs().has_colors())
1539 outs().resetColor();
1540 outs() << ")\n";
1541 return Percent;
1542}
1543
1544void DataAggregator::printBranchSamplesDiagnostics() const {
1545 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1546 << NumInvalidTraces;
1547 if (printColoredPct(Numerator: NumInvalidTraces, Denominator: NumTraces, T1: 5, T2: 10) > 10)
1548 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1549 "binary is probably not the same binary used during profiling "
1550 "collection. The generated data may be ineffective for improving "
1551 "performance\n\n";
1552 printLongRangeTracesDiagnostic();
1553}
1554
1555void DataAggregator::printBasicSamplesDiagnostics(
1556 uint64_t OutOfRangeSamples) const {
1557 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1558 << OutOfRangeSamples;
1559 if (printColoredPct(Numerator: OutOfRangeSamples, Denominator: NumTotalSamples, T1: 40, T2: 60) > 80)
1560 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1561 "binary is probably not the same binary used during profiling "
1562 "collection. The generated data may be ineffective for improving "
1563 "performance\n\n";
1564}
1565
1566void DataAggregator::printBranchStacksDiagnostics(
1567 uint64_t IgnoredSamples) const {
1568 outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
1569 if (printColoredPct(Numerator: IgnoredSamples, Denominator: NumTotalSamples, T1: 20, T2: 50) > 50)
1570 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1571 "were attributed to the input binary\n";
1572}
1573
1574std::error_code DataAggregator::parseBranchEvents() {
1575 std::string BranchEventTypeStr =
1576 opts::ArmSPE ? "SPE branch events in LBR-format" : "branch events";
1577 outs() << "PERF2BOLT: parse " << BranchEventTypeStr << "...\n";
1578 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1579 TimerGroupDesc, opts::TimeAggregator);
1580
1581 uint64_t NumEntries = 0;
1582 uint64_t NumSamples = 0;
1583 uint64_t NumSamplesNoLBR = 0;
1584 bool NeedsSkylakeFix = false;
1585
1586 while (hasData() && NumTotalSamples < opts::MaxSamples) {
1587 ++NumTotalSamples;
1588
1589 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1590 if (std::error_code EC = SampleRes.getError()) {
1591 if (EC == errc::no_such_process)
1592 continue;
1593 return EC;
1594 }
1595 ++NumSamples;
1596
1597 PerfBranchSample &Sample = SampleRes.get();
1598
1599 if (Sample.LBR.empty()) {
1600 ++NumSamplesNoLBR;
1601 continue;
1602 }
1603
1604 NumEntries += Sample.LBR.size();
1605 if (this->BC->isX86() && BAT && Sample.LBR.size() == 32 &&
1606 !NeedsSkylakeFix) {
1607 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1608 NeedsSkylakeFix = true;
1609 }
1610
1611 parseLBRSample(Sample, NeedsSkylakeFix);
1612 }
1613
1614 Traces.reserve(n: TraceMap.size());
1615 for (const auto &[Trace, Info] : TraceMap) {
1616 Traces.emplace_back(args: Trace, args: Info);
1617 for (const uint64_t Addr : {Trace.Branch, Trace.From})
1618 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Addr))
1619 BF->setHasProfileAvailable();
1620 }
1621 clear(Container&: TraceMap);
1622
1623 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1624 << " LBR entries\n";
1625 if (NumTotalSamples) {
1626 if (NumSamples && NumSamplesNoLBR == NumSamples) {
1627 // Note: we don't know if perf2bolt is being used to parse memory samples
1628 // at this point. In this case, it is OK to parse zero LBRs.
1629 if (!opts::ArmSPE)
1630 errs()
1631 << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1632 "LBR. Record profile with perf record -j any or run perf2bolt "
1633 "in no-LBR mode with -nl (the performance improvement in -nl "
1634 "mode may be limited)\n";
1635 else
1636 errs()
1637 << "PERF2BOLT-WARNING: All recorded samples for this binary lack "
1638 "SPE brstack entries. Make sure you are running Linux perf 6.14 "
1639 "or later, otherwise you get zero samples. Record the profile "
1640 "with: perf record -e 'arm_spe_0/branch_filter=1/'.";
1641 } else {
1642 printBranchStacksDiagnostics(IgnoredSamples: NumTotalSamples - NumSamples);
1643 }
1644 }
1645
1646 return std::error_code();
1647}
1648
1649void DataAggregator::processBranchEvents() {
1650 outs() << "PERF2BOLT: processing branch events...\n";
1651 NamedRegionTimer T("processBranch", "Processing branch events",
1652 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1653
1654 Returns.emplace(args: Trace::FT_EXTERNAL_RETURN, args: true);
1655 for (const auto &[Trace, Info] : Traces) {
1656 bool IsReturn = checkReturn(Addr: Trace.Branch);
1657 // Ignore returns.
1658 if (!IsReturn && Trace.Branch != Trace::FT_ONLY &&
1659 Trace.Branch != Trace::FT_EXTERNAL_ORIGIN)
1660 doBranch(From: Trace.Branch, To: Trace.From, Count: Info.TakenCount, Mispreds: Info.MispredCount);
1661 if (Trace.To != Trace::BR_ONLY)
1662 doTrace(Trace, Count: Info.TakenCount, IsReturn);
1663 }
1664 printBranchSamplesDiagnostics();
1665}
1666
1667std::error_code DataAggregator::parseBasicEvents() {
1668 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1669 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1670 TimerGroupDesc, opts::TimeAggregator);
1671 while (hasData()) {
1672 ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1673 if (std::error_code EC = Sample.getError())
1674 return EC;
1675
1676 if (!Sample->PC)
1677 continue;
1678 ++NumTotalSamples;
1679
1680 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC))
1681 BF->setHasProfileAvailable();
1682
1683 ++BasicSamples[Sample->PC];
1684 EventNames.insert(key: Sample->EventName);
1685 }
1686 outs() << "PERF2BOLT: read " << NumTotalSamples << " basic samples\n";
1687
1688 return std::error_code();
1689}
1690
1691void DataAggregator::processBasicEvents() {
1692 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1693 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1694 TimerGroupDesc, opts::TimeAggregator);
1695 uint64_t OutOfRangeSamples = 0;
1696 for (auto &Sample : BasicSamples) {
1697 const uint64_t PC = Sample.first;
1698 const uint64_t HitCount = Sample.second;
1699 BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC);
1700 if (!Func) {
1701 OutOfRangeSamples += HitCount;
1702 continue;
1703 }
1704
1705 doBasicSample(OrigFunc&: *Func, Address: PC, Count: HitCount);
1706 }
1707
1708 printBasicSamplesDiagnostics(OutOfRangeSamples);
1709}
1710
1711std::error_code DataAggregator::parseMemEvents() {
1712 outs() << "PERF2BOLT: parsing memory events...\n";
1713 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1714 TimerGroupDesc, opts::TimeAggregator);
1715 while (hasData()) {
1716 ErrorOr<PerfMemSample> Sample = parseMemSample();
1717 if (std::error_code EC = Sample.getError())
1718 return EC;
1719
1720 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC))
1721 BF->setHasProfileAvailable();
1722
1723 MemSamples.emplace_back(args: std::move(Sample.get()));
1724 }
1725
1726 return std::error_code();
1727}
1728
1729void DataAggregator::processMemEvents() {
1730 NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1731 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1732 for (const PerfMemSample &Sample : MemSamples) {
1733 uint64_t PC = Sample.PC;
1734 uint64_t Addr = Sample.Addr;
1735 StringRef FuncName;
1736 StringRef MemName;
1737
1738 // Try to resolve symbol for PC
1739 BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC);
1740 if (!Func) {
1741 LLVM_DEBUG(if (PC != 0) {
1742 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr);
1743 });
1744 continue;
1745 }
1746
1747 FuncName = Func->getOneName();
1748 PC -= Func->getAddress();
1749
1750 // Try to resolve symbol for memory load
1751 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Address: Addr)) {
1752 MemName = BD->getName();
1753 Addr -= BD->getAddress();
1754 } else if (opts::FilterMemProfile) {
1755 // Filter out heap/stack accesses
1756 continue;
1757 }
1758
1759 const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1760 const Location AddrLoc(!MemName.empty(), MemName, Addr);
1761
1762 FuncMemData *MemData = &NamesToMemEvents[FuncName];
1763 MemData->Name = FuncName;
1764 setMemData(BF: *Func, FMD: MemData);
1765 MemData->update(Offset: FuncLoc, Addr: AddrLoc);
1766 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1767 }
1768}
1769
1770std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1771 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1772 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1773 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1774 size_t AggregatedLBRs = 0;
1775 while (hasData()) {
1776 if (std::error_code EC = parseAggregatedLBREntry())
1777 return EC;
1778 ++AggregatedLBRs;
1779 }
1780
1781 outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n";
1782
1783 return std::error_code();
1784}
1785
1786std::optional<int32_t> DataAggregator::parseCommExecEvent() {
1787 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1788 if (LineEnd == StringRef::npos) {
1789 reportError(ErrorMsg: "expected rest of line");
1790 Diag << "Found: " << ParsingBuf << "\n";
1791 return std::nullopt;
1792 }
1793 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1794
1795 size_t Pos = Line.find(Str: "PERF_RECORD_COMM exec");
1796 if (Pos == StringRef::npos)
1797 return std::nullopt;
1798 Line = Line.drop_front(N: Pos);
1799
1800 // Line:
1801 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1802 StringRef PIDStr = Line.rsplit(Separator: ':').second.split(Separator: '/').first;
1803 int32_t PID;
1804 if (PIDStr.getAsInteger(Radix: 10, Result&: PID)) {
1805 reportError(ErrorMsg: "expected PID");
1806 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1807 return std::nullopt;
1808 }
1809
1810 return PID;
1811}
1812
1813namespace {
1814std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1815 const StringRef SecTimeStr = TimeStr.split(Separator: '.').first;
1816 const StringRef USecTimeStr = TimeStr.split(Separator: '.').second;
1817 uint64_t SecTime;
1818 uint64_t USecTime;
1819 if (SecTimeStr.getAsInteger(Radix: 10, Result&: SecTime) ||
1820 USecTimeStr.getAsInteger(Radix: 10, Result&: USecTime))
1821 return std::nullopt;
1822 return SecTime * 1000000ULL + USecTime;
1823}
1824}
1825
1826std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1827 while (checkAndConsumeFS()) {
1828 }
1829
1830 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1831 if (LineEnd == StringRef::npos) {
1832 reportError(ErrorMsg: "expected rest of line");
1833 Diag << "Found: " << ParsingBuf << "\n";
1834 return std::nullopt;
1835 }
1836 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1837
1838 size_t Pos = Line.find(Str: "PERF_RECORD_FORK");
1839 if (Pos == StringRef::npos) {
1840 consumeRestOfLine();
1841 return std::nullopt;
1842 }
1843
1844 ForkInfo FI;
1845
1846 const StringRef TimeStr =
1847 Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second;
1848 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1849 FI.Time = *TimeRes;
1850 }
1851
1852 Line = Line.drop_front(N: Pos);
1853
1854 // Line:
1855 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1856 const StringRef ChildPIDStr = Line.split(Separator: '(').second.split(Separator: ':').first;
1857 if (ChildPIDStr.getAsInteger(Radix: 10, Result&: FI.ChildPID)) {
1858 reportError(ErrorMsg: "expected PID");
1859 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1860 return std::nullopt;
1861 }
1862
1863 const StringRef ParentPIDStr = Line.rsplit(Separator: '(').second.split(Separator: ':').first;
1864 if (ParentPIDStr.getAsInteger(Radix: 10, Result&: FI.ParentPID)) {
1865 reportError(ErrorMsg: "expected PID");
1866 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1867 return std::nullopt;
1868 }
1869
1870 consumeRestOfLine();
1871
1872 return FI;
1873}
1874
1875ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1876DataAggregator::parseMMapEvent() {
1877 while (checkAndConsumeFS()) {
1878 }
1879
1880 MMapInfo ParsedInfo;
1881
1882 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1883 if (LineEnd == StringRef::npos) {
1884 reportError(ErrorMsg: "expected rest of line");
1885 Diag << "Found: " << ParsingBuf << "\n";
1886 return make_error_code(E: llvm::errc::io_error);
1887 }
1888 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1889
1890 size_t Pos = Line.find(Str: "PERF_RECORD_MMAP2");
1891 if (Pos == StringRef::npos) {
1892 consumeRestOfLine();
1893 return std::make_pair(x: StringRef(), y&: ParsedInfo);
1894 }
1895
1896 // Line:
1897 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1898
1899 const StringRef TimeStr =
1900 Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second;
1901 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1902 ParsedInfo.Time = *TimeRes;
1903
1904 Line = Line.drop_front(N: Pos);
1905
1906 // Line:
1907 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1908
1909 StringRef FileName = Line.rsplit(Separator: FieldSeparator).second;
1910 if (FileName.starts_with(Prefix: "//") || FileName.starts_with(Prefix: "[")) {
1911 consumeRestOfLine();
1912 return std::make_pair(x: StringRef(), y&: ParsedInfo);
1913 }
1914 FileName = sys::path::filename(path: FileName);
1915
1916 const StringRef PIDStr = Line.split(Separator: FieldSeparator).second.split(Separator: '/').first;
1917 if (PIDStr.getAsInteger(Radix: 10, Result&: ParsedInfo.PID)) {
1918 reportError(ErrorMsg: "expected PID");
1919 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1920 return make_error_code(E: llvm::errc::io_error);
1921 }
1922
1923 const StringRef BaseAddressStr = Line.split(Separator: '[').second.split(Separator: '(').first;
1924 if (BaseAddressStr.getAsInteger(Radix: 0, Result&: ParsedInfo.MMapAddress)) {
1925 reportError(ErrorMsg: "expected base address");
1926 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1927 return make_error_code(E: llvm::errc::io_error);
1928 }
1929
1930 const StringRef SizeStr = Line.split(Separator: '(').second.split(Separator: ')').first;
1931 if (SizeStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Size)) {
1932 reportError(ErrorMsg: "expected mmaped size");
1933 Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1934 return make_error_code(E: llvm::errc::io_error);
1935 }
1936
1937 const StringRef OffsetStr =
1938 Line.split(Separator: '@').second.ltrim().split(Separator: FieldSeparator).first;
1939 if (OffsetStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Offset)) {
1940 reportError(ErrorMsg: "expected mmaped page-aligned offset");
1941 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1942 return make_error_code(E: llvm::errc::io_error);
1943 }
1944
1945 consumeRestOfLine();
1946
1947 return std::make_pair(x&: FileName, y&: ParsedInfo);
1948}
1949
1950std::error_code DataAggregator::parseMMapEvents() {
1951 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1952 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1953 TimerGroupDesc, opts::TimeAggregator);
1954
1955 std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1956 while (hasData()) {
1957 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1958 if (std::error_code EC = FileMMapInfoRes.getError())
1959 return EC;
1960
1961 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1962 if (FileMMapInfo.second.PID == -1)
1963 continue;
1964 if (FileMMapInfo.first == "(deleted)")
1965 continue;
1966
1967 GlobalMMapInfo.insert(x&: FileMMapInfo);
1968 }
1969
1970 LLVM_DEBUG({
1971 dbgs() << "FileName -> mmap info:\n"
1972 << " Filename : PID [MMapAddr, Size, Offset]\n";
1973 for (const auto &[Name, MMap] : GlobalMMapInfo)
1974 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID,
1975 MMap.MMapAddress, MMap.Size, MMap.Offset);
1976 });
1977
1978 StringRef NameToUse = llvm::sys::path::filename(path: BC->getFilename());
1979 if (GlobalMMapInfo.count(x: NameToUse) == 0 && !BuildIDBinaryName.empty()) {
1980 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1981 << "\" for profile matching\n";
1982 NameToUse = BuildIDBinaryName;
1983 }
1984
1985 auto Range = GlobalMMapInfo.equal_range(x: NameToUse);
1986 for (MMapInfo &MMapInfo : llvm::make_second_range(c: make_range(p: Range))) {
1987 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
1988 // Check that the binary mapping matches one of the segments.
1989 bool MatchFound = llvm::any_of(
1990 Range: llvm::make_second_range(c&: BC->SegmentMapInfo),
1991 P: [&](SegmentInfo &SegInfo) {
1992 // The mapping is page-aligned and hence the MMapAddress could be
1993 // different from the segment start address. We cannot know the page
1994 // size of the mapping, but we know it should not exceed the segment
1995 // alignment value. Hence we are performing an approximate check.
1996 return SegInfo.Address >= MMapInfo.MMapAddress &&
1997 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment &&
1998 SegInfo.IsExecutable;
1999 });
2000 if (!MatchFound) {
2001 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2002 << " at 0x" << Twine::utohexstr(Val: MMapInfo.MMapAddress) << '\n';
2003 continue;
2004 }
2005 }
2006
2007 // Set base address for shared objects.
2008 if (!BC->HasFixedLoadAddress) {
2009 std::optional<uint64_t> BaseAddress =
2010 BC->getBaseAddressForMapping(MMapAddress: MMapInfo.MMapAddress, FileOffset: MMapInfo.Offset);
2011 if (!BaseAddress) {
2012 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2013 "binary when memory mapped at 0x"
2014 << Twine::utohexstr(Val: MMapInfo.MMapAddress)
2015 << " using file offset 0x" << Twine::utohexstr(Val: MMapInfo.Offset)
2016 << ". Ignoring profile data for this mapping\n";
2017 continue;
2018 }
2019 MMapInfo.BaseAddress = *BaseAddress;
2020 }
2021
2022 // Try to add MMapInfo to the map and update its size. Large binaries may
2023 // span to multiple text segments, so the mapping is inserted only on the
2024 // first occurrence.
2025 if (!BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo)).second)
2026 assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress &&
2027 "Base address on multiple segment mappings should match");
2028
2029 // Update mapping size.
2030 const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size;
2031 const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress;
2032 if (Size > BinaryMMapInfo[MMapInfo.PID].Size)
2033 BinaryMMapInfo[MMapInfo.PID].Size = Size;
2034 }
2035
2036 if (BinaryMMapInfo.empty()) {
2037 if (errs().has_colors())
2038 errs().changeColor(Color: raw_ostream::RED);
2039 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2040 << BC->getFilename() << "\".";
2041 if (!GlobalMMapInfo.empty()) {
2042 errs() << " Profile for the following binary name(s) is available:\n";
2043 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2044 I = GlobalMMapInfo.upper_bound(x: I->first))
2045 errs() << " " << I->first << '\n';
2046 errs() << "Please rename the input binary.\n";
2047 } else {
2048 errs() << " Failed to extract any binary name from a profile.\n";
2049 }
2050 if (errs().has_colors())
2051 errs().resetColor();
2052
2053 exit(status: 1);
2054 }
2055
2056 return std::error_code();
2057}
2058
2059std::error_code DataAggregator::parseTaskEvents() {
2060 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2061 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2062 TimerGroupDesc, opts::TimeAggregator);
2063
2064 while (hasData()) {
2065 if (std::optional<int32_t> CommInfo = parseCommExecEvent()) {
2066 // Remove forked child that ran execve
2067 auto MMapInfoIter = BinaryMMapInfo.find(x: *CommInfo);
2068 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2069 BinaryMMapInfo.erase(position: MMapInfoIter);
2070 consumeRestOfLine();
2071 continue;
2072 }
2073
2074 std::optional<ForkInfo> ForkInfo = parseForkEvent();
2075 if (!ForkInfo)
2076 continue;
2077
2078 if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2079 continue;
2080
2081 if (ForkInfo->Time == 0) {
2082 // Process was forked and mmaped before perf ran. In this case the child
2083 // should have its own mmap entry unless it was execve'd.
2084 continue;
2085 }
2086
2087 auto MMapInfoIter = BinaryMMapInfo.find(x: ForkInfo->ParentPID);
2088 if (MMapInfoIter == BinaryMMapInfo.end())
2089 continue;
2090
2091 MMapInfo MMapInfo = MMapInfoIter->second;
2092 MMapInfo.PID = ForkInfo->ChildPID;
2093 MMapInfo.Forked = true;
2094 BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo));
2095 }
2096
2097 outs() << "PERF2BOLT: input binary is associated with "
2098 << BinaryMMapInfo.size() << " PID(s)\n";
2099
2100 LLVM_DEBUG({
2101 for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo))
2102 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID,
2103 (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress,
2104 MMI.Size);
2105 });
2106
2107 return std::error_code();
2108}
2109
2110std::optional<std::pair<StringRef, StringRef>>
2111DataAggregator::parseNameBuildIDPair() {
2112 while (checkAndConsumeFS()) {
2113 }
2114
2115 ErrorOr<StringRef> BuildIDStr = parseString(EndChar: FieldSeparator, EndNl: true);
2116 if (std::error_code EC = BuildIDStr.getError())
2117 return std::nullopt;
2118
2119 // If one of the strings is missing, don't issue a parsing error, but still
2120 // do not return a value.
2121 consumeAllRemainingFS();
2122 if (checkNewLine())
2123 return std::nullopt;
2124
2125 ErrorOr<StringRef> NameStr = parseString(EndChar: FieldSeparator, EndNl: true);
2126 if (std::error_code EC = NameStr.getError())
2127 return std::nullopt;
2128
2129 consumeRestOfLine();
2130 return std::make_pair(x&: NameStr.get(), y&: BuildIDStr.get());
2131}
2132
2133bool DataAggregator::hasAllBuildIDs() {
2134 const StringRef SavedParsingBuf = ParsingBuf;
2135
2136 if (!hasData())
2137 return false;
2138
2139 bool HasInvalidEntries = false;
2140 while (hasData()) {
2141 if (!parseNameBuildIDPair()) {
2142 HasInvalidEntries = true;
2143 break;
2144 }
2145 }
2146
2147 ParsingBuf = SavedParsingBuf;
2148
2149 return !HasInvalidEntries;
2150}
2151
2152std::optional<StringRef>
2153DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2154 const StringRef SavedParsingBuf = ParsingBuf;
2155
2156 StringRef FileName;
2157 while (hasData()) {
2158 std::optional<std::pair<StringRef, StringRef>> IDPair =
2159 parseNameBuildIDPair();
2160 if (!IDPair) {
2161 consumeRestOfLine();
2162 continue;
2163 }
2164
2165 if (IDPair->second.starts_with(Prefix: FileBuildID)) {
2166 FileName = sys::path::filename(path: IDPair->first);
2167 break;
2168 }
2169 }
2170
2171 ParsingBuf = SavedParsingBuf;
2172
2173 if (!FileName.empty())
2174 return FileName;
2175
2176 return std::nullopt;
2177}
2178
2179std::error_code
2180DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2181 std::error_code EC;
2182 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2183 if (EC)
2184 return EC;
2185
2186 bool WriteMemLocs = false;
2187
2188 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2189 if (WriteMemLocs)
2190 OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2191 else
2192 OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2193 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Name: Loc.Name))
2194 << " " << Twine::utohexstr(Val: Loc.Offset) << FieldSeparator;
2195 };
2196
2197 uint64_t BranchValues = 0;
2198 uint64_t MemValues = 0;
2199
2200 if (BAT)
2201 OutFile << "boltedcollection\n";
2202 if (opts::BasicAggregation) {
2203 OutFile << "no_lbr";
2204 for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
2205 OutFile << " " << Entry.getKey();
2206 OutFile << "\n";
2207
2208 for (const auto &KV : NamesToBasicSamples) {
2209 const FuncBasicSampleData &FSD = KV.second;
2210 for (const BasicSampleInfo &SI : FSD.Data) {
2211 writeLocation(SI.Loc);
2212 OutFile << SI.Hits << "\n";
2213 ++BranchValues;
2214 }
2215 }
2216 } else {
2217 for (const auto &KV : NamesToBranches) {
2218 const FuncBranchData &FBD = KV.second;
2219 for (const BranchInfo &BI : FBD.Data) {
2220 writeLocation(BI.From);
2221 writeLocation(BI.To);
2222 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2223 ++BranchValues;
2224 }
2225 for (const BranchInfo &BI : FBD.EntryData) {
2226 // Do not output if source is a known symbol, since this was already
2227 // accounted for in the source function
2228 if (BI.From.IsSymbol)
2229 continue;
2230 writeLocation(BI.From);
2231 writeLocation(BI.To);
2232 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2233 ++BranchValues;
2234 }
2235 }
2236
2237 WriteMemLocs = true;
2238 for (const auto &KV : NamesToMemEvents) {
2239 const FuncMemData &FMD = KV.second;
2240 for (const MemInfo &MemEvent : FMD.Data) {
2241 writeLocation(MemEvent.Offset);
2242 writeLocation(MemEvent.Addr);
2243 OutFile << MemEvent.Count << "\n";
2244 ++MemValues;
2245 }
2246 }
2247 }
2248
2249 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2250 << " memory objects to " << OutputFilename << "\n";
2251
2252 return std::error_code();
2253}
2254
2255std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
2256 StringRef OutputFilename) const {
2257 std::error_code EC;
2258 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2259 if (EC)
2260 return EC;
2261
2262 yaml::bolt::BinaryProfile BP;
2263
2264 const MCPseudoProbeDecoder *PseudoProbeDecoder =
2265 opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
2266
2267 // Fill out the header info.
2268 BP.Header.Version = 1;
2269 BP.Header.FileName = std::string(BC.getFilename());
2270 std::optional<StringRef> BuildID = BC.getFileBuildID();
2271 BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
2272 BP.Header.Origin = std::string(getReaderName());
2273 // Only the input binary layout order is supported.
2274 BP.Header.IsDFSOrder = false;
2275 // FIXME: Need to match hash function used to produce BAT hashes.
2276 BP.Header.HashFunction = HashFunction::Default;
2277
2278 ListSeparator LS(",");
2279 raw_string_ostream EventNamesOS(BP.Header.EventNames);
2280 for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
2281 EventNamesOS << LS << EventEntry.first().str();
2282
2283 BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_BASIC
2284 : BinaryFunction::PF_BRANCH;
2285
2286 // Add probe inline tree nodes.
2287 YAMLProfileWriter::InlineTreeDesc InlineTree;
2288 if (PseudoProbeDecoder)
2289 std::tie(args&: BP.PseudoProbeDesc, args&: InlineTree) =
2290 YAMLProfileWriter::convertPseudoProbeDesc(PseudoProbeDecoder: *PseudoProbeDecoder);
2291
2292 if (!opts::BasicAggregation) {
2293 // Convert profile for functions not covered by BAT
2294 for (auto &BFI : BC.getBinaryFunctions()) {
2295 BinaryFunction &Function = BFI.second;
2296 if (!Function.hasProfile())
2297 continue;
2298 if (BAT->isBATFunction(Address: Function.getAddress()))
2299 continue;
2300 BP.Functions.emplace_back(args: YAMLProfileWriter::convert(
2301 BF: Function, /*UseDFS=*/false, InlineTree, BAT));
2302 }
2303
2304 for (const auto &KV : NamesToBranches) {
2305 const StringRef FuncName = KV.first;
2306 const FuncBranchData &Branches = KV.second;
2307 yaml::bolt::BinaryFunctionProfile YamlBF;
2308 BinaryData *BD = BC.getBinaryDataByName(Name: FuncName);
2309 assert(BD);
2310 uint64_t FuncAddress = BD->getAddress();
2311 if (!BAT->isBATFunction(Address: FuncAddress))
2312 continue;
2313 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: FuncAddress);
2314 assert(BF);
2315 YamlBF.Name = getLocationName(Func: *BF, BAT);
2316 YamlBF.Id = BF->getFunctionNumber();
2317 YamlBF.Hash = BAT->getBFHash(FuncOutputAddress: FuncAddress);
2318 YamlBF.ExecCount = BF->getKnownExecutionCount();
2319 YamlBF.ExternEntryCount = BF->getExternEntryCount();
2320 YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(OutputAddress: FuncAddress);
2321 const BoltAddressTranslation::BBHashMapTy &BlockMap =
2322 BAT->getBBHashMap(FuncOutputAddress: FuncAddress);
2323 YamlBF.Blocks.resize(new_size: YamlBF.NumBasicBlocks);
2324
2325 for (auto &&[Entry, YamlBB] : llvm::zip(t: BlockMap, u&: YamlBF.Blocks)) {
2326 const auto &Block = Entry.second;
2327 YamlBB.Hash = Block.Hash;
2328 YamlBB.Index = Block.Index;
2329 }
2330
2331 // Lookup containing basic block offset and index
2332 auto getBlock = [&BlockMap](uint32_t Offset) {
2333 auto BlockIt = BlockMap.upper_bound(Offset);
2334 if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) {
2335 errs() << "BOLT-ERROR: invalid BAT section\n";
2336 exit(status: 1);
2337 }
2338 --BlockIt;
2339 return std::pair(BlockIt->first, BlockIt->second.Index);
2340 };
2341
2342 for (const BranchInfo &BI : Branches.Data) {
2343 using namespace yaml::bolt;
2344 const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset);
2345 BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
2346 if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) {
2347 // Internal branch
2348 const unsigned SuccIndex = getBlock(BI.To.Offset).second;
2349 auto &SI = YamlBB.Successors.emplace_back(args: SuccessorInfo{.Index: SuccIndex});
2350 SI.Count = BI.Branches;
2351 SI.Mispreds = BI.Mispreds;
2352 } else {
2353 // Call
2354 const uint32_t Offset = BI.From.Offset - BlockOffset;
2355 auto &CSI = YamlBB.CallSites.emplace_back(args: CallSiteInfo{.Offset: Offset});
2356 CSI.Count = BI.Branches;
2357 CSI.Mispreds = BI.Mispreds;
2358 if (const BinaryData *BD = BC.getBinaryDataByName(Name: BI.To.Name))
2359 YAMLProfileWriter::setCSIDestination(BC, CSI, Symbol: BD->getSymbol(), BAT,
2360 Offset: BI.To.Offset);
2361 }
2362 }
2363 // Set entry counts, similar to DataReader::readProfile.
2364 for (const BranchInfo &BI : Branches.EntryData) {
2365 if (!BlockMap.isInputBlock(InputOffset: BI.To.Offset)) {
2366 if (opts::Verbosity >= 1)
2367 errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName
2368 << " at 0x" << Twine::utohexstr(Val: BI.To.Offset) << '\n';
2369 continue;
2370 }
2371 const unsigned BlockIndex = BlockMap.getBBIndex(BBInputOffset: BI.To.Offset);
2372 YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
2373 }
2374 if (PseudoProbeDecoder) {
2375 DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
2376 InlineTreeNodeId;
2377 if (BF->getGUID()) {
2378 std::tie(args&: YamlBF.InlineTree, args&: InlineTreeNodeId) =
2379 YAMLProfileWriter::convertBFInlineTree(Decoder: *PseudoProbeDecoder,
2380 InlineTree, GUID: BF->getGUID());
2381 }
2382 // Fetch probes belonging to all fragments
2383 const AddressProbesMap &ProbeMap =
2384 PseudoProbeDecoder->getAddress2ProbesMap();
2385 BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
2386 Fragments.insert(Ptr: BF);
2387 DenseMap<
2388 uint32_t,
2389 std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
2390 BlockProbes;
2391 for (const BinaryFunction *F : Fragments) {
2392 const uint64_t FuncAddr = F->getAddress();
2393 for (const MCDecodedPseudoProbe &Probe :
2394 ProbeMap.find(From: FuncAddr, To: FuncAddr + F->getSize())) {
2395 const uint32_t OutputAddress = Probe.getAddress();
2396 const uint32_t InputOffset = BAT->translate(
2397 FuncAddress: FuncAddr, Offset: OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
2398 const unsigned BlockIndex = getBlock(InputOffset).second;
2399 BlockProbes[BlockIndex].emplace_back(args: Probe);
2400 }
2401 }
2402
2403 for (auto &[Block, Probes] : BlockProbes) {
2404 YamlBF.Blocks[Block].PseudoProbes =
2405 YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
2406 }
2407 }
2408 // Skip printing if there's no profile data
2409 llvm::erase_if(
2410 C&: YamlBF.Blocks, P: [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2411 auto HasCount = [](const auto &SI) { return SI.Count; };
2412 bool HasAnyCount = YamlBB.ExecCount ||
2413 llvm::any_of(Range: YamlBB.Successors, P: HasCount) ||
2414 llvm::any_of(Range: YamlBB.CallSites, P: HasCount);
2415 return !HasAnyCount;
2416 });
2417 BP.Functions.emplace_back(args&: YamlBF);
2418 }
2419 }
2420
2421 // Write the profile.
2422 yaml::Output Out(OutFile, nullptr, 0);
2423 Out << BP;
2424 return std::error_code();
2425}
2426
2427void DataAggregator::dump() const { DataReader::dump(); }
2428
2429void DataAggregator::dump(const PerfBranchSample &Sample) const {
2430 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2431 for (const LBREntry &LBR : Sample.LBR)
2432 Diag << LBR << '\n';
2433}
2434
2435void DataAggregator::dump(const PerfMemSample &Sample) const {
2436 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2437}
2438

source code of bolt/lib/Profile/DataAggregator.cpp