1//===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This family of functions reads profile data written by perf record,
10// aggregates it and then writes it back to an output file.
11//
12//===----------------------------------------------------------------------===//
13
14#include "bolt/Profile/DataAggregator.h"
15#include "bolt/Core/BinaryContext.h"
16#include "bolt/Core/BinaryFunction.h"
17#include "bolt/Profile/BoltAddressTranslation.h"
18#include "bolt/Profile/Heatmap.h"
19#include "bolt/Profile/YAMLProfileWriter.h"
20#include "bolt/Utils/CommandLineOpts.h"
21#include "bolt/Utils/Utils.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/ScopeExit.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Errc.h"
27#include "llvm/Support/FileSystem.h"
28#include "llvm/Support/Process.h"
29#include "llvm/Support/Program.h"
30#include "llvm/Support/Regex.h"
31#include "llvm/Support/Timer.h"
32#include "llvm/Support/raw_ostream.h"
33#include <map>
34#include <optional>
35#include <unordered_map>
36#include <utility>
37
38#define DEBUG_TYPE "aggregator"
39
40using namespace llvm;
41using namespace bolt;
42
43namespace opts {
44
45static cl::opt<bool>
46 BasicAggregation("nl",
47 cl::desc("aggregate basic samples (without LBR info)"),
48 cl::cat(AggregatorCategory));
49
50static cl::opt<std::string>
51 ITraceAggregation("itrace",
52 cl::desc("Generate LBR info with perf itrace argument"),
53 cl::cat(AggregatorCategory));
54
55static cl::opt<bool>
56FilterMemProfile("filter-mem-profile",
57 cl::desc("if processing a memory profile, filter out stack or heap accesses "
58 "that won't be useful for BOLT to reduce profile file size"),
59 cl::init(Val: true),
60 cl::cat(AggregatorCategory));
61
62static cl::opt<unsigned long long>
63FilterPID("pid",
64 cl::desc("only use samples from process with specified PID"),
65 cl::init(Val: 0),
66 cl::Optional,
67 cl::cat(AggregatorCategory));
68
69static cl::opt<bool>
70IgnoreBuildID("ignore-build-id",
71 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
72 cl::init(Val: false),
73 cl::cat(AggregatorCategory));
74
75static cl::opt<bool> IgnoreInterruptLBR(
76 "ignore-interrupt-lbr",
77 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
78 cl::init(Val: true), cl::cat(AggregatorCategory));
79
80static cl::opt<unsigned long long>
81MaxSamples("max-samples",
82 cl::init(Val: -1ULL),
83 cl::desc("maximum number of samples to read from LBR profile"),
84 cl::Optional,
85 cl::Hidden,
86 cl::cat(AggregatorCategory));
87
88extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
89extern cl::opt<std::string> SaveProfile;
90
91cl::opt<bool> ReadPreAggregated(
92 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
93 cl::cat(AggregatorCategory));
94
95static cl::opt<bool>
96TimeAggregator("time-aggr",
97 cl::desc("time BOLT aggregator"),
98 cl::init(Val: false),
99 cl::ZeroOrMore,
100 cl::cat(AggregatorCategory));
101
102static cl::opt<bool>
103 UseEventPC("use-event-pc",
104 cl::desc("use event PC in combination with LBR sampling"),
105 cl::cat(AggregatorCategory));
106
107static cl::opt<bool> WriteAutoFDOData(
108 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
109 cl::cat(AggregatorCategory));
110
111} // namespace opts
112
113namespace {
114
115const char TimerGroupName[] = "aggregator";
116const char TimerGroupDesc[] = "Aggregator";
117
118std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
119 std::vector<SectionNameAndRange> sections;
120 for (BinarySection &Section : BC->sections()) {
121 if (!Section.isText())
122 continue;
123 if (Section.getSize() == 0)
124 continue;
125 sections.push_back(
126 x: {.Name: Section.getName(), .BeginAddress: Section.getAddress(), .EndAddress: Section.getEndAddress()});
127 }
128 llvm::sort(C&: sections,
129 Comp: [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
130 return A.BeginAddress < B.BeginAddress;
131 });
132 return sections;
133}
134}
135
// Namespace-scope definition of the static constexpr member KernelBaseAddr.
// Its in-class declaration and initializer are not visible in this file.
136constexpr uint64_t DataAggregator::KernelBaseAddr;
137
138DataAggregator::~DataAggregator() { deleteTempFiles(); }
139
140namespace {
141void deleteTempFile(const std::string &FileName) {
142 if (std::error_code Errc = sys::fs::remove(path: FileName.c_str()))
143 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
144 << " with error " << Errc.message() << "\n";
145}
146}
147
148void DataAggregator::deleteTempFiles() {
149 for (std::string &FileName : TempFiles)
150 deleteTempFile(FileName);
151 TempFiles.clear();
152}
153
154void DataAggregator::findPerfExecutable() {
155 std::optional<std::string> PerfExecutable =
156 sys::Process::FindInEnvPath(EnvName: "PATH", FileName: "perf");
157 if (!PerfExecutable) {
158 outs() << "PERF2BOLT: No perf executable found!\n";
159 exit(status: 1);
160 }
161 PerfPath = *PerfExecutable;
162}
163
164void DataAggregator::start() {
165 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
166
167 // Don't launch perf for pre-aggregated files
168 if (opts::ReadPreAggregated)
169 return;
170
171 findPerfExecutable();
172
173 if (opts::BasicAggregation) {
174 launchPerfProcess(Name: "events without LBR",
175 PPI&: MainEventsPPI,
176 ArgsString: "script -F pid,event,ip",
177 /*Wait = */false);
178 } else if (!opts::ITraceAggregation.empty()) {
179 std::string ItracePerfScriptArgs = llvm::formatv(
180 Fmt: "script -F pid,ip,brstack --itrace={0}", Vals&: opts::ITraceAggregation);
181 launchPerfProcess(Name: "branch events with itrace", PPI&: MainEventsPPI,
182 ArgsString: ItracePerfScriptArgs.c_str(),
183 /*Wait = */ false);
184 } else {
185 launchPerfProcess(Name: "branch events",
186 PPI&: MainEventsPPI,
187 ArgsString: "script -F pid,ip,brstack",
188 /*Wait = */false);
189 }
190
191 // Note: we launch script for mem events regardless of the option, as the
192 // command fails fairly fast if mem events were not collected.
193 launchPerfProcess(Name: "mem events",
194 PPI&: MemEventsPPI,
195 ArgsString: "script -F pid,event,addr,ip",
196 /*Wait = */false);
197
198 launchPerfProcess(Name: "process events", PPI&: MMapEventsPPI,
199 ArgsString: "script --show-mmap-events --no-itrace",
200 /*Wait = */ false);
201
202 launchPerfProcess(Name: "task events", PPI&: TaskEventsPPI,
203 ArgsString: "script --show-task-events --no-itrace",
204 /*Wait = */ false);
205}
206
207void DataAggregator::abort() {
208 if (opts::ReadPreAggregated)
209 return;
210
211 std::string Error;
212
213 // Kill subprocesses in case they are not finished
214 sys::Wait(PI: TaskEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
215 sys::Wait(PI: MMapEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
216 sys::Wait(PI: MainEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
217 sys::Wait(PI: MemEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error);
218
219 deleteTempFiles();
220
221 exit(status: 1);
222}
223
224void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
225 const char *ArgsString, bool Wait) {
226 SmallVector<StringRef, 4> Argv;
227
228 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
229 Argv.push_back(Elt: PerfPath.data());
230
231 StringRef(ArgsString).split(A&: Argv, Separator: ' ');
232 Argv.push_back(Elt: "-f");
233 Argv.push_back(Elt: "-i");
234 Argv.push_back(Elt: Filename.c_str());
235
236 if (std::error_code Errc =
237 sys::fs::createTemporaryFile(Prefix: "perf.script", Suffix: "out", ResultPath&: PPI.StdoutPath)) {
238 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
239 << " with error " << Errc.message() << "\n";
240 exit(status: 1);
241 }
242 TempFiles.push_back(x: PPI.StdoutPath.data());
243
244 if (std::error_code Errc =
245 sys::fs::createTemporaryFile(Prefix: "perf.script", Suffix: "err", ResultPath&: PPI.StderrPath)) {
246 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
247 << " with error " << Errc.message() << "\n";
248 exit(status: 1);
249 }
250 TempFiles.push_back(x: PPI.StderrPath.data());
251
252 std::optional<StringRef> Redirects[] = {
253 std::nullopt, // Stdin
254 StringRef(PPI.StdoutPath.data()), // Stdout
255 StringRef(PPI.StderrPath.data())}; // Stderr
256
257 LLVM_DEBUG({
258 dbgs() << "Launching perf: ";
259 for (StringRef Arg : Argv)
260 dbgs() << Arg << " ";
261 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
262 << "\n";
263 });
264
265 if (Wait)
266 PPI.PI.ReturnCode = sys::ExecuteAndWait(Program: PerfPath.data(), Args: Argv,
267 /*envp*/ Env: std::nullopt, Redirects);
268 else
269 PPI.PI = sys::ExecuteNoWait(Program: PerfPath.data(), Args: Argv, /*envp*/ Env: std::nullopt,
270 Redirects);
271}
272
273void DataAggregator::processFileBuildID(StringRef FileBuildID) {
274 PerfProcessInfo BuildIDProcessInfo;
275 launchPerfProcess(Name: "buildid list",
276 PPI&: BuildIDProcessInfo,
277 ArgsString: "buildid-list",
278 /*Wait = */true);
279
280 if (BuildIDProcessInfo.PI.ReturnCode != 0) {
281 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
282 MemoryBuffer::getFileOrSTDIN(Filename: BuildIDProcessInfo.StderrPath.data());
283 StringRef ErrBuf = (*MB)->getBuffer();
284
285 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
286 << '\n';
287 errs() << ErrBuf;
288 return;
289 }
290
291 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
292 MemoryBuffer::getFileOrSTDIN(Filename: BuildIDProcessInfo.StdoutPath.data());
293 if (std::error_code EC = MB.getError()) {
294 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
295 << EC.message() << "\n";
296 return;
297 }
298
299 FileBuf = std::move(*MB);
300 ParsingBuf = FileBuf->getBuffer();
301
302 std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
303 if (!FileName) {
304 if (hasAllBuildIDs()) {
305 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
306 "This indicates the input binary supplied for data aggregation "
307 "is not the same recorded by perf when collecting profiling "
308 "data, or there were no samples recorded for the binary. "
309 "Use -ignore-build-id option to override.\n";
310 if (!opts::IgnoreBuildID)
311 abort();
312 } else {
313 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
314 "data was recorded without it\n";
315 return;
316 }
317 } else if (*FileName != llvm::sys::path::filename(path: BC->getFilename())) {
318 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
319 BuildIDBinaryName = std::string(*FileName);
320 } else {
321 outs() << "PERF2BOLT: matched build-id and file name\n";
322 }
323}
324
325bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
326 if (opts::ReadPreAggregated)
327 return true;
328
329 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(Name: FileName);
330 if (!FD) {
331 consumeError(Err: FD.takeError());
332 return false;
333 }
334
335 char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
336
337 auto Close = make_scope_exit(F: [&] { sys::fs::closeFile(F&: *FD); });
338 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
339 FileHandle: *FD, Buf: MutableArrayRef(Buf, sizeof(Buf)), Offset: 0);
340 if (!BytesRead) {
341 consumeError(Err: BytesRead.takeError());
342 return false;
343 }
344
345 if (*BytesRead != 7)
346 return false;
347
348 if (strncmp(s1: Buf, s2: "PERFILE", n: 7) == 0)
349 return true;
350 return false;
351}
352
353void DataAggregator::parsePreAggregated() {
354 std::string Error;
355
356 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
357 MemoryBuffer::getFileOrSTDIN(Filename);
358 if (std::error_code EC = MB.getError()) {
359 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
360 << EC.message() << "\n";
361 exit(status: 1);
362 }
363
364 FileBuf = std::move(*MB);
365 ParsingBuf = FileBuf->getBuffer();
366 Col = 0;
367 Line = 1;
368 if (parsePreAggregatedLBRSamples()) {
369 errs() << "PERF2BOLT: failed to parse samples\n";
370 exit(status: 1);
371 }
372}
373
374std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
375 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
376 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
377 TimerGroupDesc, opts::TimeAggregator);
378
379 std::error_code EC;
380 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
381 if (EC)
382 return EC;
383
384 // Format:
385 // number of unique traces
386 // from_1-to_1:count_1
387 // from_2-to_2:count_2
388 // ......
389 // from_n-to_n:count_n
390 // number of unique sample addresses
391 // addr_1:count_1
392 // addr_2:count_2
393 // ......
394 // addr_n:count_n
395 // number of unique LBR entries
396 // src_1->dst_1:count_1
397 // src_2->dst_2:count_2
398 // ......
399 // src_n->dst_n:count_n
400
401 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
402
403 // AutoFDO addresses are relative to the first allocated loadable program
404 // segment
405 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
406 if (Address < FirstAllocAddress)
407 return 0;
408 return Address - FirstAllocAddress;
409 };
410
411 OutFile << FallthroughLBRs.size() << "\n";
412 for (const auto &[Trace, Info] : FallthroughLBRs) {
413 OutFile << formatv(Fmt: "{0:x-}-{1:x-}:{2}\n", Vals: filterAddress(Trace.From),
414 Vals: filterAddress(Trace.To),
415 Vals: Info.InternCount + Info.ExternCount);
416 }
417
418 OutFile << BasicSamples.size() << "\n";
419 for (const auto [PC, HitCount] : BasicSamples)
420 OutFile << formatv(Fmt: "{0:x-}:{1}\n", Vals: filterAddress(PC), Vals: HitCount);
421
422 OutFile << BranchLBRs.size() << "\n";
423 for (const auto &[Trace, Info] : BranchLBRs) {
424 OutFile << formatv(Fmt: "{0:x-}->{1:x-}:{2}\n", Vals: filterAddress(Trace.From),
425 Vals: filterAddress(Trace.To), Vals: Info.TakenCount);
426 }
427
428 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
429 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
430 << " unique branches to " << OutputFilename << "\n";
431
432 return std::error_code();
433}
434
435void DataAggregator::filterBinaryMMapInfo() {
436 if (opts::FilterPID) {
437 auto MMapInfoIter = BinaryMMapInfo.find(x: opts::FilterPID);
438 if (MMapInfoIter != BinaryMMapInfo.end()) {
439 MMapInfo MMap = MMapInfoIter->second;
440 BinaryMMapInfo.clear();
441 BinaryMMapInfo.insert(x: std::make_pair(x&: MMap.PID, y&: MMap));
442 } else {
443 if (errs().has_colors())
444 errs().changeColor(Color: raw_ostream::RED);
445 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
446 << opts::FilterPID << "\""
447 << " for binary \"" << BC->getFilename() << "\".";
448 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
449 errs() << " Profile for the following process is available:\n";
450 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
451 outs() << " " << MMI.second.PID
452 << (MMI.second.Forked ? " (forked)\n" : "\n");
453
454 if (errs().has_colors())
455 errs().resetColor();
456
457 exit(status: 1);
458 }
459 }
460}
461
462int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
463 PerfProcessErrorCallbackTy Callback) {
464 std::string Error;
465 outs() << "PERF2BOLT: waiting for perf " << Name
466 << " collection to finish...\n";
467 sys::ProcessInfo PI = sys::Wait(PI: Process.PI, SecondsToWait: std::nullopt, ErrMsg: &Error);
468
469 if (!Error.empty()) {
470 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
471 deleteTempFiles();
472 exit(status: 1);
473 }
474
475 if (PI.ReturnCode != 0) {
476 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
477 MemoryBuffer::getFileOrSTDIN(Filename: Process.StderrPath.data());
478 StringRef ErrBuf = (*ErrorMB)->getBuffer();
479
480 deleteTempFiles();
481 Callback(PI.ReturnCode, ErrBuf);
482 return PI.ReturnCode;
483 }
484
485 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
486 MemoryBuffer::getFileOrSTDIN(Filename: Process.StdoutPath.data());
487 if (std::error_code EC = MB.getError()) {
488 errs() << "Cannot open " << Process.StdoutPath.data() << ": "
489 << EC.message() << "\n";
490 deleteTempFiles();
491 exit(status: 1);
492 }
493
494 FileBuf = std::move(*MB);
495 ParsingBuf = FileBuf->getBuffer();
496 Col = 0;
497 Line = 1;
498 return PI.ReturnCode;
499}
500
501Error DataAggregator::preprocessProfile(BinaryContext &BC) {
502 this->BC = &BC;
503
504 if (opts::ReadPreAggregated) {
505 parsePreAggregated();
506 return Error::success();
507 }
508
509 if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
510 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
511 processFileBuildID(FileBuildID: *FileBuildID);
512 } else {
513 errs() << "BOLT-WARNING: build-id will not be checked because we could "
514 "not read one from input binary\n";
515 }
516
517 auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
518 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
519 exit(status: 1);
520 };
521
522 auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
523 Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
524 "Cannot print 'addr' field.");
525 if (!NoData.match(String: ErrBuf))
526 ErrorCallback(ReturnCode, ErrBuf);
527 };
528
529 if (BC.IsLinuxKernel) {
530 // Current MMap parsing logic does not work with linux kernel.
531 // MMap entries for linux kernel uses PERF_RECORD_MMAP
532 // format instead of typical PERF_RECORD_MMAP2 format.
533 // Since linux kernel address mapping is absolute (same as
534 // in the ELF file), we avoid parsing MMap in linux kernel mode.
535 // While generating optimized linux kernel binary, we may need
536 // to parse MMap entries.
537
538 // In linux kernel mode, we analyze and optimize
539 // all linux kernel binary instructions, irrespective
540 // of whether they are due to system calls or due to
541 // interrupts. Therefore, we cannot ignore interrupt
542 // in Linux kernel mode.
543 opts::IgnoreInterruptLBR = false;
544 } else {
545 prepareToParse(Name: "mmap events", Process&: MMapEventsPPI, Callback: ErrorCallback);
546 if (parseMMapEvents())
547 errs() << "PERF2BOLT: failed to parse mmap events\n";
548 }
549
550 prepareToParse(Name: "task events", Process&: TaskEventsPPI, Callback: ErrorCallback);
551 if (parseTaskEvents())
552 errs() << "PERF2BOLT: failed to parse task events\n";
553
554 filterBinaryMMapInfo();
555 prepareToParse(Name: "events", Process&: MainEventsPPI, Callback: ErrorCallback);
556
557 if (opts::HeatmapMode) {
558 if (std::error_code EC = printLBRHeatMap()) {
559 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
560 exit(status: 1);
561 }
562 exit(status: 0);
563 }
564
565 if ((!opts::BasicAggregation && parseBranchEvents()) ||
566 (opts::BasicAggregation && parseBasicEvents()))
567 errs() << "PERF2BOLT: failed to parse samples\n";
568
569 // We can finish early if the goal is just to generate data for autofdo
570 if (opts::WriteAutoFDOData) {
571 if (std::error_code EC = writeAutoFDOData(OutputFilename: opts::OutputFilename))
572 errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
573
574 deleteTempFiles();
575 exit(status: 0);
576 }
577
578 // Special handling for memory events
579 if (prepareToParse(Name: "mem events", Process&: MemEventsPPI, Callback: MemEventsErrorCallback))
580 return Error::success();
581
582 if (const std::error_code EC = parseMemEvents())
583 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
584 << '\n';
585
586 deleteTempFiles();
587
588 return Error::success();
589}
590
591Error DataAggregator::readProfile(BinaryContext &BC) {
592 processProfile(BC);
593
594 for (auto &BFI : BC.getBinaryFunctions()) {
595 BinaryFunction &Function = BFI.second;
596 convertBranchData(BF&: Function);
597 }
598
599 if (opts::AggregateOnly) {
600 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
601 if (std::error_code EC = writeAggregatedFile(OutputFilename: opts::OutputFilename))
602 report_error(Message: "cannot create output data file", EC);
603
604 // BAT YAML is handled by DataAggregator since normal YAML output requires
605 // CFG which is not available in BAT mode.
606 if (usesBAT()) {
607 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
608 if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::OutputFilename))
609 report_error(Message: "cannot create output data file", EC);
610 if (!opts::SaveProfile.empty())
611 if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::SaveProfile))
612 report_error(Message: "cannot create output data file", EC);
613 }
614 }
615
616 return Error::success();
617}
618
619bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
620 return Function.hasProfileAvailable();
621}
622
623void DataAggregator::processProfile(BinaryContext &BC) {
624 if (opts::ReadPreAggregated)
625 processPreAggregated();
626 else if (opts::BasicAggregation)
627 processBasicEvents();
628 else
629 processBranchEvents();
630
631 processMemEvents();
632
633 // Mark all functions with registered events as having a valid profile.
634 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
635 : BinaryFunction::PF_LBR;
636 for (auto &BFI : BC.getBinaryFunctions()) {
637 BinaryFunction &BF = BFI.second;
638 if (getBranchData(BF) || getFuncSampleData(FuncNames: BF.getNames()))
639 BF.markProfiled(Flags);
640 }
641
642 for (auto &FuncBranches : NamesToBranches)
643 llvm::stable_sort(Range&: FuncBranches.second.Data);
644
645 for (auto &MemEvents : NamesToMemEvents)
646 llvm::stable_sort(Range&: MemEvents.second.Data);
647
648 // Release intermediate storage.
649 clear(Container&: BranchLBRs);
650 clear(Container&: FallthroughLBRs);
651 clear(Container&: AggregatedLBRs);
652 clear(Container&: BasicSamples);
653 clear(Container&: MemSamples);
654}
655
656BinaryFunction *
657DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
658 if (!BC->containsAddress(Address))
659 return nullptr;
660
661 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
662 /*UseMaxSize=*/true);
663}
664
665BinaryFunction *
666DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
667 if (BAT)
668 if (const uint64_t HotAddr = BAT->fetchParentAddress(Address: Func.getAddress()))
669 return getBinaryFunctionContainingAddress(Address: HotAddr);
670 return nullptr;
671}
672
673StringRef DataAggregator::getLocationName(const BinaryFunction &Func) const {
674 if (!BAT)
675 return Func.getOneName();
676
677 const BinaryFunction *OrigFunc = &Func;
678 // If it is a local function, prefer the name containing the file name where
679 // the local function was declared
680 for (StringRef AlternativeName : OrigFunc->getNames()) {
681 size_t FileNameIdx = AlternativeName.find(C: '/');
682 // Confirm the alternative name has the pattern Symbol/FileName/1 before
683 // using it
684 if (FileNameIdx == StringRef::npos ||
685 AlternativeName.find(C: '/', From: FileNameIdx + 1) == StringRef::npos)
686 continue;
687 return AlternativeName;
688 }
689 return OrigFunc->getOneName();
690}
691
692bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
693 uint64_t Count) {
694 BinaryFunction *ParentFunc = getBATParentFunction(Func: OrigFunc);
695 BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
696 if (ParentFunc)
697 NumColdSamples += Count;
698
699 auto I = NamesToSamples.find(x: Func.getOneName());
700 if (I == NamesToSamples.end()) {
701 bool Success;
702 StringRef LocName = getLocationName(Func);
703 std::tie(args&: I, args&: Success) = NamesToSamples.insert(
704 x: std::make_pair(x: Func.getOneName(),
705 y: FuncSampleData(LocName, FuncSampleData::ContainerTy())));
706 }
707
708 Address -= Func.getAddress();
709 if (BAT)
710 Address = BAT->translate(FuncAddress: Func.getAddress(), Offset: Address, /*IsBranchSrc=*/false);
711
712 I->second.bumpCount(Offset: Address, Count);
713 return true;
714}
715
716bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
717 uint64_t To, uint64_t Count,
718 uint64_t Mispreds) {
719 FuncBranchData *AggrData = getBranchData(BF: Func);
720 if (!AggrData) {
721 AggrData = &NamesToBranches[Func.getOneName()];
722 AggrData->Name = getLocationName(Func);
723 setBranchData(BF: Func, FBD: AggrData);
724 }
725
726 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
727 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
728 AggrData->bumpBranchCount(OffsetFrom: From, OffsetTo: To, Count, Mispreds);
729 return true;
730}
731
732bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
733 BinaryFunction *ToFunc, uint64_t From,
734 uint64_t To, uint64_t Count,
735 uint64_t Mispreds) {
736 FuncBranchData *FromAggrData = nullptr;
737 FuncBranchData *ToAggrData = nullptr;
738 StringRef SrcFunc;
739 StringRef DstFunc;
740 if (FromFunc) {
741 SrcFunc = getLocationName(Func: *FromFunc);
742 FromAggrData = getBranchData(BF: *FromFunc);
743 if (!FromAggrData) {
744 FromAggrData = &NamesToBranches[FromFunc->getOneName()];
745 FromAggrData->Name = SrcFunc;
746 setBranchData(BF: *FromFunc, FBD: FromAggrData);
747 }
748
749 recordExit(BF&: *FromFunc, From, Mispred: Mispreds, Count);
750 }
751 if (ToFunc) {
752 DstFunc = getLocationName(Func: *ToFunc);
753 ToAggrData = getBranchData(BF: *ToFunc);
754 if (!ToAggrData) {
755 ToAggrData = &NamesToBranches[ToFunc->getOneName()];
756 ToAggrData->Name = DstFunc;
757 setBranchData(BF: *ToFunc, FBD: ToAggrData);
758 }
759
760 recordEntry(BF&: *ToFunc, To, Mispred: Mispreds, Count);
761 }
762
763 if (FromAggrData)
764 FromAggrData->bumpCallCount(OffsetFrom: From, To: Location(!DstFunc.empty(), DstFunc, To),
765 Count, Mispreds);
766 if (ToAggrData)
767 ToAggrData->bumpEntryCount(From: Location(!SrcFunc.empty(), SrcFunc, From), OffsetTo: To,
768 Count, Mispreds);
769 return true;
770}
771
772bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
773 uint64_t Mispreds) {
774 auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
775 if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: Addr)) {
776 Addr -= Func->getAddress();
777
778 if (BAT)
779 Addr = BAT->translate(FuncAddress: Func->getAddress(), Offset: Addr, IsBranchSrc: IsFrom);
780
781 if (BinaryFunction *ParentFunc = getBATParentFunction(Func: *Func)) {
782 Func = ParentFunc;
783 if (IsFrom)
784 NumColdSamples += Count;
785 }
786
787 return Func;
788 }
789 return nullptr;
790 };
791
792 BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
793 BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
794 if (!FromFunc && !ToFunc)
795 return false;
796
797 // Treat recursive control transfers as inter-branches.
798 if (FromFunc == ToFunc && To != 0) {
799 recordBranch(BF&: *FromFunc, From, To, Count, Mispreds);
800 return doIntraBranch(Func&: *FromFunc, From, To, Count, Mispreds);
801 }
802
803 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
804}
805
806bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
807 uint64_t Count) {
808 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(Address: First.To);
809 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Address: Second.From);
810 if (!FromFunc || !ToFunc) {
811 LLVM_DEBUG({
812 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
813 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
814 << " and ending in " << ToFunc->getPrintName()
815 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
816 });
817 NumLongRangeTraces += Count;
818 return false;
819 }
820 if (FromFunc != ToFunc) {
821 NumInvalidTraces += Count;
822 LLVM_DEBUG({
823 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
824 << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
825 << " and ending in " << ToFunc->getPrintName()
826 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
827 });
828 return false;
829 }
830
831 std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
832 BAT ? BAT->getFallthroughsInTrace(FuncAddress: FromFunc->getAddress(), From: First.To,
833 To: Second.From)
834 : getFallthroughsInTrace(BF&: *FromFunc, First, Second, Count);
835 if (!FTs) {
836 LLVM_DEBUG(
837 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
838 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
839 << " and ending in " << ToFunc->getPrintName() << " @ "
840 << ToFunc->getPrintName() << " @ "
841 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
842 NumInvalidTraces += Count;
843 return false;
844 }
845
846 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
847 << FromFunc->getPrintName() << ":"
848 << Twine::utohexstr(First.To) << " to "
849 << Twine::utohexstr(Second.From) << ".\n");
850 BinaryFunction *ParentFunc = getBATParentFunction(Func: *FromFunc);
851 for (auto [From, To] : *FTs) {
852 if (BAT) {
853 From = BAT->translate(FuncAddress: FromFunc->getAddress(), Offset: From, /*IsBranchSrc=*/true);
854 To = BAT->translate(FuncAddress: FromFunc->getAddress(), Offset: To, /*IsBranchSrc=*/false);
855 }
856 doIntraBranch(Func&: ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, Mispreds: false);
857 }
858
859 return true;
860}
861
862bool DataAggregator::recordTrace(
863 BinaryFunction &BF, const LBREntry &FirstLBR, const LBREntry &SecondLBR,
864 uint64_t Count,
865 SmallVector<std::pair<uint64_t, uint64_t>, 16> &Branches) const {
866 BinaryContext &BC = BF.getBinaryContext();
867
868 if (!BF.isSimple())
869 return false;
870
871 assert(BF.hasCFG() && "can only record traces in CFG state");
872
873 // Offsets of the trace within this function.
874 const uint64_t From = FirstLBR.To - BF.getAddress();
875 const uint64_t To = SecondLBR.From - BF.getAddress();
876
877 if (From > To)
878 return false;
879
880 const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: From);
881 const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(Offset: To);
882
883 if (!FromBB || !ToBB)
884 return false;
885
886 // Adjust FromBB if the first LBR is a return from the last instruction in
887 // the previous block (that instruction should be a call).
888 if (From == FromBB->getOffset() && !BF.containsAddress(PC: FirstLBR.From) &&
889 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
890 const BinaryBasicBlock *PrevBB =
891 BF.getLayout().getBlock(Index: FromBB->getIndex() - 1);
892 if (PrevBB->getSuccessor(Label: FromBB->getLabel())) {
893 const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
894 if (Instr && BC.MIB->isCall(Inst: *Instr))
895 FromBB = PrevBB;
896 else
897 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
898 << '\n');
899 } else {
900 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
901 }
902 }
903
904 // Fill out information for fall-through edges. The From and To could be
905 // within the same basic block, e.g. when two call instructions are in the
906 // same block. In this case we skip the processing.
907 if (FromBB == ToBB)
908 return true;
909
910 // Process blocks in the original layout order.
911 BinaryBasicBlock *BB = BF.getLayout().getBlock(Index: FromBB->getIndex());
912 assert(BB == FromBB && "index mismatch");
913 while (BB != ToBB) {
914 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(Index: BB->getIndex() + 1);
915 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
916
917 // Check for bad LBRs.
918 if (!BB->getSuccessor(Label: NextBB->getLabel())) {
919 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
920 << " " << FirstLBR << '\n'
921 << " " << SecondLBR << '\n');
922 return false;
923 }
924
925 const MCInst *Instr = BB->getLastNonPseudoInstr();
926 uint64_t Offset = 0;
927 if (Instr)
928 Offset = BC.MIB->getOffsetWithDefault(Inst: *Instr, Default: 0);
929 else
930 Offset = BB->getOffset();
931
932 Branches.emplace_back(Args&: Offset, Args: NextBB->getOffset());
933
934 BB = NextBB;
935 }
936
937 // Record fall-through jumps
938 for (const auto &[FromOffset, ToOffset] : Branches) {
939 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: FromOffset);
940 BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(Offset: ToOffset);
941 assert(FromBB && ToBB);
942 BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(Succ: *ToBB);
943 BI.Count += Count;
944 }
945
946 return true;
947}
948
949std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
950DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
951 const LBREntry &FirstLBR,
952 const LBREntry &SecondLBR,
953 uint64_t Count) const {
954 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
955
956 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, Branches&: Res))
957 return std::nullopt;
958
959 return Res;
960}
961
962bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
963 uint64_t Count) const {
964 if (To > BF.getSize())
965 return false;
966
967 if (!BF.hasProfile())
968 BF.ExecutionCount = 0;
969
970 BinaryBasicBlock *EntryBB = nullptr;
971 if (To == 0) {
972 BF.ExecutionCount += Count;
973 if (!BF.empty())
974 EntryBB = &BF.front();
975 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(Offset: To)) {
976 if (BB->isEntryPoint())
977 EntryBB = BB;
978 }
979
980 if (EntryBB)
981 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
982
983 return true;
984}
985
986bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
987 uint64_t Count) const {
988 if (!BF.isSimple() || From > BF.getSize())
989 return false;
990
991 if (!BF.hasProfile())
992 BF.ExecutionCount = 0;
993
994 return true;
995}
996
997ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
998 LBREntry Res;
999 ErrorOr<StringRef> FromStrRes = parseString(EndChar: '/');
1000 if (std::error_code EC = FromStrRes.getError())
1001 return EC;
1002 StringRef OffsetStr = FromStrRes.get();
1003 if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.From)) {
1004 reportError(ErrorMsg: "expected hexadecimal number with From address");
1005 Diag << "Found: " << OffsetStr << "\n";
1006 return make_error_code(E: llvm::errc::io_error);
1007 }
1008
1009 ErrorOr<StringRef> ToStrRes = parseString(EndChar: '/');
1010 if (std::error_code EC = ToStrRes.getError())
1011 return EC;
1012 OffsetStr = ToStrRes.get();
1013 if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.To)) {
1014 reportError(ErrorMsg: "expected hexadecimal number with To address");
1015 Diag << "Found: " << OffsetStr << "\n";
1016 return make_error_code(E: llvm::errc::io_error);
1017 }
1018
1019 ErrorOr<StringRef> MispredStrRes = parseString(EndChar: '/');
1020 if (std::error_code EC = MispredStrRes.getError())
1021 return EC;
1022 StringRef MispredStr = MispredStrRes.get();
1023 if (MispredStr.size() != 1 ||
1024 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1025 reportError(ErrorMsg: "expected single char for mispred bit");
1026 Diag << "Found: " << MispredStr << "\n";
1027 return make_error_code(E: llvm::errc::io_error);
1028 }
1029 Res.Mispred = MispredStr[0] == 'M';
1030
1031 static bool MispredWarning = true;
1032 if (MispredStr[0] == '-' && MispredWarning) {
1033 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1034 MispredWarning = false;
1035 }
1036
1037 ErrorOr<StringRef> Rest = parseString(EndChar: FieldSeparator, EndNl: true);
1038 if (std::error_code EC = Rest.getError())
1039 return EC;
1040 if (Rest.get().size() < 5) {
1041 reportError(ErrorMsg: "expected rest of LBR entry");
1042 Diag << "Found: " << Rest.get() << "\n";
1043 return make_error_code(E: llvm::errc::io_error);
1044 }
1045 return Res;
1046}
1047
1048bool DataAggregator::checkAndConsumeFS() {
1049 if (ParsingBuf[0] != FieldSeparator)
1050 return false;
1051
1052 ParsingBuf = ParsingBuf.drop_front(N: 1);
1053 Col += 1;
1054 return true;
1055}
1056
1057void DataAggregator::consumeRestOfLine() {
1058 size_t LineEnd = ParsingBuf.find_first_of(C: '\n');
1059 if (LineEnd == StringRef::npos) {
1060 ParsingBuf = StringRef();
1061 Col = 0;
1062 Line += 1;
1063 return;
1064 }
1065 ParsingBuf = ParsingBuf.drop_front(N: LineEnd + 1);
1066 Col = 0;
1067 Line += 1;
1068}
1069
1070bool DataAggregator::checkNewLine() {
1071 return ParsingBuf[0] == '\n';
1072}
1073
1074ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1075 PerfBranchSample Res;
1076
1077 while (checkAndConsumeFS()) {
1078 }
1079
1080 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1081 if (std::error_code EC = PIDRes.getError())
1082 return EC;
1083 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1084 if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) {
1085 consumeRestOfLine();
1086 return make_error_code(E: errc::no_such_process);
1087 }
1088
1089 while (checkAndConsumeFS()) {
1090 }
1091
1092 ErrorOr<uint64_t> PCRes = parseHexField(EndChar: FieldSeparator, EndNl: true);
1093 if (std::error_code EC = PCRes.getError())
1094 return EC;
1095 Res.PC = PCRes.get();
1096
1097 if (checkAndConsumeNewLine())
1098 return Res;
1099
1100 while (!checkAndConsumeNewLine()) {
1101 checkAndConsumeFS();
1102
1103 ErrorOr<LBREntry> LBRRes = parseLBREntry();
1104 if (std::error_code EC = LBRRes.getError())
1105 return EC;
1106 LBREntry LBR = LBRRes.get();
1107 if (ignoreKernelInterrupt(LBR))
1108 continue;
1109 if (!BC->HasFixedLoadAddress)
1110 adjustLBR(LBR, MMI: MMapInfoIter->second);
1111 Res.LBR.push_back(Elt: LBR);
1112 }
1113
1114 return Res;
1115}
1116
1117ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1118 while (checkAndConsumeFS()) {
1119 }
1120
1121 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1122 if (std::error_code EC = PIDRes.getError())
1123 return EC;
1124
1125 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1126 if (MMapInfoIter == BinaryMMapInfo.end()) {
1127 consumeRestOfLine();
1128 return PerfBasicSample{.EventName: StringRef(), .PC: 0};
1129 }
1130
1131 while (checkAndConsumeFS()) {
1132 }
1133
1134 ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator);
1135 if (std::error_code EC = Event.getError())
1136 return EC;
1137
1138 while (checkAndConsumeFS()) {
1139 }
1140
1141 ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator, EndNl: true);
1142 if (std::error_code EC = AddrRes.getError())
1143 return EC;
1144
1145 if (!checkAndConsumeNewLine()) {
1146 reportError(ErrorMsg: "expected end of line");
1147 return make_error_code(E: llvm::errc::io_error);
1148 }
1149
1150 uint64_t Address = *AddrRes;
1151 if (!BC->HasFixedLoadAddress)
1152 adjustAddress(Address, MMI: MMapInfoIter->second);
1153
1154 return PerfBasicSample{.EventName: Event.get(), .PC: Address};
1155}
1156
1157ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1158 PerfMemSample Res{.PC: 0, .Addr: 0};
1159
1160 while (checkAndConsumeFS()) {
1161 }
1162
1163 ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1164 if (std::error_code EC = PIDRes.getError())
1165 return EC;
1166
1167 auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes);
1168 if (MMapInfoIter == BinaryMMapInfo.end()) {
1169 consumeRestOfLine();
1170 return Res;
1171 }
1172
1173 while (checkAndConsumeFS()) {
1174 }
1175
1176 ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator);
1177 if (std::error_code EC = Event.getError())
1178 return EC;
1179 if (!Event.get().contains(Other: "mem-loads")) {
1180 consumeRestOfLine();
1181 return Res;
1182 }
1183
1184 while (checkAndConsumeFS()) {
1185 }
1186
1187 ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator);
1188 if (std::error_code EC = AddrRes.getError())
1189 return EC;
1190
1191 while (checkAndConsumeFS()) {
1192 }
1193
1194 ErrorOr<uint64_t> PCRes = parseHexField(EndChar: FieldSeparator, EndNl: true);
1195 if (std::error_code EC = PCRes.getError()) {
1196 consumeRestOfLine();
1197 return EC;
1198 }
1199
1200 if (!checkAndConsumeNewLine()) {
1201 reportError(ErrorMsg: "expected end of line");
1202 return make_error_code(E: llvm::errc::io_error);
1203 }
1204
1205 uint64_t Address = *AddrRes;
1206 if (!BC->HasFixedLoadAddress)
1207 adjustAddress(Address, MMI: MMapInfoIter->second);
1208
1209 return PerfMemSample{.PC: PCRes.get(), .Addr: Address};
1210}
1211
1212ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1213 auto parseOffset = [this]() -> ErrorOr<Location> {
1214 ErrorOr<uint64_t> Res = parseHexField(EndChar: FieldSeparator);
1215 if (std::error_code EC = Res.getError())
1216 return EC;
1217 return Location(Res.get());
1218 };
1219
1220 size_t Sep = ParsingBuf.find_first_of(Chars: " \n");
1221 if (Sep == StringRef::npos)
1222 return parseOffset();
1223 StringRef LookAhead = ParsingBuf.substr(Start: 0, N: Sep);
1224 if (LookAhead.find_first_of(Chars: ":") == StringRef::npos)
1225 return parseOffset();
1226
1227 ErrorOr<StringRef> BuildID = parseString(EndChar: ':');
1228 if (std::error_code EC = BuildID.getError())
1229 return EC;
1230 ErrorOr<uint64_t> Offset = parseHexField(EndChar: FieldSeparator);
1231 if (std::error_code EC = Offset.getError())
1232 return EC;
1233 return Location(true, BuildID.get(), Offset.get());
1234}
1235
1236ErrorOr<DataAggregator::AggregatedLBREntry>
1237DataAggregator::parseAggregatedLBREntry() {
1238 while (checkAndConsumeFS()) {
1239 }
1240
1241 ErrorOr<StringRef> TypeOrErr = parseString(EndChar: FieldSeparator);
1242 if (std::error_code EC = TypeOrErr.getError())
1243 return EC;
1244 auto Type = AggregatedLBREntry::BRANCH;
1245 if (TypeOrErr.get() == "B") {
1246 Type = AggregatedLBREntry::BRANCH;
1247 } else if (TypeOrErr.get() == "F") {
1248 Type = AggregatedLBREntry::FT;
1249 } else if (TypeOrErr.get() == "f") {
1250 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1251 } else {
1252 reportError(ErrorMsg: "expected B, F or f");
1253 return make_error_code(E: llvm::errc::io_error);
1254 }
1255
1256 while (checkAndConsumeFS()) {
1257 }
1258 ErrorOr<Location> From = parseLocationOrOffset();
1259 if (std::error_code EC = From.getError())
1260 return EC;
1261
1262 while (checkAndConsumeFS()) {
1263 }
1264 ErrorOr<Location> To = parseLocationOrOffset();
1265 if (std::error_code EC = To.getError())
1266 return EC;
1267
1268 while (checkAndConsumeFS()) {
1269 }
1270 ErrorOr<int64_t> Frequency =
1271 parseNumberField(EndChar: FieldSeparator, EndNl: Type != AggregatedLBREntry::BRANCH);
1272 if (std::error_code EC = Frequency.getError())
1273 return EC;
1274
1275 uint64_t Mispreds = 0;
1276 if (Type == AggregatedLBREntry::BRANCH) {
1277 while (checkAndConsumeFS()) {
1278 }
1279 ErrorOr<int64_t> MispredsOrErr = parseNumberField(EndChar: FieldSeparator, EndNl: true);
1280 if (std::error_code EC = MispredsOrErr.getError())
1281 return EC;
1282 Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1283 }
1284
1285 if (!checkAndConsumeNewLine()) {
1286 reportError(ErrorMsg: "expected end of line");
1287 return make_error_code(E: llvm::errc::io_error);
1288 }
1289
1290 return AggregatedLBREntry{.From: From.get(), .To: To.get(),
1291 .Count: static_cast<uint64_t>(Frequency.get()), .Mispreds: Mispreds,
1292 .EntryType: Type};
1293}
1294
1295bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1296 return opts::IgnoreInterruptLBR &&
1297 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1298}
1299
1300std::error_code DataAggregator::printLBRHeatMap() {
1301 outs() << "PERF2BOLT: parse branch events...\n";
1302 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1303 TimerGroupDesc, opts::TimeAggregator);
1304
1305 if (BC->IsLinuxKernel) {
1306 opts::HeatmapMaxAddress = 0xffffffffffffffff;
1307 opts::HeatmapMinAddress = KernelBaseAddr;
1308 }
1309 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1310 opts::HeatmapMaxAddress, getTextSections(BC));
1311 uint64_t NumTotalSamples = 0;
1312
1313 if (opts::BasicAggregation) {
1314 while (hasData()) {
1315 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1316 if (std::error_code EC = SampleRes.getError()) {
1317 if (EC == errc::no_such_process)
1318 continue;
1319 return EC;
1320 }
1321 PerfBasicSample &Sample = SampleRes.get();
1322 HM.registerAddress(Address: Sample.PC);
1323 NumTotalSamples++;
1324 }
1325 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1326 } else {
1327 while (hasData()) {
1328 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1329 if (std::error_code EC = SampleRes.getError()) {
1330 if (EC == errc::no_such_process)
1331 continue;
1332 return EC;
1333 }
1334
1335 PerfBranchSample &Sample = SampleRes.get();
1336
1337 // LBRs are stored in reverse execution order. NextLBR refers to the next
1338 // executed branch record.
1339 const LBREntry *NextLBR = nullptr;
1340 for (const LBREntry &LBR : Sample.LBR) {
1341 if (NextLBR) {
1342 // Record fall-through trace.
1343 const uint64_t TraceFrom = LBR.To;
1344 const uint64_t TraceTo = NextLBR->From;
1345 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1346 }
1347 NextLBR = &LBR;
1348 }
1349 if (!Sample.LBR.empty()) {
1350 HM.registerAddress(Address: Sample.LBR.front().To);
1351 HM.registerAddress(Address: Sample.LBR.back().From);
1352 }
1353 NumTotalSamples += Sample.LBR.size();
1354 }
1355 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1356 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1357 }
1358
1359 if (!NumTotalSamples) {
1360 if (opts::BasicAggregation) {
1361 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1362 "Cannot build heatmap.";
1363 } else {
1364 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1365 "Cannot build heatmap. Use -nl for building heatmap from "
1366 "basic events.\n";
1367 }
1368 exit(status: 1);
1369 }
1370
1371 outs() << "HEATMAP: building heat map...\n";
1372
1373 for (const auto &LBR : FallthroughLBRs) {
1374 const Trace &Trace = LBR.first;
1375 const FTInfo &Info = LBR.second;
1376 HM.registerAddressRange(StartAddress: Trace.From, EndAddress: Trace.To, Count: Info.InternCount);
1377 }
1378
1379 if (HM.getNumInvalidRanges())
1380 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1381
1382 if (!HM.size()) {
1383 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1384 exit(status: 1);
1385 }
1386
1387 HM.print(FileName: opts::OutputFilename);
1388 if (opts::OutputFilename == "-")
1389 HM.printCDF(FileName: opts::OutputFilename);
1390 else
1391 HM.printCDF(FileName: opts::OutputFilename + ".csv");
1392 if (opts::OutputFilename == "-")
1393 HM.printSectionHotness(Filename: opts::OutputFilename);
1394 else
1395 HM.printSectionHotness(Filename: opts::OutputFilename + "-section-hotness.csv");
1396
1397 return std::error_code();
1398}
1399
1400uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1401 bool NeedsSkylakeFix) {
1402 uint64_t NumTraces{0};
1403 // LBRs are stored in reverse execution order. NextPC refers to the next
1404 // recorded executed PC.
1405 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1406 uint32_t NumEntry = 0;
1407 for (const LBREntry &LBR : Sample.LBR) {
1408 ++NumEntry;
1409 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1410 // sometimes record entry 32 as an exact copy of entry 31. This will cause
1411 // us to likely record an invalid trace and generate a stale function for
1412 // BAT mode (non BAT disassembles the function and is able to ignore this
1413 // trace at aggregation time). Drop first 2 entries (last two, in
1414 // chronological order)
1415 if (NeedsSkylakeFix && NumEntry <= 2)
1416 continue;
1417 if (NextPC) {
1418 // Record fall-through trace.
1419 const uint64_t TraceFrom = LBR.To;
1420 const uint64_t TraceTo = NextPC;
1421 const BinaryFunction *TraceBF =
1422 getBinaryFunctionContainingAddress(Address: TraceFrom);
1423 if (TraceBF && TraceBF->containsAddress(PC: TraceTo)) {
1424 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1425 if (TraceBF->containsAddress(PC: LBR.From))
1426 ++Info.InternCount;
1427 else
1428 ++Info.ExternCount;
1429 } else {
1430 const BinaryFunction *ToFunc =
1431 getBinaryFunctionContainingAddress(Address: TraceTo);
1432 if (TraceBF && ToFunc) {
1433 LLVM_DEBUG({
1434 dbgs() << "Invalid trace starting in " << TraceBF->getPrintName()
1435 << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress())
1436 << formatv(" and ending @ {0:x}\n", TraceTo);
1437 });
1438 ++NumInvalidTraces;
1439 } else {
1440 LLVM_DEBUG({
1441 dbgs() << "Out of range trace starting in "
1442 << (TraceBF ? TraceBF->getPrintName() : "None")
1443 << formatv(" @ {0:x}",
1444 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1445 << " and ending in "
1446 << (ToFunc ? ToFunc->getPrintName() : "None")
1447 << formatv(" @ {0:x}\n",
1448 TraceTo - (ToFunc ? ToFunc->getAddress() : 0));
1449 });
1450 ++NumLongRangeTraces;
1451 }
1452 }
1453 ++NumTraces;
1454 }
1455 NextPC = LBR.From;
1456
1457 uint64_t From = getBinaryFunctionContainingAddress(Address: LBR.From) ? LBR.From : 0;
1458 uint64_t To = getBinaryFunctionContainingAddress(Address: LBR.To) ? LBR.To : 0;
1459 if (!From && !To)
1460 continue;
1461 BranchInfo &Info = BranchLBRs[Trace(From, To)];
1462 ++Info.TakenCount;
1463 Info.MispredCount += LBR.Mispred;
1464 }
1465 return NumTraces;
1466}
1467
1468std::error_code DataAggregator::parseBranchEvents() {
1469 outs() << "PERF2BOLT: parse branch events...\n";
1470 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1471 TimerGroupDesc, opts::TimeAggregator);
1472
1473 uint64_t NumTotalSamples = 0;
1474 uint64_t NumEntries = 0;
1475 uint64_t NumSamples = 0;
1476 uint64_t NumSamplesNoLBR = 0;
1477 uint64_t NumTraces = 0;
1478 bool NeedsSkylakeFix = false;
1479
1480 while (hasData() && NumTotalSamples < opts::MaxSamples) {
1481 ++NumTotalSamples;
1482
1483 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1484 if (std::error_code EC = SampleRes.getError()) {
1485 if (EC == errc::no_such_process)
1486 continue;
1487 return EC;
1488 }
1489 ++NumSamples;
1490
1491 PerfBranchSample &Sample = SampleRes.get();
1492 if (opts::WriteAutoFDOData)
1493 ++BasicSamples[Sample.PC];
1494
1495 if (Sample.LBR.empty()) {
1496 ++NumSamplesNoLBR;
1497 continue;
1498 }
1499
1500 NumEntries += Sample.LBR.size();
1501 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1502 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1503 NeedsSkylakeFix = true;
1504 }
1505
1506 NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
1507 }
1508
1509 for (const Trace &Trace : llvm::make_first_range(c&: BranchLBRs))
1510 for (const uint64_t Addr : {Trace.From, Trace.To})
1511 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Addr))
1512 BF->setHasProfileAvailable();
1513
1514 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1515 OS << " (";
1516 if (OS.has_colors()) {
1517 if (Percent > T2)
1518 OS.changeColor(Color: raw_ostream::RED);
1519 else if (Percent > T1)
1520 OS.changeColor(Color: raw_ostream::YELLOW);
1521 else
1522 OS.changeColor(Color: raw_ostream::GREEN);
1523 }
1524 OS << format(Fmt: "%.1f%%", Vals: Percent);
1525 if (OS.has_colors())
1526 OS.resetColor();
1527 OS << ")";
1528 };
1529
1530 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1531 << " LBR entries\n";
1532 if (NumTotalSamples) {
1533 if (NumSamples && NumSamplesNoLBR == NumSamples) {
1534 // Note: we don't know if perf2bolt is being used to parse memory samples
1535 // at this point. In this case, it is OK to parse zero LBRs.
1536 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1537 "LBR. Record profile with perf record -j any or run perf2bolt "
1538 "in no-LBR mode with -nl (the performance improvement in -nl "
1539 "mode may be limited)\n";
1540 } else {
1541 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1542 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1543 outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1544 printColored(outs(), PercentIgnored, 20, 50);
1545 outs() << " were ignored\n";
1546 if (PercentIgnored > 50.0f)
1547 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1548 "were attributed to the input binary\n";
1549 }
1550 }
1551 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1552 << NumInvalidTraces;
1553 float Perc = 0.0f;
1554 if (NumTraces > 0) {
1555 Perc = NumInvalidTraces * 100.0f / NumTraces;
1556 printColored(outs(), Perc, 5, 10);
1557 }
1558 outs() << "\n";
1559 if (Perc > 10.0f)
1560 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1561 "binary is probably not the same binary used during profiling "
1562 "collection. The generated data may be ineffective for improving "
1563 "performance.\n\n";
1564
1565 outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1566 << NumLongRangeTraces;
1567 if (NumTraces > 0)
1568 outs() << format(Fmt: " (%.1f%%)", Vals: NumLongRangeTraces * 100.0f / NumTraces);
1569 outs() << "\n";
1570
1571 if (NumColdSamples > 0) {
1572 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1573 outs() << "PERF2BOLT: " << NumColdSamples
1574 << format(Fmt: " (%.1f%%)", Vals: ColdSamples)
1575 << " samples recorded in cold regions of split functions.\n";
1576 if (ColdSamples > 5.0f)
1577 outs()
1578 << "WARNING: The BOLT-processed binary where samples were collected "
1579 "likely used bad data or your service observed a large shift in "
1580 "profile. You may want to audit this.\n";
1581 }
1582
1583 return std::error_code();
1584}
1585
1586void DataAggregator::processBranchEvents() {
1587 outs() << "PERF2BOLT: processing branch events...\n";
1588 NamedRegionTimer T("processBranch", "Processing branch events",
1589 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1590
1591 for (const auto &AggrLBR : FallthroughLBRs) {
1592 const Trace &Loc = AggrLBR.first;
1593 const FTInfo &Info = AggrLBR.second;
1594 LBREntry First{.From: Loc.From, .To: Loc.From, .Mispred: false};
1595 LBREntry Second{.From: Loc.To, .To: Loc.To, .Mispred: false};
1596 if (Info.InternCount)
1597 doTrace(First, Second, Count: Info.InternCount);
1598 if (Info.ExternCount) {
1599 First.From = 0;
1600 doTrace(First, Second, Count: Info.ExternCount);
1601 }
1602 }
1603
1604 for (const auto &AggrLBR : BranchLBRs) {
1605 const Trace &Loc = AggrLBR.first;
1606 const BranchInfo &Info = AggrLBR.second;
1607 doBranch(From: Loc.From, To: Loc.To, Count: Info.TakenCount, Mispreds: Info.MispredCount);
1608 }
1609}
1610
1611std::error_code DataAggregator::parseBasicEvents() {
1612 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1613 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1614 TimerGroupDesc, opts::TimeAggregator);
1615 while (hasData()) {
1616 ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1617 if (std::error_code EC = Sample.getError())
1618 return EC;
1619
1620 if (!Sample->PC)
1621 continue;
1622
1623 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC))
1624 BF->setHasProfileAvailable();
1625
1626 ++BasicSamples[Sample->PC];
1627 EventNames.insert(key: Sample->EventName);
1628 }
1629
1630 return std::error_code();
1631}
1632
1633void DataAggregator::processBasicEvents() {
1634 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1635 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1636 TimerGroupDesc, opts::TimeAggregator);
1637 uint64_t OutOfRangeSamples = 0;
1638 uint64_t NumSamples = 0;
1639 for (auto &Sample : BasicSamples) {
1640 const uint64_t PC = Sample.first;
1641 const uint64_t HitCount = Sample.second;
1642 NumSamples += HitCount;
1643 BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC);
1644 if (!Func) {
1645 OutOfRangeSamples += HitCount;
1646 continue;
1647 }
1648
1649 doSample(OrigFunc&: *Func, Address: PC, Count: HitCount);
1650 }
1651 outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1652
1653 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1654 << OutOfRangeSamples;
1655 float Perc = 0.0f;
1656 if (NumSamples > 0) {
1657 outs() << " (";
1658 Perc = OutOfRangeSamples * 100.0f / NumSamples;
1659 if (outs().has_colors()) {
1660 if (Perc > 60.0f)
1661 outs().changeColor(Color: raw_ostream::RED);
1662 else if (Perc > 40.0f)
1663 outs().changeColor(Color: raw_ostream::YELLOW);
1664 else
1665 outs().changeColor(Color: raw_ostream::GREEN);
1666 }
1667 outs() << format(Fmt: "%.1f%%", Vals: Perc);
1668 if (outs().has_colors())
1669 outs().resetColor();
1670 outs() << ")";
1671 }
1672 outs() << "\n";
1673 if (Perc > 80.0f)
1674 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1675 "binary is probably not the same binary used during profiling "
1676 "collection. The generated data may be ineffective for improving "
1677 "performance.\n\n";
1678}
1679
1680std::error_code DataAggregator::parseMemEvents() {
1681 outs() << "PERF2BOLT: parsing memory events...\n";
1682 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1683 TimerGroupDesc, opts::TimeAggregator);
1684 while (hasData()) {
1685 ErrorOr<PerfMemSample> Sample = parseMemSample();
1686 if (std::error_code EC = Sample.getError())
1687 return EC;
1688
1689 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC))
1690 BF->setHasProfileAvailable();
1691
1692 MemSamples.emplace_back(args: std::move(Sample.get()));
1693 }
1694
1695 return std::error_code();
1696}
1697
1698void DataAggregator::processMemEvents() {
1699 NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1700 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1701 for (const PerfMemSample &Sample : MemSamples) {
1702 uint64_t PC = Sample.PC;
1703 uint64_t Addr = Sample.Addr;
1704 StringRef FuncName;
1705 StringRef MemName;
1706
1707 // Try to resolve symbol for PC
1708 BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC);
1709 if (!Func) {
1710 LLVM_DEBUG(if (PC != 0) {
1711 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr);
1712 });
1713 continue;
1714 }
1715
1716 FuncName = Func->getOneName();
1717 PC -= Func->getAddress();
1718
1719 // Try to resolve symbol for memory load
1720 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Address: Addr)) {
1721 MemName = BD->getName();
1722 Addr -= BD->getAddress();
1723 } else if (opts::FilterMemProfile) {
1724 // Filter out heap/stack accesses
1725 continue;
1726 }
1727
1728 const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1729 const Location AddrLoc(!MemName.empty(), MemName, Addr);
1730
1731 FuncMemData *MemData = &NamesToMemEvents[FuncName];
1732 MemData->Name = FuncName;
1733 setMemData(BF: *Func, FMD: MemData);
1734 MemData->update(Offset: FuncLoc, Addr: AddrLoc);
1735 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1736 }
1737}
1738
1739std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1740 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1741 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1742 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1743 while (hasData()) {
1744 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1745 if (std::error_code EC = AggrEntry.getError())
1746 return EC;
1747
1748 for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset})
1749 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Addr))
1750 BF->setHasProfileAvailable();
1751
1752 AggregatedLBRs.emplace_back(args: std::move(AggrEntry.get()));
1753 }
1754
1755 return std::error_code();
1756}
1757
1758void DataAggregator::processPreAggregated() {
1759 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1760 NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1761 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1762
1763 uint64_t NumTraces = 0;
1764 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1765 switch (AggrEntry.EntryType) {
1766 case AggregatedLBREntry::BRANCH:
1767 doBranch(From: AggrEntry.From.Offset, To: AggrEntry.To.Offset, Count: AggrEntry.Count,
1768 Mispreds: AggrEntry.Mispreds);
1769 break;
1770 case AggregatedLBREntry::FT:
1771 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1772 LBREntry First{.From: AggrEntry.EntryType == AggregatedLBREntry::FT
1773 ? AggrEntry.From.Offset
1774 : 0,
1775 .To: AggrEntry.From.Offset, .Mispred: false};
1776 LBREntry Second{.From: AggrEntry.To.Offset, .To: AggrEntry.To.Offset, .Mispred: false};
1777 doTrace(First, Second, Count: AggrEntry.Count);
1778 NumTraces += AggrEntry.Count;
1779 break;
1780 }
1781 }
1782 }
1783
1784 outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1785 << " aggregated LBR entries\n";
1786 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1787 << NumInvalidTraces;
1788 float Perc = 0.0f;
1789 if (NumTraces > 0) {
1790 outs() << " (";
1791 Perc = NumInvalidTraces * 100.0f / NumTraces;
1792 if (outs().has_colors()) {
1793 if (Perc > 10.0f)
1794 outs().changeColor(Color: raw_ostream::RED);
1795 else if (Perc > 5.0f)
1796 outs().changeColor(Color: raw_ostream::YELLOW);
1797 else
1798 outs().changeColor(Color: raw_ostream::GREEN);
1799 }
1800 outs() << format(Fmt: "%.1f%%", Vals: Perc);
1801 if (outs().has_colors())
1802 outs().resetColor();
1803 outs() << ")";
1804 }
1805 outs() << "\n";
1806 if (Perc > 10.0f)
1807 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1808 "binary is probably not the same binary used during profiling "
1809 "collection. The generated data may be ineffective for improving "
1810 "performance.\n\n";
1811
1812 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1813 << NumLongRangeTraces;
1814 if (NumTraces > 0)
1815 outs() << format(Fmt: " (%.1f%%)", Vals: NumLongRangeTraces * 100.0f / NumTraces);
1816 outs() << "\n";
1817}
1818
1819std::optional<int32_t> DataAggregator::parseCommExecEvent() {
1820 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1821 if (LineEnd == StringRef::npos) {
1822 reportError(ErrorMsg: "expected rest of line");
1823 Diag << "Found: " << ParsingBuf << "\n";
1824 return std::nullopt;
1825 }
1826 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1827
1828 size_t Pos = Line.find(Str: "PERF_RECORD_COMM exec");
1829 if (Pos == StringRef::npos)
1830 return std::nullopt;
1831 Line = Line.drop_front(N: Pos);
1832
1833 // Line:
1834 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1835 StringRef PIDStr = Line.rsplit(Separator: ':').second.split(Separator: '/').first;
1836 int32_t PID;
1837 if (PIDStr.getAsInteger(Radix: 10, Result&: PID)) {
1838 reportError(ErrorMsg: "expected PID");
1839 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1840 return std::nullopt;
1841 }
1842
1843 return PID;
1844}
1845
1846namespace {
1847std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1848 const StringRef SecTimeStr = TimeStr.split(Separator: '.').first;
1849 const StringRef USecTimeStr = TimeStr.split(Separator: '.').second;
1850 uint64_t SecTime;
1851 uint64_t USecTime;
1852 if (SecTimeStr.getAsInteger(Radix: 10, Result&: SecTime) ||
1853 USecTimeStr.getAsInteger(Radix: 10, Result&: USecTime))
1854 return std::nullopt;
1855 return SecTime * 1000000ULL + USecTime;
1856}
1857}
1858
1859std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1860 while (checkAndConsumeFS()) {
1861 }
1862
1863 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1864 if (LineEnd == StringRef::npos) {
1865 reportError(ErrorMsg: "expected rest of line");
1866 Diag << "Found: " << ParsingBuf << "\n";
1867 return std::nullopt;
1868 }
1869 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1870
1871 size_t Pos = Line.find(Str: "PERF_RECORD_FORK");
1872 if (Pos == StringRef::npos) {
1873 consumeRestOfLine();
1874 return std::nullopt;
1875 }
1876
1877 ForkInfo FI;
1878
1879 const StringRef TimeStr =
1880 Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second;
1881 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1882 FI.Time = *TimeRes;
1883 }
1884
1885 Line = Line.drop_front(N: Pos);
1886
1887 // Line:
1888 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1889 const StringRef ChildPIDStr = Line.split(Separator: '(').second.split(Separator: ':').first;
1890 if (ChildPIDStr.getAsInteger(Radix: 10, Result&: FI.ChildPID)) {
1891 reportError(ErrorMsg: "expected PID");
1892 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1893 return std::nullopt;
1894 }
1895
1896 const StringRef ParentPIDStr = Line.rsplit(Separator: '(').second.split(Separator: ':').first;
1897 if (ParentPIDStr.getAsInteger(Radix: 10, Result&: FI.ParentPID)) {
1898 reportError(ErrorMsg: "expected PID");
1899 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1900 return std::nullopt;
1901 }
1902
1903 consumeRestOfLine();
1904
1905 return FI;
1906}
1907
1908ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1909DataAggregator::parseMMapEvent() {
1910 while (checkAndConsumeFS()) {
1911 }
1912
1913 MMapInfo ParsedInfo;
1914
1915 size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n");
1916 if (LineEnd == StringRef::npos) {
1917 reportError(ErrorMsg: "expected rest of line");
1918 Diag << "Found: " << ParsingBuf << "\n";
1919 return make_error_code(E: llvm::errc::io_error);
1920 }
1921 StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd);
1922
1923 size_t Pos = Line.find(Str: "PERF_RECORD_MMAP2");
1924 if (Pos == StringRef::npos) {
1925 consumeRestOfLine();
1926 return std::make_pair(x: StringRef(), y&: ParsedInfo);
1927 }
1928
1929 // Line:
1930 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1931
1932 const StringRef TimeStr =
1933 Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second;
1934 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1935 ParsedInfo.Time = *TimeRes;
1936
1937 Line = Line.drop_front(N: Pos);
1938
1939 // Line:
1940 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1941
1942 StringRef FileName = Line.rsplit(Separator: FieldSeparator).second;
1943 if (FileName.starts_with(Prefix: "//") || FileName.starts_with(Prefix: "[")) {
1944 consumeRestOfLine();
1945 return std::make_pair(x: StringRef(), y&: ParsedInfo);
1946 }
1947 FileName = sys::path::filename(path: FileName);
1948
1949 const StringRef PIDStr = Line.split(Separator: FieldSeparator).second.split(Separator: '/').first;
1950 if (PIDStr.getAsInteger(Radix: 10, Result&: ParsedInfo.PID)) {
1951 reportError(ErrorMsg: "expected PID");
1952 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1953 return make_error_code(E: llvm::errc::io_error);
1954 }
1955
1956 const StringRef BaseAddressStr = Line.split(Separator: '[').second.split(Separator: '(').first;
1957 if (BaseAddressStr.getAsInteger(Radix: 0, Result&: ParsedInfo.MMapAddress)) {
1958 reportError(ErrorMsg: "expected base address");
1959 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1960 return make_error_code(E: llvm::errc::io_error);
1961 }
1962
1963 const StringRef SizeStr = Line.split(Separator: '(').second.split(Separator: ')').first;
1964 if (SizeStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Size)) {
1965 reportError(ErrorMsg: "expected mmaped size");
1966 Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1967 return make_error_code(E: llvm::errc::io_error);
1968 }
1969
1970 const StringRef OffsetStr =
1971 Line.split(Separator: '@').second.ltrim().split(Separator: FieldSeparator).first;
1972 if (OffsetStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Offset)) {
1973 reportError(ErrorMsg: "expected mmaped page-aligned offset");
1974 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1975 return make_error_code(E: llvm::errc::io_error);
1976 }
1977
1978 consumeRestOfLine();
1979
1980 return std::make_pair(x&: FileName, y&: ParsedInfo);
1981}
1982
1983std::error_code DataAggregator::parseMMapEvents() {
1984 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1985 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1986 TimerGroupDesc, opts::TimeAggregator);
1987
1988 std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1989 while (hasData()) {
1990 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1991 if (std::error_code EC = FileMMapInfoRes.getError())
1992 return EC;
1993
1994 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1995 if (FileMMapInfo.second.PID == -1)
1996 continue;
1997 if (FileMMapInfo.first.equals(RHS: "(deleted)"))
1998 continue;
1999
2000 // Consider only the first mapping of the file for any given PID
2001 auto Range = GlobalMMapInfo.equal_range(x: FileMMapInfo.first);
2002 bool PIDExists = llvm::any_of(Range: make_range(p: Range), P: [&](const auto &MI) {
2003 return MI.second.PID == FileMMapInfo.second.PID;
2004 });
2005
2006 if (PIDExists)
2007 continue;
2008
2009 GlobalMMapInfo.insert(x&: FileMMapInfo);
2010 }
2011
2012 LLVM_DEBUG({
2013 dbgs() << "FileName -> mmap info:\n"
2014 << " Filename : PID [MMapAddr, Size, Offset]\n";
2015 for (const auto &[Name, MMap] : GlobalMMapInfo)
2016 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID,
2017 MMap.MMapAddress, MMap.Size, MMap.Offset);
2018 });
2019
2020 StringRef NameToUse = llvm::sys::path::filename(path: BC->getFilename());
2021 if (GlobalMMapInfo.count(x: NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2022 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2023 << "\" for profile matching\n";
2024 NameToUse = BuildIDBinaryName;
2025 }
2026
2027 auto Range = GlobalMMapInfo.equal_range(x: NameToUse);
2028 for (MMapInfo &MMapInfo : llvm::make_second_range(c: make_range(p: Range))) {
2029 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2030 // Check that the binary mapping matches one of the segments.
2031 bool MatchFound = llvm::any_of(
2032 Range: llvm::make_second_range(c&: BC->SegmentMapInfo),
2033 P: [&](SegmentInfo &SegInfo) {
2034 // The mapping is page-aligned and hence the MMapAddress could be
2035 // different from the segment start address. We cannot know the page
2036 // size of the mapping, but we know it should not exceed the segment
2037 // alignment value. Hence we are performing an approximate check.
2038 return SegInfo.Address >= MMapInfo.MMapAddress &&
2039 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment;
2040 });
2041 if (!MatchFound) {
2042 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2043 << " at 0x" << Twine::utohexstr(Val: MMapInfo.MMapAddress) << '\n';
2044 continue;
2045 }
2046 }
2047
2048 // Set base address for shared objects.
2049 if (!BC->HasFixedLoadAddress) {
2050 std::optional<uint64_t> BaseAddress =
2051 BC->getBaseAddressForMapping(MMapAddress: MMapInfo.MMapAddress, FileOffset: MMapInfo.Offset);
2052 if (!BaseAddress) {
2053 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2054 "binary when memory mapped at 0x"
2055 << Twine::utohexstr(Val: MMapInfo.MMapAddress)
2056 << " using file offset 0x" << Twine::utohexstr(Val: MMapInfo.Offset)
2057 << ". Ignoring profile data for this mapping\n";
2058 continue;
2059 } else {
2060 MMapInfo.BaseAddress = *BaseAddress;
2061 }
2062 }
2063
2064 BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo));
2065 }
2066
2067 if (BinaryMMapInfo.empty()) {
2068 if (errs().has_colors())
2069 errs().changeColor(Color: raw_ostream::RED);
2070 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2071 << BC->getFilename() << "\".";
2072 if (!GlobalMMapInfo.empty()) {
2073 errs() << " Profile for the following binary name(s) is available:\n";
2074 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2075 I = GlobalMMapInfo.upper_bound(x: I->first))
2076 errs() << " " << I->first << '\n';
2077 errs() << "Please rename the input binary.\n";
2078 } else {
2079 errs() << " Failed to extract any binary name from a profile.\n";
2080 }
2081 if (errs().has_colors())
2082 errs().resetColor();
2083
2084 exit(status: 1);
2085 }
2086
2087 return std::error_code();
2088}
2089
2090std::error_code DataAggregator::parseTaskEvents() {
2091 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2092 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2093 TimerGroupDesc, opts::TimeAggregator);
2094
2095 while (hasData()) {
2096 if (std::optional<int32_t> CommInfo = parseCommExecEvent()) {
2097 // Remove forked child that ran execve
2098 auto MMapInfoIter = BinaryMMapInfo.find(x: *CommInfo);
2099 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2100 BinaryMMapInfo.erase(position: MMapInfoIter);
2101 consumeRestOfLine();
2102 continue;
2103 }
2104
2105 std::optional<ForkInfo> ForkInfo = parseForkEvent();
2106 if (!ForkInfo)
2107 continue;
2108
2109 if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2110 continue;
2111
2112 if (ForkInfo->Time == 0) {
2113 // Process was forked and mmaped before perf ran. In this case the child
2114 // should have its own mmap entry unless it was execve'd.
2115 continue;
2116 }
2117
2118 auto MMapInfoIter = BinaryMMapInfo.find(x: ForkInfo->ParentPID);
2119 if (MMapInfoIter == BinaryMMapInfo.end())
2120 continue;
2121
2122 MMapInfo MMapInfo = MMapInfoIter->second;
2123 MMapInfo.PID = ForkInfo->ChildPID;
2124 MMapInfo.Forked = true;
2125 BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo));
2126 }
2127
2128 outs() << "PERF2BOLT: input binary is associated with "
2129 << BinaryMMapInfo.size() << " PID(s)\n";
2130
2131 LLVM_DEBUG({
2132 for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo))
2133 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID,
2134 (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress,
2135 MMI.Size);
2136 });
2137
2138 return std::error_code();
2139}
2140
2141std::optional<std::pair<StringRef, StringRef>>
2142DataAggregator::parseNameBuildIDPair() {
2143 while (checkAndConsumeFS()) {
2144 }
2145
2146 ErrorOr<StringRef> BuildIDStr = parseString(EndChar: FieldSeparator, EndNl: true);
2147 if (std::error_code EC = BuildIDStr.getError())
2148 return std::nullopt;
2149
2150 // If one of the strings is missing, don't issue a parsing error, but still
2151 // do not return a value.
2152 consumeAllRemainingFS();
2153 if (checkNewLine())
2154 return std::nullopt;
2155
2156 ErrorOr<StringRef> NameStr = parseString(EndChar: FieldSeparator, EndNl: true);
2157 if (std::error_code EC = NameStr.getError())
2158 return std::nullopt;
2159
2160 consumeRestOfLine();
2161 return std::make_pair(x&: NameStr.get(), y&: BuildIDStr.get());
2162}
2163
2164bool DataAggregator::hasAllBuildIDs() {
2165 const StringRef SavedParsingBuf = ParsingBuf;
2166
2167 if (!hasData())
2168 return false;
2169
2170 bool HasInvalidEntries = false;
2171 while (hasData()) {
2172 if (!parseNameBuildIDPair()) {
2173 HasInvalidEntries = true;
2174 break;
2175 }
2176 }
2177
2178 ParsingBuf = SavedParsingBuf;
2179
2180 return !HasInvalidEntries;
2181}
2182
2183std::optional<StringRef>
2184DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2185 const StringRef SavedParsingBuf = ParsingBuf;
2186
2187 StringRef FileName;
2188 while (hasData()) {
2189 std::optional<std::pair<StringRef, StringRef>> IDPair =
2190 parseNameBuildIDPair();
2191 if (!IDPair) {
2192 consumeRestOfLine();
2193 continue;
2194 }
2195
2196 if (IDPair->second.starts_with(Prefix: FileBuildID)) {
2197 FileName = sys::path::filename(path: IDPair->first);
2198 break;
2199 }
2200 }
2201
2202 ParsingBuf = SavedParsingBuf;
2203
2204 if (!FileName.empty())
2205 return FileName;
2206
2207 return std::nullopt;
2208}
2209
2210std::error_code
2211DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2212 std::error_code EC;
2213 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2214 if (EC)
2215 return EC;
2216
2217 bool WriteMemLocs = false;
2218
2219 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2220 if (WriteMemLocs)
2221 OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2222 else
2223 OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2224 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Name: Loc.Name))
2225 << " " << Twine::utohexstr(Val: Loc.Offset) << FieldSeparator;
2226 };
2227
2228 uint64_t BranchValues = 0;
2229 uint64_t MemValues = 0;
2230
2231 if (BAT)
2232 OutFile << "boltedcollection\n";
2233 if (opts::BasicAggregation) {
2234 OutFile << "no_lbr";
2235 for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
2236 OutFile << " " << Entry.getKey();
2237 OutFile << "\n";
2238
2239 for (const auto &KV : NamesToSamples) {
2240 const FuncSampleData &FSD = KV.second;
2241 for (const SampleInfo &SI : FSD.Data) {
2242 writeLocation(SI.Loc);
2243 OutFile << SI.Hits << "\n";
2244 ++BranchValues;
2245 }
2246 }
2247 } else {
2248 for (const auto &KV : NamesToBranches) {
2249 const FuncBranchData &FBD = KV.second;
2250 for (const llvm::bolt::BranchInfo &BI : FBD.Data) {
2251 writeLocation(BI.From);
2252 writeLocation(BI.To);
2253 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2254 ++BranchValues;
2255 }
2256 for (const llvm::bolt::BranchInfo &BI : FBD.EntryData) {
2257 // Do not output if source is a known symbol, since this was already
2258 // accounted for in the source function
2259 if (BI.From.IsSymbol)
2260 continue;
2261 writeLocation(BI.From);
2262 writeLocation(BI.To);
2263 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2264 ++BranchValues;
2265 }
2266 }
2267
2268 WriteMemLocs = true;
2269 for (const auto &KV : NamesToMemEvents) {
2270 const FuncMemData &FMD = KV.second;
2271 for (const MemInfo &MemEvent : FMD.Data) {
2272 writeLocation(MemEvent.Offset);
2273 writeLocation(MemEvent.Addr);
2274 OutFile << MemEvent.Count << "\n";
2275 ++MemValues;
2276 }
2277 }
2278 }
2279
2280 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2281 << " memory objects to " << OutputFilename << "\n";
2282
2283 return std::error_code();
2284}
2285
2286std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
2287 StringRef OutputFilename) const {
2288 std::error_code EC;
2289 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2290 if (EC)
2291 return EC;
2292
2293 yaml::bolt::BinaryProfile BP;
2294
2295 // Fill out the header info.
2296 BP.Header.Version = 1;
2297 BP.Header.FileName = std::string(BC.getFilename());
2298 std::optional<StringRef> BuildID = BC.getFileBuildID();
2299 BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
2300 BP.Header.Origin = std::string(getReaderName());
2301 // Only the input binary layout order is supported.
2302 BP.Header.IsDFSOrder = false;
2303 // FIXME: Need to match hash function used to produce BAT hashes.
2304 BP.Header.HashFunction = HashFunction::Default;
2305
2306 ListSeparator LS(",");
2307 raw_string_ostream EventNamesOS(BP.Header.EventNames);
2308 for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
2309 EventNamesOS << LS << EventEntry.first().str();
2310
2311 BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
2312 : BinaryFunction::PF_LBR;
2313
2314 if (!opts::BasicAggregation) {
2315 // Convert profile for functions not covered by BAT
2316 for (auto &BFI : BC.getBinaryFunctions()) {
2317 BinaryFunction &Function = BFI.second;
2318 if (!Function.hasProfile())
2319 continue;
2320 if (BAT->isBATFunction(Address: Function.getAddress()))
2321 continue;
2322 BP.Functions.emplace_back(
2323 args: YAMLProfileWriter::convert(BF: Function, /*UseDFS=*/false, BAT));
2324 }
2325
2326 for (const auto &KV : NamesToBranches) {
2327 const StringRef FuncName = KV.first;
2328 const FuncBranchData &Branches = KV.second;
2329 yaml::bolt::BinaryFunctionProfile YamlBF;
2330 BinaryData *BD = BC.getBinaryDataByName(Name: FuncName);
2331 assert(BD);
2332 uint64_t FuncAddress = BD->getAddress();
2333 if (!BAT->isBATFunction(Address: FuncAddress))
2334 continue;
2335 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: FuncAddress);
2336 assert(BF);
2337 YamlBF.Name = FuncName.str();
2338 YamlBF.Id = BF->getFunctionNumber();
2339 YamlBF.Hash = BAT->getBFHash(FuncOutputAddress: FuncAddress);
2340 YamlBF.ExecCount = BF->getKnownExecutionCount();
2341 YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(OutputAddress: FuncAddress);
2342 const BoltAddressTranslation::BBHashMapTy &BlockMap =
2343 BAT->getBBHashMap(FuncOutputAddress: FuncAddress);
2344 YamlBF.Blocks.resize(new_size: YamlBF.NumBasicBlocks);
2345
2346 for (auto &&[Idx, YamlBB] : llvm::enumerate(First&: YamlBF.Blocks))
2347 YamlBB.Index = Idx;
2348
2349 for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
2350 YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash();
2351
2352 auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) {
2353 const llvm::bolt::BranchInfo &BI = Branches.Data.at(n: SuccDataIdx);
2354 yaml::bolt::SuccessorInfo SI;
2355 SI.Index = BlockMap.getBBIndex(BBInputOffset: SuccOffset);
2356 SI.Count = BI.Branches;
2357 SI.Mispreds = BI.Mispreds;
2358 return SI;
2359 };
2360
2361 auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx,
2362 uint32_t Offset) {
2363 const llvm::bolt::BranchInfo &BI = Branches.Data.at(n: CallToIdx);
2364 yaml::bolt::CallSiteInfo CSI;
2365 CSI.DestId = 0; // designated for unknown functions
2366 CSI.EntryDiscriminator = 0;
2367 CSI.Count = BI.Branches;
2368 CSI.Mispreds = BI.Mispreds;
2369 CSI.Offset = Offset;
2370 if (BinaryData *BD = BC.getBinaryDataByName(Name: CallToLoc.Name))
2371 YAMLProfileWriter::setCSIDestination(BC, CSI, Symbol: BD->getSymbol(), BAT,
2372 Offset: CallToLoc.Offset);
2373 return CSI;
2374 };
2375
2376 for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) {
2377 if (!BlockMap.isInputBlock(InputOffset: FromOffset))
2378 continue;
2379 const unsigned Index = BlockMap.getBBIndex(BBInputOffset: FromOffset);
2380 yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index];
2381 for (const auto &[SuccOffset, SuccDataIdx] : SuccKV)
2382 if (BlockMap.isInputBlock(InputOffset: SuccOffset))
2383 YamlBB.Successors.emplace_back(
2384 args: getSuccessorInfo(SuccOffset, SuccDataIdx));
2385 }
2386 for (const auto &[FromOffset, CallTo] : Branches.InterIndex) {
2387 auto BlockIt = BlockMap.upper_bound(Offset: FromOffset);
2388 --BlockIt;
2389 const unsigned BlockOffset = BlockIt->first;
2390 const unsigned BlockIndex = BlockIt->second.getBBIndex();
2391 yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
2392 const uint32_t Offset = FromOffset - BlockOffset;
2393 for (const auto &[CallToLoc, CallToIdx] : CallTo)
2394 YamlBB.CallSites.emplace_back(
2395 args: getCallSiteInfo(CallToLoc, CallToIdx, Offset));
2396 llvm::sort(C&: YamlBB.CallSites, Comp: [](yaml::bolt::CallSiteInfo &A,
2397 yaml::bolt::CallSiteInfo &B) {
2398 return A.Offset < B.Offset;
2399 });
2400 }
2401 // Drop blocks without a hash, won't be useful for stale matching.
2402 llvm::erase_if(C&: YamlBF.Blocks,
2403 P: [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2404 return YamlBB.Hash == (yaml::Hex64)0;
2405 });
2406 BP.Functions.emplace_back(args&: YamlBF);
2407 }
2408 }
2409
2410 // Write the profile.
2411 yaml::Output Out(OutFile, nullptr, 0);
2412 Out << BP;
2413 return std::error_code();
2414}
2415
2416void DataAggregator::dump() const { DataReader::dump(); }
2417
2418void DataAggregator::dump(const LBREntry &LBR) const {
2419 Diag << "From: " << Twine::utohexstr(Val: LBR.From)
2420 << " To: " << Twine::utohexstr(Val: LBR.To) << " Mispred? " << LBR.Mispred
2421 << "\n";
2422}
2423
2424void DataAggregator::dump(const PerfBranchSample &Sample) const {
2425 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2426 for (const LBREntry &LBR : Sample.LBR)
2427 dump(LBR);
2428}
2429
2430void DataAggregator::dump(const PerfMemSample &Sample) const {
2431 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2432}
2433

// Source code of bolt/lib/Profile/DataAggregator.cpp