1 | //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This family of functions reads profile data written by perf record,
// aggregates it, and then writes it back to an output file.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "bolt/Profile/DataAggregator.h" |
15 | #include "bolt/Core/BinaryContext.h" |
16 | #include "bolt/Core/BinaryFunction.h" |
17 | #include "bolt/Passes/BinaryPasses.h" |
18 | #include "bolt/Profile/BoltAddressTranslation.h" |
19 | #include "bolt/Profile/Heatmap.h" |
20 | #include "bolt/Profile/YAMLProfileWriter.h" |
21 | #include "bolt/Utils/CommandLineOpts.h" |
22 | #include "bolt/Utils/Utils.h" |
23 | #include "llvm/ADT/STLExtras.h" |
24 | #include "llvm/ADT/ScopeExit.h" |
25 | #include "llvm/Support/CommandLine.h" |
26 | #include "llvm/Support/Compiler.h" |
27 | #include "llvm/Support/Debug.h" |
28 | #include "llvm/Support/Errc.h" |
29 | #include "llvm/Support/FileSystem.h" |
30 | #include "llvm/Support/Process.h" |
31 | #include "llvm/Support/Program.h" |
32 | #include "llvm/Support/Regex.h" |
33 | #include "llvm/Support/Timer.h" |
34 | #include "llvm/Support/raw_ostream.h" |
35 | #include <map> |
36 | #include <optional> |
37 | #include <unordered_map> |
38 | #include <utility> |
39 | |
40 | #define DEBUG_TYPE "aggregator" |
41 | |
42 | using namespace llvm; |
43 | using namespace bolt; |
44 | |
45 | namespace opts { |
46 | |
47 | static cl::opt<bool> |
48 | BasicAggregation("nl", |
49 | cl::desc("aggregate basic samples (without LBR info)"), |
50 | cl::cat(AggregatorCategory)); |
51 | |
52 | static cl::opt<std::string> |
53 | ITraceAggregation("itrace", |
54 | cl::desc("Generate LBR info with perf itrace argument"), |
55 | cl::cat(AggregatorCategory)); |
56 | |
57 | static cl::opt<bool> |
58 | FilterMemProfile("filter-mem-profile", |
59 | cl::desc("if processing a memory profile, filter out stack or heap accesses " |
60 | "that won't be useful for BOLT to reduce profile file size"), |
61 | cl::init(Val: true), |
62 | cl::cat(AggregatorCategory)); |
63 | |
64 | static cl::opt<unsigned long long> |
65 | FilterPID("pid", |
66 | cl::desc("only use samples from process with specified PID"), |
67 | cl::init(Val: 0), |
68 | cl::Optional, |
69 | cl::cat(AggregatorCategory)); |
70 | |
71 | static cl::opt<bool> |
72 | IgnoreBuildID("ignore-build-id", |
73 | cl::desc("continue even if build-ids in input binary and perf.data mismatch"), |
74 | cl::init(Val: false), |
75 | cl::cat(AggregatorCategory)); |
76 | |
77 | static cl::opt<bool> IgnoreInterruptLBR( |
78 | "ignore-interrupt-lbr", |
79 | cl::desc("ignore kernel interrupt LBR that happens asynchronously"), |
80 | cl::init(Val: true), cl::cat(AggregatorCategory)); |
81 | |
82 | static cl::opt<unsigned long long> |
83 | MaxSamples("max-samples", |
84 | cl::init(Val: -1ULL), |
85 | cl::desc("maximum number of samples to read from LBR profile"), |
86 | cl::Optional, |
87 | cl::Hidden, |
88 | cl::cat(AggregatorCategory)); |
89 | |
90 | extern cl::opt<opts::ProfileFormatKind> ProfileFormat; |
91 | extern cl::opt<bool> ProfileWritePseudoProbes; |
92 | extern cl::opt<std::string> SaveProfile; |
93 | |
94 | cl::opt<bool> ReadPreAggregated( |
95 | "pa", cl::desc( "skip perf and read data from a pre-aggregated file format"), |
96 | cl::cat(AggregatorCategory)); |
97 | |
98 | cl::opt<std::string> |
99 | ReadPerfEvents("perf-script-events", |
100 | cl::desc("skip perf event collection by supplying a " |
101 | "perf-script output in a textual format"), |
102 | cl::ReallyHidden, cl::init(Val: ""), cl::cat(AggregatorCategory)); |
103 | |
104 | static cl::opt<bool> |
105 | TimeAggregator("time-aggr", |
106 | cl::desc("time BOLT aggregator"), |
107 | cl::init(Val: false), |
108 | cl::ZeroOrMore, |
109 | cl::cat(AggregatorCategory)); |
110 | |
111 | } // namespace opts |
112 | |
113 | namespace { |
114 | |
115 | const char TimerGroupName[] = "aggregator"; |
116 | const char TimerGroupDesc[] = "Aggregator"; |
117 | |
118 | std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) { |
119 | std::vector<SectionNameAndRange> sections; |
120 | for (BinarySection &Section : BC->sections()) { |
121 | if (!Section.isText()) |
122 | continue; |
123 | if (Section.getSize() == 0) |
124 | continue; |
125 | sections.push_back( |
126 | x: {.Name: Section.getName(), .BeginAddress: Section.getAddress(), .EndAddress: Section.getEndAddress()}); |
127 | } |
128 | llvm::sort(C&: sections, |
129 | Comp: [](const SectionNameAndRange &A, const SectionNameAndRange &B) { |
130 | return A.BeginAddress < B.BeginAddress; |
131 | }); |
132 | return sections; |
133 | } |
134 | } |
135 | |
136 | constexpr uint64_t DataAggregator::KernelBaseAddr; |
137 | |
138 | DataAggregator::~DataAggregator() { deleteTempFiles(); } |
139 | |
140 | namespace { |
141 | void deleteTempFile(const std::string &FileName) { |
142 | if (std::error_code Errc = sys::fs::remove(path: FileName.c_str())) |
143 | errs() << "PERF2BOLT: failed to delete temporary file "<< FileName |
144 | << " with error "<< Errc.message() << "\n"; |
145 | } |
146 | } |
147 | |
148 | void DataAggregator::deleteTempFiles() { |
149 | for (std::string &FileName : TempFiles) |
150 | deleteTempFile(FileName); |
151 | TempFiles.clear(); |
152 | } |
153 | |
154 | void DataAggregator::findPerfExecutable() { |
155 | std::optional<std::string> PerfExecutable = |
156 | sys::Process::FindInEnvPath(EnvName: "PATH", FileName: "perf"); |
157 | if (!PerfExecutable) { |
158 | outs() << "PERF2BOLT: No perf executable found!\n"; |
159 | exit(status: 1); |
160 | } |
161 | PerfPath = *PerfExecutable; |
162 | } |
163 | |
164 | void DataAggregator::start() { |
165 | outs() << "PERF2BOLT: Starting data aggregation job for "<< Filename << "\n"; |
166 | |
167 | // Turn on heatmap building if requested by --heatmap flag. |
168 | if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences()) |
169 | opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional; |
170 | |
171 | // Don't launch perf for pre-aggregated files or when perf input is specified |
172 | // by the user. |
173 | if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty()) |
174 | return; |
175 | |
176 | findPerfExecutable(); |
177 | |
178 | if (opts::BasicAggregation) { |
179 | launchPerfProcess(Name: "events without LBR", |
180 | PPI&: MainEventsPPI, |
181 | ArgsString: "script -F pid,event,ip", |
182 | /*Wait = */false); |
183 | } else if (!opts::ITraceAggregation.empty()) { |
184 | std::string ItracePerfScriptArgs = llvm::formatv( |
185 | Fmt: "script -F pid,brstack --itrace={0}", Vals&: opts::ITraceAggregation); |
186 | launchPerfProcess(Name: "branch events with itrace", PPI&: MainEventsPPI, |
187 | ArgsString: ItracePerfScriptArgs.c_str(), |
188 | /*Wait = */ false); |
189 | } else { |
190 | launchPerfProcess(Name: "branch events", PPI&: MainEventsPPI, ArgsString: "script -F pid,brstack", |
191 | /*Wait = */ false); |
192 | } |
193 | |
194 | // Note: we launch script for mem events regardless of the option, as the |
195 | // command fails fairly fast if mem events were not collected. |
196 | launchPerfProcess(Name: "mem events", |
197 | PPI&: MemEventsPPI, |
198 | ArgsString: "script -F pid,event,addr,ip", |
199 | /*Wait = */false); |
200 | |
201 | launchPerfProcess(Name: "process events", PPI&: MMapEventsPPI, |
202 | ArgsString: "script --show-mmap-events --no-itrace", |
203 | /*Wait = */ false); |
204 | |
205 | launchPerfProcess(Name: "task events", PPI&: TaskEventsPPI, |
206 | ArgsString: "script --show-task-events --no-itrace", |
207 | /*Wait = */ false); |
208 | } |
209 | |
210 | void DataAggregator::abort() { |
211 | if (opts::ReadPreAggregated) |
212 | return; |
213 | |
214 | std::string Error; |
215 | |
216 | // Kill subprocesses in case they are not finished |
217 | sys::Wait(PI: TaskEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error); |
218 | sys::Wait(PI: MMapEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error); |
219 | sys::Wait(PI: MainEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error); |
220 | sys::Wait(PI: MemEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error); |
221 | |
222 | deleteTempFiles(); |
223 | |
224 | exit(status: 1); |
225 | } |
226 | |
227 | void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, |
228 | const char *ArgsString, bool Wait) { |
229 | SmallVector<StringRef, 4> Argv; |
230 | |
231 | outs() << "PERF2BOLT: spawning perf job to read "<< Name << '\n'; |
232 | Argv.push_back(Elt: PerfPath.data()); |
233 | |
234 | StringRef(ArgsString).split(A&: Argv, Separator: ' '); |
235 | Argv.push_back(Elt: "-f"); |
236 | Argv.push_back(Elt: "-i"); |
237 | Argv.push_back(Elt: Filename.c_str()); |
238 | |
239 | if (std::error_code Errc = |
240 | sys::fs::createTemporaryFile(Prefix: "perf.script", Suffix: "out", ResultPath&: PPI.StdoutPath)) { |
241 | errs() << "PERF2BOLT: failed to create temporary file "<< PPI.StdoutPath |
242 | << " with error "<< Errc.message() << "\n"; |
243 | exit(status: 1); |
244 | } |
245 | TempFiles.push_back(x: PPI.StdoutPath.data()); |
246 | |
247 | if (std::error_code Errc = |
248 | sys::fs::createTemporaryFile(Prefix: "perf.script", Suffix: "err", ResultPath&: PPI.StderrPath)) { |
249 | errs() << "PERF2BOLT: failed to create temporary file "<< PPI.StderrPath |
250 | << " with error "<< Errc.message() << "\n"; |
251 | exit(status: 1); |
252 | } |
253 | TempFiles.push_back(x: PPI.StderrPath.data()); |
254 | |
255 | std::optional<StringRef> Redirects[] = { |
256 | std::nullopt, // Stdin |
257 | StringRef(PPI.StdoutPath.data()), // Stdout |
258 | StringRef(PPI.StderrPath.data())}; // Stderr |
259 | |
260 | LLVM_DEBUG({ |
261 | dbgs() << "Launching perf: "; |
262 | for (StringRef Arg : Argv) |
263 | dbgs() << Arg << " "; |
264 | dbgs() << " 1> "<< PPI.StdoutPath.data() << " 2> "<< PPI.StderrPath.data() |
265 | << "\n"; |
266 | }); |
267 | |
268 | if (Wait) |
269 | PPI.PI.ReturnCode = sys::ExecuteAndWait(Program: PerfPath.data(), Args: Argv, |
270 | /*envp*/ Env: std::nullopt, Redirects); |
271 | else |
272 | PPI.PI = sys::ExecuteNoWait(Program: PerfPath.data(), Args: Argv, /*envp*/ Env: std::nullopt, |
273 | Redirects); |
274 | } |
275 | |
276 | void DataAggregator::processFileBuildID(StringRef FileBuildID) { |
277 | PerfProcessInfo BuildIDProcessInfo; |
278 | launchPerfProcess(Name: "buildid list", |
279 | PPI&: BuildIDProcessInfo, |
280 | ArgsString: "buildid-list", |
281 | /*Wait = */true); |
282 | |
283 | if (BuildIDProcessInfo.PI.ReturnCode != 0) { |
284 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
285 | MemoryBuffer::getFileOrSTDIN(Filename: BuildIDProcessInfo.StderrPath.data()); |
286 | StringRef ErrBuf = (*MB)->getBuffer(); |
287 | |
288 | errs() << "PERF-ERROR: return code "<< BuildIDProcessInfo.PI.ReturnCode |
289 | << '\n'; |
290 | errs() << ErrBuf; |
291 | return; |
292 | } |
293 | |
294 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
295 | MemoryBuffer::getFileOrSTDIN(Filename: BuildIDProcessInfo.StdoutPath.data()); |
296 | if (std::error_code EC = MB.getError()) { |
297 | errs() << "Cannot open "<< BuildIDProcessInfo.StdoutPath.data() << ": " |
298 | << EC.message() << "\n"; |
299 | return; |
300 | } |
301 | |
302 | FileBuf = std::move(*MB); |
303 | ParsingBuf = FileBuf->getBuffer(); |
304 | |
305 | std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); |
306 | if (!FileName) { |
307 | if (hasAllBuildIDs()) { |
308 | errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. " |
309 | "This indicates the input binary supplied for data aggregation " |
310 | "is not the same recorded by perf when collecting profiling " |
311 | "data, or there were no samples recorded for the binary. " |
312 | "Use -ignore-build-id option to override.\n"; |
313 | if (!opts::IgnoreBuildID) |
314 | abort(); |
315 | } else { |
316 | errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " |
317 | "data was recorded without it\n"; |
318 | return; |
319 | } |
320 | } else if (*FileName != llvm::sys::path::filename(path: BC->getFilename())) { |
321 | errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; |
322 | BuildIDBinaryName = std::string(*FileName); |
323 | } else { |
324 | outs() << "PERF2BOLT: matched build-id and file name\n"; |
325 | } |
326 | } |
327 | |
328 | bool DataAggregator::checkPerfDataMagic(StringRef FileName) { |
329 | if (opts::ReadPreAggregated) |
330 | return true; |
331 | |
332 | Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(Name: FileName); |
333 | if (!FD) { |
334 | consumeError(Err: FD.takeError()); |
335 | return false; |
336 | } |
337 | |
338 | char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; |
339 | |
340 | auto Close = make_scope_exit(F: [&] { sys::fs::closeFile(F&: *FD); }); |
341 | Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( |
342 | FileHandle: *FD, Buf: MutableArrayRef(Buf, sizeof(Buf)), Offset: 0); |
343 | if (!BytesRead) { |
344 | consumeError(Err: BytesRead.takeError()); |
345 | return false; |
346 | } |
347 | |
348 | if (*BytesRead != 7) |
349 | return false; |
350 | |
351 | if (strncmp(s1: Buf, s2: "PERFILE", n: 7) == 0) |
352 | return true; |
353 | return false; |
354 | } |
355 | |
356 | void DataAggregator::parsePreAggregated() { |
357 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
358 | MemoryBuffer::getFileOrSTDIN(Filename); |
359 | if (std::error_code EC = MB.getError()) { |
360 | errs() << "PERF2BOLT-ERROR: cannot open "<< Filename << ": " |
361 | << EC.message() << "\n"; |
362 | exit(status: 1); |
363 | } |
364 | |
365 | FileBuf = std::move(*MB); |
366 | ParsingBuf = FileBuf->getBuffer(); |
367 | Col = 0; |
368 | Line = 1; |
369 | if (parsePreAggregatedLBRSamples()) { |
370 | errs() << "PERF2BOLT: failed to parse samples\n"; |
371 | exit(status: 1); |
372 | } |
373 | } |
374 | |
375 | void DataAggregator::filterBinaryMMapInfo() { |
376 | if (opts::FilterPID) { |
377 | auto MMapInfoIter = BinaryMMapInfo.find(x: opts::FilterPID); |
378 | if (MMapInfoIter != BinaryMMapInfo.end()) { |
379 | MMapInfo MMap = MMapInfoIter->second; |
380 | BinaryMMapInfo.clear(); |
381 | BinaryMMapInfo.insert(x: std::make_pair(x&: MMap.PID, y&: MMap)); |
382 | } else { |
383 | if (errs().has_colors()) |
384 | errs().changeColor(Color: raw_ostream::RED); |
385 | errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" |
386 | << opts::FilterPID << "\"" |
387 | << " for binary \""<< BC->getFilename() << "\"."; |
388 | assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); |
389 | errs() << " Profile for the following process is available:\n"; |
390 | for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) |
391 | outs() << " "<< MMI.second.PID |
392 | << (MMI.second.Forked ? " (forked)\n": "\n"); |
393 | |
394 | if (errs().has_colors()) |
395 | errs().resetColor(); |
396 | |
397 | exit(status: 1); |
398 | } |
399 | } |
400 | } |
401 | |
402 | int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, |
403 | PerfProcessErrorCallbackTy Callback) { |
404 | if (!opts::ReadPerfEvents.empty()) { |
405 | outs() << "PERF2BOLT: using pre-processed perf events for '"<< Name |
406 | << "' (perf-script-events)\n"; |
407 | ParsingBuf = opts::ReadPerfEvents; |
408 | return 0; |
409 | } |
410 | |
411 | std::string Error; |
412 | outs() << "PERF2BOLT: waiting for perf "<< Name |
413 | << " collection to finish...\n"; |
414 | sys::ProcessInfo PI = sys::Wait(PI: Process.PI, SecondsToWait: std::nullopt, ErrMsg: &Error); |
415 | |
416 | if (!Error.empty()) { |
417 | errs() << "PERF-ERROR: "<< PerfPath << ": "<< Error << "\n"; |
418 | deleteTempFiles(); |
419 | exit(status: 1); |
420 | } |
421 | |
422 | if (PI.ReturnCode != 0) { |
423 | ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = |
424 | MemoryBuffer::getFileOrSTDIN(Filename: Process.StderrPath.data()); |
425 | StringRef ErrBuf = (*ErrorMB)->getBuffer(); |
426 | |
427 | deleteTempFiles(); |
428 | Callback(PI.ReturnCode, ErrBuf); |
429 | return PI.ReturnCode; |
430 | } |
431 | |
432 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
433 | MemoryBuffer::getFileOrSTDIN(Filename: Process.StdoutPath.data()); |
434 | if (std::error_code EC = MB.getError()) { |
435 | errs() << "Cannot open "<< Process.StdoutPath.data() << ": " |
436 | << EC.message() << "\n"; |
437 | deleteTempFiles(); |
438 | exit(status: 1); |
439 | } |
440 | |
441 | FileBuf = std::move(*MB); |
442 | ParsingBuf = FileBuf->getBuffer(); |
443 | Col = 0; |
444 | Line = 1; |
445 | return PI.ReturnCode; |
446 | } |
447 | |
448 | Error DataAggregator::preprocessProfile(BinaryContext &BC) { |
449 | this->BC = &BC; |
450 | |
451 | auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) { |
452 | errs() << "PERF-ERROR: return code "<< ReturnCode << "\n"<< ErrBuf; |
453 | exit(status: 1); |
454 | }; |
455 | |
456 | auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) { |
457 | Regex NoData("Samples for '.*' event do not have ADDR attribute set. " |
458 | "Cannot print 'addr' field."); |
459 | if (!NoData.match(String: ErrBuf)) |
460 | ErrorCallback(ReturnCode, ErrBuf); |
461 | }; |
462 | |
463 | if (opts::ReadPreAggregated) { |
464 | parsePreAggregated(); |
465 | goto heatmap; |
466 | } |
467 | |
468 | if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) { |
469 | outs() << "BOLT-INFO: binary build-id is: "<< *FileBuildID << "\n"; |
470 | processFileBuildID(FileBuildID: *FileBuildID); |
471 | } else { |
472 | errs() << "BOLT-WARNING: build-id will not be checked because we could " |
473 | "not read one from input binary\n"; |
474 | } |
475 | |
476 | if (BC.IsLinuxKernel) { |
477 | // Current MMap parsing logic does not work with linux kernel. |
478 | // MMap entries for linux kernel uses PERF_RECORD_MMAP |
479 | // format instead of typical PERF_RECORD_MMAP2 format. |
480 | // Since linux kernel address mapping is absolute (same as |
481 | // in the ELF file), we avoid parsing MMap in linux kernel mode. |
482 | // While generating optimized linux kernel binary, we may need |
483 | // to parse MMap entries. |
484 | |
485 | // In linux kernel mode, we analyze and optimize |
486 | // all linux kernel binary instructions, irrespective |
487 | // of whether they are due to system calls or due to |
488 | // interrupts. Therefore, we cannot ignore interrupt |
489 | // in Linux kernel mode. |
490 | opts::IgnoreInterruptLBR = false; |
491 | } else { |
492 | prepareToParse(Name: "mmap events", Process&: MMapEventsPPI, Callback: ErrorCallback); |
493 | if (parseMMapEvents()) |
494 | errs() << "PERF2BOLT: failed to parse mmap events\n"; |
495 | } |
496 | |
497 | prepareToParse(Name: "task events", Process&: TaskEventsPPI, Callback: ErrorCallback); |
498 | if (parseTaskEvents()) |
499 | errs() << "PERF2BOLT: failed to parse task events\n"; |
500 | |
501 | filterBinaryMMapInfo(); |
502 | prepareToParse(Name: "events", Process&: MainEventsPPI, Callback: ErrorCallback); |
503 | |
504 | if ((!opts::BasicAggregation && parseBranchEvents()) || |
505 | (opts::BasicAggregation && parseBasicEvents())) |
506 | errs() << "PERF2BOLT: failed to parse samples\n"; |
507 | |
508 | // Special handling for memory events |
509 | if (!prepareToParse(Name: "mem events", Process&: MemEventsPPI, Callback: MemEventsErrorCallback)) |
510 | if (const std::error_code EC = parseMemEvents()) |
511 | errs() << "PERF2BOLT: failed to parse memory events: "<< EC.message() |
512 | << '\n'; |
513 | |
514 | deleteTempFiles(); |
515 | |
516 | heatmap: |
517 | if (!opts::HeatmapMode) |
518 | return Error::success(); |
519 | |
520 | if (std::error_code EC = printLBRHeatMap()) |
521 | return errorCodeToError(EC); |
522 | |
523 | if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Optional) |
524 | return Error::success(); |
525 | |
526 | assert(opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive); |
527 | exit(status: 0); |
528 | } |
529 | |
530 | Error DataAggregator::readProfile(BinaryContext &BC) { |
531 | processProfile(BC); |
532 | |
533 | for (auto &BFI : BC.getBinaryFunctions()) { |
534 | BinaryFunction &Function = BFI.second; |
535 | convertBranchData(BF&: Function); |
536 | } |
537 | |
538 | if (opts::AggregateOnly) { |
539 | if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata) |
540 | if (std::error_code EC = writeAggregatedFile(OutputFilename: opts::OutputFilename)) |
541 | report_error(Message: "cannot create output data file", EC); |
542 | |
543 | // BAT YAML is handled by DataAggregator since normal YAML output requires |
544 | // CFG which is not available in BAT mode. |
545 | if (usesBAT()) { |
546 | if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) |
547 | if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::OutputFilename)) |
548 | report_error(Message: "cannot create output data file", EC); |
549 | if (!opts::SaveProfile.empty()) |
550 | if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::SaveProfile)) |
551 | report_error(Message: "cannot create output data file", EC); |
552 | } |
553 | } |
554 | |
555 | return Error::success(); |
556 | } |
557 | |
558 | bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { |
559 | return Function.hasProfileAvailable(); |
560 | } |
561 | |
562 | void DataAggregator::processProfile(BinaryContext &BC) { |
563 | if (opts::BasicAggregation) |
564 | processBasicEvents(); |
565 | else |
566 | processBranchEvents(); |
567 | |
568 | processMemEvents(); |
569 | |
570 | // Mark all functions with registered events as having a valid profile. |
571 | for (auto &BFI : BC.getBinaryFunctions()) { |
572 | BinaryFunction &BF = BFI.second; |
573 | if (FuncBranchData *FBD = getBranchData(BF)) { |
574 | BF.markProfiled(Flags: BinaryFunction::PF_BRANCH); |
575 | BF.RawSampleCount = FBD->getNumExecutedBranches(); |
576 | } else if (FuncBasicSampleData *FSD = |
577 | getFuncBasicSampleData(FuncNames: BF.getNames())) { |
578 | BF.markProfiled(Flags: BinaryFunction::PF_BASIC); |
579 | BF.RawSampleCount = FSD->getSamples(); |
580 | } |
581 | } |
582 | |
583 | for (auto &FuncBranches : NamesToBranches) { |
584 | llvm::stable_sort(Range&: FuncBranches.second.Data); |
585 | llvm::stable_sort(Range&: FuncBranches.second.EntryData); |
586 | } |
587 | |
588 | for (auto &MemEvents : NamesToMemEvents) |
589 | llvm::stable_sort(Range&: MemEvents.second.Data); |
590 | |
591 | // Release intermediate storage. |
592 | clear(Container&: BranchLBRs); |
593 | clear(Container&: FallthroughLBRs); |
594 | clear(Container&: BasicSamples); |
595 | clear(Container&: MemSamples); |
596 | } |
597 | |
598 | BinaryFunction * |
599 | DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const { |
600 | if (!BC->containsAddress(Address)) |
601 | return nullptr; |
602 | |
603 | return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, |
604 | /*UseMaxSize=*/true); |
605 | } |
606 | |
607 | BinaryFunction * |
608 | DataAggregator::getBATParentFunction(const BinaryFunction &Func) const { |
609 | if (BAT) |
610 | if (const uint64_t HotAddr = BAT->fetchParentAddress(Address: Func.getAddress())) |
611 | return getBinaryFunctionContainingAddress(Address: HotAddr); |
612 | return nullptr; |
613 | } |
614 | |
615 | StringRef DataAggregator::getLocationName(const BinaryFunction &Func, |
616 | bool BAT) { |
617 | if (!BAT) |
618 | return Func.getOneName(); |
619 | |
620 | const BinaryFunction *OrigFunc = &Func; |
621 | // If it is a local function, prefer the name containing the file name where |
622 | // the local function was declared |
623 | for (StringRef AlternativeName : OrigFunc->getNames()) { |
624 | size_t FileNameIdx = AlternativeName.find(C: '/'); |
625 | // Confirm the alternative name has the pattern Symbol/FileName/1 before |
626 | // using it |
627 | if (FileNameIdx == StringRef::npos || |
628 | AlternativeName.find(C: '/', From: FileNameIdx + 1) == StringRef::npos) |
629 | continue; |
630 | return AlternativeName; |
631 | } |
632 | return OrigFunc->getOneName(); |
633 | } |
634 | |
635 | bool DataAggregator::doBasicSample(BinaryFunction &OrigFunc, uint64_t Address, |
636 | uint64_t Count) { |
637 | // To record executed bytes, use basic block size as is regardless of BAT. |
638 | uint64_t BlockSize = 0; |
639 | if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset( |
640 | Offset: Address - OrigFunc.getAddress())) |
641 | BlockSize = BB->getOriginalSize(); |
642 | |
643 | BinaryFunction *ParentFunc = getBATParentFunction(Func: OrigFunc); |
644 | BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc; |
645 | // Attach executed bytes to parent function in case of cold fragment. |
646 | Func.SampleCountInBytes += Count * BlockSize; |
647 | |
648 | auto I = NamesToBasicSamples.find(x: Func.getOneName()); |
649 | if (I == NamesToBasicSamples.end()) { |
650 | bool Success; |
651 | StringRef LocName = getLocationName(Func, BAT); |
652 | std::tie(args&: I, args&: Success) = NamesToBasicSamples.insert(x: std::make_pair( |
653 | x: Func.getOneName(), |
654 | y: FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy()))); |
655 | } |
656 | |
657 | Address -= Func.getAddress(); |
658 | if (BAT) |
659 | Address = BAT->translate(FuncAddress: Func.getAddress(), Offset: Address, /*IsBranchSrc=*/false); |
660 | |
661 | I->second.bumpCount(Offset: Address, Count); |
662 | return true; |
663 | } |
664 | |
665 | bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From, |
666 | uint64_t To, uint64_t Count, |
667 | uint64_t Mispreds) { |
668 | FuncBranchData *AggrData = getBranchData(BF: Func); |
669 | if (!AggrData) { |
670 | AggrData = &NamesToBranches[Func.getOneName()]; |
671 | AggrData->Name = getLocationName(Func, BAT); |
672 | setBranchData(BF: Func, FBD: AggrData); |
673 | } |
674 | |
675 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " |
676 | << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To)); |
677 | AggrData->bumpBranchCount(OffsetFrom: From, OffsetTo: To, Count, Mispreds); |
678 | return true; |
679 | } |
680 | |
681 | bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, |
682 | BinaryFunction *ToFunc, uint64_t From, |
683 | uint64_t To, uint64_t Count, |
684 | uint64_t Mispreds) { |
685 | FuncBranchData *FromAggrData = nullptr; |
686 | FuncBranchData *ToAggrData = nullptr; |
687 | StringRef SrcFunc; |
688 | StringRef DstFunc; |
689 | if (FromFunc) { |
690 | SrcFunc = getLocationName(Func: *FromFunc, BAT); |
691 | FromAggrData = getBranchData(BF: *FromFunc); |
692 | if (!FromAggrData) { |
693 | FromAggrData = &NamesToBranches[FromFunc->getOneName()]; |
694 | FromAggrData->Name = SrcFunc; |
695 | setBranchData(BF: *FromFunc, FBD: FromAggrData); |
696 | } |
697 | |
698 | recordExit(BF&: *FromFunc, From, Mispred: Mispreds, Count); |
699 | } |
700 | if (ToFunc) { |
701 | DstFunc = getLocationName(Func: *ToFunc, BAT); |
702 | ToAggrData = getBranchData(BF: *ToFunc); |
703 | if (!ToAggrData) { |
704 | ToAggrData = &NamesToBranches[ToFunc->getOneName()]; |
705 | ToAggrData->Name = DstFunc; |
706 | setBranchData(BF: *ToFunc, FBD: ToAggrData); |
707 | } |
708 | |
709 | recordEntry(BF&: *ToFunc, To, Mispred: Mispreds, Count); |
710 | } |
711 | |
712 | if (FromAggrData) |
713 | FromAggrData->bumpCallCount(OffsetFrom: From, To: Location(!DstFunc.empty(), DstFunc, To), |
714 | Count, Mispreds); |
715 | if (ToAggrData) |
716 | ToAggrData->bumpEntryCount(From: Location(!SrcFunc.empty(), SrcFunc, From), OffsetTo: To, |
717 | Count, Mispreds); |
718 | return true; |
719 | } |
720 | |
721 | bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, |
722 | uint64_t Mispreds) { |
723 | // Returns whether \p Offset in \p Func contains a return instruction. |
724 | auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) { |
725 | auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(Inst: *MI); }; |
726 | return Func.hasInstructions() |
727 | ? isReturn(Func.getInstructionAtOffset(Offset)) |
728 | : isReturn(Func.disassembleInstructionAtOffset(Offset)); |
729 | }; |
730 | |
731 | // Mutates \p Addr to an offset into the containing function, performing BAT |
732 | // offset translation and parent lookup. |
733 | // |
734 | // Returns the containing function (or BAT parent) and whether the address |
735 | // corresponds to a return (if \p IsFrom) or a call continuation (otherwise). |
736 | auto handleAddress = [&](uint64_t &Addr, bool IsFrom) { |
737 | BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: Addr); |
738 | if (!Func) { |
739 | Addr = 0; |
740 | return std::pair{Func, false}; |
741 | } |
742 | |
743 | Addr -= Func->getAddress(); |
744 | |
745 | bool IsRet = IsFrom && checkReturn(*Func, Addr); |
746 | |
747 | if (BAT) |
748 | Addr = BAT->translate(FuncAddress: Func->getAddress(), Offset: Addr, IsBranchSrc: IsFrom); |
749 | |
750 | if (BinaryFunction *ParentFunc = getBATParentFunction(Func: *Func)) |
751 | Func = ParentFunc; |
752 | |
753 | return std::pair{Func, IsRet}; |
754 | }; |
755 | |
756 | auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true); |
757 | auto [ToFunc, _] = handleAddress(To, /*IsFrom*/ false); |
758 | if (!FromFunc && !ToFunc) |
759 | return false; |
760 | |
761 | // Ignore returns. |
762 | if (IsReturn) |
763 | return true; |
764 | |
765 | // Treat recursive control transfers as inter-branches. |
766 | if (FromFunc == ToFunc && To != 0) { |
767 | recordBranch(BF&: *FromFunc, From, To, Count, Mispreds); |
768 | return doIntraBranch(Func&: *FromFunc, From, To, Count, Mispreds); |
769 | } |
770 | |
771 | return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); |
772 | } |
773 | |
774 | bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, |
775 | uint64_t Count) { |
776 | BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(Address: First.To); |
777 | BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Address: Second.From); |
778 | if (!FromFunc || !ToFunc) { |
779 | LLVM_DEBUG({ |
780 | dbgs() << "Out of range trace starting in "; |
781 | if (FromFunc) |
782 | dbgs() << formatv("{0} @ {1:x}", *FromFunc, |
783 | First.To - FromFunc->getAddress()); |
784 | else |
785 | dbgs() << Twine::utohexstr(First.To); |
786 | dbgs() << " and ending in "; |
787 | if (ToFunc) |
788 | dbgs() << formatv("{0} @ {1:x}", *ToFunc, |
789 | Second.From - ToFunc->getAddress()); |
790 | else |
791 | dbgs() << Twine::utohexstr(Second.From); |
792 | dbgs() << '\n'; |
793 | }); |
794 | NumLongRangeTraces += Count; |
795 | return false; |
796 | } |
797 | if (FromFunc != ToFunc) { |
798 | NumInvalidTraces += Count; |
799 | LLVM_DEBUG({ |
800 | dbgs() << "Invalid trace starting in "<< FromFunc->getPrintName() |
801 | << formatv(" @ {0:x}", First.To - FromFunc->getAddress()) |
802 | << " and ending in "<< ToFunc->getPrintName() |
803 | << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress()); |
804 | }); |
805 | return false; |
806 | } |
807 | |
808 | // Set ParentFunc to BAT parent function or FromFunc itself. |
809 | BinaryFunction *ParentFunc = getBATParentFunction(Func: *FromFunc); |
810 | if (!ParentFunc) |
811 | ParentFunc = FromFunc; |
812 | ParentFunc->SampleCountInBytes += Count * (Second.From - First.To); |
813 | |
814 | const uint64_t FuncAddress = FromFunc->getAddress(); |
815 | std::optional<BoltAddressTranslation::FallthroughListTy> FTs = |
816 | BAT && BAT->isBATFunction(Address: FuncAddress) |
817 | ? BAT->getFallthroughsInTrace(FuncAddress, From: First.To, To: Second.From) |
818 | : getFallthroughsInTrace(BF&: *FromFunc, First, Second, Count); |
819 | if (!FTs) { |
820 | LLVM_DEBUG( |
821 | dbgs() << "Invalid trace starting in "<< FromFunc->getPrintName() |
822 | << " @ "<< Twine::utohexstr(First.To - FromFunc->getAddress()) |
823 | << " and ending in "<< ToFunc->getPrintName() << " @ " |
824 | << ToFunc->getPrintName() << " @ " |
825 | << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); |
826 | NumInvalidTraces += Count; |
827 | return false; |
828 | } |
829 | |
830 | LLVM_DEBUG(dbgs() << "Processing "<< FTs->size() << " fallthroughs for " |
831 | << FromFunc->getPrintName() << ":" |
832 | << Twine::utohexstr(First.To) << " to " |
833 | << Twine::utohexstr(Second.From) << ".\n"); |
834 | for (auto [From, To] : *FTs) { |
835 | if (BAT) { |
836 | From = BAT->translate(FuncAddress: FromFunc->getAddress(), Offset: From, /*IsBranchSrc=*/true); |
837 | To = BAT->translate(FuncAddress: FromFunc->getAddress(), Offset: To, /*IsBranchSrc=*/false); |
838 | } |
839 | doIntraBranch(Func&: *ParentFunc, From, To, Count, Mispreds: false); |
840 | } |
841 | |
842 | return true; |
843 | } |
844 | |
845 | std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>> |
846 | DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, |
847 | const LBREntry &FirstLBR, |
848 | const LBREntry &SecondLBR, |
849 | uint64_t Count) const { |
850 | SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches; |
851 | |
852 | BinaryContext &BC = BF.getBinaryContext(); |
853 | |
854 | // Offsets of the trace within this function. |
855 | const uint64_t From = FirstLBR.To - BF.getAddress(); |
856 | const uint64_t To = SecondLBR.From - BF.getAddress(); |
857 | |
858 | if (From > To) |
859 | return std::nullopt; |
860 | |
861 | // Accept fall-throughs inside pseudo functions (PLT/thunks). |
862 | // This check has to be above BF.empty as pseudo functions would pass it: |
863 | // pseudo => ignored => CFG not built => empty. |
864 | // If we return nullopt, trace would be reported as mismatching disassembled |
865 | // function contents which it is not. To avoid this, return an empty |
866 | // fall-through list instead. |
867 | if (BF.isPseudo()) |
868 | return Branches; |
869 | |
870 | if (!BF.isSimple()) |
871 | return std::nullopt; |
872 | |
873 | assert(BF.hasCFG() && "can only record traces in CFG state"); |
874 | |
875 | const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: From); |
876 | const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(Offset: To); |
877 | |
878 | if (!FromBB || !ToBB) |
879 | return std::nullopt; |
880 | |
881 | // Adjust FromBB if the first LBR is a return from the last instruction in |
882 | // the previous block (that instruction should be a call). |
883 | if (From == FromBB->getOffset() && !BF.containsAddress(PC: FirstLBR.From) && |
884 | !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { |
885 | const BinaryBasicBlock *PrevBB = |
886 | BF.getLayout().getBlock(Index: FromBB->getIndex() - 1); |
887 | if (PrevBB->getSuccessor(Label: FromBB->getLabel())) { |
888 | const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); |
889 | if (Instr && BC.MIB->isCall(Inst: *Instr)) |
890 | FromBB = PrevBB; |
891 | else |
892 | LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): "<< FirstLBR |
893 | << '\n'); |
894 | } else { |
895 | LLVM_DEBUG(dbgs() << "invalid incoming LBR: "<< FirstLBR << '\n'); |
896 | } |
897 | } |
898 | |
899 | // Fill out information for fall-through edges. The From and To could be |
900 | // within the same basic block, e.g. when two call instructions are in the |
901 | // same block. In this case we skip the processing. |
902 | if (FromBB == ToBB) |
903 | return Branches; |
904 | |
905 | // Process blocks in the original layout order. |
906 | BinaryBasicBlock *BB = BF.getLayout().getBlock(Index: FromBB->getIndex()); |
907 | assert(BB == FromBB && "index mismatch"); |
908 | while (BB != ToBB) { |
909 | BinaryBasicBlock *NextBB = BF.getLayout().getBlock(Index: BB->getIndex() + 1); |
910 | assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout"); |
911 | |
912 | // Check for bad LBRs. |
913 | if (!BB->getSuccessor(Label: NextBB->getLabel())) { |
914 | LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" |
915 | << " "<< FirstLBR << '\n' |
916 | << " "<< SecondLBR << '\n'); |
917 | return std::nullopt; |
918 | } |
919 | |
920 | const MCInst *Instr = BB->getLastNonPseudoInstr(); |
921 | uint64_t Offset = 0; |
922 | if (Instr) |
923 | Offset = BC.MIB->getOffsetWithDefault(Inst: *Instr, Default: 0); |
924 | else |
925 | Offset = BB->getOffset(); |
926 | |
927 | Branches.emplace_back(Args&: Offset, Args: NextBB->getOffset()); |
928 | |
929 | BB = NextBB; |
930 | } |
931 | |
932 | // Record fall-through jumps |
933 | for (const auto &[FromOffset, ToOffset] : Branches) { |
934 | BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: FromOffset); |
935 | BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(Offset: ToOffset); |
936 | assert(FromBB && ToBB); |
937 | BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(Succ: *ToBB); |
938 | BI.Count += Count; |
939 | } |
940 | |
941 | return Branches; |
942 | } |
943 | |
944 | bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred, |
945 | uint64_t Count) const { |
946 | if (To > BF.getSize()) |
947 | return false; |
948 | |
949 | if (!BF.hasProfile()) |
950 | BF.ExecutionCount = 0; |
951 | |
952 | BinaryBasicBlock *EntryBB = nullptr; |
953 | if (To == 0) { |
954 | BF.ExecutionCount += Count; |
955 | if (!BF.empty()) |
956 | EntryBB = &BF.front(); |
957 | } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(Offset: To)) { |
958 | if (BB->isEntryPoint()) |
959 | EntryBB = BB; |
960 | } |
961 | |
962 | if (EntryBB) |
963 | EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); |
964 | |
965 | return true; |
966 | } |
967 | |
968 | bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, |
969 | uint64_t Count) const { |
970 | if (!BF.isSimple() || From > BF.getSize()) |
971 | return false; |
972 | |
973 | if (!BF.hasProfile()) |
974 | BF.ExecutionCount = 0; |
975 | |
976 | return true; |
977 | } |
978 | |
979 | ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() { |
980 | LBREntry Res; |
981 | ErrorOr<StringRef> FromStrRes = parseString(EndChar: '/'); |
982 | if (std::error_code EC = FromStrRes.getError()) |
983 | return EC; |
984 | StringRef OffsetStr = FromStrRes.get(); |
985 | if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.From)) { |
986 | reportError(ErrorMsg: "expected hexadecimal number with From address"); |
987 | Diag << "Found: "<< OffsetStr << "\n"; |
988 | return make_error_code(E: llvm::errc::io_error); |
989 | } |
990 | |
991 | ErrorOr<StringRef> ToStrRes = parseString(EndChar: '/'); |
992 | if (std::error_code EC = ToStrRes.getError()) |
993 | return EC; |
994 | OffsetStr = ToStrRes.get(); |
995 | if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.To)) { |
996 | reportError(ErrorMsg: "expected hexadecimal number with To address"); |
997 | Diag << "Found: "<< OffsetStr << "\n"; |
998 | return make_error_code(E: llvm::errc::io_error); |
999 | } |
1000 | |
1001 | ErrorOr<StringRef> MispredStrRes = parseString(EndChar: '/'); |
1002 | if (std::error_code EC = MispredStrRes.getError()) |
1003 | return EC; |
1004 | StringRef MispredStr = MispredStrRes.get(); |
1005 | if (MispredStr.size() != 1 || |
1006 | (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { |
1007 | reportError(ErrorMsg: "expected single char for mispred bit"); |
1008 | Diag << "Found: "<< MispredStr << "\n"; |
1009 | return make_error_code(E: llvm::errc::io_error); |
1010 | } |
1011 | Res.Mispred = MispredStr[0] == 'M'; |
1012 | |
1013 | static bool MispredWarning = true; |
1014 | if (MispredStr[0] == '-' && MispredWarning) { |
1015 | errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; |
1016 | MispredWarning = false; |
1017 | } |
1018 | |
1019 | ErrorOr<StringRef> Rest = parseString(EndChar: FieldSeparator, EndNl: true); |
1020 | if (std::error_code EC = Rest.getError()) |
1021 | return EC; |
1022 | if (Rest.get().size() < 5) { |
1023 | reportError(ErrorMsg: "expected rest of LBR entry"); |
1024 | Diag << "Found: "<< Rest.get() << "\n"; |
1025 | return make_error_code(E: llvm::errc::io_error); |
1026 | } |
1027 | return Res; |
1028 | } |
1029 | |
1030 | bool DataAggregator::checkAndConsumeFS() { |
1031 | if (ParsingBuf[0] != FieldSeparator) |
1032 | return false; |
1033 | |
1034 | ParsingBuf = ParsingBuf.drop_front(N: 1); |
1035 | Col += 1; |
1036 | return true; |
1037 | } |
1038 | |
1039 | void DataAggregator::consumeRestOfLine() { |
1040 | size_t LineEnd = ParsingBuf.find_first_of(C: '\n'); |
1041 | if (LineEnd == StringRef::npos) { |
1042 | ParsingBuf = StringRef(); |
1043 | Col = 0; |
1044 | Line += 1; |
1045 | return; |
1046 | } |
1047 | ParsingBuf = ParsingBuf.drop_front(N: LineEnd + 1); |
1048 | Col = 0; |
1049 | Line += 1; |
1050 | } |
1051 | |
1052 | bool DataAggregator::checkNewLine() { |
1053 | return ParsingBuf[0] == '\n'; |
1054 | } |
1055 | |
1056 | ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { |
1057 | PerfBranchSample Res; |
1058 | |
1059 | while (checkAndConsumeFS()) { |
1060 | } |
1061 | |
1062 | ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true); |
1063 | if (std::error_code EC = PIDRes.getError()) |
1064 | return EC; |
1065 | auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes); |
1066 | if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) { |
1067 | consumeRestOfLine(); |
1068 | return make_error_code(E: errc::no_such_process); |
1069 | } |
1070 | |
1071 | if (checkAndConsumeNewLine()) |
1072 | return Res; |
1073 | |
1074 | while (!checkAndConsumeNewLine()) { |
1075 | checkAndConsumeFS(); |
1076 | |
1077 | ErrorOr<LBREntry> LBRRes = parseLBREntry(); |
1078 | if (std::error_code EC = LBRRes.getError()) |
1079 | return EC; |
1080 | LBREntry LBR = LBRRes.get(); |
1081 | if (ignoreKernelInterrupt(LBR)) |
1082 | continue; |
1083 | if (!BC->HasFixedLoadAddress) |
1084 | adjustLBR(LBR, MMI: MMapInfoIter->second); |
1085 | Res.LBR.push_back(Elt: LBR); |
1086 | } |
1087 | |
1088 | return Res; |
1089 | } |
1090 | |
1091 | ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { |
1092 | while (checkAndConsumeFS()) { |
1093 | } |
1094 | |
1095 | ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true); |
1096 | if (std::error_code EC = PIDRes.getError()) |
1097 | return EC; |
1098 | |
1099 | auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes); |
1100 | if (MMapInfoIter == BinaryMMapInfo.end()) { |
1101 | consumeRestOfLine(); |
1102 | return PerfBasicSample{.EventName: StringRef(), .PC: 0}; |
1103 | } |
1104 | |
1105 | while (checkAndConsumeFS()) { |
1106 | } |
1107 | |
1108 | ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator); |
1109 | if (std::error_code EC = Event.getError()) |
1110 | return EC; |
1111 | |
1112 | while (checkAndConsumeFS()) { |
1113 | } |
1114 | |
1115 | ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator, EndNl: true); |
1116 | if (std::error_code EC = AddrRes.getError()) |
1117 | return EC; |
1118 | |
1119 | if (!checkAndConsumeNewLine()) { |
1120 | reportError(ErrorMsg: "expected end of line"); |
1121 | return make_error_code(E: llvm::errc::io_error); |
1122 | } |
1123 | |
1124 | uint64_t Address = *AddrRes; |
1125 | if (!BC->HasFixedLoadAddress) |
1126 | adjustAddress(Address, MMI: MMapInfoIter->second); |
1127 | |
1128 | return PerfBasicSample{.EventName: Event.get(), .PC: Address}; |
1129 | } |
1130 | |
1131 | ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { |
1132 | PerfMemSample Res{.PC: 0, .Addr: 0}; |
1133 | |
1134 | while (checkAndConsumeFS()) { |
1135 | } |
1136 | |
1137 | ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true); |
1138 | if (std::error_code EC = PIDRes.getError()) |
1139 | return EC; |
1140 | |
1141 | auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes); |
1142 | if (MMapInfoIter == BinaryMMapInfo.end()) { |
1143 | consumeRestOfLine(); |
1144 | return Res; |
1145 | } |
1146 | |
1147 | while (checkAndConsumeFS()) { |
1148 | } |
1149 | |
1150 | ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator); |
1151 | if (std::error_code EC = Event.getError()) |
1152 | return EC; |
1153 | if (!Event.get().contains(Other: "mem-loads")) { |
1154 | consumeRestOfLine(); |
1155 | return Res; |
1156 | } |
1157 | |
1158 | while (checkAndConsumeFS()) { |
1159 | } |
1160 | |
1161 | ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator); |
1162 | if (std::error_code EC = AddrRes.getError()) |
1163 | return EC; |
1164 | |
1165 | while (checkAndConsumeFS()) { |
1166 | } |
1167 | |
1168 | ErrorOr<uint64_t> PCRes = parseHexField(EndChar: FieldSeparator, EndNl: true); |
1169 | if (std::error_code EC = PCRes.getError()) { |
1170 | consumeRestOfLine(); |
1171 | return EC; |
1172 | } |
1173 | |
1174 | if (!checkAndConsumeNewLine()) { |
1175 | reportError(ErrorMsg: "expected end of line"); |
1176 | return make_error_code(E: llvm::errc::io_error); |
1177 | } |
1178 | |
1179 | uint64_t Address = *AddrRes; |
1180 | if (!BC->HasFixedLoadAddress) |
1181 | adjustAddress(Address, MMI: MMapInfoIter->second); |
1182 | |
1183 | return PerfMemSample{.PC: PCRes.get(), .Addr: Address}; |
1184 | } |
1185 | |
1186 | ErrorOr<Location> DataAggregator::parseLocationOrOffset() { |
1187 | auto parseOffset = [this]() -> ErrorOr<Location> { |
1188 | ErrorOr<uint64_t> Res = parseHexField(EndChar: FieldSeparator); |
1189 | if (std::error_code EC = Res.getError()) |
1190 | return EC; |
1191 | return Location(Res.get()); |
1192 | }; |
1193 | |
1194 | size_t Sep = ParsingBuf.find_first_of(Chars: " \n"); |
1195 | if (Sep == StringRef::npos) |
1196 | return parseOffset(); |
1197 | StringRef LookAhead = ParsingBuf.substr(Start: 0, N: Sep); |
1198 | if (!LookAhead.contains(C: ':')) |
1199 | return parseOffset(); |
1200 | |
1201 | ErrorOr<StringRef> BuildID = parseString(EndChar: ':'); |
1202 | if (std::error_code EC = BuildID.getError()) |
1203 | return EC; |
1204 | ErrorOr<uint64_t> Offset = parseHexField(EndChar: FieldSeparator); |
1205 | if (std::error_code EC = Offset.getError()) |
1206 | return EC; |
1207 | return Location(true, BuildID.get(), Offset.get()); |
1208 | } |
1209 | |
1210 | std::error_code DataAggregator::parseAggregatedLBREntry() { |
1211 | enum AggregatedLBREntry : char { |
1212 | INVALID = 0, |
1213 | EVENT_NAME, // E |
1214 | TRACE, // T |
1215 | SAMPLE, // S |
1216 | BRANCH, // B |
1217 | FT, // F |
1218 | FT_EXTERNAL_ORIGIN // f |
1219 | } Type = INVALID; |
1220 | |
1221 | // The number of fields to parse, set based on Type. |
1222 | int AddrNum = 0; |
1223 | int CounterNum = 0; |
1224 | // Storage for parsed fields. |
1225 | StringRef EventName; |
1226 | std::optional<Location> Addr[3]; |
1227 | int64_t Counters[2] = {0}; |
1228 | |
1229 | while (Type == INVALID || Type == EVENT_NAME) { |
1230 | while (checkAndConsumeFS()) { |
1231 | } |
1232 | ErrorOr<StringRef> StrOrErr = |
1233 | parseString(EndChar: FieldSeparator, EndNl: Type == EVENT_NAME); |
1234 | if (std::error_code EC = StrOrErr.getError()) |
1235 | return EC; |
1236 | StringRef Str = StrOrErr.get(); |
1237 | |
1238 | if (Type == EVENT_NAME) { |
1239 | EventName = Str; |
1240 | break; |
1241 | } |
1242 | |
1243 | Type = StringSwitch<AggregatedLBREntry>(Str) |
1244 | .Case(S: "T", Value: TRACE) |
1245 | .Case(S: "S", Value: SAMPLE) |
1246 | .Case(S: "E", Value: EVENT_NAME) |
1247 | .Case(S: "B", Value: BRANCH) |
1248 | .Case(S: "F", Value: FT) |
1249 | .Case(S: "f", Value: FT_EXTERNAL_ORIGIN) |
1250 | .Default(Value: INVALID); |
1251 | |
1252 | if (Type == INVALID) { |
1253 | reportError(ErrorMsg: "expected T, S, E, B, F or f"); |
1254 | return make_error_code(E: llvm::errc::io_error); |
1255 | } |
1256 | |
1257 | using SSI = StringSwitch<int>; |
1258 | AddrNum = SSI(Str).Case(S: "T", Value: 3).Case(S: "S", Value: 1).Case(S: "E", Value: 0).Default(Value: 2); |
1259 | CounterNum = SSI(Str).Case(S: "B", Value: 2).Case(S: "E", Value: 0).Default(Value: 1); |
1260 | } |
1261 | |
1262 | for (int I = 0; I < AddrNum; ++I) { |
1263 | while (checkAndConsumeFS()) { |
1264 | } |
1265 | ErrorOr<Location> AddrOrErr = parseLocationOrOffset(); |
1266 | if (std::error_code EC = AddrOrErr.getError()) |
1267 | return EC; |
1268 | Addr[I] = AddrOrErr.get(); |
1269 | } |
1270 | |
1271 | for (int I = 0; I < CounterNum; ++I) { |
1272 | while (checkAndConsumeFS()) { |
1273 | } |
1274 | ErrorOr<int64_t> CountOrErr = |
1275 | parseNumberField(EndChar: FieldSeparator, EndNl: I + 1 == CounterNum); |
1276 | if (std::error_code EC = CountOrErr.getError()) |
1277 | return EC; |
1278 | Counters[I] = CountOrErr.get(); |
1279 | } |
1280 | |
1281 | if (!checkAndConsumeNewLine()) { |
1282 | reportError(ErrorMsg: "expected end of line"); |
1283 | return make_error_code(E: llvm::errc::io_error); |
1284 | } |
1285 | |
1286 | if (Type == EVENT_NAME) { |
1287 | EventNames.insert(key: EventName); |
1288 | return std::error_code(); |
1289 | } |
1290 | |
1291 | const uint64_t FromOffset = Addr[0]->Offset; |
1292 | BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(Address: FromOffset); |
1293 | if (FromFunc) |
1294 | FromFunc->setHasProfileAvailable(); |
1295 | |
1296 | int64_t Count = Counters[0]; |
1297 | int64_t Mispreds = Counters[1]; |
1298 | |
1299 | if (Type == SAMPLE) { |
1300 | BasicSamples[FromOffset] += Count; |
1301 | NumTotalSamples += Count; |
1302 | return std::error_code(); |
1303 | } |
1304 | |
1305 | const uint64_t ToOffset = Addr[1]->Offset; |
1306 | BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Address: ToOffset); |
1307 | if (ToFunc) |
1308 | ToFunc->setHasProfileAvailable(); |
1309 | |
1310 | Trace Trace(FromOffset, ToOffset); |
1311 | // Taken trace |
1312 | if (Type == TRACE || Type == BRANCH) { |
1313 | TakenBranchInfo &Info = BranchLBRs[Trace]; |
1314 | Info.TakenCount += Count; |
1315 | Info.MispredCount += Mispreds; |
1316 | |
1317 | NumTotalSamples += Count; |
1318 | } |
1319 | // Construct fallthrough part of the trace |
1320 | if (Type == TRACE) { |
1321 | const uint64_t TraceFtEndOffset = Addr[2]->Offset; |
1322 | Trace.From = ToOffset; |
1323 | Trace.To = TraceFtEndOffset; |
1324 | Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN; |
1325 | } |
1326 | // Add fallthrough trace |
1327 | if (Type != BRANCH) { |
1328 | FTInfo &Info = FallthroughLBRs[Trace]; |
1329 | (Type == FT ? Info.InternCount : Info.ExternCount) += Count; |
1330 | |
1331 | NumTraces += Count; |
1332 | } |
1333 | |
1334 | return std::error_code(); |
1335 | } |
1336 | |
1337 | bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { |
1338 | return opts::IgnoreInterruptLBR && |
1339 | (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); |
1340 | } |
1341 | |
1342 | std::error_code DataAggregator::printLBRHeatMap() { |
1343 | outs() << "PERF2BOLT: parse branch events...\n"; |
1344 | NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, |
1345 | TimerGroupDesc, opts::TimeAggregator); |
1346 | |
1347 | if (BC->IsLinuxKernel) { |
1348 | opts::HeatmapMaxAddress = 0xffffffffffffffff; |
1349 | opts::HeatmapMinAddress = KernelBaseAddr; |
1350 | } |
1351 | opts::HeatmapBlockSizes &HMBS = opts::HeatmapBlock; |
1352 | Heatmap HM(HMBS[0], opts::HeatmapMinAddress, opts::HeatmapMaxAddress, |
1353 | getTextSections(BC)); |
1354 | auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t { |
1355 | if (Symbol) |
1356 | if (ErrorOr<uint64_t> SymValue = BC->getSymbolValue(Symbol: *Symbol)) |
1357 | return SymValue.get(); |
1358 | return 0; |
1359 | }; |
1360 | HM.HotStart = getSymbolValue(BC->getHotTextStartSymbol()); |
1361 | HM.HotEnd = getSymbolValue(BC->getHotTextEndSymbol()); |
1362 | |
1363 | if (!NumTotalSamples) { |
1364 | if (opts::BasicAggregation) { |
1365 | errs() << "HEATMAP-ERROR: no basic event samples detected in profile. " |
1366 | "Cannot build heatmap."; |
1367 | } else { |
1368 | errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " |
1369 | "Cannot build heatmap. Use -nl for building heatmap from " |
1370 | "basic events.\n"; |
1371 | } |
1372 | exit(status: 1); |
1373 | } |
1374 | |
1375 | outs() << "HEATMAP: building heat map...\n"; |
1376 | |
1377 | // Register basic samples and perf LBR addresses not covered by fallthroughs. |
1378 | for (const auto &[PC, Hits] : BasicSamples) |
1379 | HM.registerAddress(Address: PC, Count: Hits); |
1380 | for (const auto &LBR : FallthroughLBRs) { |
1381 | const Trace &Trace = LBR.first; |
1382 | const FTInfo &Info = LBR.second; |
1383 | HM.registerAddressRange(StartAddress: Trace.From, EndAddress: Trace.To, |
1384 | Count: Info.InternCount + Info.ExternCount); |
1385 | } |
1386 | |
1387 | if (HM.getNumInvalidRanges()) |
1388 | outs() << "HEATMAP: invalid traces: "<< HM.getNumInvalidRanges() << '\n'; |
1389 | |
1390 | if (!HM.size()) { |
1391 | errs() << "HEATMAP-ERROR: no valid traces registered\n"; |
1392 | exit(status: 1); |
1393 | } |
1394 | |
1395 | HM.print(FileName: opts::HeatmapOutput); |
1396 | if (opts::HeatmapOutput == "-") { |
1397 | HM.printCDF(FileName: opts::HeatmapOutput); |
1398 | HM.printSectionHotness(Filename: opts::HeatmapOutput); |
1399 | } else { |
1400 | HM.printCDF(FileName: opts::HeatmapOutput + ".csv"); |
1401 | HM.printSectionHotness(Filename: opts::HeatmapOutput + "-section-hotness.csv"); |
1402 | } |
1403 | // Provide coarse-grained heatmaps if requested via zoom-out scales |
1404 | for (const uint64_t NewBucketSize : ArrayRef(HMBS).drop_front()) { |
1405 | HM.resizeBucket(NewSize: NewBucketSize); |
1406 | if (opts::HeatmapOutput == "-") |
1407 | HM.print(FileName: opts::HeatmapOutput); |
1408 | else |
1409 | HM.print(FileName: formatv(Fmt: "{0}-{1}", Vals&: opts::HeatmapOutput, Vals: NewBucketSize).str()); |
1410 | } |
1411 | |
1412 | return std::error_code(); |
1413 | } |
1414 | |
1415 | void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, |
1416 | bool NeedsSkylakeFix) { |
1417 | // LBRs are stored in reverse execution order. NextLBR refers to the next |
1418 | // executed branch record. |
1419 | const LBREntry *NextLBR = nullptr; |
1420 | uint32_t NumEntry = 0; |
1421 | for (const LBREntry &LBR : Sample.LBR) { |
1422 | ++NumEntry; |
1423 | // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) |
1424 | // sometimes record entry 32 as an exact copy of entry 31. This will cause |
1425 | // us to likely record an invalid trace and generate a stale function for |
1426 | // BAT mode (non BAT disassembles the function and is able to ignore this |
1427 | // trace at aggregation time). Drop first 2 entries (last two, in |
1428 | // chronological order) |
1429 | if (NeedsSkylakeFix && NumEntry <= 2) |
1430 | continue; |
1431 | if (NextLBR) { |
1432 | // Record fall-through trace. |
1433 | const uint64_t TraceFrom = LBR.To; |
1434 | const uint64_t TraceTo = NextLBR->From; |
1435 | const BinaryFunction *TraceBF = |
1436 | getBinaryFunctionContainingAddress(Address: TraceFrom); |
1437 | FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; |
1438 | if (TraceBF && TraceBF->containsAddress(PC: LBR.From)) |
1439 | ++Info.InternCount; |
1440 | else |
1441 | ++Info.ExternCount; |
1442 | ++NumTraces; |
1443 | } |
1444 | NextLBR = &LBR; |
1445 | |
1446 | TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)]; |
1447 | ++Info.TakenCount; |
1448 | Info.MispredCount += LBR.Mispred; |
1449 | } |
1450 | // Record LBR addresses not covered by fallthroughs (bottom-of-stack source |
1451 | // and top-of-stack target) as basic samples for heatmap. |
1452 | if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive && |
1453 | !Sample.LBR.empty()) { |
1454 | ++BasicSamples[Sample.LBR.front().To]; |
1455 | ++BasicSamples[Sample.LBR.back().From]; |
1456 | } |
1457 | } |
1458 | |
1459 | void DataAggregator::printLongRangeTracesDiagnostic() const { |
1460 | outs() << "PERF2BOLT: out of range traces involving unknown regions: " |
1461 | << NumLongRangeTraces; |
1462 | if (NumTraces > 0) |
1463 | outs() << format(Fmt: " (%.1f%%)", Vals: NumLongRangeTraces * 100.0f / NumTraces); |
1464 | outs() << "\n"; |
1465 | } |
1466 | |
1467 | static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1, |
1468 | float T2) { |
1469 | if (Denominator == 0) { |
1470 | outs() << "\n"; |
1471 | return 0; |
1472 | } |
1473 | float Percent = Numerator * 100.0f / Denominator; |
1474 | outs() << " ("; |
1475 | if (outs().has_colors()) { |
1476 | if (Percent > T2) |
1477 | outs().changeColor(Color: raw_ostream::RED); |
1478 | else if (Percent > T1) |
1479 | outs().changeColor(Color: raw_ostream::YELLOW); |
1480 | else |
1481 | outs().changeColor(Color: raw_ostream::GREEN); |
1482 | } |
1483 | outs() << format(Fmt: "%.1f%%", Vals: Percent); |
1484 | if (outs().has_colors()) |
1485 | outs().resetColor(); |
1486 | outs() << ")\n"; |
1487 | return Percent; |
1488 | } |
1489 | |
1490 | void DataAggregator::printBranchSamplesDiagnostics() const { |
1491 | outs() << "PERF2BOLT: traces mismatching disassembled function contents: " |
1492 | << NumInvalidTraces; |
1493 | if (printColoredPct(Numerator: NumInvalidTraces, Denominator: NumTraces, T1: 5, T2: 10) > 10) |
1494 | outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " |
1495 | "binary is probably not the same binary used during profiling " |
1496 | "collection. The generated data may be ineffective for improving " |
1497 | "performance\n\n"; |
1498 | printLongRangeTracesDiagnostic(); |
1499 | } |
1500 | |
1501 | void DataAggregator::printBasicSamplesDiagnostics( |
1502 | uint64_t OutOfRangeSamples) const { |
1503 | outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " |
1504 | << OutOfRangeSamples; |
1505 | if (printColoredPct(Numerator: OutOfRangeSamples, Denominator: NumTotalSamples, T1: 40, T2: 60) > 80) |
1506 | outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " |
1507 | "binary is probably not the same binary used during profiling " |
1508 | "collection. The generated data may be ineffective for improving " |
1509 | "performance\n\n"; |
1510 | } |
1511 | |
1512 | void DataAggregator::printBranchStacksDiagnostics( |
1513 | uint64_t IgnoredSamples) const { |
1514 | outs() << "PERF2BOLT: ignored samples: "<< IgnoredSamples; |
1515 | if (printColoredPct(Numerator: IgnoredSamples, Denominator: NumTotalSamples, T1: 20, T2: 50) > 50) |
1516 | errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " |
1517 | "were attributed to the input binary\n"; |
1518 | } |
1519 | |
1520 | std::error_code DataAggregator::parseBranchEvents() { |
1521 | outs() << "PERF2BOLT: parse branch events...\n"; |
1522 | NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, |
1523 | TimerGroupDesc, opts::TimeAggregator); |
1524 | |
1525 | uint64_t NumEntries = 0; |
1526 | uint64_t NumSamples = 0; |
1527 | uint64_t NumSamplesNoLBR = 0; |
1528 | bool NeedsSkylakeFix = false; |
1529 | |
1530 | while (hasData() && NumTotalSamples < opts::MaxSamples) { |
1531 | ++NumTotalSamples; |
1532 | |
1533 | ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); |
1534 | if (std::error_code EC = SampleRes.getError()) { |
1535 | if (EC == errc::no_such_process) |
1536 | continue; |
1537 | return EC; |
1538 | } |
1539 | ++NumSamples; |
1540 | |
1541 | PerfBranchSample &Sample = SampleRes.get(); |
1542 | |
1543 | if (Sample.LBR.empty()) { |
1544 | ++NumSamplesNoLBR; |
1545 | continue; |
1546 | } |
1547 | |
1548 | NumEntries += Sample.LBR.size(); |
1549 | if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { |
1550 | errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; |
1551 | NeedsSkylakeFix = true; |
1552 | } |
1553 | |
1554 | parseLBRSample(Sample, NeedsSkylakeFix); |
1555 | } |
1556 | |
1557 | for (const Trace &Trace : llvm::make_first_range(c&: BranchLBRs)) |
1558 | for (const uint64_t Addr : {Trace.From, Trace.To}) |
1559 | if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Addr)) |
1560 | BF->setHasProfileAvailable(); |
1561 | |
1562 | outs() << "PERF2BOLT: read "<< NumSamples << " samples and "<< NumEntries |
1563 | << " LBR entries\n"; |
1564 | if (NumTotalSamples) { |
1565 | if (NumSamples && NumSamplesNoLBR == NumSamples) { |
1566 | // Note: we don't know if perf2bolt is being used to parse memory samples |
1567 | // at this point. In this case, it is OK to parse zero LBRs. |
1568 | errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " |
1569 | "LBR. Record profile with perf record -j any or run perf2bolt " |
1570 | "in no-LBR mode with -nl (the performance improvement in -nl " |
1571 | "mode may be limited)\n"; |
1572 | } else { |
1573 | printBranchStacksDiagnostics(IgnoredSamples: NumTotalSamples - NumSamples); |
1574 | } |
1575 | } |
1576 | |
1577 | return std::error_code(); |
1578 | } |
1579 | |
1580 | void DataAggregator::processBranchEvents() { |
1581 | outs() << "PERF2BOLT: processing branch events...\n"; |
1582 | NamedRegionTimer T("processBranch", "Processing branch events", |
1583 | TimerGroupName, TimerGroupDesc, opts::TimeAggregator); |
1584 | |
1585 | for (const auto &AggrLBR : FallthroughLBRs) { |
1586 | const Trace &Loc = AggrLBR.first; |
1587 | const FTInfo &Info = AggrLBR.second; |
1588 | LBREntry First{.From: Loc.From, .To: Loc.From, .Mispred: false}; |
1589 | LBREntry Second{.From: Loc.To, .To: Loc.To, .Mispred: false}; |
1590 | if (Info.InternCount) |
1591 | doTrace(First, Second, Count: Info.InternCount); |
1592 | if (Info.ExternCount) { |
1593 | First.From = 0; |
1594 | doTrace(First, Second, Count: Info.ExternCount); |
1595 | } |
1596 | } |
1597 | |
1598 | for (const auto &AggrLBR : BranchLBRs) { |
1599 | const Trace &Loc = AggrLBR.first; |
1600 | const TakenBranchInfo &Info = AggrLBR.second; |
1601 | doBranch(From: Loc.From, To: Loc.To, Count: Info.TakenCount, Mispreds: Info.MispredCount); |
1602 | } |
1603 | printBranchSamplesDiagnostics(); |
1604 | } |
1605 | |
1606 | std::error_code DataAggregator::parseBasicEvents() { |
1607 | outs() << "PERF2BOLT: parsing basic events (without LBR)...\n"; |
1608 | NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName, |
1609 | TimerGroupDesc, opts::TimeAggregator); |
1610 | while (hasData()) { |
1611 | ErrorOr<PerfBasicSample> Sample = parseBasicSample(); |
1612 | if (std::error_code EC = Sample.getError()) |
1613 | return EC; |
1614 | |
1615 | if (!Sample->PC) |
1616 | continue; |
1617 | ++NumTotalSamples; |
1618 | |
1619 | if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC)) |
1620 | BF->setHasProfileAvailable(); |
1621 | |
1622 | ++BasicSamples[Sample->PC]; |
1623 | EventNames.insert(key: Sample->EventName); |
1624 | } |
1625 | outs() << "PERF2BOLT: read "<< NumTotalSamples << " basic samples\n"; |
1626 | |
1627 | return std::error_code(); |
1628 | } |
1629 | |
1630 | void DataAggregator::processBasicEvents() { |
1631 | outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; |
1632 | NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, |
1633 | TimerGroupDesc, opts::TimeAggregator); |
1634 | uint64_t OutOfRangeSamples = 0; |
1635 | for (auto &Sample : BasicSamples) { |
1636 | const uint64_t PC = Sample.first; |
1637 | const uint64_t HitCount = Sample.second; |
1638 | BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC); |
1639 | if (!Func) { |
1640 | OutOfRangeSamples += HitCount; |
1641 | continue; |
1642 | } |
1643 | |
1644 | doBasicSample(OrigFunc&: *Func, Address: PC, Count: HitCount); |
1645 | } |
1646 | |
1647 | printBasicSamplesDiagnostics(OutOfRangeSamples); |
1648 | } |
1649 | |
1650 | std::error_code DataAggregator::parseMemEvents() { |
1651 | outs() << "PERF2BOLT: parsing memory events...\n"; |
1652 | NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, |
1653 | TimerGroupDesc, opts::TimeAggregator); |
1654 | while (hasData()) { |
1655 | ErrorOr<PerfMemSample> Sample = parseMemSample(); |
1656 | if (std::error_code EC = Sample.getError()) |
1657 | return EC; |
1658 | |
1659 | if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC)) |
1660 | BF->setHasProfileAvailable(); |
1661 | |
1662 | MemSamples.emplace_back(args: std::move(Sample.get())); |
1663 | } |
1664 | |
1665 | return std::error_code(); |
1666 | } |
1667 | |
1668 | void DataAggregator::processMemEvents() { |
1669 | NamedRegionTimer T("ProcessMemEvents", "Processing mem events", |
1670 | TimerGroupName, TimerGroupDesc, opts::TimeAggregator); |
1671 | for (const PerfMemSample &Sample : MemSamples) { |
1672 | uint64_t PC = Sample.PC; |
1673 | uint64_t Addr = Sample.Addr; |
1674 | StringRef FuncName; |
1675 | StringRef MemName; |
1676 | |
1677 | // Try to resolve symbol for PC |
1678 | BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC); |
1679 | if (!Func) { |
1680 | LLVM_DEBUG(if (PC != 0) { |
1681 | dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr); |
1682 | }); |
1683 | continue; |
1684 | } |
1685 | |
1686 | FuncName = Func->getOneName(); |
1687 | PC -= Func->getAddress(); |
1688 | |
1689 | // Try to resolve symbol for memory load |
1690 | if (BinaryData *BD = BC->getBinaryDataContainingAddress(Address: Addr)) { |
1691 | MemName = BD->getName(); |
1692 | Addr -= BD->getAddress(); |
1693 | } else if (opts::FilterMemProfile) { |
1694 | // Filter out heap/stack accesses |
1695 | continue; |
1696 | } |
1697 | |
1698 | const Location FuncLoc(!FuncName.empty(), FuncName, PC); |
1699 | const Location AddrLoc(!MemName.empty(), MemName, Addr); |
1700 | |
1701 | FuncMemData *MemData = &NamesToMemEvents[FuncName]; |
1702 | MemData->Name = FuncName; |
1703 | setMemData(BF: *Func, FMD: MemData); |
1704 | MemData->update(Offset: FuncLoc, Addr: AddrLoc); |
1705 | LLVM_DEBUG(dbgs() << "Mem event: "<< FuncLoc << " = "<< AddrLoc << "\n"); |
1706 | } |
1707 | } |
1708 | |
1709 | std::error_code DataAggregator::parsePreAggregatedLBRSamples() { |
1710 | outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; |
1711 | NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", |
1712 | TimerGroupName, TimerGroupDesc, opts::TimeAggregator); |
1713 | size_t AggregatedLBRs = 0; |
1714 | while (hasData()) { |
1715 | if (std::error_code EC = parseAggregatedLBREntry()) |
1716 | return EC; |
1717 | ++AggregatedLBRs; |
1718 | } |
1719 | |
1720 | outs() << "PERF2BOLT: read "<< AggregatedLBRs << " aggregated LBR entries\n"; |
1721 | |
1722 | return std::error_code(); |
1723 | } |
1724 | |
1725 | std::optional<int32_t> DataAggregator::parseCommExecEvent() { |
1726 | size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n"); |
1727 | if (LineEnd == StringRef::npos) { |
1728 | reportError(ErrorMsg: "expected rest of line"); |
1729 | Diag << "Found: "<< ParsingBuf << "\n"; |
1730 | return std::nullopt; |
1731 | } |
1732 | StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd); |
1733 | |
1734 | size_t Pos = Line.find(Str: "PERF_RECORD_COMM exec"); |
1735 | if (Pos == StringRef::npos) |
1736 | return std::nullopt; |
1737 | Line = Line.drop_front(N: Pos); |
1738 | |
1739 | // Line: |
1740 | // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" |
1741 | StringRef PIDStr = Line.rsplit(Separator: ':').second.split(Separator: '/').first; |
1742 | int32_t PID; |
1743 | if (PIDStr.getAsInteger(Radix: 10, Result&: PID)) { |
1744 | reportError(ErrorMsg: "expected PID"); |
1745 | Diag << "Found: "<< PIDStr << "in '"<< Line << "'\n"; |
1746 | return std::nullopt; |
1747 | } |
1748 | |
1749 | return PID; |
1750 | } |
1751 | |
1752 | namespace { |
1753 | std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) { |
1754 | const StringRef SecTimeStr = TimeStr.split(Separator: '.').first; |
1755 | const StringRef USecTimeStr = TimeStr.split(Separator: '.').second; |
1756 | uint64_t SecTime; |
1757 | uint64_t USecTime; |
1758 | if (SecTimeStr.getAsInteger(Radix: 10, Result&: SecTime) || |
1759 | USecTimeStr.getAsInteger(Radix: 10, Result&: USecTime)) |
1760 | return std::nullopt; |
1761 | return SecTime * 1000000ULL + USecTime; |
1762 | } |
1763 | } |
1764 | |
1765 | std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { |
1766 | while (checkAndConsumeFS()) { |
1767 | } |
1768 | |
1769 | size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n"); |
1770 | if (LineEnd == StringRef::npos) { |
1771 | reportError(ErrorMsg: "expected rest of line"); |
1772 | Diag << "Found: "<< ParsingBuf << "\n"; |
1773 | return std::nullopt; |
1774 | } |
1775 | StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd); |
1776 | |
1777 | size_t Pos = Line.find(Str: "PERF_RECORD_FORK"); |
1778 | if (Pos == StringRef::npos) { |
1779 | consumeRestOfLine(); |
1780 | return std::nullopt; |
1781 | } |
1782 | |
1783 | ForkInfo FI; |
1784 | |
1785 | const StringRef TimeStr = |
1786 | Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second; |
1787 | if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { |
1788 | FI.Time = *TimeRes; |
1789 | } |
1790 | |
1791 | Line = Line.drop_front(N: Pos); |
1792 | |
1793 | // Line: |
1794 | // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) |
1795 | const StringRef ChildPIDStr = Line.split(Separator: '(').second.split(Separator: ':').first; |
1796 | if (ChildPIDStr.getAsInteger(Radix: 10, Result&: FI.ChildPID)) { |
1797 | reportError(ErrorMsg: "expected PID"); |
1798 | Diag << "Found: "<< ChildPIDStr << "in '"<< Line << "'\n"; |
1799 | return std::nullopt; |
1800 | } |
1801 | |
1802 | const StringRef ParentPIDStr = Line.rsplit(Separator: '(').second.split(Separator: ':').first; |
1803 | if (ParentPIDStr.getAsInteger(Radix: 10, Result&: FI.ParentPID)) { |
1804 | reportError(ErrorMsg: "expected PID"); |
1805 | Diag << "Found: "<< ParentPIDStr << "in '"<< Line << "'\n"; |
1806 | return std::nullopt; |
1807 | } |
1808 | |
1809 | consumeRestOfLine(); |
1810 | |
1811 | return FI; |
1812 | } |
1813 | |
1814 | ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> |
1815 | DataAggregator::parseMMapEvent() { |
1816 | while (checkAndConsumeFS()) { |
1817 | } |
1818 | |
1819 | MMapInfo ParsedInfo; |
1820 | |
1821 | size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n"); |
1822 | if (LineEnd == StringRef::npos) { |
1823 | reportError(ErrorMsg: "expected rest of line"); |
1824 | Diag << "Found: "<< ParsingBuf << "\n"; |
1825 | return make_error_code(E: llvm::errc::io_error); |
1826 | } |
1827 | StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd); |
1828 | |
1829 | size_t Pos = Line.find(Str: "PERF_RECORD_MMAP2"); |
1830 | if (Pos == StringRef::npos) { |
1831 | consumeRestOfLine(); |
1832 | return std::make_pair(x: StringRef(), y&: ParsedInfo); |
1833 | } |
1834 | |
1835 | // Line: |
1836 | // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> |
1837 | |
1838 | const StringRef TimeStr = |
1839 | Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second; |
1840 | if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) |
1841 | ParsedInfo.Time = *TimeRes; |
1842 | |
1843 | Line = Line.drop_front(N: Pos); |
1844 | |
1845 | // Line: |
1846 | // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> |
1847 | |
1848 | StringRef FileName = Line.rsplit(Separator: FieldSeparator).second; |
1849 | if (FileName.starts_with(Prefix: "//") || FileName.starts_with(Prefix: "[")) { |
1850 | consumeRestOfLine(); |
1851 | return std::make_pair(x: StringRef(), y&: ParsedInfo); |
1852 | } |
1853 | FileName = sys::path::filename(path: FileName); |
1854 | |
1855 | const StringRef PIDStr = Line.split(Separator: FieldSeparator).second.split(Separator: '/').first; |
1856 | if (PIDStr.getAsInteger(Radix: 10, Result&: ParsedInfo.PID)) { |
1857 | reportError(ErrorMsg: "expected PID"); |
1858 | Diag << "Found: "<< PIDStr << "in '"<< Line << "'\n"; |
1859 | return make_error_code(E: llvm::errc::io_error); |
1860 | } |
1861 | |
1862 | const StringRef BaseAddressStr = Line.split(Separator: '[').second.split(Separator: '(').first; |
1863 | if (BaseAddressStr.getAsInteger(Radix: 0, Result&: ParsedInfo.MMapAddress)) { |
1864 | reportError(ErrorMsg: "expected base address"); |
1865 | Diag << "Found: "<< BaseAddressStr << "in '"<< Line << "'\n"; |
1866 | return make_error_code(E: llvm::errc::io_error); |
1867 | } |
1868 | |
1869 | const StringRef SizeStr = Line.split(Separator: '(').second.split(Separator: ')').first; |
1870 | if (SizeStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Size)) { |
1871 | reportError(ErrorMsg: "expected mmaped size"); |
1872 | Diag << "Found: "<< SizeStr << "in '"<< Line << "'\n"; |
1873 | return make_error_code(E: llvm::errc::io_error); |
1874 | } |
1875 | |
1876 | const StringRef OffsetStr = |
1877 | Line.split(Separator: '@').second.ltrim().split(Separator: FieldSeparator).first; |
1878 | if (OffsetStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Offset)) { |
1879 | reportError(ErrorMsg: "expected mmaped page-aligned offset"); |
1880 | Diag << "Found: "<< OffsetStr << "in '"<< Line << "'\n"; |
1881 | return make_error_code(E: llvm::errc::io_error); |
1882 | } |
1883 | |
1884 | consumeRestOfLine(); |
1885 | |
1886 | return std::make_pair(x&: FileName, y&: ParsedInfo); |
1887 | } |
1888 | |
1889 | std::error_code DataAggregator::parseMMapEvents() { |
1890 | outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; |
1891 | NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName, |
1892 | TimerGroupDesc, opts::TimeAggregator); |
1893 | |
1894 | std::multimap<StringRef, MMapInfo> GlobalMMapInfo; |
1895 | while (hasData()) { |
1896 | ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent(); |
1897 | if (std::error_code EC = FileMMapInfoRes.getError()) |
1898 | return EC; |
1899 | |
1900 | std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get(); |
1901 | if (FileMMapInfo.second.PID == -1) |
1902 | continue; |
1903 | if (FileMMapInfo.first == "(deleted)") |
1904 | continue; |
1905 | |
1906 | GlobalMMapInfo.insert(x&: FileMMapInfo); |
1907 | } |
1908 | |
1909 | LLVM_DEBUG({ |
1910 | dbgs() << "FileName -> mmap info:\n" |
1911 | << " Filename : PID [MMapAddr, Size, Offset]\n"; |
1912 | for (const auto &[Name, MMap] : GlobalMMapInfo) |
1913 | dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID, |
1914 | MMap.MMapAddress, MMap.Size, MMap.Offset); |
1915 | }); |
1916 | |
1917 | StringRef NameToUse = llvm::sys::path::filename(path: BC->getFilename()); |
1918 | if (GlobalMMapInfo.count(x: NameToUse) == 0 && !BuildIDBinaryName.empty()) { |
1919 | errs() << "PERF2BOLT-WARNING: using \""<< BuildIDBinaryName |
1920 | << "\" for profile matching\n"; |
1921 | NameToUse = BuildIDBinaryName; |
1922 | } |
1923 | |
1924 | auto Range = GlobalMMapInfo.equal_range(x: NameToUse); |
1925 | for (MMapInfo &MMapInfo : llvm::make_second_range(c: make_range(p: Range))) { |
1926 | if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { |
1927 | // Check that the binary mapping matches one of the segments. |
1928 | bool MatchFound = llvm::any_of( |
1929 | Range: llvm::make_second_range(c&: BC->SegmentMapInfo), |
1930 | P: [&](SegmentInfo &SegInfo) { |
1931 | // The mapping is page-aligned and hence the MMapAddress could be |
1932 | // different from the segment start address. We cannot know the page |
1933 | // size of the mapping, but we know it should not exceed the segment |
1934 | // alignment value. Hence we are performing an approximate check. |
1935 | return SegInfo.Address >= MMapInfo.MMapAddress && |
1936 | SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment && |
1937 | SegInfo.IsExecutable; |
1938 | }); |
1939 | if (!MatchFound) { |
1940 | errs() << "PERF2BOLT-WARNING: ignoring mapping of "<< NameToUse |
1941 | << " at 0x"<< Twine::utohexstr(Val: MMapInfo.MMapAddress) << '\n'; |
1942 | continue; |
1943 | } |
1944 | } |
1945 | |
1946 | // Set base address for shared objects. |
1947 | if (!BC->HasFixedLoadAddress) { |
1948 | std::optional<uint64_t> BaseAddress = |
1949 | BC->getBaseAddressForMapping(MMapAddress: MMapInfo.MMapAddress, FileOffset: MMapInfo.Offset); |
1950 | if (!BaseAddress) { |
1951 | errs() << "PERF2BOLT-WARNING: unable to find base address of the " |
1952 | "binary when memory mapped at 0x" |
1953 | << Twine::utohexstr(Val: MMapInfo.MMapAddress) |
1954 | << " using file offset 0x"<< Twine::utohexstr(Val: MMapInfo.Offset) |
1955 | << ". Ignoring profile data for this mapping\n"; |
1956 | continue; |
1957 | } |
1958 | MMapInfo.BaseAddress = *BaseAddress; |
1959 | } |
1960 | |
1961 | // Try to add MMapInfo to the map and update its size. Large binaries may |
1962 | // span to multiple text segments, so the mapping is inserted only on the |
1963 | // first occurrence. |
1964 | if (!BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo)).second) |
1965 | assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress && |
1966 | "Base address on multiple segment mappings should match"); |
1967 | |
1968 | // Update mapping size. |
1969 | const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size; |
1970 | const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress; |
1971 | if (Size > BinaryMMapInfo[MMapInfo.PID].Size) |
1972 | BinaryMMapInfo[MMapInfo.PID].Size = Size; |
1973 | } |
1974 | |
1975 | if (BinaryMMapInfo.empty()) { |
1976 | if (errs().has_colors()) |
1977 | errs().changeColor(Color: raw_ostream::RED); |
1978 | errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" |
1979 | << BC->getFilename() << "\"."; |
1980 | if (!GlobalMMapInfo.empty()) { |
1981 | errs() << " Profile for the following binary name(s) is available:\n"; |
1982 | for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; |
1983 | I = GlobalMMapInfo.upper_bound(x: I->first)) |
1984 | errs() << " "<< I->first << '\n'; |
1985 | errs() << "Please rename the input binary.\n"; |
1986 | } else { |
1987 | errs() << " Failed to extract any binary name from a profile.\n"; |
1988 | } |
1989 | if (errs().has_colors()) |
1990 | errs().resetColor(); |
1991 | |
1992 | exit(status: 1); |
1993 | } |
1994 | |
1995 | return std::error_code(); |
1996 | } |
1997 | |
1998 | std::error_code DataAggregator::parseTaskEvents() { |
1999 | outs() << "PERF2BOLT: parsing perf-script task events output\n"; |
2000 | NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName, |
2001 | TimerGroupDesc, opts::TimeAggregator); |
2002 | |
2003 | while (hasData()) { |
2004 | if (std::optional<int32_t> CommInfo = parseCommExecEvent()) { |
2005 | // Remove forked child that ran execve |
2006 | auto MMapInfoIter = BinaryMMapInfo.find(x: *CommInfo); |
2007 | if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) |
2008 | BinaryMMapInfo.erase(position: MMapInfoIter); |
2009 | consumeRestOfLine(); |
2010 | continue; |
2011 | } |
2012 | |
2013 | std::optional<ForkInfo> ForkInfo = parseForkEvent(); |
2014 | if (!ForkInfo) |
2015 | continue; |
2016 | |
2017 | if (ForkInfo->ParentPID == ForkInfo->ChildPID) |
2018 | continue; |
2019 | |
2020 | if (ForkInfo->Time == 0) { |
2021 | // Process was forked and mmaped before perf ran. In this case the child |
2022 | // should have its own mmap entry unless it was execve'd. |
2023 | continue; |
2024 | } |
2025 | |
2026 | auto MMapInfoIter = BinaryMMapInfo.find(x: ForkInfo->ParentPID); |
2027 | if (MMapInfoIter == BinaryMMapInfo.end()) |
2028 | continue; |
2029 | |
2030 | MMapInfo MMapInfo = MMapInfoIter->second; |
2031 | MMapInfo.PID = ForkInfo->ChildPID; |
2032 | MMapInfo.Forked = true; |
2033 | BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo)); |
2034 | } |
2035 | |
2036 | outs() << "PERF2BOLT: input binary is associated with " |
2037 | << BinaryMMapInfo.size() << " PID(s)\n"; |
2038 | |
2039 | LLVM_DEBUG({ |
2040 | for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo)) |
2041 | outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID, |
2042 | (MMI.Forked ? " (forked)": ""), MMI.MMapAddress, |
2043 | MMI.Size); |
2044 | }); |
2045 | |
2046 | return std::error_code(); |
2047 | } |
2048 | |
2049 | std::optional<std::pair<StringRef, StringRef>> |
2050 | DataAggregator::parseNameBuildIDPair() { |
2051 | while (checkAndConsumeFS()) { |
2052 | } |
2053 | |
2054 | ErrorOr<StringRef> BuildIDStr = parseString(EndChar: FieldSeparator, EndNl: true); |
2055 | if (std::error_code EC = BuildIDStr.getError()) |
2056 | return std::nullopt; |
2057 | |
2058 | // If one of the strings is missing, don't issue a parsing error, but still |
2059 | // do not return a value. |
2060 | consumeAllRemainingFS(); |
2061 | if (checkNewLine()) |
2062 | return std::nullopt; |
2063 | |
2064 | ErrorOr<StringRef> NameStr = parseString(EndChar: FieldSeparator, EndNl: true); |
2065 | if (std::error_code EC = NameStr.getError()) |
2066 | return std::nullopt; |
2067 | |
2068 | consumeRestOfLine(); |
2069 | return std::make_pair(x&: NameStr.get(), y&: BuildIDStr.get()); |
2070 | } |
2071 | |
2072 | bool DataAggregator::hasAllBuildIDs() { |
2073 | const StringRef SavedParsingBuf = ParsingBuf; |
2074 | |
2075 | if (!hasData()) |
2076 | return false; |
2077 | |
2078 | bool HasInvalidEntries = false; |
2079 | while (hasData()) { |
2080 | if (!parseNameBuildIDPair()) { |
2081 | HasInvalidEntries = true; |
2082 | break; |
2083 | } |
2084 | } |
2085 | |
2086 | ParsingBuf = SavedParsingBuf; |
2087 | |
2088 | return !HasInvalidEntries; |
2089 | } |
2090 | |
2091 | std::optional<StringRef> |
2092 | DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { |
2093 | const StringRef SavedParsingBuf = ParsingBuf; |
2094 | |
2095 | StringRef FileName; |
2096 | while (hasData()) { |
2097 | std::optional<std::pair<StringRef, StringRef>> IDPair = |
2098 | parseNameBuildIDPair(); |
2099 | if (!IDPair) { |
2100 | consumeRestOfLine(); |
2101 | continue; |
2102 | } |
2103 | |
2104 | if (IDPair->second.starts_with(Prefix: FileBuildID)) { |
2105 | FileName = sys::path::filename(path: IDPair->first); |
2106 | break; |
2107 | } |
2108 | } |
2109 | |
2110 | ParsingBuf = SavedParsingBuf; |
2111 | |
2112 | if (!FileName.empty()) |
2113 | return FileName; |
2114 | |
2115 | return std::nullopt; |
2116 | } |
2117 | |
2118 | std::error_code |
2119 | DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { |
2120 | std::error_code EC; |
2121 | raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); |
2122 | if (EC) |
2123 | return EC; |
2124 | |
2125 | bool WriteMemLocs = false; |
2126 | |
2127 | auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { |
2128 | if (WriteMemLocs) |
2129 | OutFile << (Loc.IsSymbol ? "4 ": "3 "); |
2130 | else |
2131 | OutFile << (Loc.IsSymbol ? "1 ": "0 "); |
2132 | OutFile << (Loc.Name.empty() ? "[unknown]": getEscapedName(Name: Loc.Name)) |
2133 | << " "<< Twine::utohexstr(Val: Loc.Offset) << FieldSeparator; |
2134 | }; |
2135 | |
2136 | uint64_t BranchValues = 0; |
2137 | uint64_t MemValues = 0; |
2138 | |
2139 | if (BAT) |
2140 | OutFile << "boltedcollection\n"; |
2141 | if (opts::BasicAggregation) { |
2142 | OutFile << "no_lbr"; |
2143 | for (const StringMapEntry<std::nullopt_t> &Entry : EventNames) |
2144 | OutFile << " "<< Entry.getKey(); |
2145 | OutFile << "\n"; |
2146 | |
2147 | for (const auto &KV : NamesToBasicSamples) { |
2148 | const FuncBasicSampleData &FSD = KV.second; |
2149 | for (const BasicSampleInfo &SI : FSD.Data) { |
2150 | writeLocation(SI.Loc); |
2151 | OutFile << SI.Hits << "\n"; |
2152 | ++BranchValues; |
2153 | } |
2154 | } |
2155 | } else { |
2156 | for (const auto &KV : NamesToBranches) { |
2157 | const FuncBranchData &FBD = KV.second; |
2158 | for (const BranchInfo &BI : FBD.Data) { |
2159 | writeLocation(BI.From); |
2160 | writeLocation(BI.To); |
2161 | OutFile << BI.Mispreds << " "<< BI.Branches << "\n"; |
2162 | ++BranchValues; |
2163 | } |
2164 | for (const BranchInfo &BI : FBD.EntryData) { |
2165 | // Do not output if source is a known symbol, since this was already |
2166 | // accounted for in the source function |
2167 | if (BI.From.IsSymbol) |
2168 | continue; |
2169 | writeLocation(BI.From); |
2170 | writeLocation(BI.To); |
2171 | OutFile << BI.Mispreds << " "<< BI.Branches << "\n"; |
2172 | ++BranchValues; |
2173 | } |
2174 | } |
2175 | |
2176 | WriteMemLocs = true; |
2177 | for (const auto &KV : NamesToMemEvents) { |
2178 | const FuncMemData &FMD = KV.second; |
2179 | for (const MemInfo &MemEvent : FMD.Data) { |
2180 | writeLocation(MemEvent.Offset); |
2181 | writeLocation(MemEvent.Addr); |
2182 | OutFile << MemEvent.Count << "\n"; |
2183 | ++MemValues; |
2184 | } |
2185 | } |
2186 | } |
2187 | |
2188 | outs() << "PERF2BOLT: wrote "<< BranchValues << " objects and "<< MemValues |
2189 | << " memory objects to "<< OutputFilename << "\n"; |
2190 | |
2191 | return std::error_code(); |
2192 | } |
2193 | |
2194 | std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, |
2195 | StringRef OutputFilename) const { |
2196 | std::error_code EC; |
2197 | raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); |
2198 | if (EC) |
2199 | return EC; |
2200 | |
2201 | yaml::bolt::BinaryProfile BP; |
2202 | |
2203 | const MCPseudoProbeDecoder *PseudoProbeDecoder = |
2204 | opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; |
2205 | |
2206 | // Fill out the header info. |
2207 | BP.Header.Version = 1; |
2208 | BP.Header.FileName = std::string(BC.getFilename()); |
2209 | std::optional<StringRef> BuildID = BC.getFileBuildID(); |
2210 | BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>"; |
2211 | BP.Header.Origin = std::string(getReaderName()); |
2212 | // Only the input binary layout order is supported. |
2213 | BP.Header.IsDFSOrder = false; |
2214 | // FIXME: Need to match hash function used to produce BAT hashes. |
2215 | BP.Header.HashFunction = HashFunction::Default; |
2216 | |
2217 | ListSeparator LS(","); |
2218 | raw_string_ostream EventNamesOS(BP.Header.EventNames); |
2219 | for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) |
2220 | EventNamesOS << LS << EventEntry.first().str(); |
2221 | |
2222 | BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_BASIC |
2223 | : BinaryFunction::PF_BRANCH; |
2224 | |
2225 | // Add probe inline tree nodes. |
2226 | YAMLProfileWriter::InlineTreeDesc InlineTree; |
2227 | if (PseudoProbeDecoder) |
2228 | std::tie(args&: BP.PseudoProbeDesc, args&: InlineTree) = |
2229 | YAMLProfileWriter::convertPseudoProbeDesc(PseudoProbeDecoder: *PseudoProbeDecoder); |
2230 | |
2231 | if (!opts::BasicAggregation) { |
2232 | // Convert profile for functions not covered by BAT |
2233 | for (auto &BFI : BC.getBinaryFunctions()) { |
2234 | BinaryFunction &Function = BFI.second; |
2235 | if (!Function.hasProfile()) |
2236 | continue; |
2237 | if (BAT->isBATFunction(Address: Function.getAddress())) |
2238 | continue; |
2239 | BP.Functions.emplace_back(args: YAMLProfileWriter::convert( |
2240 | BF: Function, /*UseDFS=*/false, InlineTree, BAT)); |
2241 | } |
2242 | |
2243 | for (const auto &KV : NamesToBranches) { |
2244 | const StringRef FuncName = KV.first; |
2245 | const FuncBranchData &Branches = KV.second; |
2246 | yaml::bolt::BinaryFunctionProfile YamlBF; |
2247 | BinaryData *BD = BC.getBinaryDataByName(Name: FuncName); |
2248 | assert(BD); |
2249 | uint64_t FuncAddress = BD->getAddress(); |
2250 | if (!BAT->isBATFunction(Address: FuncAddress)) |
2251 | continue; |
2252 | BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: FuncAddress); |
2253 | assert(BF); |
2254 | YamlBF.Name = getLocationName(Func: *BF, BAT); |
2255 | YamlBF.Id = BF->getFunctionNumber(); |
2256 | YamlBF.Hash = BAT->getBFHash(FuncOutputAddress: FuncAddress); |
2257 | YamlBF.ExecCount = BF->getKnownExecutionCount(); |
2258 | YamlBF.ExternEntryCount = BF->getExternEntryCount(); |
2259 | YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(OutputAddress: FuncAddress); |
2260 | const BoltAddressTranslation::BBHashMapTy &BlockMap = |
2261 | BAT->getBBHashMap(FuncOutputAddress: FuncAddress); |
2262 | YamlBF.Blocks.resize(new_size: YamlBF.NumBasicBlocks); |
2263 | |
2264 | for (auto &&[Entry, YamlBB] : llvm::zip(t: BlockMap, u&: YamlBF.Blocks)) { |
2265 | const auto &Block = Entry.second; |
2266 | YamlBB.Hash = Block.Hash; |
2267 | YamlBB.Index = Block.Index; |
2268 | } |
2269 | |
2270 | // Lookup containing basic block offset and index |
2271 | auto getBlock = [&BlockMap](uint32_t Offset) { |
2272 | auto BlockIt = BlockMap.upper_bound(Offset); |
2273 | if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) { |
2274 | errs() << "BOLT-ERROR: invalid BAT section\n"; |
2275 | exit(status: 1); |
2276 | } |
2277 | --BlockIt; |
2278 | return std::pair(BlockIt->first, BlockIt->second.Index); |
2279 | }; |
2280 | |
2281 | for (const BranchInfo &BI : Branches.Data) { |
2282 | using namespace yaml::bolt; |
2283 | const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset); |
2284 | BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex]; |
2285 | if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) { |
2286 | // Internal branch |
2287 | const unsigned SuccIndex = getBlock(BI.To.Offset).second; |
2288 | auto &SI = YamlBB.Successors.emplace_back(args: SuccessorInfo{.Index: SuccIndex}); |
2289 | SI.Count = BI.Branches; |
2290 | SI.Mispreds = BI.Mispreds; |
2291 | } else { |
2292 | // Call |
2293 | const uint32_t Offset = BI.From.Offset - BlockOffset; |
2294 | auto &CSI = YamlBB.CallSites.emplace_back(args: CallSiteInfo{.Offset: Offset}); |
2295 | CSI.Count = BI.Branches; |
2296 | CSI.Mispreds = BI.Mispreds; |
2297 | if (const BinaryData *BD = BC.getBinaryDataByName(Name: BI.To.Name)) |
2298 | YAMLProfileWriter::setCSIDestination(BC, CSI, Symbol: BD->getSymbol(), BAT, |
2299 | Offset: BI.To.Offset); |
2300 | } |
2301 | } |
2302 | // Set entry counts, similar to DataReader::readProfile. |
2303 | for (const BranchInfo &BI : Branches.EntryData) { |
2304 | if (!BlockMap.isInputBlock(InputOffset: BI.To.Offset)) { |
2305 | if (opts::Verbosity >= 1) |
2306 | errs() << "BOLT-WARNING: Unexpected EntryData in "<< FuncName |
2307 | << " at 0x"<< Twine::utohexstr(Val: BI.To.Offset) << '\n'; |
2308 | continue; |
2309 | } |
2310 | const unsigned BlockIndex = BlockMap.getBBIndex(BBInputOffset: BI.To.Offset); |
2311 | YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches; |
2312 | } |
2313 | if (PseudoProbeDecoder) { |
2314 | DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> |
2315 | InlineTreeNodeId; |
2316 | if (BF->getGUID()) { |
2317 | std::tie(args&: YamlBF.InlineTree, args&: InlineTreeNodeId) = |
2318 | YAMLProfileWriter::convertBFInlineTree(Decoder: *PseudoProbeDecoder, |
2319 | InlineTree, GUID: BF->getGUID()); |
2320 | } |
2321 | // Fetch probes belonging to all fragments |
2322 | const AddressProbesMap &ProbeMap = |
2323 | PseudoProbeDecoder->getAddress2ProbesMap(); |
2324 | BinaryFunction::FragmentsSetTy Fragments(BF->Fragments); |
2325 | Fragments.insert(Ptr: BF); |
2326 | DenseMap< |
2327 | uint32_t, |
2328 | std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>> |
2329 | BlockProbes; |
2330 | for (const BinaryFunction *F : Fragments) { |
2331 | const uint64_t FuncAddr = F->getAddress(); |
2332 | for (const MCDecodedPseudoProbe &Probe : |
2333 | ProbeMap.find(From: FuncAddr, To: FuncAddr + F->getSize())) { |
2334 | const uint32_t OutputAddress = Probe.getAddress(); |
2335 | const uint32_t InputOffset = BAT->translate( |
2336 | FuncAddress: FuncAddr, Offset: OutputAddress - FuncAddr, /*IsBranchSrc=*/true); |
2337 | const unsigned BlockIndex = getBlock(InputOffset).second; |
2338 | BlockProbes[BlockIndex].emplace_back(args: Probe); |
2339 | } |
2340 | } |
2341 | |
2342 | for (auto &[Block, Probes] : BlockProbes) { |
2343 | YamlBF.Blocks[Block].PseudoProbes = |
2344 | YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId); |
2345 | } |
2346 | } |
2347 | // Skip printing if there's no profile data |
2348 | llvm::erase_if( |
2349 | C&: YamlBF.Blocks, P: [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { |
2350 | auto HasCount = [](const auto &SI) { return SI.Count; }; |
2351 | bool HasAnyCount = YamlBB.ExecCount || |
2352 | llvm::any_of(Range: YamlBB.Successors, P: HasCount) || |
2353 | llvm::any_of(Range: YamlBB.CallSites, P: HasCount); |
2354 | return !HasAnyCount; |
2355 | }); |
2356 | BP.Functions.emplace_back(args&: YamlBF); |
2357 | } |
2358 | } |
2359 | |
2360 | // Write the profile. |
2361 | yaml::Output Out(OutFile, nullptr, 0); |
2362 | Out << BP; |
2363 | return std::error_code(); |
2364 | } |
2365 | |
2366 | void DataAggregator::dump() const { DataReader::dump(); } |
2367 | |
2368 | void DataAggregator::dump(const PerfBranchSample &Sample) const { |
2369 | Diag << "Sample LBR entries: "<< Sample.LBR.size() << "\n"; |
2370 | for (const LBREntry &LBR : Sample.LBR) |
2371 | Diag << LBR << '\n'; |
2372 | } |
2373 | |
2374 | void DataAggregator::dump(const PerfMemSample &Sample) const { |
2375 | Diag << "Sample mem entries: "<< Sample.PC << ": "<< Sample.Addr << "\n"; |
2376 | } |
2377 |
Definitions
- BasicAggregation
- ITraceAggregation
- FilterMemProfile
- FilterPID
- IgnoreBuildID
- IgnoreInterruptLBR
- MaxSamples
- ReadPreAggregated
- ReadPerfEvents
- TimeAggregator
- TimerGroupName
- TimerGroupDesc
- getTextSections
- ~DataAggregator
- deleteTempFile
- deleteTempFiles
- findPerfExecutable
- start
- abort
- launchPerfProcess
- processFileBuildID
- checkPerfDataMagic
- parsePreAggregated
- filterBinaryMMapInfo
- prepareToParse
- preprocessProfile
- readProfile
- mayHaveProfileData
- processProfile
- getBinaryFunctionContainingAddress
- getBATParentFunction
- getLocationName
- doBasicSample
- doIntraBranch
- doInterBranch
- doBranch
- doTrace
- getFallthroughsInTrace
- recordEntry
- recordExit
- parseLBREntry
- checkAndConsumeFS
- consumeRestOfLine
- checkNewLine
- parseBranchSample
- parseBasicSample
- parseMemSample
- parseLocationOrOffset
- parseAggregatedLBREntry
- ignoreKernelInterrupt
- printLBRHeatMap
- parseLBRSample
- printLongRangeTracesDiagnostic
- printColoredPct
- printBranchSamplesDiagnostics
- printBasicSamplesDiagnostics
- printBranchStacksDiagnostics
- parseBranchEvents
- processBranchEvents
- parseBasicEvents
- processBasicEvents
- parseMemEvents
- processMemEvents
- parsePreAggregatedLBRSamples
- parseCommExecEvent
- parsePerfTime
- parseForkEvent
- parseMMapEvent
- parseMMapEvents
- parseTaskEvents
- parseNameBuildIDPair
- hasAllBuildIDs
- getFileNameForBuildID
- writeAggregatedFile
- writeBATYAML
- dump
- dump
Improve your Profiling and Debugging skills
Find out more