1 | //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "bolt/Profile/DataAggregator.h" |
15 | #include "bolt/Core/BinaryContext.h" |
16 | #include "bolt/Core/BinaryFunction.h" |
17 | #include "bolt/Profile/BoltAddressTranslation.h" |
18 | #include "bolt/Profile/Heatmap.h" |
19 | #include "bolt/Profile/YAMLProfileWriter.h" |
20 | #include "bolt/Utils/CommandLineOpts.h" |
21 | #include "bolt/Utils/Utils.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/ScopeExit.h" |
24 | #include "llvm/Support/CommandLine.h" |
25 | #include "llvm/Support/Debug.h" |
26 | #include "llvm/Support/Errc.h" |
27 | #include "llvm/Support/FileSystem.h" |
28 | #include "llvm/Support/Process.h" |
29 | #include "llvm/Support/Program.h" |
30 | #include "llvm/Support/Regex.h" |
31 | #include "llvm/Support/Timer.h" |
32 | #include "llvm/Support/raw_ostream.h" |
33 | #include <map> |
34 | #include <optional> |
35 | #include <unordered_map> |
36 | #include <utility> |
37 | |
38 | #define DEBUG_TYPE "aggregator" |
39 | |
40 | using namespace llvm; |
41 | using namespace bolt; |
42 | |
43 | namespace opts { |
44 | |
45 | static cl::opt<bool> |
46 | BasicAggregation("nl" , |
47 | cl::desc("aggregate basic samples (without LBR info)" ), |
48 | cl::cat(AggregatorCategory)); |
49 | |
50 | static cl::opt<std::string> |
51 | ITraceAggregation("itrace" , |
52 | cl::desc("Generate LBR info with perf itrace argument" ), |
53 | cl::cat(AggregatorCategory)); |
54 | |
55 | static cl::opt<bool> |
56 | FilterMemProfile("filter-mem-profile" , |
57 | cl::desc("if processing a memory profile, filter out stack or heap accesses " |
58 | "that won't be useful for BOLT to reduce profile file size" ), |
59 | cl::init(Val: true), |
60 | cl::cat(AggregatorCategory)); |
61 | |
62 | static cl::opt<unsigned long long> |
63 | FilterPID("pid" , |
64 | cl::desc("only use samples from process with specified PID" ), |
65 | cl::init(Val: 0), |
66 | cl::Optional, |
67 | cl::cat(AggregatorCategory)); |
68 | |
69 | static cl::opt<bool> |
70 | IgnoreBuildID("ignore-build-id" , |
71 | cl::desc("continue even if build-ids in input binary and perf.data mismatch" ), |
72 | cl::init(Val: false), |
73 | cl::cat(AggregatorCategory)); |
74 | |
75 | static cl::opt<bool> IgnoreInterruptLBR( |
76 | "ignore-interrupt-lbr" , |
77 | cl::desc("ignore kernel interrupt LBR that happens asynchronously" ), |
78 | cl::init(Val: true), cl::cat(AggregatorCategory)); |
79 | |
80 | static cl::opt<unsigned long long> |
81 | MaxSamples("max-samples" , |
82 | cl::init(Val: -1ULL), |
83 | cl::desc("maximum number of samples to read from LBR profile" ), |
84 | cl::Optional, |
85 | cl::Hidden, |
86 | cl::cat(AggregatorCategory)); |
87 | |
88 | extern cl::opt<opts::ProfileFormatKind> ProfileFormat; |
89 | extern cl::opt<std::string> SaveProfile; |
90 | |
91 | cl::opt<bool> ReadPreAggregated( |
92 | "pa" , cl::desc("skip perf and read data from a pre-aggregated file format" ), |
93 | cl::cat(AggregatorCategory)); |
94 | |
95 | static cl::opt<bool> |
96 | TimeAggregator("time-aggr" , |
97 | cl::desc("time BOLT aggregator" ), |
98 | cl::init(Val: false), |
99 | cl::ZeroOrMore, |
100 | cl::cat(AggregatorCategory)); |
101 | |
102 | static cl::opt<bool> |
103 | UseEventPC("use-event-pc" , |
104 | cl::desc("use event PC in combination with LBR sampling" ), |
105 | cl::cat(AggregatorCategory)); |
106 | |
107 | static cl::opt<bool> WriteAutoFDOData( |
108 | "autofdo" , cl::desc("generate autofdo textual data instead of bolt data" ), |
109 | cl::cat(AggregatorCategory)); |
110 | |
111 | } // namespace opts |
112 | |
113 | namespace { |
114 | |
115 | const char TimerGroupName[] = "aggregator" ; |
116 | const char TimerGroupDesc[] = "Aggregator" ; |
117 | |
118 | std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) { |
119 | std::vector<SectionNameAndRange> sections; |
120 | for (BinarySection &Section : BC->sections()) { |
121 | if (!Section.isText()) |
122 | continue; |
123 | if (Section.getSize() == 0) |
124 | continue; |
125 | sections.push_back( |
126 | x: {.Name: Section.getName(), .BeginAddress: Section.getAddress(), .EndAddress: Section.getEndAddress()}); |
127 | } |
128 | llvm::sort(C&: sections, |
129 | Comp: [](const SectionNameAndRange &A, const SectionNameAndRange &B) { |
130 | return A.BeginAddress < B.BeginAddress; |
131 | }); |
132 | return sections; |
133 | } |
134 | } |
135 | |
136 | constexpr uint64_t DataAggregator::KernelBaseAddr; |
137 | |
138 | DataAggregator::~DataAggregator() { deleteTempFiles(); } |
139 | |
140 | namespace { |
141 | void deleteTempFile(const std::string &FileName) { |
142 | if (std::error_code Errc = sys::fs::remove(path: FileName.c_str())) |
143 | errs() << "PERF2BOLT: failed to delete temporary file " << FileName |
144 | << " with error " << Errc.message() << "\n" ; |
145 | } |
146 | } |
147 | |
148 | void DataAggregator::deleteTempFiles() { |
149 | for (std::string &FileName : TempFiles) |
150 | deleteTempFile(FileName); |
151 | TempFiles.clear(); |
152 | } |
153 | |
154 | void DataAggregator::findPerfExecutable() { |
155 | std::optional<std::string> PerfExecutable = |
156 | sys::Process::FindInEnvPath(EnvName: "PATH" , FileName: "perf" ); |
157 | if (!PerfExecutable) { |
158 | outs() << "PERF2BOLT: No perf executable found!\n" ; |
159 | exit(status: 1); |
160 | } |
161 | PerfPath = *PerfExecutable; |
162 | } |
163 | |
164 | void DataAggregator::start() { |
165 | outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n" ; |
166 | |
167 | // Don't launch perf for pre-aggregated files |
168 | if (opts::ReadPreAggregated) |
169 | return; |
170 | |
171 | findPerfExecutable(); |
172 | |
173 | if (opts::BasicAggregation) { |
174 | launchPerfProcess(Name: "events without LBR" , |
175 | PPI&: MainEventsPPI, |
176 | ArgsString: "script -F pid,event,ip" , |
177 | /*Wait = */false); |
178 | } else if (!opts::ITraceAggregation.empty()) { |
179 | std::string ItracePerfScriptArgs = llvm::formatv( |
180 | Fmt: "script -F pid,ip,brstack --itrace={0}" , Vals&: opts::ITraceAggregation); |
181 | launchPerfProcess(Name: "branch events with itrace" , PPI&: MainEventsPPI, |
182 | ArgsString: ItracePerfScriptArgs.c_str(), |
183 | /*Wait = */ false); |
184 | } else { |
185 | launchPerfProcess(Name: "branch events" , |
186 | PPI&: MainEventsPPI, |
187 | ArgsString: "script -F pid,ip,brstack" , |
188 | /*Wait = */false); |
189 | } |
190 | |
191 | // Note: we launch script for mem events regardless of the option, as the |
192 | // command fails fairly fast if mem events were not collected. |
193 | launchPerfProcess(Name: "mem events" , |
194 | PPI&: MemEventsPPI, |
195 | ArgsString: "script -F pid,event,addr,ip" , |
196 | /*Wait = */false); |
197 | |
198 | launchPerfProcess(Name: "process events" , PPI&: MMapEventsPPI, |
199 | ArgsString: "script --show-mmap-events --no-itrace" , |
200 | /*Wait = */ false); |
201 | |
202 | launchPerfProcess(Name: "task events" , PPI&: TaskEventsPPI, |
203 | ArgsString: "script --show-task-events --no-itrace" , |
204 | /*Wait = */ false); |
205 | } |
206 | |
207 | void DataAggregator::abort() { |
208 | if (opts::ReadPreAggregated) |
209 | return; |
210 | |
211 | std::string Error; |
212 | |
213 | // Kill subprocesses in case they are not finished |
214 | sys::Wait(PI: TaskEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error); |
215 | sys::Wait(PI: MMapEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error); |
216 | sys::Wait(PI: MainEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error); |
217 | sys::Wait(PI: MemEventsPPI.PI, SecondsToWait: 1, ErrMsg: &Error); |
218 | |
219 | deleteTempFiles(); |
220 | |
221 | exit(status: 1); |
222 | } |
223 | |
224 | void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, |
225 | const char *ArgsString, bool Wait) { |
226 | SmallVector<StringRef, 4> Argv; |
227 | |
228 | outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n'; |
229 | Argv.push_back(Elt: PerfPath.data()); |
230 | |
231 | StringRef(ArgsString).split(A&: Argv, Separator: ' '); |
232 | Argv.push_back(Elt: "-f" ); |
233 | Argv.push_back(Elt: "-i" ); |
234 | Argv.push_back(Elt: Filename.c_str()); |
235 | |
236 | if (std::error_code Errc = |
237 | sys::fs::createTemporaryFile(Prefix: "perf.script" , Suffix: "out" , ResultPath&: PPI.StdoutPath)) { |
238 | errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath |
239 | << " with error " << Errc.message() << "\n" ; |
240 | exit(status: 1); |
241 | } |
242 | TempFiles.push_back(x: PPI.StdoutPath.data()); |
243 | |
244 | if (std::error_code Errc = |
245 | sys::fs::createTemporaryFile(Prefix: "perf.script" , Suffix: "err" , ResultPath&: PPI.StderrPath)) { |
246 | errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath |
247 | << " with error " << Errc.message() << "\n" ; |
248 | exit(status: 1); |
249 | } |
250 | TempFiles.push_back(x: PPI.StderrPath.data()); |
251 | |
252 | std::optional<StringRef> Redirects[] = { |
253 | std::nullopt, // Stdin |
254 | StringRef(PPI.StdoutPath.data()), // Stdout |
255 | StringRef(PPI.StderrPath.data())}; // Stderr |
256 | |
257 | LLVM_DEBUG({ |
258 | dbgs() << "Launching perf: " ; |
259 | for (StringRef Arg : Argv) |
260 | dbgs() << Arg << " " ; |
261 | dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() |
262 | << "\n" ; |
263 | }); |
264 | |
265 | if (Wait) |
266 | PPI.PI.ReturnCode = sys::ExecuteAndWait(Program: PerfPath.data(), Args: Argv, |
267 | /*envp*/ Env: std::nullopt, Redirects); |
268 | else |
269 | PPI.PI = sys::ExecuteNoWait(Program: PerfPath.data(), Args: Argv, /*envp*/ Env: std::nullopt, |
270 | Redirects); |
271 | } |
272 | |
273 | void DataAggregator::processFileBuildID(StringRef FileBuildID) { |
274 | PerfProcessInfo BuildIDProcessInfo; |
275 | launchPerfProcess(Name: "buildid list" , |
276 | PPI&: BuildIDProcessInfo, |
277 | ArgsString: "buildid-list" , |
278 | /*Wait = */true); |
279 | |
280 | if (BuildIDProcessInfo.PI.ReturnCode != 0) { |
281 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
282 | MemoryBuffer::getFileOrSTDIN(Filename: BuildIDProcessInfo.StderrPath.data()); |
283 | StringRef ErrBuf = (*MB)->getBuffer(); |
284 | |
285 | errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode |
286 | << '\n'; |
287 | errs() << ErrBuf; |
288 | return; |
289 | } |
290 | |
291 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
292 | MemoryBuffer::getFileOrSTDIN(Filename: BuildIDProcessInfo.StdoutPath.data()); |
293 | if (std::error_code EC = MB.getError()) { |
294 | errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " |
295 | << EC.message() << "\n" ; |
296 | return; |
297 | } |
298 | |
299 | FileBuf = std::move(*MB); |
300 | ParsingBuf = FileBuf->getBuffer(); |
301 | |
302 | std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); |
303 | if (!FileName) { |
304 | if (hasAllBuildIDs()) { |
305 | errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. " |
306 | "This indicates the input binary supplied for data aggregation " |
307 | "is not the same recorded by perf when collecting profiling " |
308 | "data, or there were no samples recorded for the binary. " |
309 | "Use -ignore-build-id option to override.\n" ; |
310 | if (!opts::IgnoreBuildID) |
311 | abort(); |
312 | } else { |
313 | errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " |
314 | "data was recorded without it\n" ; |
315 | return; |
316 | } |
317 | } else if (*FileName != llvm::sys::path::filename(path: BC->getFilename())) { |
318 | errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n" ; |
319 | BuildIDBinaryName = std::string(*FileName); |
320 | } else { |
321 | outs() << "PERF2BOLT: matched build-id and file name\n" ; |
322 | } |
323 | } |
324 | |
325 | bool DataAggregator::checkPerfDataMagic(StringRef FileName) { |
326 | if (opts::ReadPreAggregated) |
327 | return true; |
328 | |
329 | Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(Name: FileName); |
330 | if (!FD) { |
331 | consumeError(Err: FD.takeError()); |
332 | return false; |
333 | } |
334 | |
335 | char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; |
336 | |
337 | auto Close = make_scope_exit(F: [&] { sys::fs::closeFile(F&: *FD); }); |
338 | Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( |
339 | FileHandle: *FD, Buf: MutableArrayRef(Buf, sizeof(Buf)), Offset: 0); |
340 | if (!BytesRead) { |
341 | consumeError(Err: BytesRead.takeError()); |
342 | return false; |
343 | } |
344 | |
345 | if (*BytesRead != 7) |
346 | return false; |
347 | |
348 | if (strncmp(s1: Buf, s2: "PERFILE" , n: 7) == 0) |
349 | return true; |
350 | return false; |
351 | } |
352 | |
353 | void DataAggregator::parsePreAggregated() { |
354 | std::string Error; |
355 | |
356 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
357 | MemoryBuffer::getFileOrSTDIN(Filename); |
358 | if (std::error_code EC = MB.getError()) { |
359 | errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " |
360 | << EC.message() << "\n" ; |
361 | exit(status: 1); |
362 | } |
363 | |
364 | FileBuf = std::move(*MB); |
365 | ParsingBuf = FileBuf->getBuffer(); |
366 | Col = 0; |
367 | Line = 1; |
368 | if (parsePreAggregatedLBRSamples()) { |
369 | errs() << "PERF2BOLT: failed to parse samples\n" ; |
370 | exit(status: 1); |
371 | } |
372 | } |
373 | |
374 | std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { |
375 | outs() << "PERF2BOLT: writing data for autofdo tools...\n" ; |
376 | NamedRegionTimer T("writeAutoFDO" , "Processing branch events" , TimerGroupName, |
377 | TimerGroupDesc, opts::TimeAggregator); |
378 | |
379 | std::error_code EC; |
380 | raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); |
381 | if (EC) |
382 | return EC; |
383 | |
384 | // Format: |
385 | // number of unique traces |
386 | // from_1-to_1:count_1 |
387 | // from_2-to_2:count_2 |
388 | // ...... |
389 | // from_n-to_n:count_n |
390 | // number of unique sample addresses |
391 | // addr_1:count_1 |
392 | // addr_2:count_2 |
393 | // ...... |
394 | // addr_n:count_n |
395 | // number of unique LBR entries |
396 | // src_1->dst_1:count_1 |
397 | // src_2->dst_2:count_2 |
398 | // ...... |
399 | // src_n->dst_n:count_n |
400 | |
401 | const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress; |
402 | |
403 | // AutoFDO addresses are relative to the first allocated loadable program |
404 | // segment |
405 | auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t { |
406 | if (Address < FirstAllocAddress) |
407 | return 0; |
408 | return Address - FirstAllocAddress; |
409 | }; |
410 | |
411 | OutFile << FallthroughLBRs.size() << "\n" ; |
412 | for (const auto &[Trace, Info] : FallthroughLBRs) { |
413 | OutFile << formatv(Fmt: "{0:x-}-{1:x-}:{2}\n" , Vals: filterAddress(Trace.From), |
414 | Vals: filterAddress(Trace.To), |
415 | Vals: Info.InternCount + Info.ExternCount); |
416 | } |
417 | |
418 | OutFile << BasicSamples.size() << "\n" ; |
419 | for (const auto [PC, HitCount] : BasicSamples) |
420 | OutFile << formatv(Fmt: "{0:x-}:{1}\n" , Vals: filterAddress(PC), Vals: HitCount); |
421 | |
422 | OutFile << BranchLBRs.size() << "\n" ; |
423 | for (const auto &[Trace, Info] : BranchLBRs) { |
424 | OutFile << formatv(Fmt: "{0:x-}->{1:x-}:{2}\n" , Vals: filterAddress(Trace.From), |
425 | Vals: filterAddress(Trace.To), Vals: Info.TakenCount); |
426 | } |
427 | |
428 | outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, " |
429 | << BasicSamples.size() << " sample addresses and " << BranchLBRs.size() |
430 | << " unique branches to " << OutputFilename << "\n" ; |
431 | |
432 | return std::error_code(); |
433 | } |
434 | |
435 | void DataAggregator::filterBinaryMMapInfo() { |
436 | if (opts::FilterPID) { |
437 | auto MMapInfoIter = BinaryMMapInfo.find(x: opts::FilterPID); |
438 | if (MMapInfoIter != BinaryMMapInfo.end()) { |
439 | MMapInfo MMap = MMapInfoIter->second; |
440 | BinaryMMapInfo.clear(); |
441 | BinaryMMapInfo.insert(x: std::make_pair(x&: MMap.PID, y&: MMap)); |
442 | } else { |
443 | if (errs().has_colors()) |
444 | errs().changeColor(Color: raw_ostream::RED); |
445 | errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" |
446 | << opts::FilterPID << "\"" |
447 | << " for binary \"" << BC->getFilename() << "\"." ; |
448 | assert(!BinaryMMapInfo.empty() && "No memory map for matching binary" ); |
449 | errs() << " Profile for the following process is available:\n" ; |
450 | for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) |
451 | outs() << " " << MMI.second.PID |
452 | << (MMI.second.Forked ? " (forked)\n" : "\n" ); |
453 | |
454 | if (errs().has_colors()) |
455 | errs().resetColor(); |
456 | |
457 | exit(status: 1); |
458 | } |
459 | } |
460 | } |
461 | |
462 | int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, |
463 | PerfProcessErrorCallbackTy Callback) { |
464 | std::string Error; |
465 | outs() << "PERF2BOLT: waiting for perf " << Name |
466 | << " collection to finish...\n" ; |
467 | sys::ProcessInfo PI = sys::Wait(PI: Process.PI, SecondsToWait: std::nullopt, ErrMsg: &Error); |
468 | |
469 | if (!Error.empty()) { |
470 | errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n" ; |
471 | deleteTempFiles(); |
472 | exit(status: 1); |
473 | } |
474 | |
475 | if (PI.ReturnCode != 0) { |
476 | ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = |
477 | MemoryBuffer::getFileOrSTDIN(Filename: Process.StderrPath.data()); |
478 | StringRef ErrBuf = (*ErrorMB)->getBuffer(); |
479 | |
480 | deleteTempFiles(); |
481 | Callback(PI.ReturnCode, ErrBuf); |
482 | return PI.ReturnCode; |
483 | } |
484 | |
485 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
486 | MemoryBuffer::getFileOrSTDIN(Filename: Process.StdoutPath.data()); |
487 | if (std::error_code EC = MB.getError()) { |
488 | errs() << "Cannot open " << Process.StdoutPath.data() << ": " |
489 | << EC.message() << "\n" ; |
490 | deleteTempFiles(); |
491 | exit(status: 1); |
492 | } |
493 | |
494 | FileBuf = std::move(*MB); |
495 | ParsingBuf = FileBuf->getBuffer(); |
496 | Col = 0; |
497 | Line = 1; |
498 | return PI.ReturnCode; |
499 | } |
500 | |
501 | Error DataAggregator::preprocessProfile(BinaryContext &BC) { |
502 | this->BC = &BC; |
503 | |
504 | if (opts::ReadPreAggregated) { |
505 | parsePreAggregated(); |
506 | return Error::success(); |
507 | } |
508 | |
509 | if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) { |
510 | outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n" ; |
511 | processFileBuildID(FileBuildID: *FileBuildID); |
512 | } else { |
513 | errs() << "BOLT-WARNING: build-id will not be checked because we could " |
514 | "not read one from input binary\n" ; |
515 | } |
516 | |
517 | auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) { |
518 | errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf; |
519 | exit(status: 1); |
520 | }; |
521 | |
522 | auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) { |
523 | Regex NoData("Samples for '.*' event do not have ADDR attribute set. " |
524 | "Cannot print 'addr' field." ); |
525 | if (!NoData.match(String: ErrBuf)) |
526 | ErrorCallback(ReturnCode, ErrBuf); |
527 | }; |
528 | |
529 | if (BC.IsLinuxKernel) { |
530 | // Current MMap parsing logic does not work with linux kernel. |
531 | // MMap entries for linux kernel uses PERF_RECORD_MMAP |
532 | // format instead of typical PERF_RECORD_MMAP2 format. |
533 | // Since linux kernel address mapping is absolute (same as |
534 | // in the ELF file), we avoid parsing MMap in linux kernel mode. |
535 | // While generating optimized linux kernel binary, we may need |
536 | // to parse MMap entries. |
537 | |
538 | // In linux kernel mode, we analyze and optimize |
539 | // all linux kernel binary instructions, irrespective |
540 | // of whether they are due to system calls or due to |
541 | // interrupts. Therefore, we cannot ignore interrupt |
542 | // in Linux kernel mode. |
543 | opts::IgnoreInterruptLBR = false; |
544 | } else { |
545 | prepareToParse(Name: "mmap events" , Process&: MMapEventsPPI, Callback: ErrorCallback); |
546 | if (parseMMapEvents()) |
547 | errs() << "PERF2BOLT: failed to parse mmap events\n" ; |
548 | } |
549 | |
550 | prepareToParse(Name: "task events" , Process&: TaskEventsPPI, Callback: ErrorCallback); |
551 | if (parseTaskEvents()) |
552 | errs() << "PERF2BOLT: failed to parse task events\n" ; |
553 | |
554 | filterBinaryMMapInfo(); |
555 | prepareToParse(Name: "events" , Process&: MainEventsPPI, Callback: ErrorCallback); |
556 | |
557 | if (opts::HeatmapMode) { |
558 | if (std::error_code EC = printLBRHeatMap()) { |
559 | errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; |
560 | exit(status: 1); |
561 | } |
562 | exit(status: 0); |
563 | } |
564 | |
565 | if ((!opts::BasicAggregation && parseBranchEvents()) || |
566 | (opts::BasicAggregation && parseBasicEvents())) |
567 | errs() << "PERF2BOLT: failed to parse samples\n" ; |
568 | |
569 | // We can finish early if the goal is just to generate data for autofdo |
570 | if (opts::WriteAutoFDOData) { |
571 | if (std::error_code EC = writeAutoFDOData(OutputFilename: opts::OutputFilename)) |
572 | errs() << "Error writing autofdo data to file: " << EC.message() << "\n" ; |
573 | |
574 | deleteTempFiles(); |
575 | exit(status: 0); |
576 | } |
577 | |
578 | // Special handling for memory events |
579 | if (prepareToParse(Name: "mem events" , Process&: MemEventsPPI, Callback: MemEventsErrorCallback)) |
580 | return Error::success(); |
581 | |
582 | if (const std::error_code EC = parseMemEvents()) |
583 | errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() |
584 | << '\n'; |
585 | |
586 | deleteTempFiles(); |
587 | |
588 | return Error::success(); |
589 | } |
590 | |
591 | Error DataAggregator::readProfile(BinaryContext &BC) { |
592 | processProfile(BC); |
593 | |
594 | for (auto &BFI : BC.getBinaryFunctions()) { |
595 | BinaryFunction &Function = BFI.second; |
596 | convertBranchData(BF&: Function); |
597 | } |
598 | |
599 | if (opts::AggregateOnly) { |
600 | if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata) |
601 | if (std::error_code EC = writeAggregatedFile(OutputFilename: opts::OutputFilename)) |
602 | report_error(Message: "cannot create output data file" , EC); |
603 | |
604 | // BAT YAML is handled by DataAggregator since normal YAML output requires |
605 | // CFG which is not available in BAT mode. |
606 | if (usesBAT()) { |
607 | if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) |
608 | if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::OutputFilename)) |
609 | report_error(Message: "cannot create output data file" , EC); |
610 | if (!opts::SaveProfile.empty()) |
611 | if (std::error_code EC = writeBATYAML(BC, OutputFilename: opts::SaveProfile)) |
612 | report_error(Message: "cannot create output data file" , EC); |
613 | } |
614 | } |
615 | |
616 | return Error::success(); |
617 | } |
618 | |
619 | bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { |
620 | return Function.hasProfileAvailable(); |
621 | } |
622 | |
623 | void DataAggregator::processProfile(BinaryContext &BC) { |
624 | if (opts::ReadPreAggregated) |
625 | processPreAggregated(); |
626 | else if (opts::BasicAggregation) |
627 | processBasicEvents(); |
628 | else |
629 | processBranchEvents(); |
630 | |
631 | processMemEvents(); |
632 | |
633 | // Mark all functions with registered events as having a valid profile. |
634 | const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE |
635 | : BinaryFunction::PF_LBR; |
636 | for (auto &BFI : BC.getBinaryFunctions()) { |
637 | BinaryFunction &BF = BFI.second; |
638 | if (getBranchData(BF) || getFuncSampleData(FuncNames: BF.getNames())) |
639 | BF.markProfiled(Flags); |
640 | } |
641 | |
642 | for (auto &FuncBranches : NamesToBranches) |
643 | llvm::stable_sort(Range&: FuncBranches.second.Data); |
644 | |
645 | for (auto &MemEvents : NamesToMemEvents) |
646 | llvm::stable_sort(Range&: MemEvents.second.Data); |
647 | |
648 | // Release intermediate storage. |
649 | clear(Container&: BranchLBRs); |
650 | clear(Container&: FallthroughLBRs); |
651 | clear(Container&: AggregatedLBRs); |
652 | clear(Container&: BasicSamples); |
653 | clear(Container&: MemSamples); |
654 | } |
655 | |
656 | BinaryFunction * |
657 | DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const { |
658 | if (!BC->containsAddress(Address)) |
659 | return nullptr; |
660 | |
661 | return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, |
662 | /*UseMaxSize=*/true); |
663 | } |
664 | |
665 | BinaryFunction * |
666 | DataAggregator::getBATParentFunction(const BinaryFunction &Func) const { |
667 | if (BAT) |
668 | if (const uint64_t HotAddr = BAT->fetchParentAddress(Address: Func.getAddress())) |
669 | return getBinaryFunctionContainingAddress(Address: HotAddr); |
670 | return nullptr; |
671 | } |
672 | |
673 | StringRef DataAggregator::getLocationName(const BinaryFunction &Func) const { |
674 | if (!BAT) |
675 | return Func.getOneName(); |
676 | |
677 | const BinaryFunction *OrigFunc = &Func; |
678 | // If it is a local function, prefer the name containing the file name where |
679 | // the local function was declared |
680 | for (StringRef AlternativeName : OrigFunc->getNames()) { |
681 | size_t FileNameIdx = AlternativeName.find(C: '/'); |
682 | // Confirm the alternative name has the pattern Symbol/FileName/1 before |
683 | // using it |
684 | if (FileNameIdx == StringRef::npos || |
685 | AlternativeName.find(C: '/', From: FileNameIdx + 1) == StringRef::npos) |
686 | continue; |
687 | return AlternativeName; |
688 | } |
689 | return OrigFunc->getOneName(); |
690 | } |
691 | |
692 | bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address, |
693 | uint64_t Count) { |
694 | BinaryFunction *ParentFunc = getBATParentFunction(Func: OrigFunc); |
695 | BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc; |
696 | if (ParentFunc) |
697 | NumColdSamples += Count; |
698 | |
699 | auto I = NamesToSamples.find(x: Func.getOneName()); |
700 | if (I == NamesToSamples.end()) { |
701 | bool Success; |
702 | StringRef LocName = getLocationName(Func); |
703 | std::tie(args&: I, args&: Success) = NamesToSamples.insert( |
704 | x: std::make_pair(x: Func.getOneName(), |
705 | y: FuncSampleData(LocName, FuncSampleData::ContainerTy()))); |
706 | } |
707 | |
708 | Address -= Func.getAddress(); |
709 | if (BAT) |
710 | Address = BAT->translate(FuncAddress: Func.getAddress(), Offset: Address, /*IsBranchSrc=*/false); |
711 | |
712 | I->second.bumpCount(Offset: Address, Count); |
713 | return true; |
714 | } |
715 | |
716 | bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From, |
717 | uint64_t To, uint64_t Count, |
718 | uint64_t Mispreds) { |
719 | FuncBranchData *AggrData = getBranchData(BF: Func); |
720 | if (!AggrData) { |
721 | AggrData = &NamesToBranches[Func.getOneName()]; |
722 | AggrData->Name = getLocationName(Func); |
723 | setBranchData(BF: Func, FBD: AggrData); |
724 | } |
725 | |
726 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " |
727 | << formatv("{0} @ {1:x} -> {0} @ {2:x}\n" , Func, From, To)); |
728 | AggrData->bumpBranchCount(OffsetFrom: From, OffsetTo: To, Count, Mispreds); |
729 | return true; |
730 | } |
731 | |
732 | bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, |
733 | BinaryFunction *ToFunc, uint64_t From, |
734 | uint64_t To, uint64_t Count, |
735 | uint64_t Mispreds) { |
736 | FuncBranchData *FromAggrData = nullptr; |
737 | FuncBranchData *ToAggrData = nullptr; |
738 | StringRef SrcFunc; |
739 | StringRef DstFunc; |
740 | if (FromFunc) { |
741 | SrcFunc = getLocationName(Func: *FromFunc); |
742 | FromAggrData = getBranchData(BF: *FromFunc); |
743 | if (!FromAggrData) { |
744 | FromAggrData = &NamesToBranches[FromFunc->getOneName()]; |
745 | FromAggrData->Name = SrcFunc; |
746 | setBranchData(BF: *FromFunc, FBD: FromAggrData); |
747 | } |
748 | |
749 | recordExit(BF&: *FromFunc, From, Mispred: Mispreds, Count); |
750 | } |
751 | if (ToFunc) { |
752 | DstFunc = getLocationName(Func: *ToFunc); |
753 | ToAggrData = getBranchData(BF: *ToFunc); |
754 | if (!ToAggrData) { |
755 | ToAggrData = &NamesToBranches[ToFunc->getOneName()]; |
756 | ToAggrData->Name = DstFunc; |
757 | setBranchData(BF: *ToFunc, FBD: ToAggrData); |
758 | } |
759 | |
760 | recordEntry(BF&: *ToFunc, To, Mispred: Mispreds, Count); |
761 | } |
762 | |
763 | if (FromAggrData) |
764 | FromAggrData->bumpCallCount(OffsetFrom: From, To: Location(!DstFunc.empty(), DstFunc, To), |
765 | Count, Mispreds); |
766 | if (ToAggrData) |
767 | ToAggrData->bumpEntryCount(From: Location(!SrcFunc.empty(), SrcFunc, From), OffsetTo: To, |
768 | Count, Mispreds); |
769 | return true; |
770 | } |
771 | |
772 | bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, |
773 | uint64_t Mispreds) { |
774 | auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * { |
775 | if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: Addr)) { |
776 | Addr -= Func->getAddress(); |
777 | |
778 | if (BAT) |
779 | Addr = BAT->translate(FuncAddress: Func->getAddress(), Offset: Addr, IsBranchSrc: IsFrom); |
780 | |
781 | if (BinaryFunction *ParentFunc = getBATParentFunction(Func: *Func)) { |
782 | Func = ParentFunc; |
783 | if (IsFrom) |
784 | NumColdSamples += Count; |
785 | } |
786 | |
787 | return Func; |
788 | } |
789 | return nullptr; |
790 | }; |
791 | |
792 | BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true); |
793 | BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false); |
794 | if (!FromFunc && !ToFunc) |
795 | return false; |
796 | |
797 | // Treat recursive control transfers as inter-branches. |
798 | if (FromFunc == ToFunc && To != 0) { |
799 | recordBranch(BF&: *FromFunc, From, To, Count, Mispreds); |
800 | return doIntraBranch(Func&: *FromFunc, From, To, Count, Mispreds); |
801 | } |
802 | |
803 | return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); |
804 | } |
805 | |
806 | bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, |
807 | uint64_t Count) { |
808 | BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(Address: First.To); |
809 | BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Address: Second.From); |
810 | if (!FromFunc || !ToFunc) { |
811 | LLVM_DEBUG({ |
812 | dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() |
813 | << formatv(" @ {0:x}" , First.To - FromFunc->getAddress()) |
814 | << " and ending in " << ToFunc->getPrintName() |
815 | << formatv(" @ {0:x}\n" , Second.From - ToFunc->getAddress()); |
816 | }); |
817 | NumLongRangeTraces += Count; |
818 | return false; |
819 | } |
820 | if (FromFunc != ToFunc) { |
821 | NumInvalidTraces += Count; |
822 | LLVM_DEBUG({ |
823 | dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() |
824 | << formatv(" @ {0:x}" , First.To - FromFunc->getAddress()) |
825 | << " and ending in " << ToFunc->getPrintName() |
826 | << formatv(" @ {0:x}\n" , Second.From - ToFunc->getAddress()); |
827 | }); |
828 | return false; |
829 | } |
830 | |
831 | std::optional<BoltAddressTranslation::FallthroughListTy> FTs = |
832 | BAT ? BAT->getFallthroughsInTrace(FuncAddress: FromFunc->getAddress(), From: First.To, |
833 | To: Second.From) |
834 | : getFallthroughsInTrace(BF&: *FromFunc, First, Second, Count); |
835 | if (!FTs) { |
836 | LLVM_DEBUG( |
837 | dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() |
838 | << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) |
839 | << " and ending in " << ToFunc->getPrintName() << " @ " |
840 | << ToFunc->getPrintName() << " @ " |
841 | << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); |
842 | NumInvalidTraces += Count; |
843 | return false; |
844 | } |
845 | |
846 | LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " |
847 | << FromFunc->getPrintName() << ":" |
848 | << Twine::utohexstr(First.To) << " to " |
849 | << Twine::utohexstr(Second.From) << ".\n" ); |
850 | BinaryFunction *ParentFunc = getBATParentFunction(Func: *FromFunc); |
851 | for (auto [From, To] : *FTs) { |
852 | if (BAT) { |
853 | From = BAT->translate(FuncAddress: FromFunc->getAddress(), Offset: From, /*IsBranchSrc=*/true); |
854 | To = BAT->translate(FuncAddress: FromFunc->getAddress(), Offset: To, /*IsBranchSrc=*/false); |
855 | } |
856 | doIntraBranch(Func&: ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, Mispreds: false); |
857 | } |
858 | |
859 | return true; |
860 | } |
861 | |
862 | bool DataAggregator::recordTrace( |
863 | BinaryFunction &BF, const LBREntry &FirstLBR, const LBREntry &SecondLBR, |
864 | uint64_t Count, |
865 | SmallVector<std::pair<uint64_t, uint64_t>, 16> &Branches) const { |
866 | BinaryContext &BC = BF.getBinaryContext(); |
867 | |
868 | if (!BF.isSimple()) |
869 | return false; |
870 | |
871 | assert(BF.hasCFG() && "can only record traces in CFG state" ); |
872 | |
873 | // Offsets of the trace within this function. |
874 | const uint64_t From = FirstLBR.To - BF.getAddress(); |
875 | const uint64_t To = SecondLBR.From - BF.getAddress(); |
876 | |
877 | if (From > To) |
878 | return false; |
879 | |
880 | const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: From); |
881 | const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(Offset: To); |
882 | |
883 | if (!FromBB || !ToBB) |
884 | return false; |
885 | |
886 | // Adjust FromBB if the first LBR is a return from the last instruction in |
887 | // the previous block (that instruction should be a call). |
888 | if (From == FromBB->getOffset() && !BF.containsAddress(PC: FirstLBR.From) && |
889 | !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { |
890 | const BinaryBasicBlock *PrevBB = |
891 | BF.getLayout().getBlock(Index: FromBB->getIndex() - 1); |
892 | if (PrevBB->getSuccessor(Label: FromBB->getLabel())) { |
893 | const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); |
894 | if (Instr && BC.MIB->isCall(Inst: *Instr)) |
895 | FromBB = PrevBB; |
896 | else |
897 | LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR |
898 | << '\n'); |
899 | } else { |
900 | LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); |
901 | } |
902 | } |
903 | |
904 | // Fill out information for fall-through edges. The From and To could be |
905 | // within the same basic block, e.g. when two call instructions are in the |
906 | // same block. In this case we skip the processing. |
907 | if (FromBB == ToBB) |
908 | return true; |
909 | |
910 | // Process blocks in the original layout order. |
911 | BinaryBasicBlock *BB = BF.getLayout().getBlock(Index: FromBB->getIndex()); |
912 | assert(BB == FromBB && "index mismatch" ); |
913 | while (BB != ToBB) { |
914 | BinaryBasicBlock *NextBB = BF.getLayout().getBlock(Index: BB->getIndex() + 1); |
915 | assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout" ); |
916 | |
917 | // Check for bad LBRs. |
918 | if (!BB->getSuccessor(Label: NextBB->getLabel())) { |
919 | LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" |
920 | << " " << FirstLBR << '\n' |
921 | << " " << SecondLBR << '\n'); |
922 | return false; |
923 | } |
924 | |
925 | const MCInst *Instr = BB->getLastNonPseudoInstr(); |
926 | uint64_t Offset = 0; |
927 | if (Instr) |
928 | Offset = BC.MIB->getOffsetWithDefault(Inst: *Instr, Default: 0); |
929 | else |
930 | Offset = BB->getOffset(); |
931 | |
932 | Branches.emplace_back(Args&: Offset, Args: NextBB->getOffset()); |
933 | |
934 | BB = NextBB; |
935 | } |
936 | |
937 | // Record fall-through jumps |
938 | for (const auto &[FromOffset, ToOffset] : Branches) { |
939 | BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(Offset: FromOffset); |
940 | BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(Offset: ToOffset); |
941 | assert(FromBB && ToBB); |
942 | BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(Succ: *ToBB); |
943 | BI.Count += Count; |
944 | } |
945 | |
946 | return true; |
947 | } |
948 | |
949 | std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>> |
950 | DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, |
951 | const LBREntry &FirstLBR, |
952 | const LBREntry &SecondLBR, |
953 | uint64_t Count) const { |
954 | SmallVector<std::pair<uint64_t, uint64_t>, 16> Res; |
955 | |
956 | if (!recordTrace(BF, FirstLBR, SecondLBR, Count, Branches&: Res)) |
957 | return std::nullopt; |
958 | |
959 | return Res; |
960 | } |
961 | |
962 | bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred, |
963 | uint64_t Count) const { |
964 | if (To > BF.getSize()) |
965 | return false; |
966 | |
967 | if (!BF.hasProfile()) |
968 | BF.ExecutionCount = 0; |
969 | |
970 | BinaryBasicBlock *EntryBB = nullptr; |
971 | if (To == 0) { |
972 | BF.ExecutionCount += Count; |
973 | if (!BF.empty()) |
974 | EntryBB = &BF.front(); |
975 | } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(Offset: To)) { |
976 | if (BB->isEntryPoint()) |
977 | EntryBB = BB; |
978 | } |
979 | |
980 | if (EntryBB) |
981 | EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); |
982 | |
983 | return true; |
984 | } |
985 | |
986 | bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, |
987 | uint64_t Count) const { |
988 | if (!BF.isSimple() || From > BF.getSize()) |
989 | return false; |
990 | |
991 | if (!BF.hasProfile()) |
992 | BF.ExecutionCount = 0; |
993 | |
994 | return true; |
995 | } |
996 | |
997 | ErrorOr<LBREntry> DataAggregator::parseLBREntry() { |
998 | LBREntry Res; |
999 | ErrorOr<StringRef> FromStrRes = parseString(EndChar: '/'); |
1000 | if (std::error_code EC = FromStrRes.getError()) |
1001 | return EC; |
1002 | StringRef OffsetStr = FromStrRes.get(); |
1003 | if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.From)) { |
1004 | reportError(ErrorMsg: "expected hexadecimal number with From address" ); |
1005 | Diag << "Found: " << OffsetStr << "\n" ; |
1006 | return make_error_code(E: llvm::errc::io_error); |
1007 | } |
1008 | |
1009 | ErrorOr<StringRef> ToStrRes = parseString(EndChar: '/'); |
1010 | if (std::error_code EC = ToStrRes.getError()) |
1011 | return EC; |
1012 | OffsetStr = ToStrRes.get(); |
1013 | if (OffsetStr.getAsInteger(Radix: 0, Result&: Res.To)) { |
1014 | reportError(ErrorMsg: "expected hexadecimal number with To address" ); |
1015 | Diag << "Found: " << OffsetStr << "\n" ; |
1016 | return make_error_code(E: llvm::errc::io_error); |
1017 | } |
1018 | |
1019 | ErrorOr<StringRef> MispredStrRes = parseString(EndChar: '/'); |
1020 | if (std::error_code EC = MispredStrRes.getError()) |
1021 | return EC; |
1022 | StringRef MispredStr = MispredStrRes.get(); |
1023 | if (MispredStr.size() != 1 || |
1024 | (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { |
1025 | reportError(ErrorMsg: "expected single char for mispred bit" ); |
1026 | Diag << "Found: " << MispredStr << "\n" ; |
1027 | return make_error_code(E: llvm::errc::io_error); |
1028 | } |
1029 | Res.Mispred = MispredStr[0] == 'M'; |
1030 | |
1031 | static bool MispredWarning = true; |
1032 | if (MispredStr[0] == '-' && MispredWarning) { |
1033 | errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n" ; |
1034 | MispredWarning = false; |
1035 | } |
1036 | |
1037 | ErrorOr<StringRef> Rest = parseString(EndChar: FieldSeparator, EndNl: true); |
1038 | if (std::error_code EC = Rest.getError()) |
1039 | return EC; |
1040 | if (Rest.get().size() < 5) { |
1041 | reportError(ErrorMsg: "expected rest of LBR entry" ); |
1042 | Diag << "Found: " << Rest.get() << "\n" ; |
1043 | return make_error_code(E: llvm::errc::io_error); |
1044 | } |
1045 | return Res; |
1046 | } |
1047 | |
1048 | bool DataAggregator::checkAndConsumeFS() { |
1049 | if (ParsingBuf[0] != FieldSeparator) |
1050 | return false; |
1051 | |
1052 | ParsingBuf = ParsingBuf.drop_front(N: 1); |
1053 | Col += 1; |
1054 | return true; |
1055 | } |
1056 | |
1057 | void DataAggregator::consumeRestOfLine() { |
1058 | size_t LineEnd = ParsingBuf.find_first_of(C: '\n'); |
1059 | if (LineEnd == StringRef::npos) { |
1060 | ParsingBuf = StringRef(); |
1061 | Col = 0; |
1062 | Line += 1; |
1063 | return; |
1064 | } |
1065 | ParsingBuf = ParsingBuf.drop_front(N: LineEnd + 1); |
1066 | Col = 0; |
1067 | Line += 1; |
1068 | } |
1069 | |
1070 | bool DataAggregator::checkNewLine() { |
1071 | return ParsingBuf[0] == '\n'; |
1072 | } |
1073 | |
1074 | ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { |
1075 | PerfBranchSample Res; |
1076 | |
1077 | while (checkAndConsumeFS()) { |
1078 | } |
1079 | |
1080 | ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true); |
1081 | if (std::error_code EC = PIDRes.getError()) |
1082 | return EC; |
1083 | auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes); |
1084 | if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) { |
1085 | consumeRestOfLine(); |
1086 | return make_error_code(E: errc::no_such_process); |
1087 | } |
1088 | |
1089 | while (checkAndConsumeFS()) { |
1090 | } |
1091 | |
1092 | ErrorOr<uint64_t> PCRes = parseHexField(EndChar: FieldSeparator, EndNl: true); |
1093 | if (std::error_code EC = PCRes.getError()) |
1094 | return EC; |
1095 | Res.PC = PCRes.get(); |
1096 | |
1097 | if (checkAndConsumeNewLine()) |
1098 | return Res; |
1099 | |
1100 | while (!checkAndConsumeNewLine()) { |
1101 | checkAndConsumeFS(); |
1102 | |
1103 | ErrorOr<LBREntry> LBRRes = parseLBREntry(); |
1104 | if (std::error_code EC = LBRRes.getError()) |
1105 | return EC; |
1106 | LBREntry LBR = LBRRes.get(); |
1107 | if (ignoreKernelInterrupt(LBR)) |
1108 | continue; |
1109 | if (!BC->HasFixedLoadAddress) |
1110 | adjustLBR(LBR, MMI: MMapInfoIter->second); |
1111 | Res.LBR.push_back(Elt: LBR); |
1112 | } |
1113 | |
1114 | return Res; |
1115 | } |
1116 | |
1117 | ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { |
1118 | while (checkAndConsumeFS()) { |
1119 | } |
1120 | |
1121 | ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true); |
1122 | if (std::error_code EC = PIDRes.getError()) |
1123 | return EC; |
1124 | |
1125 | auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes); |
1126 | if (MMapInfoIter == BinaryMMapInfo.end()) { |
1127 | consumeRestOfLine(); |
1128 | return PerfBasicSample{.EventName: StringRef(), .PC: 0}; |
1129 | } |
1130 | |
1131 | while (checkAndConsumeFS()) { |
1132 | } |
1133 | |
1134 | ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator); |
1135 | if (std::error_code EC = Event.getError()) |
1136 | return EC; |
1137 | |
1138 | while (checkAndConsumeFS()) { |
1139 | } |
1140 | |
1141 | ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator, EndNl: true); |
1142 | if (std::error_code EC = AddrRes.getError()) |
1143 | return EC; |
1144 | |
1145 | if (!checkAndConsumeNewLine()) { |
1146 | reportError(ErrorMsg: "expected end of line" ); |
1147 | return make_error_code(E: llvm::errc::io_error); |
1148 | } |
1149 | |
1150 | uint64_t Address = *AddrRes; |
1151 | if (!BC->HasFixedLoadAddress) |
1152 | adjustAddress(Address, MMI: MMapInfoIter->second); |
1153 | |
1154 | return PerfBasicSample{.EventName: Event.get(), .PC: Address}; |
1155 | } |
1156 | |
1157 | ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { |
1158 | PerfMemSample Res{.PC: 0, .Addr: 0}; |
1159 | |
1160 | while (checkAndConsumeFS()) { |
1161 | } |
1162 | |
1163 | ErrorOr<int64_t> PIDRes = parseNumberField(EndChar: FieldSeparator, EndNl: true); |
1164 | if (std::error_code EC = PIDRes.getError()) |
1165 | return EC; |
1166 | |
1167 | auto MMapInfoIter = BinaryMMapInfo.find(x: *PIDRes); |
1168 | if (MMapInfoIter == BinaryMMapInfo.end()) { |
1169 | consumeRestOfLine(); |
1170 | return Res; |
1171 | } |
1172 | |
1173 | while (checkAndConsumeFS()) { |
1174 | } |
1175 | |
1176 | ErrorOr<StringRef> Event = parseString(EndChar: FieldSeparator); |
1177 | if (std::error_code EC = Event.getError()) |
1178 | return EC; |
1179 | if (!Event.get().contains(Other: "mem-loads" )) { |
1180 | consumeRestOfLine(); |
1181 | return Res; |
1182 | } |
1183 | |
1184 | while (checkAndConsumeFS()) { |
1185 | } |
1186 | |
1187 | ErrorOr<uint64_t> AddrRes = parseHexField(EndChar: FieldSeparator); |
1188 | if (std::error_code EC = AddrRes.getError()) |
1189 | return EC; |
1190 | |
1191 | while (checkAndConsumeFS()) { |
1192 | } |
1193 | |
1194 | ErrorOr<uint64_t> PCRes = parseHexField(EndChar: FieldSeparator, EndNl: true); |
1195 | if (std::error_code EC = PCRes.getError()) { |
1196 | consumeRestOfLine(); |
1197 | return EC; |
1198 | } |
1199 | |
1200 | if (!checkAndConsumeNewLine()) { |
1201 | reportError(ErrorMsg: "expected end of line" ); |
1202 | return make_error_code(E: llvm::errc::io_error); |
1203 | } |
1204 | |
1205 | uint64_t Address = *AddrRes; |
1206 | if (!BC->HasFixedLoadAddress) |
1207 | adjustAddress(Address, MMI: MMapInfoIter->second); |
1208 | |
1209 | return PerfMemSample{.PC: PCRes.get(), .Addr: Address}; |
1210 | } |
1211 | |
1212 | ErrorOr<Location> DataAggregator::parseLocationOrOffset() { |
1213 | auto parseOffset = [this]() -> ErrorOr<Location> { |
1214 | ErrorOr<uint64_t> Res = parseHexField(EndChar: FieldSeparator); |
1215 | if (std::error_code EC = Res.getError()) |
1216 | return EC; |
1217 | return Location(Res.get()); |
1218 | }; |
1219 | |
1220 | size_t Sep = ParsingBuf.find_first_of(Chars: " \n" ); |
1221 | if (Sep == StringRef::npos) |
1222 | return parseOffset(); |
1223 | StringRef LookAhead = ParsingBuf.substr(Start: 0, N: Sep); |
1224 | if (LookAhead.find_first_of(Chars: ":" ) == StringRef::npos) |
1225 | return parseOffset(); |
1226 | |
1227 | ErrorOr<StringRef> BuildID = parseString(EndChar: ':'); |
1228 | if (std::error_code EC = BuildID.getError()) |
1229 | return EC; |
1230 | ErrorOr<uint64_t> Offset = parseHexField(EndChar: FieldSeparator); |
1231 | if (std::error_code EC = Offset.getError()) |
1232 | return EC; |
1233 | return Location(true, BuildID.get(), Offset.get()); |
1234 | } |
1235 | |
1236 | ErrorOr<DataAggregator::AggregatedLBREntry> |
1237 | DataAggregator::parseAggregatedLBREntry() { |
1238 | while (checkAndConsumeFS()) { |
1239 | } |
1240 | |
1241 | ErrorOr<StringRef> TypeOrErr = parseString(EndChar: FieldSeparator); |
1242 | if (std::error_code EC = TypeOrErr.getError()) |
1243 | return EC; |
1244 | auto Type = AggregatedLBREntry::BRANCH; |
1245 | if (TypeOrErr.get() == "B" ) { |
1246 | Type = AggregatedLBREntry::BRANCH; |
1247 | } else if (TypeOrErr.get() == "F" ) { |
1248 | Type = AggregatedLBREntry::FT; |
1249 | } else if (TypeOrErr.get() == "f" ) { |
1250 | Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; |
1251 | } else { |
1252 | reportError(ErrorMsg: "expected B, F or f" ); |
1253 | return make_error_code(E: llvm::errc::io_error); |
1254 | } |
1255 | |
1256 | while (checkAndConsumeFS()) { |
1257 | } |
1258 | ErrorOr<Location> From = parseLocationOrOffset(); |
1259 | if (std::error_code EC = From.getError()) |
1260 | return EC; |
1261 | |
1262 | while (checkAndConsumeFS()) { |
1263 | } |
1264 | ErrorOr<Location> To = parseLocationOrOffset(); |
1265 | if (std::error_code EC = To.getError()) |
1266 | return EC; |
1267 | |
1268 | while (checkAndConsumeFS()) { |
1269 | } |
1270 | ErrorOr<int64_t> Frequency = |
1271 | parseNumberField(EndChar: FieldSeparator, EndNl: Type != AggregatedLBREntry::BRANCH); |
1272 | if (std::error_code EC = Frequency.getError()) |
1273 | return EC; |
1274 | |
1275 | uint64_t Mispreds = 0; |
1276 | if (Type == AggregatedLBREntry::BRANCH) { |
1277 | while (checkAndConsumeFS()) { |
1278 | } |
1279 | ErrorOr<int64_t> MispredsOrErr = parseNumberField(EndChar: FieldSeparator, EndNl: true); |
1280 | if (std::error_code EC = MispredsOrErr.getError()) |
1281 | return EC; |
1282 | Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); |
1283 | } |
1284 | |
1285 | if (!checkAndConsumeNewLine()) { |
1286 | reportError(ErrorMsg: "expected end of line" ); |
1287 | return make_error_code(E: llvm::errc::io_error); |
1288 | } |
1289 | |
1290 | return AggregatedLBREntry{.From: From.get(), .To: To.get(), |
1291 | .Count: static_cast<uint64_t>(Frequency.get()), .Mispreds: Mispreds, |
1292 | .EntryType: Type}; |
1293 | } |
1294 | |
1295 | bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { |
1296 | return opts::IgnoreInterruptLBR && |
1297 | (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); |
1298 | } |
1299 | |
1300 | std::error_code DataAggregator::printLBRHeatMap() { |
1301 | outs() << "PERF2BOLT: parse branch events...\n" ; |
1302 | NamedRegionTimer T("parseBranch" , "Parsing branch events" , TimerGroupName, |
1303 | TimerGroupDesc, opts::TimeAggregator); |
1304 | |
1305 | if (BC->IsLinuxKernel) { |
1306 | opts::HeatmapMaxAddress = 0xffffffffffffffff; |
1307 | opts::HeatmapMinAddress = KernelBaseAddr; |
1308 | } |
1309 | Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, |
1310 | opts::HeatmapMaxAddress, getTextSections(BC)); |
1311 | uint64_t NumTotalSamples = 0; |
1312 | |
1313 | if (opts::BasicAggregation) { |
1314 | while (hasData()) { |
1315 | ErrorOr<PerfBasicSample> SampleRes = parseBasicSample(); |
1316 | if (std::error_code EC = SampleRes.getError()) { |
1317 | if (EC == errc::no_such_process) |
1318 | continue; |
1319 | return EC; |
1320 | } |
1321 | PerfBasicSample &Sample = SampleRes.get(); |
1322 | HM.registerAddress(Address: Sample.PC); |
1323 | NumTotalSamples++; |
1324 | } |
1325 | outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n" ; |
1326 | } else { |
1327 | while (hasData()) { |
1328 | ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); |
1329 | if (std::error_code EC = SampleRes.getError()) { |
1330 | if (EC == errc::no_such_process) |
1331 | continue; |
1332 | return EC; |
1333 | } |
1334 | |
1335 | PerfBranchSample &Sample = SampleRes.get(); |
1336 | |
1337 | // LBRs are stored in reverse execution order. NextLBR refers to the next |
1338 | // executed branch record. |
1339 | const LBREntry *NextLBR = nullptr; |
1340 | for (const LBREntry &LBR : Sample.LBR) { |
1341 | if (NextLBR) { |
1342 | // Record fall-through trace. |
1343 | const uint64_t TraceFrom = LBR.To; |
1344 | const uint64_t TraceTo = NextLBR->From; |
1345 | ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; |
1346 | } |
1347 | NextLBR = &LBR; |
1348 | } |
1349 | if (!Sample.LBR.empty()) { |
1350 | HM.registerAddress(Address: Sample.LBR.front().To); |
1351 | HM.registerAddress(Address: Sample.LBR.back().From); |
1352 | } |
1353 | NumTotalSamples += Sample.LBR.size(); |
1354 | } |
1355 | outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n" ; |
1356 | outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n" ; |
1357 | } |
1358 | |
1359 | if (!NumTotalSamples) { |
1360 | if (opts::BasicAggregation) { |
1361 | errs() << "HEATMAP-ERROR: no basic event samples detected in profile. " |
1362 | "Cannot build heatmap." ; |
1363 | } else { |
1364 | errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " |
1365 | "Cannot build heatmap. Use -nl for building heatmap from " |
1366 | "basic events.\n" ; |
1367 | } |
1368 | exit(status: 1); |
1369 | } |
1370 | |
1371 | outs() << "HEATMAP: building heat map...\n" ; |
1372 | |
1373 | for (const auto &LBR : FallthroughLBRs) { |
1374 | const Trace &Trace = LBR.first; |
1375 | const FTInfo &Info = LBR.second; |
1376 | HM.registerAddressRange(StartAddress: Trace.From, EndAddress: Trace.To, Count: Info.InternCount); |
1377 | } |
1378 | |
1379 | if (HM.getNumInvalidRanges()) |
1380 | outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; |
1381 | |
1382 | if (!HM.size()) { |
1383 | errs() << "HEATMAP-ERROR: no valid traces registered\n" ; |
1384 | exit(status: 1); |
1385 | } |
1386 | |
1387 | HM.print(FileName: opts::OutputFilename); |
1388 | if (opts::OutputFilename == "-" ) |
1389 | HM.printCDF(FileName: opts::OutputFilename); |
1390 | else |
1391 | HM.printCDF(FileName: opts::OutputFilename + ".csv" ); |
1392 | if (opts::OutputFilename == "-" ) |
1393 | HM.printSectionHotness(Filename: opts::OutputFilename); |
1394 | else |
1395 | HM.printSectionHotness(Filename: opts::OutputFilename + "-section-hotness.csv" ); |
1396 | |
1397 | return std::error_code(); |
1398 | } |
1399 | |
1400 | uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample, |
1401 | bool NeedsSkylakeFix) { |
1402 | uint64_t NumTraces{0}; |
1403 | // LBRs are stored in reverse execution order. NextPC refers to the next |
1404 | // recorded executed PC. |
1405 | uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0; |
1406 | uint32_t NumEntry = 0; |
1407 | for (const LBREntry &LBR : Sample.LBR) { |
1408 | ++NumEntry; |
1409 | // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) |
1410 | // sometimes record entry 32 as an exact copy of entry 31. This will cause |
1411 | // us to likely record an invalid trace and generate a stale function for |
1412 | // BAT mode (non BAT disassembles the function and is able to ignore this |
1413 | // trace at aggregation time). Drop first 2 entries (last two, in |
1414 | // chronological order) |
1415 | if (NeedsSkylakeFix && NumEntry <= 2) |
1416 | continue; |
1417 | if (NextPC) { |
1418 | // Record fall-through trace. |
1419 | const uint64_t TraceFrom = LBR.To; |
1420 | const uint64_t TraceTo = NextPC; |
1421 | const BinaryFunction *TraceBF = |
1422 | getBinaryFunctionContainingAddress(Address: TraceFrom); |
1423 | if (TraceBF && TraceBF->containsAddress(PC: TraceTo)) { |
1424 | FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; |
1425 | if (TraceBF->containsAddress(PC: LBR.From)) |
1426 | ++Info.InternCount; |
1427 | else |
1428 | ++Info.ExternCount; |
1429 | } else { |
1430 | const BinaryFunction *ToFunc = |
1431 | getBinaryFunctionContainingAddress(Address: TraceTo); |
1432 | if (TraceBF && ToFunc) { |
1433 | LLVM_DEBUG({ |
1434 | dbgs() << "Invalid trace starting in " << TraceBF->getPrintName() |
1435 | << formatv(" @ {0:x}" , TraceFrom - TraceBF->getAddress()) |
1436 | << formatv(" and ending @ {0:x}\n" , TraceTo); |
1437 | }); |
1438 | ++NumInvalidTraces; |
1439 | } else { |
1440 | LLVM_DEBUG({ |
1441 | dbgs() << "Out of range trace starting in " |
1442 | << (TraceBF ? TraceBF->getPrintName() : "None" ) |
1443 | << formatv(" @ {0:x}" , |
1444 | TraceFrom - (TraceBF ? TraceBF->getAddress() : 0)) |
1445 | << " and ending in " |
1446 | << (ToFunc ? ToFunc->getPrintName() : "None" ) |
1447 | << formatv(" @ {0:x}\n" , |
1448 | TraceTo - (ToFunc ? ToFunc->getAddress() : 0)); |
1449 | }); |
1450 | ++NumLongRangeTraces; |
1451 | } |
1452 | } |
1453 | ++NumTraces; |
1454 | } |
1455 | NextPC = LBR.From; |
1456 | |
1457 | uint64_t From = getBinaryFunctionContainingAddress(Address: LBR.From) ? LBR.From : 0; |
1458 | uint64_t To = getBinaryFunctionContainingAddress(Address: LBR.To) ? LBR.To : 0; |
1459 | if (!From && !To) |
1460 | continue; |
1461 | BranchInfo &Info = BranchLBRs[Trace(From, To)]; |
1462 | ++Info.TakenCount; |
1463 | Info.MispredCount += LBR.Mispred; |
1464 | } |
1465 | return NumTraces; |
1466 | } |
1467 | |
1468 | std::error_code DataAggregator::parseBranchEvents() { |
1469 | outs() << "PERF2BOLT: parse branch events...\n" ; |
1470 | NamedRegionTimer T("parseBranch" , "Parsing branch events" , TimerGroupName, |
1471 | TimerGroupDesc, opts::TimeAggregator); |
1472 | |
1473 | uint64_t NumTotalSamples = 0; |
1474 | uint64_t NumEntries = 0; |
1475 | uint64_t NumSamples = 0; |
1476 | uint64_t NumSamplesNoLBR = 0; |
1477 | uint64_t NumTraces = 0; |
1478 | bool NeedsSkylakeFix = false; |
1479 | |
1480 | while (hasData() && NumTotalSamples < opts::MaxSamples) { |
1481 | ++NumTotalSamples; |
1482 | |
1483 | ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); |
1484 | if (std::error_code EC = SampleRes.getError()) { |
1485 | if (EC == errc::no_such_process) |
1486 | continue; |
1487 | return EC; |
1488 | } |
1489 | ++NumSamples; |
1490 | |
1491 | PerfBranchSample &Sample = SampleRes.get(); |
1492 | if (opts::WriteAutoFDOData) |
1493 | ++BasicSamples[Sample.PC]; |
1494 | |
1495 | if (Sample.LBR.empty()) { |
1496 | ++NumSamplesNoLBR; |
1497 | continue; |
1498 | } |
1499 | |
1500 | NumEntries += Sample.LBR.size(); |
1501 | if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { |
1502 | errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n" ; |
1503 | NeedsSkylakeFix = true; |
1504 | } |
1505 | |
1506 | NumTraces += parseLBRSample(Sample, NeedsSkylakeFix); |
1507 | } |
1508 | |
1509 | for (const Trace &Trace : llvm::make_first_range(c&: BranchLBRs)) |
1510 | for (const uint64_t Addr : {Trace.From, Trace.To}) |
1511 | if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Addr)) |
1512 | BF->setHasProfileAvailable(); |
1513 | |
1514 | auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) { |
1515 | OS << " (" ; |
1516 | if (OS.has_colors()) { |
1517 | if (Percent > T2) |
1518 | OS.changeColor(Color: raw_ostream::RED); |
1519 | else if (Percent > T1) |
1520 | OS.changeColor(Color: raw_ostream::YELLOW); |
1521 | else |
1522 | OS.changeColor(Color: raw_ostream::GREEN); |
1523 | } |
1524 | OS << format(Fmt: "%.1f%%" , Vals: Percent); |
1525 | if (OS.has_colors()) |
1526 | OS.resetColor(); |
1527 | OS << ")" ; |
1528 | }; |
1529 | |
1530 | outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries |
1531 | << " LBR entries\n" ; |
1532 | if (NumTotalSamples) { |
1533 | if (NumSamples && NumSamplesNoLBR == NumSamples) { |
1534 | // Note: we don't know if perf2bolt is being used to parse memory samples |
1535 | // at this point. In this case, it is OK to parse zero LBRs. |
1536 | errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " |
1537 | "LBR. Record profile with perf record -j any or run perf2bolt " |
1538 | "in no-LBR mode with -nl (the performance improvement in -nl " |
1539 | "mode may be limited)\n" ; |
1540 | } else { |
1541 | const uint64_t IgnoredSamples = NumTotalSamples - NumSamples; |
1542 | const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples; |
1543 | outs() << "PERF2BOLT: " << IgnoredSamples << " samples" ; |
1544 | printColored(outs(), PercentIgnored, 20, 50); |
1545 | outs() << " were ignored\n" ; |
1546 | if (PercentIgnored > 50.0f) |
1547 | errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " |
1548 | "were attributed to the input binary\n" ; |
1549 | } |
1550 | } |
1551 | outs() << "PERF2BOLT: traces mismatching disassembled function contents: " |
1552 | << NumInvalidTraces; |
1553 | float Perc = 0.0f; |
1554 | if (NumTraces > 0) { |
1555 | Perc = NumInvalidTraces * 100.0f / NumTraces; |
1556 | printColored(outs(), Perc, 5, 10); |
1557 | } |
1558 | outs() << "\n" ; |
1559 | if (Perc > 10.0f) |
1560 | outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " |
1561 | "binary is probably not the same binary used during profiling " |
1562 | "collection. The generated data may be ineffective for improving " |
1563 | "performance.\n\n" ; |
1564 | |
1565 | outs() << "PERF2BOLT: out of range traces involving unknown regions: " |
1566 | << NumLongRangeTraces; |
1567 | if (NumTraces > 0) |
1568 | outs() << format(Fmt: " (%.1f%%)" , Vals: NumLongRangeTraces * 100.0f / NumTraces); |
1569 | outs() << "\n" ; |
1570 | |
1571 | if (NumColdSamples > 0) { |
1572 | const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; |
1573 | outs() << "PERF2BOLT: " << NumColdSamples |
1574 | << format(Fmt: " (%.1f%%)" , Vals: ColdSamples) |
1575 | << " samples recorded in cold regions of split functions.\n" ; |
1576 | if (ColdSamples > 5.0f) |
1577 | outs() |
1578 | << "WARNING: The BOLT-processed binary where samples were collected " |
1579 | "likely used bad data or your service observed a large shift in " |
1580 | "profile. You may want to audit this.\n" ; |
1581 | } |
1582 | |
1583 | return std::error_code(); |
1584 | } |
1585 | |
1586 | void DataAggregator::processBranchEvents() { |
1587 | outs() << "PERF2BOLT: processing branch events...\n" ; |
1588 | NamedRegionTimer T("processBranch" , "Processing branch events" , |
1589 | TimerGroupName, TimerGroupDesc, opts::TimeAggregator); |
1590 | |
1591 | for (const auto &AggrLBR : FallthroughLBRs) { |
1592 | const Trace &Loc = AggrLBR.first; |
1593 | const FTInfo &Info = AggrLBR.second; |
1594 | LBREntry First{.From: Loc.From, .To: Loc.From, .Mispred: false}; |
1595 | LBREntry Second{.From: Loc.To, .To: Loc.To, .Mispred: false}; |
1596 | if (Info.InternCount) |
1597 | doTrace(First, Second, Count: Info.InternCount); |
1598 | if (Info.ExternCount) { |
1599 | First.From = 0; |
1600 | doTrace(First, Second, Count: Info.ExternCount); |
1601 | } |
1602 | } |
1603 | |
1604 | for (const auto &AggrLBR : BranchLBRs) { |
1605 | const Trace &Loc = AggrLBR.first; |
1606 | const BranchInfo &Info = AggrLBR.second; |
1607 | doBranch(From: Loc.From, To: Loc.To, Count: Info.TakenCount, Mispreds: Info.MispredCount); |
1608 | } |
1609 | } |
1610 | |
1611 | std::error_code DataAggregator::parseBasicEvents() { |
1612 | outs() << "PERF2BOLT: parsing basic events (without LBR)...\n" ; |
1613 | NamedRegionTimer T("parseBasic" , "Parsing basic events" , TimerGroupName, |
1614 | TimerGroupDesc, opts::TimeAggregator); |
1615 | while (hasData()) { |
1616 | ErrorOr<PerfBasicSample> Sample = parseBasicSample(); |
1617 | if (std::error_code EC = Sample.getError()) |
1618 | return EC; |
1619 | |
1620 | if (!Sample->PC) |
1621 | continue; |
1622 | |
1623 | if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC)) |
1624 | BF->setHasProfileAvailable(); |
1625 | |
1626 | ++BasicSamples[Sample->PC]; |
1627 | EventNames.insert(key: Sample->EventName); |
1628 | } |
1629 | |
1630 | return std::error_code(); |
1631 | } |
1632 | |
1633 | void DataAggregator::processBasicEvents() { |
1634 | outs() << "PERF2BOLT: processing basic events (without LBR)...\n" ; |
1635 | NamedRegionTimer T("processBasic" , "Processing basic events" , TimerGroupName, |
1636 | TimerGroupDesc, opts::TimeAggregator); |
1637 | uint64_t OutOfRangeSamples = 0; |
1638 | uint64_t NumSamples = 0; |
1639 | for (auto &Sample : BasicSamples) { |
1640 | const uint64_t PC = Sample.first; |
1641 | const uint64_t HitCount = Sample.second; |
1642 | NumSamples += HitCount; |
1643 | BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC); |
1644 | if (!Func) { |
1645 | OutOfRangeSamples += HitCount; |
1646 | continue; |
1647 | } |
1648 | |
1649 | doSample(OrigFunc&: *Func, Address: PC, Count: HitCount); |
1650 | } |
1651 | outs() << "PERF2BOLT: read " << NumSamples << " samples\n" ; |
1652 | |
1653 | outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " |
1654 | << OutOfRangeSamples; |
1655 | float Perc = 0.0f; |
1656 | if (NumSamples > 0) { |
1657 | outs() << " (" ; |
1658 | Perc = OutOfRangeSamples * 100.0f / NumSamples; |
1659 | if (outs().has_colors()) { |
1660 | if (Perc > 60.0f) |
1661 | outs().changeColor(Color: raw_ostream::RED); |
1662 | else if (Perc > 40.0f) |
1663 | outs().changeColor(Color: raw_ostream::YELLOW); |
1664 | else |
1665 | outs().changeColor(Color: raw_ostream::GREEN); |
1666 | } |
1667 | outs() << format(Fmt: "%.1f%%" , Vals: Perc); |
1668 | if (outs().has_colors()) |
1669 | outs().resetColor(); |
1670 | outs() << ")" ; |
1671 | } |
1672 | outs() << "\n" ; |
1673 | if (Perc > 80.0f) |
1674 | outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " |
1675 | "binary is probably not the same binary used during profiling " |
1676 | "collection. The generated data may be ineffective for improving " |
1677 | "performance.\n\n" ; |
1678 | } |
1679 | |
1680 | std::error_code DataAggregator::parseMemEvents() { |
1681 | outs() << "PERF2BOLT: parsing memory events...\n" ; |
1682 | NamedRegionTimer T("parseMemEvents" , "Parsing mem events" , TimerGroupName, |
1683 | TimerGroupDesc, opts::TimeAggregator); |
1684 | while (hasData()) { |
1685 | ErrorOr<PerfMemSample> Sample = parseMemSample(); |
1686 | if (std::error_code EC = Sample.getError()) |
1687 | return EC; |
1688 | |
1689 | if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Sample->PC)) |
1690 | BF->setHasProfileAvailable(); |
1691 | |
1692 | MemSamples.emplace_back(args: std::move(Sample.get())); |
1693 | } |
1694 | |
1695 | return std::error_code(); |
1696 | } |
1697 | |
1698 | void DataAggregator::processMemEvents() { |
1699 | NamedRegionTimer T("ProcessMemEvents" , "Processing mem events" , |
1700 | TimerGroupName, TimerGroupDesc, opts::TimeAggregator); |
1701 | for (const PerfMemSample &Sample : MemSamples) { |
1702 | uint64_t PC = Sample.PC; |
1703 | uint64_t Addr = Sample.Addr; |
1704 | StringRef FuncName; |
1705 | StringRef MemName; |
1706 | |
1707 | // Try to resolve symbol for PC |
1708 | BinaryFunction *Func = getBinaryFunctionContainingAddress(Address: PC); |
1709 | if (!Func) { |
1710 | LLVM_DEBUG(if (PC != 0) { |
1711 | dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n" , PC, Addr); |
1712 | }); |
1713 | continue; |
1714 | } |
1715 | |
1716 | FuncName = Func->getOneName(); |
1717 | PC -= Func->getAddress(); |
1718 | |
1719 | // Try to resolve symbol for memory load |
1720 | if (BinaryData *BD = BC->getBinaryDataContainingAddress(Address: Addr)) { |
1721 | MemName = BD->getName(); |
1722 | Addr -= BD->getAddress(); |
1723 | } else if (opts::FilterMemProfile) { |
1724 | // Filter out heap/stack accesses |
1725 | continue; |
1726 | } |
1727 | |
1728 | const Location FuncLoc(!FuncName.empty(), FuncName, PC); |
1729 | const Location AddrLoc(!MemName.empty(), MemName, Addr); |
1730 | |
1731 | FuncMemData *MemData = &NamesToMemEvents[FuncName]; |
1732 | MemData->Name = FuncName; |
1733 | setMemData(BF: *Func, FMD: MemData); |
1734 | MemData->update(Offset: FuncLoc, Addr: AddrLoc); |
1735 | LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n" ); |
1736 | } |
1737 | } |
1738 | |
1739 | std::error_code DataAggregator::parsePreAggregatedLBRSamples() { |
1740 | outs() << "PERF2BOLT: parsing pre-aggregated profile...\n" ; |
1741 | NamedRegionTimer T("parseAggregated" , "Parsing aggregated branch events" , |
1742 | TimerGroupName, TimerGroupDesc, opts::TimeAggregator); |
1743 | while (hasData()) { |
1744 | ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry(); |
1745 | if (std::error_code EC = AggrEntry.getError()) |
1746 | return EC; |
1747 | |
1748 | for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset}) |
1749 | if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Address: Addr)) |
1750 | BF->setHasProfileAvailable(); |
1751 | |
1752 | AggregatedLBRs.emplace_back(args: std::move(AggrEntry.get())); |
1753 | } |
1754 | |
1755 | return std::error_code(); |
1756 | } |
1757 | |
1758 | void DataAggregator::processPreAggregated() { |
1759 | outs() << "PERF2BOLT: processing pre-aggregated profile...\n" ; |
1760 | NamedRegionTimer T("processAggregated" , "Processing aggregated branch events" , |
1761 | TimerGroupName, TimerGroupDesc, opts::TimeAggregator); |
1762 | |
1763 | uint64_t NumTraces = 0; |
1764 | for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { |
1765 | switch (AggrEntry.EntryType) { |
1766 | case AggregatedLBREntry::BRANCH: |
1767 | doBranch(From: AggrEntry.From.Offset, To: AggrEntry.To.Offset, Count: AggrEntry.Count, |
1768 | Mispreds: AggrEntry.Mispreds); |
1769 | break; |
1770 | case AggregatedLBREntry::FT: |
1771 | case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { |
1772 | LBREntry First{.From: AggrEntry.EntryType == AggregatedLBREntry::FT |
1773 | ? AggrEntry.From.Offset |
1774 | : 0, |
1775 | .To: AggrEntry.From.Offset, .Mispred: false}; |
1776 | LBREntry Second{.From: AggrEntry.To.Offset, .To: AggrEntry.To.Offset, .Mispred: false}; |
1777 | doTrace(First, Second, Count: AggrEntry.Count); |
1778 | NumTraces += AggrEntry.Count; |
1779 | break; |
1780 | } |
1781 | } |
1782 | } |
1783 | |
1784 | outs() << "PERF2BOLT: read " << AggregatedLBRs.size() |
1785 | << " aggregated LBR entries\n" ; |
1786 | outs() << "PERF2BOLT: traces mismatching disassembled function contents: " |
1787 | << NumInvalidTraces; |
1788 | float Perc = 0.0f; |
1789 | if (NumTraces > 0) { |
1790 | outs() << " (" ; |
1791 | Perc = NumInvalidTraces * 100.0f / NumTraces; |
1792 | if (outs().has_colors()) { |
1793 | if (Perc > 10.0f) |
1794 | outs().changeColor(Color: raw_ostream::RED); |
1795 | else if (Perc > 5.0f) |
1796 | outs().changeColor(Color: raw_ostream::YELLOW); |
1797 | else |
1798 | outs().changeColor(Color: raw_ostream::GREEN); |
1799 | } |
1800 | outs() << format(Fmt: "%.1f%%" , Vals: Perc); |
1801 | if (outs().has_colors()) |
1802 | outs().resetColor(); |
1803 | outs() << ")" ; |
1804 | } |
1805 | outs() << "\n" ; |
1806 | if (Perc > 10.0f) |
1807 | outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " |
1808 | "binary is probably not the same binary used during profiling " |
1809 | "collection. The generated data may be ineffective for improving " |
1810 | "performance.\n\n" ; |
1811 | |
1812 | outs() << "PERF2BOLT: Out of range traces involving unknown regions: " |
1813 | << NumLongRangeTraces; |
1814 | if (NumTraces > 0) |
1815 | outs() << format(Fmt: " (%.1f%%)" , Vals: NumLongRangeTraces * 100.0f / NumTraces); |
1816 | outs() << "\n" ; |
1817 | } |
1818 | |
1819 | std::optional<int32_t> DataAggregator::parseCommExecEvent() { |
1820 | size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n" ); |
1821 | if (LineEnd == StringRef::npos) { |
1822 | reportError(ErrorMsg: "expected rest of line" ); |
1823 | Diag << "Found: " << ParsingBuf << "\n" ; |
1824 | return std::nullopt; |
1825 | } |
1826 | StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd); |
1827 | |
1828 | size_t Pos = Line.find(Str: "PERF_RECORD_COMM exec" ); |
1829 | if (Pos == StringRef::npos) |
1830 | return std::nullopt; |
1831 | Line = Line.drop_front(N: Pos); |
1832 | |
1833 | // Line: |
1834 | // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" |
1835 | StringRef PIDStr = Line.rsplit(Separator: ':').second.split(Separator: '/').first; |
1836 | int32_t PID; |
1837 | if (PIDStr.getAsInteger(Radix: 10, Result&: PID)) { |
1838 | reportError(ErrorMsg: "expected PID" ); |
1839 | Diag << "Found: " << PIDStr << "in '" << Line << "'\n" ; |
1840 | return std::nullopt; |
1841 | } |
1842 | |
1843 | return PID; |
1844 | } |
1845 | |
1846 | namespace { |
1847 | std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) { |
1848 | const StringRef SecTimeStr = TimeStr.split(Separator: '.').first; |
1849 | const StringRef USecTimeStr = TimeStr.split(Separator: '.').second; |
1850 | uint64_t SecTime; |
1851 | uint64_t USecTime; |
1852 | if (SecTimeStr.getAsInteger(Radix: 10, Result&: SecTime) || |
1853 | USecTimeStr.getAsInteger(Radix: 10, Result&: USecTime)) |
1854 | return std::nullopt; |
1855 | return SecTime * 1000000ULL + USecTime; |
1856 | } |
1857 | } |
1858 | |
1859 | std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { |
1860 | while (checkAndConsumeFS()) { |
1861 | } |
1862 | |
1863 | size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n" ); |
1864 | if (LineEnd == StringRef::npos) { |
1865 | reportError(ErrorMsg: "expected rest of line" ); |
1866 | Diag << "Found: " << ParsingBuf << "\n" ; |
1867 | return std::nullopt; |
1868 | } |
1869 | StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd); |
1870 | |
1871 | size_t Pos = Line.find(Str: "PERF_RECORD_FORK" ); |
1872 | if (Pos == StringRef::npos) { |
1873 | consumeRestOfLine(); |
1874 | return std::nullopt; |
1875 | } |
1876 | |
1877 | ForkInfo FI; |
1878 | |
1879 | const StringRef TimeStr = |
1880 | Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second; |
1881 | if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { |
1882 | FI.Time = *TimeRes; |
1883 | } |
1884 | |
1885 | Line = Line.drop_front(N: Pos); |
1886 | |
1887 | // Line: |
1888 | // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) |
1889 | const StringRef ChildPIDStr = Line.split(Separator: '(').second.split(Separator: ':').first; |
1890 | if (ChildPIDStr.getAsInteger(Radix: 10, Result&: FI.ChildPID)) { |
1891 | reportError(ErrorMsg: "expected PID" ); |
1892 | Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n" ; |
1893 | return std::nullopt; |
1894 | } |
1895 | |
1896 | const StringRef ParentPIDStr = Line.rsplit(Separator: '(').second.split(Separator: ':').first; |
1897 | if (ParentPIDStr.getAsInteger(Radix: 10, Result&: FI.ParentPID)) { |
1898 | reportError(ErrorMsg: "expected PID" ); |
1899 | Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n" ; |
1900 | return std::nullopt; |
1901 | } |
1902 | |
1903 | consumeRestOfLine(); |
1904 | |
1905 | return FI; |
1906 | } |
1907 | |
1908 | ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> |
1909 | DataAggregator::parseMMapEvent() { |
1910 | while (checkAndConsumeFS()) { |
1911 | } |
1912 | |
1913 | MMapInfo ParsedInfo; |
1914 | |
1915 | size_t LineEnd = ParsingBuf.find_first_of(Chars: "\n" ); |
1916 | if (LineEnd == StringRef::npos) { |
1917 | reportError(ErrorMsg: "expected rest of line" ); |
1918 | Diag << "Found: " << ParsingBuf << "\n" ; |
1919 | return make_error_code(E: llvm::errc::io_error); |
1920 | } |
1921 | StringRef Line = ParsingBuf.substr(Start: 0, N: LineEnd); |
1922 | |
1923 | size_t Pos = Line.find(Str: "PERF_RECORD_MMAP2" ); |
1924 | if (Pos == StringRef::npos) { |
1925 | consumeRestOfLine(); |
1926 | return std::make_pair(x: StringRef(), y&: ParsedInfo); |
1927 | } |
1928 | |
1929 | // Line: |
1930 | // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> |
1931 | |
1932 | const StringRef TimeStr = |
1933 | Line.substr(Start: 0, N: Pos).rsplit(Separator: ':').first.rsplit(Separator: FieldSeparator).second; |
1934 | if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) |
1935 | ParsedInfo.Time = *TimeRes; |
1936 | |
1937 | Line = Line.drop_front(N: Pos); |
1938 | |
1939 | // Line: |
1940 | // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> |
1941 | |
1942 | StringRef FileName = Line.rsplit(Separator: FieldSeparator).second; |
1943 | if (FileName.starts_with(Prefix: "//" ) || FileName.starts_with(Prefix: "[" )) { |
1944 | consumeRestOfLine(); |
1945 | return std::make_pair(x: StringRef(), y&: ParsedInfo); |
1946 | } |
1947 | FileName = sys::path::filename(path: FileName); |
1948 | |
1949 | const StringRef PIDStr = Line.split(Separator: FieldSeparator).second.split(Separator: '/').first; |
1950 | if (PIDStr.getAsInteger(Radix: 10, Result&: ParsedInfo.PID)) { |
1951 | reportError(ErrorMsg: "expected PID" ); |
1952 | Diag << "Found: " << PIDStr << "in '" << Line << "'\n" ; |
1953 | return make_error_code(E: llvm::errc::io_error); |
1954 | } |
1955 | |
1956 | const StringRef BaseAddressStr = Line.split(Separator: '[').second.split(Separator: '(').first; |
1957 | if (BaseAddressStr.getAsInteger(Radix: 0, Result&: ParsedInfo.MMapAddress)) { |
1958 | reportError(ErrorMsg: "expected base address" ); |
1959 | Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n" ; |
1960 | return make_error_code(E: llvm::errc::io_error); |
1961 | } |
1962 | |
1963 | const StringRef SizeStr = Line.split(Separator: '(').second.split(Separator: ')').first; |
1964 | if (SizeStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Size)) { |
1965 | reportError(ErrorMsg: "expected mmaped size" ); |
1966 | Diag << "Found: " << SizeStr << "in '" << Line << "'\n" ; |
1967 | return make_error_code(E: llvm::errc::io_error); |
1968 | } |
1969 | |
1970 | const StringRef OffsetStr = |
1971 | Line.split(Separator: '@').second.ltrim().split(Separator: FieldSeparator).first; |
1972 | if (OffsetStr.getAsInteger(Radix: 0, Result&: ParsedInfo.Offset)) { |
1973 | reportError(ErrorMsg: "expected mmaped page-aligned offset" ); |
1974 | Diag << "Found: " << OffsetStr << "in '" << Line << "'\n" ; |
1975 | return make_error_code(E: llvm::errc::io_error); |
1976 | } |
1977 | |
1978 | consumeRestOfLine(); |
1979 | |
1980 | return std::make_pair(x&: FileName, y&: ParsedInfo); |
1981 | } |
1982 | |
1983 | std::error_code DataAggregator::parseMMapEvents() { |
1984 | outs() << "PERF2BOLT: parsing perf-script mmap events output\n" ; |
1985 | NamedRegionTimer T("parseMMapEvents" , "Parsing mmap events" , TimerGroupName, |
1986 | TimerGroupDesc, opts::TimeAggregator); |
1987 | |
1988 | std::multimap<StringRef, MMapInfo> GlobalMMapInfo; |
1989 | while (hasData()) { |
1990 | ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent(); |
1991 | if (std::error_code EC = FileMMapInfoRes.getError()) |
1992 | return EC; |
1993 | |
1994 | std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get(); |
1995 | if (FileMMapInfo.second.PID == -1) |
1996 | continue; |
1997 | if (FileMMapInfo.first.equals(RHS: "(deleted)" )) |
1998 | continue; |
1999 | |
2000 | // Consider only the first mapping of the file for any given PID |
2001 | auto Range = GlobalMMapInfo.equal_range(x: FileMMapInfo.first); |
2002 | bool PIDExists = llvm::any_of(Range: make_range(p: Range), P: [&](const auto &MI) { |
2003 | return MI.second.PID == FileMMapInfo.second.PID; |
2004 | }); |
2005 | |
2006 | if (PIDExists) |
2007 | continue; |
2008 | |
2009 | GlobalMMapInfo.insert(x&: FileMMapInfo); |
2010 | } |
2011 | |
2012 | LLVM_DEBUG({ |
2013 | dbgs() << "FileName -> mmap info:\n" |
2014 | << " Filename : PID [MMapAddr, Size, Offset]\n" ; |
2015 | for (const auto &[Name, MMap] : GlobalMMapInfo) |
2016 | dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n" , Name, MMap.PID, |
2017 | MMap.MMapAddress, MMap.Size, MMap.Offset); |
2018 | }); |
2019 | |
2020 | StringRef NameToUse = llvm::sys::path::filename(path: BC->getFilename()); |
2021 | if (GlobalMMapInfo.count(x: NameToUse) == 0 && !BuildIDBinaryName.empty()) { |
2022 | errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName |
2023 | << "\" for profile matching\n" ; |
2024 | NameToUse = BuildIDBinaryName; |
2025 | } |
2026 | |
2027 | auto Range = GlobalMMapInfo.equal_range(x: NameToUse); |
2028 | for (MMapInfo &MMapInfo : llvm::make_second_range(c: make_range(p: Range))) { |
2029 | if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { |
2030 | // Check that the binary mapping matches one of the segments. |
2031 | bool MatchFound = llvm::any_of( |
2032 | Range: llvm::make_second_range(c&: BC->SegmentMapInfo), |
2033 | P: [&](SegmentInfo &SegInfo) { |
2034 | // The mapping is page-aligned and hence the MMapAddress could be |
2035 | // different from the segment start address. We cannot know the page |
2036 | // size of the mapping, but we know it should not exceed the segment |
2037 | // alignment value. Hence we are performing an approximate check. |
2038 | return SegInfo.Address >= MMapInfo.MMapAddress && |
2039 | SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment; |
2040 | }); |
2041 | if (!MatchFound) { |
2042 | errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse |
2043 | << " at 0x" << Twine::utohexstr(Val: MMapInfo.MMapAddress) << '\n'; |
2044 | continue; |
2045 | } |
2046 | } |
2047 | |
2048 | // Set base address for shared objects. |
2049 | if (!BC->HasFixedLoadAddress) { |
2050 | std::optional<uint64_t> BaseAddress = |
2051 | BC->getBaseAddressForMapping(MMapAddress: MMapInfo.MMapAddress, FileOffset: MMapInfo.Offset); |
2052 | if (!BaseAddress) { |
2053 | errs() << "PERF2BOLT-WARNING: unable to find base address of the " |
2054 | "binary when memory mapped at 0x" |
2055 | << Twine::utohexstr(Val: MMapInfo.MMapAddress) |
2056 | << " using file offset 0x" << Twine::utohexstr(Val: MMapInfo.Offset) |
2057 | << ". Ignoring profile data for this mapping\n" ; |
2058 | continue; |
2059 | } else { |
2060 | MMapInfo.BaseAddress = *BaseAddress; |
2061 | } |
2062 | } |
2063 | |
2064 | BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo)); |
2065 | } |
2066 | |
2067 | if (BinaryMMapInfo.empty()) { |
2068 | if (errs().has_colors()) |
2069 | errs().changeColor(Color: raw_ostream::RED); |
2070 | errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" |
2071 | << BC->getFilename() << "\"." ; |
2072 | if (!GlobalMMapInfo.empty()) { |
2073 | errs() << " Profile for the following binary name(s) is available:\n" ; |
2074 | for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; |
2075 | I = GlobalMMapInfo.upper_bound(x: I->first)) |
2076 | errs() << " " << I->first << '\n'; |
2077 | errs() << "Please rename the input binary.\n" ; |
2078 | } else { |
2079 | errs() << " Failed to extract any binary name from a profile.\n" ; |
2080 | } |
2081 | if (errs().has_colors()) |
2082 | errs().resetColor(); |
2083 | |
2084 | exit(status: 1); |
2085 | } |
2086 | |
2087 | return std::error_code(); |
2088 | } |
2089 | |
2090 | std::error_code DataAggregator::parseTaskEvents() { |
2091 | outs() << "PERF2BOLT: parsing perf-script task events output\n" ; |
2092 | NamedRegionTimer T("parseTaskEvents" , "Parsing task events" , TimerGroupName, |
2093 | TimerGroupDesc, opts::TimeAggregator); |
2094 | |
2095 | while (hasData()) { |
2096 | if (std::optional<int32_t> CommInfo = parseCommExecEvent()) { |
2097 | // Remove forked child that ran execve |
2098 | auto MMapInfoIter = BinaryMMapInfo.find(x: *CommInfo); |
2099 | if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) |
2100 | BinaryMMapInfo.erase(position: MMapInfoIter); |
2101 | consumeRestOfLine(); |
2102 | continue; |
2103 | } |
2104 | |
2105 | std::optional<ForkInfo> ForkInfo = parseForkEvent(); |
2106 | if (!ForkInfo) |
2107 | continue; |
2108 | |
2109 | if (ForkInfo->ParentPID == ForkInfo->ChildPID) |
2110 | continue; |
2111 | |
2112 | if (ForkInfo->Time == 0) { |
2113 | // Process was forked and mmaped before perf ran. In this case the child |
2114 | // should have its own mmap entry unless it was execve'd. |
2115 | continue; |
2116 | } |
2117 | |
2118 | auto MMapInfoIter = BinaryMMapInfo.find(x: ForkInfo->ParentPID); |
2119 | if (MMapInfoIter == BinaryMMapInfo.end()) |
2120 | continue; |
2121 | |
2122 | MMapInfo MMapInfo = MMapInfoIter->second; |
2123 | MMapInfo.PID = ForkInfo->ChildPID; |
2124 | MMapInfo.Forked = true; |
2125 | BinaryMMapInfo.insert(x: std::make_pair(x&: MMapInfo.PID, y&: MMapInfo)); |
2126 | } |
2127 | |
2128 | outs() << "PERF2BOLT: input binary is associated with " |
2129 | << BinaryMMapInfo.size() << " PID(s)\n" ; |
2130 | |
2131 | LLVM_DEBUG({ |
2132 | for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo)) |
2133 | outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n" , MMI.PID, |
2134 | (MMI.Forked ? " (forked)" : "" ), MMI.MMapAddress, |
2135 | MMI.Size); |
2136 | }); |
2137 | |
2138 | return std::error_code(); |
2139 | } |
2140 | |
2141 | std::optional<std::pair<StringRef, StringRef>> |
2142 | DataAggregator::parseNameBuildIDPair() { |
2143 | while (checkAndConsumeFS()) { |
2144 | } |
2145 | |
2146 | ErrorOr<StringRef> BuildIDStr = parseString(EndChar: FieldSeparator, EndNl: true); |
2147 | if (std::error_code EC = BuildIDStr.getError()) |
2148 | return std::nullopt; |
2149 | |
2150 | // If one of the strings is missing, don't issue a parsing error, but still |
2151 | // do not return a value. |
2152 | consumeAllRemainingFS(); |
2153 | if (checkNewLine()) |
2154 | return std::nullopt; |
2155 | |
2156 | ErrorOr<StringRef> NameStr = parseString(EndChar: FieldSeparator, EndNl: true); |
2157 | if (std::error_code EC = NameStr.getError()) |
2158 | return std::nullopt; |
2159 | |
2160 | consumeRestOfLine(); |
2161 | return std::make_pair(x&: NameStr.get(), y&: BuildIDStr.get()); |
2162 | } |
2163 | |
2164 | bool DataAggregator::hasAllBuildIDs() { |
2165 | const StringRef SavedParsingBuf = ParsingBuf; |
2166 | |
2167 | if (!hasData()) |
2168 | return false; |
2169 | |
2170 | bool HasInvalidEntries = false; |
2171 | while (hasData()) { |
2172 | if (!parseNameBuildIDPair()) { |
2173 | HasInvalidEntries = true; |
2174 | break; |
2175 | } |
2176 | } |
2177 | |
2178 | ParsingBuf = SavedParsingBuf; |
2179 | |
2180 | return !HasInvalidEntries; |
2181 | } |
2182 | |
2183 | std::optional<StringRef> |
2184 | DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { |
2185 | const StringRef SavedParsingBuf = ParsingBuf; |
2186 | |
2187 | StringRef FileName; |
2188 | while (hasData()) { |
2189 | std::optional<std::pair<StringRef, StringRef>> IDPair = |
2190 | parseNameBuildIDPair(); |
2191 | if (!IDPair) { |
2192 | consumeRestOfLine(); |
2193 | continue; |
2194 | } |
2195 | |
2196 | if (IDPair->second.starts_with(Prefix: FileBuildID)) { |
2197 | FileName = sys::path::filename(path: IDPair->first); |
2198 | break; |
2199 | } |
2200 | } |
2201 | |
2202 | ParsingBuf = SavedParsingBuf; |
2203 | |
2204 | if (!FileName.empty()) |
2205 | return FileName; |
2206 | |
2207 | return std::nullopt; |
2208 | } |
2209 | |
2210 | std::error_code |
2211 | DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { |
2212 | std::error_code EC; |
2213 | raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); |
2214 | if (EC) |
2215 | return EC; |
2216 | |
2217 | bool WriteMemLocs = false; |
2218 | |
2219 | auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { |
2220 | if (WriteMemLocs) |
2221 | OutFile << (Loc.IsSymbol ? "4 " : "3 " ); |
2222 | else |
2223 | OutFile << (Loc.IsSymbol ? "1 " : "0 " ); |
2224 | OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Name: Loc.Name)) |
2225 | << " " << Twine::utohexstr(Val: Loc.Offset) << FieldSeparator; |
2226 | }; |
2227 | |
2228 | uint64_t BranchValues = 0; |
2229 | uint64_t MemValues = 0; |
2230 | |
2231 | if (BAT) |
2232 | OutFile << "boltedcollection\n" ; |
2233 | if (opts::BasicAggregation) { |
2234 | OutFile << "no_lbr" ; |
2235 | for (const StringMapEntry<std::nullopt_t> &Entry : EventNames) |
2236 | OutFile << " " << Entry.getKey(); |
2237 | OutFile << "\n" ; |
2238 | |
2239 | for (const auto &KV : NamesToSamples) { |
2240 | const FuncSampleData &FSD = KV.second; |
2241 | for (const SampleInfo &SI : FSD.Data) { |
2242 | writeLocation(SI.Loc); |
2243 | OutFile << SI.Hits << "\n" ; |
2244 | ++BranchValues; |
2245 | } |
2246 | } |
2247 | } else { |
2248 | for (const auto &KV : NamesToBranches) { |
2249 | const FuncBranchData &FBD = KV.second; |
2250 | for (const llvm::bolt::BranchInfo &BI : FBD.Data) { |
2251 | writeLocation(BI.From); |
2252 | writeLocation(BI.To); |
2253 | OutFile << BI.Mispreds << " " << BI.Branches << "\n" ; |
2254 | ++BranchValues; |
2255 | } |
2256 | for (const llvm::bolt::BranchInfo &BI : FBD.EntryData) { |
2257 | // Do not output if source is a known symbol, since this was already |
2258 | // accounted for in the source function |
2259 | if (BI.From.IsSymbol) |
2260 | continue; |
2261 | writeLocation(BI.From); |
2262 | writeLocation(BI.To); |
2263 | OutFile << BI.Mispreds << " " << BI.Branches << "\n" ; |
2264 | ++BranchValues; |
2265 | } |
2266 | } |
2267 | |
2268 | WriteMemLocs = true; |
2269 | for (const auto &KV : NamesToMemEvents) { |
2270 | const FuncMemData &FMD = KV.second; |
2271 | for (const MemInfo &MemEvent : FMD.Data) { |
2272 | writeLocation(MemEvent.Offset); |
2273 | writeLocation(MemEvent.Addr); |
2274 | OutFile << MemEvent.Count << "\n" ; |
2275 | ++MemValues; |
2276 | } |
2277 | } |
2278 | } |
2279 | |
2280 | outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues |
2281 | << " memory objects to " << OutputFilename << "\n" ; |
2282 | |
2283 | return std::error_code(); |
2284 | } |
2285 | |
2286 | std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, |
2287 | StringRef OutputFilename) const { |
2288 | std::error_code EC; |
2289 | raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); |
2290 | if (EC) |
2291 | return EC; |
2292 | |
2293 | yaml::bolt::BinaryProfile BP; |
2294 | |
2295 | // Fill out the header info. |
2296 | BP.Header.Version = 1; |
2297 | BP.Header.FileName = std::string(BC.getFilename()); |
2298 | std::optional<StringRef> BuildID = BC.getFileBuildID(); |
2299 | BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>" ; |
2300 | BP.Header.Origin = std::string(getReaderName()); |
2301 | // Only the input binary layout order is supported. |
2302 | BP.Header.IsDFSOrder = false; |
2303 | // FIXME: Need to match hash function used to produce BAT hashes. |
2304 | BP.Header.HashFunction = HashFunction::Default; |
2305 | |
2306 | ListSeparator LS("," ); |
2307 | raw_string_ostream EventNamesOS(BP.Header.EventNames); |
2308 | for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) |
2309 | EventNamesOS << LS << EventEntry.first().str(); |
2310 | |
2311 | BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE |
2312 | : BinaryFunction::PF_LBR; |
2313 | |
2314 | if (!opts::BasicAggregation) { |
2315 | // Convert profile for functions not covered by BAT |
2316 | for (auto &BFI : BC.getBinaryFunctions()) { |
2317 | BinaryFunction &Function = BFI.second; |
2318 | if (!Function.hasProfile()) |
2319 | continue; |
2320 | if (BAT->isBATFunction(Address: Function.getAddress())) |
2321 | continue; |
2322 | BP.Functions.emplace_back( |
2323 | args: YAMLProfileWriter::convert(BF: Function, /*UseDFS=*/false, BAT)); |
2324 | } |
2325 | |
2326 | for (const auto &KV : NamesToBranches) { |
2327 | const StringRef FuncName = KV.first; |
2328 | const FuncBranchData &Branches = KV.second; |
2329 | yaml::bolt::BinaryFunctionProfile YamlBF; |
2330 | BinaryData *BD = BC.getBinaryDataByName(Name: FuncName); |
2331 | assert(BD); |
2332 | uint64_t FuncAddress = BD->getAddress(); |
2333 | if (!BAT->isBATFunction(Address: FuncAddress)) |
2334 | continue; |
2335 | BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: FuncAddress); |
2336 | assert(BF); |
2337 | YamlBF.Name = FuncName.str(); |
2338 | YamlBF.Id = BF->getFunctionNumber(); |
2339 | YamlBF.Hash = BAT->getBFHash(FuncOutputAddress: FuncAddress); |
2340 | YamlBF.ExecCount = BF->getKnownExecutionCount(); |
2341 | YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(OutputAddress: FuncAddress); |
2342 | const BoltAddressTranslation::BBHashMapTy &BlockMap = |
2343 | BAT->getBBHashMap(FuncOutputAddress: FuncAddress); |
2344 | YamlBF.Blocks.resize(new_size: YamlBF.NumBasicBlocks); |
2345 | |
2346 | for (auto &&[Idx, YamlBB] : llvm::enumerate(First&: YamlBF.Blocks)) |
2347 | YamlBB.Index = Idx; |
2348 | |
2349 | for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI) |
2350 | YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash(); |
2351 | |
2352 | auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) { |
2353 | const llvm::bolt::BranchInfo &BI = Branches.Data.at(n: SuccDataIdx); |
2354 | yaml::bolt::SuccessorInfo SI; |
2355 | SI.Index = BlockMap.getBBIndex(BBInputOffset: SuccOffset); |
2356 | SI.Count = BI.Branches; |
2357 | SI.Mispreds = BI.Mispreds; |
2358 | return SI; |
2359 | }; |
2360 | |
2361 | auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx, |
2362 | uint32_t Offset) { |
2363 | const llvm::bolt::BranchInfo &BI = Branches.Data.at(n: CallToIdx); |
2364 | yaml::bolt::CallSiteInfo CSI; |
2365 | CSI.DestId = 0; // designated for unknown functions |
2366 | CSI.EntryDiscriminator = 0; |
2367 | CSI.Count = BI.Branches; |
2368 | CSI.Mispreds = BI.Mispreds; |
2369 | CSI.Offset = Offset; |
2370 | if (BinaryData *BD = BC.getBinaryDataByName(Name: CallToLoc.Name)) |
2371 | YAMLProfileWriter::setCSIDestination(BC, CSI, Symbol: BD->getSymbol(), BAT, |
2372 | Offset: CallToLoc.Offset); |
2373 | return CSI; |
2374 | }; |
2375 | |
2376 | for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { |
2377 | if (!BlockMap.isInputBlock(InputOffset: FromOffset)) |
2378 | continue; |
2379 | const unsigned Index = BlockMap.getBBIndex(BBInputOffset: FromOffset); |
2380 | yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index]; |
2381 | for (const auto &[SuccOffset, SuccDataIdx] : SuccKV) |
2382 | if (BlockMap.isInputBlock(InputOffset: SuccOffset)) |
2383 | YamlBB.Successors.emplace_back( |
2384 | args: getSuccessorInfo(SuccOffset, SuccDataIdx)); |
2385 | } |
2386 | for (const auto &[FromOffset, CallTo] : Branches.InterIndex) { |
2387 | auto BlockIt = BlockMap.upper_bound(Offset: FromOffset); |
2388 | --BlockIt; |
2389 | const unsigned BlockOffset = BlockIt->first; |
2390 | const unsigned BlockIndex = BlockIt->second.getBBIndex(); |
2391 | yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex]; |
2392 | const uint32_t Offset = FromOffset - BlockOffset; |
2393 | for (const auto &[CallToLoc, CallToIdx] : CallTo) |
2394 | YamlBB.CallSites.emplace_back( |
2395 | args: getCallSiteInfo(CallToLoc, CallToIdx, Offset)); |
2396 | llvm::sort(C&: YamlBB.CallSites, Comp: [](yaml::bolt::CallSiteInfo &A, |
2397 | yaml::bolt::CallSiteInfo &B) { |
2398 | return A.Offset < B.Offset; |
2399 | }); |
2400 | } |
2401 | // Drop blocks without a hash, won't be useful for stale matching. |
2402 | llvm::erase_if(C&: YamlBF.Blocks, |
2403 | P: [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { |
2404 | return YamlBB.Hash == (yaml::Hex64)0; |
2405 | }); |
2406 | BP.Functions.emplace_back(args&: YamlBF); |
2407 | } |
2408 | } |
2409 | |
2410 | // Write the profile. |
2411 | yaml::Output Out(OutFile, nullptr, 0); |
2412 | Out << BP; |
2413 | return std::error_code(); |
2414 | } |
2415 | |
2416 | void DataAggregator::dump() const { DataReader::dump(); } |
2417 | |
2418 | void DataAggregator::dump(const LBREntry &LBR) const { |
2419 | Diag << "From: " << Twine::utohexstr(Val: LBR.From) |
2420 | << " To: " << Twine::utohexstr(Val: LBR.To) << " Mispred? " << LBR.Mispred |
2421 | << "\n" ; |
2422 | } |
2423 | |
2424 | void DataAggregator::dump(const PerfBranchSample &Sample) const { |
2425 | Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n" ; |
2426 | for (const LBREntry &LBR : Sample.LBR) |
2427 | dump(LBR); |
2428 | } |
2429 | |
2430 | void DataAggregator::dump(const PerfMemSample &Sample) const { |
2431 | Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n" ; |
2432 | } |
2433 | |