//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11#include "ErrorHandling.h"
12#include "ProfiledBinary.h"
13#include "llvm/Support/Casting.h"
14#include "llvm/Support/CommandLine.h"
15#include "llvm/Support/Regex.h"
16#include <cstdint>
17#include <fstream>
18#include <map>
19
20using namespace llvm;
21using namespace sampleprof;
22
23namespace llvm {
24
25class CleanupInstaller;
26
27namespace sampleprof {
28
29// Stream based trace line iterator
30class TraceStream {
31 std::string CurrentLine;
32 std::ifstream Fin;
33 bool IsAtEoF = false;
34 uint64_t LineNumber = 0;
35
36public:
37 TraceStream(StringRef Filename) : Fin(Filename.str()) {
38 if (!Fin.good())
39 exitWithError(Message: "Error read input perf script file", Whence: Filename);
40 advance();
41 }
42
43 StringRef getCurrentLine() {
44 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
45 return CurrentLine;
46 }
47
48 uint64_t getLineNumber() { return LineNumber; }
49
50 bool isAtEoF() { return IsAtEoF; }
51
52 // Read the next line
53 void advance() {
54 if (!std::getline(is&: Fin, str&: CurrentLine)) {
55 IsAtEoF = true;
56 return;
57 }
58 LineNumber++;
59 }
60};
61
// The type of input format.
enum PerfFormat {
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData = 1,
  // Perf script created by the `perf script` command.
  PerfScript = 2,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile = 3,
};
70
// The type of perfscript content.
enum PerfContent {
  UnknownContent = 0,
  // Only LBR sample.
  LBR = 1,
  // Hybrid sample including call stack and LBR stack.
  LBRStack = 2,
};
77
78struct PerfInputFile {
79 std::string InputFile;
80 PerfFormat Format = PerfFormat::UnknownFormat;
81 PerfContent Content = PerfContent::UnknownContent;
82};
83
84// The parsed LBR sample entry.
85struct LBREntry {
86 uint64_t Source = 0;
87 uint64_t Target = 0;
88 LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
89
90#ifndef NDEBUG
91 void print() const {
92 dbgs() << "from " << format(Fmt: "%#010x", Vals: Source) << " to "
93 << format(Fmt: "%#010x", Vals: Target);
94 }
95#endif
96};
97
98#ifndef NDEBUG
99static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
100 for (size_t I = 0; I < LBRStack.size(); I++) {
101 dbgs() << "[" << I << "] ";
102 LBRStack[I].print();
103 dbgs() << "\n";
104 }
105}
106
107static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
108 for (size_t I = 0; I < CallStack.size(); I++) {
109 dbgs() << "[" << I << "] " << format(Fmt: "%#010x", Vals: CallStack[I]) << "\n";
110 }
111}
112#endif
113
// Hash interface for generic data of type T.
// T must provide a \fn getHashCode and a \fn isEqual.
// getHashCode is deliberately non-virtual to avoid the vtable call overhead:
// the derived class computes the hash and stores it in the base class's
// HashCode. That also leaves room for computing the hash incrementally (like
// a rolling hash) during frame stack unwinding, since unwinding only changes
// the leaf of the frame stack. \fn isEqual is virtual and therefore has some
// perf overhead. If a better hash function is designed in the future, the
// comparison could be skipped or made non-virtual (e.g. ignore the comparison
// when the hash conflict probability is low).
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;
  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hasher functor: forwards to the payload's (non-virtual) getHashCode.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      const uint64_t Code = Key.Data->getHashCode();
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality functor: precise comparison through the payload's isEqual.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      T *Other = RHS.Data.get();
      return LHS.Data->isEqual(Other);
    }
  };

  // Raw, non-owning access to the wrapped payload.
  T *getPtr() const { return Data.get(); }
};
149
150struct PerfSample {
151 // LBR stack recorded in FIFO order.
152 SmallVector<LBREntry, 16> LBRStack;
153 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
154 // generation
155 SmallVector<uint64_t, 16> CallStack;
156
157 virtual ~PerfSample() = default;
158 uint64_t getHashCode() const {
159 // Use simple DJB2 hash
160 auto HashCombine = [](uint64_t H, uint64_t V) {
161 return ((H << 5) + H) + V;
162 };
163 uint64_t Hash = 5381;
164 for (const auto &Value : CallStack) {
165 Hash = HashCombine(Hash, Value);
166 }
167 for (const auto &Entry : LBRStack) {
168 Hash = HashCombine(Hash, Entry.Source);
169 Hash = HashCombine(Hash, Entry.Target);
170 }
171 return Hash;
172 }
173
174 bool isEqual(const PerfSample *Other) const {
175 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
176 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
177
178 if (CallStack.size() != OtherCallStack.size() ||
179 LBRStack.size() != OtherLBRStack.size())
180 return false;
181
182 if (!std::equal(first1: CallStack.begin(), last1: CallStack.end(), first2: OtherCallStack.begin()))
183 return false;
184
185 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
186 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
187 LBRStack[I].Target != OtherLBRStack[I].Target)
188 return false;
189 }
190 return true;
191 }
192
193#ifndef NDEBUG
194 uint64_t Linenum = 0;
195
196 void print() const {
197 dbgs() << "Line " << Linenum << "\n";
198 dbgs() << "LBR stack\n";
199 printLBRStack(LBRStack);
200 dbgs() << "Call stack\n";
201 printCallStack(CallStack);
202 }
203#endif
204};
205// After parsing the sample, we record the samples by aggregating them
206// into this counter. The key stores the sample data and the value is
207// the sample repeat times.
208using AggregatedCounter =
209 std::unordered_map<Hashable<PerfSample>, uint64_t,
210 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
211
212using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
213
214inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
215 ProfiledBinary *Binary) {
216 // Start bigger than End is considered invalid.
217 // LBR ranges cross the unconditional jmp are also assumed invalid.
218 // It's found that perf data may contain duplicate LBR entries that could form
219 // a range that does not reflect real execution flow on some Intel targets,
220 // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
221 // cannot be a linear execution range that spans over unconditional jmp.
222 return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
223}
224
225// The state for the unwinder, it doesn't hold the data but only keep the
226// pointer/index of the data, While unwinding, the CallStack is changed
227// dynamicially and will be recorded as the context of the sample
228struct UnwindState {
229 // Profiled binary that current frame address belongs to
230 const ProfiledBinary *Binary;
231 // Call stack trie node
232 struct ProfiledFrame {
233 const uint64_t Address = DummyRoot;
234 ProfiledFrame *Parent;
235 SampleVector RangeSamples;
236 SampleVector BranchSamples;
237 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
238
239 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
240 : Address(Addr), Parent(P) {}
241 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
242 assert(Address && "Address can't be zero!");
243 auto Ret = Children.emplace(
244 args&: Address, args: std::make_unique<ProfiledFrame>(args&: Address, args: this));
245 return Ret.first->second.get();
246 }
247 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
248 RangeSamples.emplace_back(Args: std::make_tuple(args&: Start, args&: End, args&: Count));
249 }
250 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
251 BranchSamples.emplace_back(Args: std::make_tuple(args&: Source, args&: Target, args&: Count));
252 }
253 bool isDummyRoot() { return Address == DummyRoot; }
254 bool isExternalFrame() { return Address == ExternalAddr; }
255 bool isLeafFrame() { return Children.empty(); }
256 };
257
258 ProfiledFrame DummyTrieRoot;
259 ProfiledFrame *CurrentLeafFrame;
260 // Used to fall through the LBR stack
261 uint32_t LBRIndex = 0;
262 // Reference to PerfSample.LBRStack
263 const SmallVector<LBREntry, 16> &LBRStack;
264 // Used to iterate the address range
265 InstructionPointer InstPtr;
266 // Indicate whether unwinding is currently in a bad state which requires to
267 // skip all subsequent unwinding.
268 bool Invalid = false;
269 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
270 : Binary(Binary), LBRStack(Sample->LBRStack),
271 InstPtr(Binary, Sample->CallStack.front()) {
272 initFrameTrie(CallStack: Sample->CallStack);
273 }
274
275 bool validateInitialState() {
276 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
277 uint64_t LeafAddr = CurrentLeafFrame->Address;
278 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
279 "External leading LBR should match the leaf frame.");
280
281 // When we take a stack sample, ideally the sampling distance between the
282 // leaf IP of stack and the last LBR target shouldn't be very large.
283 // Use a heuristic size (0x100) to filter out broken records.
284 if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
285 WithColor::warning() << "Bogus trace: stack tip = "
286 << format(Fmt: "%#010x", Vals: LeafAddr)
287 << ", LBR tip = " << format(Fmt: "%#010x\n", Vals: LBRLeaf);
288 return false;
289 }
290 return true;
291 }
292
293 void checkStateConsistency() {
294 assert(InstPtr.Address == CurrentLeafFrame->Address &&
295 "IP should align with context leaf");
296 }
297
298 void setInvalid() { Invalid = true; }
299 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
300 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
301 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
302 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
303 bool IsLastLBR() const { return LBRIndex == 0; }
304 bool getLBRStackSize() const { return LBRStack.size(); }
305 void advanceLBR() { LBRIndex++; }
306 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
307
308 void pushFrame(uint64_t Address) {
309 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
310 }
311
312 void switchToFrame(uint64_t Address) {
313 if (CurrentLeafFrame->Address == Address)
314 return;
315 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
316 }
317
318 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
319
320 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
321
322 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
323 ProfiledFrame *Cur = &DummyTrieRoot;
324 for (auto Address : reverse(C: CallStack)) {
325 Cur = Cur->getOrCreateChildFrame(Address);
326 }
327 CurrentLeafFrame = Cur;
328 }
329
330 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
331};
332
// Base class for sample counter key with context
struct ContextKey {
  uint64_t HashCode = 0;
  virtual ~ContextKey() = default;

  // Return the cached hash, asking the derived class to generate it first
  // while the cached value is still zero.
  uint64_t getHashCode() {
    if (!HashCode)
      genHashCode();
    return HashCode;
  }
  virtual void genHashCode() = 0;

  // Default equality only compares the stored hash codes.
  virtual bool isEqual(const ContextKey *K) const {
    return K->HashCode == HashCode;
  }

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_AddrBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }
  ContextKey(ContextKind K) : Kind(K) {}
};
353
354// String based context id
355struct StringBasedCtxKey : public ContextKey {
356 SampleContextFrameVector Context;
357
358 bool WasLeafInlined;
359 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
360 static bool classof(const ContextKey *K) {
361 return K->getKind() == CK_StringBased;
362 }
363
364 bool isEqual(const ContextKey *K) const override {
365 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(Val: K);
366 return Context == Other->Context;
367 }
368
369 void genHashCode() override {
370 HashCode = hash_value(S: SampleContextFrames(Context));
371 }
372};
373
374// Address-based context id
375struct AddrBasedCtxKey : public ContextKey {
376 SmallVector<uint64_t, 16> Context;
377
378 bool WasLeafInlined;
379 AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
380 static bool classof(const ContextKey *K) {
381 return K->getKind() == CK_AddrBased;
382 }
383
384 bool isEqual(const ContextKey *K) const override {
385 const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(Val: K);
386 return Context == Other->Context;
387 }
388
389 void genHashCode() override {
390 HashCode = hash_combine_range(first: Context.begin(), last: Context.end());
391 }
392};
393
// Branch sample counter for one function, keyed by the branch expressed as a
// (source offset, target offset) pair.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// Range sample counter for one function, keyed by the range expressed as a
// (start offset, end offset) pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Wrapper for sample counters including range counter and branch counter
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Add \p Repeat to the count of range [Start, End]; Start must not exceed
  // End.
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    assert(Start <= End && "Invalid instruction range");
    const std::pair<uint64_t, uint64_t> Range(Start, End);
    RangeCounter[Range] += Repeat;
  }

  // Add \p Repeat to the count of branch Source -> Target.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    const std::pair<uint64_t, uint64_t> Branch(Source, Target);
    BranchCounter[Branch] += Repeat;
  }
};
413
414// Sample counter with context to support context-sensitive profile
415using ContextSampleCounterMap =
416 std::unordered_map<Hashable<ContextKey>, SampleCounter,
417 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
418
419struct FrameStack {
420 SmallVector<uint64_t, 16> Stack;
421 ProfiledBinary *Binary;
422 FrameStack(ProfiledBinary *B) : Binary(B) {}
423 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
424 assert(!Cur->isExternalFrame() &&
425 "External frame's not expected for context stack.");
426 Stack.push_back(Elt: Cur->Address);
427 return true;
428 }
429
430 void popFrame() {
431 if (!Stack.empty())
432 Stack.pop_back();
433 }
434 std::shared_ptr<StringBasedCtxKey> getContextKey();
435};
436
437struct AddressStack {
438 SmallVector<uint64_t, 16> Stack;
439 ProfiledBinary *Binary;
440 AddressStack(ProfiledBinary *B) : Binary(B) {}
441 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
442 assert(!Cur->isExternalFrame() &&
443 "External frame's not expected for context stack.");
444 Stack.push_back(Elt: Cur->Address);
445 return true;
446 }
447
448 void popFrame() {
449 if (!Stack.empty())
450 Stack.pop_back();
451 }
452 std::shared_ptr<AddrBasedCtxKey> getContextKey();
453};
454
455/*
456As in hybrid sample we have a group of LBRs and the most recent sampling call
457stack, we can walk through those LBRs to infer more call stacks which would be
458used as context for profile. VirtualUnwinder is the class to do the call stack
459unwinding based on LBR state. Two types of unwinding are processd here:
4601) LBR unwinding and 2) linear range unwinding.
461Specifically, for each LBR entry(can be classified into call, return, regular
462branch), LBR unwinding will replay the operation by pushing, popping or
463switching leaf frame towards the call stack and since the initial call stack
464is most recently sampled, the replay should be in anti-execution order, i.e. for
465the regular case, pop the call stack when LBR is call, push frame on call stack
466when LBR is return. After each LBR processed, it also needs to align with the
467next LBR by going through instructions from previous LBR's target to current
468LBR's source, which is the linear unwinding. As instruction from linear range
469can come from different function by inlining, linear unwinding will do the range
470splitting and record counters by the range with same inline context. Over those
471unwinding process we will record each call stack as context id and LBR/linear
472range as sample counter for further CS profile generation.
473*/
474class VirtualUnwinder {
475public:
476 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
477 : CtxCounterMap(Counter), Binary(B) {}
478 bool unwind(const PerfSample *Sample, uint64_t Repeat);
479 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
480
481 uint64_t NumTotalBranches = 0;
482 uint64_t NumExtCallBranch = 0;
483 uint64_t NumMissingExternalFrame = 0;
484 uint64_t NumMismatchedProEpiBranch = 0;
485 uint64_t NumMismatchedExtCallBranch = 0;
486 uint64_t NumUnpairedExtAddr = 0;
487 uint64_t NumPairedExtAddr = 0;
488
489private:
490 bool isSourceExternal(UnwindState &State) const {
491 return State.getCurrentLBRSource() == ExternalAddr;
492 }
493
494 bool isTargetExternal(UnwindState &State) const {
495 return State.getCurrentLBRTarget() == ExternalAddr;
496 }
497
498 // Determine whether the return source is from external code by checking if
499 // the target's the next inst is a call inst.
500 bool isReturnFromExternal(UnwindState &State) const {
501 return isSourceExternal(State) &&
502 (Binary->getCallAddrFromFrameAddr(FrameAddr: State.getCurrentLBRTarget()) != 0);
503 }
504
505 // If the source is external address but it's not the `return` case, treat it
506 // as a call from external.
507 bool isCallFromExternal(UnwindState &State) const {
508 return isSourceExternal(State) &&
509 Binary->getCallAddrFromFrameAddr(FrameAddr: State.getCurrentLBRTarget()) == 0;
510 }
511
512 bool isCallState(UnwindState &State) const {
513 // The tail call frame is always missing here in stack sample, we will
514 // use a specific tail call tracker to infer it.
515 if (!isValidState(State))
516 return false;
517
518 if (Binary->addressIsCall(Address: State.getCurrentLBRSource()))
519 return true;
520
521 return isCallFromExternal(State);
522 }
523
524 bool isReturnState(UnwindState &State) const {
525 if (!isValidState(State))
526 return false;
527
528 // Simply check addressIsReturn, as ret is always reliable, both for
529 // regular call and tail call.
530 if (Binary->addressIsReturn(Address: State.getCurrentLBRSource()))
531 return true;
532
533 return isReturnFromExternal(State);
534 }
535
536 bool isValidState(UnwindState &State) const { return !State.Invalid; }
537
538 void unwindCall(UnwindState &State);
539 void unwindLinear(UnwindState &State, uint64_t Repeat);
540 void unwindReturn(UnwindState &State);
541 void unwindBranch(UnwindState &State);
542
543 template <typename T>
544 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
545 // Collect each samples on trie node by DFS traversal
546 template <typename T>
547 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
548 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
549
550 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
551 uint64_t Repeat);
552 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
553 uint64_t Repeat);
554
555 ContextSampleCounterMap *CtxCounterMap;
556 // Profiled binary that current frame address belongs to
557 ProfiledBinary *Binary;
558 // Keep track of all untracked callsites
559 std::set<uint64_t> UntrackedCallsites;
560};
561
562// Read perf trace to parse the events and samples.
563class PerfReaderBase {
564public:
565 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
566 : Binary(B), PerfTraceFile(PerfTrace) {
567 // Initialize the base address to preferred address.
568 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
569 };
570 virtual ~PerfReaderBase() = default;
571 static std::unique_ptr<PerfReaderBase>
572 create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
573 std::optional<uint32_t> PIDFilter);
574
575 // Entry of the reader to parse multiple perf traces
576 virtual void parsePerfTraces() = 0;
577 const ContextSampleCounterMap &getSampleCounters() const {
578 return SampleCounters;
579 }
580 bool profileIsCS() { return ProfileIsCS; }
581
582protected:
583 ProfiledBinary *Binary = nullptr;
584 StringRef PerfTraceFile;
585
586 ContextSampleCounterMap SampleCounters;
587 bool ProfileIsCS = false;
588
589 uint64_t NumTotalSample = 0;
590 uint64_t NumLeafExternalFrame = 0;
591 uint64_t NumLeadingOutgoingLBR = 0;
592};
593
594// Read perf script to parse the events and samples.
595class PerfScriptReader : public PerfReaderBase {
596public:
597 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
598 std::optional<uint32_t> PID)
599 : PerfReaderBase(B, PerfTrace), PIDFilter(PID){};
600
601 // Entry of the reader to parse multiple perf traces
602 void parsePerfTraces() override;
603 // Generate perf script from perf data
604 static PerfInputFile
605 convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File,
606 std::optional<uint32_t> PIDFilter);
607 // Extract perf script type by peaking at the input
608 static PerfContent checkPerfScriptType(StringRef FileName);
609
610 // Cleanup installers for temporary files created by perf script command.
611 // Those files will be automatically removed when running destructor or
612 // receiving signals.
613 static SmallVector<CleanupInstaller, 2> TempFileCleanups;
614
615protected:
616 // The parsed MMap event
617 struct MMapEvent {
618 uint64_t PID = 0;
619 uint64_t Address = 0;
620 uint64_t Size = 0;
621 uint64_t Offset = 0;
622 StringRef BinaryPath;
623 };
624
625 // Check whether a given line is LBR sample
626 static bool isLBRSample(StringRef Line);
627 // Check whether a given line is MMAP event
628 static bool isMMap2Event(StringRef Line);
629 // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
630 // mapping between the binary name and its memory layout.
631 static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
632 MMapEvent &MMap);
633 // Update base address based on mmap events
634 void updateBinaryAddress(const MMapEvent &Event);
635 // Parse mmap event and update binary address
636 void parseMMap2Event(TraceStream &TraceIt);
637 // Parse perf events/samples and do aggregation
638 void parseAndAggregateTrace();
639 // Parse either an MMAP event or a perf sample
640 void parseEventOrSample(TraceStream &TraceIt);
641 // Warn if the relevant mmap event is missing.
642 void warnIfMissingMMap();
643 // Emit accumulate warnings.
644 void warnTruncatedStack();
645 // Warn if range is invalid.
646 void warnInvalidRange();
647 // Extract call stack from the perf trace lines
648 bool extractCallstack(TraceStream &TraceIt,
649 SmallVectorImpl<uint64_t> &CallStack);
650 // Extract LBR stack from one perf trace line
651 bool extractLBRStack(TraceStream &TraceIt,
652 SmallVectorImpl<LBREntry> &LBRStack);
653 uint64_t parseAggregatedCount(TraceStream &TraceIt);
654 // Parse one sample from multiple perf lines, override this for different
655 // sample type
656 void parseSample(TraceStream &TraceIt);
657 // An aggregated count is given to indicate how many times the sample is
658 // repeated.
659 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
660 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
661 // Post process the profile after trace aggregation, we will do simple range
662 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
663 virtual void generateUnsymbolizedProfile();
664 void writeUnsymbolizedProfile(StringRef Filename);
665 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
666
667 // Samples with the repeating time generated by the perf reader
668 AggregatedCounter AggregatedSamples;
669 // Keep track of all invalid return addresses
670 std::set<uint64_t> InvalidReturnAddresses;
671 // PID for the process of interest
672 std::optional<uint32_t> PIDFilter;
673};
674
675/*
676 The reader of LBR only perf script.
677 A typical LBR sample is like:
678 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
679 ... 0x4005c8/0x4005dc/P/-/-/0
680*/
681class LBRPerfReader : public PerfScriptReader {
682public:
683 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
684 std::optional<uint32_t> PID)
685 : PerfScriptReader(Binary, PerfTrace, PID){};
686 // Parse the LBR only sample.
687 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
688};
689
690/*
691 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
692 which is used to generate CS profile. An example of hybrid sample:
693 4005dc # call stack leaf
694 400634
695 400684 # call stack root
696 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
697 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
698*/
699class HybridPerfReader : public PerfScriptReader {
700public:
701 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
702 std::optional<uint32_t> PID)
703 : PerfScriptReader(Binary, PerfTrace, PID){};
704 // Parse the hybrid sample including the call and LBR line
705 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
706 void generateUnsymbolizedProfile() override;
707
708private:
709 // Unwind the hybrid samples after aggregration
710 void unwindSamples();
711};
712
713/*
714 Format of unsymbolized profile:
715
716 [frame1 @ frame2 @ ...] # If it's a CS profile
717 number of entries in RangeCounter
718 from_1-to_1:count_1
719 from_2-to_2:count_2
720 ......
721 from_n-to_n:count_n
722 number of entries in BranchCounter
723 src_1->dst_1:count_1
724 src_2->dst_2:count_2
725 ......
726 src_n->dst_n:count_n
727 [frame1 @ frame2 @ ...] # Next context
728 ......
729
730Note that non-CS profile doesn't have the empty `[]` context.
731*/
732class UnsymbolizedProfileReader : public PerfReaderBase {
733public:
734 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
735 : PerfReaderBase(Binary, PerfTrace){};
736 void parsePerfTraces() override;
737
738private:
739 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
740 void readUnsymbolizedProfile(StringRef Filename);
741
742 std::unordered_set<std::string> ContextStrSet;
743};
744
745} // end namespace sampleprof
746} // end namespace llvm
747
748#endif
749

source code of llvm/tools/llvm-profgen/PerfReader.h