1//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11#include "ErrorHandling.h"
12#include "ProfiledBinary.h"
13#include "llvm/Support/Casting.h"
14#include "llvm/Support/CommandLine.h"
15#include "llvm/Support/Regex.h"
16#include <cstdint>
17#include <fstream>
18#include <map>
19
20using namespace llvm;
21using namespace sampleprof;
22
23namespace llvm {
24
25class CleanupInstaller;
26
27namespace sampleprof {
28
29// Stream based trace line iterator
30class TraceStream {
31 std::string CurrentLine;
32 std::ifstream Fin;
33 bool IsAtEoF = false;
34 uint64_t LineNumber = 0;
35
36public:
37 TraceStream(StringRef Filename) : Fin(Filename.str()) {
38 if (!Fin.good())
39 exitWithError(Message: "Error read input perf script file", Whence: Filename);
40 advance();
41 }
42
43 StringRef getCurrentLine() {
44 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
45 return CurrentLine;
46 }
47
48 uint64_t getLineNumber() { return LineNumber; }
49
50 bool isAtEoF() { return IsAtEoF; }
51
52 // Read the next line
53 void advance() {
54 if (!std::getline(is&: Fin, str&: CurrentLine)) {
55 IsAtEoF = true;
56 return;
57 }
58 LineNumber++;
59 }
60};
61
// The type of input format.
enum PerfFormat {
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData = 1,
  // Perf script created by the `perf script` command.
  PerfScript = 2,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile = 3,
};
70
// The type of perfscript content.
enum PerfContent {
  UnknownContent = 0,
  // Only LBR sample.
  LBR = 1,
  // Hybrid sample including call stack and LBR stack.
  LBRStack = 2,
};
77
78struct PerfInputFile {
79 std::string InputFile;
80 PerfFormat Format = PerfFormat::UnknownFormat;
81 PerfContent Content = PerfContent::UnknownContent;
82};
83
84// The parsed LBR sample entry.
85struct LBREntry {
86 uint64_t Source = 0;
87 uint64_t Target = 0;
88 LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
89
90#ifndef NDEBUG
91 void print() const {
92 dbgs() << "from " << format(Fmt: "%#010x", Vals: Source) << " to "
93 << format(Fmt: "%#010x", Vals: Target);
94 }
95#endif
96};
97
98#ifndef NDEBUG
99static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
100 for (size_t I = 0; I < LBRStack.size(); I++) {
101 dbgs() << "[" << I << "] ";
102 LBRStack[I].print();
103 dbgs() << "\n";
104 }
105}
106
107static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
108 for (size_t I = 0; I < CallStack.size(); I++) {
109 dbgs() << "[" << I << "] " << format(Fmt: "%#010x", Vals: CallStack[I]) << "\n";
110 }
111}
112#endif
113
// Hash interface for generic data of type T.
// T must provide a \fn getHashCode and a \fn isEqual.
// Currently getHashCode is non-virtual to avoid the overhead of a vtable call,
// i.e. the hash of the derived class is calculated explicitly and assigned to
// the base class's HashCode. This also gives the flexibility to calculate the
// hash code incrementally (like a rolling hash) during frame stack unwinding,
// since unwinding only changes the leaf of the frame stack. \fn isEqual is a
// virtual function, which has some perf overhead. If a better hash function is
// designed in the future, the comparison could be skipped or made non-virtual
// (e.g. ignore the comparison when the hash conflict probability is low).
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;
  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hasher functor, usable as the Hash parameter of unordered containers.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // getHashCode is deliberately not virtual.
      const uint64_t Code = Key.Data->getHashCode();
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality functor, usable as the KeyEqual parameter of unordered
  // containers.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      // Precise comparison of the data; delegated to T::isEqual.
      T *const Other = RHS.Data.get();
      return LHS.Data->isEqual(Other);
    }
  };

  // Raw, non-owning pointer to the wrapped data.
  T *getPtr() const { return Data.get(); }
};
149
150struct PerfSample {
151 // LBR stack recorded in FIFO order.
152 SmallVector<LBREntry, 16> LBRStack;
153 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
154 // generation
155 SmallVector<uint64_t, 16> CallStack;
156
157 virtual ~PerfSample() = default;
158 uint64_t getHashCode() const {
159 // Use simple DJB2 hash
160 auto HashCombine = [](uint64_t H, uint64_t V) {
161 return ((H << 5) + H) + V;
162 };
163 uint64_t Hash = 5381;
164 for (const auto &Value : CallStack) {
165 Hash = HashCombine(Hash, Value);
166 }
167 for (const auto &Entry : LBRStack) {
168 Hash = HashCombine(Hash, Entry.Source);
169 Hash = HashCombine(Hash, Entry.Target);
170 }
171 return Hash;
172 }
173
174 bool isEqual(const PerfSample *Other) const {
175 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
176 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
177
178 if (CallStack.size() != OtherCallStack.size() ||
179 LBRStack.size() != OtherLBRStack.size())
180 return false;
181
182 if (!std::equal(first1: CallStack.begin(), last1: CallStack.end(), first2: OtherCallStack.begin()))
183 return false;
184
185 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
186 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
187 LBRStack[I].Target != OtherLBRStack[I].Target)
188 return false;
189 }
190 return true;
191 }
192
193#ifndef NDEBUG
194 uint64_t Linenum = 0;
195
196 void print() const {
197 dbgs() << "Line " << Linenum << "\n";
198 dbgs() << "LBR stack\n";
199 printLBRStack(LBRStack);
200 dbgs() << "Call stack\n";
201 printCallStack(CallStack);
202 }
203#endif
204};
205// After parsing the sample, we record the samples by aggregating them
206// into this counter. The key stores the sample data and the value is
207// the sample repeat times.
208using AggregatedCounter =
209 std::unordered_map<Hashable<PerfSample>, uint64_t,
210 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
211
212using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
213
214inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
215 ProfiledBinary *Binary) {
216 // Start bigger than End is considered invalid.
217 // LBR ranges cross the unconditional jmp are also assumed invalid.
218 // It's found that perf data may contain duplicate LBR entries that could form
219 // a range that does not reflect real execution flow on some Intel targets,
220 // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
221 // cannot be a linear execution range that spans over unconditional jmp.
222 return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
223}
224
225// The state for the unwinder, it doesn't hold the data but only keep the
226// pointer/index of the data, While unwinding, the CallStack is changed
227// dynamicially and will be recorded as the context of the sample
228struct UnwindState {
229 // Profiled binary that current frame address belongs to
230 const ProfiledBinary *Binary;
231 // Call stack trie node
232 struct ProfiledFrame {
233 const uint64_t Address = DummyRoot;
234 ProfiledFrame *Parent;
235 SampleVector RangeSamples;
236 SampleVector BranchSamples;
237 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
238
239 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
240 : Address(Addr), Parent(P) {}
241 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
242 assert(Address && "Address can't be zero!");
243 auto Ret = Children.emplace(
244 args&: Address, args: std::make_unique<ProfiledFrame>(args&: Address, args: this));
245 return Ret.first->second.get();
246 }
247 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
248 RangeSamples.emplace_back(Args: std::make_tuple(args&: Start, args&: End, args&: Count));
249 }
250 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
251 BranchSamples.emplace_back(Args: std::make_tuple(args&: Source, args&: Target, args&: Count));
252 }
253 bool isDummyRoot() { return Address == DummyRoot; }
254 bool isExternalFrame() { return Address == ExternalAddr; }
255 bool isLeafFrame() { return Children.empty(); }
256 };
257
258 ProfiledFrame DummyTrieRoot;
259 ProfiledFrame *CurrentLeafFrame;
260 // Used to fall through the LBR stack
261 uint32_t LBRIndex = 0;
262 // Reference to PerfSample.LBRStack
263 const SmallVector<LBREntry, 16> &LBRStack;
264 // Used to iterate the address range
265 InstructionPointer InstPtr;
266 // Indicate whether unwinding is currently in a bad state which requires to
267 // skip all subsequent unwinding.
268 bool Invalid = false;
269 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
270 : Binary(Binary), LBRStack(Sample->LBRStack),
271 InstPtr(Binary, Sample->CallStack.front()) {
272 initFrameTrie(CallStack: Sample->CallStack);
273 }
274
275 bool validateInitialState() {
276 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
277 uint64_t LeafAddr = CurrentLeafFrame->Address;
278 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
279 "External leading LBR should match the leaf frame.");
280
281 // When we take a stack sample, ideally the sampling distance between the
282 // leaf IP of stack and the last LBR target shouldn't be very large.
283 // Use a heuristic size (0x100) to filter out broken records.
284 if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
285 WithColor::warning() << "Bogus trace: stack tip = "
286 << format(Fmt: "%#010x", Vals: LeafAddr)
287 << ", LBR tip = " << format(Fmt: "%#010x\n", Vals: LBRLeaf);
288 return false;
289 }
290 return true;
291 }
292
293 void checkStateConsistency() {
294 assert(InstPtr.Address == CurrentLeafFrame->Address &&
295 "IP should align with context leaf");
296 }
297
298 void setInvalid() { Invalid = true; }
299 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
300 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
301 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
302 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
303 bool IsLastLBR() const { return LBRIndex == 0; }
304 bool getLBRStackSize() const { return LBRStack.size(); }
305 void advanceLBR() { LBRIndex++; }
306 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
307
308 void pushFrame(uint64_t Address) {
309 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
310 }
311
312 void switchToFrame(uint64_t Address) {
313 if (CurrentLeafFrame->Address == Address)
314 return;
315 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
316 }
317
318 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
319
320 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
321
322 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
323 ProfiledFrame *Cur = &DummyTrieRoot;
324 for (auto Address : reverse(C: CallStack)) {
325 Cur = Cur->getOrCreateChildFrame(Address);
326 }
327 CurrentLeafFrame = Cur;
328 }
329
330 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
331};
332
// Base class for sample counter key with context.
struct ContextKey {
  // Cached hash; 0 means "not generated yet".
  uint64_t HashCode = 0;
  virtual ~ContextKey() = default;

  // Return the cached hash, asking the derived class to generate it first
  // while the cache still holds 0.
  uint64_t getHashCode() {
    if (!HashCode)
      genHashCode();
    return HashCode;
  }

  // Derived classes compute their hash and store it into HashCode.
  virtual void genHashCode() = 0;

  // Default equality only compares the cached hash codes.
  virtual bool isEqual(const ContextKey *K) const {
    return K->HashCode == HashCode;
  }

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_AddrBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }
  ContextKey(ContextKind K) : Kind(K) {}
};
353
354// String based context id
355struct StringBasedCtxKey : public ContextKey {
356 SampleContextFrameVector Context;
357
358 bool WasLeafInlined;
359 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
360 static bool classof(const ContextKey *K) {
361 return K->getKind() == CK_StringBased;
362 }
363
364 bool isEqual(const ContextKey *K) const override {
365 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(Val: K);
366 return Context == Other->Context;
367 }
368
369 void genHashCode() override {
370 HashCode = hash_value(S: SampleContextFrames(Context));
371 }
372};
373
374// Address-based context id
375struct AddrBasedCtxKey : public ContextKey {
376 SmallVector<uint64_t, 16> Context;
377
378 bool WasLeafInlined;
379 AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
380 static bool classof(const ContextKey *K) {
381 return K->getKind() == CK_AddrBased;
382 }
383
384 bool isEqual(const ContextKey *K) const override {
385 const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(Val: K);
386 return Context == Other->Context;
387 }
388
389 void genHashCode() override { HashCode = hash_combine_range(R&: Context); }
390};
391
// The counter of branch samples for one function indexed by the branch,
// which is represented as the source and target offset pair.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// The counter of range samples for one function indexed by the range,
// which is represented as the start and end offset pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Wrapper for sample counters including range counter and branch counter.
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Add Repeat to the count of the [Start, End] range; asserts Start <= End.
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    assert(Start <= End && "Invalid instruction range");
    const std::pair<uint64_t, uint64_t> Range(Start, End);
    RangeCounter[Range] += Repeat;
  }

  // Add Repeat to the count of the Source -> Target branch.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    const std::pair<uint64_t, uint64_t> Branch(Source, Target);
    BranchCounter[Branch] += Repeat;
  }
};
411
412// Sample counter with context to support context-sensitive profile
413using ContextSampleCounterMap =
414 std::unordered_map<Hashable<ContextKey>, SampleCounter,
415 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
416
417struct FrameStack {
418 SmallVector<uint64_t, 16> Stack;
419 ProfiledBinary *Binary;
420 FrameStack(ProfiledBinary *B) : Binary(B) {}
421 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
422 assert(!Cur->isExternalFrame() &&
423 "External frame's not expected for context stack.");
424 Stack.push_back(Elt: Cur->Address);
425 return true;
426 }
427
428 void popFrame() {
429 if (!Stack.empty())
430 Stack.pop_back();
431 }
432 std::shared_ptr<StringBasedCtxKey> getContextKey();
433};
434
435struct AddressStack {
436 SmallVector<uint64_t, 16> Stack;
437 ProfiledBinary *Binary;
438 AddressStack(ProfiledBinary *B) : Binary(B) {}
439 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
440 assert(!Cur->isExternalFrame() &&
441 "External frame's not expected for context stack.");
442 Stack.push_back(Elt: Cur->Address);
443 return true;
444 }
445
446 void popFrame() {
447 if (!Stack.empty())
448 Stack.pop_back();
449 }
450 std::shared_ptr<AddrBasedCtxKey> getContextKey();
451};
452
453/*
454As in hybrid sample we have a group of LBRs and the most recent sampling call
455stack, we can walk through those LBRs to infer more call stacks which would be
456used as context for profile. VirtualUnwinder is the class to do the call stack
457unwinding based on LBR state. Two types of unwinding are processd here:
4581) LBR unwinding and 2) linear range unwinding.
459Specifically, for each LBR entry(can be classified into call, return, regular
460branch), LBR unwinding will replay the operation by pushing, popping or
461switching leaf frame towards the call stack and since the initial call stack
462is most recently sampled, the replay should be in anti-execution order, i.e. for
463the regular case, pop the call stack when LBR is call, push frame on call stack
464when LBR is return. After each LBR processed, it also needs to align with the
465next LBR by going through instructions from previous LBR's target to current
466LBR's source, which is the linear unwinding. As instruction from linear range
467can come from different function by inlining, linear unwinding will do the range
468splitting and record counters by the range with same inline context. Over those
469unwinding process we will record each call stack as context id and LBR/linear
470range as sample counter for further CS profile generation.
471*/
472class VirtualUnwinder {
473public:
474 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
475 : CtxCounterMap(Counter), Binary(B) {}
476 bool unwind(const PerfSample *Sample, uint64_t Repeat);
477 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
478
479 uint64_t NumTotalBranches = 0;
480 uint64_t NumExtCallBranch = 0;
481 uint64_t NumMissingExternalFrame = 0;
482 uint64_t NumMismatchedProEpiBranch = 0;
483 uint64_t NumMismatchedExtCallBranch = 0;
484 uint64_t NumUnpairedExtAddr = 0;
485 uint64_t NumPairedExtAddr = 0;
486
487private:
488 bool isSourceExternal(UnwindState &State) const {
489 return State.getCurrentLBRSource() == ExternalAddr;
490 }
491
492 bool isTargetExternal(UnwindState &State) const {
493 return State.getCurrentLBRTarget() == ExternalAddr;
494 }
495
496 // Determine whether the return source is from external code by checking if
497 // the target's the next inst is a call inst.
498 bool isReturnFromExternal(UnwindState &State) const {
499 return isSourceExternal(State) &&
500 (Binary->getCallAddrFromFrameAddr(FrameAddr: State.getCurrentLBRTarget()) != 0);
501 }
502
503 // If the source is external address but it's not the `return` case, treat it
504 // as a call from external.
505 bool isCallFromExternal(UnwindState &State) const {
506 return isSourceExternal(State) &&
507 Binary->getCallAddrFromFrameAddr(FrameAddr: State.getCurrentLBRTarget()) == 0;
508 }
509
510 bool isCallState(UnwindState &State) const {
511 // The tail call frame is always missing here in stack sample, we will
512 // use a specific tail call tracker to infer it.
513 if (!isValidState(State))
514 return false;
515
516 if (Binary->addressIsCall(Address: State.getCurrentLBRSource()))
517 return true;
518
519 return isCallFromExternal(State);
520 }
521
522 bool isReturnState(UnwindState &State) const {
523 if (!isValidState(State))
524 return false;
525
526 // Simply check addressIsReturn, as ret is always reliable, both for
527 // regular call and tail call.
528 if (Binary->addressIsReturn(Address: State.getCurrentLBRSource()))
529 return true;
530
531 return isReturnFromExternal(State);
532 }
533
534 bool isValidState(UnwindState &State) const { return !State.Invalid; }
535
536 void unwindCall(UnwindState &State);
537 void unwindLinear(UnwindState &State, uint64_t Repeat);
538 void unwindReturn(UnwindState &State);
539 void unwindBranch(UnwindState &State);
540
541 template <typename T>
542 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
543 // Collect each samples on trie node by DFS traversal
544 template <typename T>
545 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
546 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
547
548 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
549 uint64_t Repeat);
550 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
551 uint64_t Repeat);
552
553 ContextSampleCounterMap *CtxCounterMap;
554 // Profiled binary that current frame address belongs to
555 ProfiledBinary *Binary;
556 // Keep track of all untracked callsites
557 std::set<uint64_t> UntrackedCallsites;
558};
559
560// Read perf trace to parse the events and samples.
561class PerfReaderBase {
562public:
563 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
564 : Binary(B), PerfTraceFile(PerfTrace) {
565 // Initialize the base address to preferred address.
566 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
567 };
568 virtual ~PerfReaderBase() = default;
569 static std::unique_ptr<PerfReaderBase>
570 create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
571 std::optional<int32_t> PIDFilter);
572
573 // Entry of the reader to parse multiple perf traces
574 virtual void parsePerfTraces() = 0;
575 const ContextSampleCounterMap &getSampleCounters() const {
576 return SampleCounters;
577 }
578 bool profileIsCS() { return ProfileIsCS; }
579
580protected:
581 ProfiledBinary *Binary = nullptr;
582 StringRef PerfTraceFile;
583
584 ContextSampleCounterMap SampleCounters;
585 bool ProfileIsCS = false;
586
587 uint64_t NumTotalSample = 0;
588 uint64_t NumLeafExternalFrame = 0;
589 uint64_t NumLeadingOutgoingLBR = 0;
590};
591
592// Read perf script to parse the events and samples.
593class PerfScriptReader : public PerfReaderBase {
594public:
595 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
596 std::optional<int32_t> PID)
597 : PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};
598
599 // Entry of the reader to parse multiple perf traces
600 void parsePerfTraces() override;
601 // Generate perf script from perf data
602 static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
603 bool SkipPID, PerfInputFile &File,
604 std::optional<int32_t> PIDFilter);
605 // Extract perf script type by peaking at the input
606 static PerfContent checkPerfScriptType(StringRef FileName);
607
608 // Cleanup installers for temporary files created by perf script command.
609 // Those files will be automatically removed when running destructor or
610 // receiving signals.
611 static SmallVector<CleanupInstaller, 2> TempFileCleanups;
612
613protected:
614 // The parsed MMap event
615 struct MMapEvent {
616 int64_t PID = 0;
617 uint64_t Address = 0;
618 uint64_t Size = 0;
619 uint64_t Offset = 0;
620 StringRef BinaryPath;
621 };
622
623 // Check whether a given line is LBR sample
624 static bool isLBRSample(StringRef Line);
625 // Check whether a given line is MMAP event
626 static bool isMMapEvent(StringRef Line);
627 // Parse a single line of a PERF_RECORD_MMAP event looking for a
628 // mapping between the binary name and its memory layout.
629 static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
630 MMapEvent &MMap);
631 // Update base address based on mmap events
632 void updateBinaryAddress(const MMapEvent &Event);
633 // Parse mmap event and update binary address
634 void parseMMapEvent(TraceStream &TraceIt);
635 // Parse perf events/samples and do aggregation
636 void parseAndAggregateTrace();
637 // Parse either an MMAP event or a perf sample
638 void parseEventOrSample(TraceStream &TraceIt);
639 // Warn if the relevant mmap event is missing.
640 void warnIfMissingMMap();
641 // Emit accumulate warnings.
642 void warnTruncatedStack();
643 // Warn if range is invalid.
644 void warnInvalidRange();
645 // Extract call stack from the perf trace lines
646 bool extractCallstack(TraceStream &TraceIt,
647 SmallVectorImpl<uint64_t> &CallStack);
648 // Extract LBR stack from one perf trace line
649 bool extractLBRStack(TraceStream &TraceIt,
650 SmallVectorImpl<LBREntry> &LBRStack);
651 uint64_t parseAggregatedCount(TraceStream &TraceIt);
652 // Parse one sample from multiple perf lines, override this for different
653 // sample type
654 void parseSample(TraceStream &TraceIt);
655 // An aggregated count is given to indicate how many times the sample is
656 // repeated.
657 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
658 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
659 // Post process the profile after trace aggregation, we will do simple range
660 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
661 virtual void generateUnsymbolizedProfile();
662 void writeUnsymbolizedProfile(StringRef Filename);
663 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
664
665 // Samples with the repeating time generated by the perf reader
666 AggregatedCounter AggregatedSamples;
667 // Keep track of all invalid return addresses
668 std::set<uint64_t> InvalidReturnAddresses;
669 // PID for the process of interest
670 std::optional<int32_t> PIDFilter;
671};
672
673/*
674 The reader of LBR only perf script.
675 A typical LBR sample is like:
676 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
677 ... 0x4005c8/0x4005dc/P/-/-/0
678*/
679class LBRPerfReader : public PerfScriptReader {
680public:
681 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
682 std::optional<int32_t> PID)
683 : PerfScriptReader(Binary, PerfTrace, PID) {};
684 // Parse the LBR only sample.
685 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
686};
687
688/*
689 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
690 which is used to generate CS profile. An example of hybrid sample:
691 4005dc # call stack leaf
692 400634
693 400684 # call stack root
694 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
695 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
696*/
697class HybridPerfReader : public PerfScriptReader {
698public:
699 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
700 std::optional<int32_t> PID)
701 : PerfScriptReader(Binary, PerfTrace, PID) {};
702 // Parse the hybrid sample including the call and LBR line
703 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
704 void generateUnsymbolizedProfile() override;
705
706private:
707 // Unwind the hybrid samples after aggregration
708 void unwindSamples();
709};
710
711/*
712 Format of unsymbolized profile:
713
714 [frame1 @ frame2 @ ...] # If it's a CS profile
715 number of entries in RangeCounter
716 from_1-to_1:count_1
717 from_2-to_2:count_2
718 ......
719 from_n-to_n:count_n
720 number of entries in BranchCounter
721 src_1->dst_1:count_1
722 src_2->dst_2:count_2
723 ......
724 src_n->dst_n:count_n
725 [frame1 @ frame2 @ ...] # Next context
726 ......
727
728Note that non-CS profile doesn't have the empty `[]` context.
729*/
730class UnsymbolizedProfileReader : public PerfReaderBase {
731public:
732 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
733 : PerfReaderBase(Binary, PerfTrace){};
734 void parsePerfTraces() override;
735
736private:
737 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
738 void readUnsymbolizedProfile(StringRef Filename);
739
740 std::unordered_set<std::string> ContextStrSet;
741};
742
743} // end namespace sampleprof
744} // end namespace llvm
745
746#endif
747

// Source: llvm/tools/llvm-profgen/PerfReader.h from the LLVM project.
// (Listing captured from the KDAB online code browser; the site's navigation
// and promotional text has been dropped.)