1 | //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "PerfReader.h" |
9 | #include "ProfileGenerator.h" |
10 | #include "llvm/ADT/SmallString.h" |
11 | #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" |
12 | #include "llvm/Support/FileSystem.h" |
13 | #include "llvm/Support/Process.h" |
14 | #include "llvm/Support/ToolOutputFile.h" |
15 | |
16 | #define DEBUG_TYPE "perf-reader" |
17 | |
18 | cl::opt<bool> SkipSymbolization("skip-symbolization", |
19 | cl::desc("Dump the unsymbolized profile to the " |
20 | "output file. It will show unwinder " |
21 | "output for CS profile generation.")); |
22 | |
23 | static cl::opt<bool> ShowMmapEvents("show-mmap-events", |
24 | cl::desc("Print binary load events.")); |
25 | |
26 | static cl::opt<bool> |
27 | UseOffset("use-offset", cl::init(Val: true), |
28 | cl::desc("Work with `--skip-symbolization` or " |
29 | "`--unsymbolized-profile` to write/read the " |
30 | "offset instead of virtual address.")); |
31 | |
32 | static cl::opt<bool> UseLoadableSegmentAsBase( |
33 | "use-first-loadable-segment-as-base", |
34 | cl::desc("Use first loadable segment address as base address " |
35 | "for offsets in unsymbolized profile. By default " |
36 | "first executable segment address is used")); |
37 | |
38 | static cl::opt<bool> |
39 | IgnoreStackSamples("ignore-stack-samples", |
40 | cl::desc("Ignore call stack samples for hybrid samples " |
41 | "and produce context-insensitive profile.")); |
42 | cl::opt<bool> ShowDetailedWarning("show-detailed-warning", |
43 | cl::desc("Show detailed warning message.")); |
44 | |
45 | static cl::opt<int> CSProfMaxUnsymbolizedCtxDepth( |
46 | "csprof-max-unsymbolized-context-depth", cl::init(Val: -1), |
47 | cl::desc("Keep the last K contexts while merging unsymbolized profile. -1 " |
48 | "means no depth limit.")); |
49 | |
50 | extern cl::opt<std::string> PerfTraceFilename; |
51 | extern cl::opt<bool> ShowDisassemblyOnly; |
52 | extern cl::opt<bool> ShowSourceLocations; |
53 | extern cl::opt<std::string> OutputFilename; |
54 | |
55 | namespace llvm { |
56 | namespace sampleprof { |
57 | |
58 | void VirtualUnwinder::unwindCall(UnwindState &State) { |
59 | uint64_t Source = State.getCurrentLBRSource(); |
60 | auto *ParentFrame = State.getParentFrame(); |
61 | // The 2nd frame after leaf could be missing if stack sample is |
62 | // taken when IP is within prolog/epilog, as frame chain isn't |
63 | // setup yet. Fill in the missing frame in that case. |
64 | // TODO: Currently we just assume all the addr that can't match the |
65 | // 2nd frame is in prolog/epilog. In the future, we will switch to |
66 | // pro/epi tracker(Dwarf CFI) for the precise check. |
67 | if (ParentFrame == State.getDummyRootPtr() || |
68 | ParentFrame->Address != Source) { |
69 | State.switchToFrame(Address: Source); |
70 | if (ParentFrame != State.getDummyRootPtr()) { |
71 | if (Source == ExternalAddr) |
72 | NumMismatchedExtCallBranch++; |
73 | else |
74 | NumMismatchedProEpiBranch++; |
75 | } |
76 | } else { |
77 | State.popFrame(); |
78 | } |
79 | State.InstPtr.update(Addr: Source); |
80 | } |
81 | |
82 | void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { |
83 | InstructionPointer &IP = State.InstPtr; |
84 | uint64_t Target = State.getCurrentLBRTarget(); |
85 | uint64_t End = IP.Address; |
86 | |
87 | if (End == ExternalAddr && Target == ExternalAddr) { |
88 | // Filter out the case when leaf external frame matches the external LBR |
89 | // target, this is a valid state, it happens that the code run into external |
90 | // address then return back. The call frame under the external frame |
91 | // remains valid and can be unwound later, just skip recording this range. |
92 | NumPairedExtAddr++; |
93 | return; |
94 | } |
95 | |
96 | if (End == ExternalAddr || Target == ExternalAddr) { |
97 | // Range is invalid if only one point is external address. This means LBR |
98 | // traces contains a standalone external address failing to pair another |
99 | // one, likely due to interrupt jmp or broken perf script. Set the |
100 | // state to invalid. |
101 | NumUnpairedExtAddr++; |
102 | State.setInvalid(); |
103 | return; |
104 | } |
105 | |
106 | if (!isValidFallThroughRange(Start: Target, End, Binary)) { |
107 | // Skip unwinding the rest of LBR trace when a bogus range is seen. |
108 | State.setInvalid(); |
109 | return; |
110 | } |
111 | |
112 | if (Binary->usePseudoProbes()) { |
113 | // We don't need to top frame probe since it should be extracted |
114 | // from the range. |
115 | // The outcome of the virtual unwinding with pseudo probes is a |
116 | // map from a context key to the address range being unwound. |
117 | // This means basically linear unwinding is not needed for pseudo |
118 | // probes. The range will be simply recorded here and will be |
119 | // converted to a list of pseudo probes to report in ProfileGenerator. |
120 | State.getParentFrame()->recordRangeCount(Start: Target, End, Count: Repeat); |
121 | } else { |
122 | // Unwind linear execution part. |
123 | // Split and record the range by different inline context. For example: |
124 | // [0x01] ... main:1 # Target |
125 | // [0x02] ... main:2 |
126 | // [0x03] ... main:3 @ foo:1 |
127 | // [0x04] ... main:3 @ foo:2 |
128 | // [0x05] ... main:3 @ foo:3 |
129 | // [0x06] ... main:4 |
130 | // [0x07] ... main:5 # End |
131 | // It will be recorded: |
132 | // [main:*] : [0x06, 0x07], [0x01, 0x02] |
133 | // [main:3 @ foo:*] : [0x03, 0x05] |
134 | while (IP.Address > Target) { |
135 | uint64_t PrevIP = IP.Address; |
136 | IP.backward(); |
137 | // Break into segments for implicit call/return due to inlining |
138 | bool SameInlinee = Binary->inlineContextEqual(Add1: PrevIP, Add2: IP.Address); |
139 | if (!SameInlinee) { |
140 | State.switchToFrame(Address: PrevIP); |
141 | State.CurrentLeafFrame->recordRangeCount(Start: PrevIP, End, Count: Repeat); |
142 | End = IP.Address; |
143 | } |
144 | } |
145 | assert(IP.Address == Target && "The last one must be the target address."); |
146 | // Record the remaining range, [0x01, 0x02] in the example |
147 | State.switchToFrame(Address: IP.Address); |
148 | State.CurrentLeafFrame->recordRangeCount(Start: IP.Address, End, Count: Repeat); |
149 | } |
150 | } |
151 | |
152 | void VirtualUnwinder::unwindReturn(UnwindState &State) { |
153 | // Add extra frame as we unwind through the return |
154 | const LBREntry &LBR = State.getCurrentLBR(); |
155 | uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr: LBR.Target); |
156 | State.switchToFrame(Address: CallAddr); |
157 | State.pushFrame(Address: LBR.Source); |
158 | State.InstPtr.update(Addr: LBR.Source); |
159 | } |
160 | |
161 | void VirtualUnwinder::unwindBranch(UnwindState &State) { |
162 | // TODO: Tolerate tail call for now, as we may see tail call from libraries. |
163 | // This is only for intra function branches, excluding tail calls. |
164 | uint64_t Source = State.getCurrentLBRSource(); |
165 | State.switchToFrame(Address: Source); |
166 | State.InstPtr.update(Addr: Source); |
167 | } |
168 | |
169 | std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() { |
170 | std::shared_ptr<StringBasedCtxKey> KeyStr = |
171 | std::make_shared<StringBasedCtxKey>(); |
172 | KeyStr->Context = Binary->getExpandedContext(Stack, WasLeafInlined&: KeyStr->WasLeafInlined); |
173 | return KeyStr; |
174 | } |
175 | |
176 | std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() { |
177 | std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>(); |
178 | KeyStr->Context = Stack; |
179 | CSProfileGenerator::compressRecursionContext<uint64_t>(Context&: KeyStr->Context); |
180 | // MaxContextDepth(--csprof-max-context-depth) is used to trim both symbolized |
181 | // and unsymbolized profile context. Sometimes we want to at least preserve |
182 | // the inlinings for the leaf frame(the profiled binary inlining), |
183 | // --csprof-max-context-depth may not be flexible enough, in this case, |
184 | // --csprof-max-unsymbolized-context-depth is used to limit the context for |
185 | // unsymbolized profile. If both are set, use the minimum of them. |
186 | int Depth = CSProfileGenerator::MaxContextDepth != -1 |
187 | ? CSProfileGenerator::MaxContextDepth |
188 | : KeyStr->Context.size(); |
189 | Depth = CSProfMaxUnsymbolizedCtxDepth != -1 |
190 | ? std::min(a: static_cast<int>(CSProfMaxUnsymbolizedCtxDepth), b: Depth) |
191 | : Depth; |
192 | CSProfileGenerator::trimContext<uint64_t>(S&: KeyStr->Context, Depth); |
193 | return KeyStr; |
194 | } |
195 | |
196 | template <typename T> |
197 | void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, |
198 | T &Stack) { |
199 | if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty()) |
200 | return; |
201 | |
202 | std::shared_ptr<ContextKey> Key = Stack.getContextKey(); |
203 | if (Key == nullptr) |
204 | return; |
205 | auto Ret = CtxCounterMap->emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
206 | SampleCounter &SCounter = Ret.first->second; |
207 | for (auto &I : Cur->RangeSamples) |
208 | SCounter.recordRangeCount(Start: std::get<0>(t&: I), End: std::get<1>(t&: I), Repeat: std::get<2>(t&: I)); |
209 | |
210 | for (auto &I : Cur->BranchSamples) |
211 | SCounter.recordBranchCount(Source: std::get<0>(t&: I), Target: std::get<1>(t&: I), Repeat: std::get<2>(t&: I)); |
212 | } |
213 | |
214 | template <typename T> |
215 | void VirtualUnwinder::collectSamplesFromFrameTrie( |
216 | UnwindState::ProfiledFrame *Cur, T &Stack) { |
217 | if (!Cur->isDummyRoot()) { |
218 | // Truncate the context for external frame since this isn't a real call |
219 | // context the compiler will see. |
220 | if (Cur->isExternalFrame() || !Stack.pushFrame(Cur)) { |
221 | // Process truncated context |
222 | // Start a new traversal ignoring its bottom context |
223 | T EmptyStack(Binary); |
224 | collectSamplesFromFrame(Cur, EmptyStack); |
225 | for (const auto &Item : Cur->Children) { |
226 | collectSamplesFromFrameTrie(Item.second.get(), EmptyStack); |
227 | } |
228 | |
229 | // Keep note of untracked call site and deduplicate them |
230 | // for warning later. |
231 | if (!Cur->isLeafFrame()) |
232 | UntrackedCallsites.insert(x: Cur->Address); |
233 | |
234 | return; |
235 | } |
236 | } |
237 | |
238 | collectSamplesFromFrame(Cur, Stack); |
239 | // Process children frame |
240 | for (const auto &Item : Cur->Children) { |
241 | collectSamplesFromFrameTrie(Item.second.get(), Stack); |
242 | } |
243 | // Recover the call stack |
244 | Stack.popFrame(); |
245 | } |
246 | |
247 | void VirtualUnwinder::collectSamplesFromFrameTrie( |
248 | UnwindState::ProfiledFrame *Cur) { |
249 | if (Binary->usePseudoProbes()) { |
250 | AddressStack Stack(Binary); |
251 | collectSamplesFromFrameTrie<AddressStack>(Cur, Stack); |
252 | } else { |
253 | FrameStack Stack(Binary); |
254 | collectSamplesFromFrameTrie<FrameStack>(Cur, Stack); |
255 | } |
256 | } |
257 | |
258 | void VirtualUnwinder::recordBranchCount(const LBREntry &Branch, |
259 | UnwindState &State, uint64_t Repeat) { |
260 | if (Branch.Target == ExternalAddr) |
261 | return; |
262 | |
263 | // Record external-to-internal pattern on the trie root, it later can be |
264 | // used for generating head samples. |
265 | if (Branch.Source == ExternalAddr) { |
266 | State.getDummyRootPtr()->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
267 | Count: Repeat); |
268 | return; |
269 | } |
270 | |
271 | if (Binary->usePseudoProbes()) { |
272 | // Same as recordRangeCount, We don't need to top frame probe since we will |
273 | // extract it from branch's source address |
274 | State.getParentFrame()->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
275 | Count: Repeat); |
276 | } else { |
277 | State.CurrentLeafFrame->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
278 | Count: Repeat); |
279 | } |
280 | } |
281 | |
282 | bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) { |
283 | // Capture initial state as starting point for unwinding. |
284 | UnwindState State(Sample, Binary); |
285 | |
286 | // Sanity check - making sure leaf of LBR aligns with leaf of stack sample |
287 | // Stack sample sometimes can be unreliable, so filter out bogus ones. |
288 | if (!State.validateInitialState()) |
289 | return false; |
290 | |
291 | NumTotalBranches += State.LBRStack.size(); |
292 | // Now process the LBR samples in parrallel with stack sample |
293 | // Note that we do not reverse the LBR entry order so we can |
294 | // unwind the sample stack as we walk through LBR entries. |
295 | while (State.hasNextLBR()) { |
296 | State.checkStateConsistency(); |
297 | |
298 | // Do not attempt linear unwind for the leaf range as it's incomplete. |
299 | if (!State.IsLastLBR()) { |
300 | // Unwind implicit calls/returns from inlining, along the linear path, |
301 | // break into smaller sub section each with its own calling context. |
302 | unwindLinear(State, Repeat); |
303 | } |
304 | |
305 | // Save the LBR branch before it gets unwound. |
306 | const LBREntry &Branch = State.getCurrentLBR(); |
307 | if (isCallState(State)) { |
308 | // Unwind calls - we know we encountered call if LBR overlaps with |
309 | // transition between leaf the 2nd frame. Note that for calls that |
310 | // were not in the original stack sample, we should have added the |
311 | // extra frame when processing the return paired with this call. |
312 | unwindCall(State); |
313 | } else if (isReturnState(State)) { |
314 | // Unwind returns - check whether the IP is indeed at a return |
315 | // instruction |
316 | unwindReturn(State); |
317 | } else if (isValidState(State)) { |
318 | // Unwind branches |
319 | unwindBranch(State); |
320 | } else { |
321 | // Skip unwinding the rest of LBR trace. Reset the stack and update the |
322 | // state so that the rest of the trace can still be processed as if they |
323 | // do not have stack samples. |
324 | State.clearCallStack(); |
325 | State.InstPtr.update(Addr: State.getCurrentLBRSource()); |
326 | State.pushFrame(Address: State.InstPtr.Address); |
327 | } |
328 | |
329 | State.advanceLBR(); |
330 | // Record `branch` with calling context after unwinding. |
331 | recordBranchCount(Branch, State, Repeat); |
332 | } |
333 | // As samples are aggregated on trie, record them into counter map |
334 | collectSamplesFromFrameTrie(Cur: State.getDummyRootPtr()); |
335 | |
336 | return true; |
337 | } |
338 | |
339 | std::unique_ptr<PerfReaderBase> |
340 | PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput, |
341 | std::optional<int32_t> PIDFilter) { |
342 | std::unique_ptr<PerfReaderBase> PerfReader; |
343 | |
344 | if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) { |
345 | PerfReader.reset( |
346 | p: new UnsymbolizedProfileReader(Binary, PerfInput.InputFile)); |
347 | return PerfReader; |
348 | } |
349 | |
350 | // For perf data input, we need to convert them into perf script first. |
351 | // If this is a kernel perf file, there is no need for retrieving PIDs. |
352 | if (PerfInput.Format == PerfFormat::PerfData) |
353 | PerfInput = PerfScriptReader::convertPerfDataToTrace( |
354 | Binary, SkipPID: Binary->isKernel(), File&: PerfInput, PIDFilter); |
355 | |
356 | assert((PerfInput.Format == PerfFormat::PerfScript) && |
357 | "Should be a perfscript!"); |
358 | |
359 | PerfInput.Content = |
360 | PerfScriptReader::checkPerfScriptType(FileName: PerfInput.InputFile); |
361 | if (PerfInput.Content == PerfContent::LBRStack) { |
362 | PerfReader.reset( |
363 | p: new HybridPerfReader(Binary, PerfInput.InputFile, PIDFilter)); |
364 | } else if (PerfInput.Content == PerfContent::LBR) { |
365 | PerfReader.reset(p: new LBRPerfReader(Binary, PerfInput.InputFile, PIDFilter)); |
366 | } else { |
367 | exitWithError(Message: "Unsupported perfscript!"); |
368 | } |
369 | |
370 | return PerfReader; |
371 | } |
372 | |
373 | PerfInputFile |
374 | PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID, |
375 | PerfInputFile &File, |
376 | std::optional<int32_t> PIDFilter) { |
377 | StringRef PerfData = File.InputFile; |
378 | // Run perf script to retrieve PIDs matching binary we're interested in. |
379 | auto PerfExecutable = sys::Process::FindInEnvPath(EnvName: "PATH", FileName: "perf"); |
380 | if (!PerfExecutable) { |
381 | exitWithError(Message: "Perf not found."); |
382 | } |
383 | std::string PerfPath = *PerfExecutable; |
384 | SmallString<128> PerfTraceFile; |
385 | sys::fs::createUniquePath(Model: "perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp", |
386 | ResultPath&: PerfTraceFile, /*MakeAbsolute=*/true); |
387 | std::string ErrorFile = std::string(PerfTraceFile) + ".err"; |
388 | std::optional<StringRef> Redirects[] = {std::nullopt, // Stdin |
389 | StringRef(PerfTraceFile), // Stdout |
390 | StringRef(ErrorFile)}; // Stderr |
391 | PerfScriptReader::TempFileCleanups.emplace_back(Args&: PerfTraceFile); |
392 | PerfScriptReader::TempFileCleanups.emplace_back(Args&: ErrorFile); |
393 | |
394 | std::string PIDs; |
395 | if (!SkipPID) { |
396 | StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events", |
397 | "-F", "comm,pid", "-i", |
398 | PerfData}; |
399 | sys::ExecuteAndWait(Program: PerfPath, Args: ScriptMMapArgs, Env: std::nullopt, Redirects); |
400 | |
401 | // Collect the PIDs |
402 | TraceStream TraceIt(PerfTraceFile); |
403 | std::unordered_set<int32_t> PIDSet; |
404 | while (!TraceIt.isAtEoF()) { |
405 | MMapEvent MMap; |
406 | if (isMMapEvent(Line: TraceIt.getCurrentLine()) && |
407 | extractMMapEventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap)) { |
408 | auto It = PIDSet.emplace(args&: MMap.PID); |
409 | if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) { |
410 | if (!PIDs.empty()) { |
411 | PIDs.append(s: ","); |
412 | } |
413 | PIDs.append(str: utostr(X: MMap.PID)); |
414 | } |
415 | } |
416 | TraceIt.advance(); |
417 | } |
418 | |
419 | if (PIDs.empty()) { |
420 | exitWithError(Message: "No relevant mmap event is found in perf data."); |
421 | } |
422 | } |
423 | |
424 | // Run perf script again to retrieve events for PIDs collected above |
425 | SmallVector<StringRef, 8> ScriptSampleArgs; |
426 | ScriptSampleArgs.push_back(Elt: PerfPath); |
427 | ScriptSampleArgs.push_back(Elt: "script"); |
428 | ScriptSampleArgs.push_back(Elt: "--show-mmap-events"); |
429 | ScriptSampleArgs.push_back(Elt: "-F"); |
430 | ScriptSampleArgs.push_back(Elt: "ip,brstack"); |
431 | ScriptSampleArgs.push_back(Elt: "-i"); |
432 | ScriptSampleArgs.push_back(Elt: PerfData); |
433 | if (!PIDs.empty()) { |
434 | ScriptSampleArgs.push_back(Elt: "--pid"); |
435 | ScriptSampleArgs.push_back(Elt: PIDs); |
436 | } |
437 | sys::ExecuteAndWait(Program: PerfPath, Args: ScriptSampleArgs, Env: std::nullopt, Redirects); |
438 | |
439 | return {.InputFile: std::string(PerfTraceFile), .Format: PerfFormat::PerfScript, |
440 | .Content: PerfContent::UnknownContent}; |
441 | } |
442 | |
443 | static StringRef filename(StringRef Path, bool UseBackSlash) { |
444 | llvm::sys::path::Style PathStyle = |
445 | UseBackSlash ? llvm::sys::path::Style::windows_backslash |
446 | : llvm::sys::path::Style::native; |
447 | StringRef FileName = llvm::sys::path::filename(path: Path, style: PathStyle); |
448 | |
449 | // In case this file use \r\n as newline. |
450 | if (UseBackSlash && FileName.back() == '\r') |
451 | return FileName.drop_back(); |
452 | |
453 | return FileName; |
454 | } |
455 | |
456 | void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) { |
457 | // Drop the event which doesn't belong to user-provided binary |
458 | StringRef BinaryName = filename(Path: Event.BinaryPath, UseBackSlash: Binary->isCOFF()); |
459 | bool IsKernel = Binary->isKernel(); |
460 | if (!IsKernel && Binary->getName() != BinaryName) |
461 | return; |
462 | if (IsKernel && !Binary->isKernelImageName(BinaryName)) |
463 | return; |
464 | |
465 | // Drop the event if process does not match pid filter |
466 | if (PIDFilter && Event.PID != *PIDFilter) |
467 | return; |
468 | |
469 | // Drop the event if its image is loaded at the same address |
470 | if (Event.Address == Binary->getBaseAddress()) { |
471 | Binary->setIsLoadedByMMap(true); |
472 | return; |
473 | } |
474 | |
475 | if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) { |
476 | // A binary image could be unloaded and then reloaded at different |
477 | // place, so update binary load address. |
478 | // Only update for the first executable segment and assume all other |
479 | // segments are loaded at consecutive memory addresses, which is the case on |
480 | // X64. |
481 | Binary->setBaseAddress(Event.Address); |
482 | Binary->setIsLoadedByMMap(true); |
483 | } else { |
484 | // Verify segments are loaded consecutively. |
485 | const auto &Offsets = Binary->getTextSegmentOffsets(); |
486 | auto It = llvm::lower_bound(Range: Offsets, Value: Event.Offset); |
487 | if (It != Offsets.end() && *It == Event.Offset) { |
488 | // The event is for loading a separate executable segment. |
489 | auto I = std::distance(first: Offsets.begin(), last: It); |
490 | const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses(); |
491 | if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() != |
492 | Event.Address - Binary->getBaseAddress()) |
493 | exitWithError(Message: "Executable segments not loaded consecutively"); |
494 | } else { |
495 | if (It == Offsets.begin()) |
496 | exitWithError(Message: "File offset not found"); |
497 | else { |
498 | // Find the segment the event falls in. A large segment could be loaded |
499 | // via multiple mmap calls with consecutive memory addresses. |
500 | --It; |
501 | assert(*It < Event.Offset); |
502 | if (Event.Offset - *It != Event.Address - Binary->getBaseAddress()) |
503 | exitWithError(Message: "Segment not loaded by consecutive mmaps"); |
504 | } |
505 | } |
506 | } |
507 | } |
508 | |
509 | static std::string getContextKeyStr(ContextKey *K, |
510 | const ProfiledBinary *Binary) { |
511 | if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(Val: K)) { |
512 | return SampleContext::getContextString(Context: CtxKey->Context); |
513 | } else if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(Val: K)) { |
514 | std::ostringstream OContextStr; |
515 | for (uint32_t I = 0; I < CtxKey->Context.size(); I++) { |
516 | if (OContextStr.str().size()) |
517 | OContextStr << " @ "; |
518 | uint64_t Address = CtxKey->Context[I]; |
519 | if (UseOffset) { |
520 | if (UseLoadableSegmentAsBase) |
521 | Address -= Binary->getFirstLoadableAddress(); |
522 | else |
523 | Address -= Binary->getPreferredBaseAddress(); |
524 | } |
525 | OContextStr << "0x" |
526 | << utohexstr(X: Address, |
527 | /*LowerCase=*/true); |
528 | } |
529 | return OContextStr.str(); |
530 | } else { |
531 | llvm_unreachable("unexpected key type"); |
532 | } |
533 | } |
534 | |
535 | void HybridPerfReader::unwindSamples() { |
536 | VirtualUnwinder Unwinder(&SampleCounters, Binary); |
537 | for (const auto &Item : AggregatedSamples) { |
538 | const PerfSample *Sample = Item.first.getPtr(); |
539 | Unwinder.unwind(Sample, Repeat: Item.second); |
540 | } |
541 | |
542 | // Warn about untracked frames due to missing probes. |
543 | if (ShowDetailedWarning) { |
544 | for (auto Address : Unwinder.getUntrackedCallsites()) |
545 | WithColor::warning() << "Profile context truncated due to missing probe " |
546 | << "for call instruction at " |
547 | << format(Fmt: "0x%"PRIx64, Vals: Address) << "\n"; |
548 | } |
549 | |
550 | emitWarningSummary(Num: Unwinder.getUntrackedCallsites().size(), |
551 | Total: SampleCounters.size(), |
552 | Msg: "of profiled contexts are truncated due to missing probe " |
553 | "for call instruction."); |
554 | |
555 | emitWarningSummary( |
556 | Num: Unwinder.NumMismatchedExtCallBranch, Total: Unwinder.NumTotalBranches, |
557 | Msg: "of branches'source is a call instruction but doesn't match call frame " |
558 | "stack, likely due to unwinding error of external frame."); |
559 | |
560 | emitWarningSummary(Num: Unwinder.NumPairedExtAddr * 2, Total: Unwinder.NumTotalBranches, |
561 | Msg: "of branches containing paired external address."); |
562 | |
563 | emitWarningSummary(Num: Unwinder.NumUnpairedExtAddr, Total: Unwinder.NumTotalBranches, |
564 | Msg: "of branches containing external address but doesn't have " |
565 | "another external address to pair, likely due to " |
566 | "interrupt jmp or broken perf script."); |
567 | |
568 | emitWarningSummary( |
569 | Num: Unwinder.NumMismatchedProEpiBranch, Total: Unwinder.NumTotalBranches, |
570 | Msg: "of branches'source is a call instruction but doesn't match call frame " |
571 | "stack, likely due to frame in prolog/epilog."); |
572 | |
573 | emitWarningSummary(Num: Unwinder.NumMissingExternalFrame, |
574 | Total: Unwinder.NumExtCallBranch, |
575 | Msg: "of artificial call branches but doesn't have an external " |
576 | "frame to match."); |
577 | } |
578 | |
579 | bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, |
580 | SmallVectorImpl<LBREntry> &LBRStack) { |
581 | // The raw format of LBR stack is like: |
582 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
583 | // ... 0x4005c8/0x4005dc/P/-/-/0 |
584 | // It's in FIFO order and separated by whitespace. |
585 | SmallVector<StringRef, 32> Records; |
586 | TraceIt.getCurrentLine().rtrim().split(A&: Records, Separator: " ", MaxSplit: -1, KeepEmpty: false); |
587 | auto WarnInvalidLBR = [](TraceStream &TraceIt) { |
588 | WithColor::warning() << "Invalid address in LBR record at line " |
589 | << TraceIt.getLineNumber() << ": " |
590 | << TraceIt.getCurrentLine() << "\n"; |
591 | }; |
592 | |
593 | // Skip the leading instruction pointer. |
594 | size_t Index = 0; |
595 | uint64_t LeadingAddr; |
596 | if (!Records.empty() && !Records[0].contains(C: '/')) { |
597 | if (Records[0].getAsInteger(Radix: 16, Result&: LeadingAddr)) { |
598 | WarnInvalidLBR(TraceIt); |
599 | TraceIt.advance(); |
600 | return false; |
601 | } |
602 | Index = 1; |
603 | } |
604 | |
605 | // Now extract LBR samples - note that we do not reverse the |
606 | // LBR entry order so we can unwind the sample stack as we walk |
607 | // through LBR entries. |
608 | while (Index < Records.size()) { |
609 | auto &Token = Records[Index++]; |
610 | if (Token.size() == 0) |
611 | continue; |
612 | |
613 | SmallVector<StringRef, 8> Addresses; |
614 | Token.split(A&: Addresses, Separator: "/"); |
615 | uint64_t Src; |
616 | uint64_t Dst; |
617 | |
618 | // Stop at broken LBR records. |
619 | if (Addresses.size() < 2 || Addresses[0].substr(Start: 2).getAsInteger(Radix: 16, Result&: Src) || |
620 | Addresses[1].substr(Start: 2).getAsInteger(Radix: 16, Result&: Dst)) { |
621 | WarnInvalidLBR(TraceIt); |
622 | break; |
623 | } |
624 | |
625 | // Canonicalize to use preferred load address as base address. |
626 | Src = Binary->canonicalizeVirtualAddress(Address: Src); |
627 | Dst = Binary->canonicalizeVirtualAddress(Address: Dst); |
628 | bool SrcIsInternal = Binary->addressIsCode(Address: Src); |
629 | bool DstIsInternal = Binary->addressIsCode(Address: Dst); |
630 | if (!SrcIsInternal) |
631 | Src = ExternalAddr; |
632 | if (!DstIsInternal) |
633 | Dst = ExternalAddr; |
634 | // Filter external-to-external case to reduce LBR trace size. |
635 | if (!SrcIsInternal && !DstIsInternal) |
636 | continue; |
637 | |
638 | LBRStack.emplace_back(Args: LBREntry(Src, Dst)); |
639 | } |
640 | TraceIt.advance(); |
641 | return !LBRStack.empty(); |
642 | } |
643 | |
644 | bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, |
645 | SmallVectorImpl<uint64_t> &CallStack) { |
646 | // The raw format of call stack is like: |
647 | // 4005dc # leaf frame |
648 | // 400634 |
649 | // 400684 # root frame |
650 | // It's in bottom-up order with each frame in one line. |
651 | |
652 | // Extract stack frames from sample |
653 | while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(Prefix: " 0x")) { |
654 | StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); |
655 | uint64_t FrameAddr = 0; |
656 | if (FrameStr.getAsInteger(Radix: 16, Result&: FrameAddr)) { |
657 | // We might parse a non-perf sample line like empty line and comments, |
658 | // skip it |
659 | TraceIt.advance(); |
660 | return false; |
661 | } |
662 | TraceIt.advance(); |
663 | |
664 | FrameAddr = Binary->canonicalizeVirtualAddress(Address: FrameAddr); |
665 | // Currently intermixed frame from different binaries is not supported. |
666 | if (!Binary->addressIsCode(Address: FrameAddr)) { |
667 | if (CallStack.empty()) |
668 | NumLeafExternalFrame++; |
669 | // Push a special value(ExternalAddr) for the external frames so that |
670 | // unwinder can still work on this with artificial Call/Return branch. |
671 | // After unwinding, the context will be truncated for external frame. |
672 | // Also deduplicate the consecutive external addresses. |
673 | if (CallStack.empty() || CallStack.back() != ExternalAddr) |
674 | CallStack.emplace_back(Args: ExternalAddr); |
675 | continue; |
676 | } |
677 | |
678 | // We need to translate return address to call address for non-leaf frames. |
679 | if (!CallStack.empty()) { |
680 | auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); |
681 | if (!CallAddr) { |
682 | // Stop at an invalid return address caused by bad unwinding. This could |
683 | // happen to frame-pointer-based unwinding and the callee functions that |
684 | // do not have the frame pointer chain set up. |
685 | InvalidReturnAddresses.insert(x: FrameAddr); |
686 | break; |
687 | } |
688 | FrameAddr = CallAddr; |
689 | } |
690 | |
691 | CallStack.emplace_back(Args&: FrameAddr); |
692 | } |
693 | |
694 | // Strip out the bottom external addr. |
695 | if (CallStack.size() > 1 && CallStack.back() == ExternalAddr) |
696 | CallStack.pop_back(); |
697 | |
698 | // Skip other unrelated line, find the next valid LBR line |
699 | // Note that even for empty call stack, we should skip the address at the |
700 | // bottom, otherwise the following pass may generate a truncated callstack |
701 | while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(Prefix: " 0x")) { |
702 | TraceIt.advance(); |
703 | } |
704 | // Filter out broken stack sample. We may not have complete frame info |
705 | // if sample end up in prolog/epilog, the result is dangling context not |
706 | // connected to entry point. This should be relatively rare thus not much |
707 | // impact on overall profile quality. However we do want to filter them |
708 | // out to reduce the number of different calling contexts. One instance |
709 | // of such case - when sample landed in prolog/epilog, somehow stack |
710 | // walking will be broken in an unexpected way that higher frames will be |
711 | // missing. |
712 | return !CallStack.empty() && |
713 | !Binary->addressInPrologEpilog(Address: CallStack.front()); |
714 | } |
715 | |
716 | void PerfScriptReader::warnIfMissingMMap() { |
717 | if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) { |
718 | WithColor::warning() << "No relevant mmap event is matched for " |
719 | << Binary->getName() |
720 | << ", will use preferred address (" |
721 | << format(Fmt: "0x%"PRIx64, |
722 | Vals: Binary->getPreferredBaseAddress()) |
723 | << ") as the base loading address!\n"; |
724 | // Avoid redundant warning, only warn at the first unmatched sample. |
725 | Binary->setMissingMMapWarned(true); |
726 | } |
727 | } |
728 | |
729 | void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { |
730 | // The raw hybird sample started with call stack in FILO order and followed |
731 | // intermediately by LBR sample |
732 | // e.g. |
733 | // 4005dc # call stack leaf |
734 | // 400634 |
735 | // 400684 # call stack root |
736 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
737 | // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries |
738 | // |
739 | std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); |
740 | #ifndef NDEBUG |
741 | Sample->Linenum = TraceIt.getLineNumber(); |
742 | #endif |
743 | // Parsing call stack and populate into PerfSample.CallStack |
744 | if (!extractCallstack(TraceIt, CallStack&: Sample->CallStack)) { |
745 | // Skip the next LBR line matched current call stack |
746 | if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(Prefix: " 0x")) |
747 | TraceIt.advance(); |
748 | return; |
749 | } |
750 | |
751 | warnIfMissingMMap(); |
752 | |
753 | if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(Prefix: " 0x")) { |
754 | // Parsing LBR stack and populate into PerfSample.LBRStack |
755 | if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) { |
756 | if (IgnoreStackSamples) { |
757 | Sample->CallStack.clear(); |
758 | } else { |
759 | // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR |
760 | // ranges |
761 | Sample->CallStack.front() = Sample->LBRStack[0].Target; |
762 | } |
763 | // Record samples by aggregation |
764 | AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; |
765 | } |
766 | } else { |
767 | // LBR sample is encoded in single line after stack sample |
768 | exitWithError(Message: "'Hybrid perf sample is corrupted, No LBR sample line"); |
769 | } |
770 | } |
771 | |
772 | void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) { |
773 | std::error_code EC; |
774 | raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF); |
775 | if (EC) |
776 | exitWithError(EC, Whence: Filename); |
777 | writeUnsymbolizedProfile(OS); |
778 | } |
779 | |
780 | // Use ordered map to make the output deterministic |
781 | using OrderedCounterForPrint = std::map<std::string, SampleCounter *>; |
782 | |
783 | void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) { |
784 | OrderedCounterForPrint OrderedCounters; |
785 | for (auto &CI : SampleCounters) { |
786 | OrderedCounters[getContextKeyStr(K: CI.first.getPtr(), Binary)] = &CI.second; |
787 | } |
788 | |
789 | auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator, |
790 | uint32_t Indent) { |
791 | OS.indent(NumSpaces: Indent); |
792 | OS << Counter.size() << "\n"; |
793 | for (auto &I : Counter) { |
794 | uint64_t Start = I.first.first; |
795 | uint64_t End = I.first.second; |
796 | |
797 | if (UseOffset) { |
798 | if (UseLoadableSegmentAsBase) { |
799 | Start -= Binary->getFirstLoadableAddress(); |
800 | End -= Binary->getFirstLoadableAddress(); |
801 | } else { |
802 | Start -= Binary->getPreferredBaseAddress(); |
803 | End -= Binary->getPreferredBaseAddress(); |
804 | } |
805 | } |
806 | |
807 | OS.indent(NumSpaces: Indent); |
808 | OS << Twine::utohexstr(Val: Start) << Separator << Twine::utohexstr(Val: End) << ":" |
809 | << I.second << "\n"; |
810 | } |
811 | }; |
812 | |
813 | for (auto &CI : OrderedCounters) { |
814 | uint32_t Indent = 0; |
815 | if (ProfileIsCS) { |
816 | // Context string key |
817 | OS << "["<< CI.first << "]\n"; |
818 | Indent = 2; |
819 | } |
820 | |
821 | SampleCounter &Counter = *CI.second; |
822 | SCounterPrinter(Counter.RangeCounter, "-", Indent); |
823 | SCounterPrinter(Counter.BranchCounter, "->", Indent); |
824 | } |
825 | } |
826 | |
827 | // Format of input: |
828 | // number of entries in RangeCounter |
829 | // from_1-to_1:count_1 |
830 | // from_2-to_2:count_2 |
831 | // ...... |
832 | // from_n-to_n:count_n |
833 | // number of entries in BranchCounter |
834 | // src_1->dst_1:count_1 |
835 | // src_2->dst_2:count_2 |
836 | // ...... |
837 | // src_n->dst_n:count_n |
838 | void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt, |
839 | SampleCounter &SCounters) { |
840 | auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) { |
841 | std::string Msg = TraceIt.isAtEoF() |
842 | ? "Invalid raw profile!" |
843 | : "Invalid raw profile at line "+ |
844 | Twine(TraceIt.getLineNumber()).str() + ": "+ |
845 | TraceIt.getCurrentLine().str(); |
846 | exitWithError(Message: Msg); |
847 | }; |
848 | auto ReadNumber = [&](uint64_t &Num) { |
849 | if (TraceIt.isAtEoF()) |
850 | exitWithErrorForTraceLine(TraceIt); |
851 | if (TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 10, Result&: Num)) |
852 | exitWithErrorForTraceLine(TraceIt); |
853 | TraceIt.advance(); |
854 | }; |
855 | |
856 | auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) { |
857 | uint64_t Num = 0; |
858 | ReadNumber(Num); |
859 | while (Num--) { |
860 | if (TraceIt.isAtEoF()) |
861 | exitWithErrorForTraceLine(TraceIt); |
862 | StringRef Line = TraceIt.getCurrentLine().ltrim(); |
863 | |
864 | uint64_t Count = 0; |
865 | auto LineSplit = Line.split(Separator: ":"); |
866 | if (LineSplit.second.empty() || LineSplit.second.getAsInteger(Radix: 10, Result&: Count)) |
867 | exitWithErrorForTraceLine(TraceIt); |
868 | |
869 | uint64_t Source = 0; |
870 | uint64_t Target = 0; |
871 | auto Range = LineSplit.first.split(Separator); |
872 | if (Range.second.empty() || Range.first.getAsInteger(Radix: 16, Result&: Source) || |
873 | Range.second.getAsInteger(Radix: 16, Result&: Target)) |
874 | exitWithErrorForTraceLine(TraceIt); |
875 | |
876 | if (UseOffset) { |
877 | if (UseLoadableSegmentAsBase) { |
878 | Source += Binary->getFirstLoadableAddress(); |
879 | Target += Binary->getFirstLoadableAddress(); |
880 | } else { |
881 | Source += Binary->getPreferredBaseAddress(); |
882 | Target += Binary->getPreferredBaseAddress(); |
883 | } |
884 | } |
885 | |
886 | Counter[{Source, Target}] += Count; |
887 | TraceIt.advance(); |
888 | } |
889 | }; |
890 | |
891 | ReadCounter(SCounters.RangeCounter, "-"); |
892 | ReadCounter(SCounters.BranchCounter, "->"); |
893 | } |
894 | |
895 | void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) { |
896 | TraceStream TraceIt(FileName); |
897 | while (!TraceIt.isAtEoF()) { |
898 | std::shared_ptr<StringBasedCtxKey> Key = |
899 | std::make_shared<StringBasedCtxKey>(); |
900 | StringRef Line = TraceIt.getCurrentLine(); |
901 | // Read context stack for CS profile. |
902 | if (Line.starts_with(Prefix: "[")) { |
903 | ProfileIsCS = true; |
904 | auto I = ContextStrSet.insert(x: Line.str()); |
905 | SampleContext::createCtxVectorFromStr(ContextStr: *I.first, Context&: Key->Context); |
906 | TraceIt.advance(); |
907 | } |
908 | auto Ret = |
909 | SampleCounters.emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
910 | readSampleCounters(TraceIt, SCounters&: Ret.first->second); |
911 | } |
912 | } |
913 | |
914 | void UnsymbolizedProfileReader::parsePerfTraces() { |
915 | readUnsymbolizedProfile(FileName: PerfTraceFile); |
916 | } |
917 | |
918 | void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, |
919 | uint64_t Repeat) { |
920 | SampleCounter &Counter = SampleCounters.begin()->second; |
921 | uint64_t EndAddress = 0; |
922 | for (const LBREntry &LBR : Sample->LBRStack) { |
923 | uint64_t SourceAddress = LBR.Source; |
924 | uint64_t TargetAddress = LBR.Target; |
925 | |
926 | // Record the branch if its SourceAddress is external. It can be the case an |
927 | // external source call an internal function, later this branch will be used |
928 | // to generate the function's head sample. |
929 | if (Binary->addressIsCode(Address: TargetAddress)) { |
930 | Counter.recordBranchCount(Source: SourceAddress, Target: TargetAddress, Repeat); |
931 | } |
932 | |
933 | // If this not the first LBR, update the range count between TO of current |
934 | // LBR and FROM of next LBR. |
935 | uint64_t StartAddress = TargetAddress; |
936 | if (Binary->addressIsCode(Address: StartAddress) && |
937 | Binary->addressIsCode(Address: EndAddress) && |
938 | isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary)) |
939 | Counter.recordRangeCount(Start: StartAddress, End: EndAddress, Repeat); |
940 | EndAddress = SourceAddress; |
941 | } |
942 | } |
943 | |
944 | void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { |
945 | std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); |
946 | // Parsing LBR stack and populate into PerfSample.LBRStack |
947 | if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) { |
948 | warnIfMissingMMap(); |
949 | // Record LBR only samples by aggregation |
950 | AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; |
951 | } |
952 | } |
953 | |
954 | void PerfScriptReader::generateUnsymbolizedProfile() { |
955 | // There is no context for LBR only sample, so initialize one entry with |
956 | // fake "empty" context key. |
957 | assert(SampleCounters.empty() && |
958 | "Sample counter map should be empty before raw profile generation"); |
959 | std::shared_ptr<StringBasedCtxKey> Key = |
960 | std::make_shared<StringBasedCtxKey>(); |
961 | SampleCounters.emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
962 | for (const auto &Item : AggregatedSamples) { |
963 | const PerfSample *Sample = Item.first.getPtr(); |
964 | computeCounterFromLBR(Sample, Repeat: Item.second); |
965 | } |
966 | } |
967 | |
968 | uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) { |
969 | // The aggregated count is optional, so do not skip the line and return 1 if |
970 | // it's unmatched |
971 | uint64_t Count = 1; |
972 | if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: Count)) |
973 | TraceIt.advance(); |
974 | return Count; |
975 | } |
976 | |
977 | void PerfScriptReader::parseSample(TraceStream &TraceIt) { |
978 | NumTotalSample++; |
979 | uint64_t Count = parseAggregatedCount(TraceIt); |
980 | assert(Count >= 1 && "Aggregated count should be >= 1!"); |
981 | parseSample(TraceIt, Count); |
982 | } |
983 | |
984 | bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary, |
985 | StringRef Line, |
986 | MMapEvent &MMap) { |
987 | // Parse a MMap2 line like: |
988 | // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 |
989 | // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so |
990 | constexpr static const char *const MMap2Pattern = |
991 | "PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: " |
992 | "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " |
993 | "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)"; |
994 | // Parse a MMap line like |
995 | // PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \ |
996 | // 0xffffffff81e00000]: x [kernel.kallsyms]_text |
997 | constexpr static const char *const MMapPattern = |
998 | "PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: " |
999 | "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " |
1000 | "(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)"; |
1001 | // Field 0 - whole line |
1002 | // Field 1 - PID |
1003 | // Field 2 - base address |
1004 | // Field 3 - mmapped size |
1005 | // Field 4 - page offset |
1006 | // Field 5 - binary path |
1007 | enum EventIndex { |
1008 | WHOLE_LINE = 0, |
1009 | PID = 1, |
1010 | MMAPPED_ADDRESS = 2, |
1011 | MMAPPED_SIZE = 3, |
1012 | PAGE_OFFSET = 4, |
1013 | BINARY_PATH = 5 |
1014 | }; |
1015 | |
1016 | bool R = false; |
1017 | SmallVector<StringRef, 6> Fields; |
1018 | if (Line.contains(Other: "PERF_RECORD_MMAP2 ")) { |
1019 | Regex RegMmap2(MMap2Pattern); |
1020 | R = RegMmap2.match(String: Line, Matches: &Fields); |
1021 | } else if (Line.contains(Other: "PERF_RECORD_MMAP ")) { |
1022 | Regex RegMmap(MMapPattern); |
1023 | R = RegMmap.match(String: Line, Matches: &Fields); |
1024 | } else |
1025 | llvm_unreachable("unexpected MMAP event entry"); |
1026 | |
1027 | if (!R) { |
1028 | std::string WarningMsg = "Cannot parse mmap event: "+ Line.str() + " \n"; |
1029 | WithColor::warning() << WarningMsg; |
1030 | return false; |
1031 | } |
1032 | long long MMapPID = 0; |
1033 | getAsSignedInteger(Str: Fields[PID], Radix: 10, Result&: MMapPID); |
1034 | MMap.PID = MMapPID; |
1035 | Fields[MMAPPED_ADDRESS].getAsInteger(Radix: 0, Result&: MMap.Address); |
1036 | Fields[MMAPPED_SIZE].getAsInteger(Radix: 0, Result&: MMap.Size); |
1037 | Fields[PAGE_OFFSET].getAsInteger(Radix: 0, Result&: MMap.Offset); |
1038 | MMap.BinaryPath = Fields[BINARY_PATH]; |
1039 | if (ShowMmapEvents) { |
1040 | outs() << "Mmap: Binary "<< MMap.BinaryPath << " loaded at " |
1041 | << format(Fmt: "0x%"PRIx64 ":", Vals: MMap.Address) << " \n"; |
1042 | } |
1043 | |
1044 | StringRef BinaryName = filename(Path: MMap.BinaryPath, UseBackSlash: Binary->isCOFF()); |
1045 | if (Binary->isKernel()) { |
1046 | return Binary->isKernelImageName(BinaryName); |
1047 | } |
1048 | return Binary->getName() == BinaryName; |
1049 | } |
1050 | |
1051 | void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) { |
1052 | MMapEvent MMap; |
1053 | if (extractMMapEventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap)) |
1054 | updateBinaryAddress(Event: MMap); |
1055 | TraceIt.advance(); |
1056 | } |
1057 | |
1058 | void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) { |
1059 | if (isMMapEvent(Line: TraceIt.getCurrentLine())) |
1060 | parseMMapEvent(TraceIt); |
1061 | else |
1062 | parseSample(TraceIt); |
1063 | } |
1064 | |
1065 | void PerfScriptReader::parseAndAggregateTrace() { |
1066 | // Trace line iterator |
1067 | TraceStream TraceIt(PerfTraceFile); |
1068 | while (!TraceIt.isAtEoF()) |
1069 | parseEventOrSample(TraceIt); |
1070 | } |
1071 | |
1072 | // A LBR sample is like: |
1073 | // 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... |
1074 | // A heuristic for fast detection by checking whether a |
1075 | // leading " 0x" and the '/' exist. |
1076 | bool PerfScriptReader::isLBRSample(StringRef Line) { |
1077 | // Skip the leading instruction pointer |
1078 | SmallVector<StringRef, 32> Records; |
1079 | Line.trim().split(A&: Records, Separator: " ", MaxSplit: 2, KeepEmpty: false); |
1080 | if (Records.size() < 2) |
1081 | return false; |
1082 | if (Records[1].starts_with(Prefix: "0x") && Records[1].contains(C: '/')) |
1083 | return true; |
1084 | return false; |
1085 | } |
1086 | |
1087 | bool PerfScriptReader::isMMapEvent(StringRef Line) { |
1088 | // Short cut to avoid string find is possible. |
1089 | if (Line.empty() || Line.size() < 50) |
1090 | return false; |
1091 | |
1092 | if (std::isdigit(Line[0])) |
1093 | return false; |
1094 | |
1095 | // PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of |
1096 | // the line for ` perf script --show-mmap-events -i ...` |
1097 | return Line.contains(Other: "PERF_RECORD_MMAP"); |
1098 | } |
1099 | |
1100 | // The raw hybird sample is like |
1101 | // e.g. |
1102 | // 4005dc # call stack leaf |
1103 | // 400634 |
1104 | // 400684 # call stack root |
1105 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
1106 | // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries |
1107 | // Determine the perfscript contains hybrid samples(call stack + LBRs) by |
1108 | // checking whether there is a non-empty call stack immediately followed by |
1109 | // a LBR sample |
1110 | PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) { |
1111 | TraceStream TraceIt(FileName); |
1112 | uint64_t FrameAddr = 0; |
1113 | while (!TraceIt.isAtEoF()) { |
1114 | // Skip the aggregated count |
1115 | if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: FrameAddr)) |
1116 | TraceIt.advance(); |
1117 | |
1118 | // Detect sample with call stack |
1119 | int32_t Count = 0; |
1120 | while (!TraceIt.isAtEoF() && |
1121 | !TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 16, Result&: FrameAddr)) { |
1122 | Count++; |
1123 | TraceIt.advance(); |
1124 | } |
1125 | if (!TraceIt.isAtEoF()) { |
1126 | if (isLBRSample(Line: TraceIt.getCurrentLine())) { |
1127 | if (Count > 0) |
1128 | return PerfContent::LBRStack; |
1129 | else |
1130 | return PerfContent::LBR; |
1131 | } |
1132 | TraceIt.advance(); |
1133 | } |
1134 | } |
1135 | |
1136 | exitWithError(Message: "Invalid perf script input!"); |
1137 | return PerfContent::UnknownContent; |
1138 | } |
1139 | |
1140 | void HybridPerfReader::generateUnsymbolizedProfile() { |
1141 | ProfileIsCS = !IgnoreStackSamples; |
1142 | if (ProfileIsCS) |
1143 | unwindSamples(); |
1144 | else |
1145 | PerfScriptReader::generateUnsymbolizedProfile(); |
1146 | } |
1147 | |
1148 | void PerfScriptReader::warnTruncatedStack() { |
1149 | if (ShowDetailedWarning) { |
1150 | for (auto Address : InvalidReturnAddresses) { |
1151 | WithColor::warning() |
1152 | << "Truncated stack sample due to invalid return address at " |
1153 | << format(Fmt: "0x%"PRIx64, Vals: Address) |
1154 | << ", likely caused by frame pointer omission\n"; |
1155 | } |
1156 | } |
1157 | emitWarningSummary( |
1158 | Num: InvalidReturnAddresses.size(), Total: AggregatedSamples.size(), |
1159 | Msg: "of truncated stack samples due to invalid return address, " |
1160 | "likely caused by frame pointer omission."); |
1161 | } |
1162 | |
1163 | void PerfScriptReader::warnInvalidRange() { |
1164 | std::unordered_map<std::pair<uint64_t, uint64_t>, uint64_t, |
1165 | pair_hash<uint64_t, uint64_t>> |
1166 | Ranges; |
1167 | |
1168 | for (const auto &Item : AggregatedSamples) { |
1169 | const PerfSample *Sample = Item.first.getPtr(); |
1170 | uint64_t Count = Item.second; |
1171 | uint64_t EndAddress = 0; |
1172 | for (const LBREntry &LBR : Sample->LBRStack) { |
1173 | uint64_t SourceAddress = LBR.Source; |
1174 | uint64_t StartAddress = LBR.Target; |
1175 | if (EndAddress != 0) |
1176 | Ranges[{StartAddress, EndAddress}] += Count; |
1177 | EndAddress = SourceAddress; |
1178 | } |
1179 | } |
1180 | |
1181 | if (Ranges.empty()) { |
1182 | WithColor::warning() << "No samples in perf script!\n"; |
1183 | return; |
1184 | } |
1185 | |
1186 | auto WarnInvalidRange = [&](uint64_t StartAddress, uint64_t EndAddress, |
1187 | StringRef Msg) { |
1188 | if (!ShowDetailedWarning) |
1189 | return; |
1190 | WithColor::warning() << "["<< format(Fmt: "%8"PRIx64, Vals: StartAddress) << "," |
1191 | << format(Fmt: "%8"PRIx64, Vals: EndAddress) << "]: "<< Msg |
1192 | << "\n"; |
1193 | }; |
1194 | |
1195 | const char *EndNotBoundaryMsg = "Range is not on instruction boundary, " |
1196 | "likely due to profile and binary mismatch."; |
1197 | const char *DanglingRangeMsg = "Range does not belong to any functions, " |
1198 | "likely from PLT, .init or .fini section."; |
1199 | const char *RangeCrossFuncMsg = |
1200 | "Fall through range should not cross function boundaries, likely due to " |
1201 | "profile and binary mismatch."; |
1202 | const char *BogusRangeMsg = "Range start is after or too far from range end."; |
1203 | |
1204 | uint64_t TotalRangeNum = 0; |
1205 | uint64_t InstNotBoundary = 0; |
1206 | uint64_t UnmatchedRange = 0; |
1207 | uint64_t RangeCrossFunc = 0; |
1208 | uint64_t BogusRange = 0; |
1209 | |
1210 | for (auto &I : Ranges) { |
1211 | uint64_t StartAddress = I.first.first; |
1212 | uint64_t EndAddress = I.first.second; |
1213 | TotalRangeNum += I.second; |
1214 | |
1215 | if (!Binary->addressIsCode(Address: StartAddress) && |
1216 | !Binary->addressIsCode(Address: EndAddress)) |
1217 | continue; |
1218 | |
1219 | if (!Binary->addressIsCode(Address: StartAddress) || |
1220 | !Binary->addressIsTransfer(Address: EndAddress)) { |
1221 | InstNotBoundary += I.second; |
1222 | WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg); |
1223 | } |
1224 | |
1225 | auto *FRange = Binary->findFuncRange(Address: StartAddress); |
1226 | if (!FRange) { |
1227 | UnmatchedRange += I.second; |
1228 | WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg); |
1229 | continue; |
1230 | } |
1231 | |
1232 | if (EndAddress >= FRange->EndAddress) { |
1233 | RangeCrossFunc += I.second; |
1234 | WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg); |
1235 | } |
1236 | |
1237 | if (Binary->addressIsCode(Address: StartAddress) && |
1238 | Binary->addressIsCode(Address: EndAddress) && |
1239 | !isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary)) { |
1240 | BogusRange += I.second; |
1241 | WarnInvalidRange(StartAddress, EndAddress, BogusRangeMsg); |
1242 | } |
1243 | } |
1244 | |
1245 | emitWarningSummary( |
1246 | Num: InstNotBoundary, Total: TotalRangeNum, |
1247 | Msg: "of samples are from ranges that are not on instruction boundary."); |
1248 | emitWarningSummary( |
1249 | Num: UnmatchedRange, Total: TotalRangeNum, |
1250 | Msg: "of samples are from ranges that do not belong to any functions."); |
1251 | emitWarningSummary( |
1252 | Num: RangeCrossFunc, Total: TotalRangeNum, |
1253 | Msg: "of samples are from ranges that do cross function boundaries."); |
1254 | emitWarningSummary( |
1255 | Num: BogusRange, Total: TotalRangeNum, |
1256 | Msg: "of samples are from ranges that have range start after or too far from " |
1257 | "range end acrossing the unconditinal jmp."); |
1258 | } |
1259 | |
1260 | void PerfScriptReader::parsePerfTraces() { |
1261 | // Parse perf traces and do aggregation. |
1262 | parseAndAggregateTrace(); |
1263 | if (Binary->isKernel() && !Binary->getIsLoadedByMMap()) { |
1264 | exitWithError( |
1265 | Message: "Kernel is requested, but no kernel is found in mmap events."); |
1266 | } |
1267 | |
1268 | emitWarningSummary(Num: NumLeafExternalFrame, Total: NumTotalSample, |
1269 | Msg: "of samples have leaf external frame in call stack."); |
1270 | emitWarningSummary(Num: NumLeadingOutgoingLBR, Total: NumTotalSample, |
1271 | Msg: "of samples have leading external LBR."); |
1272 | |
1273 | // Generate unsymbolized profile. |
1274 | warnTruncatedStack(); |
1275 | warnInvalidRange(); |
1276 | generateUnsymbolizedProfile(); |
1277 | AggregatedSamples.clear(); |
1278 | |
1279 | if (SkipSymbolization) |
1280 | writeUnsymbolizedProfile(Filename: OutputFilename); |
1281 | } |
1282 | |
1283 | SmallVector<CleanupInstaller, 2> PerfScriptReader::TempFileCleanups; |
1284 | |
1285 | } // end namespace sampleprof |
1286 | } // end namespace llvm |
1287 |
Definitions
- SkipSymbolization
- ShowMmapEvents
- UseOffset
- UseLoadableSegmentAsBase
- IgnoreStackSamples
- ShowDetailedWarning
- CSProfMaxUnsymbolizedCtxDepth
- unwindCall
- unwindLinear
- unwindReturn
- unwindBranch
- getContextKey
- getContextKey
- collectSamplesFromFrame
- collectSamplesFromFrameTrie
- collectSamplesFromFrameTrie
- recordBranchCount
- unwind
- create
- convertPerfDataToTrace
- filename
- updateBinaryAddress
- getContextKeyStr
- unwindSamples
- extractLBRStack
- extractCallstack
- warnIfMissingMMap
- parseSample
- writeUnsymbolizedProfile
- writeUnsymbolizedProfile
- readSampleCounters
- readUnsymbolizedProfile
- parsePerfTraces
- computeCounterFromLBR
- parseSample
- generateUnsymbolizedProfile
- parseAggregatedCount
- parseSample
- extractMMapEventForBinary
- parseMMapEvent
- parseEventOrSample
- parseAndAggregateTrace
- isLBRSample
- isMMapEvent
- checkPerfScriptType
- generateUnsymbolizedProfile
- warnTruncatedStack
- warnInvalidRange
- parsePerfTraces
Improve your Profiling and Debugging skills
Find out more