1 | //===-- LibiptDecoder.cpp --======-----------------------------------------===// |
2 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
3 | // See https://llvm.org/LICENSE.txt for license information. |
4 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
5 | // |
6 | //===----------------------------------------------------------------------===// |
7 | |
8 | #include "LibiptDecoder.h" |
9 | #include "TraceIntelPT.h" |
10 | #include "lldb/Target/Process.h" |
11 | #include <optional> |
12 | |
13 | using namespace lldb; |
14 | using namespace lldb_private; |
15 | using namespace lldb_private::trace_intel_pt; |
16 | using namespace llvm; |
17 | |
/// Tell whether a libipt status value denotes a failure. Negative values are
/// errors; zero and positive values are treated as non-error status bits.
bool IsLibiptError(int status) {
  const bool is_failure = status < 0;
  return is_failure;
}
19 | |
20 | bool IsEndOfStream(int status) { |
21 | assert(status >= 0 && "We can't check if we reached the end of the stream if " |
22 | "we got a failed status" ); |
23 | return status & pts_eos; |
24 | } |
25 | |
26 | bool HasEvents(int status) { |
27 | assert(status >= 0 && "We can't check for events if we got a failed status" ); |
28 | return status & pts_event_pending; |
29 | } |
30 | |
31 | // RAII deleter for libipt's decoders |
32 | auto InsnDecoderDeleter = [](pt_insn_decoder *decoder) { |
33 | pt_insn_free_decoder(decoder); |
34 | }; |
35 | |
36 | auto QueryDecoderDeleter = [](pt_query_decoder *decoder) { |
37 | pt_qry_free_decoder(decoder); |
38 | }; |
39 | |
40 | using PtInsnDecoderUP = |
41 | std::unique_ptr<pt_insn_decoder, decltype(InsnDecoderDeleter)>; |
42 | |
43 | using PtQueryDecoderUP = |
44 | std::unique_ptr<pt_query_decoder, decltype(QueryDecoderDeleter)>; |
45 | |
46 | /// Create a basic configuration object limited to a given buffer that can be |
47 | /// used for many different decoders. |
48 | static Expected<pt_config> CreateBasicLibiptConfig(TraceIntelPT &trace_intel_pt, |
49 | ArrayRef<uint8_t> buffer) { |
50 | Expected<pt_cpu> cpu_info = trace_intel_pt.GetCPUInfo(); |
51 | if (!cpu_info) |
52 | return cpu_info.takeError(); |
53 | |
54 | pt_config config; |
55 | pt_config_init(&config); |
56 | config.cpu = *cpu_info; |
57 | |
58 | int status = pt_cpu_errata(&config.errata, &config.cpu); |
59 | if (IsLibiptError(status)) |
60 | return make_error<IntelPTError>(Args&: status); |
61 | |
62 | // The libipt library does not modify the trace buffer, hence the |
63 | // following casts are safe. |
64 | config.begin = const_cast<uint8_t *>(buffer.data()); |
65 | config.end = const_cast<uint8_t *>(buffer.data() + buffer.size()); |
66 | return config; |
67 | } |
68 | |
69 | /// Callback used by libipt for reading the process memory. |
70 | /// |
71 | /// More information can be found in |
72 | /// https://github.com/intel/libipt/blob/master/doc/man/pt_image_set_callback.3.md |
73 | static int ReadProcessMemory(uint8_t *buffer, size_t size, |
74 | const pt_asid * /* unused */, uint64_t pc, |
75 | void *context) { |
76 | Process *process = static_cast<Process *>(context); |
77 | |
78 | Status error; |
79 | int bytes_read = process->ReadMemory(vm_addr: pc, buf: buffer, size, error); |
80 | if (error.Fail()) |
81 | return -pte_nomap; |
82 | return bytes_read; |
83 | } |
84 | |
85 | /// Set up the memory image callback for the given decoder. |
86 | static Error SetupMemoryImage(pt_insn_decoder *decoder, Process &process) { |
87 | pt_image *image = pt_insn_get_image(decoder); |
88 | |
89 | int status = pt_image_set_callback(image, ReadProcessMemory, &process); |
90 | if (IsLibiptError(status)) |
91 | return make_error<IntelPTError>(Args&: status); |
92 | return Error::success(); |
93 | } |
94 | |
95 | /// Create an instruction decoder for the given buffer and the given process. |
96 | static Expected<PtInsnDecoderUP> |
97 | CreateInstructionDecoder(TraceIntelPT &trace_intel_pt, ArrayRef<uint8_t> buffer, |
98 | Process &process) { |
99 | Expected<pt_config> config = CreateBasicLibiptConfig(trace_intel_pt, buffer); |
100 | if (!config) |
101 | return config.takeError(); |
102 | |
103 | pt_insn_decoder *decoder_ptr = pt_insn_alloc_decoder(&*config); |
104 | if (!decoder_ptr) |
105 | return make_error<IntelPTError>(-pte_nomem); |
106 | |
107 | PtInsnDecoderUP decoder_up(decoder_ptr, InsnDecoderDeleter); |
108 | |
109 | if (Error err = SetupMemoryImage(decoder_ptr, process)) |
110 | return std::move(err); |
111 | |
112 | return decoder_up; |
113 | } |
114 | |
115 | /// Create a query decoder for the given buffer. The query decoder is the |
116 | /// highest level decoder that operates directly on packets and doesn't perform |
117 | /// actual instruction decoding. That's why it can be useful for inspecting a |
118 | /// raw trace without pinning it to a particular process. |
119 | static Expected<PtQueryDecoderUP> |
120 | CreateQueryDecoder(TraceIntelPT &trace_intel_pt, ArrayRef<uint8_t> buffer) { |
121 | Expected<pt_config> config = CreateBasicLibiptConfig(trace_intel_pt, buffer); |
122 | if (!config) |
123 | return config.takeError(); |
124 | |
125 | pt_query_decoder *decoder_ptr = pt_qry_alloc_decoder(&*config); |
126 | if (!decoder_ptr) |
127 | return make_error<IntelPTError>(-pte_nomem); |
128 | |
129 | return PtQueryDecoderUP(decoder_ptr, QueryDecoderDeleter); |
130 | } |
131 | |
132 | /// Class used to identify anomalies in traces, which should often indicate a |
133 | /// fatal error in the trace. |
134 | class PSBBlockAnomalyDetector { |
135 | public: |
136 | PSBBlockAnomalyDetector(pt_insn_decoder &decoder, |
137 | TraceIntelPT &trace_intel_pt, |
138 | DecodedThread &decoded_thread) |
139 | : m_decoder(decoder), m_decoded_thread(decoded_thread) { |
140 | m_infinite_decoding_loop_threshold = |
141 | trace_intel_pt.GetGlobalProperties() |
142 | .GetInfiniteDecodingLoopVerificationThreshold(); |
143 | m_extremely_large_decoding_threshold = |
144 | trace_intel_pt.GetGlobalProperties() |
145 | .GetExtremelyLargeDecodingThreshold(); |
146 | m_next_infinite_decoding_loop_threshold = |
147 | m_infinite_decoding_loop_threshold; |
148 | } |
149 | |
150 | /// \return |
151 | /// An \a llvm::Error if an anomaly that includes the last instruction item |
152 | /// in the trace, or \a llvm::Error::success otherwise. |
153 | Error DetectAnomaly() { |
154 | RefreshPacketOffset(); |
155 | uint64_t insn_added_since_last_packet_offset = |
156 | m_decoded_thread.GetTotalInstructionCount() - |
157 | m_insn_count_at_last_packet_offset; |
158 | |
159 | // We want to check if we might have fallen in an infinite loop. As this |
160 | // check is not a no-op, we want to do it when we have a strong suggestion |
161 | // that things went wrong. First, we check how many instructions we have |
162 | // decoded since we processed an Intel PT packet for the last time. This |
163 | // number should be low, because at some point we should see branches, jumps |
164 | // or interrupts that require a new packet to be processed. Once we reach |
165 | // certain threshold we start analyzing the trace. |
166 | // |
167 | // We use the number of decoded instructions since the last Intel PT packet |
168 | // as a proxy because, in fact, we don't expect a single packet to give, |
169 | // say, 100k instructions. That would mean that there are 100k sequential |
170 | // instructions without any single branch, which is highly unlikely, or that |
171 | // we found an infinite loop using direct jumps, e.g. |
172 | // |
173 | // 0x0A: nop or pause |
174 | // 0x0C: jump to 0x0A |
175 | // |
176 | // which is indeed code that is found in the kernel. I presume we reach |
177 | // this kind of code in the decoder because we don't handle self-modified |
178 | // code in post-mortem kernel traces. |
179 | // |
180 | // We are right now only signaling the anomaly as a trace error, but it |
181 | // would be more conservative to also discard all the trace items found in |
182 | // this PSB. I prefer not to do that for the time being to give more |
183 | // exposure to this kind of anomalies and help debugging. Discarding the |
184 | // trace items would just make investigation harded. |
185 | // |
186 | // Finally, if the user wants to see if a specific thread has an anomaly, |
187 | // it's enough to run the `thread trace dump info` command and look for the |
188 | // count of this kind of errors. |
189 | |
190 | if (insn_added_since_last_packet_offset >= |
191 | m_extremely_large_decoding_threshold) { |
192 | // In this case, we have decoded a massive amount of sequential |
193 | // instructions that don't loop. Honestly I wonder if this will ever |
194 | // happen, but better safe than sorry. |
195 | return createStringError( |
196 | EC: inconvertibleErrorCode(), |
197 | Msg: "anomalous trace: possible infinite trace detected" ); |
198 | } |
199 | if (insn_added_since_last_packet_offset == |
200 | m_next_infinite_decoding_loop_threshold) { |
201 | if (std::optional<uint64_t> loop_size = TryIdentifyInfiniteLoop()) { |
202 | return createStringError( |
203 | EC: inconvertibleErrorCode(), |
204 | Fmt: "anomalous trace: possible infinite loop detected of size %" PRIu64, |
205 | Vals: *loop_size); |
206 | } |
207 | m_next_infinite_decoding_loop_threshold *= 2; |
208 | } |
209 | return Error::success(); |
210 | } |
211 | |
212 | private: |
213 | std::optional<uint64_t> TryIdentifyInfiniteLoop() { |
214 | // The infinite decoding loops we'll encounter are due to sequential |
215 | // instructions that repeat themselves due to direct jumps, therefore in a |
216 | // cycle each individual address will only appear once. We use this |
217 | // information to detect cycles by finding the last 2 ocurrences of the last |
218 | // instruction added to the trace. Then we traverse the trace making sure |
219 | // that these two instructions where the ends of a repeating loop. |
220 | |
221 | // This is a utility that returns the most recent instruction index given a |
222 | // position in the trace. If the given position is an instruction, that |
223 | // position is returned. It skips non-instruction items. |
224 | auto most_recent_insn_index = |
225 | [&](uint64_t item_index) -> std::optional<uint64_t> { |
226 | while (true) { |
227 | if (m_decoded_thread.GetItemKindByIndex(item_index) == |
228 | lldb::eTraceItemKindInstruction) { |
229 | return item_index; |
230 | } |
231 | if (item_index == 0) |
232 | return std::nullopt; |
233 | item_index--; |
234 | } |
235 | return std::nullopt; |
236 | }; |
237 | // Similar to most_recent_insn_index but skips the starting position. |
238 | auto prev_insn_index = [&](uint64_t item_index) -> std::optional<uint64_t> { |
239 | if (item_index == 0) |
240 | return std::nullopt; |
241 | return most_recent_insn_index(item_index - 1); |
242 | }; |
243 | |
244 | // We first find the most recent instruction. |
245 | std::optional<uint64_t> last_insn_index_opt = |
246 | *prev_insn_index(m_decoded_thread.GetItemsCount()); |
247 | if (!last_insn_index_opt) |
248 | return std::nullopt; |
249 | uint64_t last_insn_index = *last_insn_index_opt; |
250 | |
251 | // We then find the most recent previous occurrence of that last |
252 | // instruction. |
253 | std::optional<uint64_t> last_insn_copy_index = |
254 | prev_insn_index(last_insn_index); |
255 | uint64_t loop_size = 1; |
256 | while (last_insn_copy_index && |
257 | m_decoded_thread.GetInstructionLoadAddress(item_index: *last_insn_copy_index) != |
258 | m_decoded_thread.GetInstructionLoadAddress(item_index: last_insn_index)) { |
259 | last_insn_copy_index = prev_insn_index(*last_insn_copy_index); |
260 | loop_size++; |
261 | } |
262 | if (!last_insn_copy_index) |
263 | return std::nullopt; |
264 | |
265 | // Now we check if the segment between these last positions of the last |
266 | // instruction address is in fact a repeating loop. |
267 | uint64_t loop_elements_visited = 1; |
268 | uint64_t insn_index_a = last_insn_index, |
269 | insn_index_b = *last_insn_copy_index; |
270 | while (loop_elements_visited < loop_size) { |
271 | if (std::optional<uint64_t> prev = prev_insn_index(insn_index_a)) |
272 | insn_index_a = *prev; |
273 | else |
274 | return std::nullopt; |
275 | if (std::optional<uint64_t> prev = prev_insn_index(insn_index_b)) |
276 | insn_index_b = *prev; |
277 | else |
278 | return std::nullopt; |
279 | if (m_decoded_thread.GetInstructionLoadAddress(item_index: insn_index_a) != |
280 | m_decoded_thread.GetInstructionLoadAddress(item_index: insn_index_b)) |
281 | return std::nullopt; |
282 | loop_elements_visited++; |
283 | } |
284 | return loop_size; |
285 | } |
286 | |
287 | // Refresh the internal counters if a new packet offset has been visited |
288 | void RefreshPacketOffset() { |
289 | lldb::addr_t new_packet_offset; |
290 | if (!IsLibiptError(pt_insn_get_offset(&m_decoder, &new_packet_offset)) && |
291 | new_packet_offset != m_last_packet_offset) { |
292 | m_last_packet_offset = new_packet_offset; |
293 | m_next_infinite_decoding_loop_threshold = |
294 | m_infinite_decoding_loop_threshold; |
295 | m_insn_count_at_last_packet_offset = |
296 | m_decoded_thread.GetTotalInstructionCount(); |
297 | } |
298 | } |
299 | |
300 | pt_insn_decoder &m_decoder; |
301 | DecodedThread &m_decoded_thread; |
302 | lldb::addr_t m_last_packet_offset = LLDB_INVALID_ADDRESS; |
303 | uint64_t m_insn_count_at_last_packet_offset = 0; |
304 | uint64_t m_infinite_decoding_loop_threshold; |
305 | uint64_t m_next_infinite_decoding_loop_threshold; |
306 | uint64_t m_extremely_large_decoding_threshold; |
307 | }; |
308 | |
309 | /// Class that decodes a raw buffer for a single PSB block using the low level |
310 | /// libipt library. It assumes that kernel and user mode instructions are not |
311 | /// mixed in the same PSB block. |
312 | /// |
313 | /// Throughout this code, the status of the decoder will be used to identify |
314 | /// events needed to be processed or errors in the decoder. The values can be |
315 | /// - negative: actual errors |
316 | /// - positive or zero: not an error, but a list of bits signaling the status |
317 | /// of the decoder, e.g. whether there are events that need to be decoded or |
318 | /// not. |
319 | class PSBBlockDecoder { |
320 | public: |
321 | /// \param[in] decoder |
322 | /// A decoder configured to start and end within the boundaries of the |
323 | /// given \p psb_block. |
324 | /// |
325 | /// \param[in] psb_block |
326 | /// The PSB block to decode. |
327 | /// |
328 | /// \param[in] next_block_ip |
329 | /// The starting ip at the next PSB block of the same thread if available. |
330 | /// |
331 | /// \param[in] decoded_thread |
332 | /// A \a DecodedThread object where the decoded instructions will be |
333 | /// appended to. It might have already some instructions. |
334 | /// |
335 | /// \param[in] tsc_upper_bound |
336 | /// Maximum allowed value of TSCs decoded from this PSB block. |
337 | /// Any of this PSB's data occurring after this TSC will be excluded. |
338 | PSBBlockDecoder(PtInsnDecoderUP &&decoder_up, const PSBBlock &psb_block, |
339 | std::optional<lldb::addr_t> next_block_ip, |
340 | DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, |
341 | std::optional<DecodedThread::TSC> tsc_upper_bound) |
342 | : m_decoder_up(std::move(decoder_up)), m_psb_block(psb_block), |
343 | m_next_block_ip(next_block_ip), m_decoded_thread(decoded_thread), |
344 | m_anomaly_detector(*m_decoder_up, trace_intel_pt, decoded_thread), |
345 | m_tsc_upper_bound(tsc_upper_bound) {} |
346 | |
347 | /// \param[in] trace_intel_pt |
348 | /// The main Trace object that own the PSB block. |
349 | /// |
350 | /// \param[in] decoder |
351 | /// A decoder configured to start and end within the boundaries of the |
352 | /// given \p psb_block. |
353 | /// |
354 | /// \param[in] psb_block |
355 | /// The PSB block to decode. |
356 | /// |
357 | /// \param[in] buffer |
358 | /// The raw intel pt trace for this block. |
359 | /// |
360 | /// \param[in] process |
361 | /// The process to decode. It provides the memory image to use for |
362 | /// decoding. |
363 | /// |
364 | /// \param[in] next_block_ip |
365 | /// The starting ip at the next PSB block of the same thread if available. |
366 | /// |
367 | /// \param[in] decoded_thread |
368 | /// A \a DecodedThread object where the decoded instructions will be |
369 | /// appended to. It might have already some instructions. |
370 | static Expected<PSBBlockDecoder> |
371 | Create(TraceIntelPT &trace_intel_pt, const PSBBlock &psb_block, |
372 | ArrayRef<uint8_t> buffer, Process &process, |
373 | std::optional<lldb::addr_t> next_block_ip, |
374 | DecodedThread &decoded_thread, |
375 | std::optional<DecodedThread::TSC> tsc_upper_bound) { |
376 | Expected<PtInsnDecoderUP> decoder_up = |
377 | CreateInstructionDecoder(trace_intel_pt, buffer, process); |
378 | if (!decoder_up) |
379 | return decoder_up.takeError(); |
380 | |
381 | return PSBBlockDecoder(std::move(*decoder_up), psb_block, next_block_ip, |
382 | decoded_thread, trace_intel_pt, tsc_upper_bound); |
383 | } |
384 | |
385 | void DecodePSBBlock() { |
386 | int status = pt_insn_sync_forward(m_decoder_up.get()); |
387 | assert(status >= 0 && |
388 | "Synchronization shouldn't fail because this PSB was previously " |
389 | "decoded correctly." ); |
390 | |
391 | // We emit a TSC before a sync event to more easily associate a timestamp to |
392 | // the sync event. If present, the current block's TSC would be the first |
393 | // TSC we'll see when processing events. |
394 | if (m_psb_block.tsc) |
395 | m_decoded_thread.NotifyTsc(tsc: *m_psb_block.tsc); |
396 | |
397 | m_decoded_thread.NotifySyncPoint(psb_offset: m_psb_block.psb_offset); |
398 | |
399 | DecodeInstructionsAndEvents(status); |
400 | } |
401 | |
402 | private: |
403 | /// Append an instruction and return \b false if and only if a serious anomaly |
404 | /// has been detected. |
405 | bool AppendInstructionAndDetectAnomalies(const pt_insn &insn) { |
406 | m_decoded_thread.AppendInstruction(insn); |
407 | |
408 | if (Error err = m_anomaly_detector.DetectAnomaly()) { |
409 | m_decoded_thread.AppendCustomError(error: toString(E: std::move(err)), |
410 | /*fatal=*/true); |
411 | return false; |
412 | } |
413 | return true; |
414 | } |
415 | /// Decode all the instructions and events of the given PSB block. The |
416 | /// decoding loop might stop abruptly if an infinite decoding loop is |
417 | /// detected. |
418 | void DecodeInstructionsAndEvents(int status) { |
419 | pt_insn insn; |
420 | |
421 | while (true) { |
422 | status = ProcessPTEvents(status); |
423 | |
424 | if (IsLibiptError(status)) |
425 | return; |
426 | else if (IsEndOfStream(status)) |
427 | break; |
428 | |
429 | // The status returned by pt_insn_next will need to be processed |
430 | // by ProcessPTEvents in the next loop if it is not an error. |
431 | std::memset(s: &insn, c: 0, n: sizeof insn); |
432 | status = pt_insn_next(m_decoder_up.get(), &insn, sizeof(insn)); |
433 | |
434 | if (IsLibiptError(status)) { |
435 | m_decoded_thread.AppendError(error: IntelPTError(status, insn.ip)); |
436 | return; |
437 | } else if (IsEndOfStream(status)) { |
438 | break; |
439 | } |
440 | |
441 | if (!AppendInstructionAndDetectAnomalies(insn)) |
442 | return; |
443 | } |
444 | |
445 | // We need to keep querying non-branching instructions until we hit the |
446 | // starting point of the next PSB. We won't see events at this point. This |
447 | // is based on |
448 | // https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#parallel-decode |
449 | if (m_next_block_ip && insn.ip != 0) { |
450 | while (insn.ip != *m_next_block_ip) { |
451 | if (!AppendInstructionAndDetectAnomalies(insn)) |
452 | return; |
453 | |
454 | status = pt_insn_next(m_decoder_up.get(), &insn, sizeof(insn)); |
455 | |
456 | if (IsLibiptError(status)) { |
457 | m_decoded_thread.AppendError(error: IntelPTError(status, insn.ip)); |
458 | return; |
459 | } |
460 | } |
461 | } |
462 | } |
463 | |
464 | /// Process the TSC of a decoded PT event. Specifically, check if this TSC |
465 | /// is below the TSC upper bound for this PSB. If the TSC exceeds the upper |
466 | /// bound, return an error to abort decoding. Otherwise add the it to the |
467 | /// underlying DecodedThread and decoding should continue as expected. |
468 | /// |
469 | /// \param[in] tsc |
470 | /// The TSC of the a decoded event. |
471 | Error ProcessPTEventTSC(DecodedThread::TSC tsc) { |
472 | if (m_tsc_upper_bound && tsc >= *m_tsc_upper_bound) { |
473 | // This event and all the remaining events of this PSB have a TSC |
474 | // outside the range of the "owning" ThreadContinuousExecution. For |
475 | // now we drop all of these events/instructions, future work can |
476 | // improve upon this by determining the "owning" |
477 | // ThreadContinuousExecution of the remaining PSB data. |
478 | std::string err_msg = formatv(Fmt: "decoding truncated: TSC {0} exceeds " |
479 | "maximum TSC value {1}, will skip decoding" |
480 | " the remaining data of the PSB" , |
481 | Vals&: tsc, Vals&: *m_tsc_upper_bound) |
482 | .str(); |
483 | |
484 | uint64_t offset; |
485 | int status = pt_insn_get_offset(m_decoder_up.get(), &offset); |
486 | if (!IsLibiptError(status)) { |
487 | err_msg = formatv(Fmt: "{2} (skipping {0} of {1} bytes)" , Vals&: offset, |
488 | Vals&: m_psb_block.size, Vals&: err_msg) |
489 | .str(); |
490 | } |
491 | m_decoded_thread.AppendCustomError(error: err_msg); |
492 | return createStringError(EC: inconvertibleErrorCode(), S: err_msg); |
493 | } else { |
494 | m_decoded_thread.NotifyTsc(tsc); |
495 | return Error::success(); |
496 | } |
497 | } |
498 | |
499 | /// Before querying instructions, we need to query the events associated with |
500 | /// that instruction, e.g. timing and trace disablement events. |
501 | /// |
502 | /// \param[in] status |
503 | /// The status gotten from the previous instruction decoding or PSB |
504 | /// synchronization. |
505 | /// |
506 | /// \return |
507 | /// The pte_status after decoding events. |
508 | int ProcessPTEvents(int status) { |
509 | while (HasEvents(status)) { |
510 | pt_event event; |
511 | std::memset(s: &event, c: 0, n: sizeof event); |
512 | status = pt_insn_event(m_decoder_up.get(), &event, sizeof(event)); |
513 | |
514 | if (IsLibiptError(status)) { |
515 | m_decoded_thread.AppendError(error: IntelPTError(status)); |
516 | return status; |
517 | } |
518 | |
519 | if (event.has_tsc) { |
520 | if (Error err = ProcessPTEventTSC(tsc: event.tsc)) { |
521 | consumeError(Err: std::move(err)); |
522 | return -pte_internal; |
523 | } |
524 | } |
525 | |
526 | switch (event.type) { |
527 | case ptev_disabled: |
528 | // The CPU paused tracing the program, e.g. due to ip filtering. |
529 | m_decoded_thread.AppendEvent(lldb::eTraceEventDisabledHW); |
530 | break; |
531 | case ptev_async_disabled: |
532 | // The kernel or user code paused tracing the program, e.g. |
533 | // a breakpoint or a ioctl invocation pausing the trace, or a |
534 | // context switch happened. |
535 | m_decoded_thread.AppendEvent(lldb::eTraceEventDisabledSW); |
536 | break; |
537 | case ptev_overflow: |
538 | // The CPU internal buffer had an overflow error and some instructions |
539 | // were lost. A OVF packet comes with an FUP packet (harcoded address) |
540 | // according to the documentation, so we'll continue seeing instructions |
541 | // after this event. |
542 | m_decoded_thread.AppendError(IntelPTError(-pte_overflow)); |
543 | break; |
544 | default: |
545 | break; |
546 | } |
547 | } |
548 | |
549 | return status; |
550 | } |
551 | |
552 | private: |
553 | PtInsnDecoderUP m_decoder_up; |
554 | PSBBlock m_psb_block; |
555 | std::optional<lldb::addr_t> m_next_block_ip; |
556 | DecodedThread &m_decoded_thread; |
557 | PSBBlockAnomalyDetector m_anomaly_detector; |
558 | std::optional<DecodedThread::TSC> m_tsc_upper_bound; |
559 | }; |
560 | |
561 | Error lldb_private::trace_intel_pt::DecodeSingleTraceForThread( |
562 | DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, |
563 | ArrayRef<uint8_t> buffer) { |
564 | Expected<std::vector<PSBBlock>> blocks = |
565 | SplitTraceIntoPSBBlock(trace_intel_pt, buffer, /*expect_tscs=*/false); |
566 | if (!blocks) |
567 | return blocks.takeError(); |
568 | |
569 | for (size_t i = 0; i < blocks->size(); i++) { |
570 | PSBBlock &block = blocks->at(n: i); |
571 | |
572 | Expected<PSBBlockDecoder> decoder = PSBBlockDecoder::Create( |
573 | trace_intel_pt, psb_block: block, buffer: buffer.slice(N: block.psb_offset, M: block.size), |
574 | process&: *decoded_thread.GetThread()->GetProcess(), |
575 | next_block_ip: i + 1 < blocks->size() ? blocks->at(n: i + 1).starting_ip : std::nullopt, |
576 | decoded_thread, tsc_upper_bound: std::nullopt); |
577 | if (!decoder) |
578 | return decoder.takeError(); |
579 | |
580 | decoder->DecodePSBBlock(); |
581 | } |
582 | |
583 | return Error::success(); |
584 | } |
585 | |
586 | Error lldb_private::trace_intel_pt::DecodeSystemWideTraceForThread( |
587 | DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, |
588 | const DenseMap<lldb::cpu_id_t, llvm::ArrayRef<uint8_t>> &buffers, |
589 | const std::vector<IntelPTThreadContinousExecution> &executions) { |
590 | bool has_seen_psbs = false; |
591 | for (size_t i = 0; i < executions.size(); i++) { |
592 | const IntelPTThreadContinousExecution &execution = executions[i]; |
593 | |
594 | auto variant = execution.thread_execution.variant; |
595 | |
596 | // We emit the first valid tsc |
597 | if (execution.psb_blocks.empty()) { |
598 | decoded_thread.NotifyTsc(tsc: execution.thread_execution.GetLowestKnownTSC()); |
599 | } else { |
600 | assert(execution.psb_blocks.front().tsc && |
601 | "per cpu decoding expects TSCs" ); |
602 | decoded_thread.NotifyTsc( |
603 | tsc: std::min(a: execution.thread_execution.GetLowestKnownTSC(), |
604 | b: *execution.psb_blocks.front().tsc)); |
605 | } |
606 | |
607 | // We then emit the CPU, which will be correctly associated with a tsc. |
608 | decoded_thread.NotifyCPU(cpu_id: execution.thread_execution.cpu_id); |
609 | |
610 | // If we haven't seen a PSB yet, then it's fine not to show errors |
611 | if (has_seen_psbs) { |
612 | if (execution.psb_blocks.empty()) { |
613 | decoded_thread.AppendCustomError( |
614 | error: formatv(Fmt: "Unable to find intel pt data a thread " |
615 | "execution on cpu id = {0}" , |
616 | Vals: execution.thread_execution.cpu_id) |
617 | .str()); |
618 | } |
619 | |
620 | // A hinted start is a non-initial execution that doesn't have a switch |
621 | // in. An only end is an initial execution that doesn't have a switch in. |
622 | // Any of those cases represent a gap because we have seen a PSB before. |
623 | if (variant == ThreadContinuousExecution::Variant::HintedStart || |
624 | variant == ThreadContinuousExecution::Variant::OnlyEnd) { |
625 | decoded_thread.AppendCustomError( |
626 | error: formatv(Fmt: "Unable to find the context switch in for a thread " |
627 | "execution on cpu id = {0}" , |
628 | Vals: execution.thread_execution.cpu_id) |
629 | .str()); |
630 | } |
631 | } |
632 | |
633 | for (size_t j = 0; j < execution.psb_blocks.size(); j++) { |
634 | const PSBBlock &psb_block = execution.psb_blocks[j]; |
635 | |
636 | Expected<PSBBlockDecoder> decoder = PSBBlockDecoder::Create( |
637 | trace_intel_pt, psb_block, |
638 | buffer: buffers.lookup(Val: execution.thread_execution.cpu_id) |
639 | .slice(N: psb_block.psb_offset, M: psb_block.size), |
640 | process&: *decoded_thread.GetThread()->GetProcess(), |
641 | next_block_ip: j + 1 < execution.psb_blocks.size() |
642 | ? execution.psb_blocks[j + 1].starting_ip |
643 | : std::nullopt, |
644 | decoded_thread, tsc_upper_bound: execution.thread_execution.GetEndTSC()); |
645 | if (!decoder) |
646 | return decoder.takeError(); |
647 | |
648 | has_seen_psbs = true; |
649 | decoder->DecodePSBBlock(); |
650 | } |
651 | |
652 | // If we haven't seen a PSB yet, then it's fine not to show errors |
653 | if (has_seen_psbs) { |
654 | // A hinted end is a non-ending execution that doesn't have a switch out. |
655 | // An only start is an ending execution that doesn't have a switch out. |
656 | // Any of those cases represent a gap if we still have executions to |
657 | // process and we have seen a PSB before. |
658 | if (i + 1 != executions.size() && |
659 | (variant == ThreadContinuousExecution::Variant::OnlyStart || |
660 | variant == ThreadContinuousExecution::Variant::HintedEnd)) { |
661 | decoded_thread.AppendCustomError( |
662 | error: formatv(Fmt: "Unable to find the context switch out for a thread " |
663 | "execution on cpu id = {0}" , |
664 | Vals: execution.thread_execution.cpu_id) |
665 | .str()); |
666 | } |
667 | } |
668 | } |
669 | return Error::success(); |
670 | } |
671 | |
672 | bool IntelPTThreadContinousExecution::operator<( |
673 | const IntelPTThreadContinousExecution &o) const { |
674 | // As the context switch might be incomplete, we look first for the first real |
675 | // PSB packet, which is a valid TSC. Otherwise, We query the thread execution |
676 | // itself for some tsc. |
677 | auto get_tsc = [](const IntelPTThreadContinousExecution &exec) { |
678 | return exec.psb_blocks.empty() ? exec.thread_execution.GetLowestKnownTSC() |
679 | : exec.psb_blocks.front().tsc; |
680 | }; |
681 | |
682 | return get_tsc(*this) < get_tsc(o); |
683 | } |
684 | |
685 | Expected<std::vector<PSBBlock>> |
686 | lldb_private::trace_intel_pt::SplitTraceIntoPSBBlock( |
687 | TraceIntelPT &trace_intel_pt, llvm::ArrayRef<uint8_t> buffer, |
688 | bool expect_tscs) { |
689 | // This follows |
690 | // https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#parallel-decode |
691 | |
692 | Expected<PtQueryDecoderUP> decoder_up = |
693 | CreateQueryDecoder(trace_intel_pt, buffer); |
694 | if (!decoder_up) |
695 | return decoder_up.takeError(); |
696 | |
697 | pt_query_decoder *decoder = decoder_up.get().get(); |
698 | |
699 | std::vector<PSBBlock> executions; |
700 | |
701 | while (true) { |
702 | uint64_t maybe_ip = LLDB_INVALID_ADDRESS; |
703 | int decoding_status = pt_qry_sync_forward(decoder, &maybe_ip); |
704 | if (IsLibiptError(status: decoding_status)) |
705 | break; |
706 | |
707 | uint64_t psb_offset; |
708 | int offset_status = pt_qry_get_sync_offset(decoder, &psb_offset); |
709 | assert(offset_status >= 0 && |
710 | "This can't fail because we were able to synchronize" ); |
711 | |
712 | std::optional<uint64_t> ip; |
713 | if (!(pts_ip_suppressed & decoding_status)) |
714 | ip = maybe_ip; |
715 | |
716 | std::optional<uint64_t> tsc; |
717 | // Now we fetch the first TSC that comes after the PSB. |
718 | while (HasEvents(status: decoding_status)) { |
719 | pt_event event; |
720 | decoding_status = pt_qry_event(decoder, &event, sizeof(event)); |
721 | if (IsLibiptError(status: decoding_status)) |
722 | break; |
723 | if (event.has_tsc) { |
724 | tsc = event.tsc; |
725 | break; |
726 | } |
727 | } |
728 | if (IsLibiptError(status: decoding_status)) { |
729 | // We continue to the next PSB. This effectively merges this PSB with the |
730 | // previous one, and that should be fine because this PSB might be the |
731 | // direct continuation of the previous thread and it's better to show an |
732 | // error in the decoded thread than to hide it. If this is the first PSB, |
733 | // we are okay losing it. Besides that, an error at processing events |
734 | // means that we wouldn't be able to get any instruction out of it. |
735 | continue; |
736 | } |
737 | |
738 | if (expect_tscs && !tsc) |
739 | return createStringError(EC: inconvertibleErrorCode(), |
740 | Msg: "Found a PSB without TSC." ); |
741 | |
742 | executions.push_back(x: { |
743 | .psb_offset: psb_offset, |
744 | .tsc: tsc, |
745 | .size: 0, |
746 | .starting_ip: ip, |
747 | }); |
748 | } |
749 | if (!executions.empty()) { |
750 | // We now adjust the sizes of each block |
751 | executions.back().size = buffer.size() - executions.back().psb_offset; |
752 | for (int i = (int)executions.size() - 2; i >= 0; i--) { |
753 | executions[i].size = |
754 | executions[i + 1].psb_offset - executions[i].psb_offset; |
755 | } |
756 | } |
757 | return executions; |
758 | } |
759 | |
760 | Expected<std::optional<uint64_t>> |
761 | lldb_private::trace_intel_pt::FindLowestTSCInTrace(TraceIntelPT &trace_intel_pt, |
762 | ArrayRef<uint8_t> buffer) { |
763 | Expected<PtQueryDecoderUP> decoder_up = |
764 | CreateQueryDecoder(trace_intel_pt, buffer); |
765 | if (!decoder_up) |
766 | return decoder_up.takeError(); |
767 | |
768 | pt_query_decoder *decoder = decoder_up.get().get(); |
769 | uint64_t ip = LLDB_INVALID_ADDRESS; |
770 | int status = pt_qry_sync_forward(decoder, &ip); |
771 | if (IsLibiptError(status)) |
772 | return std::nullopt; |
773 | |
774 | while (HasEvents(status)) { |
775 | pt_event event; |
776 | status = pt_qry_event(decoder, &event, sizeof(event)); |
777 | if (IsLibiptError(status)) |
778 | return std::nullopt; |
779 | if (event.has_tsc) |
780 | return event.tsc; |
781 | } |
782 | return std::nullopt; |
783 | } |
784 | |