| 1 | //===-- LibiptDecoder.cpp --======-----------------------------------------===// |
| 2 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 3 | // See https://llvm.org/LICENSE.txt for license information. |
| 4 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 5 | // |
| 6 | //===----------------------------------------------------------------------===// |
| 7 | |
| 8 | #include "LibiptDecoder.h" |
| 9 | #include "TraceIntelPT.h" |
| 10 | #include "lldb/Target/Process.h" |
| 11 | #include <optional> |
| 12 | |
| 13 | using namespace lldb; |
| 14 | using namespace lldb_private; |
| 15 | using namespace lldb_private::trace_intel_pt; |
| 16 | using namespace llvm; |
| 17 | |
/// Tell whether a status code returned by libipt denotes a failure.
///
/// \param[in] status
///     The value returned by a libipt call.
///
/// \return
///     \b true if and only if \p status is negative.
bool IsLibiptError(int status) {
  // Anything that is not zero-or-positive is treated as an error code.
  return !(status >= 0);
}
| 19 | |
| 20 | bool IsEndOfStream(int status) { |
| 21 | assert(status >= 0 && "We can't check if we reached the end of the stream if " |
| 22 | "we got a failed status" ); |
| 23 | return status & pts_eos; |
| 24 | } |
| 25 | |
| 26 | bool HasEvents(int status) { |
| 27 | assert(status >= 0 && "We can't check for events if we got a failed status" ); |
| 28 | return status & pts_event_pending; |
| 29 | } |
| 30 | |
| 31 | // RAII deleter for libipt's decoders |
| 32 | auto InsnDecoderDeleter = [](pt_insn_decoder *decoder) { |
| 33 | pt_insn_free_decoder(decoder); |
| 34 | }; |
| 35 | |
| 36 | auto QueryDecoderDeleter = [](pt_query_decoder *decoder) { |
| 37 | pt_qry_free_decoder(decoder); |
| 38 | }; |
| 39 | |
| 40 | using PtInsnDecoderUP = |
| 41 | std::unique_ptr<pt_insn_decoder, decltype(InsnDecoderDeleter)>; |
| 42 | |
| 43 | using PtQueryDecoderUP = |
| 44 | std::unique_ptr<pt_query_decoder, decltype(QueryDecoderDeleter)>; |
| 45 | |
| 46 | /// Create a basic configuration object limited to a given buffer that can be |
| 47 | /// used for many different decoders. |
| 48 | static Expected<pt_config> CreateBasicLibiptConfig(TraceIntelPT &trace_intel_pt, |
| 49 | ArrayRef<uint8_t> buffer) { |
| 50 | Expected<pt_cpu> cpu_info = trace_intel_pt.GetCPUInfo(); |
| 51 | if (!cpu_info) |
| 52 | return cpu_info.takeError(); |
| 53 | |
| 54 | pt_config config; |
| 55 | pt_config_init(&config); |
| 56 | config.cpu = *cpu_info; |
| 57 | |
| 58 | int status = pt_cpu_errata(&config.errata, &config.cpu); |
| 59 | if (IsLibiptError(status)) |
| 60 | return make_error<IntelPTError>(Args&: status); |
| 61 | |
| 62 | // The libipt library does not modify the trace buffer, hence the |
| 63 | // following casts are safe. |
| 64 | config.begin = const_cast<uint8_t *>(buffer.data()); |
| 65 | config.end = const_cast<uint8_t *>(buffer.data() + buffer.size()); |
| 66 | return config; |
| 67 | } |
| 68 | |
| 69 | /// Callback used by libipt for reading the process memory. |
| 70 | /// |
| 71 | /// More information can be found in |
| 72 | /// https://github.com/intel/libipt/blob/master/doc/man/pt_image_set_callback.3.md |
| 73 | static int ReadProcessMemory(uint8_t *buffer, size_t size, |
| 74 | const pt_asid * /* unused */, uint64_t pc, |
| 75 | void *context) { |
| 76 | Process *process = static_cast<Process *>(context); |
| 77 | |
| 78 | Status error; |
| 79 | int bytes_read = process->ReadMemory(vm_addr: pc, buf: buffer, size, error); |
| 80 | if (error.Fail()) |
| 81 | return -pte_nomap; |
| 82 | return bytes_read; |
| 83 | } |
| 84 | |
| 85 | /// Set up the memory image callback for the given decoder. |
| 86 | static Error SetupMemoryImage(pt_insn_decoder *decoder, Process &process) { |
| 87 | pt_image *image = pt_insn_get_image(decoder); |
| 88 | |
| 89 | int status = pt_image_set_callback(image, ReadProcessMemory, &process); |
| 90 | if (IsLibiptError(status)) |
| 91 | return make_error<IntelPTError>(Args&: status); |
| 92 | return Error::success(); |
| 93 | } |
| 94 | |
| 95 | /// Create an instruction decoder for the given buffer and the given process. |
| 96 | static Expected<PtInsnDecoderUP> |
| 97 | CreateInstructionDecoder(TraceIntelPT &trace_intel_pt, ArrayRef<uint8_t> buffer, |
| 98 | Process &process) { |
| 99 | Expected<pt_config> config = CreateBasicLibiptConfig(trace_intel_pt, buffer); |
| 100 | if (!config) |
| 101 | return config.takeError(); |
| 102 | |
| 103 | pt_insn_decoder *decoder_ptr = pt_insn_alloc_decoder(&*config); |
| 104 | if (!decoder_ptr) |
| 105 | return make_error<IntelPTError>(-pte_nomem); |
| 106 | |
| 107 | PtInsnDecoderUP decoder_up(decoder_ptr, InsnDecoderDeleter); |
| 108 | |
| 109 | if (Error err = SetupMemoryImage(decoder_ptr, process)) |
| 110 | return std::move(err); |
| 111 | |
| 112 | return decoder_up; |
| 113 | } |
| 114 | |
| 115 | /// Create a query decoder for the given buffer. The query decoder is the |
| 116 | /// highest level decoder that operates directly on packets and doesn't perform |
| 117 | /// actual instruction decoding. That's why it can be useful for inspecting a |
| 118 | /// raw trace without pinning it to a particular process. |
| 119 | static Expected<PtQueryDecoderUP> |
| 120 | CreateQueryDecoder(TraceIntelPT &trace_intel_pt, ArrayRef<uint8_t> buffer) { |
| 121 | Expected<pt_config> config = CreateBasicLibiptConfig(trace_intel_pt, buffer); |
| 122 | if (!config) |
| 123 | return config.takeError(); |
| 124 | |
| 125 | pt_query_decoder *decoder_ptr = pt_qry_alloc_decoder(&*config); |
| 126 | if (!decoder_ptr) |
| 127 | return make_error<IntelPTError>(-pte_nomem); |
| 128 | |
| 129 | return PtQueryDecoderUP(decoder_ptr, QueryDecoderDeleter); |
| 130 | } |
| 131 | |
| 132 | /// Class used to identify anomalies in traces, which should often indicate a |
| 133 | /// fatal error in the trace. |
| 134 | class PSBBlockAnomalyDetector { |
| 135 | public: |
| 136 | PSBBlockAnomalyDetector(pt_insn_decoder &decoder, |
| 137 | TraceIntelPT &trace_intel_pt, |
| 138 | DecodedThread &decoded_thread) |
| 139 | : m_decoder(decoder), m_decoded_thread(decoded_thread) { |
| 140 | m_infinite_decoding_loop_threshold = |
| 141 | trace_intel_pt.GetGlobalProperties() |
| 142 | .GetInfiniteDecodingLoopVerificationThreshold(); |
| 143 | m_extremely_large_decoding_threshold = |
| 144 | trace_intel_pt.GetGlobalProperties() |
| 145 | .GetExtremelyLargeDecodingThreshold(); |
| 146 | m_next_infinite_decoding_loop_threshold = |
| 147 | m_infinite_decoding_loop_threshold; |
| 148 | } |
| 149 | |
| 150 | /// \return |
| 151 | /// An \a llvm::Error if an anomaly that includes the last instruction item |
| 152 | /// in the trace, or \a llvm::Error::success otherwise. |
| 153 | Error DetectAnomaly() { |
| 154 | RefreshPacketOffset(); |
| 155 | uint64_t insn_added_since_last_packet_offset = |
| 156 | m_decoded_thread.GetTotalInstructionCount() - |
| 157 | m_insn_count_at_last_packet_offset; |
| 158 | |
| 159 | // We want to check if we might have fallen in an infinite loop. As this |
| 160 | // check is not a no-op, we want to do it when we have a strong suggestion |
| 161 | // that things went wrong. First, we check how many instructions we have |
| 162 | // decoded since we processed an Intel PT packet for the last time. This |
| 163 | // number should be low, because at some point we should see branches, jumps |
| 164 | // or interrupts that require a new packet to be processed. Once we reach |
| 165 | // certain threshold we start analyzing the trace. |
| 166 | // |
| 167 | // We use the number of decoded instructions since the last Intel PT packet |
| 168 | // as a proxy because, in fact, we don't expect a single packet to give, |
| 169 | // say, 100k instructions. That would mean that there are 100k sequential |
| 170 | // instructions without any single branch, which is highly unlikely, or that |
| 171 | // we found an infinite loop using direct jumps, e.g. |
| 172 | // |
| 173 | // 0x0A: nop or pause |
| 174 | // 0x0C: jump to 0x0A |
| 175 | // |
| 176 | // which is indeed code that is found in the kernel. I presume we reach |
| 177 | // this kind of code in the decoder because we don't handle self-modified |
| 178 | // code in post-mortem kernel traces. |
| 179 | // |
| 180 | // We are right now only signaling the anomaly as a trace error, but it |
| 181 | // would be more conservative to also discard all the trace items found in |
| 182 | // this PSB. I prefer not to do that for the time being to give more |
| 183 | // exposure to this kind of anomalies and help debugging. Discarding the |
| 184 | // trace items would just make investigation harded. |
| 185 | // |
| 186 | // Finally, if the user wants to see if a specific thread has an anomaly, |
| 187 | // it's enough to run the `thread trace dump info` command and look for the |
| 188 | // count of this kind of errors. |
| 189 | |
| 190 | if (insn_added_since_last_packet_offset >= |
| 191 | m_extremely_large_decoding_threshold) { |
| 192 | // In this case, we have decoded a massive amount of sequential |
| 193 | // instructions that don't loop. Honestly I wonder if this will ever |
| 194 | // happen, but better safe than sorry. |
| 195 | return createStringError( |
| 196 | EC: inconvertibleErrorCode(), |
| 197 | S: "anomalous trace: possible infinite trace detected" ); |
| 198 | } |
| 199 | if (insn_added_since_last_packet_offset == |
| 200 | m_next_infinite_decoding_loop_threshold) { |
| 201 | if (std::optional<uint64_t> loop_size = TryIdentifyInfiniteLoop()) { |
| 202 | return createStringError( |
| 203 | EC: inconvertibleErrorCode(), |
| 204 | Fmt: "anomalous trace: possible infinite loop detected of size %" PRIu64, |
| 205 | Vals: *loop_size); |
| 206 | } |
| 207 | m_next_infinite_decoding_loop_threshold *= 2; |
| 208 | } |
| 209 | return Error::success(); |
| 210 | } |
| 211 | |
| 212 | private: |
| 213 | std::optional<uint64_t> TryIdentifyInfiniteLoop() { |
| 214 | // The infinite decoding loops we'll encounter are due to sequential |
| 215 | // instructions that repeat themselves due to direct jumps, therefore in a |
| 216 | // cycle each individual address will only appear once. We use this |
| 217 | // information to detect cycles by finding the last 2 ocurrences of the last |
| 218 | // instruction added to the trace. Then we traverse the trace making sure |
| 219 | // that these two instructions where the ends of a repeating loop. |
| 220 | |
| 221 | // This is a utility that returns the most recent instruction index given a |
| 222 | // position in the trace. If the given position is an instruction, that |
| 223 | // position is returned. It skips non-instruction items. |
| 224 | auto most_recent_insn_index = |
| 225 | [&](uint64_t item_index) -> std::optional<uint64_t> { |
| 226 | while (true) { |
| 227 | if (m_decoded_thread.GetItemKindByIndex(item_index) == |
| 228 | lldb::eTraceItemKindInstruction) { |
| 229 | return item_index; |
| 230 | } |
| 231 | if (item_index == 0) |
| 232 | return std::nullopt; |
| 233 | item_index--; |
| 234 | } |
| 235 | return std::nullopt; |
| 236 | }; |
| 237 | // Similar to most_recent_insn_index but skips the starting position. |
| 238 | auto prev_insn_index = [&](uint64_t item_index) -> std::optional<uint64_t> { |
| 239 | if (item_index == 0) |
| 240 | return std::nullopt; |
| 241 | return most_recent_insn_index(item_index - 1); |
| 242 | }; |
| 243 | |
| 244 | // We first find the most recent instruction. |
| 245 | std::optional<uint64_t> last_insn_index_opt = |
| 246 | *prev_insn_index(m_decoded_thread.GetItemsCount()); |
| 247 | if (!last_insn_index_opt) |
| 248 | return std::nullopt; |
| 249 | uint64_t last_insn_index = *last_insn_index_opt; |
| 250 | |
| 251 | // We then find the most recent previous occurrence of that last |
| 252 | // instruction. |
| 253 | std::optional<uint64_t> last_insn_copy_index = |
| 254 | prev_insn_index(last_insn_index); |
| 255 | uint64_t loop_size = 1; |
| 256 | while (last_insn_copy_index && |
| 257 | m_decoded_thread.GetInstructionLoadAddress(item_index: *last_insn_copy_index) != |
| 258 | m_decoded_thread.GetInstructionLoadAddress(item_index: last_insn_index)) { |
| 259 | last_insn_copy_index = prev_insn_index(*last_insn_copy_index); |
| 260 | loop_size++; |
| 261 | } |
| 262 | if (!last_insn_copy_index) |
| 263 | return std::nullopt; |
| 264 | |
| 265 | // Now we check if the segment between these last positions of the last |
| 266 | // instruction address is in fact a repeating loop. |
| 267 | uint64_t loop_elements_visited = 1; |
| 268 | uint64_t insn_index_a = last_insn_index, |
| 269 | insn_index_b = *last_insn_copy_index; |
| 270 | while (loop_elements_visited < loop_size) { |
| 271 | if (std::optional<uint64_t> prev = prev_insn_index(insn_index_a)) |
| 272 | insn_index_a = *prev; |
| 273 | else |
| 274 | return std::nullopt; |
| 275 | if (std::optional<uint64_t> prev = prev_insn_index(insn_index_b)) |
| 276 | insn_index_b = *prev; |
| 277 | else |
| 278 | return std::nullopt; |
| 279 | if (m_decoded_thread.GetInstructionLoadAddress(item_index: insn_index_a) != |
| 280 | m_decoded_thread.GetInstructionLoadAddress(item_index: insn_index_b)) |
| 281 | return std::nullopt; |
| 282 | loop_elements_visited++; |
| 283 | } |
| 284 | return loop_size; |
| 285 | } |
| 286 | |
| 287 | // Refresh the internal counters if a new packet offset has been visited |
| 288 | void RefreshPacketOffset() { |
| 289 | lldb::addr_t new_packet_offset; |
| 290 | if (!IsLibiptError(pt_insn_get_offset(&m_decoder, &new_packet_offset)) && |
| 291 | new_packet_offset != m_last_packet_offset) { |
| 292 | m_last_packet_offset = new_packet_offset; |
| 293 | m_next_infinite_decoding_loop_threshold = |
| 294 | m_infinite_decoding_loop_threshold; |
| 295 | m_insn_count_at_last_packet_offset = |
| 296 | m_decoded_thread.GetTotalInstructionCount(); |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | pt_insn_decoder &m_decoder; |
| 301 | DecodedThread &m_decoded_thread; |
| 302 | lldb::addr_t m_last_packet_offset = LLDB_INVALID_ADDRESS; |
| 303 | uint64_t m_insn_count_at_last_packet_offset = 0; |
| 304 | uint64_t m_infinite_decoding_loop_threshold; |
| 305 | uint64_t m_next_infinite_decoding_loop_threshold; |
| 306 | uint64_t m_extremely_large_decoding_threshold; |
| 307 | }; |
| 308 | |
| 309 | /// Class that decodes a raw buffer for a single PSB block using the low level |
| 310 | /// libipt library. It assumes that kernel and user mode instructions are not |
| 311 | /// mixed in the same PSB block. |
| 312 | /// |
| 313 | /// Throughout this code, the status of the decoder will be used to identify |
| 314 | /// events needed to be processed or errors in the decoder. The values can be |
| 315 | /// - negative: actual errors |
| 316 | /// - positive or zero: not an error, but a list of bits signaling the status |
| 317 | /// of the decoder, e.g. whether there are events that need to be decoded or |
| 318 | /// not. |
| 319 | class PSBBlockDecoder { |
| 320 | public: |
| 321 | /// \param[in] decoder |
| 322 | /// A decoder configured to start and end within the boundaries of the |
| 323 | /// given \p psb_block. |
| 324 | /// |
| 325 | /// \param[in] psb_block |
| 326 | /// The PSB block to decode. |
| 327 | /// |
| 328 | /// \param[in] next_block_ip |
| 329 | /// The starting ip at the next PSB block of the same thread if available. |
| 330 | /// |
| 331 | /// \param[in] decoded_thread |
| 332 | /// A \a DecodedThread object where the decoded instructions will be |
| 333 | /// appended to. It might have already some instructions. |
| 334 | /// |
| 335 | /// \param[in] tsc_upper_bound |
| 336 | /// Maximum allowed value of TSCs decoded from this PSB block. |
| 337 | /// Any of this PSB's data occurring after this TSC will be excluded. |
| 338 | PSBBlockDecoder(PtInsnDecoderUP &&decoder_up, const PSBBlock &psb_block, |
| 339 | std::optional<lldb::addr_t> next_block_ip, |
| 340 | DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, |
| 341 | std::optional<DecodedThread::TSC> tsc_upper_bound) |
| 342 | : m_decoder_up(std::move(decoder_up)), m_psb_block(psb_block), |
| 343 | m_next_block_ip(next_block_ip), m_decoded_thread(decoded_thread), |
| 344 | m_anomaly_detector(*m_decoder_up, trace_intel_pt, decoded_thread), |
| 345 | m_tsc_upper_bound(tsc_upper_bound) {} |
| 346 | |
| 347 | /// \param[in] trace_intel_pt |
| 348 | /// The main Trace object that own the PSB block. |
| 349 | /// |
| 350 | /// \param[in] decoder |
| 351 | /// A decoder configured to start and end within the boundaries of the |
| 352 | /// given \p psb_block. |
| 353 | /// |
| 354 | /// \param[in] psb_block |
| 355 | /// The PSB block to decode. |
| 356 | /// |
| 357 | /// \param[in] buffer |
| 358 | /// The raw intel pt trace for this block. |
| 359 | /// |
| 360 | /// \param[in] process |
| 361 | /// The process to decode. It provides the memory image to use for |
| 362 | /// decoding. |
| 363 | /// |
| 364 | /// \param[in] next_block_ip |
| 365 | /// The starting ip at the next PSB block of the same thread if available. |
| 366 | /// |
| 367 | /// \param[in] decoded_thread |
| 368 | /// A \a DecodedThread object where the decoded instructions will be |
| 369 | /// appended to. It might have already some instructions. |
| 370 | static Expected<PSBBlockDecoder> |
| 371 | Create(TraceIntelPT &trace_intel_pt, const PSBBlock &psb_block, |
| 372 | ArrayRef<uint8_t> buffer, Process &process, |
| 373 | std::optional<lldb::addr_t> next_block_ip, |
| 374 | DecodedThread &decoded_thread, |
| 375 | std::optional<DecodedThread::TSC> tsc_upper_bound) { |
| 376 | Expected<PtInsnDecoderUP> decoder_up = |
| 377 | CreateInstructionDecoder(trace_intel_pt, buffer, process); |
| 378 | if (!decoder_up) |
| 379 | return decoder_up.takeError(); |
| 380 | |
| 381 | return PSBBlockDecoder(std::move(*decoder_up), psb_block, next_block_ip, |
| 382 | decoded_thread, trace_intel_pt, tsc_upper_bound); |
| 383 | } |
| 384 | |
| 385 | void DecodePSBBlock() { |
| 386 | int status = pt_insn_sync_forward(m_decoder_up.get()); |
| 387 | assert(status >= 0 && |
| 388 | "Synchronization shouldn't fail because this PSB was previously " |
| 389 | "decoded correctly." ); |
| 390 | |
| 391 | // We emit a TSC before a sync event to more easily associate a timestamp to |
| 392 | // the sync event. If present, the current block's TSC would be the first |
| 393 | // TSC we'll see when processing events. |
| 394 | if (m_psb_block.tsc) |
| 395 | m_decoded_thread.NotifyTsc(tsc: *m_psb_block.tsc); |
| 396 | |
| 397 | m_decoded_thread.NotifySyncPoint(psb_offset: m_psb_block.psb_offset); |
| 398 | |
| 399 | DecodeInstructionsAndEvents(status); |
| 400 | } |
| 401 | |
| 402 | private: |
| 403 | /// Append an instruction and return \b false if and only if a serious anomaly |
| 404 | /// has been detected. |
| 405 | bool AppendInstructionAndDetectAnomalies(const pt_insn &insn) { |
| 406 | m_decoded_thread.AppendInstruction(insn); |
| 407 | |
| 408 | if (Error err = m_anomaly_detector.DetectAnomaly()) { |
| 409 | m_decoded_thread.AppendCustomError(error: toString(E: std::move(err)), |
| 410 | /*fatal=*/true); |
| 411 | return false; |
| 412 | } |
| 413 | return true; |
| 414 | } |
| 415 | /// Decode all the instructions and events of the given PSB block. The |
| 416 | /// decoding loop might stop abruptly if an infinite decoding loop is |
| 417 | /// detected. |
| 418 | void DecodeInstructionsAndEvents(int status) { |
| 419 | pt_insn insn; |
| 420 | |
| 421 | while (true) { |
| 422 | status = ProcessPTEvents(status); |
| 423 | |
| 424 | if (IsLibiptError(status)) |
| 425 | return; |
| 426 | else if (IsEndOfStream(status)) |
| 427 | break; |
| 428 | |
| 429 | // The status returned by pt_insn_next will need to be processed |
| 430 | // by ProcessPTEvents in the next loop if it is not an error. |
| 431 | std::memset(s: &insn, c: 0, n: sizeof insn); |
| 432 | status = pt_insn_next(m_decoder_up.get(), &insn, sizeof(insn)); |
| 433 | |
| 434 | if (IsLibiptError(status)) { |
| 435 | m_decoded_thread.AppendError(error: IntelPTError(status, insn.ip)); |
| 436 | return; |
| 437 | } else if (IsEndOfStream(status)) { |
| 438 | break; |
| 439 | } |
| 440 | |
| 441 | if (!AppendInstructionAndDetectAnomalies(insn)) |
| 442 | return; |
| 443 | } |
| 444 | |
| 445 | // We need to keep querying non-branching instructions until we hit the |
| 446 | // starting point of the next PSB. We won't see events at this point. This |
| 447 | // is based on |
| 448 | // https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#parallel-decode |
| 449 | if (m_next_block_ip && insn.ip != 0) { |
| 450 | while (insn.ip != *m_next_block_ip) { |
| 451 | if (!AppendInstructionAndDetectAnomalies(insn)) |
| 452 | return; |
| 453 | |
| 454 | status = pt_insn_next(m_decoder_up.get(), &insn, sizeof(insn)); |
| 455 | |
| 456 | if (IsLibiptError(status)) { |
| 457 | m_decoded_thread.AppendError(error: IntelPTError(status, insn.ip)); |
| 458 | return; |
| 459 | } |
| 460 | } |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | /// Process the TSC of a decoded PT event. Specifically, check if this TSC |
| 465 | /// is below the TSC upper bound for this PSB. If the TSC exceeds the upper |
| 466 | /// bound, return an error to abort decoding. Otherwise add the it to the |
| 467 | /// underlying DecodedThread and decoding should continue as expected. |
| 468 | /// |
| 469 | /// \param[in] tsc |
| 470 | /// The TSC of the a decoded event. |
| 471 | Error ProcessPTEventTSC(DecodedThread::TSC tsc) { |
| 472 | if (m_tsc_upper_bound && tsc >= *m_tsc_upper_bound) { |
| 473 | // This event and all the remaining events of this PSB have a TSC |
| 474 | // outside the range of the "owning" ThreadContinuousExecution. For |
| 475 | // now we drop all of these events/instructions, future work can |
| 476 | // improve upon this by determining the "owning" |
| 477 | // ThreadContinuousExecution of the remaining PSB data. |
| 478 | std::string err_msg = formatv(Fmt: "decoding truncated: TSC {0} exceeds " |
| 479 | "maximum TSC value {1}, will skip decoding" |
| 480 | " the remaining data of the PSB" , |
| 481 | Vals&: tsc, Vals&: *m_tsc_upper_bound) |
| 482 | .str(); |
| 483 | |
| 484 | uint64_t offset; |
| 485 | int status = pt_insn_get_offset(m_decoder_up.get(), &offset); |
| 486 | if (!IsLibiptError(status)) { |
| 487 | err_msg = formatv(Fmt: "{2} (skipping {0} of {1} bytes)" , Vals&: offset, |
| 488 | Vals&: m_psb_block.size, Vals&: err_msg) |
| 489 | .str(); |
| 490 | } |
| 491 | m_decoded_thread.AppendCustomError(error: err_msg); |
| 492 | return createStringError(EC: inconvertibleErrorCode(), S: err_msg); |
| 493 | } else { |
| 494 | m_decoded_thread.NotifyTsc(tsc); |
| 495 | return Error::success(); |
| 496 | } |
| 497 | } |
| 498 | |
| 499 | /// Before querying instructions, we need to query the events associated with |
| 500 | /// that instruction, e.g. timing and trace disablement events. |
| 501 | /// |
| 502 | /// \param[in] status |
| 503 | /// The status gotten from the previous instruction decoding or PSB |
| 504 | /// synchronization. |
| 505 | /// |
| 506 | /// \return |
| 507 | /// The pte_status after decoding events. |
| 508 | int ProcessPTEvents(int status) { |
| 509 | while (HasEvents(status)) { |
| 510 | pt_event event; |
| 511 | std::memset(s: &event, c: 0, n: sizeof event); |
| 512 | status = pt_insn_event(m_decoder_up.get(), &event, sizeof(event)); |
| 513 | |
| 514 | if (IsLibiptError(status)) { |
| 515 | m_decoded_thread.AppendError(error: IntelPTError(status)); |
| 516 | return status; |
| 517 | } |
| 518 | |
| 519 | if (event.has_tsc) { |
| 520 | if (Error err = ProcessPTEventTSC(tsc: event.tsc)) { |
| 521 | consumeError(Err: std::move(err)); |
| 522 | return -pte_internal; |
| 523 | } |
| 524 | } |
| 525 | |
| 526 | switch (event.type) { |
| 527 | case ptev_disabled: |
| 528 | // The CPU paused tracing the program, e.g. due to ip filtering. |
| 529 | m_decoded_thread.AppendEvent(lldb::eTraceEventDisabledHW); |
| 530 | break; |
| 531 | case ptev_async_disabled: |
| 532 | // The kernel or user code paused tracing the program, e.g. |
| 533 | // a breakpoint or a ioctl invocation pausing the trace, or a |
| 534 | // context switch happened. |
| 535 | m_decoded_thread.AppendEvent(lldb::eTraceEventDisabledSW); |
| 536 | break; |
| 537 | case ptev_overflow: |
| 538 | // The CPU internal buffer had an overflow error and some instructions |
| 539 | // were lost. A OVF packet comes with an FUP packet (harcoded address) |
| 540 | // according to the documentation, so we'll continue seeing instructions |
| 541 | // after this event. |
| 542 | m_decoded_thread.AppendError(IntelPTError(-pte_overflow)); |
| 543 | break; |
| 544 | default: |
| 545 | break; |
| 546 | } |
| 547 | } |
| 548 | |
| 549 | return status; |
| 550 | } |
| 551 | |
| 552 | private: |
| 553 | PtInsnDecoderUP m_decoder_up; |
| 554 | PSBBlock m_psb_block; |
| 555 | std::optional<lldb::addr_t> m_next_block_ip; |
| 556 | DecodedThread &m_decoded_thread; |
| 557 | PSBBlockAnomalyDetector m_anomaly_detector; |
| 558 | std::optional<DecodedThread::TSC> m_tsc_upper_bound; |
| 559 | }; |
| 560 | |
| 561 | Error lldb_private::trace_intel_pt::DecodeSingleTraceForThread( |
| 562 | DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, |
| 563 | ArrayRef<uint8_t> buffer) { |
| 564 | Expected<std::vector<PSBBlock>> blocks = |
| 565 | SplitTraceIntoPSBBlock(trace_intel_pt, buffer, /*expect_tscs=*/false); |
| 566 | if (!blocks) |
| 567 | return blocks.takeError(); |
| 568 | |
| 569 | for (size_t i = 0; i < blocks->size(); i++) { |
| 570 | PSBBlock &block = blocks->at(n: i); |
| 571 | |
| 572 | Expected<PSBBlockDecoder> decoder = PSBBlockDecoder::Create( |
| 573 | trace_intel_pt, block, buffer.slice(N: block.psb_offset, M: block.size), |
| 574 | *decoded_thread.GetThread()->GetProcess(), |
| 575 | i + 1 < blocks->size() ? blocks->at(n: i + 1).starting_ip : std::nullopt, |
| 576 | decoded_thread, std::nullopt); |
| 577 | if (!decoder) |
| 578 | return decoder.takeError(); |
| 579 | |
| 580 | decoder->DecodePSBBlock(); |
| 581 | } |
| 582 | |
| 583 | return Error::success(); |
| 584 | } |
| 585 | |
| 586 | Error lldb_private::trace_intel_pt::DecodeSystemWideTraceForThread( |
| 587 | DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, |
| 588 | const DenseMap<lldb::cpu_id_t, llvm::ArrayRef<uint8_t>> &buffers, |
| 589 | const std::vector<IntelPTThreadContinousExecution> &executions) { |
| 590 | bool has_seen_psbs = false; |
| 591 | for (size_t i = 0; i < executions.size(); i++) { |
| 592 | const IntelPTThreadContinousExecution &execution = executions[i]; |
| 593 | |
| 594 | auto variant = execution.thread_execution.variant; |
| 595 | |
| 596 | // We emit the first valid tsc |
| 597 | if (execution.psb_blocks.empty()) { |
| 598 | decoded_thread.NotifyTsc(tsc: execution.thread_execution.GetLowestKnownTSC()); |
| 599 | } else { |
| 600 | assert(execution.psb_blocks.front().tsc && |
| 601 | "per cpu decoding expects TSCs" ); |
| 602 | decoded_thread.NotifyTsc( |
| 603 | tsc: std::min(a: execution.thread_execution.GetLowestKnownTSC(), |
| 604 | b: *execution.psb_blocks.front().tsc)); |
| 605 | } |
| 606 | |
| 607 | // We then emit the CPU, which will be correctly associated with a tsc. |
| 608 | decoded_thread.NotifyCPU(cpu_id: execution.thread_execution.cpu_id); |
| 609 | |
| 610 | // If we haven't seen a PSB yet, then it's fine not to show errors |
| 611 | if (has_seen_psbs) { |
| 612 | if (execution.psb_blocks.empty()) { |
| 613 | decoded_thread.AppendCustomError( |
| 614 | error: formatv(Fmt: "Unable to find intel pt data a thread " |
| 615 | "execution on cpu id = {0}" , |
| 616 | Vals: execution.thread_execution.cpu_id) |
| 617 | .str()); |
| 618 | } |
| 619 | |
| 620 | // A hinted start is a non-initial execution that doesn't have a switch |
| 621 | // in. An only end is an initial execution that doesn't have a switch in. |
| 622 | // Any of those cases represent a gap because we have seen a PSB before. |
| 623 | if (variant == ThreadContinuousExecution::Variant::HintedStart || |
| 624 | variant == ThreadContinuousExecution::Variant::OnlyEnd) { |
| 625 | decoded_thread.AppendCustomError( |
| 626 | error: formatv(Fmt: "Unable to find the context switch in for a thread " |
| 627 | "execution on cpu id = {0}" , |
| 628 | Vals: execution.thread_execution.cpu_id) |
| 629 | .str()); |
| 630 | } |
| 631 | } |
| 632 | |
| 633 | for (size_t j = 0; j < execution.psb_blocks.size(); j++) { |
| 634 | const PSBBlock &psb_block = execution.psb_blocks[j]; |
| 635 | |
| 636 | Expected<PSBBlockDecoder> decoder = PSBBlockDecoder::Create( |
| 637 | trace_intel_pt, psb_block, |
| 638 | buffers.lookup(Val: execution.thread_execution.cpu_id) |
| 639 | .slice(N: psb_block.psb_offset, M: psb_block.size), |
| 640 | *decoded_thread.GetThread()->GetProcess(), |
| 641 | j + 1 < execution.psb_blocks.size() |
| 642 | ? execution.psb_blocks[j + 1].starting_ip |
| 643 | : std::nullopt, |
| 644 | decoded_thread, execution.thread_execution.GetEndTSC()); |
| 645 | if (!decoder) |
| 646 | return decoder.takeError(); |
| 647 | |
| 648 | has_seen_psbs = true; |
| 649 | decoder->DecodePSBBlock(); |
| 650 | } |
| 651 | |
| 652 | // If we haven't seen a PSB yet, then it's fine not to show errors |
| 653 | if (has_seen_psbs) { |
| 654 | // A hinted end is a non-ending execution that doesn't have a switch out. |
| 655 | // An only start is an ending execution that doesn't have a switch out. |
| 656 | // Any of those cases represent a gap if we still have executions to |
| 657 | // process and we have seen a PSB before. |
| 658 | if (i + 1 != executions.size() && |
| 659 | (variant == ThreadContinuousExecution::Variant::OnlyStart || |
| 660 | variant == ThreadContinuousExecution::Variant::HintedEnd)) { |
| 661 | decoded_thread.AppendCustomError( |
| 662 | error: formatv(Fmt: "Unable to find the context switch out for a thread " |
| 663 | "execution on cpu id = {0}" , |
| 664 | Vals: execution.thread_execution.cpu_id) |
| 665 | .str()); |
| 666 | } |
| 667 | } |
| 668 | } |
| 669 | return Error::success(); |
| 670 | } |
| 671 | |
| 672 | bool IntelPTThreadContinousExecution::operator<( |
| 673 | const IntelPTThreadContinousExecution &o) const { |
| 674 | // As the context switch might be incomplete, we look first for the first real |
| 675 | // PSB packet, which is a valid TSC. Otherwise, We query the thread execution |
| 676 | // itself for some tsc. |
| 677 | auto get_tsc = [](const IntelPTThreadContinousExecution &exec) { |
| 678 | return exec.psb_blocks.empty() ? exec.thread_execution.GetLowestKnownTSC() |
| 679 | : exec.psb_blocks.front().tsc; |
| 680 | }; |
| 681 | |
| 682 | return get_tsc(*this) < get_tsc(o); |
| 683 | } |
| 684 | |
| 685 | Expected<std::vector<PSBBlock>> |
| 686 | lldb_private::trace_intel_pt::SplitTraceIntoPSBBlock( |
| 687 | TraceIntelPT &trace_intel_pt, llvm::ArrayRef<uint8_t> buffer, |
| 688 | bool expect_tscs) { |
| 689 | // This follows |
| 690 | // https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#parallel-decode |
| 691 | |
| 692 | Expected<PtQueryDecoderUP> decoder_up = |
| 693 | CreateQueryDecoder(trace_intel_pt, buffer); |
| 694 | if (!decoder_up) |
| 695 | return decoder_up.takeError(); |
| 696 | |
| 697 | pt_query_decoder *decoder = decoder_up.get().get(); |
| 698 | |
| 699 | std::vector<PSBBlock> executions; |
| 700 | |
| 701 | while (true) { |
| 702 | uint64_t maybe_ip = LLDB_INVALID_ADDRESS; |
| 703 | int decoding_status = pt_qry_sync_forward(decoder, &maybe_ip); |
| 704 | if (IsLibiptError(status: decoding_status)) |
| 705 | break; |
| 706 | |
| 707 | uint64_t psb_offset; |
| 708 | int offset_status = pt_qry_get_sync_offset(decoder, &psb_offset); |
| 709 | assert(offset_status >= 0 && |
| 710 | "This can't fail because we were able to synchronize" ); |
| 711 | |
| 712 | std::optional<uint64_t> ip; |
| 713 | if (!(pts_ip_suppressed & decoding_status)) |
| 714 | ip = maybe_ip; |
| 715 | |
| 716 | std::optional<uint64_t> tsc; |
| 717 | // Now we fetch the first TSC that comes after the PSB. |
| 718 | while (HasEvents(status: decoding_status)) { |
| 719 | pt_event event; |
| 720 | decoding_status = pt_qry_event(decoder, &event, sizeof(event)); |
| 721 | if (IsLibiptError(status: decoding_status)) |
| 722 | break; |
| 723 | if (event.has_tsc) { |
| 724 | tsc = event.tsc; |
| 725 | break; |
| 726 | } |
| 727 | } |
| 728 | if (IsLibiptError(status: decoding_status)) { |
| 729 | // We continue to the next PSB. This effectively merges this PSB with the |
| 730 | // previous one, and that should be fine because this PSB might be the |
| 731 | // direct continuation of the previous thread and it's better to show an |
| 732 | // error in the decoded thread than to hide it. If this is the first PSB, |
| 733 | // we are okay losing it. Besides that, an error at processing events |
| 734 | // means that we wouldn't be able to get any instruction out of it. |
| 735 | continue; |
| 736 | } |
| 737 | |
| 738 | if (expect_tscs && !tsc) |
| 739 | return createStringError(EC: inconvertibleErrorCode(), |
| 740 | S: "Found a PSB without TSC." ); |
| 741 | |
| 742 | executions.push_back(x: { |
| 743 | .psb_offset: psb_offset, |
| 744 | .tsc: tsc, |
| 745 | .size: 0, |
| 746 | .starting_ip: ip, |
| 747 | }); |
| 748 | } |
| 749 | if (!executions.empty()) { |
| 750 | // We now adjust the sizes of each block |
| 751 | executions.back().size = buffer.size() - executions.back().psb_offset; |
| 752 | for (int i = (int)executions.size() - 2; i >= 0; i--) { |
| 753 | executions[i].size = |
| 754 | executions[i + 1].psb_offset - executions[i].psb_offset; |
| 755 | } |
| 756 | } |
| 757 | return executions; |
| 758 | } |
| 759 | |
| 760 | Expected<std::optional<uint64_t>> |
| 761 | lldb_private::trace_intel_pt::FindLowestTSCInTrace(TraceIntelPT &trace_intel_pt, |
| 762 | ArrayRef<uint8_t> buffer) { |
| 763 | Expected<PtQueryDecoderUP> decoder_up = |
| 764 | CreateQueryDecoder(trace_intel_pt, buffer); |
| 765 | if (!decoder_up) |
| 766 | return decoder_up.takeError(); |
| 767 | |
| 768 | pt_query_decoder *decoder = decoder_up.get().get(); |
| 769 | uint64_t ip = LLDB_INVALID_ADDRESS; |
| 770 | int status = pt_qry_sync_forward(decoder, &ip); |
| 771 | if (IsLibiptError(status)) |
| 772 | return std::nullopt; |
| 773 | |
| 774 | while (HasEvents(status)) { |
| 775 | pt_event event; |
| 776 | status = pt_qry_event(decoder, &event, sizeof(event)); |
| 777 | if (IsLibiptError(status)) |
| 778 | return std::nullopt; |
| 779 | if (event.has_tsc) |
| 780 | return event.tsc; |
| 781 | } |
| 782 | return std::nullopt; |
| 783 | } |
| 784 | |