//===-- PerfContextSwitchDecoder.cpp --------------------------------------===//
| 2 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 3 | // See https://llvm.org/LICENSE.txt for license information. |
| 4 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 5 | // |
| 6 | //===----------------------------------------------------------------------===// |
| 7 | |
| 8 | #include "PerfContextSwitchDecoder.h" |
| 9 | #include <optional> |
| 10 | |
| 11 | using namespace lldb; |
| 12 | using namespace lldb_private; |
| 13 | using namespace lldb_private::trace_intel_pt; |
| 14 | using namespace llvm; |
| 15 | |
| 16 | /// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on |
| 17 | /// non-linux platforms. |
| 18 | /// \{ |
| 19 | #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) |
| 20 | |
| 21 | #define PERF_RECORD_LOST 2 |
| 22 | #define PERF_RECORD_THROTTLE 5 |
| 23 | #define PERF_RECORD_UNTHROTTLE 6 |
| 24 | #define PERF_RECORD_LOST_SAMPLES 13 |
| 25 | #define PERF_RECORD_SWITCH_CPU_WIDE 15 |
| 26 | #define PERF_RECORD_MAX 19 |
| 27 | |
| 28 | struct { |
| 29 | uint32_t ; |
| 30 | uint16_t ; |
| 31 | uint16_t ; |
| 32 | |
| 33 | /// \return |
| 34 | /// An \a llvm::Error if the record looks obviously wrong, or \a |
| 35 | /// llvm::Error::success() otherwise. |
| 36 | Error () const { |
| 37 | // The following checks are based on visual inspection of the records and |
| 38 | // enums in |
| 39 | // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h |
| 40 | // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records |
| 41 | // hold. |
| 42 | |
| 43 | // A record of too many uint64_t's or more should mean that the data is |
| 44 | // wrong |
| 45 | const uint64_t max_valid_size_bytes = 8000; |
| 46 | if (size == 0 || size > max_valid_size_bytes) |
| 47 | return createStringError( |
| 48 | EC: inconvertibleErrorCode(), |
| 49 | S: formatv(Fmt: "A record of {0} bytes was found." , Vals: size)); |
| 50 | |
| 51 | // We add some numbers to PERF_RECORD_MAX because some systems might have |
| 52 | // custom records. In any case, we are looking only for abnormal data. |
| 53 | if (type >= PERF_RECORD_MAX + 100) |
| 54 | return createStringError( |
| 55 | EC: inconvertibleErrorCode(), |
| 56 | S: formatv(Fmt: "Invalid record type {0} was found." , Vals: type)); |
| 57 | return Error::success(); |
| 58 | } |
| 59 | |
| 60 | bool () const { |
| 61 | return type == PERF_RECORD_SWITCH_CPU_WIDE; |
| 62 | } |
| 63 | |
| 64 | bool () const { |
| 65 | return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE || |
| 66 | type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES; |
| 67 | } |
| 68 | }; |
| 69 | /// \} |
| 70 | |
| 71 | /// Record found in the perf_event context switch traces. It might contain |
| 72 | /// additional fields in memory, but header.size should have the actual size |
| 73 | /// of the record. |
| 74 | struct PerfContextSwitchRecord { |
| 75 | struct perf_event_header ; |
| 76 | uint32_t next_prev_pid; |
| 77 | uint32_t next_prev_tid; |
| 78 | uint32_t pid, tid; |
| 79 | uint64_t time_in_nanos; |
| 80 | |
| 81 | bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } |
| 82 | }; |
| 83 | |
| 84 | /// Record produced after parsing the raw context switch trace produce by |
| 85 | /// perf_event. A major difference between this struct and |
| 86 | /// PerfContextSwitchRecord is that this one uses tsc instead of nanos. |
| 87 | struct ContextSwitchRecord { |
| 88 | uint64_t tsc; |
| 89 | /// Whether the switch is in or out |
| 90 | bool is_out; |
| 91 | /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally |
| 92 | /// runs after a context switch out of a normal user thread. |
| 93 | lldb::pid_t pid; |
| 94 | lldb::tid_t tid; |
| 95 | |
| 96 | bool IsOut() const { return is_out; } |
| 97 | |
| 98 | bool IsIn() const { return !is_out; } |
| 99 | }; |
| 100 | |
| 101 | uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const { |
| 102 | switch (variant) { |
| 103 | case Variant::Complete: |
| 104 | return tscs.complete.start; |
| 105 | case Variant::OnlyStart: |
| 106 | return tscs.only_start.start; |
| 107 | case Variant::OnlyEnd: |
| 108 | return tscs.only_end.end; |
| 109 | case Variant::HintedEnd: |
| 110 | return tscs.hinted_end.start; |
| 111 | case Variant::HintedStart: |
| 112 | return tscs.hinted_start.end; |
| 113 | } |
| 114 | } |
| 115 | |
| 116 | uint64_t ThreadContinuousExecution::GetStartTSC() const { |
| 117 | switch (variant) { |
| 118 | case Variant::Complete: |
| 119 | return tscs.complete.start; |
| 120 | case Variant::OnlyStart: |
| 121 | return tscs.only_start.start; |
| 122 | case Variant::OnlyEnd: |
| 123 | return 0; |
| 124 | case Variant::HintedEnd: |
| 125 | return tscs.hinted_end.start; |
| 126 | case Variant::HintedStart: |
| 127 | return tscs.hinted_start.hinted_start; |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | uint64_t ThreadContinuousExecution::GetEndTSC() const { |
| 132 | switch (variant) { |
| 133 | case Variant::Complete: |
| 134 | return tscs.complete.end; |
| 135 | case Variant::OnlyStart: |
| 136 | return std::numeric_limits<uint64_t>::max(); |
| 137 | case Variant::OnlyEnd: |
| 138 | return tscs.only_end.end; |
| 139 | case Variant::HintedEnd: |
| 140 | return tscs.hinted_end.hinted_end; |
| 141 | case Variant::HintedStart: |
| 142 | return tscs.hinted_start.end; |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( |
| 147 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, |
| 148 | uint64_t end) { |
| 149 | ThreadContinuousExecution o(cpu_id, tid, pid); |
| 150 | o.variant = Variant::Complete; |
| 151 | o.tscs.complete.start = start; |
| 152 | o.tscs.complete.end = end; |
| 153 | return o; |
| 154 | } |
| 155 | |
| 156 | ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( |
| 157 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, |
| 158 | uint64_t hinted_start, uint64_t end) { |
| 159 | ThreadContinuousExecution o(cpu_id, tid, pid); |
| 160 | o.variant = Variant::HintedStart; |
| 161 | o.tscs.hinted_start.hinted_start = hinted_start; |
| 162 | o.tscs.hinted_start.end = end; |
| 163 | return o; |
| 164 | } |
| 165 | |
| 166 | ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( |
| 167 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, |
| 168 | uint64_t hinted_end) { |
| 169 | ThreadContinuousExecution o(cpu_id, tid, pid); |
| 170 | o.variant = Variant::HintedEnd; |
| 171 | o.tscs.hinted_end.start = start; |
| 172 | o.tscs.hinted_end.hinted_end = hinted_end; |
| 173 | return o; |
| 174 | } |
| 175 | |
| 176 | ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( |
| 177 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) { |
| 178 | ThreadContinuousExecution o(cpu_id, tid, pid); |
| 179 | o.variant = Variant::OnlyEnd; |
| 180 | o.tscs.only_end.end = end; |
| 181 | return o; |
| 182 | } |
| 183 | |
| 184 | ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( |
| 185 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) { |
| 186 | ThreadContinuousExecution o(cpu_id, tid, pid); |
| 187 | o.variant = Variant::OnlyStart; |
| 188 | o.tscs.only_start.start = start; |
| 189 | return o; |
| 190 | } |
| 191 | |
| 192 | static Error RecoverExecutionsFromConsecutiveRecords( |
| 193 | cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion, |
| 194 | const ContextSwitchRecord ¤t_record, |
| 195 | const std::optional<ContextSwitchRecord> &prev_record, |
| 196 | std::function<void(const ThreadContinuousExecution &execution)> |
| 197 | on_new_execution) { |
| 198 | if (!prev_record) { |
| 199 | if (current_record.IsOut()) { |
| 200 | on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution( |
| 201 | cpu_id, tid: current_record.tid, pid: current_record.pid, end: current_record.tsc)); |
| 202 | } |
| 203 | // The 'in' case will be handled later when we try to look for its end |
| 204 | return Error::success(); |
| 205 | } |
| 206 | |
| 207 | const ContextSwitchRecord &prev = *prev_record; |
| 208 | if (prev.tsc >= current_record.tsc) |
| 209 | return createStringError( |
| 210 | EC: inconvertibleErrorCode(), |
| 211 | S: formatv(Fmt: "A context switch record doesn't happen after the previous " |
| 212 | "record. Previous TSC= {0}, current TSC = {1}." , |
| 213 | Vals: prev.tsc, Vals: current_record.tsc)); |
| 214 | |
| 215 | if (current_record.IsIn() && prev.IsIn()) { |
| 216 | // We found two consecutive ins, which means that we didn't capture |
| 217 | // the end of the previous execution. |
| 218 | on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( |
| 219 | cpu_id, tid: prev.tid, pid: prev.pid, start: prev.tsc, hinted_end: current_record.tsc - 1)); |
| 220 | } else if (current_record.IsOut() && prev.IsOut()) { |
| 221 | // We found two consecutive outs, that means that we didn't capture |
| 222 | // the beginning of the current execution. |
| 223 | on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( |
| 224 | cpu_id, tid: current_record.tid, pid: current_record.pid, hinted_start: prev.tsc + 1, |
| 225 | end: current_record.tsc)); |
| 226 | } else if (current_record.IsOut() && prev.IsIn()) { |
| 227 | if (current_record.pid == prev.pid && current_record.tid == prev.tid) { |
| 228 | /// A complete execution |
| 229 | on_new_execution(ThreadContinuousExecution::CreateCompleteExecution( |
| 230 | cpu_id, tid: current_record.tid, pid: current_record.pid, start: prev.tsc, |
| 231 | end: current_record.tsc)); |
| 232 | } else { |
| 233 | // An out after the in of a different thread. The first one doesn't |
| 234 | // have an end, and the second one doesn't have a start. |
| 235 | on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( |
| 236 | cpu_id, tid: prev.tid, pid: prev.pid, start: prev.tsc, hinted_end: current_record.tsc - 1)); |
| 237 | on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( |
| 238 | cpu_id, tid: current_record.tid, pid: current_record.pid, hinted_start: prev.tsc + 1, |
| 239 | end: current_record.tsc)); |
| 240 | } |
| 241 | } |
| 242 | return Error::success(); |
| 243 | } |
| 244 | |
| 245 | Expected<std::vector<ThreadContinuousExecution>> |
| 246 | lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( |
| 247 | ArrayRef<uint8_t> data, cpu_id_t cpu_id, |
| 248 | const LinuxPerfZeroTscConversion &tsc_conversion) { |
| 249 | |
| 250 | std::vector<ThreadContinuousExecution> executions; |
| 251 | |
| 252 | // This offset is used to create the error message in case of failures. |
| 253 | size_t offset = 0; |
| 254 | |
| 255 | auto do_decode = [&]() -> Error { |
| 256 | std::optional<ContextSwitchRecord> prev_record; |
| 257 | while (offset < data.size()) { |
| 258 | const perf_event_header &perf_record = |
| 259 | *reinterpret_cast<const perf_event_header *>(data.data() + offset); |
| 260 | if (Error err = perf_record.SanityCheck()) |
| 261 | return err; |
| 262 | |
| 263 | if (perf_record.IsContextSwitchRecord()) { |
| 264 | const PerfContextSwitchRecord &context_switch_record = |
| 265 | *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + |
| 266 | offset); |
| 267 | ContextSwitchRecord record{ |
| 268 | .tsc: tsc_conversion.ToTSC(nanos: context_switch_record.time_in_nanos), |
| 269 | .is_out: context_switch_record.IsOut(), |
| 270 | .pid: static_cast<lldb::pid_t>(context_switch_record.pid), |
| 271 | .tid: static_cast<lldb::tid_t>(context_switch_record.tid)}; |
| 272 | |
| 273 | if (Error err = RecoverExecutionsFromConsecutiveRecords( |
| 274 | cpu_id, tsc_conversion, current_record: record, prev_record, |
| 275 | on_new_execution: [&](const ThreadContinuousExecution &execution) { |
| 276 | executions.push_back(x: execution); |
| 277 | })) |
| 278 | return err; |
| 279 | |
| 280 | prev_record = record; |
| 281 | } |
| 282 | offset += perf_record.size; |
| 283 | } |
| 284 | |
| 285 | // We might have an incomplete last record |
| 286 | if (prev_record && prev_record->IsIn()) |
| 287 | executions.push_back(x: ThreadContinuousExecution::CreateOnlyStartExecution( |
| 288 | cpu_id, tid: prev_record->tid, pid: prev_record->pid, start: prev_record->tsc)); |
| 289 | return Error::success(); |
| 290 | }; |
| 291 | |
| 292 | if (Error err = do_decode()) |
| 293 | return createStringError(EC: inconvertibleErrorCode(), |
| 294 | S: formatv(Fmt: "Malformed perf context switch trace for " |
| 295 | "cpu {0} at offset {1}. {2}" , |
| 296 | Vals&: cpu_id, Vals&: offset, Vals: toString(E: std::move(err)))); |
| 297 | |
| 298 | return executions; |
| 299 | } |
| 300 | |
| 301 | Expected<std::vector<uint8_t>> |
| 302 | lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace( |
| 303 | llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) { |
| 304 | size_t offset = 0; |
| 305 | std::vector<uint8_t> out_data; |
| 306 | |
| 307 | while (offset < data.size()) { |
| 308 | const perf_event_header &perf_record = |
| 309 | *reinterpret_cast<const perf_event_header *>(data.data() + offset); |
| 310 | if (Error err = perf_record.SanityCheck()) |
| 311 | return std::move(err); |
| 312 | bool should_copy = false; |
| 313 | if (perf_record.IsContextSwitchRecord()) { |
| 314 | const PerfContextSwitchRecord &context_switch_record = |
| 315 | *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + |
| 316 | offset); |
| 317 | if (pids.count(x: context_switch_record.pid)) |
| 318 | should_copy = true; |
| 319 | } else if (perf_record.IsErrorRecord()) { |
| 320 | should_copy = true; |
| 321 | } |
| 322 | |
| 323 | if (should_copy) { |
| 324 | for (size_t i = 0; i < perf_record.size; i++) { |
| 325 | out_data.push_back(x: data[offset + i]); |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | offset += perf_record.size; |
| 330 | } |
| 331 | return out_data; |
| 332 | } |
| 333 | |