//===-- DecodedThread.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DecodedThread.h"
#include "TraceCursorIntelPT.h"
#include <intel-pt.h>
#include <memory>
#include <optional>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;

char IntelPTError::ID;

IntelPTError::IntelPTError(int libipt_error_code, lldb::addr_t address)
    : m_libipt_error_code(libipt_error_code), m_address(address) {
  assert(libipt_error_code < 0);
}

void IntelPTError::log(llvm::raw_ostream &OS) const {
  OS << pt_errstr(pt_errcode(m_libipt_error_code));
  if (m_address != LLDB_INVALID_ADDRESS && m_address > 0)
    OS << formatv(": {0:x+16}", m_address);
}

bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

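// The items of a NanosecondsRange are assumed to be evenly spread across its
// nanosecond span. For example, if a range starts at 100 ns, the next range
// starts at 200 ns, and the range holds 50 items, then each item is assumed
// to last 2 ns, and the item 10 positions into the range maps to 120 ns
// (reported relative to begin_of_time_nanos).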
double DecodedThread::NanosecondsRange::GetInterpolatedTime(
    uint64_t item_index, uint64_t begin_of_time_nanos,
    const LinuxPerfZeroTscConversion &tsc_conversion) const {
  uint64_t items_since_last_tsc = item_index - first_item_index;

  auto interpolate = [&](uint64_t next_range_start_ns) {
    if (next_range_start_ns == nanos) {
      // If the resolution of the conversion formula is bad enough to consider
      // these two timestamps as equal, then we just increase the next one by 1
      // for correction.
      next_range_start_ns++;
    }
    long double item_duration =
        static_cast<long double>(next_range_start_ns - nanos) / items_count;
    return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration;
  };

  if (!next_range) {
    // If this is the last TSC range, we have to extrapolate. In this case, we
    // assume that each instruction took one TSC, which is what an instruction
    // would take if no parallelism is achieved and the frequency multiplier is
    // 1.
    return interpolate(tsc_conversion.ToNanos(tsc + items_count));
  }
  if (items_count < (next_range->tsc - tsc)) {
    // If the number of items in this range is less than the total TSC duration
    // of this range, i.e. each instruction taking longer than 1 TSC, then we
    // can assume that something else happened between these TSCs (e.g. a
    // context switch, change to kernel, decoding errors, etc). In this case,
    // we also assume that each instruction took 1 TSC. A proper way to improve
    // this would be to analyze the next events in the trace looking for
    // context switches or trace disablement events, but for now, as we only
    // want an approximation, we keep it simple. We are also guaranteed that
    // the time in nanos of the next range is different from the current one,
    // just because of the definition of a NanosecondsRange.
    return interpolate(
        std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
  }

  // In this case, each item took less than 1 TSC, so some parallelism was
  // achieved, which is an indication that we didn't suffer any kind of
  // interruption.
  return interpolate(next_range->nanos);
}

uint64_t DecodedThread::GetItemsCount() const { return m_item_data.size(); }

lldb::addr_t
DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
  return std::get<lldb::addr_t>(m_item_data[item_index]);
}

lldb::addr_t
DecodedThread::GetSyncPointOffsetByIndex(uint64_t item_index) const {
  return m_psb_offsets.find(item_index)->second;
}

ThreadSP DecodedThread::GetThread() { return m_thread_sp; }

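// Besides storing the new item, this keeps the currently open timestamp
// ranges up to date: every appended item extends the last known TSC and
// nanoseconds ranges until a newer timestamp is notified via NotifyTsc.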
template <typename Data>
DecodedThread::TraceItemStorage &
DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind, Data &&data) {
  m_item_data.emplace_back(data);

  if (m_last_tsc)
    (*m_last_tsc)->second.items_count++;
  if (m_last_nanoseconds)
    (*m_last_nanoseconds)->second.items_count++;

  return m_item_data.back();
}

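// A PSB packet is a synchronization point in the Intel PT packet stream. We
// remember its offset keyed by the index of the item about to be created, so
// that it can be recovered later with GetSyncPointOffsetByIndex.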
void DecodedThread::NotifySyncPoint(lldb::addr_t psb_offset) {
  m_psb_offsets.try_emplace(GetItemsCount(), psb_offset);
  AppendEvent(lldb::eTraceEventSyncPoint);
}

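// A new TSC opens a new TSCRange (and, if a conversion rate is available, a
// new NanosecondsRange) starting at the next item index. Repeated identical
// values are deduplicated, and each new nanoseconds range is linked from its
// predecessor so that GetInterpolatedTime can look ahead to the next range.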
void DecodedThread::NotifyTsc(TSC tsc) {
  if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
    return;
  if (m_last_tsc)
    assert(tsc >= (*m_last_tsc)->second.tsc &&
           "We can't have decreasing times");

  m_last_tsc =
      m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;

  if (m_tsc_conversion) {
    uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
    if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
      m_last_nanoseconds =
          m_nanoseconds
              .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
                                                         GetItemsCount()})
              .first;
      if (*m_last_nanoseconds != m_nanoseconds.begin()) {
        auto prev_range = prev(*m_last_nanoseconds);
        prev_range->second.next_range = &(*m_last_nanoseconds)->second;
      }
    }
  }
  AppendEvent(lldb::eTraceEventHWClockTick);
}

void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
  if (!m_last_cpu || *m_last_cpu != cpu_id) {
    m_cpus.emplace(GetItemsCount(), cpu_id);
    m_last_cpu = cpu_id;
    AppendEvent(lldb::eTraceEventCPUChanged);
  }
}

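// The CPU of an item is the one announced by the most recent CPU change at or
// before that item, hence the upper_bound + prev lookup on the sorted map.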
lldb::cpu_id_t DecodedThread::GetCPUByIndex(uint64_t item_index) const {
  auto it = m_cpus.upper_bound(item_index);
  return it == m_cpus.begin() ? LLDB_INVALID_CPU_ID : prev(it)->second;
}

std::optional<DecodedThread::TSCRange>
DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
  auto next_it = m_tscs.upper_bound(item_index);
  if (next_it == m_tscs.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

std::optional<DecodedThread::NanosecondsRange>
DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
  auto next_it = m_nanoseconds.upper_bound(item_index);
  if (next_it == m_nanoseconds.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

uint64_t DecodedThread::GetTotalInstructionCount() const {
  return m_insn_count;
}

void DecodedThread::AppendEvent(lldb::TraceEvent event) {
  CreateNewTraceItem(lldb::eTraceItemKindEvent, event);
  m_events_stats.RecordEvent(event);
}

void DecodedThread::AppendInstruction(const pt_insn &insn) {
  CreateNewTraceItem(lldb::eTraceItemKindInstruction, insn.ip);
  m_insn_count++;
}

void DecodedThread::AppendError(const IntelPTError &error) {
  CreateNewTraceItem(lldb::eTraceItemKindError, error.message());
  m_error_stats.RecordError(/*fatal=*/false);
}

void DecodedThread::AppendCustomError(StringRef err, bool fatal) {
  CreateNewTraceItem(lldb::eTraceItemKindError, err.str());
  m_error_stats.RecordError(fatal);
}

lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const {
  return std::get<lldb::TraceEvent>(m_item_data[item_index]);
}

const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const {
  return m_events_stats;
}

void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
  events_counts[event]++;
  total_count++;
}

uint64_t DecodedThread::ErrorStats::GetTotalCount() const {
  uint64_t total = 0;
  for (const auto &[kind, count] : libipt_errors)
    total += count;

  return total + other_errors + fatal_errors;
}

void DecodedThread::ErrorStats::RecordError(bool fatal) {
  if (fatal)
    fatal_errors++;
  else
    other_errors++;
}

void DecodedThread::ErrorStats::RecordError(int libipt_error_code) {
  libipt_errors[pt_errstr(pt_errcode(libipt_error_code))]++;
}

const DecodedThread::ErrorStats &DecodedThread::GetErrorStats() const {
  return m_error_stats;
}

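// The kind of an item is implied by the variant alternative it occupies in
// TraceItemStorage: a std::string is an error, a TraceEvent is an event, and
// an addr_t is an instruction load address.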
lldb::TraceItemKind
DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
  return std::visit(
      llvm::makeVisitor(
          [](const std::string &) { return lldb::eTraceItemKindError; },
          [](lldb::TraceEvent) { return lldb::eTraceItemKindEvent; },
          [](lldb::addr_t) { return lldb::eTraceItemKindInstruction; }),
      m_item_data[item_index]);
}

llvm::StringRef DecodedThread::GetErrorByIndex(uint64_t item_index) const {
  if (item_index >= m_item_data.size())
    return llvm::StringRef();
  return std::get<std::string>(m_item_data[item_index]);
}

DecodedThread::DecodedThread(
    ThreadSP thread_sp,
    const std::optional<LinuxPerfZeroTscConversion> &tsc_conversion)
    : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}

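// This is only an approximation: it accounts for the payload stored in each
// container entry, but not for the internal bookkeeping of the containers
// themselves.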
size_t DecodedThread::CalculateApproximateMemoryUsage() const {
  return sizeof(TraceItemStorage) * m_item_data.size() +
         (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
         (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
         (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
}