1 | //===-- PerfContextSwitchDecoder.cpp --======------------------------------===// |
2 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
3 | // See https://llvm.org/LICENSE.txt for license information. |
4 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
5 | // |
6 | //===----------------------------------------------------------------------===// |
7 | |
8 | #include "PerfContextSwitchDecoder.h" |
9 | #include <optional> |
10 | |
11 | using namespace lldb; |
12 | using namespace lldb_private; |
13 | using namespace lldb_private::trace_intel_pt; |
14 | using namespace llvm; |
15 | |
16 | /// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on |
17 | /// non-linux platforms. |
18 | /// \{ |
// Bit of perf_event_header::misc that marks a context switch record as a
// switch "out" (see PerfContextSwitchRecord::IsOut).
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)

// Values of perf_event_header::type that this file cares about.
// LOST, THROTTLE, UNTHROTTLE and LOST_SAMPLES are the ones treated as error
// records; SWITCH_CPU_WIDE identifies the context switch records that get
// decoded; MAX is only used as a reference point by the sanity check.
#define PERF_RECORD_LOST 2
#define PERF_RECORD_THROTTLE 5
#define PERF_RECORD_UNTHROTTLE 6
#define PERF_RECORD_LOST_SAMPLES 13
#define PERF_RECORD_SWITCH_CPU_WIDE 15
#define PERF_RECORD_MAX 19
27 | |
28 | struct { |
29 | uint32_t ; |
30 | uint16_t ; |
31 | uint16_t ; |
32 | |
33 | /// \return |
34 | /// An \a llvm::Error if the record looks obviously wrong, or \a |
35 | /// llvm::Error::success() otherwise. |
36 | Error () const { |
37 | // The following checks are based on visual inspection of the records and |
38 | // enums in |
39 | // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h |
40 | // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records |
41 | // hold. |
42 | |
43 | // A record of too many uint64_t's or more should mean that the data is |
44 | // wrong |
45 | const uint64_t max_valid_size_bytes = 8000; |
46 | if (size == 0 || size > max_valid_size_bytes) |
47 | return createStringError( |
48 | EC: inconvertibleErrorCode(), |
49 | S: formatv(Fmt: "A record of {0} bytes was found." , Vals: size)); |
50 | |
51 | // We add some numbers to PERF_RECORD_MAX because some systems might have |
52 | // custom records. In any case, we are looking only for abnormal data. |
53 | if (type >= PERF_RECORD_MAX + 100) |
54 | return createStringError( |
55 | EC: inconvertibleErrorCode(), |
56 | S: formatv(Fmt: "Invalid record type {0} was found." , Vals: type)); |
57 | return Error::success(); |
58 | } |
59 | |
60 | bool () const { |
61 | return type == PERF_RECORD_SWITCH_CPU_WIDE; |
62 | } |
63 | |
64 | bool () const { |
65 | return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE || |
66 | type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES; |
67 | } |
68 | }; |
69 | /// \} |
70 | |
71 | /// Record found in the perf_event context switch traces. It might contain |
72 | /// additional fields in memory, but header.size should have the actual size |
73 | /// of the record. |
74 | struct PerfContextSwitchRecord { |
75 | struct perf_event_header ; |
76 | uint32_t next_prev_pid; |
77 | uint32_t next_prev_tid; |
78 | uint32_t pid, tid; |
79 | uint64_t time_in_nanos; |
80 | |
81 | bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } |
82 | }; |
83 | |
84 | /// Record produced after parsing the raw context switch trace produce by |
85 | /// perf_event. A major difference between this struct and |
86 | /// PerfContextSwitchRecord is that this one uses tsc instead of nanos. |
87 | struct ContextSwitchRecord { |
88 | uint64_t tsc; |
89 | /// Whether the switch is in or out |
90 | bool is_out; |
91 | /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally |
92 | /// runs after a context switch out of a normal user thread. |
93 | lldb::pid_t pid; |
94 | lldb::tid_t tid; |
95 | |
96 | bool IsOut() const { return is_out; } |
97 | |
98 | bool IsIn() const { return !is_out; } |
99 | }; |
100 | |
101 | uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const { |
102 | switch (variant) { |
103 | case Variant::Complete: |
104 | return tscs.complete.start; |
105 | case Variant::OnlyStart: |
106 | return tscs.only_start.start; |
107 | case Variant::OnlyEnd: |
108 | return tscs.only_end.end; |
109 | case Variant::HintedEnd: |
110 | return tscs.hinted_end.start; |
111 | case Variant::HintedStart: |
112 | return tscs.hinted_start.end; |
113 | } |
114 | } |
115 | |
116 | uint64_t ThreadContinuousExecution::GetStartTSC() const { |
117 | switch (variant) { |
118 | case Variant::Complete: |
119 | return tscs.complete.start; |
120 | case Variant::OnlyStart: |
121 | return tscs.only_start.start; |
122 | case Variant::OnlyEnd: |
123 | return 0; |
124 | case Variant::HintedEnd: |
125 | return tscs.hinted_end.start; |
126 | case Variant::HintedStart: |
127 | return tscs.hinted_start.hinted_start; |
128 | } |
129 | } |
130 | |
131 | uint64_t ThreadContinuousExecution::GetEndTSC() const { |
132 | switch (variant) { |
133 | case Variant::Complete: |
134 | return tscs.complete.end; |
135 | case Variant::OnlyStart: |
136 | return std::numeric_limits<uint64_t>::max(); |
137 | case Variant::OnlyEnd: |
138 | return tscs.only_end.end; |
139 | case Variant::HintedEnd: |
140 | return tscs.hinted_end.hinted_end; |
141 | case Variant::HintedStart: |
142 | return tscs.hinted_start.end; |
143 | } |
144 | } |
145 | |
146 | ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( |
147 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, |
148 | uint64_t end) { |
149 | ThreadContinuousExecution o(cpu_id, tid, pid); |
150 | o.variant = Variant::Complete; |
151 | o.tscs.complete.start = start; |
152 | o.tscs.complete.end = end; |
153 | return o; |
154 | } |
155 | |
156 | ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( |
157 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, |
158 | uint64_t hinted_start, uint64_t end) { |
159 | ThreadContinuousExecution o(cpu_id, tid, pid); |
160 | o.variant = Variant::HintedStart; |
161 | o.tscs.hinted_start.hinted_start = hinted_start; |
162 | o.tscs.hinted_start.end = end; |
163 | return o; |
164 | } |
165 | |
166 | ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( |
167 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, |
168 | uint64_t hinted_end) { |
169 | ThreadContinuousExecution o(cpu_id, tid, pid); |
170 | o.variant = Variant::HintedEnd; |
171 | o.tscs.hinted_end.start = start; |
172 | o.tscs.hinted_end.hinted_end = hinted_end; |
173 | return o; |
174 | } |
175 | |
176 | ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( |
177 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) { |
178 | ThreadContinuousExecution o(cpu_id, tid, pid); |
179 | o.variant = Variant::OnlyEnd; |
180 | o.tscs.only_end.end = end; |
181 | return o; |
182 | } |
183 | |
184 | ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( |
185 | lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) { |
186 | ThreadContinuousExecution o(cpu_id, tid, pid); |
187 | o.variant = Variant::OnlyStart; |
188 | o.tscs.only_start.start = start; |
189 | return o; |
190 | } |
191 | |
192 | static Error RecoverExecutionsFromConsecutiveRecords( |
193 | cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion, |
194 | const ContextSwitchRecord ¤t_record, |
195 | const std::optional<ContextSwitchRecord> &prev_record, |
196 | std::function<void(const ThreadContinuousExecution &execution)> |
197 | on_new_execution) { |
198 | if (!prev_record) { |
199 | if (current_record.IsOut()) { |
200 | on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution( |
201 | cpu_id, tid: current_record.tid, pid: current_record.pid, end: current_record.tsc)); |
202 | } |
203 | // The 'in' case will be handled later when we try to look for its end |
204 | return Error::success(); |
205 | } |
206 | |
207 | const ContextSwitchRecord &prev = *prev_record; |
208 | if (prev.tsc >= current_record.tsc) |
209 | return createStringError( |
210 | EC: inconvertibleErrorCode(), |
211 | S: formatv(Fmt: "A context switch record doesn't happen after the previous " |
212 | "record. Previous TSC= {0}, current TSC = {1}." , |
213 | Vals: prev.tsc, Vals: current_record.tsc)); |
214 | |
215 | if (current_record.IsIn() && prev.IsIn()) { |
216 | // We found two consecutive ins, which means that we didn't capture |
217 | // the end of the previous execution. |
218 | on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( |
219 | cpu_id, tid: prev.tid, pid: prev.pid, start: prev.tsc, hinted_end: current_record.tsc - 1)); |
220 | } else if (current_record.IsOut() && prev.IsOut()) { |
221 | // We found two consecutive outs, that means that we didn't capture |
222 | // the beginning of the current execution. |
223 | on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( |
224 | cpu_id, tid: current_record.tid, pid: current_record.pid, hinted_start: prev.tsc + 1, |
225 | end: current_record.tsc)); |
226 | } else if (current_record.IsOut() && prev.IsIn()) { |
227 | if (current_record.pid == prev.pid && current_record.tid == prev.tid) { |
228 | /// A complete execution |
229 | on_new_execution(ThreadContinuousExecution::CreateCompleteExecution( |
230 | cpu_id, tid: current_record.tid, pid: current_record.pid, start: prev.tsc, |
231 | end: current_record.tsc)); |
232 | } else { |
233 | // An out after the in of a different thread. The first one doesn't |
234 | // have an end, and the second one doesn't have a start. |
235 | on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( |
236 | cpu_id, tid: prev.tid, pid: prev.pid, start: prev.tsc, hinted_end: current_record.tsc - 1)); |
237 | on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( |
238 | cpu_id, tid: current_record.tid, pid: current_record.pid, hinted_start: prev.tsc + 1, |
239 | end: current_record.tsc)); |
240 | } |
241 | } |
242 | return Error::success(); |
243 | } |
244 | |
245 | Expected<std::vector<ThreadContinuousExecution>> |
246 | lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( |
247 | ArrayRef<uint8_t> data, cpu_id_t cpu_id, |
248 | const LinuxPerfZeroTscConversion &tsc_conversion) { |
249 | |
250 | std::vector<ThreadContinuousExecution> executions; |
251 | |
252 | // This offset is used to create the error message in case of failures. |
253 | size_t offset = 0; |
254 | |
255 | auto do_decode = [&]() -> Error { |
256 | std::optional<ContextSwitchRecord> prev_record; |
257 | while (offset < data.size()) { |
258 | const perf_event_header &perf_record = |
259 | *reinterpret_cast<const perf_event_header *>(data.data() + offset); |
260 | if (Error err = perf_record.SanityCheck()) |
261 | return err; |
262 | |
263 | if (perf_record.IsContextSwitchRecord()) { |
264 | const PerfContextSwitchRecord &context_switch_record = |
265 | *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + |
266 | offset); |
267 | ContextSwitchRecord record{ |
268 | .tsc: tsc_conversion.ToTSC(nanos: context_switch_record.time_in_nanos), |
269 | .is_out: context_switch_record.IsOut(), |
270 | .pid: static_cast<lldb::pid_t>(context_switch_record.pid), |
271 | .tid: static_cast<lldb::tid_t>(context_switch_record.tid)}; |
272 | |
273 | if (Error err = RecoverExecutionsFromConsecutiveRecords( |
274 | cpu_id, tsc_conversion, current_record: record, prev_record, |
275 | on_new_execution: [&](const ThreadContinuousExecution &execution) { |
276 | executions.push_back(x: execution); |
277 | })) |
278 | return err; |
279 | |
280 | prev_record = record; |
281 | } |
282 | offset += perf_record.size; |
283 | } |
284 | |
285 | // We might have an incomplete last record |
286 | if (prev_record && prev_record->IsIn()) |
287 | executions.push_back(x: ThreadContinuousExecution::CreateOnlyStartExecution( |
288 | cpu_id, tid: prev_record->tid, pid: prev_record->pid, start: prev_record->tsc)); |
289 | return Error::success(); |
290 | }; |
291 | |
292 | if (Error err = do_decode()) |
293 | return createStringError(EC: inconvertibleErrorCode(), |
294 | S: formatv(Fmt: "Malformed perf context switch trace for " |
295 | "cpu {0} at offset {1}. {2}" , |
296 | Vals&: cpu_id, Vals&: offset, Vals: toString(E: std::move(err)))); |
297 | |
298 | return executions; |
299 | } |
300 | |
301 | Expected<std::vector<uint8_t>> |
302 | lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace( |
303 | llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) { |
304 | size_t offset = 0; |
305 | std::vector<uint8_t> out_data; |
306 | |
307 | while (offset < data.size()) { |
308 | const perf_event_header &perf_record = |
309 | *reinterpret_cast<const perf_event_header *>(data.data() + offset); |
310 | if (Error err = perf_record.SanityCheck()) |
311 | return std::move(err); |
312 | bool should_copy = false; |
313 | if (perf_record.IsContextSwitchRecord()) { |
314 | const PerfContextSwitchRecord &context_switch_record = |
315 | *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + |
316 | offset); |
317 | if (pids.count(x: context_switch_record.pid)) |
318 | should_copy = true; |
319 | } else if (perf_record.IsErrorRecord()) { |
320 | should_copy = true; |
321 | } |
322 | |
323 | if (should_copy) { |
324 | for (size_t i = 0; i < perf_record.size; i++) { |
325 | out_data.push_back(x: data[offset + i]); |
326 | } |
327 | } |
328 | |
329 | offset += perf_record.size; |
330 | } |
331 | return out_data; |
332 | } |
333 | |