1 | //===-- Perf.cpp ----------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Perf.h" |
10 | |
11 | #include "Plugins/Process/POSIX/ProcessPOSIXLog.h" |
12 | #include "lldb/Host/linux/Support.h" |
13 | #include "llvm/Support/FormatVariadic.h" |
14 | #include "llvm/Support/MathExtras.h" |
15 | #include "llvm/Support/MemoryBuffer.h" |
16 | #include <linux/version.h> |
17 | #include <sys/ioctl.h> |
18 | #include <sys/mman.h> |
19 | #include <sys/syscall.h> |
20 | #include <unistd.h> |
21 | |
22 | using namespace lldb_private; |
23 | using namespace process_linux; |
24 | using namespace llvm; |
25 | |
26 | Expected<LinuxPerfZeroTscConversion> |
27 | lldb_private::process_linux::LoadPerfTscConversionParameters() { |
28 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) |
29 | lldb::pid_t pid = getpid(); |
30 | perf_event_attr attr; |
31 | memset(s: &attr, c: 0, n: sizeof(attr)); |
32 | attr.size = sizeof(attr); |
33 | attr.type = PERF_TYPE_SOFTWARE; |
34 | attr.config = PERF_COUNT_SW_DUMMY; |
35 | |
36 | Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid); |
37 | if (!perf_event) |
38 | return perf_event.takeError(); |
39 | if (Error mmap_err = |
40 | perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0, |
41 | /*num_aux_pages=*/0, |
42 | /*data_buffer_write=*/false)) |
43 | return std::move(mmap_err); |
44 | |
45 | perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage(); |
46 | if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) { |
47 | return LinuxPerfZeroTscConversion{ |
48 | .time_mult: mmap_metada.time_mult, .time_shift: mmap_metada.time_shift, .time_zero: {.value: mmap_metada.time_zero}}; |
49 | } else { |
50 | auto err_cap = |
51 | !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero" ; |
52 | std::string err_msg = |
53 | llvm::formatv(Fmt: "Can't get TSC to real time conversion values. " |
54 | "perf_event capability '{0}' not supported." , |
55 | Vals&: err_cap); |
56 | return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), S: err_msg); |
57 | } |
58 | #else |
59 | std::string err_msg = "PERF_COUNT_SW_DUMMY requires Linux 3.12" ; |
60 | return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); |
61 | #endif |
62 | } |
63 | |
64 | void resource_handle::MmapDeleter::operator()(void *ptr) { |
65 | if (m_bytes && ptr != nullptr) |
66 | munmap(addr: ptr, len: m_bytes); |
67 | } |
68 | |
69 | void resource_handle::FileDescriptorDeleter::operator()(long *ptr) { |
70 | if (ptr == nullptr) |
71 | return; |
72 | if (*ptr == -1) |
73 | return; |
74 | close(fd: *ptr); |
75 | std::default_delete<long>()(ptr); |
76 | } |
77 | |
78 | llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, |
79 | std::optional<lldb::pid_t> pid, |
80 | std::optional<lldb::cpu_id_t> cpu, |
81 | std::optional<long> group_fd, |
82 | unsigned long flags) { |
83 | errno = 0; |
84 | long fd = syscall(SYS_perf_event_open, &attr, pid.value_or(u: -1), |
85 | cpu.value_or(u: -1), group_fd.value_or(u: -1), flags); |
86 | if (fd == -1) { |
87 | std::string err_msg = |
88 | llvm::formatv(Fmt: "perf event syscall failed: {0}" , Vals: std::strerror(errno)); |
89 | return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), S: err_msg); |
90 | } |
91 | return PerfEvent(fd, !attr.disabled); |
92 | } |
93 | |
94 | llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, |
95 | std::optional<lldb::pid_t> pid, |
96 | std::optional<lldb::cpu_id_t> cpu) { |
97 | return Init(attr, pid, cpu, group_fd: -1, flags: 0); |
98 | } |
99 | |
100 | llvm::Expected<resource_handle::MmapUP> |
101 | PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags, |
102 | long int offset, llvm::StringRef buffer_name) { |
103 | errno = 0; |
104 | auto mmap_result = ::mmap(addr: addr, len: length, prot: prot, flags: flags, fd: GetFd(), offset: offset); |
105 | |
106 | if (mmap_result == MAP_FAILED) { |
107 | std::string err_msg = |
108 | llvm::formatv(Fmt: "perf event mmap allocation failed for {0}: {1}" , |
109 | Vals&: buffer_name, Vals: std::strerror(errno)); |
110 | return createStringError(EC: inconvertibleErrorCode(), S: err_msg); |
111 | } |
112 | return resource_handle::MmapUP(mmap_result, length); |
113 | } |
114 | |
115 | llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages, |
116 | bool data_buffer_write) { |
117 | size_t mmap_size = (num_data_pages + 1) * getpagesize(); |
118 | if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap( |
119 | addr: nullptr, length: mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0), |
120 | MAP_SHARED, offset: 0, buffer_name: "metadata and data buffer" )) { |
121 | m_metadata_data_base = std::move(mmap_metadata_data.get()); |
122 | return Error::success(); |
123 | } else |
124 | return mmap_metadata_data.takeError(); |
125 | } |
126 | |
127 | llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) { |
128 | #ifndef PERF_ATTR_SIZE_VER5 |
129 | return createStringError(inconvertibleErrorCode(), |
130 | "Intel PT Linux perf event not supported" ); |
131 | #else |
132 | if (num_aux_pages == 0) |
133 | return Error::success(); |
134 | |
135 | perf_event_mmap_page &metadata_page = GetMetadataPage(); |
136 | |
137 | metadata_page.aux_offset = |
138 | metadata_page.data_offset + metadata_page.data_size; |
139 | metadata_page.aux_size = num_aux_pages * getpagesize(); |
140 | |
141 | if (Expected<resource_handle::MmapUP> mmap_aux = |
142 | DoMmap(addr: nullptr, length: metadata_page.aux_size, PROT_READ, MAP_SHARED, |
143 | offset: metadata_page.aux_offset, buffer_name: "aux buffer" )) { |
144 | m_aux_base = std::move(mmap_aux.get()); |
145 | return Error::success(); |
146 | } else |
147 | return mmap_aux.takeError(); |
148 | #endif |
149 | } |
150 | |
151 | llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages, |
152 | size_t num_aux_pages, |
153 | bool data_buffer_write) { |
154 | if (num_data_pages != 0 && !isPowerOf2_64(Value: num_data_pages)) |
155 | return llvm::createStringError( |
156 | EC: llvm::inconvertibleErrorCode(), |
157 | S: llvm::formatv(Fmt: "Number of data pages must be a power of 2, got: {0}" , |
158 | Vals&: num_data_pages)); |
159 | if (num_aux_pages != 0 && !isPowerOf2_64(Value: num_aux_pages)) |
160 | return llvm::createStringError( |
161 | EC: llvm::inconvertibleErrorCode(), |
162 | S: llvm::formatv(Fmt: "Number of aux pages must be a power of 2, got: {0}" , |
163 | Vals&: num_aux_pages)); |
164 | if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write)) |
165 | return err; |
166 | if (Error err = MmapAuxBuffer(num_aux_pages)) |
167 | return err; |
168 | return Error::success(); |
169 | } |
170 | |
171 | long PerfEvent::GetFd() const { return *(m_fd.get()); } |
172 | |
173 | perf_event_mmap_page &PerfEvent::GetMetadataPage() const { |
174 | return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get()); |
175 | } |
176 | |
177 | ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const { |
178 | #ifndef PERF_ATTR_SIZE_VER5 |
179 | llvm_unreachable("Intel PT Linux perf event not supported" ); |
180 | #else |
181 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
182 | return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) + |
183 | mmap_metadata.data_offset, |
184 | static_cast<size_t>(mmap_metadata.data_size)}; |
185 | #endif |
186 | } |
187 | |
188 | ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const { |
189 | #ifndef PERF_ATTR_SIZE_VER5 |
190 | llvm_unreachable("Intel PT Linux perf event not supported" ); |
191 | #else |
192 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
193 | return {reinterpret_cast<uint8_t *>(m_aux_base.get()), |
194 | static_cast<size_t>(mmap_metadata.aux_size)}; |
195 | #endif |
196 | } |
197 | |
198 | Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyDataBuffer() { |
199 | // The following code assumes that the protection level of the DATA page |
200 | // is PROT_READ. If PROT_WRITE is used, then reading would require that |
201 | // this piece of code updates some pointers. See more about data_tail |
202 | // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. |
203 | |
204 | #ifndef PERF_ATTR_SIZE_VER5 |
205 | return createStringError(inconvertibleErrorCode(), |
206 | "Intel PT Linux perf event not supported" ); |
207 | #else |
208 | bool was_enabled = m_enabled; |
209 | if (Error err = DisableWithIoctl()) |
210 | return std::move(err); |
211 | |
212 | /** |
213 | * The data buffer and aux buffer have different implementations |
214 | * with respect to their definition of head pointer when using PROD_READ only. |
215 | * In the case of Aux data buffer the head always wraps around the aux buffer |
216 | * and we don't need to care about it, whereas the data_head keeps |
217 | * increasing and needs to be wrapped by modulus operator |
218 | */ |
219 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
220 | |
221 | ArrayRef<uint8_t> data = GetDataBuffer(); |
222 | uint64_t data_head = mmap_metadata.data_head; |
223 | uint64_t data_size = mmap_metadata.data_size; |
224 | std::vector<uint8_t> output; |
225 | output.reserve(n: data.size()); |
226 | |
227 | if (data_head > data_size) { |
228 | uint64_t actual_data_head = data_head % data_size; |
229 | // The buffer has wrapped, so we first the oldest chunk of data |
230 | output.insert(position: output.end(), first: data.begin() + actual_data_head, last: data.end()); |
231 | // And we read the most recent chunk of data |
232 | output.insert(position: output.end(), first: data.begin(), last: data.begin() + actual_data_head); |
233 | } else { |
234 | // There's been no wrapping, so we just read linearly |
235 | output.insert(position: output.end(), first: data.begin(), last: data.begin() + data_head); |
236 | } |
237 | |
238 | if (was_enabled) { |
239 | if (Error err = EnableWithIoctl()) |
240 | return std::move(err); |
241 | } |
242 | |
243 | return output; |
244 | #endif |
245 | } |
246 | |
247 | Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() { |
248 | // The following code assumes that the protection level of the AUX page |
249 | // is PROT_READ. If PROT_WRITE is used, then reading would require that |
250 | // this piece of code updates some pointers. See more about aux_tail |
251 | // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. |
252 | |
253 | #ifndef PERF_ATTR_SIZE_VER5 |
254 | return createStringError(inconvertibleErrorCode(), |
255 | "Intel PT Linux perf event not supported" ); |
256 | #else |
257 | bool was_enabled = m_enabled; |
258 | if (Error err = DisableWithIoctl()) |
259 | return std::move(err); |
260 | |
261 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
262 | |
263 | ArrayRef<uint8_t> data = GetAuxBuffer(); |
264 | uint64_t aux_head = mmap_metadata.aux_head; |
265 | std::vector<uint8_t> output; |
266 | output.reserve(n: data.size()); |
267 | |
268 | /** |
269 | * When configured as ring buffer, the aux buffer keeps wrapping around |
270 | * the buffer and its not possible to detect how many times the buffer |
271 | * wrapped. Initially the buffer is filled with zeros,as shown below |
272 | * so in order to get complete buffer we first copy firstpartsize, followed |
273 | * by any left over part from beginning to aux_head |
274 | * |
275 | * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size |
276 | * aux_head->||<- firstpartsize ->| |
277 | * |
278 | * */ |
279 | |
280 | output.insert(position: output.end(), first: data.begin() + aux_head, last: data.end()); |
281 | output.insert(position: output.end(), first: data.begin(), last: data.begin() + aux_head); |
282 | |
283 | if (was_enabled) { |
284 | if (Error err = EnableWithIoctl()) |
285 | return std::move(err); |
286 | } |
287 | |
288 | return output; |
289 | #endif |
290 | } |
291 | |
292 | Error PerfEvent::DisableWithIoctl() { |
293 | if (!m_enabled) |
294 | return Error::success(); |
295 | |
296 | if (ioctl(fd: *m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0) |
297 | return createStringError(EC: inconvertibleErrorCode(), |
298 | Fmt: "Can't disable perf event. %s" , |
299 | Vals: std::strerror(errno)); |
300 | |
301 | m_enabled = false; |
302 | return Error::success(); |
303 | } |
304 | |
305 | bool PerfEvent::IsEnabled() const { return m_enabled; } |
306 | |
307 | Error PerfEvent::EnableWithIoctl() { |
308 | if (m_enabled) |
309 | return Error::success(); |
310 | |
311 | if (ioctl(fd: *m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0) |
312 | return createStringError(EC: inconvertibleErrorCode(), |
313 | Fmt: "Can't enable perf event. %s" , |
314 | Vals: std::strerror(errno)); |
315 | |
316 | m_enabled = true; |
317 | return Error::success(); |
318 | } |
319 | |
320 | size_t PerfEvent::GetEffectiveDataBufferSize() const { |
321 | #ifndef PERF_ATTR_SIZE_VER5 |
322 | llvm_unreachable("Intel PT Linux perf event not supported" ); |
323 | #else |
324 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
325 | if (mmap_metadata.data_head < mmap_metadata.data_size) |
326 | return mmap_metadata.data_head; |
327 | else |
328 | return mmap_metadata.data_size; // The buffer has wrapped. |
329 | #endif |
330 | } |
331 | |
332 | Expected<PerfEvent> |
333 | lldb_private::process_linux::CreateContextSwitchTracePerfEvent( |
334 | lldb::cpu_id_t cpu_id, const PerfEvent *parent_perf_event) { |
335 | Log *log = GetLog(mask: POSIXLog::Trace); |
336 | #ifndef PERF_ATTR_SIZE_VER5 |
337 | return createStringError(inconvertibleErrorCode(), |
338 | "Intel PT Linux perf event not supported" ); |
339 | #else |
340 | perf_event_attr attr; |
341 | memset(s: &attr, c: 0, n: sizeof(attr)); |
342 | attr.size = sizeof(attr); |
343 | attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME; |
344 | attr.type = PERF_TYPE_SOFTWARE; |
345 | attr.context_switch = 1; |
346 | attr.exclude_kernel = 1; |
347 | attr.sample_id_all = 1; |
348 | attr.exclude_hv = 1; |
349 | attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false; |
350 | |
351 | // The given perf configuration will produce context switch records of 32 |
352 | // bytes each. Assuming that every context switch will be emitted twice (one |
353 | // for context switch ins and another one for context switch outs), and that a |
354 | // context switch will happen at least every half a millisecond per core, we |
355 | // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more |
356 | // than what a regular intel pt trace can get. Pessimistically we pick as |
357 | // 32KiB for the size of our context switch trace. |
358 | |
359 | uint64_t data_buffer_size = 32768; |
360 | uint64_t data_buffer_numpages = data_buffer_size / getpagesize(); |
361 | |
362 | LLDB_LOG(log, "Will create context switch trace buffer of size {0}" , |
363 | data_buffer_size); |
364 | |
365 | std::optional<long> group_fd; |
366 | if (parent_perf_event) |
367 | group_fd = parent_perf_event->GetFd(); |
368 | |
369 | if (Expected<PerfEvent> perf_event = PerfEvent::Init( |
370 | attr, /*pid=*/std::nullopt, cpu: cpu_id, group_fd, /*flags=*/0)) { |
371 | if (Error mmap_err = perf_event->MmapMetadataAndBuffers( |
372 | num_data_pages: data_buffer_numpages, num_aux_pages: 0, /*data_buffer_write=*/false)) { |
373 | return std::move(mmap_err); |
374 | } |
375 | return perf_event; |
376 | } else { |
377 | return perf_event.takeError(); |
378 | } |
379 | #endif |
380 | } |
381 | |