1//===-- Perf.cpp ----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Perf.h"
10
11#include "Plugins/Process/POSIX/ProcessPOSIXLog.h"
12#include "llvm/Support/FormatVariadic.h"
13#include "llvm/Support/MathExtras.h"
14#include "llvm/Support/MemoryBuffer.h"
15#include <linux/version.h>
16#include <sys/ioctl.h>
17#include <sys/mman.h>
18#include <sys/syscall.h>
19#include <unistd.h>
20
21using namespace lldb_private;
22using namespace process_linux;
23using namespace llvm;
24
25Expected<LinuxPerfZeroTscConversion>
26lldb_private::process_linux::LoadPerfTscConversionParameters() {
27#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
28 lldb::pid_t pid = getpid();
29 perf_event_attr attr;
30 memset(s: &attr, c: 0, n: sizeof(attr));
31 attr.size = sizeof(attr);
32 attr.type = PERF_TYPE_SOFTWARE;
33 attr.config = PERF_COUNT_SW_DUMMY;
34
35 Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid);
36 if (!perf_event)
37 return perf_event.takeError();
38 if (Error mmap_err =
39 perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0,
40 /*num_aux_pages=*/0,
41 /*data_buffer_write=*/false))
42 return std::move(mmap_err);
43
44 perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage();
45 if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) {
46 return LinuxPerfZeroTscConversion{
47 .time_mult: mmap_metada.time_mult, .time_shift: mmap_metada.time_shift, .time_zero: {.value: mmap_metada.time_zero}};
48 } else {
49 auto err_cap =
50 !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero";
51 std::string err_msg =
52 llvm::formatv(Fmt: "Can't get TSC to real time conversion values. "
53 "perf_event capability '{0}' not supported.",
54 Vals&: err_cap);
55 return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), S: err_msg);
56 }
57#else
58 std::string err_msg = "PERF_COUNT_SW_DUMMY requires Linux 3.12";
59 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg);
60#endif
61}
62
63void resource_handle::MmapDeleter::operator()(void *ptr) {
64 if (m_bytes && ptr != nullptr)
65 munmap(addr: ptr, len: m_bytes);
66}
67
68void resource_handle::FileDescriptorDeleter::operator()(long *ptr) {
69 if (ptr == nullptr)
70 return;
71 if (*ptr == -1)
72 return;
73 close(fd: *ptr);
74 std::default_delete<long>()(ptr);
75}
76
77llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr,
78 std::optional<lldb::pid_t> pid,
79 std::optional<lldb::cpu_id_t> cpu,
80 std::optional<long> group_fd,
81 unsigned long flags) {
82 errno = 0;
83 long fd = syscall(SYS_perf_event_open, &attr, pid.value_or(u: -1),
84 cpu.value_or(u: -1), group_fd.value_or(u: -1), flags);
85 if (fd == -1) {
86 std::string err_msg =
87 llvm::formatv(Fmt: "perf event syscall failed: {0}", Vals: std::strerror(errno));
88 return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), S: err_msg);
89 }
90 return PerfEvent(fd, !attr.disabled);
91}
92
93llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr,
94 std::optional<lldb::pid_t> pid,
95 std::optional<lldb::cpu_id_t> cpu) {
96 return Init(attr, pid, cpu, group_fd: -1, flags: 0);
97}
98
99llvm::Expected<resource_handle::MmapUP>
100PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags,
101 long int offset, llvm::StringRef buffer_name) {
102 errno = 0;
103 auto mmap_result = ::mmap(addr: addr, len: length, prot: prot, flags: flags, fd: GetFd(), offset: offset);
104
105 if (mmap_result == MAP_FAILED) {
106 std::string err_msg =
107 llvm::formatv(Fmt: "perf event mmap allocation failed for {0}: {1}",
108 Vals&: buffer_name, Vals: std::strerror(errno));
109 return createStringError(EC: inconvertibleErrorCode(), S: err_msg);
110 }
111 return resource_handle::MmapUP(mmap_result, length);
112}
113
114llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages,
115 bool data_buffer_write) {
116 size_t mmap_size = (num_data_pages + 1) * getpagesize();
117 if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap(
118 addr: nullptr, length: mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0),
119 MAP_SHARED, offset: 0, buffer_name: "metadata and data buffer")) {
120 m_metadata_data_base = std::move(mmap_metadata_data.get());
121 return Error::success();
122 } else
123 return mmap_metadata_data.takeError();
124}
125
126llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) {
127#ifndef PERF_ATTR_SIZE_VER5
128 return createStringError(inconvertibleErrorCode(),
129 "Intel PT Linux perf event not supported");
130#else
131 if (num_aux_pages == 0)
132 return Error::success();
133
134 perf_event_mmap_page &metadata_page = GetMetadataPage();
135
136 metadata_page.aux_offset =
137 metadata_page.data_offset + metadata_page.data_size;
138 metadata_page.aux_size = num_aux_pages * getpagesize();
139
140 if (Expected<resource_handle::MmapUP> mmap_aux =
141 DoMmap(addr: nullptr, length: metadata_page.aux_size, PROT_READ, MAP_SHARED,
142 offset: metadata_page.aux_offset, buffer_name: "aux buffer")) {
143 m_aux_base = std::move(mmap_aux.get());
144 return Error::success();
145 } else
146 return mmap_aux.takeError();
147#endif
148}
149
150llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages,
151 size_t num_aux_pages,
152 bool data_buffer_write) {
153 if (num_data_pages != 0 && !isPowerOf2_64(Value: num_data_pages))
154 return llvm::createStringError(
155 EC: llvm::inconvertibleErrorCode(),
156 S: llvm::formatv(Fmt: "Number of data pages must be a power of 2, got: {0}",
157 Vals&: num_data_pages));
158 if (num_aux_pages != 0 && !isPowerOf2_64(Value: num_aux_pages))
159 return llvm::createStringError(
160 EC: llvm::inconvertibleErrorCode(),
161 S: llvm::formatv(Fmt: "Number of aux pages must be a power of 2, got: {0}",
162 Vals&: num_aux_pages));
163 if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write))
164 return err;
165 if (Error err = MmapAuxBuffer(num_aux_pages))
166 return err;
167 return Error::success();
168}
169
170long PerfEvent::GetFd() const { return *(m_fd.get()); }
171
172perf_event_mmap_page &PerfEvent::GetMetadataPage() const {
173 return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get());
174}
175
176ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const {
177#ifndef PERF_ATTR_SIZE_VER5
178 llvm_unreachable("Intel PT Linux perf event not supported");
179#else
180 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
181 return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) +
182 mmap_metadata.data_offset,
183 static_cast<size_t>(mmap_metadata.data_size)};
184#endif
185}
186
187ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const {
188#ifndef PERF_ATTR_SIZE_VER5
189 llvm_unreachable("Intel PT Linux perf event not supported");
190#else
191 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
192 return {reinterpret_cast<uint8_t *>(m_aux_base.get()),
193 static_cast<size_t>(mmap_metadata.aux_size)};
194#endif
195}
196
197Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyDataBuffer() {
198 // The following code assumes that the protection level of the DATA page
199 // is PROT_READ. If PROT_WRITE is used, then reading would require that
200 // this piece of code updates some pointers. See more about data_tail
201 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
202
203#ifndef PERF_ATTR_SIZE_VER5
204 return createStringError(inconvertibleErrorCode(),
205 "Intel PT Linux perf event not supported");
206#else
207 bool was_enabled = m_enabled;
208 if (Error err = DisableWithIoctl())
209 return std::move(err);
210
211 /**
212 * The data buffer and aux buffer have different implementations
213 * with respect to their definition of head pointer when using PROD_READ only.
214 * In the case of Aux data buffer the head always wraps around the aux buffer
215 * and we don't need to care about it, whereas the data_head keeps
216 * increasing and needs to be wrapped by modulus operator
217 */
218 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
219
220 ArrayRef<uint8_t> data = GetDataBuffer();
221 uint64_t data_head = mmap_metadata.data_head;
222 uint64_t data_size = mmap_metadata.data_size;
223 std::vector<uint8_t> output;
224 output.reserve(n: data.size());
225
226 if (data_head > data_size) {
227 uint64_t actual_data_head = data_head % data_size;
228 // The buffer has wrapped, so we first the oldest chunk of data
229 output.insert(position: output.end(), first: data.begin() + actual_data_head, last: data.end());
230 // And we read the most recent chunk of data
231 output.insert(position: output.end(), first: data.begin(), last: data.begin() + actual_data_head);
232 } else {
233 // There's been no wrapping, so we just read linearly
234 output.insert(position: output.end(), first: data.begin(), last: data.begin() + data_head);
235 }
236
237 if (was_enabled) {
238 if (Error err = EnableWithIoctl())
239 return std::move(err);
240 }
241
242 return output;
243#endif
244}
245
246Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() {
247 // The following code assumes that the protection level of the AUX page
248 // is PROT_READ. If PROT_WRITE is used, then reading would require that
249 // this piece of code updates some pointers. See more about aux_tail
250 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
251
252#ifndef PERF_ATTR_SIZE_VER5
253 return createStringError(inconvertibleErrorCode(),
254 "Intel PT Linux perf event not supported");
255#else
256 bool was_enabled = m_enabled;
257 if (Error err = DisableWithIoctl())
258 return std::move(err);
259
260 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
261
262 ArrayRef<uint8_t> data = GetAuxBuffer();
263 uint64_t aux_head = mmap_metadata.aux_head;
264 std::vector<uint8_t> output;
265 output.reserve(n: data.size());
266
267 /**
268 * When configured as ring buffer, the aux buffer keeps wrapping around
269 * the buffer and its not possible to detect how many times the buffer
270 * wrapped. Initially the buffer is filled with zeros,as shown below
271 * so in order to get complete buffer we first copy firstpartsize, followed
272 * by any left over part from beginning to aux_head
273 *
274 * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size
275 * aux_head->||<- firstpartsize ->|
276 *
277 * */
278
279 output.insert(position: output.end(), first: data.begin() + aux_head, last: data.end());
280 output.insert(position: output.end(), first: data.begin(), last: data.begin() + aux_head);
281
282 if (was_enabled) {
283 if (Error err = EnableWithIoctl())
284 return std::move(err);
285 }
286
287 return output;
288#endif
289}
290
291Error PerfEvent::DisableWithIoctl() {
292 if (!m_enabled)
293 return Error::success();
294
295 if (ioctl(fd: *m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0)
296 return createStringError(EC: inconvertibleErrorCode(),
297 Fmt: "Can't disable perf event. %s",
298 Vals: std::strerror(errno));
299
300 m_enabled = false;
301 return Error::success();
302}
303
304bool PerfEvent::IsEnabled() const { return m_enabled; }
305
306Error PerfEvent::EnableWithIoctl() {
307 if (m_enabled)
308 return Error::success();
309
310 if (ioctl(fd: *m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0)
311 return createStringError(EC: inconvertibleErrorCode(),
312 Fmt: "Can't enable perf event. %s",
313 Vals: std::strerror(errno));
314
315 m_enabled = true;
316 return Error::success();
317}
318
319size_t PerfEvent::GetEffectiveDataBufferSize() const {
320#ifndef PERF_ATTR_SIZE_VER5
321 llvm_unreachable("Intel PT Linux perf event not supported");
322#else
323 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
324 if (mmap_metadata.data_head < mmap_metadata.data_size)
325 return mmap_metadata.data_head;
326 else
327 return mmap_metadata.data_size; // The buffer has wrapped.
328#endif
329}
330
331Expected<PerfEvent>
332lldb_private::process_linux::CreateContextSwitchTracePerfEvent(
333 lldb::cpu_id_t cpu_id, const PerfEvent *parent_perf_event) {
334 Log *log = GetLog(mask: POSIXLog::Trace);
335#ifndef PERF_ATTR_SIZE_VER5
336 return createStringError(inconvertibleErrorCode(),
337 "Intel PT Linux perf event not supported");
338#else
339 perf_event_attr attr;
340 memset(s: &attr, c: 0, n: sizeof(attr));
341 attr.size = sizeof(attr);
342 attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
343 attr.type = PERF_TYPE_SOFTWARE;
344 attr.context_switch = 1;
345 attr.exclude_kernel = 1;
346 attr.sample_id_all = 1;
347 attr.exclude_hv = 1;
348 attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false;
349
350 // The given perf configuration will produce context switch records of 32
351 // bytes each. Assuming that every context switch will be emitted twice (one
352 // for context switch ins and another one for context switch outs), and that a
353 // context switch will happen at least every half a millisecond per core, we
354 // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more
355 // than what a regular intel pt trace can get. Pessimistically we pick as
356 // 32KiB for the size of our context switch trace.
357
358 uint64_t data_buffer_size = 32768;
359 uint64_t data_buffer_numpages = data_buffer_size / getpagesize();
360
361 LLDB_LOG(log, "Will create context switch trace buffer of size {0}",
362 data_buffer_size);
363
364 std::optional<long> group_fd;
365 if (parent_perf_event)
366 group_fd = parent_perf_event->GetFd();
367
368 if (Expected<PerfEvent> perf_event = PerfEvent::Init(
369 attr, /*pid=*/std::nullopt, cpu: cpu_id, group_fd, /*flags=*/0)) {
370 if (Error mmap_err = perf_event->MmapMetadataAndBuffers(
371 num_data_pages: data_buffer_numpages, num_aux_pages: 0, /*data_buffer_write=*/false)) {
372 return std::move(mmap_err);
373 }
374 return perf_event;
375 } else {
376 return perf_event.takeError();
377 }
378#endif
379}
380

// Source: lldb/source/Plugins/Process/Linux/Perf.cpp