| 1 | //===-- Perf.cpp ----------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "Perf.h" |
| 10 | |
| 11 | #include "Plugins/Process/POSIX/ProcessPOSIXLog.h" |
| 12 | #include "llvm/Support/FormatVariadic.h" |
| 13 | #include "llvm/Support/MathExtras.h" |
| 14 | #include "llvm/Support/MemoryBuffer.h" |
| 15 | #include <linux/version.h> |
| 16 | #include <sys/ioctl.h> |
| 17 | #include <sys/mman.h> |
| 18 | #include <sys/syscall.h> |
| 19 | #include <unistd.h> |
| 20 | |
| 21 | using namespace lldb_private; |
| 22 | using namespace process_linux; |
| 23 | using namespace llvm; |
| 24 | |
| 25 | Expected<LinuxPerfZeroTscConversion> |
| 26 | lldb_private::process_linux::LoadPerfTscConversionParameters() { |
| 27 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) |
| 28 | lldb::pid_t pid = getpid(); |
| 29 | perf_event_attr attr; |
| 30 | memset(s: &attr, c: 0, n: sizeof(attr)); |
| 31 | attr.size = sizeof(attr); |
| 32 | attr.type = PERF_TYPE_SOFTWARE; |
| 33 | attr.config = PERF_COUNT_SW_DUMMY; |
| 34 | |
| 35 | Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid); |
| 36 | if (!perf_event) |
| 37 | return perf_event.takeError(); |
| 38 | if (Error mmap_err = |
| 39 | perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0, |
| 40 | /*num_aux_pages=*/0, |
| 41 | /*data_buffer_write=*/false)) |
| 42 | return std::move(mmap_err); |
| 43 | |
| 44 | perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage(); |
| 45 | if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) { |
| 46 | return LinuxPerfZeroTscConversion{ |
| 47 | .time_mult: mmap_metada.time_mult, .time_shift: mmap_metada.time_shift, .time_zero: {.value: mmap_metada.time_zero}}; |
| 48 | } else { |
| 49 | auto err_cap = |
| 50 | !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero" ; |
| 51 | std::string err_msg = |
| 52 | llvm::formatv(Fmt: "Can't get TSC to real time conversion values. " |
| 53 | "perf_event capability '{0}' not supported." , |
| 54 | Vals&: err_cap); |
| 55 | return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), S: err_msg); |
| 56 | } |
| 57 | #else |
| 58 | std::string err_msg = "PERF_COUNT_SW_DUMMY requires Linux 3.12" ; |
| 59 | return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); |
| 60 | #endif |
| 61 | } |
| 62 | |
| 63 | void resource_handle::MmapDeleter::operator()(void *ptr) { |
| 64 | if (m_bytes && ptr != nullptr) |
| 65 | munmap(addr: ptr, len: m_bytes); |
| 66 | } |
| 67 | |
| 68 | void resource_handle::FileDescriptorDeleter::operator()(long *ptr) { |
| 69 | if (ptr == nullptr) |
| 70 | return; |
| 71 | if (*ptr == -1) |
| 72 | return; |
| 73 | close(fd: *ptr); |
| 74 | std::default_delete<long>()(ptr); |
| 75 | } |
| 76 | |
| 77 | llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, |
| 78 | std::optional<lldb::pid_t> pid, |
| 79 | std::optional<lldb::cpu_id_t> cpu, |
| 80 | std::optional<long> group_fd, |
| 81 | unsigned long flags) { |
| 82 | errno = 0; |
| 83 | long fd = syscall(SYS_perf_event_open, &attr, pid.value_or(u: -1), |
| 84 | cpu.value_or(u: -1), group_fd.value_or(u: -1), flags); |
| 85 | if (fd == -1) { |
| 86 | std::string err_msg = |
| 87 | llvm::formatv(Fmt: "perf event syscall failed: {0}" , Vals: std::strerror(errno)); |
| 88 | return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), S: err_msg); |
| 89 | } |
| 90 | return PerfEvent(fd, !attr.disabled); |
| 91 | } |
| 92 | |
| 93 | llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, |
| 94 | std::optional<lldb::pid_t> pid, |
| 95 | std::optional<lldb::cpu_id_t> cpu) { |
| 96 | return Init(attr, pid, cpu, group_fd: -1, flags: 0); |
| 97 | } |
| 98 | |
| 99 | llvm::Expected<resource_handle::MmapUP> |
| 100 | PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags, |
| 101 | long int offset, llvm::StringRef buffer_name) { |
| 102 | errno = 0; |
| 103 | auto mmap_result = ::mmap(addr: addr, len: length, prot: prot, flags: flags, fd: GetFd(), offset: offset); |
| 104 | |
| 105 | if (mmap_result == MAP_FAILED) { |
| 106 | std::string err_msg = |
| 107 | llvm::formatv(Fmt: "perf event mmap allocation failed for {0}: {1}" , |
| 108 | Vals&: buffer_name, Vals: std::strerror(errno)); |
| 109 | return createStringError(EC: inconvertibleErrorCode(), S: err_msg); |
| 110 | } |
| 111 | return resource_handle::MmapUP(mmap_result, length); |
| 112 | } |
| 113 | |
| 114 | llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages, |
| 115 | bool data_buffer_write) { |
| 116 | size_t mmap_size = (num_data_pages + 1) * getpagesize(); |
| 117 | if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap( |
| 118 | addr: nullptr, length: mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0), |
| 119 | MAP_SHARED, offset: 0, buffer_name: "metadata and data buffer" )) { |
| 120 | m_metadata_data_base = std::move(mmap_metadata_data.get()); |
| 121 | return Error::success(); |
| 122 | } else |
| 123 | return mmap_metadata_data.takeError(); |
| 124 | } |
| 125 | |
| 126 | llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) { |
| 127 | #ifndef PERF_ATTR_SIZE_VER5 |
| 128 | return createStringError(inconvertibleErrorCode(), |
| 129 | "Intel PT Linux perf event not supported" ); |
| 130 | #else |
| 131 | if (num_aux_pages == 0) |
| 132 | return Error::success(); |
| 133 | |
| 134 | perf_event_mmap_page &metadata_page = GetMetadataPage(); |
| 135 | |
| 136 | metadata_page.aux_offset = |
| 137 | metadata_page.data_offset + metadata_page.data_size; |
| 138 | metadata_page.aux_size = num_aux_pages * getpagesize(); |
| 139 | |
| 140 | if (Expected<resource_handle::MmapUP> mmap_aux = |
| 141 | DoMmap(addr: nullptr, length: metadata_page.aux_size, PROT_READ, MAP_SHARED, |
| 142 | offset: metadata_page.aux_offset, buffer_name: "aux buffer" )) { |
| 143 | m_aux_base = std::move(mmap_aux.get()); |
| 144 | return Error::success(); |
| 145 | } else |
| 146 | return mmap_aux.takeError(); |
| 147 | #endif |
| 148 | } |
| 149 | |
| 150 | llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages, |
| 151 | size_t num_aux_pages, |
| 152 | bool data_buffer_write) { |
| 153 | if (num_data_pages != 0 && !isPowerOf2_64(Value: num_data_pages)) |
| 154 | return llvm::createStringError( |
| 155 | EC: llvm::inconvertibleErrorCode(), |
| 156 | S: llvm::formatv(Fmt: "Number of data pages must be a power of 2, got: {0}" , |
| 157 | Vals&: num_data_pages)); |
| 158 | if (num_aux_pages != 0 && !isPowerOf2_64(Value: num_aux_pages)) |
| 159 | return llvm::createStringError( |
| 160 | EC: llvm::inconvertibleErrorCode(), |
| 161 | S: llvm::formatv(Fmt: "Number of aux pages must be a power of 2, got: {0}" , |
| 162 | Vals&: num_aux_pages)); |
| 163 | if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write)) |
| 164 | return err; |
| 165 | if (Error err = MmapAuxBuffer(num_aux_pages)) |
| 166 | return err; |
| 167 | return Error::success(); |
| 168 | } |
| 169 | |
| 170 | long PerfEvent::GetFd() const { return *(m_fd.get()); } |
| 171 | |
| 172 | perf_event_mmap_page &PerfEvent::GetMetadataPage() const { |
| 173 | return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get()); |
| 174 | } |
| 175 | |
| 176 | ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const { |
| 177 | #ifndef PERF_ATTR_SIZE_VER5 |
| 178 | llvm_unreachable("Intel PT Linux perf event not supported" ); |
| 179 | #else |
| 180 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
| 181 | return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) + |
| 182 | mmap_metadata.data_offset, |
| 183 | static_cast<size_t>(mmap_metadata.data_size)}; |
| 184 | #endif |
| 185 | } |
| 186 | |
| 187 | ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const { |
| 188 | #ifndef PERF_ATTR_SIZE_VER5 |
| 189 | llvm_unreachable("Intel PT Linux perf event not supported" ); |
| 190 | #else |
| 191 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
| 192 | return {reinterpret_cast<uint8_t *>(m_aux_base.get()), |
| 193 | static_cast<size_t>(mmap_metadata.aux_size)}; |
| 194 | #endif |
| 195 | } |
| 196 | |
| 197 | Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyDataBuffer() { |
| 198 | // The following code assumes that the protection level of the DATA page |
| 199 | // is PROT_READ. If PROT_WRITE is used, then reading would require that |
| 200 | // this piece of code updates some pointers. See more about data_tail |
| 201 | // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. |
| 202 | |
| 203 | #ifndef PERF_ATTR_SIZE_VER5 |
| 204 | return createStringError(inconvertibleErrorCode(), |
| 205 | "Intel PT Linux perf event not supported" ); |
| 206 | #else |
| 207 | bool was_enabled = m_enabled; |
| 208 | if (Error err = DisableWithIoctl()) |
| 209 | return std::move(err); |
| 210 | |
| 211 | /** |
| 212 | * The data buffer and aux buffer have different implementations |
| 213 | * with respect to their definition of head pointer when using PROD_READ only. |
| 214 | * In the case of Aux data buffer the head always wraps around the aux buffer |
| 215 | * and we don't need to care about it, whereas the data_head keeps |
| 216 | * increasing and needs to be wrapped by modulus operator |
| 217 | */ |
| 218 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
| 219 | |
| 220 | ArrayRef<uint8_t> data = GetDataBuffer(); |
| 221 | uint64_t data_head = mmap_metadata.data_head; |
| 222 | uint64_t data_size = mmap_metadata.data_size; |
| 223 | std::vector<uint8_t> output; |
| 224 | output.reserve(n: data.size()); |
| 225 | |
| 226 | if (data_head > data_size) { |
| 227 | uint64_t actual_data_head = data_head % data_size; |
| 228 | // The buffer has wrapped, so we first the oldest chunk of data |
| 229 | output.insert(position: output.end(), first: data.begin() + actual_data_head, last: data.end()); |
| 230 | // And we read the most recent chunk of data |
| 231 | output.insert(position: output.end(), first: data.begin(), last: data.begin() + actual_data_head); |
| 232 | } else { |
| 233 | // There's been no wrapping, so we just read linearly |
| 234 | output.insert(position: output.end(), first: data.begin(), last: data.begin() + data_head); |
| 235 | } |
| 236 | |
| 237 | if (was_enabled) { |
| 238 | if (Error err = EnableWithIoctl()) |
| 239 | return std::move(err); |
| 240 | } |
| 241 | |
| 242 | return output; |
| 243 | #endif |
| 244 | } |
| 245 | |
| 246 | Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() { |
| 247 | // The following code assumes that the protection level of the AUX page |
| 248 | // is PROT_READ. If PROT_WRITE is used, then reading would require that |
| 249 | // this piece of code updates some pointers. See more about aux_tail |
| 250 | // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. |
| 251 | |
| 252 | #ifndef PERF_ATTR_SIZE_VER5 |
| 253 | return createStringError(inconvertibleErrorCode(), |
| 254 | "Intel PT Linux perf event not supported" ); |
| 255 | #else |
| 256 | bool was_enabled = m_enabled; |
| 257 | if (Error err = DisableWithIoctl()) |
| 258 | return std::move(err); |
| 259 | |
| 260 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
| 261 | |
| 262 | ArrayRef<uint8_t> data = GetAuxBuffer(); |
| 263 | uint64_t aux_head = mmap_metadata.aux_head; |
| 264 | std::vector<uint8_t> output; |
| 265 | output.reserve(n: data.size()); |
| 266 | |
| 267 | /** |
| 268 | * When configured as ring buffer, the aux buffer keeps wrapping around |
| 269 | * the buffer and its not possible to detect how many times the buffer |
| 270 | * wrapped. Initially the buffer is filled with zeros,as shown below |
| 271 | * so in order to get complete buffer we first copy firstpartsize, followed |
| 272 | * by any left over part from beginning to aux_head |
| 273 | * |
| 274 | * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size |
| 275 | * aux_head->||<- firstpartsize ->| |
| 276 | * |
| 277 | * */ |
| 278 | |
| 279 | output.insert(position: output.end(), first: data.begin() + aux_head, last: data.end()); |
| 280 | output.insert(position: output.end(), first: data.begin(), last: data.begin() + aux_head); |
| 281 | |
| 282 | if (was_enabled) { |
| 283 | if (Error err = EnableWithIoctl()) |
| 284 | return std::move(err); |
| 285 | } |
| 286 | |
| 287 | return output; |
| 288 | #endif |
| 289 | } |
| 290 | |
| 291 | Error PerfEvent::DisableWithIoctl() { |
| 292 | if (!m_enabled) |
| 293 | return Error::success(); |
| 294 | |
| 295 | if (ioctl(fd: *m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0) |
| 296 | return createStringError(EC: inconvertibleErrorCode(), |
| 297 | Fmt: "Can't disable perf event. %s" , |
| 298 | Vals: std::strerror(errno)); |
| 299 | |
| 300 | m_enabled = false; |
| 301 | return Error::success(); |
| 302 | } |
| 303 | |
| 304 | bool PerfEvent::IsEnabled() const { return m_enabled; } |
| 305 | |
| 306 | Error PerfEvent::EnableWithIoctl() { |
| 307 | if (m_enabled) |
| 308 | return Error::success(); |
| 309 | |
| 310 | if (ioctl(fd: *m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0) |
| 311 | return createStringError(EC: inconvertibleErrorCode(), |
| 312 | Fmt: "Can't enable perf event. %s" , |
| 313 | Vals: std::strerror(errno)); |
| 314 | |
| 315 | m_enabled = true; |
| 316 | return Error::success(); |
| 317 | } |
| 318 | |
| 319 | size_t PerfEvent::GetEffectiveDataBufferSize() const { |
| 320 | #ifndef PERF_ATTR_SIZE_VER5 |
| 321 | llvm_unreachable("Intel PT Linux perf event not supported" ); |
| 322 | #else |
| 323 | perf_event_mmap_page &mmap_metadata = GetMetadataPage(); |
| 324 | if (mmap_metadata.data_head < mmap_metadata.data_size) |
| 325 | return mmap_metadata.data_head; |
| 326 | else |
| 327 | return mmap_metadata.data_size; // The buffer has wrapped. |
| 328 | #endif |
| 329 | } |
| 330 | |
| 331 | Expected<PerfEvent> |
| 332 | lldb_private::process_linux::CreateContextSwitchTracePerfEvent( |
| 333 | lldb::cpu_id_t cpu_id, const PerfEvent *parent_perf_event) { |
| 334 | Log *log = GetLog(mask: POSIXLog::Trace); |
| 335 | #ifndef PERF_ATTR_SIZE_VER5 |
| 336 | return createStringError(inconvertibleErrorCode(), |
| 337 | "Intel PT Linux perf event not supported" ); |
| 338 | #else |
| 339 | perf_event_attr attr; |
| 340 | memset(s: &attr, c: 0, n: sizeof(attr)); |
| 341 | attr.size = sizeof(attr); |
| 342 | attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME; |
| 343 | attr.type = PERF_TYPE_SOFTWARE; |
| 344 | attr.context_switch = 1; |
| 345 | attr.exclude_kernel = 1; |
| 346 | attr.sample_id_all = 1; |
| 347 | attr.exclude_hv = 1; |
| 348 | attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false; |
| 349 | |
| 350 | // The given perf configuration will produce context switch records of 32 |
| 351 | // bytes each. Assuming that every context switch will be emitted twice (one |
| 352 | // for context switch ins and another one for context switch outs), and that a |
| 353 | // context switch will happen at least every half a millisecond per core, we |
| 354 | // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more |
| 355 | // than what a regular intel pt trace can get. Pessimistically we pick as |
| 356 | // 32KiB for the size of our context switch trace. |
| 357 | |
| 358 | uint64_t data_buffer_size = 32768; |
| 359 | uint64_t data_buffer_numpages = data_buffer_size / getpagesize(); |
| 360 | |
| 361 | LLDB_LOG(log, "Will create context switch trace buffer of size {0}" , |
| 362 | data_buffer_size); |
| 363 | |
| 364 | std::optional<long> group_fd; |
| 365 | if (parent_perf_event) |
| 366 | group_fd = parent_perf_event->GetFd(); |
| 367 | |
| 368 | if (Expected<PerfEvent> perf_event = PerfEvent::Init( |
| 369 | attr, /*pid=*/std::nullopt, cpu: cpu_id, group_fd, /*flags=*/0)) { |
| 370 | if (Error mmap_err = perf_event->MmapMetadataAndBuffers( |
| 371 | num_data_pages: data_buffer_numpages, num_aux_pages: 0, /*data_buffer_write=*/false)) { |
| 372 | return std::move(mmap_err); |
| 373 | } |
| 374 | return perf_event; |
| 375 | } else { |
| 376 | return perf_event.takeError(); |
| 377 | } |
| 378 | #endif |
| 379 | } |
| 380 | |