| 1 | // Copyright (C) 2016 Intel Corporation. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #include "qbenchmarkperfevents_p.h" |
| 5 | #include "qbenchmarkmetric.h" |
| 6 | #include "qbenchmark_p.h" |
| 7 | |
| 8 | #ifdef QTESTLIB_USE_PERF_EVENTS |
| 9 | |
| 10 | // include the qcore_unix_p.h without core-private |
| 11 | // we only use inline functions anyway |
| 12 | #include "../corelib/kernel/qcore_unix_p.h" |
| 13 | |
| 14 | #include <sys/types.h> |
| 15 | #include <errno.h> |
| 16 | #include <fcntl.h> |
| 17 | #include <string.h> |
| 18 | #include <stdio.h> |
| 19 | |
| 20 | #include <sys/ioctl.h> |
| 21 | #include <sys/prctl.h> |
| 22 | #include <sys/syscall.h> |
| 23 | |
| 24 | #include "3rdparty/linux/perf_event_p.h" |
| 25 | |
// For PERF_TYPE_HW_CACHE events the config value is a packed bitmask:
//   bits  0 to  7: cache type       (PERF_COUNT_HW_CACHE_<cache>)
//   bits  8 to 15: cache operation  (PERF_COUNT_HW_CACHE_OP_<op>)
//   bits 16 to 23: cache result     (PERF_COUNT_HW_CACHE_RESULT_<result>)
// QTEST_PERF_CACHE_CONFIG pastes the three suffixes onto those prefixes and
// packs the resulting constants.
#define QTEST_PERF_CACHE_CONFIG(cache, op, result) \
    (PERF_COUNT_HW_CACHE_ ## cache \
     | PERF_COUNT_HW_CACHE_OP_ ## op << 8 \
     | PERF_COUNT_HW_CACHE_RESULT_ ## result << 16)

// Cache accesses
#define CACHE_L1D_READ              QTEST_PERF_CACHE_CONFIG(L1D, READ, ACCESS)
#define CACHE_L1D_WRITE             QTEST_PERF_CACHE_CONFIG(L1D, WRITE, ACCESS)
#define CACHE_L1D_PREFETCH          QTEST_PERF_CACHE_CONFIG(L1D, PREFETCH, ACCESS)
#define CACHE_L1I_READ              QTEST_PERF_CACHE_CONFIG(L1I, READ, ACCESS)
#define CACHE_L1I_PREFETCH          QTEST_PERF_CACHE_CONFIG(L1I, PREFETCH, ACCESS)
#define CACHE_LLC_READ              QTEST_PERF_CACHE_CONFIG(LL, READ, ACCESS)
#define CACHE_LLC_WRITE             QTEST_PERF_CACHE_CONFIG(LL, WRITE, ACCESS)
#define CACHE_LLC_PREFETCH          QTEST_PERF_CACHE_CONFIG(LL, PREFETCH, ACCESS)

// Cache misses
#define CACHE_L1D_READ_MISS         QTEST_PERF_CACHE_CONFIG(L1D, READ, MISS)
#define CACHE_L1D_WRITE_MISS        QTEST_PERF_CACHE_CONFIG(L1D, WRITE, MISS)
#define CACHE_L1D_PREFETCH_MISS     QTEST_PERF_CACHE_CONFIG(L1D, PREFETCH, MISS)
#define CACHE_L1I_READ_MISS         QTEST_PERF_CACHE_CONFIG(L1I, READ, MISS)
#define CACHE_L1I_PREFETCH_MISS     QTEST_PERF_CACHE_CONFIG(L1I, PREFETCH, MISS)
#define CACHE_LLC_READ_MISS         QTEST_PERF_CACHE_CONFIG(LL, READ, MISS)
#define CACHE_LLC_WRITE_MISS        QTEST_PERF_CACHE_CONFIG(LL, WRITE, MISS)
#define CACHE_LLC_PREFETCH_MISS     QTEST_PERF_CACHE_CONFIG(LL, PREFETCH, MISS)

// Branch prediction unit
#define CACHE_BRANCH_READ           QTEST_PERF_CACHE_CONFIG(BPU, READ, ACCESS)
#define CACHE_BRANCH_READ_MISS      QTEST_PERF_CACHE_CONFIG(BPU, READ, MISS)
| 48 | |
| 49 | QT_BEGIN_NAMESPACE |
| 50 | |
| 51 | struct PerfEvent |
| 52 | { |
| 53 | quint32 type; |
| 54 | quint64 config; |
| 55 | }; |
| 56 | Q_GLOBAL_STATIC(QList<PerfEvent>, eventTypes); |
| 57 | |
| 58 | static QList<PerfEvent> defaultCounters() |
| 59 | { |
| 60 | return { |
| 61 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
| 62 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
| 63 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
| 64 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
| 65 | }; |
| 66 | } |
| 67 | |
// This class does not exist in the API, so its qdoc comment marker was removed.
| 69 | |
| 70 | /* |
| 71 | \class QBenchmarkPerfEvents |
| 72 | \brief The Linux perf events benchmark backend |
| 73 | |
| 74 | This benchmark backend uses the Linux Performance Counters interface, |
| 75 | introduced with the Linux kernel v2.6.31. The interface is done by one |
| 76 | system call (perf_event_open) which takes an attribute structure and |
| 77 | returns a file descriptor. |
| 78 | |
| 79 | More information: |
| 80 | \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt> |
| 81 | \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c> |
| 82 | (note: as of v3.3.1, the documentation is out-of-date with the kernel |
| 83 | interface, so reading the source code of existing tools is necessary) |
| 84 | |
    This benchlib backend monitors the current process as well as any child
    processes it launches. We do not try to benchmark in kernel or hypervisor
    mode, as that usually requires elevated privileges.
| 88 | */ |
| 89 | |
| 90 | static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) |
| 91 | { |
| 92 | #ifdef SYS_perf_event_open |
| 93 | // syscall() returns long, but perf_event_open() is used to get a file descriptor |
| 94 | return int(syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags)); |
| 95 | #else |
| 96 | Q_UNUSED(attr); |
| 97 | Q_UNUSED(pid); |
| 98 | Q_UNUSED(cpu); |
| 99 | Q_UNUSED(group_fd); |
| 100 | Q_UNUSED(flags); |
| 101 | errno = ENOSYS; |
| 102 | return -1; |
| 103 | #endif |
| 104 | } |
| 105 | |
| 106 | bool QBenchmarkPerfEventsMeasurer::isAvailable() |
| 107 | { |
| 108 | // this generates an EFAULT because attr == NULL if perf_event_open is available |
| 109 | // if the kernel is too old, it generates ENOSYS |
| 110 | return perf_event_open(attr: nullptr, pid: 0, cpu: 0, group_fd: 0, flags: 0) == -1 && errno != ENOSYS; |
| 111 | } |
| 112 | |
| 113 | /* Event list structure |
| 114 | The following table provides the list of supported events |
| 115 | |
| 116 | Event type Event counter Unit Name and aliases |
| 117 | HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles |
| 118 | HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles |
| 119 | HARDWARE INSTRUCTIONS Instructions instructions |
| 120 | HARDWARE CACHE_REFERENCES CacheReferences cache-references |
| 121 | HARDWARE CACHE_MISSES CacheMisses cache-misses |
| 122 | HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches |
| 123 | HARDWARE BRANCH_MISSES BranchMisses branch-misses |
| 124 | HARDWARE BUS_CYCLES BusCycles bus-cycles |
| 125 | HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend |
| 126 | HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend |
| 127 | SOFTWARE CPU_CLOCK WalltimeNanoseconds cpu-clock |
| 128 | SOFTWARE TASK_CLOCK WalltimeNanoseconds task-clock |
| 129 | SOFTWARE PAGE_FAULTS PageFaults page-faults faults |
| 130 | SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults |
| 131 | SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults |
| 132 | SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs |
| 133 | SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations |
| 134 | SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults |
| 135 | SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults |
| 136 | HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads |
| 137 | HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores |
| 138 | HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches |
| 139 | HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads |
| 140 | HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches |
| 141 | HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads |
| 142 | HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores |
| 143 | HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches |
| 144 | HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses |
| 145 | HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses |
| 146 | HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses |
| 147 | HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses |
| 148 | HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses |
| 149 | HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses |
| 150 | HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses |
| 151 | HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses |
| 152 | HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts |
| 153 | HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses |
| 154 | |
| 155 | Use the following Perl script to re-generate the list |
| 156 | === cut perl === |
| 157 | #!/usr/bin/env perl |
| 158 | # Load all entries into %map |
| 159 | while (<STDIN>) { |
| 160 | m/^\s*(.*)\s*$/; |
| 161 | @_ = split /\s+/, $1; |
| 162 | $type = shift @_; |
| 163 | $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" : |
| 164 | $type eq "SOFTWARE" ? "PERF_COUNT_SW_" : |
| 165 | $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_; |
| 166 | $unit = shift @_; |
| 167 | |
| 168 | for $string (@_) { |
| 169 | die "$string was already seen!" if defined($map{$string}); |
| 170 | $map{$string} = [-1, $type, $id, $unit]; |
| 171 | push @strings, $string; |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | # sort the map and print the string list |
| 176 | @strings = sort @strings; |
| 177 | print "static const char eventlist_strings[] = \n"; |
| 178 | $counter = 0; |
| 179 | for $entry (@strings) { |
| 180 | print " \"$entry\\0\"\n"; |
| 181 | $map{$entry}[0] = $counter; |
| 182 | $counter += 1 + length $entry; |
| 183 | } |
| 184 | |
| 185 | # print the table |
| 186 | print " \"\\0\";\n\nstatic const Events eventlist[] = {\n"; |
| 187 | for $entry (sort @strings) { |
| 188 | printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n", |
| 189 | $map{$entry}[0], |
| 190 | $map{$entry}[1], |
| 191 | $map{$entry}[2], |
| 192 | $map{$entry}[3]; |
| 193 | } |
| 194 | print "};\n"; |
| 195 | === cut perl === |
| 196 | */ |
| 197 | |
| 198 | struct Events { |
| 199 | unsigned offset; |
| 200 | quint32 type; |
| 201 | quint64 event_id; |
| 202 | QTest::QBenchmarkMetric metric; |
| 203 | }; |
| 204 | |
/* -- BEGIN GENERATED CODE -- */
// All counter names and aliases, sorted, each terminated by a NUL and packed
// back to back. Entries of eventlist refer to a name by its byte offset here.
// Do not edit by hand: regenerate with the Perl script in the comment above.
static const char eventlist_strings[] =
    "alignment-faults\0"
    "branch-instructions\0"
    "branch-load-misses\0"
    "branch-loads\0"
    "branch-mispredicts\0"
    "branch-misses\0"
    "branch-predicts\0"
    "branch-read-misses\0"
    "branch-reads\0"
    "branches\0"
    "bus-cycles\0"
    "cache-misses\0"
    "cache-references\0"
    "context-switches\0"
    "cpu-clock\0"
    "cpu-cycles\0"
    "cpu-migrations\0"
    "cs\0"
    "cycles\0"
    "emulation-faults\0"
    "faults\0"
    "idle-cycles-backend\0"
    "idle-cycles-frontend\0"
    "instructions\0"
    "l1d-cache-load-misses\0"
    "l1d-cache-loads\0"
    "l1d-cache-prefetch-misses\0"
    "l1d-cache-prefetches\0"
    "l1d-cache-read-misses\0"
    "l1d-cache-reads\0"
    "l1d-cache-store-misses\0"
    "l1d-cache-stores\0"
    "l1d-cache-write-misses\0"
    "l1d-cache-writes\0"
    "l1d-load-misses\0"
    "l1d-loads\0"
    "l1d-prefetch-misses\0"
    "l1d-prefetches\0"
    "l1d-read-misses\0"
    "l1d-reads\0"
    "l1d-store-misses\0"
    "l1d-stores\0"
    "l1d-write-misses\0"
    "l1d-writes\0"
    "l1i-cache-load-misses\0"
    "l1i-cache-loads\0"
    "l1i-cache-prefetch-misses\0"
    "l1i-cache-prefetches\0"
    "l1i-cache-read-misses\0"
    "l1i-cache-reads\0"
    "l1i-load-misses\0"
    "l1i-loads\0"
    "l1i-prefetch-misses\0"
    "l1i-prefetches\0"
    "l1i-read-misses\0"
    "l1i-reads\0"
    "llc-cache-load-misses\0"
    "llc-cache-loads\0"
    "llc-cache-prefetch-misses\0"
    "llc-cache-prefetches\0"
    "llc-cache-read-misses\0"
    "llc-cache-reads\0"
    "llc-cache-store-misses\0"
    "llc-cache-stores\0"
    "llc-cache-write-misses\0"
    "llc-cache-writes\0"
    "llc-load-misses\0"
    "llc-loads\0"
    "llc-prefetch-misses\0"
    "llc-prefetches\0"
    "llc-read-misses\0"
    "llc-reads\0"
    "llc-store-misses\0"
    "llc-stores\0"
    "llc-write-misses\0"
    "llc-writes\0"
    "major-faults\0"
    "migrations\0"
    "minor-faults\0"
    "page-faults\0"
    "ref-cycles\0"
    "stalled-cycles-backend\0"
    "stalled-cycles-frontend\0"
    "task-clock\0"
    "\0";
| 292 | |
| 293 | static const Events eventlist[] = { |
| 294 | { .offset: 0, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_ALIGNMENT_FAULTS, .metric: QTest::AlignmentFaults }, |
| 295 | { .offset: 17, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions }, |
| 296 | { .offset: 37, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
| 297 | { .offset: 56, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
| 298 | { .offset: 69, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
| 299 | { .offset: 88, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_MISSES, .metric: QTest::BranchMisses }, |
| 300 | { .offset: 102, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
| 301 | { .offset: 118, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
| 302 | { .offset: 137, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
| 303 | { .offset: 150, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions }, |
| 304 | { .offset: 159, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BUS_CYCLES, .metric: QTest::BusCycles }, |
| 305 | { .offset: 170, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_MISSES, .metric: QTest::CacheMisses }, |
| 306 | { .offset: 183, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_REFERENCES, .metric: QTest::CacheReferences }, |
| 307 | { .offset: 200, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches }, |
| 308 | { .offset: 217, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_CLOCK, .metric: QTest::WalltimeNanoseconds }, |
| 309 | { .offset: 227, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles }, |
| 310 | { .offset: 238, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations }, |
| 311 | { .offset: 253, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches }, |
| 312 | { .offset: 256, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles }, |
| 313 | { .offset: 263, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_EMULATION_FAULTS, .metric: QTest::EmulationFaults }, |
| 314 | { .offset: 280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults }, |
| 315 | { .offset: 287, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles }, |
| 316 | { .offset: 307, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles }, |
| 317 | { .offset: 328, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_INSTRUCTIONS, .metric: QTest::Instructions }, |
| 318 | { .offset: 341, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
| 319 | { .offset: 363, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
| 320 | { .offset: 379, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
| 321 | { .offset: 405, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches }, |
| 322 | { .offset: 426, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
| 323 | { .offset: 448, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
| 324 | { .offset: 464, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
| 325 | { .offset: 487, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
| 326 | { .offset: 504, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
| 327 | { .offset: 527, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
| 328 | { .offset: 544, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
| 329 | { .offset: 560, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
| 330 | { .offset: 570, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
| 331 | { .offset: 590, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches }, |
| 332 | { .offset: 605, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
| 333 | { .offset: 621, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
| 334 | { .offset: 631, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
| 335 | { .offset: 648, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
| 336 | { .offset: 659, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
| 337 | { .offset: 676, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
| 338 | { .offset: 687, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
| 339 | { .offset: 709, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
| 340 | { .offset: 725, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
| 341 | { .offset: 751, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches }, |
| 342 | { .offset: 772, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
| 343 | { .offset: 794, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
| 344 | { .offset: 810, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
| 345 | { .offset: 826, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
| 346 | { .offset: 836, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
| 347 | { .offset: 856, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches }, |
| 348 | { .offset: 871, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
| 349 | { .offset: 887, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
| 350 | { .offset: 897, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
| 351 | { .offset: 919, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
| 352 | { .offset: 935, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
| 353 | { .offset: 961, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches }, |
| 354 | { .offset: 982, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
| 355 | { .offset: 1004, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
| 356 | { .offset: 1020, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
| 357 | { .offset: 1043, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
| 358 | { .offset: 1060, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
| 359 | { .offset: 1083, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
| 360 | { .offset: 1100, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
| 361 | { .offset: 1116, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
| 362 | { .offset: 1126, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
| 363 | { .offset: 1146, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches }, |
| 364 | { .offset: 1161, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
| 365 | { .offset: 1177, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
| 366 | { .offset: 1187, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
| 367 | { .offset: 1204, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
| 368 | { .offset: 1215, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
| 369 | { .offset: 1232, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
| 370 | { .offset: 1243, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MAJ, .metric: QTest::MajorPageFaults }, |
| 371 | { .offset: 1256, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations }, |
| 372 | { .offset: 1267, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MIN, .metric: QTest::MinorPageFaults }, |
| 373 | { .offset: 1280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults }, |
| 374 | { .offset: 1292, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_REF_CPU_CYCLES, .metric: QTest::RefCPUCycles }, |
| 375 | { .offset: 1303, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles }, |
| 376 | { .offset: 1326, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles }, |
| 377 | { .offset: 1350, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_TASK_CLOCK, .metric: QTest::WalltimeNanoseconds }, |
| 378 | }; |
| 379 | /* -- END GENERATED CODE -- */ |
| 380 | |
| 381 | static QTest::QBenchmarkMetric metricForEvent(PerfEvent counter) |
| 382 | { |
| 383 | for (const Events &ev : eventlist) { |
| 384 | if (ev.type == counter.type && ev.event_id == counter.config) |
| 385 | return ev.metric; |
| 386 | } |
| 387 | return QTest::Events; |
| 388 | } |
| 389 | |
| 390 | void QBenchmarkPerfEventsMeasurer::setCounter(const char *name) |
| 391 | { |
| 392 | eventTypes->clear(); |
| 393 | std::string_view input = name; |
| 394 | if (qsizetype idx = input.find(c: ':'); idx >= 0) |
| 395 | input = input.substr(pos: 0, n: idx); |
| 396 | |
| 397 | while (!input.empty()) { |
| 398 | std::string_view countername = input; |
| 399 | if (qsizetype idx = countername.find(c: ','); idx >= 0) |
| 400 | countername = countername.substr(pos: 0, n: idx); |
| 401 | |
| 402 | for (const Events &ev : eventlist) { |
| 403 | int c = countername.compare(str: eventlist_strings + ev.offset); |
| 404 | if (c > 0) |
| 405 | continue; |
| 406 | if (c < 0) { |
| 407 | fprintf(stderr, format: "ERROR: Performance counter type '%.*s' is unknown\n" , |
| 408 | int(countername.size()), countername.data()); |
| 409 | exit(status: 1); |
| 410 | } |
| 411 | eventTypes->append(t: { .type: ev.type, .config: ev.event_id }); |
| 412 | break; |
| 413 | } |
| 414 | |
| 415 | if (countername.size() == input.size()) |
| 416 | input = {}; |
| 417 | else |
| 418 | input.remove_prefix(n: countername.size() + 1); |
| 419 | } |
| 420 | |
| 421 | // We used to support attributes, but our code was the opposite of what |
| 422 | // perf(1) does, plus QBenchlib isn't exactly expected to be used to |
| 423 | // profile Linux kernel code or launch guest VMs as part of the workload. |
| 424 | // So we keep accepting the colon as a delimiter but ignore it. |
| 425 | } |
| 426 | |
| 427 | void QBenchmarkPerfEventsMeasurer::listCounters() |
| 428 | { |
| 429 | if (!isAvailable()) { |
| 430 | printf(format: "Performance counters are not available on this system\n" ); |
| 431 | return; |
| 432 | } |
| 433 | |
| 434 | printf(format: "The following performance counters are available:\n" ); |
| 435 | for (const Events &ev : eventlist) { |
| 436 | printf(format: " %-30s [%s]\n" , eventlist_strings + ev.offset, |
| 437 | ev.type == PERF_TYPE_HARDWARE ? "hardware" : |
| 438 | ev.type == PERF_TYPE_SOFTWARE ? "software" : |
| 439 | ev.type == PERF_TYPE_HW_CACHE ? "cache" : "other" ); |
| 440 | } |
| 441 | } |
| 442 | |
| 443 | QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default; |
| 444 | |
| 445 | QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer() |
| 446 | { |
| 447 | for (int fd : std::as_const(t&: fds)) |
| 448 | qt_safe_close(fd); |
| 449 | } |
| 450 | |
| 451 | void QBenchmarkPerfEventsMeasurer::start() |
| 452 | { |
| 453 | QT_WARNING_DISABLE_GCC("-Wmissing-field-initializers" ) |
| 454 | QT_WARNING_DISABLE_CLANG("-Wmissing-field-initializers" ) |
| 455 | perf_event_attr attr = { |
| 456 | .size = sizeof attr, |
| 457 | .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING, |
| 458 | .disabled = true, // we'll enable later |
| 459 | .inherit = true, // let children processes inherit the monitoring |
| 460 | .pinned = true, // keep it running in the hardware |
| 461 | .inherit_stat = true, // aggregate all the info from child processes |
| 462 | .task = true, // trace fork/exits |
| 463 | }; |
| 464 | |
| 465 | QList<PerfEvent> &counters = *eventTypes; |
| 466 | if (counters.isEmpty()) |
| 467 | counters = defaultCounters(); |
| 468 | if (fds.isEmpty()) { |
| 469 | pid_t pid = 0; // attach to the current process only |
| 470 | int cpu = -1; // on any CPU |
| 471 | int group_fd = -1; |
| 472 | int flags = PERF_FLAG_FD_CLOEXEC; |
| 473 | |
| 474 | fds.reserve(asize: counters.size()); |
| 475 | for (PerfEvent counter : std::as_const(t&: counters)) { |
| 476 | attr.type = counter.type; |
| 477 | attr.config = counter.config; |
| 478 | int fd = perf_event_open(attr: &attr, pid, cpu, group_fd, flags); |
| 479 | if (fd == -1) { |
| 480 | // probably a paranoid kernel (/proc/sys/kernel/perf_event_paranoid) |
| 481 | attr.exclude_kernel = true; |
| 482 | attr.exclude_hv = true; |
| 483 | fd = perf_event_open(attr: &attr, pid, cpu, group_fd, flags); |
| 484 | } |
| 485 | if (fd == -1) { |
| 486 | perror(s: "QBenchmarkPerfEventsMeasurer::start: perf_event_open" ); |
| 487 | exit(status: 1); |
| 488 | } |
| 489 | |
| 490 | fds.append(t: fd); |
| 491 | } |
| 492 | } |
| 493 | |
| 494 | // enable the counters |
| 495 | for (int fd : std::as_const(t&: fds)) |
| 496 | ::ioctl(fd: fd, PERF_EVENT_IOC_RESET); |
| 497 | prctl(PR_TASK_PERF_EVENTS_ENABLE); |
| 498 | } |
| 499 | |
| 500 | QList<QBenchmarkMeasurerBase::Measurement> QBenchmarkPerfEventsMeasurer::stop() |
| 501 | { |
| 502 | // disable the counters |
| 503 | prctl(PR_TASK_PERF_EVENTS_DISABLE); |
| 504 | |
| 505 | const QList<PerfEvent> &counters = *eventTypes; |
| 506 | QList<Measurement> result(counters.size(), {}); |
| 507 | for (qsizetype i = 0; i < counters.size(); ++i) { |
| 508 | result[i] = readValue(idx: i); |
| 509 | } |
| 510 | return result; |
| 511 | } |
| 512 | |
| 513 | bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(Measurement) |
| 514 | { |
| 515 | return true; |
| 516 | } |
| 517 | |
| 518 | int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int) |
| 519 | { |
| 520 | return 1; |
| 521 | } |
| 522 | |
| 523 | int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int) |
| 524 | { |
| 525 | return 1; |
| 526 | } |
| 527 | |
| 528 | static quint64 rawReadValue(int fd) |
| 529 | { |
| 530 | /* from the kernel docs: |
| 531 | * struct read_format { |
| 532 | * { u64 value; |
| 533 | * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED |
| 534 | * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING |
| 535 | * { u64 id; } && PERF_FORMAT_ID |
| 536 | * } && !PERF_FORMAT_GROUP |
| 537 | */ |
| 538 | |
| 539 | struct read_format { |
| 540 | quint64 value; |
| 541 | quint64 time_enabled; |
| 542 | quint64 time_running; |
| 543 | } results; |
| 544 | |
| 545 | size_t nread = 0; |
| 546 | while (nread < sizeof results) { |
| 547 | char *ptr = reinterpret_cast<char *>(&results); |
| 548 | qint64 r = qt_safe_read(fd, data: ptr + nread, maxlen: sizeof results - nread); |
| 549 | if (r < 0) { |
| 550 | perror(s: "QBenchmarkPerfEventsMeasurer::readValue: reading the results" ); |
| 551 | exit(status: 1); |
| 552 | } |
| 553 | nread += quint64(r); |
| 554 | } |
| 555 | |
| 556 | if (results.time_running == results.time_enabled) |
| 557 | return results.value; |
| 558 | |
| 559 | // scale the results, though this shouldn't happen! |
| 560 | return results.value * (double(results.time_running) / double(results.time_enabled)); |
| 561 | } |
| 562 | |
| 563 | QBenchmarkMeasurerBase::Measurement QBenchmarkPerfEventsMeasurer::readValue(qsizetype idx) |
| 564 | { |
| 565 | quint64 raw = rawReadValue(fd: fds.at(i: idx)); |
| 566 | return { .value: qreal(qint64(raw)), .metric: metricForEvent(counter: eventTypes->at(i: idx)) }; |
| 567 | } |
| 568 | |
| 569 | QT_END_NAMESPACE |
| 570 | |
| 571 | #endif |
| 572 | |