1 | // Copyright (C) 2016 Intel Corporation. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qbenchmarkperfevents_p.h" |
5 | #include "qbenchmarkmetric.h" |
6 | #include "qbenchmark_p.h" |
7 | |
8 | #ifdef QTESTLIB_USE_PERF_EVENTS |
9 | |
10 | // include the qcore_unix_p.h without core-private |
11 | // we only use inline functions anyway |
12 | #include "../corelib/kernel/qcore_unix_p.h" |
13 | |
14 | #include <sys/types.h> |
15 | #include <errno.h> |
16 | #include <fcntl.h> |
17 | #include <string.h> |
18 | #include <stdio.h> |
19 | |
20 | #include <sys/ioctl.h> |
21 | #include <sys/prctl.h> |
22 | #include <sys/syscall.h> |
23 | |
24 | #include "3rdparty/linux_perf_event_p.h" |
25 | |
// For PERF_TYPE_HW_CACHE events the config value is a bitmask:
//   bits  0 to  7: cache type
//   bits  8 to 15: cache operation
//   bits 16 to 23: cache result
// CACHE_EVENT_CONFIG() assembles that value from the suffixes of the
// PERF_COUNT_HW_CACHE_*, PERF_COUNT_HW_CACHE_OP_* and
// PERF_COUNT_HW_CACHE_RESULT_* enumerators.
#define CACHE_EVENT_CONFIG(cache, op, result) \
    (PERF_COUNT_HW_CACHE_ ## cache | PERF_COUNT_HW_CACHE_OP_ ## op << 8 | PERF_COUNT_HW_CACHE_RESULT_ ## result << 16)

// cache accesses
#define CACHE_L1D_READ              CACHE_EVENT_CONFIG(L1D, READ, ACCESS)
#define CACHE_L1D_WRITE             CACHE_EVENT_CONFIG(L1D, WRITE, ACCESS)
#define CACHE_L1D_PREFETCH          CACHE_EVENT_CONFIG(L1D, PREFETCH, ACCESS)
#define CACHE_L1I_READ              CACHE_EVENT_CONFIG(L1I, READ, ACCESS)
#define CACHE_L1I_PREFETCH          CACHE_EVENT_CONFIG(L1I, PREFETCH, ACCESS)
#define CACHE_LLC_READ              CACHE_EVENT_CONFIG(LL, READ, ACCESS)
#define CACHE_LLC_WRITE             CACHE_EVENT_CONFIG(LL, WRITE, ACCESS)
#define CACHE_LLC_PREFETCH          CACHE_EVENT_CONFIG(LL, PREFETCH, ACCESS)

// cache misses
#define CACHE_L1D_READ_MISS         CACHE_EVENT_CONFIG(L1D, READ, MISS)
#define CACHE_L1D_WRITE_MISS        CACHE_EVENT_CONFIG(L1D, WRITE, MISS)
#define CACHE_L1D_PREFETCH_MISS     CACHE_EVENT_CONFIG(L1D, PREFETCH, MISS)
#define CACHE_L1I_READ_MISS         CACHE_EVENT_CONFIG(L1I, READ, MISS)
#define CACHE_L1I_PREFETCH_MISS     CACHE_EVENT_CONFIG(L1I, PREFETCH, MISS)
#define CACHE_LLC_READ_MISS         CACHE_EVENT_CONFIG(LL, READ, MISS)
#define CACHE_LLC_WRITE_MISS        CACHE_EVENT_CONFIG(LL, WRITE, MISS)
#define CACHE_LLC_PREFETCH_MISS     CACHE_EVENT_CONFIG(LL, PREFETCH, MISS)

// branch prediction unit
#define CACHE_BRANCH_READ           CACHE_EVENT_CONFIG(BPU, READ, ACCESS)
#define CACHE_BRANCH_READ_MISS      CACHE_EVENT_CONFIG(BPU, READ, MISS)
48 | |
49 | QT_BEGIN_NAMESPACE |
50 | |
51 | struct PerfEvent |
52 | { |
53 | quint32 type; |
54 | quint64 config; |
55 | }; |
56 | static perf_event_attr attr; |
57 | Q_GLOBAL_STATIC(QList<PerfEvent>, eventTypes); |
58 | |
59 | static void initPerf() |
60 | { |
61 | static bool done; |
62 | if (!done) { |
63 | memset(s: &attr, c: 0, n: sizeof attr); |
64 | attr.size = sizeof attr; |
65 | attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; |
66 | attr.disabled = true; // we'll enable later |
67 | attr.inherit = true; // let children processes inherit the monitoring |
68 | attr.pinned = true; // keep it running in the hardware |
69 | attr.inherit_stat = true; // aggregate all the info from child processes |
70 | attr.task = true; // trace fork/exits |
71 | |
72 | done = true; |
73 | } |
74 | } |
75 | |
76 | static QList<PerfEvent> defaultCounters() |
77 | { |
78 | return { |
79 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
80 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
81 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
82 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
83 | }; |
84 | } |
85 | |
86 | // This class does not exist in the API so its qdoc comment marker was removed. |
87 | |
88 | /* |
89 | \class QBenchmarkPerfEvents |
90 | \brief The Linux perf events benchmark backend |
91 | |
92 | This benchmark backend uses the Linux Performance Counters interface, |
93 | introduced with the Linux kernel v2.6.31. The interface is done by one |
94 | system call (perf_event_open) which takes an attribute structure and |
95 | returns a file descriptor. |
96 | |
97 | More information: |
98 | \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt> |
99 | \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c> |
100 | (note: as of v3.3.1, the documentation is out-of-date with the kernel |
101 | interface, so reading the source code of existing tools is necessary) |
102 | |
103 | This benchlib backend monitors the current process as well as any child |
104 | processes it launches. We do not try to benchmark in kernel or hypervisor |
105 | mode, as that usually requires elevated privileges. |
106 | */ |
107 | |
108 | static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) |
109 | { |
110 | #ifdef SYS_perf_event_open |
111 | // syscall() returns long, but perf_event_open() is used to get a file descriptor |
112 | return int(syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags)); |
113 | #else |
114 | Q_UNUSED(attr); |
115 | Q_UNUSED(pid); |
116 | Q_UNUSED(cpu); |
117 | Q_UNUSED(group_fd); |
118 | Q_UNUSED(flags); |
119 | errno = ENOSYS; |
120 | return -1; |
121 | #endif |
122 | } |
123 | |
124 | bool QBenchmarkPerfEventsMeasurer::isAvailable() |
125 | { |
126 | // this generates an EFAULT because attr == NULL if perf_event_open is available |
127 | // if the kernel is too old, it generates ENOSYS |
128 | return perf_event_open(attr: nullptr, pid: 0, cpu: 0, group_fd: 0, flags: 0) == -1 && errno != ENOSYS; |
129 | } |
130 | |
131 | /* Event list structure |
132 | The following table provides the list of supported events |
133 | |
134 | Event type Event counter Unit Name and aliases |
135 | HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles |
136 | HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles |
137 | HARDWARE INSTRUCTIONS Instructions instructions |
138 | HARDWARE CACHE_REFERENCES CacheReferences cache-references |
139 | HARDWARE CACHE_MISSES CacheMisses cache-misses |
140 | HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches |
141 | HARDWARE BRANCH_MISSES BranchMisses branch-misses |
142 | HARDWARE BUS_CYCLES BusCycles bus-cycles |
143 | HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend |
144 | HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend |
145 | SOFTWARE CPU_CLOCK WalltimeNanoseconds cpu-clock |
146 | SOFTWARE TASK_CLOCK WalltimeNanoseconds task-clock |
147 | SOFTWARE PAGE_FAULTS PageFaults page-faults faults |
148 | SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults |
149 | SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults |
150 | SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs |
151 | SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations |
152 | SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults |
153 | SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults |
154 | HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads |
155 | HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores |
156 | HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches |
157 | HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads |
158 | HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches |
159 | HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads |
160 | HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores |
161 | HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches |
162 | HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses |
163 | HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses |
164 | HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses |
165 | HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses |
166 | HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses |
167 | HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses |
168 | HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses |
169 | HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses |
170 | HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts |
171 | HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses |
172 | |
173 | Use the following Perl script to re-generate the list |
174 | === cut perl === |
175 | #!/usr/bin/env perl |
176 | # Load all entries into %map |
177 | while (<STDIN>) { |
178 | m/^\s*(.*)\s*$/; |
179 | @_ = split /\s+/, $1; |
180 | $type = shift @_; |
181 | $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" : |
182 | $type eq "SOFTWARE" ? "PERF_COUNT_SW_" : |
183 | $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_; |
184 | $unit = shift @_; |
185 | |
186 | for $string (@_) { |
187 | die "$string was already seen!" if defined($map{$string}); |
188 | $map{$string} = [-1, $type, $id, $unit]; |
189 | push @strings, $string; |
190 | } |
191 | } |
192 | |
193 | # sort the map and print the string list |
194 | @strings = sort @strings; |
195 | print "static const char eventlist_strings[] = \n"; |
196 | $counter = 0; |
197 | for $entry (@strings) { |
198 | print " \"$entry\\0\"\n"; |
199 | $map{$entry}[0] = $counter; |
200 | $counter += 1 + length $entry; |
201 | } |
202 | |
203 | # print the table |
204 | print " \"\\0\";\n\nstatic const Events eventlist[] = {\n"; |
205 | for $entry (sort @strings) { |
206 | printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n", |
207 | $map{$entry}[0], |
208 | $map{$entry}[1], |
209 | $map{$entry}[2], |
210 | $map{$entry}[3]; |
211 | } |
212 | print "};\n"; |
213 | === cut perl === |
214 | */ |
215 | |
216 | struct Events { |
217 | unsigned offset; |
218 | quint32 type; |
219 | quint64 event_id; |
220 | QTest::QBenchmarkMetric metric; |
221 | }; |
222 | |
223 | /* -- BEGIN GENERATED CODE -- */ |
224 | static const char eventlist_strings[] = |
225 | "alignment-faults\0" |
226 | "branch-instructions\0" |
227 | "branch-load-misses\0" |
228 | "branch-loads\0" |
229 | "branch-mispredicts\0" |
230 | "branch-misses\0" |
231 | "branch-predicts\0" |
232 | "branch-read-misses\0" |
233 | "branch-reads\0" |
234 | "branches\0" |
235 | "bus-cycles\0" |
236 | "cache-misses\0" |
237 | "cache-references\0" |
238 | "context-switches\0" |
239 | "cpu-clock\0" |
240 | "cpu-cycles\0" |
241 | "cpu-migrations\0" |
242 | "cs\0" |
243 | "cycles\0" |
244 | "emulation-faults\0" |
245 | "faults\0" |
246 | "idle-cycles-backend\0" |
247 | "idle-cycles-frontend\0" |
248 | "instructions\0" |
249 | "l1d-cache-load-misses\0" |
250 | "l1d-cache-loads\0" |
251 | "l1d-cache-prefetch-misses\0" |
252 | "l1d-cache-prefetches\0" |
253 | "l1d-cache-read-misses\0" |
254 | "l1d-cache-reads\0" |
255 | "l1d-cache-store-misses\0" |
256 | "l1d-cache-stores\0" |
257 | "l1d-cache-write-misses\0" |
258 | "l1d-cache-writes\0" |
259 | "l1d-load-misses\0" |
260 | "l1d-loads\0" |
261 | "l1d-prefetch-misses\0" |
262 | "l1d-prefetches\0" |
263 | "l1d-read-misses\0" |
264 | "l1d-reads\0" |
265 | "l1d-store-misses\0" |
266 | "l1d-stores\0" |
267 | "l1d-write-misses\0" |
268 | "l1d-writes\0" |
269 | "l1i-cache-load-misses\0" |
270 | "l1i-cache-loads\0" |
271 | "l1i-cache-prefetch-misses\0" |
272 | "l1i-cache-prefetches\0" |
273 | "l1i-cache-read-misses\0" |
274 | "l1i-cache-reads\0" |
275 | "l1i-load-misses\0" |
276 | "l1i-loads\0" |
277 | "l1i-prefetch-misses\0" |
278 | "l1i-prefetches\0" |
279 | "l1i-read-misses\0" |
280 | "l1i-reads\0" |
281 | "llc-cache-load-misses\0" |
282 | "llc-cache-loads\0" |
283 | "llc-cache-prefetch-misses\0" |
284 | "llc-cache-prefetches\0" |
285 | "llc-cache-read-misses\0" |
286 | "llc-cache-reads\0" |
287 | "llc-cache-store-misses\0" |
288 | "llc-cache-stores\0" |
289 | "llc-cache-write-misses\0" |
290 | "llc-cache-writes\0" |
291 | "llc-load-misses\0" |
292 | "llc-loads\0" |
293 | "llc-prefetch-misses\0" |
294 | "llc-prefetches\0" |
295 | "llc-read-misses\0" |
296 | "llc-reads\0" |
297 | "llc-store-misses\0" |
298 | "llc-stores\0" |
299 | "llc-write-misses\0" |
300 | "llc-writes\0" |
301 | "major-faults\0" |
302 | "migrations\0" |
303 | "minor-faults\0" |
304 | "page-faults\0" |
305 | "ref-cycles\0" |
306 | "stalled-cycles-backend\0" |
307 | "stalled-cycles-frontend\0" |
308 | "task-clock\0" |
309 | "\0" ; |
310 | |
311 | static const Events eventlist[] = { |
312 | { .offset: 0, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_ALIGNMENT_FAULTS, .metric: QTest::AlignmentFaults }, |
313 | { .offset: 17, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions }, |
314 | { .offset: 37, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
315 | { .offset: 56, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
316 | { .offset: 69, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
317 | { .offset: 88, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_MISSES, .metric: QTest::BranchMisses }, |
318 | { .offset: 102, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
319 | { .offset: 118, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
320 | { .offset: 137, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
321 | { .offset: 150, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions }, |
322 | { .offset: 159, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BUS_CYCLES, .metric: QTest::BusCycles }, |
323 | { .offset: 170, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_MISSES, .metric: QTest::CacheMisses }, |
324 | { .offset: 183, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_REFERENCES, .metric: QTest::CacheReferences }, |
325 | { .offset: 200, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches }, |
326 | { .offset: 217, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_CLOCK, .metric: QTest::WalltimeNanoseconds }, |
327 | { .offset: 227, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles }, |
328 | { .offset: 238, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations }, |
329 | { .offset: 253, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches }, |
330 | { .offset: 256, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles }, |
331 | { .offset: 263, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_EMULATION_FAULTS, .metric: QTest::EmulationFaults }, |
332 | { .offset: 280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults }, |
333 | { .offset: 287, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles }, |
334 | { .offset: 307, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles }, |
335 | { .offset: 328, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_INSTRUCTIONS, .metric: QTest::Instructions }, |
336 | { .offset: 341, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
337 | { .offset: 363, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
338 | { .offset: 379, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
339 | { .offset: 405, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches }, |
340 | { .offset: 426, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
341 | { .offset: 448, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
342 | { .offset: 464, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
343 | { .offset: 487, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
344 | { .offset: 504, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
345 | { .offset: 527, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
346 | { .offset: 544, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
347 | { .offset: 560, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
348 | { .offset: 570, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
349 | { .offset: 590, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches }, |
350 | { .offset: 605, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
351 | { .offset: 621, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
352 | { .offset: 631, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
353 | { .offset: 648, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
354 | { .offset: 659, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
355 | { .offset: 676, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
356 | { .offset: 687, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
357 | { .offset: 709, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
358 | { .offset: 725, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
359 | { .offset: 751, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches }, |
360 | { .offset: 772, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
361 | { .offset: 794, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
362 | { .offset: 810, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
363 | { .offset: 826, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
364 | { .offset: 836, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
365 | { .offset: 856, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches }, |
366 | { .offset: 871, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
367 | { .offset: 887, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
368 | { .offset: 897, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
369 | { .offset: 919, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
370 | { .offset: 935, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
371 | { .offset: 961, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches }, |
372 | { .offset: 982, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
373 | { .offset: 1004, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
374 | { .offset: 1020, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
375 | { .offset: 1043, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
376 | { .offset: 1060, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
377 | { .offset: 1083, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
378 | { .offset: 1100, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
379 | { .offset: 1116, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
380 | { .offset: 1126, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
381 | { .offset: 1146, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches }, |
382 | { .offset: 1161, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
383 | { .offset: 1177, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
384 | { .offset: 1187, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
385 | { .offset: 1204, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
386 | { .offset: 1215, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
387 | { .offset: 1232, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
388 | { .offset: 1243, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MAJ, .metric: QTest::MajorPageFaults }, |
389 | { .offset: 1256, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations }, |
390 | { .offset: 1267, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MIN, .metric: QTest::MinorPageFaults }, |
391 | { .offset: 1280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults }, |
392 | { .offset: 1292, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_REF_CPU_CYCLES, .metric: QTest::RefCPUCycles }, |
393 | { .offset: 1303, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles }, |
394 | { .offset: 1326, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles }, |
395 | { .offset: 1350, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_TASK_CLOCK, .metric: QTest::WalltimeNanoseconds }, |
396 | }; |
397 | /* -- END GENERATED CODE -- */ |
398 | |
399 | static QTest::QBenchmarkMetric metricForEvent(PerfEvent counter) |
400 | { |
401 | for (const Events &ev : eventlist) { |
402 | if (ev.type == counter.type && ev.event_id == counter.config) |
403 | return ev.metric; |
404 | } |
405 | return QTest::Events; |
406 | } |
407 | |
408 | void QBenchmarkPerfEventsMeasurer::setCounter(const char *name) |
409 | { |
410 | initPerf(); |
411 | eventTypes->clear(); |
412 | std::string_view input = name; |
413 | if (qsizetype idx = input.find(c: ':'); idx >= 0) |
414 | input = input.substr(pos: 0, n: idx); |
415 | |
416 | while (!input.empty()) { |
417 | std::string_view countername = input; |
418 | if (qsizetype idx = countername.find(c: ','); idx >= 0) |
419 | countername = countername.substr(pos: 0, n: idx); |
420 | |
421 | for (const Events &ev : eventlist) { |
422 | int c = countername.compare(str: eventlist_strings + ev.offset); |
423 | if (c > 0) |
424 | continue; |
425 | if (c < 0) { |
426 | fprintf(stderr, format: "ERROR: Performance counter type '%.*s' is unknown\n" , |
427 | int(countername.size()), countername.data()); |
428 | exit(status: 1); |
429 | } |
430 | eventTypes->append(t: { .type: ev.type, .config: ev.event_id }); |
431 | break; |
432 | } |
433 | |
434 | if (countername.size() == input.size()) |
435 | input = {}; |
436 | else |
437 | input.remove_prefix(n: countername.size() + 1); |
438 | } |
439 | |
440 | // We used to support attributes, but our code was the opposite of what |
441 | // perf(1) does, plus QBenchlib isn't exactly expected to be used to |
442 | // profile Linux kernel code or launch guest VMs as part of the workload. |
443 | // So we keep accepting the colon as a delimiter but ignore it. |
444 | } |
445 | |
446 | void QBenchmarkPerfEventsMeasurer::listCounters() |
447 | { |
448 | if (!isAvailable()) { |
449 | printf(format: "Performance counters are not available on this system\n" ); |
450 | return; |
451 | } |
452 | |
453 | printf(format: "The following performance counters are available:\n" ); |
454 | for (const Events &ev : eventlist) { |
455 | printf(format: " %-30s [%s]\n" , eventlist_strings + ev.offset, |
456 | ev.type == PERF_TYPE_HARDWARE ? "hardware" : |
457 | ev.type == PERF_TYPE_SOFTWARE ? "software" : |
458 | ev.type == PERF_TYPE_HW_CACHE ? "cache" : "other" ); |
459 | } |
460 | } |
461 | |
462 | QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default; |
463 | |
464 | QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer() |
465 | { |
466 | for (int fd : std::as_const(t&: fds)) |
467 | qt_safe_close(fd); |
468 | } |
469 | |
470 | void QBenchmarkPerfEventsMeasurer::init() |
471 | { |
472 | } |
473 | |
474 | void QBenchmarkPerfEventsMeasurer::start() |
475 | { |
476 | initPerf(); |
477 | QList<PerfEvent> &counters = *eventTypes; |
478 | if (counters.isEmpty()) |
479 | counters = defaultCounters(); |
480 | if (fds.isEmpty()) { |
481 | pid_t pid = 0; // attach to the current process only |
482 | int cpu = -1; // on any CPU |
483 | int group_fd = -1; |
484 | int flags = PERF_FLAG_FD_CLOEXEC; |
485 | |
486 | fds.reserve(asize: counters.size()); |
487 | for (PerfEvent counter : std::as_const(t&: counters)) { |
488 | attr.type = counter.type; |
489 | attr.config = counter.config; |
490 | int fd = perf_event_open(attr: &attr, pid, cpu, group_fd, flags); |
491 | if (fd == -1) { |
492 | // probably a paranoid kernel (/proc/sys/kernel/perf_event_paranoid) |
493 | attr.exclude_kernel = true; |
494 | attr.exclude_hv = true; |
495 | fd = perf_event_open(attr: &attr, pid, cpu, group_fd, flags); |
496 | } |
497 | if (fd == -1) { |
498 | perror(s: "QBenchmarkPerfEventsMeasurer::start: perf_event_open" ); |
499 | exit(status: 1); |
500 | } |
501 | |
502 | fds.append(t: fd); |
503 | } |
504 | } |
505 | |
506 | // enable the counters |
507 | for (int fd : std::as_const(t&: fds)) |
508 | ::ioctl(fd: fd, PERF_EVENT_IOC_RESET); |
509 | prctl(PR_TASK_PERF_EVENTS_ENABLE); |
510 | } |
511 | |
512 | QList<QBenchmarkMeasurerBase::Measurement> QBenchmarkPerfEventsMeasurer::stop() |
513 | { |
514 | // disable the counters |
515 | prctl(PR_TASK_PERF_EVENTS_DISABLE); |
516 | |
517 | const QList<PerfEvent> &counters = *eventTypes; |
518 | QList<Measurement> result(counters.size(), {}); |
519 | for (qsizetype i = 0; i < counters.size(); ++i) { |
520 | result[i] = readValue(idx: i); |
521 | } |
522 | return result; |
523 | } |
524 | |
525 | bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(Measurement) |
526 | { |
527 | return true; |
528 | } |
529 | |
530 | int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int) |
531 | { |
532 | return 1; |
533 | } |
534 | |
535 | int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int) |
536 | { |
537 | return 1; |
538 | } |
539 | |
540 | static quint64 rawReadValue(int fd) |
541 | { |
542 | /* from the kernel docs: |
543 | * struct read_format { |
544 | * { u64 value; |
545 | * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED |
546 | * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING |
547 | * { u64 id; } && PERF_FORMAT_ID |
548 | * } && !PERF_FORMAT_GROUP |
549 | */ |
550 | |
551 | struct read_format { |
552 | quint64 value; |
553 | quint64 time_enabled; |
554 | quint64 time_running; |
555 | } results; |
556 | |
557 | size_t nread = 0; |
558 | while (nread < sizeof results) { |
559 | char *ptr = reinterpret_cast<char *>(&results); |
560 | qint64 r = qt_safe_read(fd, data: ptr + nread, maxlen: sizeof results - nread); |
561 | if (r == -1) { |
562 | perror(s: "QBenchmarkPerfEventsMeasurer::readValue: reading the results" ); |
563 | exit(status: 1); |
564 | } |
565 | nread += quint64(r); |
566 | } |
567 | |
568 | if (results.time_running == results.time_enabled) |
569 | return results.value; |
570 | |
571 | // scale the results, though this shouldn't happen! |
572 | return results.value * (double(results.time_running) / double(results.time_enabled)); |
573 | } |
574 | |
575 | QBenchmarkMeasurerBase::Measurement QBenchmarkPerfEventsMeasurer::readValue(qsizetype idx) |
576 | { |
577 | quint64 raw = rawReadValue(fd: fds.at(i: idx)); |
578 | return { .value: qreal(qint64(raw)), .metric: metricForEvent(counter: eventTypes->at(i: idx)) }; |
579 | } |
580 | |
581 | QT_END_NAMESPACE |
582 | |
583 | #endif |
584 | |