1 | // Copyright (C) 2016 Intel Corporation. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qbenchmarkperfevents_p.h" |
5 | #include "qbenchmarkmetric.h" |
6 | #include "qbenchmark_p.h" |
7 | |
8 | #ifdef QTESTLIB_USE_PERF_EVENTS |
9 | |
10 | // include the qcore_unix_p.h without core-private |
11 | // we only use inline functions anyway |
12 | #include "../corelib/kernel/qcore_unix_p.h" |
13 | |
14 | #include <sys/types.h> |
15 | #include <errno.h> |
16 | #include <fcntl.h> |
17 | #include <string.h> |
18 | #include <stdio.h> |
19 | |
20 | #include <sys/ioctl.h> |
21 | #include <sys/prctl.h> |
22 | #include <sys/syscall.h> |
23 | |
24 | #include "3rdparty/linux/perf_event_p.h" |
25 | |
// For PERF_TYPE_HW_CACHE events the config value is a packed bitmask:
//   bits  0 to  7: cache type      (PERF_COUNT_HW_CACHE_<cache>)
//   bits  8 to 15: cache operation (PERF_COUNT_HW_CACHE_OP_<op>)
//   bits 16 to 23: cache result    (PERF_COUNT_HW_CACHE_RESULT_<result>)
// QTEST_PERF_CACHE_CONFIG pastes the three suffixes onto those prefixes and
// packs the resulting enumerators into a single config value.
#define QTEST_PERF_CACHE_CONFIG(cache, op, result) \
    ((PERF_COUNT_HW_CACHE_ ## cache) \
     | ((PERF_COUNT_HW_CACHE_OP_ ## op) << 8) \
     | ((PERF_COUNT_HW_CACHE_RESULT_ ## result) << 16))

// cache accesses
#define CACHE_L1D_READ          QTEST_PERF_CACHE_CONFIG(L1D, READ, ACCESS)
#define CACHE_L1D_WRITE         QTEST_PERF_CACHE_CONFIG(L1D, WRITE, ACCESS)
#define CACHE_L1D_PREFETCH      QTEST_PERF_CACHE_CONFIG(L1D, PREFETCH, ACCESS)
#define CACHE_L1I_READ          QTEST_PERF_CACHE_CONFIG(L1I, READ, ACCESS)
#define CACHE_L1I_PREFETCH      QTEST_PERF_CACHE_CONFIG(L1I, PREFETCH, ACCESS)
#define CACHE_LLC_READ          QTEST_PERF_CACHE_CONFIG(LL, READ, ACCESS)
#define CACHE_LLC_WRITE         QTEST_PERF_CACHE_CONFIG(LL, WRITE, ACCESS)
#define CACHE_LLC_PREFETCH      QTEST_PERF_CACHE_CONFIG(LL, PREFETCH, ACCESS)

// cache misses
#define CACHE_L1D_READ_MISS     QTEST_PERF_CACHE_CONFIG(L1D, READ, MISS)
#define CACHE_L1D_WRITE_MISS    QTEST_PERF_CACHE_CONFIG(L1D, WRITE, MISS)
#define CACHE_L1D_PREFETCH_MISS QTEST_PERF_CACHE_CONFIG(L1D, PREFETCH, MISS)
#define CACHE_L1I_READ_MISS     QTEST_PERF_CACHE_CONFIG(L1I, READ, MISS)
#define CACHE_L1I_PREFETCH_MISS QTEST_PERF_CACHE_CONFIG(L1I, PREFETCH, MISS)
#define CACHE_LLC_READ_MISS     QTEST_PERF_CACHE_CONFIG(LL, READ, MISS)
#define CACHE_LLC_WRITE_MISS    QTEST_PERF_CACHE_CONFIG(LL, WRITE, MISS)
#define CACHE_LLC_PREFETCH_MISS QTEST_PERF_CACHE_CONFIG(LL, PREFETCH, MISS)

// branch prediction unit
#define CACHE_BRANCH_READ       QTEST_PERF_CACHE_CONFIG(BPU, READ, ACCESS)
#define CACHE_BRANCH_READ_MISS  QTEST_PERF_CACHE_CONFIG(BPU, READ, MISS)
48 | |
49 | QT_BEGIN_NAMESPACE |
50 | |
// One perf counter to open: the (type, config) pair that start() copies into
// perf_event_attr before calling perf_event_open().
struct PerfEvent
{
    quint32 type;   // perf event class (a PERF_TYPE_* value)
    quint64 config; // event id within that class
};
// Counters chosen through setCounter(); start() substitutes defaultCounters()
// when this list is empty.
Q_GLOBAL_STATIC(QList<PerfEvent>, eventTypes);
57 | |
58 | static QList<PerfEvent> defaultCounters() |
59 | { |
60 | return { |
61 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
62 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
63 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
64 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
65 | }; |
66 | } |
67 | |
// This class does not exist in the API so its qdoc comment marker was removed.
69 | |
70 | /* |
71 | \class QBenchmarkPerfEvents |
72 | \brief The Linux perf events benchmark backend |
73 | |
74 | This benchmark backend uses the Linux Performance Counters interface, |
75 | introduced with the Linux kernel v2.6.31. The interface is done by one |
76 | system call (perf_event_open) which takes an attribute structure and |
77 | returns a file descriptor. |
78 | |
79 | More information: |
80 | \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt> |
81 | \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c> |
82 | (note: as of v3.3.1, the documentation is out-of-date with the kernel |
83 | interface, so reading the source code of existing tools is necessary) |
84 | |
    This benchlib backend monitors the current process as well as any child
    processes it launches. We do not try to benchmark in kernel or hypervisor
    mode, as that usually requires elevated privileges.
88 | */ |
89 | |
90 | static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) |
91 | { |
92 | #ifdef SYS_perf_event_open |
93 | // syscall() returns long, but perf_event_open() is used to get a file descriptor |
94 | return int(syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags)); |
95 | #else |
96 | Q_UNUSED(attr); |
97 | Q_UNUSED(pid); |
98 | Q_UNUSED(cpu); |
99 | Q_UNUSED(group_fd); |
100 | Q_UNUSED(flags); |
101 | errno = ENOSYS; |
102 | return -1; |
103 | #endif |
104 | } |
105 | |
106 | bool QBenchmarkPerfEventsMeasurer::isAvailable() |
107 | { |
108 | // this generates an EFAULT because attr == NULL if perf_event_open is available |
109 | // if the kernel is too old, it generates ENOSYS |
110 | return perf_event_open(attr: nullptr, pid: 0, cpu: 0, group_fd: 0, flags: 0) == -1 && errno != ENOSYS; |
111 | } |
112 | |
113 | /* Event list structure |
114 | The following table provides the list of supported events |
115 | |
116 | Event type Event counter Unit Name and aliases |
117 | HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles |
118 | HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles |
119 | HARDWARE INSTRUCTIONS Instructions instructions |
120 | HARDWARE CACHE_REFERENCES CacheReferences cache-references |
121 | HARDWARE CACHE_MISSES CacheMisses cache-misses |
122 | HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches |
123 | HARDWARE BRANCH_MISSES BranchMisses branch-misses |
124 | HARDWARE BUS_CYCLES BusCycles bus-cycles |
125 | HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend |
126 | HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend |
127 | SOFTWARE CPU_CLOCK WalltimeNanoseconds cpu-clock |
128 | SOFTWARE TASK_CLOCK WalltimeNanoseconds task-clock |
129 | SOFTWARE PAGE_FAULTS PageFaults page-faults faults |
130 | SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults |
131 | SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults |
132 | SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs |
133 | SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations |
134 | SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults |
135 | SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults |
136 | HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads |
137 | HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores |
138 | HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches |
139 | HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads |
140 | HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches |
141 | HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads |
142 | HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores |
143 | HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches |
144 | HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses |
145 | HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses |
146 | HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses |
147 | HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses |
148 | HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses |
149 | HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses |
150 | HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses |
151 | HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses |
152 | HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts |
153 | HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses |
154 | |
155 | Use the following Perl script to re-generate the list |
156 | === cut perl === |
157 | #!/usr/bin/env perl |
158 | # Load all entries into %map |
159 | while (<STDIN>) { |
160 | m/^\s*(.*)\s*$/; |
161 | @_ = split /\s+/, $1; |
162 | $type = shift @_; |
163 | $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" : |
164 | $type eq "SOFTWARE" ? "PERF_COUNT_SW_" : |
165 | $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_; |
166 | $unit = shift @_; |
167 | |
168 | for $string (@_) { |
169 | die "$string was already seen!" if defined($map{$string}); |
170 | $map{$string} = [-1, $type, $id, $unit]; |
171 | push @strings, $string; |
172 | } |
173 | } |
174 | |
175 | # sort the map and print the string list |
176 | @strings = sort @strings; |
177 | print "static const char eventlist_strings[] = \n"; |
178 | $counter = 0; |
179 | for $entry (@strings) { |
180 | print " \"$entry\\0\"\n"; |
181 | $map{$entry}[0] = $counter; |
182 | $counter += 1 + length $entry; |
183 | } |
184 | |
185 | # print the table |
186 | print " \"\\0\";\n\nstatic const Events eventlist[] = {\n"; |
187 | for $entry (sort @strings) { |
188 | printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n", |
189 | $map{$entry}[0], |
190 | $map{$entry}[1], |
191 | $map{$entry}[2], |
192 | $map{$entry}[3]; |
193 | } |
194 | print "};\n"; |
195 | === cut perl === |
196 | */ |
197 | |
// One row of the generated event table: ties a counter name to the perf
// event it selects and to the metric reported for it.
struct Events {
    unsigned offset; // byte offset of the NUL-terminated name inside eventlist_strings
    quint32 type; // perf event class (PERF_TYPE_*)
    quint64 event_id; // PERF_COUNT_* or CACHE_* id within that class
    QTest::QBenchmarkMetric metric; // metric returned by metricForEvent() for this event
};
204 | |
205 | /* -- BEGIN GENERATED CODE -- */ |
// All counter names and aliases, each NUL-terminated and concatenated into one
// blob. Entries of eventlist locate their name through Events::offset.
static const char eventlist_strings[] =
    "alignment-faults\0"
    "branch-instructions\0"
    "branch-load-misses\0"
    "branch-loads\0"
    "branch-mispredicts\0"
    "branch-misses\0"
    "branch-predicts\0"
    "branch-read-misses\0"
    "branch-reads\0"
    "branches\0"
    "bus-cycles\0"
    "cache-misses\0"
    "cache-references\0"
    "context-switches\0"
    "cpu-clock\0"
    "cpu-cycles\0"
    "cpu-migrations\0"
    "cs\0"
    "cycles\0"
    "emulation-faults\0"
    "faults\0"
    "idle-cycles-backend\0"
    "idle-cycles-frontend\0"
    "instructions\0"
    "l1d-cache-load-misses\0"
    "l1d-cache-loads\0"
    "l1d-cache-prefetch-misses\0"
    "l1d-cache-prefetches\0"
    "l1d-cache-read-misses\0"
    "l1d-cache-reads\0"
    "l1d-cache-store-misses\0"
    "l1d-cache-stores\0"
    "l1d-cache-write-misses\0"
    "l1d-cache-writes\0"
    "l1d-load-misses\0"
    "l1d-loads\0"
    "l1d-prefetch-misses\0"
    "l1d-prefetches\0"
    "l1d-read-misses\0"
    "l1d-reads\0"
    "l1d-store-misses\0"
    "l1d-stores\0"
    "l1d-write-misses\0"
    "l1d-writes\0"
    "l1i-cache-load-misses\0"
    "l1i-cache-loads\0"
    "l1i-cache-prefetch-misses\0"
    "l1i-cache-prefetches\0"
    "l1i-cache-read-misses\0"
    "l1i-cache-reads\0"
    "l1i-load-misses\0"
    "l1i-loads\0"
    "l1i-prefetch-misses\0"
    "l1i-prefetches\0"
    "l1i-read-misses\0"
    "l1i-reads\0"
    "llc-cache-load-misses\0"
    "llc-cache-loads\0"
    "llc-cache-prefetch-misses\0"
    "llc-cache-prefetches\0"
    "llc-cache-read-misses\0"
    "llc-cache-reads\0"
    "llc-cache-store-misses\0"
    "llc-cache-stores\0"
    "llc-cache-write-misses\0"
    "llc-cache-writes\0"
    "llc-load-misses\0"
    "llc-loads\0"
    "llc-prefetch-misses\0"
    "llc-prefetches\0"
    "llc-read-misses\0"
    "llc-reads\0"
    "llc-store-misses\0"
    "llc-stores\0"
    "llc-write-misses\0"
    "llc-writes\0"
    "major-faults\0"
    "migrations\0"
    "minor-faults\0"
    "page-faults\0"
    "ref-cycles\0"
    "stalled-cycles-backend\0"
    "stalled-cycles-frontend\0"
    "task-clock\0"
    "\0" ;
292 | |
293 | static const Events eventlist[] = { |
294 | { .offset: 0, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_ALIGNMENT_FAULTS, .metric: QTest::AlignmentFaults }, |
295 | { .offset: 17, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions }, |
296 | { .offset: 37, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
297 | { .offset: 56, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
298 | { .offset: 69, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
299 | { .offset: 88, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_MISSES, .metric: QTest::BranchMisses }, |
300 | { .offset: 102, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
301 | { .offset: 118, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
302 | { .offset: 137, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
303 | { .offset: 150, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions }, |
304 | { .offset: 159, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BUS_CYCLES, .metric: QTest::BusCycles }, |
305 | { .offset: 170, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_MISSES, .metric: QTest::CacheMisses }, |
306 | { .offset: 183, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_REFERENCES, .metric: QTest::CacheReferences }, |
307 | { .offset: 200, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches }, |
308 | { .offset: 217, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_CLOCK, .metric: QTest::WalltimeNanoseconds }, |
309 | { .offset: 227, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles }, |
310 | { .offset: 238, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations }, |
311 | { .offset: 253, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches }, |
312 | { .offset: 256, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles }, |
313 | { .offset: 263, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_EMULATION_FAULTS, .metric: QTest::EmulationFaults }, |
314 | { .offset: 280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults }, |
315 | { .offset: 287, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles }, |
316 | { .offset: 307, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles }, |
317 | { .offset: 328, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_INSTRUCTIONS, .metric: QTest::Instructions }, |
318 | { .offset: 341, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
319 | { .offset: 363, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
320 | { .offset: 379, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
321 | { .offset: 405, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches }, |
322 | { .offset: 426, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
323 | { .offset: 448, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
324 | { .offset: 464, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
325 | { .offset: 487, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
326 | { .offset: 504, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
327 | { .offset: 527, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
328 | { .offset: 544, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
329 | { .offset: 560, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
330 | { .offset: 570, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
331 | { .offset: 590, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches }, |
332 | { .offset: 605, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
333 | { .offset: 621, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
334 | { .offset: 631, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
335 | { .offset: 648, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
336 | { .offset: 659, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
337 | { .offset: 676, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
338 | { .offset: 687, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
339 | { .offset: 709, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
340 | { .offset: 725, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
341 | { .offset: 751, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches }, |
342 | { .offset: 772, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
343 | { .offset: 794, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
344 | { .offset: 810, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
345 | { .offset: 826, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
346 | { .offset: 836, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
347 | { .offset: 856, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches }, |
348 | { .offset: 871, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
349 | { .offset: 887, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
350 | { .offset: 897, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
351 | { .offset: 919, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
352 | { .offset: 935, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
353 | { .offset: 961, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches }, |
354 | { .offset: 982, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
355 | { .offset: 1004, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
356 | { .offset: 1020, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
357 | { .offset: 1043, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
358 | { .offset: 1060, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
359 | { .offset: 1083, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
360 | { .offset: 1100, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
361 | { .offset: 1116, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
362 | { .offset: 1126, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
363 | { .offset: 1146, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches }, |
364 | { .offset: 1161, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
365 | { .offset: 1177, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
366 | { .offset: 1187, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
367 | { .offset: 1204, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
368 | { .offset: 1215, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
369 | { .offset: 1232, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
370 | { .offset: 1243, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MAJ, .metric: QTest::MajorPageFaults }, |
371 | { .offset: 1256, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations }, |
372 | { .offset: 1267, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MIN, .metric: QTest::MinorPageFaults }, |
373 | { .offset: 1280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults }, |
374 | { .offset: 1292, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_REF_CPU_CYCLES, .metric: QTest::RefCPUCycles }, |
375 | { .offset: 1303, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles }, |
376 | { .offset: 1326, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles }, |
377 | { .offset: 1350, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_TASK_CLOCK, .metric: QTest::WalltimeNanoseconds }, |
378 | }; |
379 | /* -- END GENERATED CODE -- */ |
380 | |
381 | static QTest::QBenchmarkMetric metricForEvent(PerfEvent counter) |
382 | { |
383 | for (const Events &ev : eventlist) { |
384 | if (ev.type == counter.type && ev.event_id == counter.config) |
385 | return ev.metric; |
386 | } |
387 | return QTest::Events; |
388 | } |
389 | |
390 | void QBenchmarkPerfEventsMeasurer::setCounter(const char *name) |
391 | { |
392 | eventTypes->clear(); |
393 | std::string_view input = name; |
394 | if (qsizetype idx = input.find(c: ':'); idx >= 0) |
395 | input = input.substr(pos: 0, n: idx); |
396 | |
397 | while (!input.empty()) { |
398 | std::string_view countername = input; |
399 | if (qsizetype idx = countername.find(c: ','); idx >= 0) |
400 | countername = countername.substr(pos: 0, n: idx); |
401 | |
402 | for (const Events &ev : eventlist) { |
403 | int c = countername.compare(str: eventlist_strings + ev.offset); |
404 | if (c > 0) |
405 | continue; |
406 | if (c < 0) { |
407 | fprintf(stderr, format: "ERROR: Performance counter type '%.*s' is unknown\n" , |
408 | int(countername.size()), countername.data()); |
409 | exit(status: 1); |
410 | } |
411 | eventTypes->append(t: { .type: ev.type, .config: ev.event_id }); |
412 | break; |
413 | } |
414 | |
415 | if (countername.size() == input.size()) |
416 | input = {}; |
417 | else |
418 | input.remove_prefix(n: countername.size() + 1); |
419 | } |
420 | |
421 | // We used to support attributes, but our code was the opposite of what |
422 | // perf(1) does, plus QBenchlib isn't exactly expected to be used to |
423 | // profile Linux kernel code or launch guest VMs as part of the workload. |
424 | // So we keep accepting the colon as a delimiter but ignore it. |
425 | } |
426 | |
427 | void QBenchmarkPerfEventsMeasurer::listCounters() |
428 | { |
429 | if (!isAvailable()) { |
430 | printf(format: "Performance counters are not available on this system\n" ); |
431 | return; |
432 | } |
433 | |
434 | printf(format: "The following performance counters are available:\n" ); |
435 | for (const Events &ev : eventlist) { |
436 | printf(format: " %-30s [%s]\n" , eventlist_strings + ev.offset, |
437 | ev.type == PERF_TYPE_HARDWARE ? "hardware" : |
438 | ev.type == PERF_TYPE_SOFTWARE ? "software" : |
439 | ev.type == PERF_TYPE_HW_CACHE ? "cache" : "other" ); |
440 | } |
441 | } |
442 | |
443 | QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default; |
444 | |
445 | QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer() |
446 | { |
447 | for (int fd : std::as_const(t&: fds)) |
448 | qt_safe_close(fd); |
449 | } |
450 | |
451 | void QBenchmarkPerfEventsMeasurer::start() |
452 | { |
453 | QT_WARNING_DISABLE_GCC("-Wmissing-field-initializers" ) |
454 | QT_WARNING_DISABLE_CLANG("-Wmissing-field-initializers" ) |
455 | perf_event_attr attr = { |
456 | .size = sizeof attr, |
457 | .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING, |
458 | .disabled = true, // we'll enable later |
459 | .inherit = true, // let children processes inherit the monitoring |
460 | .pinned = true, // keep it running in the hardware |
461 | .inherit_stat = true, // aggregate all the info from child processes |
462 | .task = true, // trace fork/exits |
463 | }; |
464 | |
465 | QList<PerfEvent> &counters = *eventTypes; |
466 | if (counters.isEmpty()) |
467 | counters = defaultCounters(); |
468 | if (fds.isEmpty()) { |
469 | pid_t pid = 0; // attach to the current process only |
470 | int cpu = -1; // on any CPU |
471 | int group_fd = -1; |
472 | int flags = PERF_FLAG_FD_CLOEXEC; |
473 | |
474 | fds.reserve(asize: counters.size()); |
475 | for (PerfEvent counter : std::as_const(t&: counters)) { |
476 | attr.type = counter.type; |
477 | attr.config = counter.config; |
478 | int fd = perf_event_open(attr: &attr, pid, cpu, group_fd, flags); |
479 | if (fd == -1) { |
480 | // probably a paranoid kernel (/proc/sys/kernel/perf_event_paranoid) |
481 | attr.exclude_kernel = true; |
482 | attr.exclude_hv = true; |
483 | fd = perf_event_open(attr: &attr, pid, cpu, group_fd, flags); |
484 | } |
485 | if (fd == -1) { |
486 | perror(s: "QBenchmarkPerfEventsMeasurer::start: perf_event_open" ); |
487 | exit(status: 1); |
488 | } |
489 | |
490 | fds.append(t: fd); |
491 | } |
492 | } |
493 | |
494 | // enable the counters |
495 | for (int fd : std::as_const(t&: fds)) |
496 | ::ioctl(fd: fd, PERF_EVENT_IOC_RESET); |
497 | prctl(PR_TASK_PERF_EVENTS_ENABLE); |
498 | } |
499 | |
500 | QList<QBenchmarkMeasurerBase::Measurement> QBenchmarkPerfEventsMeasurer::stop() |
501 | { |
502 | // disable the counters |
503 | prctl(PR_TASK_PERF_EVENTS_DISABLE); |
504 | |
505 | const QList<PerfEvent> &counters = *eventTypes; |
506 | QList<Measurement> result(counters.size(), {}); |
507 | for (qsizetype i = 0; i < counters.size(); ++i) { |
508 | result[i] = readValue(idx: i); |
509 | } |
510 | return result; |
511 | } |
512 | |
// Accepts every measurement unconditionally; the argument is not inspected.
bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(Measurement)
{
    return true;
}
517 | |
// Always returns 1, ignoring its argument.
int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int)
{
    return 1;
}
522 | |
// Always returns 1, ignoring its argument.
int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int)
{
    return 1;
}
527 | |
528 | static quint64 rawReadValue(int fd) |
529 | { |
530 | /* from the kernel docs: |
531 | * struct read_format { |
532 | * { u64 value; |
533 | * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED |
534 | * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING |
535 | * { u64 id; } && PERF_FORMAT_ID |
536 | * } && !PERF_FORMAT_GROUP |
537 | */ |
538 | |
539 | struct read_format { |
540 | quint64 value; |
541 | quint64 time_enabled; |
542 | quint64 time_running; |
543 | } results; |
544 | |
545 | size_t nread = 0; |
546 | while (nread < sizeof results) { |
547 | char *ptr = reinterpret_cast<char *>(&results); |
548 | qint64 r = qt_safe_read(fd, data: ptr + nread, maxlen: sizeof results - nread); |
549 | if (r == -1) { |
550 | perror(s: "QBenchmarkPerfEventsMeasurer::readValue: reading the results" ); |
551 | exit(status: 1); |
552 | } |
553 | nread += quint64(r); |
554 | } |
555 | |
556 | if (results.time_running == results.time_enabled) |
557 | return results.value; |
558 | |
559 | // scale the results, though this shouldn't happen! |
560 | return results.value * (double(results.time_running) / double(results.time_enabled)); |
561 | } |
562 | |
563 | QBenchmarkMeasurerBase::Measurement QBenchmarkPerfEventsMeasurer::readValue(qsizetype idx) |
564 | { |
565 | quint64 raw = rawReadValue(fd: fds.at(i: idx)); |
566 | return { .value: qreal(qint64(raw)), .metric: metricForEvent(counter: eventTypes->at(i: idx)) }; |
567 | } |
568 | |
569 | QT_END_NAMESPACE |
570 | |
571 | #endif |
572 | |