1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 Intel Corporation. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #include "qbenchmarkperfevents_p.h" |
41 | #include "qbenchmarkmetric.h" |
42 | #include "qbenchmark_p.h" |
43 | |
44 | #ifdef QTESTLIB_USE_PERF_EVENTS |
45 | |
46 | // include the qcore_unix_p.h without core-private |
47 | // we only use inline functions anyway |
48 | #include "../corelib/kernel/qcore_unix_p.h" |
49 | |
50 | #include <sys/types.h> |
51 | #include <errno.h> |
52 | #include <fcntl.h> |
53 | #include <string.h> |
54 | #include <stdio.h> |
55 | |
56 | #include <sys/syscall.h> |
57 | #include <sys/ioctl.h> |
58 | |
59 | #include "3rdparty/linux_perf_event_p.h" |
60 | |
// For PERF_TYPE_HW_CACHE the config value is a bitmask made of three fields:
//   bits  0 to  7: cache type
//   bits  8 to 15: cache operation
//   bits 16 to 23: cache result
// QT_PERF_CACHE_CONFIG pastes the three suffixes onto the corresponding
// PERF_COUNT_HW_CACHE_* prefixes and packs them into that layout.
#define QT_PERF_CACHE_CONFIG(cache, op, result) \
    ((PERF_COUNT_HW_CACHE_ ## cache) \
     | ((PERF_COUNT_HW_CACHE_OP_ ## op) << 8) \
     | ((PERF_COUNT_HW_CACHE_RESULT_ ## result) << 16))

// accesses
#define CACHE_L1D_READ              QT_PERF_CACHE_CONFIG(L1D, READ, ACCESS)
#define CACHE_L1D_WRITE             QT_PERF_CACHE_CONFIG(L1D, WRITE, ACCESS)
#define CACHE_L1D_PREFETCH          QT_PERF_CACHE_CONFIG(L1D, PREFETCH, ACCESS)
#define CACHE_L1I_READ              QT_PERF_CACHE_CONFIG(L1I, READ, ACCESS)
#define CACHE_L1I_PREFETCH          QT_PERF_CACHE_CONFIG(L1I, PREFETCH, ACCESS)
#define CACHE_LLC_READ              QT_PERF_CACHE_CONFIG(LL, READ, ACCESS)
#define CACHE_LLC_WRITE             QT_PERF_CACHE_CONFIG(LL, WRITE, ACCESS)
#define CACHE_LLC_PREFETCH          QT_PERF_CACHE_CONFIG(LL, PREFETCH, ACCESS)

// misses
#define CACHE_L1D_READ_MISS         QT_PERF_CACHE_CONFIG(L1D, READ, MISS)
#define CACHE_L1D_WRITE_MISS        QT_PERF_CACHE_CONFIG(L1D, WRITE, MISS)
#define CACHE_L1D_PREFETCH_MISS     QT_PERF_CACHE_CONFIG(L1D, PREFETCH, MISS)
#define CACHE_L1I_READ_MISS         QT_PERF_CACHE_CONFIG(L1I, READ, MISS)
#define CACHE_L1I_PREFETCH_MISS     QT_PERF_CACHE_CONFIG(L1I, PREFETCH, MISS)
#define CACHE_LLC_READ_MISS         QT_PERF_CACHE_CONFIG(LL, READ, MISS)
#define CACHE_LLC_WRITE_MISS        QT_PERF_CACHE_CONFIG(LL, WRITE, MISS)
#define CACHE_LLC_PREFETCH_MISS     QT_PERF_CACHE_CONFIG(LL, PREFETCH, MISS)

// branch prediction unit
#define CACHE_BRANCH_READ           QT_PERF_CACHE_CONFIG(BPU, READ, ACCESS)
#define CACHE_BRANCH_READ_MISS      QT_PERF_CACHE_CONFIG(BPU, READ, MISS)
83 | |
84 | QT_BEGIN_NAMESPACE |
85 | |
86 | static perf_event_attr attr; |
87 | |
88 | static void initPerf() |
89 | { |
90 | static bool done; |
91 | if (!done) { |
92 | memset(s: &attr, c: 0, n: sizeof attr); |
93 | attr.size = sizeof attr; |
94 | attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; |
95 | attr.disabled = true; // we'll enable later |
96 | attr.inherit = true; // let children processes inherit the monitoring |
97 | attr.pinned = true; // keep it running in the hardware |
98 | attr.inherit_stat = true; // aggregate all the info from child processes |
99 | attr.task = true; // trace fork/exits |
100 | |
101 | // set a default performance counter: CPU cycles |
102 | attr.type = PERF_TYPE_HARDWARE; |
103 | attr.config = PERF_COUNT_HW_CPU_CYCLES; // default |
104 | |
105 | done = true; |
106 | } |
107 | } |
108 | |
109 | // This class does not exist in the API so its qdoc comment marker was removed. |
110 | |
111 | /* |
112 | \class QBenchmarkPerfEvents |
113 | \brief The Linux perf events benchmark backend |
114 | |
115 | This benchmark backend uses the Linux Performance Counters interface, |
116 | introduced with the Linux kernel v2.6.31. The interface is done by one |
117 | system call (perf_event_open) which takes an attribute structure and |
118 | returns a file descriptor. |
119 | |
120 | More information: |
121 | \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt> |
122 | \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c> |
123 | (note: as of v3.3.1, the documentation is out-of-date with the kernel |
124 | interface, so reading the source code of existing tools is necessary) |
125 | |
126 | This benchlib backend monitors the current process as well as any child |
127 | processes it launches. We do not try to benchmark in kernel or hypervisor |
128 | mode, as that usually requires elevated privileges. |
129 | */ |
130 | |
131 | static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) |
132 | { |
133 | #ifdef SYS_perf_event_open |
134 | return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags); |
135 | #else |
136 | Q_UNUSED(attr); |
137 | Q_UNUSED(pid); |
138 | Q_UNUSED(cpu); |
139 | Q_UNUSED(group_fd); |
140 | Q_UNUSED(flags); |
141 | errno = ENOSYS; |
142 | return -1; |
143 | #endif |
144 | } |
145 | |
146 | bool QBenchmarkPerfEventsMeasurer::isAvailable() |
147 | { |
148 | // this generates an EFAULT because attr == NULL if perf_event_open is available |
149 | // if the kernel is too old, it generates ENOSYS |
150 | return perf_event_open(attr: nullptr, pid: 0, cpu: 0, group_fd: 0, flags: 0) == -1 && errno != ENOSYS; |
151 | } |
152 | |
153 | /* Event list structure |
154 | The following table provides the list of supported events |
155 | |
156 | Event type Event counter Unit Name and aliases |
157 | HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles |
158 | HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles |
159 | HARDWARE INSTRUCTIONS Instructions instructions |
160 | HARDWARE CACHE_REFERENCES CacheReferences cache-references |
161 | HARDWARE CACHE_MISSES CacheMisses cache-misses |
162 | HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches |
163 | HARDWARE BRANCH_MISSES BranchMisses branch-misses |
164 | HARDWARE BUS_CYCLES BusCycles bus-cycles |
165 | HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend |
166 | HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend |
167 | SOFTWARE CPU_CLOCK WalltimeMilliseconds cpu-clock |
168 | SOFTWARE TASK_CLOCK WalltimeMilliseconds task-clock |
169 | SOFTWARE PAGE_FAULTS PageFaults page-faults faults |
170 | SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults |
171 | SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults |
172 | SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs |
173 | SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations |
174 | SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults |
175 | SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults |
176 | HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads |
177 | HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores |
178 | HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches |
179 | HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads |
180 | HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches |
181 | HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads |
182 | HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores |
183 | HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches |
184 | HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses |
185 | HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses |
186 | HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses |
187 | HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses |
188 | HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses |
189 | HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses |
190 | HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses |
191 | HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses |
192 | HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts |
193 | HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses |
194 | |
195 | Use the following Perl script to re-generate the list |
196 | === cut perl === |
197 | #!/usr/bin/env perl |
198 | # Load all entries into %map |
199 | while (<STDIN>) { |
200 | m/^\s*(.*)\s*$/; |
201 | @_ = split /\s+/, $1; |
202 | $type = shift @_; |
203 | $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" : |
204 | $type eq "SOFTWARE" ? "PERF_COUNT_SW_" : |
205 | $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_; |
206 | $unit = shift @_; |
207 | |
208 | for $string (@_) { |
209 | die "$string was already seen!" if defined($map{$string}); |
210 | $map{$string} = [-1, $type, $id, $unit]; |
211 | push @strings, $string; |
212 | } |
213 | } |
214 | |
215 | # sort the map and print the string list |
216 | @strings = sort @strings; |
217 | print "static const char eventlist_strings[] = \n"; |
218 | $counter = 0; |
219 | for $entry (@strings) { |
220 | print " \"$entry\\0\"\n"; |
221 | $map{$entry}[0] = $counter; |
222 | $counter += 1 + length $entry; |
223 | } |
224 | |
225 | # print the table |
226 | print " \"\\0\";\n\nstatic const Events eventlist[] = {\n"; |
227 | for $entry (sort @strings) { |
228 | printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n", |
229 | $map{$entry}[0], |
230 | $map{$entry}[1], |
231 | $map{$entry}[2], |
232 | $map{$entry}[3]; |
233 | } |
234 | print " { 0, PERF_TYPE_MAX, 0, QTest::Events }\n};\n"; |
235 | === cut perl === |
236 | */ |
237 | |
238 | struct Events { |
239 | unsigned offset; |
240 | quint32 type; |
241 | quint64 event_id; |
242 | QTest::QBenchmarkMetric metric; |
243 | }; |
244 | |
245 | /* -- BEGIN GENERATED CODE -- */ |
// All event names and aliases, sorted, stored back to back as NUL-terminated
// strings. The rows of eventlist refer to them by byte offset, so the order
// and spelling must stay in sync with that table.
static const char eventlist_strings[] =
    "alignment-faults\0"
    "branch-instructions\0"
    "branch-load-misses\0"
    "branch-loads\0"
    "branch-mispredicts\0"
    "branch-misses\0"
    "branch-predicts\0"
    "branch-read-misses\0"
    "branch-reads\0"
    "branches\0"
    "bus-cycles\0"
    "cache-misses\0"
    "cache-references\0"
    "context-switches\0"
    "cpu-clock\0"
    "cpu-cycles\0"
    "cpu-migrations\0"
    "cs\0"
    "cycles\0"
    "emulation-faults\0"
    "faults\0"
    "idle-cycles-backend\0"
    "idle-cycles-frontend\0"
    "instructions\0"
    "l1d-cache-load-misses\0"
    "l1d-cache-loads\0"
    "l1d-cache-prefetch-misses\0"
    "l1d-cache-prefetches\0"
    "l1d-cache-read-misses\0"
    "l1d-cache-reads\0"
    "l1d-cache-store-misses\0"
    "l1d-cache-stores\0"
    "l1d-cache-write-misses\0"
    "l1d-cache-writes\0"
    "l1d-load-misses\0"
    "l1d-loads\0"
    "l1d-prefetch-misses\0"
    "l1d-prefetches\0"
    "l1d-read-misses\0"
    "l1d-reads\0"
    "l1d-store-misses\0"
    "l1d-stores\0"
    "l1d-write-misses\0"
    "l1d-writes\0"
    "l1i-cache-load-misses\0"
    "l1i-cache-loads\0"
    "l1i-cache-prefetch-misses\0"
    "l1i-cache-prefetches\0"
    "l1i-cache-read-misses\0"
    "l1i-cache-reads\0"
    "l1i-load-misses\0"
    "l1i-loads\0"
    "l1i-prefetch-misses\0"
    "l1i-prefetches\0"
    "l1i-read-misses\0"
    "l1i-reads\0"
    "llc-cache-load-misses\0"
    "llc-cache-loads\0"
    "llc-cache-prefetch-misses\0"
    "llc-cache-prefetches\0"
    "llc-cache-read-misses\0"
    "llc-cache-reads\0"
    "llc-cache-store-misses\0"
    "llc-cache-stores\0"
    "llc-cache-write-misses\0"
    "llc-cache-writes\0"
    "llc-load-misses\0"
    "llc-loads\0"
    "llc-prefetch-misses\0"
    "llc-prefetches\0"
    "llc-read-misses\0"
    "llc-reads\0"
    "llc-store-misses\0"
    "llc-stores\0"
    "llc-write-misses\0"
    "llc-writes\0"
    "major-faults\0"
    "migrations\0"
    "minor-faults\0"
    "page-faults\0"
    "ref-cycles\0"
    "stalled-cycles-backend\0"
    "stalled-cycles-frontend\0"
    "task-clock\0"
    "\0";
332 | |
333 | static const Events eventlist[] = { |
334 | { .offset: 0, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_ALIGNMENT_FAULTS, .metric: QTest::AlignmentFaults }, |
335 | { .offset: 17, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions }, |
336 | { .offset: 37, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
337 | { .offset: 56, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
338 | { .offset: 69, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
339 | { .offset: 88, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_MISSES, .metric: QTest::BranchMisses }, |
340 | { .offset: 102, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
341 | { .offset: 118, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses }, |
342 | { .offset: 137, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions }, |
343 | { .offset: 150, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions }, |
344 | { .offset: 159, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BUS_CYCLES, .metric: QTest::BusCycles }, |
345 | { .offset: 170, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_MISSES, .metric: QTest::CacheMisses }, |
346 | { .offset: 183, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_REFERENCES, .metric: QTest::CacheReferences }, |
347 | { .offset: 200, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches }, |
348 | { .offset: 217, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_CLOCK, .metric: QTest::WalltimeMilliseconds }, |
349 | { .offset: 227, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles }, |
350 | { .offset: 238, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations }, |
351 | { .offset: 253, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches }, |
352 | { .offset: 256, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles }, |
353 | { .offset: 263, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_EMULATION_FAULTS, .metric: QTest::EmulationFaults }, |
354 | { .offset: 280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults }, |
355 | { .offset: 287, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles }, |
356 | { .offset: 307, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles }, |
357 | { .offset: 328, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_INSTRUCTIONS, .metric: QTest::Instructions }, |
358 | { .offset: 341, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
359 | { .offset: 363, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
360 | { .offset: 379, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
361 | { .offset: 405, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches }, |
362 | { .offset: 426, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
363 | { .offset: 448, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
364 | { .offset: 464, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
365 | { .offset: 487, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
366 | { .offset: 504, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
367 | { .offset: 527, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
368 | { .offset: 544, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
369 | { .offset: 560, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
370 | { .offset: 570, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
371 | { .offset: 590, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches }, |
372 | { .offset: 605, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads }, |
373 | { .offset: 621, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads }, |
374 | { .offset: 631, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
375 | { .offset: 648, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
376 | { .offset: 659, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites }, |
377 | { .offset: 676, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites }, |
378 | { .offset: 687, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
379 | { .offset: 709, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
380 | { .offset: 725, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
381 | { .offset: 751, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches }, |
382 | { .offset: 772, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
383 | { .offset: 794, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
384 | { .offset: 810, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
385 | { .offset: 826, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
386 | { .offset: 836, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
387 | { .offset: 856, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches }, |
388 | { .offset: 871, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads }, |
389 | { .offset: 887, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads }, |
390 | { .offset: 897, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
391 | { .offset: 919, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
392 | { .offset: 935, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
393 | { .offset: 961, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches }, |
394 | { .offset: 982, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
395 | { .offset: 1004, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
396 | { .offset: 1020, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
397 | { .offset: 1043, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
398 | { .offset: 1060, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
399 | { .offset: 1083, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
400 | { .offset: 1100, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
401 | { .offset: 1116, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
402 | { .offset: 1126, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches }, |
403 | { .offset: 1146, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches }, |
404 | { .offset: 1161, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads }, |
405 | { .offset: 1177, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads }, |
406 | { .offset: 1187, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
407 | { .offset: 1204, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
408 | { .offset: 1215, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites }, |
409 | { .offset: 1232, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites }, |
410 | { .offset: 1243, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MAJ, .metric: QTest::MajorPageFaults }, |
411 | { .offset: 1256, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations }, |
412 | { .offset: 1267, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MIN, .metric: QTest::MinorPageFaults }, |
413 | { .offset: 1280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults }, |
414 | { .offset: 1292, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_REF_CPU_CYCLES, .metric: QTest::RefCPUCycles }, |
415 | { .offset: 1303, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles }, |
416 | { .offset: 1326, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles }, |
417 | { .offset: 1350, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_TASK_CLOCK, .metric: QTest::WalltimeMilliseconds }, |
418 | { .offset: 0, .type: PERF_TYPE_MAX, .event_id: 0, .metric: QTest::Events } |
419 | }; |
420 | /* -- END GENERATED CODE -- */ |
421 | |
422 | QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricForEvent(quint32 type, quint64 event_id) |
423 | { |
424 | const Events *ptr = eventlist; |
425 | for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
426 | if (ptr->type == type && ptr->event_id == event_id) |
427 | return ptr->metric; |
428 | } |
429 | return QTest::Events; |
430 | } |
431 | |
432 | void QBenchmarkPerfEventsMeasurer::setCounter(const char *name) |
433 | { |
434 | initPerf(); |
435 | const char *colon = strchr(s: name, c: ':'); |
436 | int n = colon ? colon - name : strlen(s: name); |
437 | const Events *ptr = eventlist; |
438 | for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
439 | int c = strncmp(s1: name, s2: eventlist_strings + ptr->offset, n: n); |
440 | if (c == 0) |
441 | break; |
442 | if (c < 0) { |
443 | fprintf(stderr, format: "ERROR: Performance counter type '%s' is unknown\n" , name); |
444 | exit(status: 1); |
445 | } |
446 | } |
447 | |
448 | attr.type = ptr->type; |
449 | attr.config = ptr->event_id; |
450 | |
451 | // now parse the attributes |
452 | if (!colon) |
453 | return; |
454 | while (*++colon) { |
455 | switch (*colon) { |
456 | case 'u': |
457 | attr.exclude_user = true; |
458 | break; |
459 | case 'k': |
460 | attr.exclude_kernel = true; |
461 | break; |
462 | case 'h': |
463 | attr.exclude_hv = true; |
464 | break; |
465 | case 'G': |
466 | attr.exclude_guest = true; |
467 | break; |
468 | case 'H': |
469 | attr.exclude_host = true; |
470 | break; |
471 | default: |
472 | fprintf(stderr, format: "ERROR: Unknown attribute '%c'\n" , *colon); |
473 | exit(status: 1); |
474 | } |
475 | } |
476 | } |
477 | |
478 | void QBenchmarkPerfEventsMeasurer::listCounters() |
479 | { |
480 | if (!isAvailable()) { |
481 | printf(format: "Performance counters are not available on this system\n" ); |
482 | return; |
483 | } |
484 | |
485 | printf(format: "The following performance counters are available:\n" ); |
486 | const Events *ptr = eventlist; |
487 | for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
488 | printf(format: " %-30s [%s]\n" , eventlist_strings + ptr->offset, |
489 | ptr->type == PERF_TYPE_HARDWARE ? "hardware" : |
490 | ptr->type == PERF_TYPE_SOFTWARE ? "software" : |
491 | ptr->type == PERF_TYPE_HW_CACHE ? "cache" : "other" ); |
492 | } |
493 | |
494 | printf(format: "\nAttributes can be specified by adding a colon and the following:\n" |
495 | " u - exclude measuring in the userspace\n" |
496 | " k - exclude measuring in kernel mode\n" |
497 | " h - exclude measuring in the hypervisor\n" |
498 | " G - exclude measuring when running virtualized (guest VM)\n" |
499 | " H - exclude measuring when running non-virtualized (host system)\n" |
500 | "Attributes can be combined, for example: -perfcounter branch-mispredicts:kh\n" ); |
501 | } |
502 | |
503 | QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default; |
504 | |
505 | QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer() |
506 | { |
507 | qt_safe_close(fd); |
508 | } |
509 | |
510 | void QBenchmarkPerfEventsMeasurer::init() |
511 | { |
512 | } |
513 | |
514 | void QBenchmarkPerfEventsMeasurer::start() |
515 | { |
516 | |
517 | initPerf(); |
518 | if (fd == -1) { |
519 | // pid == 0 -> attach to the current process |
520 | // cpu == -1 -> monitor on all CPUs |
521 | // group_fd == -1 -> this is the group leader |
522 | // flags == 0 -> reserved, must be zero |
523 | fd = perf_event_open(attr: &attr, pid: 0, cpu: -1, group_fd: -1, flags: 0); |
524 | if (fd == -1) { |
525 | perror(s: "QBenchmarkPerfEventsMeasurer::start: perf_event_open" ); |
526 | exit(status: 1); |
527 | } else { |
528 | ::fcntl(fd: fd, F_SETFD, FD_CLOEXEC); |
529 | } |
530 | } |
531 | |
532 | // enable the counter |
533 | ::ioctl(fd: fd, PERF_EVENT_IOC_RESET); |
534 | ::ioctl(fd: fd, PERF_EVENT_IOC_ENABLE); |
535 | } |
536 | |
537 | qint64 QBenchmarkPerfEventsMeasurer::checkpoint() |
538 | { |
539 | ::ioctl(fd: fd, PERF_EVENT_IOC_DISABLE); |
540 | qint64 value = readValue(); |
541 | ::ioctl(fd: fd, PERF_EVENT_IOC_ENABLE); |
542 | return value; |
543 | } |
544 | |
545 | qint64 QBenchmarkPerfEventsMeasurer::stop() |
546 | { |
547 | // disable the counter |
548 | ::ioctl(fd: fd, PERF_EVENT_IOC_DISABLE); |
549 | return readValue(); |
550 | } |
551 | |
552 | bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(qint64) |
553 | { |
554 | return true; |
555 | } |
556 | |
557 | int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int) |
558 | { |
559 | return 1; |
560 | } |
561 | |
562 | int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int) |
563 | { |
564 | return 1; |
565 | } |
566 | |
567 | QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType() |
568 | { |
569 | return metricForEvent(type: attr.type, event_id: attr.config); |
570 | } |
571 | |
572 | static quint64 rawReadValue(int fd) |
573 | { |
574 | /* from the kernel docs: |
575 | * struct read_format { |
576 | * { u64 value; |
577 | * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED |
578 | * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING |
579 | * { u64 id; } && PERF_FORMAT_ID |
580 | * } && !PERF_FORMAT_GROUP |
581 | */ |
582 | |
583 | struct read_format { |
584 | quint64 value; |
585 | quint64 time_enabled; |
586 | quint64 time_running; |
587 | } results; |
588 | |
589 | size_t nread = 0; |
590 | while (nread < sizeof results) { |
591 | char *ptr = reinterpret_cast<char *>(&results); |
592 | qint64 r = qt_safe_read(fd, data: ptr + nread, maxlen: sizeof results - nread); |
593 | if (r == -1) { |
594 | perror(s: "QBenchmarkPerfEventsMeasurer::readValue: reading the results" ); |
595 | exit(status: 1); |
596 | } |
597 | nread += quint64(r); |
598 | } |
599 | |
600 | if (results.time_running == results.time_enabled) |
601 | return results.value; |
602 | |
603 | // scale the results, though this shouldn't happen! |
604 | return results.value * (double(results.time_running) / double(results.time_enabled)); |
605 | } |
606 | |
607 | qint64 QBenchmarkPerfEventsMeasurer::readValue() |
608 | { |
609 | quint64 raw = rawReadValue(fd); |
610 | if (metricType() == QTest::WalltimeMilliseconds) { |
611 | // perf returns nanoseconds |
612 | return raw / 1000000; |
613 | } |
614 | return raw; |
615 | } |
616 | |
617 | QT_END_NAMESPACE |
618 | |
619 | #endif |
620 | |