1// Copyright (C) 2016 Intel Corporation.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qbenchmarkperfevents_p.h"
5#include "qbenchmarkmetric.h"
6#include "qbenchmark_p.h"
7
8#ifdef QTESTLIB_USE_PERF_EVENTS
9
10// include the qcore_unix_p.h without core-private
11// we only use inline functions anyway
12#include "../corelib/kernel/qcore_unix_p.h"
13
14#include <sys/types.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <string.h>
18#include <stdio.h>
19
20#include <sys/ioctl.h>
21#include <sys/prctl.h>
22#include <sys/syscall.h>
23
24#include "3rdparty/linux_perf_event_p.h"
25
26// for PERF_TYPE_HW_CACHE, the config is a bitmask
27// lowest 8 bits: cache type
28// bits 8 to 15: cache operation
29// bits 16 to 23: cache result
// Each value packs (cache type) | (operation << 8) | (result << 16).

// Accesses
#define CACHE_L1D_READ          (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_L1D_WRITE         (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_L1D_PREFETCH      (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_L1I_READ          (PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_L1I_PREFETCH      (PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_LLC_READ          (PERF_COUNT_HW_CACHE_LL | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_LLC_WRITE         (PERF_COUNT_HW_CACHE_LL | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_LLC_PREFETCH      (PERF_COUNT_HW_CACHE_LL | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))

// Misses
#define CACHE_L1D_READ_MISS     (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_L1D_WRITE_MISS    (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_L1D_PREFETCH_MISS (PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_L1I_READ_MISS     (PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_L1I_PREFETCH_MISS (PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_LLC_READ_MISS     (PERF_COUNT_HW_CACHE_LL | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_LLC_WRITE_MISS    (PERF_COUNT_HW_CACHE_LL | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
#define CACHE_LLC_PREFETCH_MISS (PERF_COUNT_HW_CACHE_LL | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))

// Branch prediction unit
#define CACHE_BRANCH_READ       (PERF_COUNT_HW_CACHE_BPU | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))
#define CACHE_BRANCH_READ_MISS  (PERF_COUNT_HW_CACHE_BPU | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
48
49QT_BEGIN_NAMESPACE
50
51struct PerfEvent
52{
53 quint32 type;
54 quint64 config;
55};
56static perf_event_attr attr;
57Q_GLOBAL_STATIC(QList<PerfEvent>, eventTypes);
58
59static void initPerf()
60{
61 static bool done;
62 if (!done) {
63 memset(s: &attr, c: 0, n: sizeof attr);
64 attr.size = sizeof attr;
65 attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
66 attr.disabled = true; // we'll enable later
67 attr.inherit = true; // let children processes inherit the monitoring
68 attr.pinned = true; // keep it running in the hardware
69 attr.inherit_stat = true; // aggregate all the info from child processes
70 attr.task = true; // trace fork/exits
71
72 done = true;
73 }
74}
75
76static QList<PerfEvent> defaultCounters()
77{
78 return {
79 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
80 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
81 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
82 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
83 };
84}
85
// This class does not exist in the API, so its qdoc comment marker was removed.
87
88/*
89 \class QBenchmarkPerfEvents
90 \brief The Linux perf events benchmark backend
91
92 This benchmark backend uses the Linux Performance Counters interface,
93 introduced with the Linux kernel v2.6.31. The interface is done by one
94 system call (perf_event_open) which takes an attribute structure and
95 returns a file descriptor.
96
97 More information:
98 \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt>
99 \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c>
100 (note: as of v3.3.1, the documentation is out-of-date with the kernel
101 interface, so reading the source code of existing tools is necessary)
102
    This benchlib backend monitors the current process as well as any child
    processes it launches. We do not try to benchmark in kernel or hypervisor
    mode, as that usually requires elevated privileges.
106 */
107
108static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
109{
110#ifdef SYS_perf_event_open
111 // syscall() returns long, but perf_event_open() is used to get a file descriptor
112 return int(syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags));
113#else
114 Q_UNUSED(attr);
115 Q_UNUSED(pid);
116 Q_UNUSED(cpu);
117 Q_UNUSED(group_fd);
118 Q_UNUSED(flags);
119 errno = ENOSYS;
120 return -1;
121#endif
122}
123
124bool QBenchmarkPerfEventsMeasurer::isAvailable()
125{
126 // this generates an EFAULT because attr == NULL if perf_event_open is available
127 // if the kernel is too old, it generates ENOSYS
128 return perf_event_open(attr: nullptr, pid: 0, cpu: 0, group_fd: 0, flags: 0) == -1 && errno != ENOSYS;
129}
130
131/* Event list structure
132 The following table provides the list of supported events
133
134 Event type Event counter Unit Name and aliases
135 HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles
136 HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles
137 HARDWARE INSTRUCTIONS Instructions instructions
138 HARDWARE CACHE_REFERENCES CacheReferences cache-references
139 HARDWARE CACHE_MISSES CacheMisses cache-misses
140 HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches
141 HARDWARE BRANCH_MISSES BranchMisses branch-misses
142 HARDWARE BUS_CYCLES BusCycles bus-cycles
143 HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend
144 HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend
145 SOFTWARE CPU_CLOCK WalltimeNanoseconds cpu-clock
146 SOFTWARE TASK_CLOCK WalltimeNanoseconds task-clock
147 SOFTWARE PAGE_FAULTS PageFaults page-faults faults
148 SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults
149 SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults
150 SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs
151 SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations
152 SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults
153 SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults
154 HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads
155 HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores
156 HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches
157 HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads
158 HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches
159 HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads
160 HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores
161 HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches
162 HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses
163 HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses
164 HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses
165 HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses
166 HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses
167 HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses
168 HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses
169 HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses
170 HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts
171 HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses
172
173 Use the following Perl script to re-generate the list
174=== cut perl ===
175#!/usr/bin/env perl
176# Load all entries into %map
177while (<STDIN>) {
178 m/^\s*(.*)\s*$/;
179 @_ = split /\s+/, $1;
180 $type = shift @_;
181 $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" :
182 $type eq "SOFTWARE" ? "PERF_COUNT_SW_" :
183 $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_;
184 $unit = shift @_;
185
186 for $string (@_) {
187 die "$string was already seen!" if defined($map{$string});
188 $map{$string} = [-1, $type, $id, $unit];
189 push @strings, $string;
190 }
191}
192
193# sort the map and print the string list
194@strings = sort @strings;
195print "static const char eventlist_strings[] = \n";
196$counter = 0;
197for $entry (@strings) {
198 print " \"$entry\\0\"\n";
199 $map{$entry}[0] = $counter;
200 $counter += 1 + length $entry;
201}
202
203# print the table
204print " \"\\0\";\n\nstatic const Events eventlist[] = {\n";
205for $entry (sort @strings) {
206 printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n",
207 $map{$entry}[0],
208 $map{$entry}[1],
209 $map{$entry}[2],
210 $map{$entry}[3];
211}
212print "};\n";
213=== cut perl ===
214*/
215
216struct Events {
217 unsigned offset;
218 quint32 type;
219 quint64 event_id;
220 QTest::QBenchmarkMetric metric;
221};
222
223/* -- BEGIN GENERATED CODE -- */
// All counter names and aliases packed into one character array, each one
// NUL-terminated and listed in sorted order. Entries of eventlist refer to a
// name by its byte offset into this array. Generated by the Perl script in
// the comment above; regenerate rather than editing by hand.
224static const char eventlist_strings[] =
225 "alignment-faults\0"
226 "branch-instructions\0"
227 "branch-load-misses\0"
228 "branch-loads\0"
229 "branch-mispredicts\0"
230 "branch-misses\0"
231 "branch-predicts\0"
232 "branch-read-misses\0"
233 "branch-reads\0"
234 "branches\0"
235 "bus-cycles\0"
236 "cache-misses\0"
237 "cache-references\0"
238 "context-switches\0"
239 "cpu-clock\0"
240 "cpu-cycles\0"
241 "cpu-migrations\0"
242 "cs\0"
243 "cycles\0"
244 "emulation-faults\0"
245 "faults\0"
246 "idle-cycles-backend\0"
247 "idle-cycles-frontend\0"
248 "instructions\0"
249 "l1d-cache-load-misses\0"
250 "l1d-cache-loads\0"
251 "l1d-cache-prefetch-misses\0"
252 "l1d-cache-prefetches\0"
253 "l1d-cache-read-misses\0"
254 "l1d-cache-reads\0"
255 "l1d-cache-store-misses\0"
256 "l1d-cache-stores\0"
257 "l1d-cache-write-misses\0"
258 "l1d-cache-writes\0"
259 "l1d-load-misses\0"
260 "l1d-loads\0"
261 "l1d-prefetch-misses\0"
262 "l1d-prefetches\0"
263 "l1d-read-misses\0"
264 "l1d-reads\0"
265 "l1d-store-misses\0"
266 "l1d-stores\0"
267 "l1d-write-misses\0"
268 "l1d-writes\0"
269 "l1i-cache-load-misses\0"
270 "l1i-cache-loads\0"
271 "l1i-cache-prefetch-misses\0"
272 "l1i-cache-prefetches\0"
273 "l1i-cache-read-misses\0"
274 "l1i-cache-reads\0"
275 "l1i-load-misses\0"
276 "l1i-loads\0"
277 "l1i-prefetch-misses\0"
278 "l1i-prefetches\0"
279 "l1i-read-misses\0"
280 "l1i-reads\0"
281 "llc-cache-load-misses\0"
282 "llc-cache-loads\0"
283 "llc-cache-prefetch-misses\0"
284 "llc-cache-prefetches\0"
285 "llc-cache-read-misses\0"
286 "llc-cache-reads\0"
287 "llc-cache-store-misses\0"
288 "llc-cache-stores\0"
289 "llc-cache-write-misses\0"
290 "llc-cache-writes\0"
291 "llc-load-misses\0"
292 "llc-loads\0"
293 "llc-prefetch-misses\0"
294 "llc-prefetches\0"
295 "llc-read-misses\0"
296 "llc-reads\0"
297 "llc-store-misses\0"
298 "llc-stores\0"
299 "llc-write-misses\0"
300 "llc-writes\0"
301 "major-faults\0"
302 "migrations\0"
303 "minor-faults\0"
304 "page-faults\0"
305 "ref-cycles\0"
306 "stalled-cycles-backend\0"
307 "stalled-cycles-frontend\0"
308 "task-clock\0"
309 "\0";
310
// One row per name in eventlist_strings, in the same (sorted) order. Each row
// gives the name's byte offset, the perf event type, the event config value
// and the QTest metric reported for it. Several names may map to the same
// event (aliases). setCounter() depends on the rows staying sorted by name.
311static const Events eventlist[] = {
312 { .offset: 0, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_ALIGNMENT_FAULTS, .metric: QTest::AlignmentFaults },
313 { .offset: 17, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions },
314 { .offset: 37, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses },
315 { .offset: 56, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions },
316 { .offset: 69, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses },
317 { .offset: 88, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_MISSES, .metric: QTest::BranchMisses },
318 { .offset: 102, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions },
319 { .offset: 118, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, .metric: QTest::BranchMisses },
320 { .offset: 137, .type: PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, .metric: QTest::BranchInstructions },
321 { .offset: 150, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BRANCH_INSTRUCTIONS, .metric: QTest::BranchInstructions },
322 { .offset: 159, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_BUS_CYCLES, .metric: QTest::BusCycles },
323 { .offset: 170, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_MISSES, .metric: QTest::CacheMisses },
324 { .offset: 183, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CACHE_REFERENCES, .metric: QTest::CacheReferences },
325 { .offset: 200, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches },
326 { .offset: 217, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_CLOCK, .metric: QTest::WalltimeNanoseconds },
327 { .offset: 227, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles },
328 { .offset: 238, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations },
329 { .offset: 253, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, .metric: QTest::ContextSwitches },
330 { .offset: 256, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_CPU_CYCLES, .metric: QTest::CPUCycles },
331 { .offset: 263, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_EMULATION_FAULTS, .metric: QTest::EmulationFaults },
332 { .offset: 280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults },
333 { .offset: 287, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles },
334 { .offset: 307, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles },
335 { .offset: 328, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_INSTRUCTIONS, .metric: QTest::Instructions },
336 { .offset: 341, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads },
337 { .offset: 363, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads },
338 { .offset: 379, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches },
339 { .offset: 405, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches },
340 { .offset: 426, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads },
341 { .offset: 448, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads },
342 { .offset: 464, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites },
343 { .offset: 487, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites },
344 { .offset: 504, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites },
345 { .offset: 527, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites },
346 { .offset: 544, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads },
347 { .offset: 560, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads },
348 { .offset: 570, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, .metric: QTest::CachePrefetches },
349 { .offset: 590, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, .metric: QTest::CachePrefetches },
350 { .offset: 605, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, .metric: QTest::CacheReads },
351 { .offset: 621, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_READ, .metric: QTest::CacheReads },
352 { .offset: 631, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites },
353 { .offset: 648, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites },
354 { .offset: 659, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, .metric: QTest::CacheWrites },
355 { .offset: 676, .type: PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, .metric: QTest::CacheWrites },
356 { .offset: 687, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads },
357 { .offset: 709, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads },
358 { .offset: 725, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches },
359 { .offset: 751, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches },
360 { .offset: 772, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads },
361 { .offset: 794, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads },
362 { .offset: 810, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads },
363 { .offset: 826, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads },
364 { .offset: 836, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, .metric: QTest::CachePrefetches },
365 { .offset: 856, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, .metric: QTest::CachePrefetches },
366 { .offset: 871, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, .metric: QTest::CacheReads },
367 { .offset: 887, .type: PERF_TYPE_HW_CACHE, CACHE_L1I_READ, .metric: QTest::CacheReads },
368 { .offset: 897, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads },
369 { .offset: 919, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads },
370 { .offset: 935, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches },
371 { .offset: 961, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches },
372 { .offset: 982, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads },
373 { .offset: 1004, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads },
374 { .offset: 1020, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites },
375 { .offset: 1043, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites },
376 { .offset: 1060, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites },
377 { .offset: 1083, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites },
378 { .offset: 1100, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads },
379 { .offset: 1116, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads },
380 { .offset: 1126, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, .metric: QTest::CachePrefetches },
381 { .offset: 1146, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, .metric: QTest::CachePrefetches },
382 { .offset: 1161, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, .metric: QTest::CacheReads },
383 { .offset: 1177, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_READ, .metric: QTest::CacheReads },
384 { .offset: 1187, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites },
385 { .offset: 1204, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites },
386 { .offset: 1215, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, .metric: QTest::CacheWrites },
387 { .offset: 1232, .type: PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, .metric: QTest::CacheWrites },
388 { .offset: 1243, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MAJ, .metric: QTest::MajorPageFaults },
389 { .offset: 1256, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_CPU_MIGRATIONS, .metric: QTest::CPUMigrations },
390 { .offset: 1267, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS_MIN, .metric: QTest::MinorPageFaults },
391 { .offset: 1280, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_PAGE_FAULTS, .metric: QTest::PageFaults },
392 { .offset: 1292, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_REF_CPU_CYCLES, .metric: QTest::RefCPUCycles },
393 { .offset: 1303, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_BACKEND, .metric: QTest::StalledCycles },
394 { .offset: 1326, .type: PERF_TYPE_HARDWARE, .event_id: PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, .metric: QTest::StalledCycles },
395 { .offset: 1350, .type: PERF_TYPE_SOFTWARE, .event_id: PERF_COUNT_SW_TASK_CLOCK, .metric: QTest::WalltimeNanoseconds },
396};
397/* -- END GENERATED CODE -- */
398
399static QTest::QBenchmarkMetric metricForEvent(PerfEvent counter)
400{
401 for (const Events &ev : eventlist) {
402 if (ev.type == counter.type && ev.event_id == counter.config)
403 return ev.metric;
404 }
405 return QTest::Events;
406}
407
408void QBenchmarkPerfEventsMeasurer::setCounter(const char *name)
409{
410 initPerf();
411 eventTypes->clear();
412 std::string_view input = name;
413 if (qsizetype idx = input.find(c: ':'); idx >= 0)
414 input = input.substr(pos: 0, n: idx);
415
416 while (!input.empty()) {
417 std::string_view countername = input;
418 if (qsizetype idx = countername.find(c: ','); idx >= 0)
419 countername = countername.substr(pos: 0, n: idx);
420
421 for (const Events &ev : eventlist) {
422 int c = countername.compare(str: eventlist_strings + ev.offset);
423 if (c > 0)
424 continue;
425 if (c < 0) {
426 fprintf(stderr, format: "ERROR: Performance counter type '%.*s' is unknown\n",
427 int(countername.size()), countername.data());
428 exit(status: 1);
429 }
430 eventTypes->append(t: { .type: ev.type, .config: ev.event_id });
431 break;
432 }
433
434 if (countername.size() == input.size())
435 input = {};
436 else
437 input.remove_prefix(n: countername.size() + 1);
438 }
439
440 // We used to support attributes, but our code was the opposite of what
441 // perf(1) does, plus QBenchlib isn't exactly expected to be used to
442 // profile Linux kernel code or launch guest VMs as part of the workload.
443 // So we keep accepting the colon as a delimiter but ignore it.
444}
445
446void QBenchmarkPerfEventsMeasurer::listCounters()
447{
448 if (!isAvailable()) {
449 printf(format: "Performance counters are not available on this system\n");
450 return;
451 }
452
453 printf(format: "The following performance counters are available:\n");
454 for (const Events &ev : eventlist) {
455 printf(format: " %-30s [%s]\n", eventlist_strings + ev.offset,
456 ev.type == PERF_TYPE_HARDWARE ? "hardware" :
457 ev.type == PERF_TYPE_SOFTWARE ? "software" :
458 ev.type == PERF_TYPE_HW_CACHE ? "cache" : "other");
459 }
460}
461
462QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default;
463
464QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer()
465{
466 for (int fd : std::as_const(t&: fds))
467 qt_safe_close(fd);
468}
469
470void QBenchmarkPerfEventsMeasurer::init()
471{
472}
473
474void QBenchmarkPerfEventsMeasurer::start()
475{
476 initPerf();
477 QList<PerfEvent> &counters = *eventTypes;
478 if (counters.isEmpty())
479 counters = defaultCounters();
480 if (fds.isEmpty()) {
481 pid_t pid = 0; // attach to the current process only
482 int cpu = -1; // on any CPU
483 int group_fd = -1;
484 int flags = PERF_FLAG_FD_CLOEXEC;
485
486 fds.reserve(asize: counters.size());
487 for (PerfEvent counter : std::as_const(t&: counters)) {
488 attr.type = counter.type;
489 attr.config = counter.config;
490 int fd = perf_event_open(attr: &attr, pid, cpu, group_fd, flags);
491 if (fd == -1) {
492 // probably a paranoid kernel (/proc/sys/kernel/perf_event_paranoid)
493 attr.exclude_kernel = true;
494 attr.exclude_hv = true;
495 fd = perf_event_open(attr: &attr, pid, cpu, group_fd, flags);
496 }
497 if (fd == -1) {
498 perror(s: "QBenchmarkPerfEventsMeasurer::start: perf_event_open");
499 exit(status: 1);
500 }
501
502 fds.append(t: fd);
503 }
504 }
505
506 // enable the counters
507 for (int fd : std::as_const(t&: fds))
508 ::ioctl(fd: fd, PERF_EVENT_IOC_RESET);
509 prctl(PR_TASK_PERF_EVENTS_ENABLE);
510}
511
512QList<QBenchmarkMeasurerBase::Measurement> QBenchmarkPerfEventsMeasurer::stop()
513{
514 // disable the counters
515 prctl(PR_TASK_PERF_EVENTS_DISABLE);
516
517 const QList<PerfEvent> &counters = *eventTypes;
518 QList<Measurement> result(counters.size(), {});
519 for (qsizetype i = 0; i < counters.size(); ++i) {
520 result[i] = readValue(idx: i);
521 }
522 return result;
523}
524
525bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(Measurement)
526{
527 return true;
528}
529
530int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int)
531{
532 return 1;
533}
534
535int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int)
536{
537 return 1;
538}
539
540static quint64 rawReadValue(int fd)
541{
542 /* from the kernel docs:
543 * struct read_format {
544 * { u64 value;
545 * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
546 * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
547 * { u64 id; } && PERF_FORMAT_ID
548 * } && !PERF_FORMAT_GROUP
549 */
550
551 struct read_format {
552 quint64 value;
553 quint64 time_enabled;
554 quint64 time_running;
555 } results;
556
557 size_t nread = 0;
558 while (nread < sizeof results) {
559 char *ptr = reinterpret_cast<char *>(&results);
560 qint64 r = qt_safe_read(fd, data: ptr + nread, maxlen: sizeof results - nread);
561 if (r == -1) {
562 perror(s: "QBenchmarkPerfEventsMeasurer::readValue: reading the results");
563 exit(status: 1);
564 }
565 nread += quint64(r);
566 }
567
568 if (results.time_running == results.time_enabled)
569 return results.value;
570
571 // scale the results, though this shouldn't happen!
572 return results.value * (double(results.time_running) / double(results.time_enabled));
573}
574
575QBenchmarkMeasurerBase::Measurement QBenchmarkPerfEventsMeasurer::readValue(qsizetype idx)
576{
577 quint64 raw = rawReadValue(fd: fds.at(i: idx));
578 return { .value: qreal(qint64(raw)), .metric: metricForEvent(counter: eventTypes->at(i: idx)) };
579}
580
581QT_END_NAMESPACE
582
583#endif
584

// Source: qtbase/src/testlib/qbenchmarkperfevents.cpp