#include <random>
#include <thread>

#include "../src/perf_counters.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

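// Older GoogleTest releases do not provide GTEST_SKIP. Fall back to a
// minimal stand-in that prints the skip message to stdout and returns
// from the test.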
#ifndef GTEST_SKIP
struct MsgHandler {
  void operator=(std::ostream&) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif

using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
using ::testing::AllOf;
using ::testing::Gt;
using ::testing::Lt;

namespace {
const char kGenericPerfEvent1[] = "CYCLES";
const char kGenericPerfEvent2[] = "INSTRUCTIONS";
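// Note: CYCLES and INSTRUCTIONS are generic event names that libpfm
// resolves to the matching hardware events on each platform.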

TEST(PerfCountersTest, Init) {
  EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
}

TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1);
}

TEST(PerfCountersTest, NegativeTest) {
  if (!PerfCounters::kSupported) {
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  // Safety checks. Create() always returns a valid object, even when
  // passed no arguments or invalid ones; the behavior is to warn about
  // and drop any unsupported counters.
  EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
  {
    // Try sneaking in a bad egg to see if it is filtered out. The number
    // of counters has to be two, not zero.
    auto counter =
        PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent2, kGenericPerfEvent1}));
  }
  {
    // Try sneaking in an outrageous counter, like a fat-finger mistake.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent2, kGenericPerfEvent1}));
  }
  {
    // Finally, try a golden input: it should accept both counters.
    EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2})
                  .num_counters(),
              2);
  }
  {
    // Add a bad apple at the end of the chain to check the edges.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent1, kGenericPerfEvent2}));
  }
}

TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(counters.num_counters(), 1);
  PerfCounterValues values1(1);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
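  // The counter keeps running between snapshots, so a second snapshot
  // should observe a strictly larger value.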
  PerfCounterValues values2(1);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[0], values1[0]);
}

TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_EQ(counters.num_counters(), 2);
  PerfCounterValues values1(2);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  EXPECT_GT(values1[1], 0);
  PerfCounterValues values2(2);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[1], 0);
}

TEST(PerfCountersTest, ReopenExistingCounters) {
  // This test works on recent and old Intel hardware, on Pixel 3, and on
  // Pixel 6. However, we cannot assume more than 2 HW counters because of
  // Pixel 6.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  std::vector<std::string> kMetrics({kGenericPerfEvent1});
  std::vector<PerfCounters> counters(2);
  for (auto& counter : counters) {
    counter = PerfCounters::Create(kMetrics);
  }
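  // Both handles were opened for the same event; snapshotting through
  // either one must still succeed.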
  PerfCounterValues values(1);
  EXPECT_TRUE(counters[0].Snapshot(&values));
  EXPECT_TRUE(counters[1].Snapshot(&values));
}

TEST(PerfCountersTest, CreateExistingMeasurements) {
  // Like the previous test, ReopenExistingCounters, this test (i.e.
  // making a read fail) relies on assumptions about current hardware
  // capabilities, namely a small number (2) of hardware counters.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // We will try 10 counters, but at this time we can only guarantee for
  // sure that 3 will work. Perhaps in the future we could use libpfm to
  // query the hardware limits of this particular platform.
  const size_t kMaxCounters = 10;
  const size_t kMinValidCounters = 2;

  // Let's use a ubiquitous counter that is guaranteed to work
  // on all platforms.
  const std::vector<std::string> kMetrics{"cycles"};

  // Cannot create a vector of actual objects because the copy
  // constructor of PerfCounters is deleted - and so it is implicitly
  // deleted in PerfCountersMeasurement too.
  std::vector<std::unique_ptr<PerfCountersMeasurement>>
      perf_counter_measurements;

  perf_counter_measurements.reserve(kMaxCounters);
  for (size_t j = 0; j < kMaxCounters; ++j) {
    perf_counter_measurements.emplace_back(
        new PerfCountersMeasurement(kMetrics));
  }

  std::vector<std::pair<std::string, double>> measurements;

  // Start all counters together to see if they hold; record how many
  // actually started before the hardware ran out of slots.
  size_t max_counters = kMaxCounters;
  for (size_t i = 0; i < kMaxCounters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    EXPECT_EQ(counter.num_counters(), 1);
    if (!counter.Start()) {
      max_counters = i;
      break;
    }
  }

  ASSERT_GE(max_counters, kMinValidCounters);

  // Stop all together
  for (size_t i = 0; i < max_counters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
  }

  // Start/stop individually
  for (size_t i = 0; i < max_counters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    measurements.clear();
    counter.Start();
    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
  }
}

// We try to do some meaningful work here, but the compiler insists on
// optimizing away our loop, so we had to add a no-optimize macro. In
// case it fails, we added some entropy to this pool as well.

BENCHMARK_DONT_OPTIMIZE size_t do_work() {
  static std::mt19937 rd{std::random_device{}()};
  static std::uniform_int_distribution<size_t> mrand(0, 10);
  const size_t kNumLoops = 1000000;
  size_t sum = 0;
  for (size_t j = 0; j < kNumLoops; ++j) {
    sum += mrand(rd);
  }
  benchmark::DoNotOptimize(sum);
  return sum;
}

void measure(size_t threadcount, PerfCounterValues* before,
             PerfCounterValues* after) {
  BM_CHECK_NE(before, nullptr);
  BM_CHECK_NE(after, nullptr);
  std::vector<std::thread> threads(threadcount);
  auto work = [&]() { BM_CHECK(do_work() > 1000); };

  // We need to first set up the counters and then start the threads, so
  // that the threads inherit the counters. But later, we need to first
  // destroy the thread pool (so all the work finishes) and then measure
  // the counters. So the scopes overlap, and we need to explicitly
  // control the scope of the thread pool.
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  for (auto& t : threads) t = std::thread(work);
  counters.Snapshot(before);
  for (auto& t : threads) t.join();
  counters.Snapshot(after);
}

TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  PerfCounterValues before(2);
  PerfCounterValues after(2);

  // Note that this test will work even if we taskset it to a single CPU;
  // in that case the threads will simply run sequentially.
  // Start two threads and measure the combined number of cycles and
  // instructions.
  measure(2, &before, &after);
  std::vector<double> Elapsed2Threads{
      static_cast<double>(after[0] - before[0]),
      static_cast<double>(after[1] - before[1])};

  // Start four threads and measure the combined number of cycles and
  // instructions.
  measure(4, &before, &after);
  std::vector<double> Elapsed4Threads{
      static_cast<double>(after[0] - before[0]),
      static_cast<double>(after[1] - before[1])};

  // Strict expectations here fail (at least on a beefy workstation with
  // lots of CPUs): it seems that in some circumstances the runtime of 4
  // threads can even be better than with 2. So instead of expecting 4
  // threads to be slower, let's just make sure the two runs do not differ
  // too much in general (one is not more than 10x the other).
  EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
  EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
}

TEST(PerfCountersTest, HardwareLimits) {
  // Like the previous test, ReopenExistingCounters, this test (i.e.
  // making a read fail) relies on assumptions about current hardware
  // capabilities, namely a small number (3-4) of hardware counters.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Taken from `perf list`, but focuses only on those HW events that were
  // actually reported when running `sudo perf stat -a sleep 10`,
  // intersected over several platforms. All HW events listed by the first
  // command but not reported by the second seem not to work. This is sad,
  // as we don't really get to test the grouping here (groups can contain
  // up to 6 members)...
  std::vector<std::string> counter_names{
      "cycles",         // leader
      "instructions",   //
      "branch-misses",  //
  };

  // On the off chance that some of these events are not supported, we
  // filter them out so the test completes without failure, although it
  // might not actually test the grouping on that platform.
  std::vector<std::string> valid_names;
  for (const std::string& name : counter_names) {
    if (PerfCounters::IsCounterSupported(name)) {
      valid_names.push_back(name);
    }
  }
  PerfCountersMeasurement counter(valid_names);

  std::vector<std::pair<std::string, double>> measurements;

  counter.Start();
  EXPECT_TRUE(counter.Stop(measurements));
}

}  // namespace