#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "../src/perf_counters.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
7 | |
#ifndef GTEST_SKIP
// Fallback for googletest versions that predate GTEST_SKIP: the macro
// expands to `return MsgHandler() = std::cout`, so the streamed skip
// message is swallowed by operator= and the test body returns early.
struct MsgHandler {
  // Accepts (and discards) the ostream produced by the `<<` chain.
  void operator=(std::ostream&) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif
14 | |
15 | using benchmark::internal::PerfCounters; |
16 | using benchmark::internal::PerfCountersMeasurement; |
17 | using benchmark::internal::PerfCounterValues; |
18 | using ::testing::AllOf; |
19 | using ::testing::Gt; |
20 | using ::testing::Lt; |
21 | |
22 | namespace { |
// Generic hardware event names (resolved by libpfm) used throughout the
// tests below; chosen because they are available on all supported platforms.
const char kGenericPerfEvent1[] = "CYCLES" ;
const char kGenericPerfEvent2[] = "INSTRUCTIONS" ;
25 | |
26 | TEST(PerfCountersTest, Init) { |
27 | EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported); |
28 | } |
29 | |
30 | TEST(PerfCountersTest, OneCounter) { |
31 | if (!PerfCounters::kSupported) { |
32 | GTEST_SKIP() << "Performance counters not supported.\n" ; |
33 | } |
34 | EXPECT_TRUE(PerfCounters::Initialize()); |
35 | EXPECT_EQ(PerfCounters::Create(counter_names: {kGenericPerfEvent1}).num_counters(), 1); |
36 | } |
37 | |
38 | TEST(PerfCountersTest, NegativeTest) { |
39 | if (!PerfCounters::kSupported) { |
40 | EXPECT_FALSE(PerfCounters::Initialize()); |
41 | return; |
42 | } |
43 | EXPECT_TRUE(PerfCounters::Initialize()); |
44 | // Safety checks |
45 | // Create() will always create a valid object, even if passed no or |
46 | // wrong arguments as the new behavior is to warn and drop unsupported |
47 | // counters |
48 | EXPECT_EQ(PerfCounters::Create(counter_names: {}).num_counters(), 0); |
49 | EXPECT_EQ(PerfCounters::Create(counter_names: {"" }).num_counters(), 0); |
50 | EXPECT_EQ(PerfCounters::Create(counter_names: {"not a counter name" }).num_counters(), 0); |
51 | { |
52 | // Try sneaking in a bad egg to see if it is filtered out. The |
53 | // number of counters has to be two, not zero |
54 | auto counter = |
55 | PerfCounters::Create(counter_names: {kGenericPerfEvent2, "" , kGenericPerfEvent1}); |
56 | EXPECT_EQ(counter.num_counters(), 2); |
57 | EXPECT_EQ(counter.names(), std::vector<std::string>( |
58 | {kGenericPerfEvent2, kGenericPerfEvent1})); |
59 | } |
60 | { |
61 | // Try sneaking in an outrageous counter, like a fat finger mistake |
62 | auto counter = PerfCounters::Create( |
63 | counter_names: {kGenericPerfEvent2, "not a counter name" , kGenericPerfEvent1}); |
64 | EXPECT_EQ(counter.num_counters(), 2); |
65 | EXPECT_EQ(counter.names(), std::vector<std::string>( |
66 | {kGenericPerfEvent2, kGenericPerfEvent1})); |
67 | } |
68 | { |
69 | // Finally try a golden input - it should like both of them |
70 | EXPECT_EQ(PerfCounters::Create(counter_names: {kGenericPerfEvent1, kGenericPerfEvent2}) |
71 | .num_counters(), |
72 | 2); |
73 | } |
74 | { |
75 | // Add a bad apple in the end of the chain to check the edges |
76 | auto counter = PerfCounters::Create( |
77 | counter_names: {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name" }); |
78 | EXPECT_EQ(counter.num_counters(), 2); |
79 | EXPECT_EQ(counter.names(), std::vector<std::string>( |
80 | {kGenericPerfEvent1, kGenericPerfEvent2})); |
81 | } |
82 | } |
83 | |
84 | TEST(PerfCountersTest, Read1Counter) { |
85 | if (!PerfCounters::kSupported) { |
86 | GTEST_SKIP() << "Test skipped because libpfm is not supported.\n" ; |
87 | } |
88 | EXPECT_TRUE(PerfCounters::Initialize()); |
89 | auto counters = PerfCounters::Create(counter_names: {kGenericPerfEvent1}); |
90 | EXPECT_EQ(counters.num_counters(), 1); |
91 | PerfCounterValues values1(1); |
92 | EXPECT_TRUE(counters.Snapshot(values: &values1)); |
93 | EXPECT_GT(values1[0], 0); |
94 | PerfCounterValues values2(1); |
95 | EXPECT_TRUE(counters.Snapshot(values: &values2)); |
96 | EXPECT_GT(values2[0], 0); |
97 | EXPECT_GT(values2[0], values1[0]); |
98 | } |
99 | |
100 | TEST(PerfCountersTest, Read2Counters) { |
101 | if (!PerfCounters::kSupported) { |
102 | GTEST_SKIP() << "Test skipped because libpfm is not supported.\n" ; |
103 | } |
104 | EXPECT_TRUE(PerfCounters::Initialize()); |
105 | auto counters = |
106 | PerfCounters::Create(counter_names: {kGenericPerfEvent1, kGenericPerfEvent2}); |
107 | EXPECT_EQ(counters.num_counters(), 2); |
108 | PerfCounterValues values1(2); |
109 | EXPECT_TRUE(counters.Snapshot(values: &values1)); |
110 | EXPECT_GT(values1[0], 0); |
111 | EXPECT_GT(values1[1], 0); |
112 | PerfCounterValues values2(2); |
113 | EXPECT_TRUE(counters.Snapshot(values: &values2)); |
114 | EXPECT_GT(values2[0], 0); |
115 | EXPECT_GT(values2[1], 0); |
116 | } |
117 | |
118 | TEST(PerfCountersTest, ReopenExistingCounters) { |
119 | // This test works in recent and old Intel hardware, Pixel 3, and Pixel 6. |
120 | // However we cannot make assumptions beyond 2 HW counters due to Pixel 6. |
121 | if (!PerfCounters::kSupported) { |
122 | GTEST_SKIP() << "Test skipped because libpfm is not supported.\n" ; |
123 | } |
124 | EXPECT_TRUE(PerfCounters::Initialize()); |
125 | std::vector<std::string> kMetrics({kGenericPerfEvent1}); |
126 | std::vector<PerfCounters> counters(2); |
127 | for (auto& counter : counters) { |
128 | counter = PerfCounters::Create(counter_names: kMetrics); |
129 | } |
130 | PerfCounterValues values(1); |
131 | EXPECT_TRUE(counters[0].Snapshot(values: &values)); |
132 | EXPECT_TRUE(counters[1].Snapshot(values: &values)); |
133 | } |
134 | |
135 | TEST(PerfCountersTest, CreateExistingMeasurements) { |
136 | // The test works (i.e. causes read to fail) for the assumptions |
137 | // about hardware capabilities (i.e. small number (2) hardware |
138 | // counters) at this date, |
139 | // the same as previous test ReopenExistingCounters. |
140 | if (!PerfCounters::kSupported) { |
141 | GTEST_SKIP() << "Test skipped because libpfm is not supported.\n" ; |
142 | } |
143 | EXPECT_TRUE(PerfCounters::Initialize()); |
144 | |
145 | // This means we will try 10 counters but we can only guarantee |
146 | // for sure at this time that only 3 will work. Perhaps in the future |
147 | // we could use libpfm to query for the hardware limits on this |
148 | // particular platform. |
149 | const int kMaxCounters = 10; |
150 | const int kMinValidCounters = 2; |
151 | |
152 | // Let's use a ubiquitous counter that is guaranteed to work |
153 | // on all platforms |
154 | const std::vector<std::string> kMetrics{"cycles" }; |
155 | |
156 | // Cannot create a vector of actual objects because the |
157 | // copy constructor of PerfCounters is deleted - and so is |
158 | // implicitly deleted on PerfCountersMeasurement too |
159 | std::vector<std::unique_ptr<PerfCountersMeasurement>> |
160 | perf_counter_measurements; |
161 | |
162 | perf_counter_measurements.reserve(n: kMaxCounters); |
163 | for (int j = 0; j < kMaxCounters; ++j) { |
164 | perf_counter_measurements.emplace_back( |
165 | args: new PerfCountersMeasurement(kMetrics)); |
166 | } |
167 | |
168 | std::vector<std::pair<std::string, double>> measurements; |
169 | |
170 | // Start all counters together to see if they hold |
171 | size_t max_counters = kMaxCounters; |
172 | for (size_t i = 0; i < kMaxCounters; ++i) { |
173 | auto& counter(*perf_counter_measurements[i]); |
174 | EXPECT_EQ(counter.num_counters(), 1); |
175 | if (!counter.Start()) { |
176 | max_counters = i; |
177 | break; |
178 | }; |
179 | } |
180 | |
181 | ASSERT_GE(max_counters, kMinValidCounters); |
182 | |
183 | // Start all together |
184 | for (size_t i = 0; i < max_counters; ++i) { |
185 | auto& counter(*perf_counter_measurements[i]); |
186 | EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters)); |
187 | } |
188 | |
189 | // Start/stop individually |
190 | for (size_t i = 0; i < max_counters; ++i) { |
191 | auto& counter(*perf_counter_measurements[i]); |
192 | measurements.clear(); |
193 | counter.Start(); |
194 | EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters)); |
195 | } |
196 | } |
197 | |
198 | // We try to do some meaningful work here but the compiler |
199 | // insists in optimizing away our loop so we had to add a |
200 | // no-optimize macro. In case it fails, we added some entropy |
201 | // to this pool as well. |
202 | |
203 | BENCHMARK_DONT_OPTIMIZE size_t do_work() { |
204 | static std::mt19937 rd{std::random_device{}()}; |
205 | static std::uniform_int_distribution<size_t> mrand(0, 10); |
206 | const size_t kNumLoops = 1000000; |
207 | size_t sum = 0; |
208 | for (size_t j = 0; j < kNumLoops; ++j) { |
209 | sum += mrand(rd); |
210 | } |
211 | benchmark::DoNotOptimize(value&: sum); |
212 | return sum; |
213 | } |
214 | |
215 | void measure(size_t threadcount, PerfCounterValues* before, |
216 | PerfCounterValues* after) { |
217 | BM_CHECK_NE(before, nullptr); |
218 | BM_CHECK_NE(after, nullptr); |
219 | std::vector<std::thread> threads(threadcount); |
220 | auto work = [&]() { BM_CHECK(do_work() > 1000); }; |
221 | |
222 | // We need to first set up the counters, then start the threads, so the |
223 | // threads would inherit the counters. But later, we need to first destroy |
224 | // the thread pool (so all the work finishes), then measure the counters. So |
225 | // the scopes overlap, and we need to explicitly control the scope of the |
226 | // threadpool. |
227 | auto counters = |
228 | PerfCounters::Create(counter_names: {kGenericPerfEvent1, kGenericPerfEvent2}); |
229 | for (auto& t : threads) t = std::thread(work); |
230 | counters.Snapshot(values: before); |
231 | for (auto& t : threads) t.join(); |
232 | counters.Snapshot(values: after); |
233 | } |
234 | |
235 | TEST(PerfCountersTest, MultiThreaded) { |
236 | if (!PerfCounters::kSupported) { |
237 | GTEST_SKIP() << "Test skipped because libpfm is not supported." ; |
238 | } |
239 | EXPECT_TRUE(PerfCounters::Initialize()); |
240 | PerfCounterValues before(2); |
241 | PerfCounterValues after(2); |
242 | |
243 | // Notice that this test will work even if we taskset it to a single CPU |
244 | // In this case the threads will run sequentially |
245 | // Start two threads and measure the number of combined cycles and |
246 | // instructions |
247 | measure(threadcount: 2, before: &before, after: &after); |
248 | std::vector<double> Elapsed2Threads{ |
249 | static_cast<double>(after[0] - before[0]), |
250 | static_cast<double>(after[1] - before[1])}; |
251 | |
252 | // Start four threads and measure the number of combined cycles and |
253 | // instructions |
254 | measure(threadcount: 4, before: &before, after: &after); |
255 | std::vector<double> Elapsed4Threads{ |
256 | static_cast<double>(after[0] - before[0]), |
257 | static_cast<double>(after[1] - before[1])}; |
258 | |
259 | // The following expectations fail (at least on a beefy workstation with lots |
260 | // of cpus) - it seems that in some circumstances the runtime of 4 threads |
261 | // can even be better than with 2. |
262 | // So instead of expecting 4 threads to be slower, let's just make sure they |
263 | // do not differ too much in general (one is not more than 10x than the |
264 | // other). |
265 | EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10))); |
266 | EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10))); |
267 | } |
268 | |
269 | TEST(PerfCountersTest, HardwareLimits) { |
270 | // The test works (i.e. causes read to fail) for the assumptions |
271 | // about hardware capabilities (i.e. small number (3-4) hardware |
272 | // counters) at this date, |
273 | // the same as previous test ReopenExistingCounters. |
274 | if (!PerfCounters::kSupported) { |
275 | GTEST_SKIP() << "Test skipped because libpfm is not supported.\n" ; |
276 | } |
277 | EXPECT_TRUE(PerfCounters::Initialize()); |
278 | |
279 | // Taken from `perf list`, but focusses only on those HW events that actually |
280 | // were reported when running `sudo perf stat -a sleep 10`, intersected over |
281 | // several platforms. All HW events listed in the first command not reported |
282 | // in the second seem to not work. This is sad as we don't really get to test |
283 | // the grouping here (groups can contain up to 6 members)... |
284 | std::vector<std::string> counter_names{ |
285 | "cycles" , // leader |
286 | "instructions" , // |
287 | "branch-misses" , // |
288 | }; |
289 | |
290 | // In the off-chance that some of these values are not supported, |
291 | // we filter them out so the test will complete without failure |
292 | // albeit it might not actually test the grouping on that platform |
293 | std::vector<std::string> valid_names; |
294 | for (const std::string& name : counter_names) { |
295 | if (PerfCounters::IsCounterSupported(name)) { |
296 | valid_names.push_back(x: name); |
297 | } |
298 | } |
299 | PerfCountersMeasurement counter(valid_names); |
300 | |
301 | std::vector<std::pair<std::string, double>> measurements; |
302 | |
303 | counter.Start(); |
304 | EXPECT_TRUE(counter.Stop(measurements)); |
305 | } |
306 | |
307 | } // namespace |
308 | |