#include <random>
#include <thread>

#include "../src/perf_counters.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

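// Older versions of googletest predate GTEST_SKIP. This fallback keeps the
// tests building there: the macro streams the skip message to stdout,
// discards the resulting ostream via MsgHandler::operator=, and returns from
// the test body early, approximating a real skip.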
#ifndef GTEST_SKIP
struct MsgHandler {
  void operator=(std::ostream&) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif

using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
using ::testing::AllOf;
using ::testing::Gt;
using ::testing::Lt;

namespace {
const char kGenericPerfEvent1[] = "CYCLES";
const char kGenericPerfEvent2[] = "INSTRUCTIONS";

TEST(PerfCountersTest, Init) {
  EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
}

TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1);
}

TEST(PerfCountersTest, NegativeTest) {
  if (!PerfCounters::kSupported) {
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  // Safety checks
  // Create() always returns a valid object, even when passed no arguments
  // or invalid ones; the behavior is to warn about and drop unsupported
  // counters.
  EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
  {
    // Try sneaking in a bad egg to see if it is filtered out. The
    // number of counters has to be two, not zero
    auto counter =
        PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent2, kGenericPerfEvent1}));
  }
  {
    // Try sneaking in an outrageous counter, like a fat finger mistake
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent2, kGenericPerfEvent1}));
  }
  {
    // Finally try a golden input - it should accept both of them
    EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2})
                  .num_counters(),
              2);
  }
  {
    // Add a bad apple at the end of the chain to check the edges
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent1, kGenericPerfEvent2}));
  }
}

TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(counters.num_counters(), 1);
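  // A PerfCounterValues is constructed with the number of values it holds;
  // Snapshot() then fills in one reading per configured counter.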
  PerfCounterValues values1(1);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  PerfCounterValues values2(1);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[0], values1[0]);
}

TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_EQ(counters.num_counters(), 2);
  PerfCounterValues values1(2);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  EXPECT_GT(values1[1], 0);
  PerfCounterValues values2(2);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[1], 0);
}

TEST(PerfCountersTest, ReopenExistingCounters) {
  // This test works on recent and old Intel hardware, Pixel 3, and Pixel 6.
  // However, we cannot assume more than 2 HW counters due to Pixel 6.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  std::vector<std::string> kMetrics({kGenericPerfEvent1});
  std::vector<PerfCounters> counters(2);
  for (auto& counter : counters) {
    counter = PerfCounters::Create(kMetrics);
  }
  PerfCounterValues values(1);
  EXPECT_TRUE(counters[0].Snapshot(&values));
  EXPECT_TRUE(counters[1].Snapshot(&values));
}

TEST(PerfCountersTest, CreateExistingMeasurements) {
  // Like ReopenExistingCounters above, this test relies on current hardware
  // capabilities (a small number of HW counters, around 2) to make reads
  // fail once too many measurements are open at the same time.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // This means we will try 10 counters, but at this time we can only
  // guarantee that at least 2 will work. Perhaps in the future we could use
  // libpfm to query the hardware limits on this particular platform.
  const int kMaxCounters = 10;
  const int kMinValidCounters = 2;

  // Let's use a ubiquitous counter that is guaranteed to work
  // on all platforms
  const std::vector<std::string> kMetrics{"cycles"};

  // Cannot create a vector of actual objects because the copy constructor
  // of PerfCounters is deleted, and so the one on PerfCountersMeasurement
  // is implicitly deleted too.
  std::vector<std::unique_ptr<PerfCountersMeasurement>>
      perf_counter_measurements;

  perf_counter_measurements.reserve(kMaxCounters);
  for (int j = 0; j < kMaxCounters; ++j) {
    perf_counter_measurements.emplace_back(
        new PerfCountersMeasurement(kMetrics));
  }

  std::vector<std::pair<std::string, double>> measurements;

  // Start all counters together to see how many can be open at once
  size_t max_counters = kMaxCounters;
  for (size_t i = 0; i < kMaxCounters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    EXPECT_EQ(counter.num_counters(), 1);
    if (!counter.Start()) {
      max_counters = i;
      break;
    }
  }

  ASSERT_GE(max_counters, kMinValidCounters);

  // Stop all of them together; only the first kMinValidCounters are
  // required to succeed.
  for (size_t i = 0; i < max_counters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
  }

  // Start/stop individually
  for (size_t i = 0; i < max_counters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    measurements.clear();
    counter.Start();
    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
  }
}

// We try to do some meaningful work here, but the compiler
// insists on optimizing away our loop, so we had to add a
// no-optimize macro. In case it fails, we added some entropy
// to this pool as well.

BENCHMARK_DONT_OPTIMIZE size_t do_work() {
  static std::mt19937 rd{std::random_device{}()};
  static std::uniform_int_distribution<size_t> mrand(0, 10);
  const size_t kNumLoops = 1000000;
  size_t sum = 0;
  for (size_t j = 0; j < kNumLoops; ++j) {
    sum += mrand(rd);
  }
  benchmark::DoNotOptimize(sum);
  return sum;
}

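// Runs do_work() on `threadcount` threads, taking a counter snapshot right
// after the threads are launched and another after they have all joined, so
// the deltas cover the threads' combined work.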
void measure(size_t threadcount, PerfCounterValues* before,
             PerfCounterValues* after) {
  BM_CHECK_NE(before, nullptr);
  BM_CHECK_NE(after, nullptr);
  std::vector<std::thread> threads(threadcount);
  auto work = [&]() { BM_CHECK(do_work() > 1000); };

  // We need to first set up the counters, then start the threads, so that
  // the threads inherit the counters. But later, we need to first destroy
  // the thread pool (so all the work finishes), then measure the counters.
  // So the scopes overlap, and we need to explicitly control the scope of
  // the threadpool.
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  for (auto& t : threads) t = std::thread(work);
  counters.Snapshot(before);
  for (auto& t : threads) t.join();
  counters.Snapshot(after);
}

TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  PerfCounterValues before(2);
  PerfCounterValues after(2);

  // Notice that this test will work even if we taskset it to a single CPU;
  // in that case the threads will simply run sequentially.
  // Start two threads and measure the combined number of cycles and
  // instructions.
  measure(2, &before, &after);
  std::vector<double> Elapsed2Threads{
      static_cast<double>(after[0] - before[0]),
      static_cast<double>(after[1] - before[1])};

  // Start four threads and measure the combined number of cycles and
  // instructions.
  measure(4, &before, &after);
  std::vector<double> Elapsed4Threads{
      static_cast<double>(after[0] - before[0]),
      static_cast<double>(after[1] - before[1])};

  // The naive expectation (that 4 threads cost measurably more than 2) fails
  // at least on a beefy workstation with lots of cpus; it seems that in some
  // circumstances the runtime with 4 threads can even be better than with 2.
  // So instead of expecting 4 threads to be slower, let's just make sure the
  // two runs do not differ too much in general (one is not more than 10x
  // the other).
  EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
  EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
}

TEST(PerfCountersTest, HardwareLimits) {
  // Like ReopenExistingCounters above, this test relies on current hardware
  // capabilities (a small number of HW counters, around 3-4) to make reads
  // fail once the limit is exceeded.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());


  // Taken from `perf list`, but focuses only on those HW events that were
  // actually reported when running `sudo perf stat -a sleep 10`, intersected
  // over several platforms. All HW events listed by the first command but not
  // reported by the second seem not to work. This is sad, as we don't really
  // get to test the grouping here (groups can contain up to 6 members)...
  std::vector<std::string> counter_names{
      "cycles",         // leader
      "instructions",   //
      "branch-misses",  //
  };

  // In the off chance that some of these counters are not supported,
  // we filter them out so the test will complete without failure,
  // although it might not actually test the grouping on that platform.
  std::vector<std::string> valid_names;
  for (const std::string& name : counter_names) {
    if (PerfCounters::IsCounterSupported(name)) {
      valid_names.push_back(name);
    }
  }
  PerfCountersMeasurement counter(valid_names);

  std::vector<std::pair<std::string, double>> measurements;

  counter.Start();
  EXPECT_TRUE(counter.Stop(measurements));
}

}  // namespace