1// Copyright 2021 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef BENCHMARK_PERF_COUNTERS_H
16#define BENCHMARK_PERF_COUNTERS_H
17
18#include <array>
19#include <cstdint>
20#include <cstring>
21#include <memory>
22#include <vector>
23
24#include "benchmark/benchmark.h"
25#include "check.h"
26#include "log.h"
27#include "mutex.h"
28
29#ifndef BENCHMARK_OS_WINDOWS
30#include <unistd.h>
31#endif
32
33#if defined(_MSC_VER)
34#pragma warning(push)
35// C4251: <symbol> needs to have dll-interface to be used by clients of class
36#pragma warning(disable : 4251)
37#endif
38
39namespace benchmark {
40namespace internal {
41
42// Typically, we can only read a small number of counters. There is also a
43// padding preceding counter values, when reading multiple counters with one
44// syscall (which is desirable). PerfCounterValues abstracts these details.
45// The implementation ensures the storage is inlined, and allows 0-based
46// indexing into the counter values.
47// The object is used in conjunction with a PerfCounters object, by passing it
48// to Snapshot(). The Read() method relocates individual reads, discarding
49// the initial padding from each group leader in the values buffer such that
50// all user accesses through the [] operator are correct.
51class BENCHMARK_EXPORT PerfCounterValues {
52 public:
53 explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
54 BM_CHECK_LE(nr_counters_, kMaxCounters);
55 }
56
57 // We are reading correctly now so the values don't need to skip padding
58 uint64_t operator[](size_t pos) const { return values_[pos]; }
59
60 // Increased the maximum to 32 only since the buffer
61 // is std::array<> backed
62 static constexpr size_t kMaxCounters = 32;
63
64 private:
65 friend class PerfCounters;
66 // Get the byte buffer in which perf counters can be captured.
67 // This is used by PerfCounters::Read
68 std::pair<char*, size_t> get_data_buffer() {
69 return {reinterpret_cast<char*>(values_.data()),
70 sizeof(uint64_t) * (kPadding + nr_counters_)};
71 }
72
73 // This reading is complex and as the goal of this class is to
74 // abstract away the intrincacies of the reading process, this is
75 // a better place for it
76 size_t Read(const std::vector<int>& leaders);
77
78 // Move the padding to 2 due to the reading algorithm (1st padding plus a
79 // current read padding)
80 static constexpr size_t kPadding = 2;
81 std::array<uint64_t, kPadding + kMaxCounters> values_;
82 const size_t nr_counters_;
83};
84
// Collect PMU counters. The object, once constructed, is ready to be used by
// calling Snapshot(). PMU counter collection is enabled from the time Create()
// is called, to obtain the object, until the object's destructor is called.
88class BENCHMARK_EXPORT PerfCounters final {
89 public:
90 // True iff this platform supports performance counters.
91 static const bool kSupported;
92
93 // Returns an empty object
94 static PerfCounters NoCounters() { return PerfCounters(); }
95
96 ~PerfCounters() { CloseCounters(); }
97 PerfCounters() = default;
98 PerfCounters(PerfCounters&&) = default;
99 PerfCounters(const PerfCounters&) = delete;
100 PerfCounters& operator=(PerfCounters&&) noexcept;
101 PerfCounters& operator=(const PerfCounters&) = delete;
102
103 // Platform-specific implementations may choose to do some library
104 // initialization here.
105 static bool Initialize();
106
107 // Check if the given counter is supported, if the app wants to
108 // check before passing
109 static bool IsCounterSupported(const std::string& name);
110
111 // Return a PerfCounters object ready to read the counters with the names
112 // specified. The values are user-mode only. The counter name format is
113 // implementation and OS specific.
114 // In case of failure, this method will in the worst case return an
115 // empty object whose state will still be valid.
116 static PerfCounters Create(const std::vector<std::string>& counter_names);
117
118 // Take a snapshot of the current value of the counters into the provided
119 // valid PerfCounterValues storage. The values are populated such that:
120 // names()[i]'s value is (*values)[i]
121 BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
122#ifndef BENCHMARK_OS_WINDOWS
123 assert(values != nullptr);
124 return values->Read(leaders: leader_ids_) == counter_ids_.size();
125#else
126 (void)values;
127 return false;
128#endif
129 }
130
131 const std::vector<std::string>& names() const { return counter_names_; }
132 size_t num_counters() const { return counter_names_.size(); }
133
134 private:
135 PerfCounters(const std::vector<std::string>& counter_names,
136 std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
137 : counter_ids_(std::move(counter_ids)),
138 leader_ids_(std::move(leader_ids)),
139 counter_names_(counter_names) {}
140
141 void CloseCounters() const;
142
143 std::vector<int> counter_ids_;
144 std::vector<int> leader_ids_;
145 std::vector<std::string> counter_names_;
146};
147
148// Typical usage of the above primitives.
149class BENCHMARK_EXPORT PerfCountersMeasurement final {
150 public:
151 PerfCountersMeasurement(const std::vector<std::string>& counter_names);
152
153 size_t num_counters() const { return counters_.num_counters(); }
154
155 std::vector<std::string> names() const { return counters_.names(); }
156
157 BENCHMARK_ALWAYS_INLINE bool Start() {
158 if (num_counters() == 0) return true;
159 // Tell the compiler to not move instructions above/below where we take
160 // the snapshot.
161 ClobberMemory();
162 valid_read_ &= counters_.Snapshot(values: &start_values_);
163 ClobberMemory();
164
165 return valid_read_;
166 }
167
168 BENCHMARK_ALWAYS_INLINE bool Stop(
169 std::vector<std::pair<std::string, double>>& measurements) {
170 if (num_counters() == 0) return true;
171 // Tell the compiler to not move instructions above/below where we take
172 // the snapshot.
173 ClobberMemory();
174 valid_read_ &= counters_.Snapshot(values: &end_values_);
175 ClobberMemory();
176
177 for (size_t i = 0; i < counters_.names().size(); ++i) {
178 double measurement = static_cast<double>(end_values_[i]) -
179 static_cast<double>(start_values_[i]);
180 measurements.push_back(x: {counters_.names()[i], measurement});
181 }
182
183 return valid_read_;
184 }
185
186 private:
187 PerfCounters counters_;
188 bool valid_read_ = true;
189 PerfCounterValues start_values_;
190 PerfCounterValues end_values_;
191};
192
193} // namespace internal
194} // namespace benchmark
195
196#if defined(_MSC_VER)
197#pragma warning(pop)
198#endif
199
200#endif // BENCHMARK_PERF_COUNTERS_H
201

// Source: third-party/benchmark/src/perf_counters.h