| 1 | // Copyright 2021 Google Inc. All rights reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #ifndef BENCHMARK_PERF_COUNTERS_H |
| 16 | #define BENCHMARK_PERF_COUNTERS_H |
| 17 | |
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>
| 23 | |
| 24 | #include "benchmark/benchmark.h" |
| 25 | #include "check.h" |
| 26 | #include "log.h" |
| 27 | #include "mutex.h" |
| 28 | |
| 29 | #ifndef BENCHMARK_OS_WINDOWS |
| 30 | #include <unistd.h> |
| 31 | #endif |
| 32 | |
| 33 | #if defined(_MSC_VER) |
| 34 | #pragma warning(push) |
| 35 | // C4251: <symbol> needs to have dll-interface to be used by clients of class |
| 36 | #pragma warning(disable : 4251) |
| 37 | #endif |
| 38 | |
| 39 | namespace benchmark { |
| 40 | namespace internal { |
| 41 | |
| 42 | // Typically, we can only read a small number of counters. There is also a |
| 43 | // padding preceding counter values, when reading multiple counters with one |
| 44 | // syscall (which is desirable). PerfCounterValues abstracts these details. |
| 45 | // The implementation ensures the storage is inlined, and allows 0-based |
| 46 | // indexing into the counter values. |
| 47 | // The object is used in conjunction with a PerfCounters object, by passing it |
| 48 | // to Snapshot(). The Read() method relocates individual reads, discarding |
| 49 | // the initial padding from each group leader in the values buffer such that |
| 50 | // all user accesses through the [] operator are correct. |
| 51 | class BENCHMARK_EXPORT PerfCounterValues { |
| 52 | public: |
| 53 | explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { |
| 54 | BM_CHECK_LE(nr_counters_, kMaxCounters); |
| 55 | } |
| 56 | |
| 57 | // We are reading correctly now so the values don't need to skip padding |
| 58 | uint64_t operator[](size_t pos) const { return values_[pos]; } |
| 59 | |
| 60 | // Increased the maximum to 32 only since the buffer |
| 61 | // is std::array<> backed |
| 62 | static constexpr size_t kMaxCounters = 32; |
| 63 | |
| 64 | private: |
| 65 | friend class PerfCounters; |
| 66 | // Get the byte buffer in which perf counters can be captured. |
| 67 | // This is used by PerfCounters::Read |
| 68 | std::pair<char*, size_t> get_data_buffer() { |
| 69 | return {reinterpret_cast<char*>(values_.data()), |
| 70 | sizeof(uint64_t) * (kPadding + nr_counters_)}; |
| 71 | } |
| 72 | |
| 73 | // This reading is complex and as the goal of this class is to |
| 74 | // abstract away the intrincacies of the reading process, this is |
| 75 | // a better place for it |
| 76 | size_t Read(const std::vector<int>& leaders); |
| 77 | |
| 78 | // Move the padding to 2 due to the reading algorithm (1st padding plus a |
| 79 | // current read padding) |
| 80 | static constexpr size_t kPadding = 2; |
| 81 | std::array<uint64_t, kPadding + kMaxCounters> values_; |
| 82 | const size_t nr_counters_; |
| 83 | }; |
| 84 | |
| 85 | // Collect PMU counters. The object, once constructed, is ready to be used by |
| 86 | // calling read(). PMU counter collection is enabled from the time create() is |
| 87 | // called, to obtain the object, until the object's destructor is called. |
| 88 | class BENCHMARK_EXPORT PerfCounters final { |
| 89 | public: |
| 90 | // True iff this platform supports performance counters. |
| 91 | static const bool kSupported; |
| 92 | |
| 93 | // Returns an empty object |
| 94 | static PerfCounters NoCounters() { return PerfCounters(); } |
| 95 | |
| 96 | ~PerfCounters() { CloseCounters(); } |
| 97 | PerfCounters() = default; |
| 98 | PerfCounters(PerfCounters&&) = default; |
| 99 | PerfCounters(const PerfCounters&) = delete; |
| 100 | PerfCounters& operator=(PerfCounters&&) noexcept; |
| 101 | PerfCounters& operator=(const PerfCounters&) = delete; |
| 102 | |
| 103 | // Platform-specific implementations may choose to do some library |
| 104 | // initialization here. |
| 105 | static bool Initialize(); |
| 106 | |
| 107 | // Check if the given counter is supported, if the app wants to |
| 108 | // check before passing |
| 109 | static bool IsCounterSupported(const std::string& name); |
| 110 | |
| 111 | // Return a PerfCounters object ready to read the counters with the names |
| 112 | // specified. The values are user-mode only. The counter name format is |
| 113 | // implementation and OS specific. |
| 114 | // In case of failure, this method will in the worst case return an |
| 115 | // empty object whose state will still be valid. |
| 116 | static PerfCounters Create(const std::vector<std::string>& counter_names); |
| 117 | |
| 118 | // Take a snapshot of the current value of the counters into the provided |
| 119 | // valid PerfCounterValues storage. The values are populated such that: |
| 120 | // names()[i]'s value is (*values)[i] |
| 121 | BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { |
| 122 | #ifndef BENCHMARK_OS_WINDOWS |
| 123 | assert(values != nullptr); |
| 124 | return values->Read(leaders: leader_ids_) == counter_ids_.size(); |
| 125 | #else |
| 126 | (void)values; |
| 127 | return false; |
| 128 | #endif |
| 129 | } |
| 130 | |
| 131 | const std::vector<std::string>& names() const { return counter_names_; } |
| 132 | size_t num_counters() const { return counter_names_.size(); } |
| 133 | |
| 134 | private: |
| 135 | PerfCounters(const std::vector<std::string>& counter_names, |
| 136 | std::vector<int>&& counter_ids, std::vector<int>&& leader_ids) |
| 137 | : counter_ids_(std::move(counter_ids)), |
| 138 | leader_ids_(std::move(leader_ids)), |
| 139 | counter_names_(counter_names) {} |
| 140 | |
| 141 | void CloseCounters() const; |
| 142 | |
| 143 | std::vector<int> counter_ids_; |
| 144 | std::vector<int> leader_ids_; |
| 145 | std::vector<std::string> counter_names_; |
| 146 | }; |
| 147 | |
| 148 | // Typical usage of the above primitives. |
| 149 | class BENCHMARK_EXPORT PerfCountersMeasurement final { |
| 150 | public: |
| 151 | PerfCountersMeasurement(const std::vector<std::string>& counter_names); |
| 152 | |
| 153 | size_t num_counters() const { return counters_.num_counters(); } |
| 154 | |
| 155 | std::vector<std::string> names() const { return counters_.names(); } |
| 156 | |
| 157 | BENCHMARK_ALWAYS_INLINE bool Start() { |
| 158 | if (num_counters() == 0) return true; |
| 159 | // Tell the compiler to not move instructions above/below where we take |
| 160 | // the snapshot. |
| 161 | ClobberMemory(); |
| 162 | valid_read_ &= counters_.Snapshot(values: &start_values_); |
| 163 | ClobberMemory(); |
| 164 | |
| 165 | return valid_read_; |
| 166 | } |
| 167 | |
| 168 | BENCHMARK_ALWAYS_INLINE bool Stop( |
| 169 | std::vector<std::pair<std::string, double>>& measurements) { |
| 170 | if (num_counters() == 0) return true; |
| 171 | // Tell the compiler to not move instructions above/below where we take |
| 172 | // the snapshot. |
| 173 | ClobberMemory(); |
| 174 | valid_read_ &= counters_.Snapshot(values: &end_values_); |
| 175 | ClobberMemory(); |
| 176 | |
| 177 | for (size_t i = 0; i < counters_.names().size(); ++i) { |
| 178 | double measurement = static_cast<double>(end_values_[i]) - |
| 179 | static_cast<double>(start_values_[i]); |
| 180 | measurements.push_back(x: {counters_.names()[i], measurement}); |
| 181 | } |
| 182 | |
| 183 | return valid_read_; |
| 184 | } |
| 185 | |
| 186 | private: |
| 187 | PerfCounters counters_; |
| 188 | bool valid_read_ = true; |
| 189 | PerfCounterValues start_values_; |
| 190 | PerfCounterValues end_values_; |
| 191 | }; |
| 192 | |
| 193 | } // namespace internal |
| 194 | } // namespace benchmark |
| 195 | |
| 196 | #if defined(_MSC_VER) |
| 197 | #pragma warning(pop) |
| 198 | #endif |
| 199 | |
| 200 | #endif // BENCHMARK_PERF_COUNTERS_H |
| 201 | |