1//===-- Benchmark function --------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// This file mainly defines a `Benchmark` function.
10//
11// The benchmarking process is as follows:
12// - We start by measuring the time it takes to run the function
13// `InitialIterations` times. This is called a Sample. From this we can derive
14// the time it took to run a single iteration.
15//
16// - We repeat the previous step with a greater number of iterations to lower
17// the impact of the measurement. We can derive a more precise estimation of the
18// runtime for a single iteration.
19//
20// - Each sample gives a more accurate estimation of the runtime for a single
21// iteration but also takes more time to run. We stop the process when:
22// * The measure stabilize under a certain precision (Epsilon),
23// * The overall benchmarking time is greater than MaxDuration,
24// * The overall sample count is greater than MaxSamples,
25// * The last sample used more than MaxIterations iterations.
26//
27// - We also makes sure that the benchmark doesn't run for a too short period of
28// time by defining MinDuration and MinSamples.
29
30#ifndef LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
31#define LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
32
33#include "benchmark/benchmark.h"
34#include "llvm/ADT/ArrayRef.h"
35#include "llvm/ADT/SmallVector.h"
36#include <array>
37#include <chrono>
38#include <cmath>
39#include <cstdint>
40#include <optional>
41
42namespace llvm {
43namespace libc_benchmarks {
44
45using Duration = std::chrono::duration<double>;
46
47enum class BenchmarkLog {
48 None, // Don't keep the internal state of the benchmark.
49 Last, // Keep only the last batch.
50 Full // Keep all iterations states, useful for testing or debugging.
51};
52
53// An object to configure the benchmark stopping conditions.
54// See documentation at the beginning of the file for the overall algorithm and
55// meaning of each field.
56struct BenchmarkOptions {
57 // The minimum time for which the benchmark is running.
58 Duration MinDuration = std::chrono::seconds(0);
59 // The maximum time for which the benchmark is running.
60 Duration MaxDuration = std::chrono::seconds(10);
61 // The number of iterations in the first sample.
62 uint32_t InitialIterations = 1;
63 // The maximum number of iterations for any given sample.
64 uint32_t MaxIterations = 10000000;
65 // The minimum number of samples.
66 uint32_t MinSamples = 4;
67 // The maximum number of samples.
68 uint32_t MaxSamples = 1000;
69 // The benchmark will stop if the relative difference between the current and
70 // the last estimation is less than epsilon. This is 1% by default.
71 double Epsilon = 0.01;
72 // The number of iterations grows exponentially between each sample.
73 // Must be greater or equal to 1.
74 double ScalingFactor = 1.4;
75 BenchmarkLog Log = BenchmarkLog::None;
76};
77
78// The state of a benchmark.
79enum class BenchmarkStatus {
80 Running,
81 MaxDurationReached,
82 MaxIterationsReached,
83 MaxSamplesReached,
84 PrecisionReached,
85};
86
87// The internal state of the benchmark, useful to debug, test or report
88// statistics.
89struct BenchmarkState {
90 size_t LastSampleIterations;
91 Duration LastBatchElapsed;
92 BenchmarkStatus CurrentStatus;
93 Duration CurrentBestGuess; // The time estimation for a single run of `foo`.
94 double ChangeRatio; // The change in time estimation between previous and
95 // current samples.
96};
97
98// A lightweight result for a benchmark.
99struct BenchmarkResult {
100 BenchmarkStatus TerminationStatus = BenchmarkStatus::Running;
101 Duration BestGuess = {};
102 std::optional<llvm::SmallVector<BenchmarkState, 16>> MaybeBenchmarkLog;
103};
104
105// Stores information about a cache in the host memory system.
106struct CacheInfo {
107 std::string Type; // e.g. "Instruction", "Data", "Unified".
108 int Level; // 0 is closest to processing unit.
109 int Size; // In bytes.
110 int NumSharing; // The number of processing units (Hyper-Threading Thread)
111 // with which this cache is shared.
112};
113
114// Stores information about the host.
115struct HostState {
116 std::string CpuName; // returns a string compatible with the -march option.
117 double CpuFrequency; // in Hertz.
118 std::vector<CacheInfo> Caches;
119
120 static HostState get();
121};
122
123namespace internal {
124
125struct Measurement {
126 size_t Iterations = 0;
127 Duration Elapsed = {};
128};
129
130// Updates the estimation of the elapsed time for a single iteration.
131class RefinableRuntimeEstimation {
132 Duration TotalTime = {};
133 size_t TotalIterations = 0;
134
135public:
136 Duration update(const Measurement &M) {
137 assert(M.Iterations > 0);
138 // Duration is encoded as a double (see definition).
139 // `TotalTime` and `M.Elapsed` are of the same magnitude so we don't expect
140 // loss of precision due to radically different scales.
141 TotalTime += M.Elapsed;
142 TotalIterations += M.Iterations;
143 return TotalTime / TotalIterations;
144 }
145};
146
147// This class tracks the progression of the runtime estimation.
148class RuntimeEstimationProgression {
149 RefinableRuntimeEstimation RRE;
150
151public:
152 Duration CurrentEstimation = {};
153
154 // Returns the change ratio between our best guess so far and the one from the
155 // new measurement.
156 double computeImprovement(const Measurement &M) {
157 const Duration NewEstimation = RRE.update(M);
158 const double Ratio = fabs(x: ((CurrentEstimation / NewEstimation) - 1.0));
159 CurrentEstimation = NewEstimation;
160 return Ratio;
161 }
162};
163
164} // namespace internal
165
166// Measures the runtime of `foo` until conditions defined by `Options` are met.
167//
168// To avoid measurement's imprecisions we measure batches of `foo`.
169// The batch size is growing by `ScalingFactor` to minimize the effect of
170// measuring.
171//
172// Note: The benchmark is not responsible for serializing the executions of
173// `foo`. It is not suitable for measuring, very small & side effect free
174// functions, as the processor is free to execute several executions in
175// parallel.
176//
177// - Options: A set of parameters controlling the stopping conditions for the
178// benchmark.
179// - foo: The function under test. It takes one value and returns one value.
180// The input value is used to randomize the execution of `foo` as part of a
181// batch to mitigate the effect of the branch predictor. Signature:
182// `ProductType foo(ParameterProvider::value_type value);`
183// The output value is a product of the execution of `foo` and prevents the
184// compiler from optimizing out foo's body.
185// - ParameterProvider: An object responsible for providing a range of
186// `Iterations` values to use as input for `foo`. The `value_type` of the
187// returned container has to be compatible with `foo` argument.
188// Must implement one of:
189// `Container<ParameterType> generateBatch(size_t Iterations);`
190// `const Container<ParameterType>& generateBatch(size_t Iterations);`
191// - Clock: An object providing the current time. Must implement:
192// `std::chrono::time_point now();`
193template <typename Function, typename ParameterProvider,
194 typename BenchmarkClock = const std::chrono::high_resolution_clock>
195BenchmarkResult benchmark(const BenchmarkOptions &Options,
196 ParameterProvider &PP, Function foo,
197 BenchmarkClock &Clock = BenchmarkClock()) {
198 BenchmarkResult Result;
199 internal::RuntimeEstimationProgression REP;
200 Duration TotalBenchmarkDuration = {};
201 size_t Iterations = std::max(a: Options.InitialIterations, b: uint32_t(1));
202 size_t Samples = 0;
203 if (Options.ScalingFactor < 1.0)
204 report_fatal_error("ScalingFactor should be >= 1");
205 if (Options.Log != BenchmarkLog::None)
206 Result.MaybeBenchmarkLog.emplace();
207 for (;;) {
208 // Request a new Batch of size `Iterations`.
209 const auto &Batch = PP.generateBatch(Iterations);
210
211 // Measuring this Batch.
212 const auto StartTime = Clock.now();
213 for (const auto Parameter : Batch) {
214 const auto Production = foo(Parameter);
215 benchmark::DoNotOptimize(Production);
216 }
217 const auto EndTime = Clock.now();
218 const Duration Elapsed = EndTime - StartTime;
219
220 // Updating statistics.
221 ++Samples;
222 TotalBenchmarkDuration += Elapsed;
223 const double ChangeRatio = REP.computeImprovement(M: {.Iterations: Iterations, .Elapsed: Elapsed});
224 Result.BestGuess = REP.CurrentEstimation;
225
226 // Stopping condition.
227 if (TotalBenchmarkDuration >= Options.MinDuration &&
228 Samples >= Options.MinSamples && ChangeRatio < Options.Epsilon)
229 Result.TerminationStatus = BenchmarkStatus::PrecisionReached;
230 else if (Samples >= Options.MaxSamples)
231 Result.TerminationStatus = BenchmarkStatus::MaxSamplesReached;
232 else if (TotalBenchmarkDuration >= Options.MaxDuration)
233 Result.TerminationStatus = BenchmarkStatus::MaxDurationReached;
234 else if (Iterations >= Options.MaxIterations)
235 Result.TerminationStatus = BenchmarkStatus::MaxIterationsReached;
236
237 if (Result.MaybeBenchmarkLog) {
238 auto &BenchmarkLog = *Result.MaybeBenchmarkLog;
239 if (Options.Log == BenchmarkLog::Last && !BenchmarkLog.empty())
240 BenchmarkLog.pop_back();
241 BenchmarkState BS;
242 BS.LastSampleIterations = Iterations;
243 BS.LastBatchElapsed = Elapsed;
244 BS.CurrentStatus = Result.TerminationStatus;
245 BS.CurrentBestGuess = Result.BestGuess;
246 BS.ChangeRatio = ChangeRatio;
247 BenchmarkLog.push_back(BS);
248 }
249
250 if (Result.TerminationStatus != BenchmarkStatus::Running)
251 return Result;
252
253 if (Options.ScalingFactor > 1 &&
254 Iterations * Options.ScalingFactor == Iterations)
255 report_fatal_error(
256 "`Iterations *= ScalingFactor` is idempotent, increase ScalingFactor "
257 "or InitialIterations.");
258
259 Iterations *= Options.ScalingFactor;
260 }
261}
262
263// Interprets `Array` as a circular buffer of `Size` elements.
264template <typename T> class CircularArrayRef {
265 llvm::ArrayRef<T> Array;
266 size_t Size;
267
268public:
269 using value_type = T;
270 using reference = T &;
271 using const_reference = const T &;
272 using difference_type = ssize_t;
273 using size_type = size_t;
274
275 class const_iterator {
276 using iterator_category = std::input_iterator_tag;
277 llvm::ArrayRef<T> Array;
278 size_t Index;
279 size_t Offset;
280
281 public:
282 explicit const_iterator(llvm::ArrayRef<T> Array, size_t Index = 0)
283 : Array(Array), Index(Index), Offset(Index % Array.size()) {}
284 const_iterator &operator++() {
285 ++Index;
286 ++Offset;
287 if (Offset == Array.size())
288 Offset = 0;
289 return *this;
290 }
291 bool operator==(const_iterator Other) const { return Index == Other.Index; }
292 bool operator!=(const_iterator Other) const { return !(*this == Other); }
293 const T &operator*() const { return Array[Offset]; }
294 };
295
296 CircularArrayRef(llvm::ArrayRef<T> Array, size_t Size)
297 : Array(Array), Size(Size) {
298 assert(Array.size() > 0);
299 }
300
301 const_iterator begin() const { return const_iterator(Array); }
302 const_iterator end() const { return const_iterator(Array, Size); }
303};
304
305// A convenient helper to produce a CircularArrayRef from an ArrayRef.
306template <typename T>
307CircularArrayRef<T> cycle(llvm::ArrayRef<T> Array, size_t Size) {
308 return {Array, Size};
309}
310
311// Creates an std::array which storage size is constrained under `Bytes`.
312template <typename T, size_t Bytes>
313using ByteConstrainedArray = std::array<T, Bytes / sizeof(T)>;
314
315// A convenient helper to produce a CircularArrayRef from a
316// ByteConstrainedArray.
317template <typename T, size_t N>
318CircularArrayRef<T> cycle(const std::array<T, N> &Container, size_t Size) {
319 return {llvm::ArrayRef<T>(Container.cbegin(), Container.cend()), Size};
320}
321
322// Makes sure the binary was compiled in release mode and that frequency
323// governor is set on performance.
324void checkRequirements();
325
326} // namespace libc_benchmarks
327} // namespace llvm
328
329#endif // LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
330

source code of libc/benchmarks/LibcBenchmark.h