LibcBenchmark.h source code [libc/benchmarks/LibcBenchmark.h]

1	//===-- Benchmark function --------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	// This file mainly defines a `Benchmark` function.
10	//
11	// The benchmarking process is as follows:
12	// - We start by measuring the time it takes to run the function
13	// `InitialIterations` times. This is called a Sample. From this we can derive
14	// the time it took to run a single iteration.
15	//
16	// - We repeat the previous step with a greater number of iterations to lower
17	// the impact of the measurement. We can derive a more precise estimation of the
18	// runtime for a single iteration.
19	//
20	// - Each sample gives a more accurate estimation of the runtime for a single
21	// iteration but also takes more time to run. We stop the process when:
22	// The measure stabilize under a certain precision (Epsilon),*
23	// The overall benchmarking time is greater than MaxDuration,*
24	// The overall sample count is greater than MaxSamples,*
25	// The last sample used more than MaxIterations iterations.*
26	//
27	// - We also makes sure that the benchmark doesn't run for a too short period of
28	// time by defining MinDuration and MinSamples.
29
30	#ifndef LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
31	#define LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
32
33	#include "benchmark/benchmark.h"
34	#include "llvm/ADT/ArrayRef.h"
35	#include "llvm/ADT/SmallVector.h"
36	#include <array>
37	#include <chrono>
38	#include <cmath>
39	#include <cstdint>
40	#include <optional>
41
42	namespace llvm {
43	namespace libc_benchmarks {
44
45	using Duration = std::chrono::duration<double>;
46
47	enum class BenchmarkLog {
48	None, // Don't keep the internal state of the benchmark.
49	Last, // Keep only the last batch.
50	Full // Keep all iterations states, useful for testing or debugging.
51	};
52
53	// An object to configure the benchmark stopping conditions.
54	// See documentation at the beginning of the file for the overall algorithm and
55	// meaning of each field.
56	struct BenchmarkOptions {
57	// The minimum time for which the benchmark is running.
58	Duration MinDuration = std::chrono::seconds (`0`);
59	// The maximum time for which the benchmark is running.
60	Duration MaxDuration = std::chrono::seconds (`10`);
61	// The number of iterations in the first sample.
62	uint32_t InitialIterations = `1`;
63	// The maximum number of iterations for any given sample.
64	uint32_t MaxIterations = `10000000`;
65	// The minimum number of samples.
66	uint32_t MinSamples = `4`;
67	// The maximum number of samples.
68	uint32_t MaxSamples = `1000`;
69	// The benchmark will stop if the relative difference between the current and
70	// the last estimation is less than epsilon. This is 1% by default.
71	double Epsilon = `0.01`;
72	// The number of iterations grows exponentially between each sample.
73	// Must be greater or equal to 1.
74	double ScalingFactor = `1.4`;
75	BenchmarkLog Log = BenchmarkLog::None;
76	};
77
78	// The state of a benchmark.
79	enum class BenchmarkStatus {
80	Running,
81	MaxDurationReached,
82	MaxIterationsReached,
83	MaxSamplesReached,
84	PrecisionReached,
85	};
86
87	// The internal state of the benchmark, useful to debug, test or report
88	// statistics.
89	struct BenchmarkState {
90	size_t LastSampleIterations;
91	Duration LastBatchElapsed;
92	BenchmarkStatus CurrentStatus;
93	Duration CurrentBestGuess; // The time estimation for a single run of `foo`.
94	double ChangeRatio; // The change in time estimation between previous and
95	// current samples.
96	};
97
98	// A lightweight result for a benchmark.
99	struct BenchmarkResult {
100	BenchmarkStatus TerminationStatus = BenchmarkStatus::Running;
101	Duration BestGuess = {};
102	std::optional<llvm::SmallVector<BenchmarkState, `16`>> MaybeBenchmarkLog;
103	};
104
105	// Stores information about a cache in the host memory system.
106	struct CacheInfo {
107	std::string Type; // e.g. "Instruction", "Data", "Unified".
108	int Level; // 0 is closest to processing unit.
109	int Size; // In bytes.
110	int NumSharing; // The number of processing units (Hyper-Threading Thread)
111	// with which this cache is shared.
112	};
113
114	// Stores information about the host.
115	struct HostState {
116	std::string CpuName; // returns a string compatible with the -march option.
117	double CpuFrequency; // in Hertz.
118	std::vector<CacheInfo> Caches;
119
120	static HostState get();
121	};
122
123	namespace internal {
124
125	struct Measurement {
126	size_t Iterations = `0`;
127	Duration Elapsed = {};
128	};
129
130	// Updates the estimation of the elapsed time for a single iteration.
131	class RefinableRuntimeEstimation {
132	Duration TotalTime = {};
133	size_t TotalIterations = `0`;
134
135	public:
136	Duration update(const Measurement &M) {
137	assert(M.Iterations > `0`);
138	// Duration is encoded as a double (see definition).
139	// `TotalTime` and `M.Elapsed` are of the same magnitude so we don't expect
140	// loss of precision due to radically different scales.
141	TotalTime += M.Elapsed;
142	TotalIterations += M.Iterations;
143	return TotalTime / TotalIterations;
144	}
145	};
146
147	// This class tracks the progression of the runtime estimation.
148	class RuntimeEstimationProgression {
149	RefinableRuntimeEstimation RRE;
150
151	public:
152	Duration CurrentEstimation = {};
153
154	// Returns the change ratio between our best guess so far and the one from the
155	// new measurement.
156	double computeImprovement(const Measurement &M) {
157	const Duration NewEstimation = RRE.update(M);
158	const double Ratio = fabs(x: ((CurrentEstimation / NewEstimation) - `1.0`));
159	CurrentEstimation = NewEstimation;
160	return Ratio;
161	}
162	};
163
164	} // namespace internal
165
166	// Measures the runtime of `foo` until conditions defined by `Options` are met.
167	//
168	// To avoid measurement's imprecisions we measure batches of `foo`.
169	// The batch size is growing by `ScalingFactor` to minimize the effect of
170	// measuring.
171	//
172	// Note: The benchmark is not responsible for serializing the executions of
173	// `foo`. It is not suitable for measuring, very small & side effect free
174	// functions, as the processor is free to execute several executions in
175	// parallel.
176	//
177	// - Options: A set of parameters controlling the stopping conditions for the
178	// benchmark.
179	// - foo: The function under test. It takes one value and returns one value.
180	// The input value is used to randomize the execution of `foo` as part of a
181	// batch to mitigate the effect of the branch predictor. Signature:
182	// `ProductType foo(ParameterProvider::value_type value);`
183	// The output value is a product of the execution of `foo` and prevents the
184	// compiler from optimizing out foo's body.
185	// - ParameterProvider: An object responsible for providing a range of
186	// `Iterations` values to use as input for `foo`. The `value_type` of the
187	// returned container has to be compatible with `foo` argument.
188	// Must implement one of:
189	// `Container<ParameterType> generateBatch(size_t Iterations);`
190	// `const Container<ParameterType>& generateBatch(size_t Iterations);`
191	// - Clock: An object providing the current time. Must implement:
192	// `std::chrono::time_point now();`
193	template <typename Function, typename ParameterProvider,
194	typename BenchmarkClock = const std::chrono::high_resolution_clock>
195	BenchmarkResult benchmark(const BenchmarkOptions &Options,
196	ParameterProvider &PP, Function foo,
197	BenchmarkClock &Clock = BenchmarkClock()) {
198	BenchmarkResult Result;
199	internal::RuntimeEstimationProgression REP;
200	Duration TotalBenchmarkDuration = {};
201	size_t Iterations = std::max(a: Options.InitialIterations, b: uint32_t(`1`));
202	size_t Samples = `0`;
203	if (Options.ScalingFactor < `1.0`)
204	report_fatal_error("ScalingFactor should be >= 1");
205	if (Options.Log != BenchmarkLog::None)
206	Result.MaybeBenchmarkLog.emplace();
207	for (;;) {
208	// Request a new Batch of size `Iterations`.
209	const auto &Batch = PP.generateBatch(Iterations);
210
211	// Measuring this Batch.
212	const auto StartTime = Clock.now();
213	for (const auto Parameter : Batch) {
214	const auto Production = foo(Parameter);
215	benchmark::DoNotOptimize(Production);
216	}
217	const auto EndTime = Clock.now();
218	const Duration Elapsed = EndTime - StartTime;
219
220	// Updating statistics.
221	++Samples;
222	TotalBenchmarkDuration += Elapsed;
223	const double ChangeRatio = REP.computeImprovement(M: {.Iterations: Iterations, .Elapsed: Elapsed});
224	Result.BestGuess = REP.CurrentEstimation;
225
226	// Stopping condition.
227	if (TotalBenchmarkDuration >= Options.MinDuration &&
228	Samples >= Options.MinSamples && ChangeRatio < Options.Epsilon)
229	Result.TerminationStatus = BenchmarkStatus::PrecisionReached;
230	else if (Samples >= Options.MaxSamples)
231	Result.TerminationStatus = BenchmarkStatus::MaxSamplesReached;
232	else if (TotalBenchmarkDuration >= Options.MaxDuration)
233	Result.TerminationStatus = BenchmarkStatus::MaxDurationReached;
234	else if (Iterations >= Options.MaxIterations)
235	Result.TerminationStatus = BenchmarkStatus::MaxIterationsReached;
236
237	if (Result.MaybeBenchmarkLog) {
238	auto &BenchmarkLog = *Result.MaybeBenchmarkLog;
239	if (Options.Log == BenchmarkLog::Last && !BenchmarkLog.empty())
240	BenchmarkLog.pop_back();
241	BenchmarkState BS;
242	BS.LastSampleIterations = Iterations;
243	BS.LastBatchElapsed = Elapsed;
244	BS.CurrentStatus = Result.TerminationStatus;
245	BS.CurrentBestGuess = Result.BestGuess;
246	BS.ChangeRatio = ChangeRatio;
247	BenchmarkLog.push_back(BS);
248	}
249
250	if (Result.TerminationStatus != BenchmarkStatus::Running)
251	return Result;
252
253	if (Options.ScalingFactor > `1` &&
254	Iterations * Options.ScalingFactor == Iterations)
255	report_fatal_error(
256	"`Iterations *= ScalingFactor` is idempotent, increase ScalingFactor "
257	"or InitialIterations.");
258
259	Iterations *= Options.ScalingFactor;
260	}
261	}
262
263	// Interprets `Array` as a circular buffer of `Size` elements.
264	template <typename T> class CircularArrayRef {
265	llvm::ArrayRef<T> Array;
266	size_t Size;
267
268	public:
269	using value_type = T;
270	using reference = T &;
271	using const_reference = const T &;
272	using difference_type = ssize_t;
273	using size_type = size_t;
274
275	class const_iterator {
276	using iterator_category = std::input_iterator_tag;
277	llvm::ArrayRef<T> Array;
278	size_t Index;
279	size_t Offset;
280
281	public:
282	explicit const_iterator(llvm::ArrayRef<T> Array, size_t Index = `0`)
283	: Array(Array), Index(Index), Offset(Index % Array.size()) {}
284	const_iterator &operator++() {
285	++Index;
286	++Offset;
287	if (Offset == Array.size())
288	Offset = `0`;
289	return *this;
290	}
291	bool operator==(const_iterator Other) const { return Index == Other.Index; }
292	bool operator!=(const_iterator Other) const { return !(*this == Other); }
293	const T &operator() const* { return Array[Offset]; }
294	};
295
296	CircularArrayRef(llvm::ArrayRef<T> Array, size_t Size)
297	: Array(Array), Size(Size) {
298	assert(Array.size() > `0`);
299	}
300
301	const_iterator begin() const { return const_iterator(Array); }
302	const_iterator end() const { return const_iterator(Array, Size); }
303	};
304
305	// A convenient helper to produce a CircularArrayRef from an ArrayRef.
306	template <typename T>
307	CircularArrayRef<T> cycle(llvm::ArrayRef<T> Array, size_t Size) {
308	return {Array, Size};
309	}
310
311	// Creates an std::array which storage size is constrained under `Bytes`.
312	template <typename T, size_t Bytes>
313	using ByteConstrainedArray = std::array<T, Bytes / sizeof(T)>;
314
315	// A convenient helper to produce a CircularArrayRef from a
316	// ByteConstrainedArray.
317	template <typename T, size_t N>
318	CircularArrayRef<T> cycle(const std::array<T, N> &Container, size_t Size) {
319	return {llvm::ArrayRef<T>(Container.cbegin(), Container.cend()), Size};
320	}
321
322	// Makes sure the binary was compiled in release mode and that frequency
323	// governor is set on performance.
324	void checkRequirements();
325
326	} // namespace libc_benchmarks
327	} // namespace llvm
328
329	#endif // LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
330

source code of libc/benchmarks/LibcBenchmark.h