1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Support for registering benchmarks for functions.
16
17/* Example usage:
18// Define a function that executes the code to be measured a
19// specified number of times:
20static void BM_StringCreation(benchmark::State& state) {
21 for (auto _ : state)
22 std::string empty_string;
23}
24
25// Register the function as a benchmark
26BENCHMARK(BM_StringCreation);
27
28// Define another benchmark
29static void BM_StringCopy(benchmark::State& state) {
30 std::string x = "hello";
31 for (auto _ : state)
32 std::string copy(x);
33}
34BENCHMARK(BM_StringCopy);
35
36// Augment the main() program to invoke benchmarks if specified
37// via the --benchmark_filter command line flag. E.g.,
38// my_unittest --benchmark_filter=all
39// my_unittest --benchmark_filter=BM_StringCreation
40// my_unittest --benchmark_filter=String
41// my_unittest --benchmark_filter='Copy|Creation'
42int main(int argc, char** argv) {
43 benchmark::Initialize(&argc, argv);
44 benchmark::RunSpecifiedBenchmarks();
45 benchmark::Shutdown();
46 return 0;
47}
48
49// Sometimes a family of microbenchmarks can be implemented with
50// just one routine that takes an extra argument to specify which
51// one of the family of benchmarks to run. For example, the following
52// code defines a family of microbenchmarks for measuring the speed
53// of memcpy() calls of different lengths:
54
55static void BM_memcpy(benchmark::State& state) {
56 char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
57 memset(src, 'x', state.range(0));
58 for (auto _ : state)
59 memcpy(dst, src, state.range(0));
60 state.SetBytesProcessed(state.iterations() * state.range(0));
61 delete[] src; delete[] dst;
62}
63BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64
65// The preceding code is quite repetitive, and can be replaced with the
66// following short-hand. The following invocation will pick a few
67// appropriate arguments in the specified range and will generate a
68// microbenchmark for each such argument.
69BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70
71// You might have a microbenchmark that depends on two inputs. For
72// example, the following code defines a family of microbenchmarks for
73// measuring the speed of set insertion.
74static void BM_SetInsert(benchmark::State& state) {
75 set<int> data;
76 for (auto _ : state) {
77 state.PauseTiming();
78 data = ConstructRandomSet(state.range(0));
79 state.ResumeTiming();
80 for (int j = 0; j < state.range(1); ++j)
81 data.insert(RandomNumber());
82 }
83}
84BENCHMARK(BM_SetInsert)
85 ->Args({1<<10, 128})
86 ->Args({2<<10, 128})
87 ->Args({4<<10, 128})
88 ->Args({8<<10, 128})
89 ->Args({1<<10, 512})
90 ->Args({2<<10, 512})
91 ->Args({4<<10, 512})
92 ->Args({8<<10, 512});
93
94// The preceding code is quite repetitive, and can be replaced with
95// the following short-hand. The following macro will pick a few
96// appropriate arguments in the product of the two specified ranges
97// and will generate a microbenchmark for each such pair.
98BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99
100// For more complex patterns of inputs, passing a custom function
101// to Apply allows programmatic specification of an
102// arbitrary set of arguments to run the microbenchmark on.
103// The following example enumerates a dense range on
104// one parameter, and a sparse range on the second.
105static void CustomArguments(benchmark::internal::Benchmark* b) {
106 for (int i = 0; i <= 10; ++i)
107 for (int j = 32; j <= 1024*1024; j *= 8)
108 b->Args({i, j});
109}
110BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111
112// Templated microbenchmarks work the same way:
113// Produce then consume 'size' messages 'iters' times
114// Measures throughput in the absence of multiprogramming.
115template <class Q> int BM_Sequential(benchmark::State& state) {
116 Q q;
117 typename Q::value_type v;
118 for (auto _ : state) {
119 for (int i = state.range(0); i--; )
120 q.push(v);
121 for (int e = state.range(0); e--; )
122 q.Wait(&v);
123 }
124 // actually messages, not bytes:
125 state.SetBytesProcessed(state.iterations() * state.range(0));
126}
127BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
128
129Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
130benchmark. This option overrides the `benchmark_min_time` flag.
131
132void BM_test(benchmark::State& state) {
133 ... body ...
134}
135BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
136
137In a multithreaded test, it is guaranteed that none of the threads will start
138until all have reached the loop start, and all will have finished before any
139thread exits the loop body. As such, any global setup or teardown you want to
140do can be wrapped in a check against the thread index:
141
142static void BM_MultiThreaded(benchmark::State& state) {
143 if (state.thread_index() == 0) {
144 // Setup code here.
145 }
146 for (auto _ : state) {
147 // Run the test as normal.
148 }
149 if (state.thread_index() == 0) {
150 // Teardown code here.
151 }
152}
153BENCHMARK(BM_MultiThreaded)->Threads(4);
154
155
156If a benchmark runs a few milliseconds it may be hard to visually compare the
157measured times, since the output data is given in nanoseconds per default. In
158order to manually set the time unit, you can specify it manually:
159
160BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
161*/
162
163#ifndef BENCHMARK_BENCHMARK_H_
164#define BENCHMARK_BENCHMARK_H_
165
166// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
167#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
168#define BENCHMARK_HAS_CXX11
169#endif
170
171// This _MSC_VER check should detect VS 2017 v15.3 and newer.
172#if __cplusplus >= 201703L || \
173 (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L)
174#define BENCHMARK_HAS_CXX17
175#endif
176
177#include <stdint.h>
178
179#include <algorithm>
180#include <cassert>
181#include <cstddef>
182#include <iosfwd>
183#include <limits>
184#include <map>
185#include <set>
186#include <string>
187#include <utility>
188#include <vector>
189
190#if defined(BENCHMARK_HAS_CXX11)
191#include <atomic>
192#include <initializer_list>
193#include <type_traits>
194#include <utility>
195#endif
196
197#if defined(_MSC_VER)
198#include <intrin.h> // for _ReadWriteBarrier
199#endif
200
201#ifndef BENCHMARK_HAS_CXX11
202#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
203 TypeName(const TypeName&); \
204 TypeName& operator=(const TypeName&)
205#else
206#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
207 TypeName(const TypeName&) = delete; \
208 TypeName& operator=(const TypeName&) = delete
209#endif
210
211#ifdef BENCHMARK_HAS_CXX17
212#define BENCHMARK_UNUSED [[maybe_unused]]
213#elif defined(__GNUC__) || defined(__clang__)
214#define BENCHMARK_UNUSED __attribute__((unused))
215#else
216#define BENCHMARK_UNUSED
217#endif
218
219#if defined(__GNUC__) || defined(__clang__)
220#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
221#define BENCHMARK_NOEXCEPT noexcept
222#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
223#elif defined(_MSC_VER) && !defined(__clang__)
224#define BENCHMARK_ALWAYS_INLINE __forceinline
225#if _MSC_VER >= 1900
226#define BENCHMARK_NOEXCEPT noexcept
227#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
228#else
229#define BENCHMARK_NOEXCEPT
230#define BENCHMARK_NOEXCEPT_OP(x)
231#endif
232#define __func__ __FUNCTION__
233#else
234#define BENCHMARK_ALWAYS_INLINE
235#define BENCHMARK_NOEXCEPT
236#define BENCHMARK_NOEXCEPT_OP(x)
237#endif
238
239#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
240#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
241
242// clang-format off
243#if defined(__GNUC__) || defined(__clang__)
244#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
245#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
246#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
247 _Pragma("GCC diagnostic push") \
248 _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
249#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
250#else
251#define BENCHMARK_BUILTIN_EXPECT(x, y) x
252#define BENCHMARK_DEPRECATED_MSG(msg)
253#define BENCHMARK_WARNING_MSG(msg) \
254 __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
255 __LINE__) ") : warning note: " msg))
256#define BENCHMARK_DISABLE_DEPRECATED_WARNING
257#define BENCHMARK_RESTORE_DEPRECATED_WARNING
258#endif
259// clang-format on
260
261#if defined(__GNUC__) && !defined(__clang__)
262#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
263#endif
264
265#ifndef __has_builtin
266#define __has_builtin(x) 0
267#endif
268
269#if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
270#define BENCHMARK_UNREACHABLE() __builtin_unreachable()
271#elif defined(_MSC_VER)
272#define BENCHMARK_UNREACHABLE() __assume(false)
273#else
274#define BENCHMARK_UNREACHABLE() ((void)0)
275#endif
276
277#ifdef BENCHMARK_HAS_CXX11
278#define BENCHMARK_OVERRIDE override
279#else
280#define BENCHMARK_OVERRIDE
281#endif
282
283namespace benchmark {
284class BenchmarkReporter;
285
286void Initialize(int* argc, char** argv);
287void Shutdown();
288
289// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
291bool ReportUnrecognizedArguments(int argc, char** argv);
292
293// Returns the current value of --benchmark_filter.
294std::string GetBenchmarkFilter();
295
296// Generate a list of benchmarks matching the specified --benchmark_filter flag
297// and if --benchmark_list_tests is specified return after printing the name
298// of each matching benchmark. Otherwise run each matching benchmark and
299// report the results.
300//
301// spec : Specify the benchmarks to run. If users do not specify this arg,
302// then the value of FLAGS_benchmark_filter
303// will be used.
304//
305// The second and third overload use the specified 'display_reporter' and
306// 'file_reporter' respectively. 'file_reporter' will write to the file
307// specified
308// by '--benchmark_output'. If '--benchmark_output' is not given the
309// 'file_reporter' is ignored.
310//
311// RETURNS: The number of matching benchmarks.
312size_t RunSpecifiedBenchmarks();
313size_t RunSpecifiedBenchmarks(std::string spec);
314
315size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
316size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
317 std::string spec);
318
319size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
320 BenchmarkReporter* file_reporter);
321size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
322 BenchmarkReporter* file_reporter,
323 std::string spec);
324
325// If a MemoryManager is registered (via RegisterMemoryManager()),
326// it can be used to collect and report allocation metrics for a run of the
327// benchmark.
328class MemoryManager {
329 public:
330 static const int64_t TombstoneValue;
331
332 struct Result {
333 Result()
334 : num_allocs(0),
335 max_bytes_used(0),
336 total_allocated_bytes(TombstoneValue),
337 net_heap_growth(TombstoneValue) {}
338
339 // The number of allocations made in total between Start and Stop.
340 int64_t num_allocs;
341
342 // The peak memory use between Start and Stop.
343 int64_t max_bytes_used;
344
345 // The total memory allocated, in bytes, between Start and Stop.
346 // Init'ed to TombstoneValue if metric not available.
347 int64_t total_allocated_bytes;
348
349 // The net changes in memory, in bytes, between Start and Stop.
350 // ie., total_allocated_bytes - total_deallocated_bytes.
351 // Init'ed to TombstoneValue if metric not available.
352 int64_t net_heap_growth;
353 };
354
355 virtual ~MemoryManager() {}
356
357 // Implement this to start recording allocation information.
358 virtual void Start() = 0;
359
360 // Implement this to stop recording and fill out the given Result structure.
361 BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead")
362 virtual void Stop(Result* result) = 0;
363
364 // FIXME(vyng): Make this pure virtual once we've migrated current users.
365 BENCHMARK_DISABLE_DEPRECATED_WARNING
366 virtual void Stop(Result& result) { Stop(result: &result); }
367 BENCHMARK_RESTORE_DEPRECATED_WARNING
368};
369
370// Register a MemoryManager instance that will be used to collect and report
371// allocation measurements for benchmark runs.
372void RegisterMemoryManager(MemoryManager* memory_manager);
373
374// Add a key-value pair to output as part of the context stanza in the report.
375void AddCustomContext(const std::string& key, const std::string& value);
376
377namespace internal {
378class Benchmark;
379class BenchmarkImp;
380class BenchmarkFamilies;
381
382void UseCharPointer(char const volatile*);
383
384// Take ownership of the pointer and register the benchmark. Return the
385// registered benchmark.
386Benchmark* RegisterBenchmarkInternal(Benchmark*);
387
388// Ensure that the standard streams are properly initialized in every TU.
389int InitializeStreams();
390BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
391
392} // namespace internal
393
394#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
395 defined(__EMSCRIPTEN__)
396#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
397#endif
398
399// Force the compiler to flush pending writes to global memory. Acts as an
400// effective read/write barrier
401#ifdef BENCHMARK_HAS_CXX11
402inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
403 std::atomic_signal_fence(m: std::memory_order_acq_rel);
404}
405#endif
406
407// The DoNotOptimize(...) function can be used to prevent a value or
408// expression from being optimized away by the compiler. This function is
409// intended to add little to no overhead.
410// See: https://youtu.be/nXaxk27zwlk?t=2441
411#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  // The "r,m" input constraint forces the compiler to materialize `value`
  // in a register or memory, and the "memory" clobber prevents the
  // computation that produced it from being optimized away.
  asm volatile("" : : "r,m"(value) : "memory");
}
416
// Non-const overload: the "+" (read/write) constraint makes the compiler
// assume `value` may be modified here, so writes to it cannot be elided.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
#if defined(__clang__)
  // clang accepts the register-first ordering of the multi-alternative
  // constraint; gcc prefers memory-first (below).
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}
425
426#ifndef BENCHMARK_HAS_CXX11
// Pre-C++11 fallback: a compiler-level read/write barrier via an empty asm
// with a "memory" clobber (std::atomic_signal_fence is unavailable).
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  asm volatile("" : : : "memory");
}
430#endif
431#elif defined(_MSC_VER)
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  // MSVC lacks GNU inline assembly; escape the value through an opaque
  // function call, then issue a compiler-level read/write barrier.
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
  _ReadWriteBarrier();
}
437
438#ifndef BENCHMARK_HAS_CXX11
// Pre-C++11 MSVC fallback: compiler-level read/write barrier intrinsic.
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
440#endif
441#else
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  // Portable fallback: escape the value through an opaque function call so
  // the compiler cannot prove it unused. No memory barrier is available.
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
446// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
447#endif
448
449// This class is used for user-defined counters.
class Counter {
 public:
  // Bit-flags describing how the counter value is post-processed for
  // reporting. Flags may be combined with operator| (see below the class).
  enum Flags {
    kDefaults = 0,
    // Mark the counter as a rate. It will be presented divided
    // by the duration of the benchmark.
    kIsRate = 1 << 0,
    // Mark the counter as a thread-average quantity. It will be
    // presented divided by the number of threads.
    kAvgThreads = 1 << 1,
    // Mark the counter as a thread-average rate. See above.
    kAvgThreadsRate = kIsRate | kAvgThreads,
    // Mark the counter as a constant value, valid/same for *every* iteration.
    // When reporting, it will be *multiplied* by the iteration count.
    kIsIterationInvariant = 1 << 2,
    // Mark the counter as a constant rate.
    // When reporting, it will be *multiplied* by the iteration count
    // and then divided by the duration of the benchmark.
    kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
    // Mark the counter as a iteration-average quantity.
    // It will be presented divided by the number of iterations.
    kAvgIterations = 1 << 3,
    // Mark the counter as a iteration-average rate. See above.
    kAvgIterationsRate = kIsRate | kAvgIterations,

    // In the end, invert the result. This is always done last!
    kInvert = 1 << 31
  };

  // Base used when scaling the value to "1k" units in reports.
  enum OneK {
    // 1'000 items per 1k
    kIs1000 = 1000,
    // 1'024 items per 1k
    kIs1024 = 1024
  };

  double value;  // The raw counter value as set by the benchmark.
  Flags flags;   // How to post-process the value when reporting.
  OneK oneK;     // Whether "1k" means 1000 or 1024 for this counter.

  BENCHMARK_ALWAYS_INLINE
  Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
      : value(v), flags(f), oneK(k) {}

  // Implicit conversions so a Counter can be read and written like a double.
  BENCHMARK_ALWAYS_INLINE operator double const &() const { return value; }
  BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
};
497
498// A helper for user code to create unforeseen combinations of Flags, without
499// having to do this cast manually each time, or providing this operator.
500Counter::Flags inline operator|(const Counter::Flags& LHS,
501 const Counter::Flags& RHS) {
502 return static_cast<Counter::Flags>(static_cast<int>(LHS) |
503 static_cast<int>(RHS));
504}
505
// This is the container for the user-defined counters.
typedef std::map<std::string, Counter> UserCounters;

// TimeUnit is passed to a benchmark in order to specify the order of magnitude
// for the measured time.
enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };

// BigO is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. In case oAuto is selected,
// complexity will be calculated automatically to the best fit.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };

// Count of iterations of the benchmark loop.
typedef uint64_t IterationCount;

// Unit in which a computed statistic is expressed: an absolute time, or a
// percentage.
enum StatisticUnit { kTime, kPercentage };

// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark.
typedef double(BigOFunc)(IterationCount);

// StatisticsFunc is passed to a benchmark in order to compute some descriptive
// statistics over all the measurements of some type.
typedef double(StatisticsFunc)(const std::vector<double>&);
530
531namespace internal {
// Bundles a named statistic with the function that computes it over all
// measurements and the unit the result is expressed in.
struct Statistics {
  std::string name_;         // Name used when reporting this statistic.
  StatisticsFunc* compute_;  // Computes the statistic over the measurements.
  StatisticUnit unit_;       // kTime (default) or kPercentage.

  Statistics(const std::string& name, StatisticsFunc* compute,
             StatisticUnit unit = kTime)
      : name_(name), compute_(compute), unit_(unit) {}
};
541
542class BenchmarkInstance;
543class ThreadTimer;
544class ThreadManager;
545class PerfCountersMeasurement;
546
// Bit-flags selecting whether each reporter outputs per-repetition results,
// only aggregates, or both.
enum AggregationReportMode
#if defined(BENCHMARK_HAS_CXX11)
    // Fix the underlying type where the language allows it.
    : unsigned
#else
#endif
{
  // The mode has not been manually specified
  ARM_Unspecified = 0,
  // The mode is user-specified.
  // This may or may not be set when the following bit-flags are set.
  ARM_Default = 1U << 0U,
  // File reporter should only output aggregates.
  ARM_FileReportAggregatesOnly = 1U << 1U,
  // Display reporter should only output aggregates
  ARM_DisplayReportAggregatesOnly = 1U << 2U,
  // Both reporters should only display aggregates.
  ARM_ReportAggregatesOnly =
      ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
566
567} // namespace internal
568
569// State is passed to a running Benchmark and contains state for the
570// benchmark to use.
571class State {
572 public:
573 struct StateIterator;
574 friend struct StateIterator;
575
576 // Returns iterators used to run each iteration of a benchmark using a
577 // C++11 ranged-based for loop. These functions should not be called directly.
578 //
579 // REQUIRES: The benchmark has not started running yet. Neither begin nor end
580 // have been called previously.
581 //
582 // NOTE: KeepRunning may not be used after calling either of these functions.
583 BENCHMARK_ALWAYS_INLINE StateIterator begin();
584 BENCHMARK_ALWAYS_INLINE StateIterator end();
585
586 // Returns true if the benchmark should continue through another iteration.
587 // NOTE: A benchmark may not return from the test until KeepRunning() has
588 // returned false.
589 bool KeepRunning();
590
591 // Returns true iff the benchmark should run n more iterations.
592 // REQUIRES: 'n' > 0.
593 // NOTE: A benchmark must not return from the test until KeepRunningBatch()
594 // has returned false.
595 // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
596 //
597 // Intended usage:
598 // while (state.KeepRunningBatch(1000)) {
599 // // process 1000 elements
600 // }
601 bool KeepRunningBatch(IterationCount n);
602
603 // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
604 // by the current thread.
605 // Stop the benchmark timer. If not called, the timer will be
606 // automatically stopped after the last iteration of the benchmark loop.
607 //
608 // For threaded benchmarks the PauseTiming() function only pauses the timing
609 // for the current thread.
610 //
611 // NOTE: The "real time" measurement is per-thread. If different threads
612 // report different measurements the largest one is reported.
613 //
614 // NOTE: PauseTiming()/ResumeTiming() are relatively
615 // heavyweight, and so their use should generally be avoided
616 // within each benchmark iteration, if possible.
617 void PauseTiming();
618
619 // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
620 // by the current thread.
621 // Start the benchmark timer. The timer is NOT running on entrance to the
622 // benchmark function. It begins running after control flow enters the
623 // benchmark loop.
624 //
625 // NOTE: PauseTiming()/ResumeTiming() are relatively
626 // heavyweight, and so their use should generally be avoided
627 // within each benchmark iteration, if possible.
628 void ResumeTiming();
629
630 // REQUIRES: 'SkipWithError(...)' has not been called previously by the
631 // current thread.
632 // Report the benchmark as resulting in an error with the specified 'msg'.
633 // After this call the user may explicitly 'return' from the benchmark.
634 //
635 // If the ranged-for style of benchmark loop is used, the user must explicitly
636 // break from the loop, otherwise all future iterations will be run.
637 // If the 'KeepRunning()' loop is used the current thread will automatically
638 // exit the loop at the end of the current iteration.
639 //
640 // For threaded benchmarks only the current thread stops executing and future
641 // calls to `KeepRunning()` will block until all threads have completed
642 // the `KeepRunning()` loop. If multiple threads report an error only the
643 // first error message is used.
644 //
645 // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
646 // the current scope immediately. If the function is called from within
647 // the 'KeepRunning()' loop the current iteration will finish. It is the users
648 // responsibility to exit the scope as needed.
649 void SkipWithError(const char* msg);
650
651 // Returns true if an error has been reported with 'SkipWithError(...)'.
652 bool error_occurred() const { return error_occurred_; }
653
654 // REQUIRES: called exactly once per iteration of the benchmarking loop.
655 // Set the manually measured time for this benchmark iteration, which
656 // is used instead of automatically measured time if UseManualTime() was
657 // specified.
658 //
659 // For threaded benchmarks the final value will be set to the largest
660 // reported values.
661 void SetIterationTime(double seconds);
662
663 // Set the number of bytes processed by the current benchmark
664 // execution. This routine is typically called once at the end of a
665 // throughput oriented benchmark.
666 //
667 // REQUIRES: a benchmark has exited its benchmarking loop.
668 BENCHMARK_ALWAYS_INLINE
669 void SetBytesProcessed(int64_t bytes) {
670 counters["bytes_per_second"] =
671 Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
672 }
673
674 BENCHMARK_ALWAYS_INLINE
675 int64_t bytes_processed() const {
676 if (counters.find(x: "bytes_per_second") != counters.end())
677 return static_cast<int64_t>(counters.at(k: "bytes_per_second"));
678 return 0;
679 }
680
681 // If this routine is called with complexity_n > 0 and complexity report is
682 // requested for the
683 // family benchmark, then current benchmark will be part of the computation
684 // and complexity_n will
685 // represent the length of N.
686 BENCHMARK_ALWAYS_INLINE
687 void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
688
689 BENCHMARK_ALWAYS_INLINE
690 int64_t complexity_length_n() const { return complexity_n_; }
691
692 // If this routine is called with items > 0, then an items/s
693 // label is printed on the benchmark report line for the currently
694 // executing benchmark. It is typically called at the end of a processing
695 // benchmark where a processing items/second output is desired.
696 //
697 // REQUIRES: a benchmark has exited its benchmarking loop.
698 BENCHMARK_ALWAYS_INLINE
699 void SetItemsProcessed(int64_t items) {
700 counters["items_per_second"] =
701 Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
702 }
703
704 BENCHMARK_ALWAYS_INLINE
705 int64_t items_processed() const {
706 if (counters.find(x: "items_per_second") != counters.end())
707 return static_cast<int64_t>(counters.at(k: "items_per_second"));
708 return 0;
709 }
710
711 // If this routine is called, the specified label is printed at the
712 // end of the benchmark report line for the currently executing
713 // benchmark. Example:
714 // static void BM_Compress(benchmark::State& state) {
715 // ...
716 // double compress = input_size / output_size;
717 // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
718 // }
719 // Produces output that looks like:
720 // BM_Compress 50 50 14115038 compress:27.3%
721 //
722 // REQUIRES: a benchmark has exited its benchmarking loop.
723 void SetLabel(const char* label);
724
725 void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
726 this->SetLabel(str.c_str());
727 }
728
729 // Range arguments for this run. CHECKs if the argument has been set.
730 BENCHMARK_ALWAYS_INLINE
731 int64_t range(std::size_t pos = 0) const {
732 assert(range_.size() > pos);
733 return range_[pos];
734 }
735
736 BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
737 int64_t range_x() const { return range(pos: 0); }
738
739 BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
740 int64_t range_y() const { return range(pos: 1); }
741
742 // Number of threads concurrently executing the benchmark.
743 BENCHMARK_ALWAYS_INLINE
744 int threads() const { return threads_; }
745
746 // Index of the executing thread. Values from [0, threads).
747 BENCHMARK_ALWAYS_INLINE
748 int thread_index() const { return thread_index_; }
749
750 BENCHMARK_ALWAYS_INLINE
751 IterationCount iterations() const {
752 if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
753 return 0;
754 }
755 return max_iterations - total_iterations_ + batch_leftover_;
756 }
757
758 private:
759 // items we expect on the first cache line (ie 64 bytes of the struct)
760 // When total_iterations_ is 0, KeepRunning() and friends will return false.
761 // May be larger than max_iterations.
762 IterationCount total_iterations_;
763
764 // When using KeepRunningBatch(), batch_leftover_ holds the number of
765 // iterations beyond max_iters that were run. Used to track
766 // completed_iterations_ accurately.
767 IterationCount batch_leftover_;
768
769 public:
770 const IterationCount max_iterations;
771
772 private:
773 bool started_;
774 bool finished_;
775 bool error_occurred_;
776
777 // items we don't need on the first cache line
778 std::vector<int64_t> range_;
779
780 int64_t complexity_n_;
781
782 public:
783 // Container for user-defined counters.
784 UserCounters counters;
785
786 private:
787 State(IterationCount max_iters, const std::vector<int64_t>& ranges,
788 int thread_i, int n_threads, internal::ThreadTimer* timer,
789 internal::ThreadManager* manager,
790 internal::PerfCountersMeasurement* perf_counters_measurement);
791
792 void StartKeepRunning();
793 // Implementation of KeepRunning() and KeepRunningBatch().
794 // is_batch must be true unless n is 1.
795 bool KeepRunningInternal(IterationCount n, bool is_batch);
796 void FinishKeepRunning();
797
798 const int thread_index_;
799 const int threads_;
800
801 internal::ThreadTimer* const timer_;
802 internal::ThreadManager* const manager_;
803 internal::PerfCountersMeasurement* const perf_counters_measurement_;
804
805 friend class internal::BenchmarkInstance;
806};
807
808inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
809 return KeepRunningInternal(n: 1, /*is_batch=*/is_batch: false);
810}
811
812inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
813 return KeepRunningInternal(n, /*is_batch=*/is_batch: true);
814}
815
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
                                                               bool is_batch) {
  // total_iterations_ is set to 0 by the constructor, and always set to a
  // nonzero value by StartKeepRunning().
  assert(n > 0);
  // n must be 1 unless is_batch is true.
  assert(is_batch || n == 1);
  // Fast path: enough iterations remain in the budget; just consume them.
  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
    total_iterations_ -= n;
    return true;
  }
  if (!started_) {
    // First call for this run: start the timer, establish the iteration
    // budget, and retry the consume.
    StartKeepRunning();
    if (!error_occurred_ && total_iterations_ >= n) {
      total_iterations_ -= n;
      return true;
    }
  }
  // For non-batch runs, total_iterations_ must be 0 by now.
  if (is_batch && total_iterations_ != 0) {
    // Batch overshoot: run one final batch and record how far it exceeds the
    // budget so iterations() stays accurate.
    batch_leftover_ = n - total_iterations_;
    total_iterations_ = 0;
    return true;
  }
  FinishKeepRunning();
  return false;
}
843
// Forward iterator driving the ranged-for benchmark loop. Dereferencing
// yields an empty Value; the real work is the countdown in operator++ and
// operator!=, which mirrors KeepRunning() with minimal per-iteration cost.
struct State::StateIterator {
  struct BENCHMARK_UNUSED Value {};
  typedef std::forward_iterator_tag iterator_category;
  typedef Value value_type;
  typedef Value reference;
  typedef Value pointer;
  typedef std::ptrdiff_t difference_type;

 private:
  friend class State;
  // Constructs the past-the-end sentinel (see State::end()).
  BENCHMARK_ALWAYS_INLINE
  StateIterator() : cached_(0), parent_() {}

  // Constructs the begin iterator, caching the full iteration budget up
  // front (zero if an error was already reported, so the body never runs).
  BENCHMARK_ALWAYS_INLINE
  explicit StateIterator(State* st)
      : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}

 public:
  BENCHMARK_ALWAYS_INLINE
  Value operator*() const { return Value(); }

  BENCHMARK_ALWAYS_INLINE
  StateIterator& operator++() {
    assert(cached_ > 0);
    --cached_;
    return *this;
  }

  // Loop condition: true while iterations remain; once exhausted, stops the
  // timer via FinishKeepRunning(). The argument is ignored — any
  // StateIterator serves as the end sentinel.
  BENCHMARK_ALWAYS_INLINE
  bool operator!=(StateIterator const&) const {
    if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
    parent_->FinishKeepRunning();
    return false;
  }

 private:
  IterationCount cached_;  // Remaining iterations for this loop.
  State* const parent_;    // Owning State; default-initialized (null) only
                           // for the end sentinel, which is never followed.
};
883
// Begin iterator for the ranged-for loop; caches the iteration budget.
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
  return StateIterator(this);
}
// End sentinel for the ranged-for loop. Note the timer is started here,
// in end(), which the compiler evaluates before entering the loop.
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
  StartKeepRunning();
  return StateIterator();
}
891
892namespace internal {
893
894typedef void(Function)(State&);
895
896// ------------------------------------------------------
897// Benchmark registration object. The BENCHMARK() macro expands
898// into an internal::Benchmark* object. Various methods can
899// be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can be
// chained into one expression.
902class Benchmark {
903 public:
904 virtual ~Benchmark();
905
906 // Note: the following methods all return "this" so that multiple
907 // method calls can be chained together in one expression.
908
909 // Specify the name of the benchmark
910 Benchmark* Name(const std::string& name);
911
912 // Run this benchmark once with "x" as the extra argument passed
913 // to the function.
914 // REQUIRES: The function passed to the constructor must accept an arg1.
915 Benchmark* Arg(int64_t x);
916
917 // Run this benchmark with the given time unit for the generated output report
918 Benchmark* Unit(TimeUnit unit);
919
920 // Run this benchmark once for a number of values picked from the
921 // range [start..limit]. (start and limit are always picked.)
922 // REQUIRES: The function passed to the constructor must accept an arg1.
923 Benchmark* Range(int64_t start, int64_t limit);
924
925 // Run this benchmark once for all values in the range [start..limit] with
926 // specific step
927 // REQUIRES: The function passed to the constructor must accept an arg1.
928 Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
929
930 // Run this benchmark once with "args" as the extra arguments passed
931 // to the function.
932 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
933 Benchmark* Args(const std::vector<int64_t>& args);
934
935 // Equivalent to Args({x, y})
936 // NOTE: This is a legacy C++03 interface provided for compatibility only.
937 // New code should use 'Args'.
938 Benchmark* ArgPair(int64_t x, int64_t y) {
939 std::vector<int64_t> args;
940 args.push_back(x: x);
941 args.push_back(x: y);
942 return Args(args);
943 }
944
945 // Run this benchmark once for a number of values picked from the
946 // ranges [start..limit]. (starts and limits are always picked.)
947 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
948 Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
949
950 // Run this benchmark once for each combination of values in the (cartesian)
951 // product of the supplied argument lists.
952 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
953 Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists);
954
955 // Equivalent to ArgNames({name})
956 Benchmark* ArgName(const std::string& name);
957
958 // Set the argument names to display in the benchmark name. If not called,
959 // only argument values will be shown.
960 Benchmark* ArgNames(const std::vector<std::string>& names);
961
962 // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
963 // NOTE: This is a legacy C++03 interface provided for compatibility only.
964 // New code should use 'Ranges'.
965 Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
966 std::vector<std::pair<int64_t, int64_t> > ranges;
967 ranges.push_back(x: std::make_pair(x&: lo1, y&: hi1));
968 ranges.push_back(x: std::make_pair(x&: lo2, y&: hi2));
969 return Ranges(ranges);
970 }
971
972 // Have "setup" and/or "teardown" invoked once for every benchmark run.
973 // If the benchmark is multi-threaded (will run in k threads concurrently),
974 // the setup callback will be invoked exactly once (not k times) before
975 // each run with k threads. Time allowing (e.g. for a short benchmark), there
976 // may be multiple such runs per benchmark, each run with its own
977 // "setup"/"teardown".
978 //
979 // If the benchmark uses different size groups of threads (e.g. via
980 // ThreadRange), the above will be true for each size group.
981 //
982 // The callback will be passed a State object, which includes the number
983 // of threads, thread-index, benchmark arguments, etc.
984 //
985 // The callback must not be NULL or self-deleting.
986 Benchmark* Setup(void (*setup)(const benchmark::State&));
987 Benchmark* Teardown(void (*teardown)(const benchmark::State&));
988
989 // Pass this benchmark object to *func, which can customize
990 // the benchmark by calling various methods like Arg, Args,
991 // Threads, etc.
992 Benchmark* Apply(void (*func)(Benchmark* benchmark));
993
994 // Set the range multiplier for non-dense range. If not called, the range
995 // multiplier kRangeMultiplier will be used.
996 Benchmark* RangeMultiplier(int multiplier);
997
998 // Set the minimum amount of time to use when running this benchmark. This
999 // option overrides the `benchmark_min_time` flag.
1000 // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
1001 Benchmark* MinTime(double t);
1002
1003 // Specify the amount of iterations that should be run by this benchmark.
1004 // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
1005 //
1006 // NOTE: This function should only be used when *exact* iteration control is
1007 // needed and never to control or limit how long a benchmark runs, where
1008 // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
1009 Benchmark* Iterations(IterationCount n);
1010
1011 // Specify the amount of times to repeat this benchmark. This option overrides
1012 // the `benchmark_repetitions` flag.
1013 // REQUIRES: `n > 0`
1014 Benchmark* Repetitions(int n);
1015
1016 // Specify if each repetition of the benchmark should be reported separately
1017 // or if only the final statistics should be reported. If the benchmark
1018 // is not repeated then the single result is always reported.
1019 // Applies to *ALL* reporters (display and file).
1020 Benchmark* ReportAggregatesOnly(bool value = true);
1021
1022 // Same as ReportAggregatesOnly(), but applies to display reporter only.
1023 Benchmark* DisplayAggregatesOnly(bool value = true);
1024
1025 // By default, the CPU time is measured only for the main thread, which may
1026 // be unrepresentative if the benchmark uses threads internally. If called,
1027 // the total CPU time spent by all the threads will be measured instead.
1028 // By default, the only the main thread CPU time will be measured.
1029 Benchmark* MeasureProcessCPUTime();
1030
1031 // If a particular benchmark should use the Wall clock instead of the CPU time
1032 // (be it either the CPU time of the main thread only (default), or the
1033 // total CPU usage of the benchmark), call this method. If called, the elapsed
1034 // (wall) time will be used to control how many iterations are run, and in the
1035 // printing of items/second or MB/seconds values.
1036 // If not called, the CPU time used by the benchmark will be used.
1037 Benchmark* UseRealTime();
1038
1039 // If a benchmark must measure time manually (e.g. if GPU execution time is
1040 // being
1041 // measured), call this method. If called, each benchmark iteration should
1042 // call
1043 // SetIterationTime(seconds) to report the measured time, which will be used
1044 // to control how many iterations are run, and in the printing of items/second
1045 // or MB/second values.
1046 Benchmark* UseManualTime();
1047
1048 // Set the asymptotic computational complexity for the benchmark. If called
1049 // the asymptotic computational complexity will be shown on the output.
1050 Benchmark* Complexity(BigO complexity = benchmark::oAuto);
1051
1052 // Set the asymptotic computational complexity for the benchmark. If called
1053 // the asymptotic computational complexity will be shown on the output.
1054 Benchmark* Complexity(BigOFunc* complexity);
1055
1056 // Add this statistics to be computed over all the values of benchmark run
1057 Benchmark* ComputeStatistics(const std::string& name,
1058 StatisticsFunc* statistics,
1059 StatisticUnit unit = kTime);
1060
1061 // Support for running multiple copies of the same benchmark concurrently
1062 // in multiple threads. This may be useful when measuring the scaling
1063 // of some piece of code.
1064
1065 // Run one instance of this benchmark concurrently in t threads.
1066 Benchmark* Threads(int t);
1067
1068 // Pick a set of values T from [min_threads,max_threads].
1069 // min_threads and max_threads are always included in T. Run this
1070 // benchmark once for each value in T. The benchmark run for a
1071 // particular value t consists of t threads running the benchmark
1072 // function concurrently. For example, consider:
1073 // BENCHMARK(Foo)->ThreadRange(1,16);
1074 // This will run the following benchmarks:
1075 // Foo in 1 thread
1076 // Foo in 2 threads
1077 // Foo in 4 threads
1078 // Foo in 8 threads
1079 // Foo in 16 threads
1080 Benchmark* ThreadRange(int min_threads, int max_threads);
1081
1082 // For each value n in the range, run this benchmark once using n threads.
1083 // min_threads and max_threads are always included in the range.
1084 // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
1085 // a benchmark with 1, 4, 7 and 8 threads.
1086 Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
1087
1088 // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
1089 Benchmark* ThreadPerCpu();
1090
1091 virtual void Run(State& state) = 0;
1092
1093 protected:
1094 explicit Benchmark(const char* name);
1095 Benchmark(Benchmark const&);
1096 void SetName(const char* name);
1097
1098 int ArgsCnt() const;
1099
1100 private:
1101 friend class BenchmarkFamilies;
1102 friend class BenchmarkInstance;
1103
1104 std::string name_;
1105 AggregationReportMode aggregation_report_mode_;
1106 std::vector<std::string> arg_names_; // Args for all benchmark runs
1107 std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs
1108 TimeUnit time_unit_;
1109 int range_multiplier_;
1110 double min_time_;
1111 IterationCount iterations_;
1112 int repetitions_;
1113 bool measure_process_cpu_time_;
1114 bool use_real_time_;
1115 bool use_manual_time_;
1116 BigO complexity_;
1117 BigOFunc* complexity_lambda_;
1118 std::vector<Statistics> statistics_;
1119 std::vector<int> thread_counts_;
1120
1121 typedef void (*callback_function)(const benchmark::State&);
1122 callback_function setup_;
1123 callback_function teardown_;
1124
1125 Benchmark& operator=(Benchmark const&);
1126};
1127
1128} // namespace internal
1129
// Create and register a benchmark with the specified 'name' that invokes
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
internal::Benchmark* RegisterBenchmark(const char* name,
                                       internal::Function* fn);

#if defined(BENCHMARK_HAS_CXX11)
// Overload accepting an arbitrary callable; defined later in this header.
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
#endif

// Remove all registered benchmarks. All pointers to previously registered
// benchmarks are invalidated.
void ClearRegisteredBenchmarks();
1145
1146namespace internal {
// The class used to hold all benchmarks created from a static function
// (i.e. those created using the BENCHMARK(...) macros).
// Benchmark subclass adapting a plain `void(State&)` function.
class FunctionBenchmark : public Benchmark {
 public:
  FunctionBenchmark(const char* name, Function* func)
      : Benchmark(name), func_(func) {}

  // Defined out-of-line in the library.
  virtual void Run(State& st) BENCHMARK_OVERRIDE;

 private:
  Function* func_;  // non-owning pointer to the wrapped benchmark function
};
1159
1160#ifdef BENCHMARK_HAS_CXX11
// Benchmark subclass that owns and invokes an arbitrary callable (lambda).
template <class Lambda>
class LambdaBenchmark : public Benchmark {
 public:
  virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }

 private:
  // Construction is private: only the RegisterBenchmark friend below may
  // create instances.
  template <class OLambda>
  LambdaBenchmark(const char* name, OLambda&& lam)
      : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}

  LambdaBenchmark(LambdaBenchmark const&) = delete;

  template <class Lam>  // NOLINTNEXTLINE(readability-redundant-declaration)
  friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);

  Lambda lambda_;  // the callable run as the benchmark body
};
1178#endif
1179
1180} // namespace internal
1181
// Wraps 'fn' in a FunctionBenchmark and hands it to
// RegisterBenchmarkInternal() (which presumably takes ownership — it is
// declared elsewhere).
inline internal::Benchmark* RegisterBenchmark(const char* name,
                                              internal::Function* fn) {
  return internal::RegisterBenchmarkInternal(
      ::new internal::FunctionBenchmark(name, fn));
}
1187
1188#ifdef BENCHMARK_HAS_CXX11
// Registers any callable as a benchmark; the decayed callable type is stored
// inside a LambdaBenchmark.
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
  using BenchType =
      internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
  return internal::RegisterBenchmarkInternal(
      ::new BenchType(name, std::forward<Lambda>(fn)));
}
1196#endif
1197
1198#if defined(BENCHMARK_HAS_CXX11) && \
1199 (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
// Variadic form: binds extra arguments to the callable. NOTE: both 'fn' and
// 'args' are captured BY COPY into the registered lambda.
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
                                       Args&&... args) {
  return benchmark::RegisterBenchmark(
      name, [=](benchmark::State& st) { fn(st, args...); });
}
1206#else
1207#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
1208#endif
1209
// The base class for all fixture tests.
class Fixture : public internal::Benchmark {
 public:
  Fixture() : internal::Benchmark("") {}

  // Runs the fixture life cycle: SetUp, the user-defined BenchmarkCase,
  // then TearDown.
  virtual void Run(State& st) BENCHMARK_OVERRIDE {
    this->SetUp(st);
    this->BenchmarkCase(st);
    this->TearDown(st);
  }

  // These will be deprecated ...
  virtual void SetUp(const State&) {}
  virtual void TearDown(const State&) {}
  // ... In favor of these. The non-const overloads forward to the const ones
  // so that fixtures overriding either signature keep working.
  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }

 protected:
  // The benchmark body; supplied by BENCHMARK_DEFINE_F / BENCHMARK_F macros.
  virtual void BenchmarkCase(State&) = 0;
};
1231
1232} // namespace benchmark
1233
1234// ------------------------------------------------------
1235// Macro to register benchmarks
1236
// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
// empty. If X is empty the expression becomes (+1 == +0).
#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
#else
#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
#endif

// Helpers for generating unique variable names
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_PRIVATE_NAME(...)                                      \
  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
                           __VA_ARGS__)
#else
#define BENCHMARK_PRIVATE_NAME(n) \
  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
#endif  // BENCHMARK_HAS_CXX11

// Two-level concat so that macro arguments (e.g. __COUNTER__) are expanded
// before token pasting.
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
// Helper for concatenation with macro name expansion
#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
  BaseClass##_##Method##_Benchmark

// Declares an unused static Benchmark* whose initializer performs the
// actual registration.
#define BENCHMARK_PRIVATE_DECLARE(n)                                 \
  static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
      BENCHMARK_UNUSED
1265
// Registers a benchmark function at static-initialization time. The C++11
// variadic form lets the benchmark name contain commas (e.g. template
// instantiations).
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK(...)                                               \
  BENCHMARK_PRIVATE_DECLARE(_benchmark_) =                           \
      (::benchmark::internal::RegisterBenchmarkInternal(             \
          new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
                                                       &__VA_ARGS__)))
#else
#define BENCHMARK(n)                                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n, n)))
#endif  // BENCHMARK_HAS_CXX11

// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
1286
#ifdef BENCHMARK_HAS_CXX11

// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
// For example:
//
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//  [...]
// }
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
//
// NOTE: the extra argument expressions are re-evaluated on every invocation
// of the generated lambda (the lambda itself captures nothing).
#define BENCHMARK_CAPTURE(func, test_case_name, ...)     \
  BENCHMARK_PRIVATE_DECLARE(func) =                      \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #func "/" #test_case_name,                 \
              [](::benchmark::State& st) { func(st, __VA_ARGS__); })))

#endif  // BENCHMARK_HAS_CXX11
1308
// This will register a benchmark for a templatized function. For example:
//
// template<int arg>
// void BM_Foo(int iters);
//
// BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// will register BM_Foo<1> as a benchmark.
// BENCHMARK_TEMPLATE1/BENCHMARK_TEMPLATE2 are fixed-arity C++03 fallbacks.
#define BENCHMARK_TEMPLATE1(n, a)                        \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))

#define BENCHMARK_TEMPLATE2(n, a, b)                                         \
  BENCHMARK_PRIVATE_DECLARE(n) =                                             \
      (::benchmark::internal::RegisterBenchmarkInternal(                     \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
                                                       n<a, b>)))

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE(n, ...)                       \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
#else
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
1337
// Declares a fixture subclass named BaseClass_Method_Benchmark whose
// BenchmarkCase body is supplied separately (see BENCHMARK_DEFINE_F /
// BENCHMARK_F below).
#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)                  \
  class BaseClass##_##Method##_Benchmark : public BaseClass {           \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() {                                \
      this->SetName(#BaseClass "/" #Method);                            \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

// Same, for a fixture class template with one template argument.
#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a)     \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a> {        \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() {                                \
      this->SetName(#BaseClass "<" #a ">/" #Method);                    \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

// Same, for a fixture class template with two template arguments.
#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b)  \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> {     \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() {                                \
      this->SetName(#BaseClass "<" #a "," #b ">/" #Method);             \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

#ifdef BENCHMARK_HAS_CXX11
// Variadic template-argument form (C++11 only).
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...)       \
  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
   public:                                                                 \
    BaseClass##_##Method##_Benchmark() {                                   \
      this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method);             \
    }                                                                      \
                                                                           \
   protected:                                                              \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE;    \
  };
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
#endif
1386
// Declares the fixture subclass and opens the definition of its
// BenchmarkCase method (the user writes the body after the macro).
// Pair with BENCHMARK_REGISTER_F to register it.
#define BENCHMARK_DEFINE_F(BaseClass, Method)    \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)    \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b)    \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...)            \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
#endif

// Registers a fixture benchmark previously declared with a *_DEFINE_F macro.
#define BENCHMARK_REGISTER_F(BaseClass, Method) \
  BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))

#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
  BENCHMARK_PRIVATE_DECLARE(TestName) =        \
      (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
1414
// This macro will define and register a benchmark within a fixture class.
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)                   \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                             \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif

// Helper macro to create a main routine in a test that runs the benchmarks.
// The trailing declaration of main (without a body) consumes the semicolon
// the user writes after BENCHMARK_MAIN().
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                     \
    ::benchmark::Initialize(&argc, argv);                               \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                              \
    ::benchmark::Shutdown();                                            \
    return 0;                                                           \
  }                                                                     \
  int main(int, char**)
1451
1452// ------------------------------------------------------
1453// Benchmark Reporters
1454
1455namespace benchmark {
1456
// Snapshot of host CPU properties; constructed once and accessed through
// the Get() singleton.
struct CPUInfo {
  struct CacheInfo {
    std::string type;  // cache type string — presumably "Data"/"Instruction"/
                       // "Unified"; confirm against the sysinfo implementation
    int level;         // cache level (1 = L1, ...)
    int size;          // cache size — presumably in bytes; TODO confirm
    int num_sharing;   // number of logical CPUs sharing this cache
  };

  enum Scaling { UNKNOWN, ENABLED, DISABLED };  // CPU frequency scaling state

  int num_cpus;
  Scaling scaling;
  double cycles_per_second;
  std::vector<CacheInfo> caches;
  std::vector<double> load_avg;

  // Returns the process-wide singleton.
  static const CPUInfo& Get();

 private:
  CPUInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};
1479
// Struct holding system (host) information; singleton via Get().
struct SystemInfo {
  std::string name;  // presumably the host name; confirm in the sysinfo code
  static const SystemInfo& Get();

 private:
  SystemInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
};
1489
// BenchmarkName contains the components of the Benchmark's name
// which allows individual fields to be modified or cleared before
// building the final name using 'str()'.
struct BenchmarkName {
  std::string function_name;
  std::string args;
  std::string min_time;
  std::string iterations;
  std::string repetitions;
  std::string time_type;
  std::string threads;

  // Return the full name of the benchmark with each non-empty
  // field separated by a '/'
  std::string str() const;
};
1506
// Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However an application
// can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
class BenchmarkReporter {
 public:
  struct Context {
    CPUInfo const& cpu_info;
    SystemInfo const& sys_info;
    // The number of chars in the longest benchmark name.
    size_t name_field_width;
    static const char* executable_name;
    Context();
  };

  struct Run {
    static const int64_t no_repetition_index = -1;
    enum RunType { RT_Iteration, RT_Aggregate };

    Run()
        : run_type(RT_Iteration),
          aggregate_unit(kTime),
          error_occurred(false),
          iterations(1),
          threads(1),
          time_unit(kNanosecond),
          real_accumulated_time(0),
          cpu_accumulated_time(0),
          max_heapbytes_used(0),
          complexity(oNone),
          complexity_lambda(),
          complexity_n(0),
          report_big_o(false),
          report_rms(false),
          memory_result(NULL),
          allocs_per_iter(0.0) {}

    std::string benchmark_name() const;
    BenchmarkName run_name;
    // NOTE(review): family_index, per_family_instance_index, repetition_index,
    // repetitions and 'statistics' are NOT initialized by the constructor —
    // confirm that every producer assigns them before the Run is reported.
    int64_t family_index;
    int64_t per_family_instance_index;
    RunType run_type;
    std::string aggregate_name;
    StatisticUnit aggregate_unit;
    std::string report_label;  // Empty if not set by benchmark.
    bool error_occurred;
    std::string error_message;

    IterationCount iterations;
    int64_t threads;
    int64_t repetition_index;
    int64_t repetitions;
    TimeUnit time_unit;
    double real_accumulated_time;
    double cpu_accumulated_time;

    // Return a value representing the real time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedRealTime() const;

    // Return a value representing the cpu time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedCPUTime() const;

    // This is set to 0.0 if memory tracing is not enabled.
    double max_heapbytes_used;

    // Keep track of arguments to compute asymptotic complexity
    BigO complexity;
    BigOFunc* complexity_lambda;
    int64_t complexity_n;

    // what statistics to compute from the measurements
    const std::vector<internal::Statistics>* statistics;

    // Inform print function whether the current run is a complexity report
    bool report_big_o;
    bool report_rms;

    UserCounters counters;

    // Memory metrics.
    const MemoryManager::Result* memory_result;
    double allocs_per_iter;
  };

  struct PerFamilyRunReports {
    PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}

    // How many runs will all instances of this benchmark perform?
    int num_runs_total;

    // How many runs have happened already?
    int num_runs_done;

    // The reports about (non-erroneous!) runs of this family.
    std::vector<BenchmarkReporter::Run> Runs;
  };

  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
  // and the error stream set to 'std::cerr'
  BenchmarkReporter();

  // Called once for every suite of benchmarks run.
  // The parameter "context" contains information that the
  // reporter may wish to use when generating its report, for example the
  // platform under which the benchmarks are running. The benchmark run is
  // never started if this function returns false, allowing the reporter
  // to skip runs based on the context information.
  virtual bool ReportContext(const Context& context) = 0;

  // Called once for each group of benchmark runs, gives information about
  // cpu-time and heap memory usage during the benchmark run. If the group
  // of runs contained more than two entries then 'report' contains additional
  // elements representing the mean and standard deviation of those runs.
  // Additionally if this group of runs was the last in a family of benchmarks
  // 'reports' contains additional entries representing the asymptotic
  // complexity and RMS of that benchmark family.
  virtual void ReportRuns(const std::vector<Run>& report) = 0;

  // Called once and only once after every group of benchmarks is run and
  // reported.
  virtual void Finalize() {}

  // REQUIRES: The object referenced by 'out' is valid for the lifetime
  // of the reporter.
  void SetOutputStream(std::ostream* out) {
    assert(out);
    output_stream_ = out;
  }

  // REQUIRES: The object referenced by 'err' is valid for the lifetime
  // of the reporter.
  void SetErrorStream(std::ostream* err) {
    assert(err);
    error_stream_ = err;
  }

  std::ostream& GetOutputStream() const { return *output_stream_; }

  std::ostream& GetErrorStream() const { return *error_stream_; }

  virtual ~BenchmarkReporter();

  // Write a human readable string to 'out' representing the specified
  // 'context'.
  // REQUIRES: 'out' is non-null.
  static void PrintBasicContext(std::ostream* out, Context const& context);

 private:
  std::ostream* output_stream_;  // non-owning; defaults to std::cout
  std::ostream* error_stream_;   // non-owning; defaults to std::cerr
};
1665
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
class ConsoleReporter : public BenchmarkReporter {
 public:
  // Bit flags controlling console output.
  enum OutputOptions {
    OO_None = 0,
    OO_Color = 1,
    OO_Tabular = 2,
    OO_ColorTabular = OO_Color | OO_Tabular,
    OO_Defaults = OO_ColorTabular
  };
  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
      : output_options_(opts_), name_field_width_(0), printed_header_(false) {}

  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 protected:
  virtual void PrintRunData(const Run& report);
  virtual void PrintHeader(const Run& report);

  OutputOptions output_options_;
  size_t name_field_width_;     // width of the benchmark-name column
  UserCounters prev_counters_;  // counters seen on the previous run
  bool printed_header_;         // whether the table header was emitted yet
};
1692
// Reporter that emits benchmark results as JSON.
class JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
  virtual void Finalize() BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  bool first_report_;  // true until the first run is printed
};
1705
// Deprecated reporter that emits benchmark results as CSV.
class BENCHMARK_DEPRECATED_MSG(
    "The CSV Reporter will be removed in a future release") CSVReporter
    : public BenchmarkReporter {
 public:
  CSVReporter() : printed_header_(false) {}
  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  bool printed_header_;                        // CSV header emitted yet?
  std::set<std::string> user_counter_names_;   // counter columns seen so far
};
1720
1721inline const char* GetTimeUnitString(TimeUnit unit) {
1722 switch (unit) {
1723 case kSecond:
1724 return "s";
1725 case kMillisecond:
1726 return "ms";
1727 case kMicrosecond:
1728 return "us";
1729 case kNanosecond:
1730 return "ns";
1731 }
1732 BENCHMARK_UNREACHABLE();
1733}
1734
1735inline double GetTimeUnitMultiplier(TimeUnit unit) {
1736 switch (unit) {
1737 case kSecond:
1738 return 1;
1739 case kMillisecond:
1740 return 1e3;
1741 case kMicrosecond:
1742 return 1e6;
1743 case kNanosecond:
1744 return 1e9;
1745 }
1746 BENCHMARK_UNREACHABLE();
1747}
1748
1749// Creates a list of integer values for the given range and multiplier.
1750// This can be used together with ArgsProduct() to allow multiple ranges
// with different multipliers.
1752// Example:
1753// ArgsProduct({
1754// CreateRange(0, 1024, /*multi=*/32),
1755// CreateRange(0, 100, /*multi=*/4),
1756// CreateDenseRange(0, 4, /*step=*/1),
1757// });
1758std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);
1759
1760// Creates a list of integer values for the given range and step.
1761std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
1762
1763} // namespace benchmark
1764
1765#endif // BENCHMARK_BENCHMARK_H_
1766

source code of third-party/benchmark/include/benchmark/benchmark.h