| 1 | #include "benchmark/benchmark.h" |
| 2 | |
| 3 | #include <assert.h> |
| 4 | #include <math.h> |
| 5 | #include <stdint.h> |
| 6 | |
| 7 | #include <chrono> |
| 8 | #include <complex> |
| 9 | #include <cstdlib> |
| 10 | #include <iostream> |
| 11 | #include <limits> |
| 12 | #include <list> |
| 13 | #include <map> |
| 14 | #include <mutex> |
| 15 | #include <set> |
| 16 | #include <sstream> |
| 17 | #include <string> |
| 18 | #include <thread> |
| 19 | #include <type_traits> |
| 20 | #include <utility> |
| 21 | #include <vector> |
| 22 | |
// Marks a function as not inlinable so that benchmarks measure a real call.
// Only GCC-compatible compilers receive the attribute; elsewhere the macro
// expands to nothing.
#if defined(__GNUC__)
#define BENCHMARK_NOINLINE __attribute__((noinline))
#else
#define BENCHMARK_NOINLINE
#endif

namespace {

// Recursively computes n!.
//
// Every n <= 1 yields 1, so Factorial(0) and negative arguments terminate
// instead of recursing without bound. The result does not fit in a 32-bit int
// for n > 12.
int BENCHMARK_NOINLINE Factorial(int n) {
  return (n <= 1) ? 1 : n * Factorial(n - 1);
}

// Approximates pi by summing `depth` terms of an alternating series.
//
// Term i is ((i % 2) * 2 - 1) / (2 * i - 1). Term 0 evaluates to -1 / -1 and
// so contributes an extra 1.0, which is subtracted again before the sum is
// scaled by 4. Consequently depth == 0 returns -4.0.
double CalculatePi(int depth) {
  double pi = 0.0;
  for (int i = 0; i < depth; ++i) {
    double numerator = static_cast<double>(((i % 2) * 2) - 1);
    double denominator = static_cast<double>((2 * i) - 1);
    pi += numerator / denominator;
  }
  return (pi - 1.0) * 4;
}

// Builds a set holding the integers 0 .. size-1.
//
// Despite the name, the contents are sequential, not random. Each value is
// inserted with an end() hint because it is larger than everything already
// stored. The counter is int64_t so it matches the type of `size`.
std::set<int64_t> ConstructRandomSet(int64_t size) {
  std::set<int64_t> s;
  for (int64_t i = 0; i < size; ++i) s.insert(s.end(), i);
  return s;
}

// Shared scratch vector used by the multi-threaded benchmarks below, together
// with the mutex those benchmarks lock when several threads mutate it.
std::mutex test_vector_mu;
std::vector<int>* test_vector = nullptr;

}  // end namespace
| 55 | |
| 56 | static void BM_Factorial(benchmark::State& state) { |
| 57 | int fac_42 = 0; |
| 58 | for (auto _ : state) fac_42 = Factorial(n: 8); |
| 59 | // Prevent compiler optimizations |
| 60 | std::stringstream ss; |
| 61 | ss << fac_42; |
| 62 | state.SetLabel(ss.str()); |
| 63 | } |
| 64 | BENCHMARK(BM_Factorial); |
| 65 | BENCHMARK(BM_Factorial)->UseRealTime(); |
| 66 | |
| 67 | static void BM_CalculatePiRange(benchmark::State& state) { |
| 68 | double pi = 0.0; |
| 69 | for (auto _ : state) pi = CalculatePi(depth: static_cast<int>(state.range(pos: 0))); |
| 70 | std::stringstream ss; |
| 71 | ss << pi; |
| 72 | state.SetLabel(ss.str()); |
| 73 | } |
| 74 | BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); |
| 75 | |
| 76 | static void BM_CalculatePi(benchmark::State& state) { |
| 77 | static const int depth = 1024; |
| 78 | for (auto _ : state) { |
| 79 | double pi = CalculatePi(depth: static_cast<int>(depth)); |
| 80 | benchmark::DoNotOptimize(value&: pi); |
| 81 | } |
| 82 | } |
| 83 | BENCHMARK(BM_CalculatePi)->Threads(t: 8); |
| 84 | BENCHMARK(BM_CalculatePi)->ThreadRange(min_threads: 1, max_threads: 32); |
| 85 | BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); |
| 86 | |
| 87 | static void BM_SetInsert(benchmark::State& state) { |
| 88 | std::set<int64_t> data; |
| 89 | for (auto _ : state) { |
| 90 | state.PauseTiming(); |
| 91 | data = ConstructRandomSet(size: state.range(pos: 0)); |
| 92 | state.ResumeTiming(); |
| 93 | for (int j = 0; j < state.range(pos: 1); ++j) data.insert(x: rand()); |
| 94 | } |
| 95 | state.SetItemsProcessed(state.iterations() * state.range(pos: 1)); |
| 96 | state.SetBytesProcessed(state.iterations() * state.range(pos: 1) * |
| 97 | static_cast<int64_t>(sizeof(int))); |
| 98 | } |
| 99 | |
| 100 | // Test many inserts at once to reduce the total iterations needed. Otherwise, |
| 101 | // the slower, non-timed part of each iteration will make the benchmark take |
| 102 | // forever. |
| 103 | BENCHMARK(BM_SetInsert)->Ranges(ranges: {{1 << 10, 8 << 10}, {128, 512}}); |
| 104 | |
| 105 | template <typename Container, |
| 106 | typename ValueType = typename Container::value_type> |
| 107 | static void BM_Sequential(benchmark::State& state) { |
| 108 | ValueType v = 42; |
| 109 | for (auto _ : state) { |
| 110 | Container c; |
| 111 | for (int64_t i = state.range(pos: 0); --i;) c.push_back(v); |
| 112 | } |
| 113 | const int64_t items_processed = state.iterations() * state.range(pos: 0); |
| 114 | state.SetItemsProcessed(items_processed); |
| 115 | state.SetBytesProcessed(items_processed * static_cast<int64_t>(sizeof(v))); |
| 116 | } |
| 117 | BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int) |
| 118 | ->Range(start: 1 << 0, limit: 1 << 10); |
| 119 | BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(start: 1 << 0, limit: 1 << 10); |
| 120 | // Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond. |
| 121 | #ifdef BENCHMARK_HAS_CXX11 |
| 122 | BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(x: 512); |
| 123 | #endif |
| 124 | |
| 125 | static void BM_StringCompare(benchmark::State& state) { |
| 126 | size_t len = static_cast<size_t>(state.range(pos: 0)); |
| 127 | std::string s1(len, '-'); |
| 128 | std::string s2(len, '-'); |
| 129 | for (auto _ : state) { |
| 130 | auto comp = s1.compare(str: s2); |
| 131 | benchmark::DoNotOptimize(value&: comp); |
| 132 | } |
| 133 | } |
| 134 | BENCHMARK(BM_StringCompare)->Range(start: 1, limit: 1 << 20); |
| 135 | |
| 136 | static void BM_SetupTeardown(benchmark::State& state) { |
| 137 | if (state.thread_index() == 0) { |
| 138 | // No need to lock test_vector_mu here as this is running single-threaded. |
| 139 | test_vector = new std::vector<int>(); |
| 140 | } |
| 141 | int i = 0; |
| 142 | for (auto _ : state) { |
| 143 | std::lock_guard<std::mutex> l(test_vector_mu); |
| 144 | if (i % 2 == 0) |
| 145 | test_vector->push_back(x: i); |
| 146 | else |
| 147 | test_vector->pop_back(); |
| 148 | ++i; |
| 149 | } |
| 150 | if (state.thread_index() == 0) { |
| 151 | delete test_vector; |
| 152 | } |
| 153 | } |
| 154 | BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); |
| 155 | |
| 156 | static void BM_LongTest(benchmark::State& state) { |
| 157 | double tracker = 0.0; |
| 158 | for (auto _ : state) { |
| 159 | for (int i = 0; i < state.range(pos: 0); ++i) |
| 160 | benchmark::DoNotOptimize(value&: tracker += i); |
| 161 | } |
| 162 | } |
| 163 | BENCHMARK(BM_LongTest)->Range(start: 1 << 16, limit: 1 << 28); |
| 164 | |
| 165 | static void BM_ParallelMemset(benchmark::State& state) { |
| 166 | int64_t size = state.range(pos: 0) / static_cast<int64_t>(sizeof(int)); |
| 167 | int thread_size = static_cast<int>(size) / state.threads(); |
| 168 | int from = thread_size * state.thread_index(); |
| 169 | int to = from + thread_size; |
| 170 | |
| 171 | if (state.thread_index() == 0) { |
| 172 | test_vector = new std::vector<int>(static_cast<size_t>(size)); |
| 173 | } |
| 174 | |
| 175 | for (auto _ : state) { |
| 176 | for (int i = from; i < to; i++) { |
| 177 | // No need to lock test_vector_mu as ranges |
| 178 | // do not overlap between threads. |
| 179 | benchmark::DoNotOptimize(value&: test_vector->at(n: static_cast<size_t>(i)) = 1); |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | if (state.thread_index() == 0) { |
| 184 | delete test_vector; |
| 185 | } |
| 186 | } |
| 187 | BENCHMARK(BM_ParallelMemset)->Arg(x: 10 << 20)->ThreadRange(min_threads: 1, max_threads: 4); |
| 188 | |
| 189 | static void BM_ManualTiming(benchmark::State& state) { |
| 190 | int64_t slept_for = 0; |
| 191 | int64_t microseconds = state.range(pos: 0); |
| 192 | std::chrono::duration<double, std::micro> sleep_duration{ |
| 193 | static_cast<double>(microseconds)}; |
| 194 | |
| 195 | for (auto _ : state) { |
| 196 | auto start = std::chrono::high_resolution_clock::now(); |
| 197 | // Simulate some useful workload with a sleep |
| 198 | std::this_thread::sleep_for( |
| 199 | rtime: std::chrono::duration_cast<std::chrono::nanoseconds>(d: sleep_duration)); |
| 200 | auto end = std::chrono::high_resolution_clock::now(); |
| 201 | |
| 202 | auto elapsed = |
| 203 | std::chrono::duration_cast<std::chrono::duration<double>>(d: end - start); |
| 204 | |
| 205 | state.SetIterationTime(elapsed.count()); |
| 206 | slept_for += microseconds; |
| 207 | } |
| 208 | state.SetItemsProcessed(slept_for); |
| 209 | } |
| 210 | BENCHMARK(BM_ManualTiming)->Range(start: 1, limit: 1 << 14)->UseRealTime(); |
| 211 | BENCHMARK(BM_ManualTiming)->Range(start: 1, limit: 1 << 14)->UseManualTime(); |
| 212 | |
| 213 | #ifdef BENCHMARK_HAS_CXX11 |
| 214 | |
| 215 | template <class... Args> |
| 216 | void BM_with_args(benchmark::State& state, Args&&...) { |
| 217 | for (auto _ : state) { |
| 218 | } |
| 219 | } |
| 220 | BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); |
| 221 | BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc" ), |
| 222 | std::pair<int, double>(42, 3.8)); |
| 223 | |
| 224 | void BM_non_template_args(benchmark::State& state, int, double) { |
| 225 | while (state.KeepRunning()) { |
| 226 | } |
| 227 | } |
| 228 | BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); |
| 229 | |
| 230 | template <class T, class U, class... ExtraArgs> |
| 231 | void BM_template2_capture(benchmark::State& state, ExtraArgs&&... ) { |
| 232 | static_assert(std::is_same<T, void>::value, "" ); |
| 233 | static_assert(std::is_same<U, char*>::value, "" ); |
| 234 | static_assert(std::is_same<ExtraArgs..., unsigned int>::value, "" ); |
| 235 | unsigned int dummy[sizeof...(ExtraArgs)] = {extra_args...}; |
| 236 | assert(dummy[0] == 42); |
| 237 | for (auto _ : state) { |
| 238 | } |
| 239 | } |
| 240 | BENCHMARK_TEMPLATE2_CAPTURE(BM_template2_capture, void, char*, foo, 42U); |
| 241 | BENCHMARK_CAPTURE((BM_template2_capture<void, char*>), foo, 42U); |
| 242 | |
| 243 | template <class T, class... ExtraArgs> |
| 244 | void BM_template1_capture(benchmark::State& state, ExtraArgs&&... ) { |
| 245 | static_assert(std::is_same<T, void>::value, "" ); |
| 246 | static_assert(std::is_same<ExtraArgs..., unsigned long>::value, "" ); |
| 247 | unsigned long dummy[sizeof...(ExtraArgs)] = {extra_args...}; |
| 248 | assert(dummy[0] == 24); |
| 249 | for (auto _ : state) { |
| 250 | } |
| 251 | } |
| 252 | BENCHMARK_TEMPLATE1_CAPTURE(BM_template1_capture, void, foo, 24UL); |
| 253 | BENCHMARK_CAPTURE(BM_template1_capture<void>, foo, 24UL); |
| 254 | |
| 255 | #endif // BENCHMARK_HAS_CXX11 |
| 256 | |
| 257 | static void BM_DenseThreadRanges(benchmark::State& st) { |
| 258 | switch (st.range(pos: 0)) { |
| 259 | case 1: |
| 260 | assert(st.threads() == 1 || st.threads() == 2 || st.threads() == 3); |
| 261 | break; |
| 262 | case 2: |
| 263 | assert(st.threads() == 1 || st.threads() == 3 || st.threads() == 4); |
| 264 | break; |
| 265 | case 3: |
| 266 | assert(st.threads() == 5 || st.threads() == 8 || st.threads() == 11 || |
| 267 | st.threads() == 14); |
| 268 | break; |
| 269 | default: |
| 270 | assert(false && "Invalid test case number" ); |
| 271 | } |
| 272 | while (st.KeepRunning()) { |
| 273 | } |
| 274 | } |
| 275 | BENCHMARK(BM_DenseThreadRanges)->Arg(x: 1)->DenseThreadRange(min_threads: 1, max_threads: 3); |
| 276 | BENCHMARK(BM_DenseThreadRanges)->Arg(x: 2)->DenseThreadRange(min_threads: 1, max_threads: 4, stride: 2); |
| 277 | BENCHMARK(BM_DenseThreadRanges)->Arg(x: 3)->DenseThreadRange(min_threads: 5, max_threads: 14, stride: 3); |
| 278 | |
| 279 | static void BM_BenchmarkName(benchmark::State& state) { |
| 280 | for (auto _ : state) { |
| 281 | } |
| 282 | |
| 283 | // Check that the benchmark name is passed correctly to `state`. |
| 284 | assert("BM_BenchmarkName" == state.name()); |
| 285 | } |
| 286 | BENCHMARK(BM_BenchmarkName); |
| 287 | |
| 288 | // regression test for #1446 |
| 289 | template <typename type> |
| 290 | static void BM_templated_test(benchmark::State& state) { |
| 291 | for (auto _ : state) { |
| 292 | type created_string; |
| 293 | benchmark::DoNotOptimize(created_string); |
| 294 | } |
| 295 | } |
| 296 | |
| 297 | static auto BM_templated_test_double = BM_templated_test<std::complex<double>>; |
| 298 | BENCHMARK(BM_templated_test_double); |
| 299 | |
| 300 | BENCHMARK_MAIN(); |
| 301 | |