1 | #include "benchmark/benchmark.h" |
2 | |
3 | #include <assert.h> |
4 | #include <math.h> |
5 | #include <stdint.h> |
6 | |
7 | #include <chrono> |
8 | #include <complex> |
9 | #include <cstdlib> |
10 | #include <iostream> |
11 | #include <limits> |
12 | #include <list> |
13 | #include <map> |
14 | #include <mutex> |
15 | #include <set> |
16 | #include <sstream> |
17 | #include <string> |
18 | #include <thread> |
19 | #include <type_traits> |
20 | #include <utility> |
21 | #include <vector> |
22 | |
// BENCHMARK_NOINLINE asks the compiler not to inline the annotated function.
// It is written between the return type and the function name (see Factorial).
// GCC-compatible compilers and MSVC each get their own spelling; on any other
// compiler the macro expands to nothing.
#if defined(__GNUC__)
#define BENCHMARK_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
#define BENCHMARK_NOINLINE __declspec(noinline)
#else
#define BENCHMARK_NOINLINE
#endif
28 | |
29 | namespace { |
30 | |
// Returns n! computed recursively.
//
// Any n <= 1 (including zero and negative values) yields 1, so the recursion
// always terminates. The product is computed in `int`, so callers must keep n
// small enough for the result to fit.
int BENCHMARK_NOINLINE Factorial(int n) {
  return (n <= 1) ? 1 : n * Factorial(n - 1);
}
34 | |
// Approximates pi by summing `depth` terms of an alternating series whose
// denominators are the odd numbers -1, 1, 3, 5, ...
//
// Term k is ((k % 2) * 2 - 1) / (2 * k - 1). The k == 0 term evaluates to
// (-1 / -1) == 1, which is why 1.0 is subtracted before scaling by 4.
// A depth of 0 or less runs no terms and returns -4.0.
double CalculatePi(int depth) {
  double sum = 0.0;
  int term = 0;
  while (term < depth) {
    // -1 for even terms, +1 for odd terms.
    const double sign = static_cast<double>(((term % 2) * 2) - 1);
    const double odd = static_cast<double>((2 * term) - 1);
    sum += sign / odd;
    ++term;
  }
  return (sum - 1.0) * 4;
}
44 | |
// Builds a set containing the consecutive integers [0, size).
//
// Despite the name, the values are sequential rather than random. Each value
// is inserted with end() as the position hint. A size of zero or less returns
// an empty set.
std::set<int64_t> ConstructRandomSet(int64_t size) {
  std::set<int64_t> s;
  // The counter has the same type as `size`; an `int` counter would overflow
  // before reaching sizes above INT_MAX.
  for (int64_t i = 0; i < size; ++i) s.insert(s.end(), i);
  return s;
}
50 | |
// Mutex taken by BM_SetupTeardown around every access to *test_vector inside
// its timed loop.
std::mutex test_vector_mu;

// Vector shared by the threads of BM_SetupTeardown and BM_ParallelMemset.
// Starts out null; each of those benchmarks allocates it in thread 0 before
// its loop and deletes it in thread 0 afterwards.
std::vector<int>* test_vector{nullptr};
53 | |
54 | } // end namespace |
55 | |
56 | static void BM_Factorial(benchmark::State& state) { |
57 | int fac_42 = 0; |
58 | for (auto _ : state) fac_42 = Factorial(n: 8); |
59 | // Prevent compiler optimizations |
60 | std::stringstream ss; |
61 | ss << fac_42; |
62 | state.SetLabel(ss.str()); |
63 | } |
64 | BENCHMARK(BM_Factorial); |
65 | BENCHMARK(BM_Factorial)->UseRealTime(); |
66 | |
67 | static void BM_CalculatePiRange(benchmark::State& state) { |
68 | double pi = 0.0; |
69 | for (auto _ : state) pi = CalculatePi(depth: static_cast<int>(state.range(pos: 0))); |
70 | std::stringstream ss; |
71 | ss << pi; |
72 | state.SetLabel(ss.str()); |
73 | } |
74 | BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); |
75 | |
76 | static void BM_CalculatePi(benchmark::State& state) { |
77 | static const int depth = 1024; |
78 | for (auto _ : state) { |
79 | double pi = CalculatePi(depth: static_cast<int>(depth)); |
80 | benchmark::DoNotOptimize(value&: pi); |
81 | } |
82 | } |
83 | BENCHMARK(BM_CalculatePi)->Threads(t: 8); |
84 | BENCHMARK(BM_CalculatePi)->ThreadRange(min_threads: 1, max_threads: 32); |
85 | BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); |
86 | |
87 | static void BM_SetInsert(benchmark::State& state) { |
88 | std::set<int64_t> data; |
89 | for (auto _ : state) { |
90 | state.PauseTiming(); |
91 | data = ConstructRandomSet(size: state.range(pos: 0)); |
92 | state.ResumeTiming(); |
93 | for (int j = 0; j < state.range(pos: 1); ++j) data.insert(x: rand()); |
94 | } |
95 | state.SetItemsProcessed(state.iterations() * state.range(pos: 1)); |
96 | state.SetBytesProcessed(state.iterations() * state.range(pos: 1) * |
97 | static_cast<int64_t>(sizeof(int))); |
98 | } |
99 | |
100 | // Test many inserts at once to reduce the total iterations needed. Otherwise, |
101 | // the slower, non-timed part of each iteration will make the benchmark take |
102 | // forever. |
103 | BENCHMARK(BM_SetInsert)->Ranges(ranges: {{1 << 10, 8 << 10}, {128, 512}}); |
104 | |
105 | template <typename Container, |
106 | typename ValueType = typename Container::value_type> |
107 | static void BM_Sequential(benchmark::State& state) { |
108 | ValueType v = 42; |
109 | for (auto _ : state) { |
110 | Container c; |
111 | for (int64_t i = state.range(pos: 0); --i;) c.push_back(v); |
112 | } |
113 | const int64_t items_processed = state.iterations() * state.range(pos: 0); |
114 | state.SetItemsProcessed(items_processed); |
115 | state.SetBytesProcessed(items_processed * static_cast<int64_t>(sizeof(v))); |
116 | } |
117 | BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int) |
118 | ->Range(start: 1 << 0, limit: 1 << 10); |
119 | BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(start: 1 << 0, limit: 1 << 10); |
120 | // Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond. |
121 | #ifdef BENCHMARK_HAS_CXX11 |
122 | BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(x: 512); |
123 | #endif |
124 | |
125 | static void BM_StringCompare(benchmark::State& state) { |
126 | size_t len = static_cast<size_t>(state.range(pos: 0)); |
127 | std::string s1(len, '-'); |
128 | std::string s2(len, '-'); |
129 | for (auto _ : state) { |
130 | auto comp = s1.compare(str: s2); |
131 | benchmark::DoNotOptimize(value&: comp); |
132 | } |
133 | } |
134 | BENCHMARK(BM_StringCompare)->Range(start: 1, limit: 1 << 20); |
135 | |
136 | static void BM_SetupTeardown(benchmark::State& state) { |
137 | if (state.thread_index() == 0) { |
138 | // No need to lock test_vector_mu here as this is running single-threaded. |
139 | test_vector = new std::vector<int>(); |
140 | } |
141 | int i = 0; |
142 | for (auto _ : state) { |
143 | std::lock_guard<std::mutex> l(test_vector_mu); |
144 | if (i % 2 == 0) |
145 | test_vector->push_back(x: i); |
146 | else |
147 | test_vector->pop_back(); |
148 | ++i; |
149 | } |
150 | if (state.thread_index() == 0) { |
151 | delete test_vector; |
152 | } |
153 | } |
154 | BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); |
155 | |
156 | static void BM_LongTest(benchmark::State& state) { |
157 | double tracker = 0.0; |
158 | for (auto _ : state) { |
159 | for (int i = 0; i < state.range(pos: 0); ++i) |
160 | benchmark::DoNotOptimize(value&: tracker += i); |
161 | } |
162 | } |
163 | BENCHMARK(BM_LongTest)->Range(start: 1 << 16, limit: 1 << 28); |
164 | |
165 | static void BM_ParallelMemset(benchmark::State& state) { |
166 | int64_t size = state.range(pos: 0) / static_cast<int64_t>(sizeof(int)); |
167 | int thread_size = static_cast<int>(size) / state.threads(); |
168 | int from = thread_size * state.thread_index(); |
169 | int to = from + thread_size; |
170 | |
171 | if (state.thread_index() == 0) { |
172 | test_vector = new std::vector<int>(static_cast<size_t>(size)); |
173 | } |
174 | |
175 | for (auto _ : state) { |
176 | for (int i = from; i < to; i++) { |
177 | // No need to lock test_vector_mu as ranges |
178 | // do not overlap between threads. |
179 | benchmark::DoNotOptimize(value&: test_vector->at(n: static_cast<size_t>(i)) = 1); |
180 | } |
181 | } |
182 | |
183 | if (state.thread_index() == 0) { |
184 | delete test_vector; |
185 | } |
186 | } |
187 | BENCHMARK(BM_ParallelMemset)->Arg(x: 10 << 20)->ThreadRange(min_threads: 1, max_threads: 4); |
188 | |
189 | static void BM_ManualTiming(benchmark::State& state) { |
190 | int64_t slept_for = 0; |
191 | int64_t microseconds = state.range(pos: 0); |
192 | std::chrono::duration<double, std::micro> sleep_duration{ |
193 | static_cast<double>(microseconds)}; |
194 | |
195 | for (auto _ : state) { |
196 | auto start = std::chrono::high_resolution_clock::now(); |
197 | // Simulate some useful workload with a sleep |
198 | std::this_thread::sleep_for( |
199 | rtime: std::chrono::duration_cast<std::chrono::nanoseconds>(d: sleep_duration)); |
200 | auto end = std::chrono::high_resolution_clock::now(); |
201 | |
202 | auto elapsed = |
203 | std::chrono::duration_cast<std::chrono::duration<double>>(d: end - start); |
204 | |
205 | state.SetIterationTime(elapsed.count()); |
206 | slept_for += microseconds; |
207 | } |
208 | state.SetItemsProcessed(slept_for); |
209 | } |
210 | BENCHMARK(BM_ManualTiming)->Range(start: 1, limit: 1 << 14)->UseRealTime(); |
211 | BENCHMARK(BM_ManualTiming)->Range(start: 1, limit: 1 << 14)->UseManualTime(); |
212 | |
213 | #ifdef BENCHMARK_HAS_CXX11 |
214 | |
215 | template <class... Args> |
216 | void BM_with_args(benchmark::State& state, Args&&...) { |
217 | for (auto _ : state) { |
218 | } |
219 | } |
220 | BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); |
221 | BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc" ), |
222 | std::pair<int, double>(42, 3.8)); |
223 | |
224 | void BM_non_template_args(benchmark::State& state, int, double) { |
225 | while (state.KeepRunning()) { |
226 | } |
227 | } |
228 | BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); |
229 | |
230 | template <class T, class U, class... ExtraArgs> |
231 | void BM_template2_capture(benchmark::State& state, ExtraArgs&&... ) { |
232 | static_assert(std::is_same<T, void>::value, "" ); |
233 | static_assert(std::is_same<U, char*>::value, "" ); |
234 | static_assert(std::is_same<ExtraArgs..., unsigned int>::value, "" ); |
235 | unsigned int dummy[sizeof...(ExtraArgs)] = {extra_args...}; |
236 | assert(dummy[0] == 42); |
237 | for (auto _ : state) { |
238 | } |
239 | } |
240 | BENCHMARK_TEMPLATE2_CAPTURE(BM_template2_capture, void, char*, foo, 42U); |
241 | BENCHMARK_CAPTURE((BM_template2_capture<void, char*>), foo, 42U); |
242 | |
243 | template <class T, class... ExtraArgs> |
244 | void BM_template1_capture(benchmark::State& state, ExtraArgs&&... ) { |
245 | static_assert(std::is_same<T, void>::value, "" ); |
246 | static_assert(std::is_same<ExtraArgs..., unsigned long>::value, "" ); |
247 | unsigned long dummy[sizeof...(ExtraArgs)] = {extra_args...}; |
248 | assert(dummy[0] == 24); |
249 | for (auto _ : state) { |
250 | } |
251 | } |
252 | BENCHMARK_TEMPLATE1_CAPTURE(BM_template1_capture, void, foo, 24UL); |
253 | BENCHMARK_CAPTURE(BM_template1_capture<void>, foo, 24UL); |
254 | |
255 | #endif // BENCHMARK_HAS_CXX11 |
256 | |
257 | static void BM_DenseThreadRanges(benchmark::State& st) { |
258 | switch (st.range(pos: 0)) { |
259 | case 1: |
260 | assert(st.threads() == 1 || st.threads() == 2 || st.threads() == 3); |
261 | break; |
262 | case 2: |
263 | assert(st.threads() == 1 || st.threads() == 3 || st.threads() == 4); |
264 | break; |
265 | case 3: |
266 | assert(st.threads() == 5 || st.threads() == 8 || st.threads() == 11 || |
267 | st.threads() == 14); |
268 | break; |
269 | default: |
270 | assert(false && "Invalid test case number" ); |
271 | } |
272 | while (st.KeepRunning()) { |
273 | } |
274 | } |
275 | BENCHMARK(BM_DenseThreadRanges)->Arg(x: 1)->DenseThreadRange(min_threads: 1, max_threads: 3); |
276 | BENCHMARK(BM_DenseThreadRanges)->Arg(x: 2)->DenseThreadRange(min_threads: 1, max_threads: 4, stride: 2); |
277 | BENCHMARK(BM_DenseThreadRanges)->Arg(x: 3)->DenseThreadRange(min_threads: 5, max_threads: 14, stride: 3); |
278 | |
279 | static void BM_BenchmarkName(benchmark::State& state) { |
280 | for (auto _ : state) { |
281 | } |
282 | |
283 | // Check that the benchmark name is passed correctly to `state`. |
284 | assert("BM_BenchmarkName" == state.name()); |
285 | } |
286 | BENCHMARK(BM_BenchmarkName); |
287 | |
288 | // regression test for #1446 |
289 | template <typename type> |
290 | static void BM_templated_test(benchmark::State& state) { |
291 | for (auto _ : state) { |
292 | type created_string; |
293 | benchmark::DoNotOptimize(created_string); |
294 | } |
295 | } |
296 | |
297 | static auto BM_templated_test_double = BM_templated_test<std::complex<double>>; |
298 | BENCHMARK(BM_templated_test_double); |
299 | |
// Macro from benchmark/benchmark.h; per the library documentation it generates
// the program's main(), which runs the benchmarks registered above.
BENCHMARK_MAIN();
301 | |