statistics.cc source code [third-party/benchmark/src/statistics.cc]

1	// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
2	// Copyright 2017 Roman Lebedev. All rights reserved.
3	//
4	// Licensed under the Apache License, Version 2.0 (the "License");
5	// you may not use this file except in compliance with the License.
6	// You may obtain a copy of the License at
7	//
8	// http://www.apache.org/licenses/LICENSE-2.0
9	//
10	// Unless required by applicable law or agreed to in writing, software
11	// distributed under the License is distributed on an "AS IS" BASIS,
12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	// See the License for the specific language governing permissions and
14	// limitations under the License.
15
16	#include "statistics.h"
17
18	#include <algorithm>
19	#include <cmath>
20	#include <numeric>
21	#include <string>
22	#include <vector>
23
24	#include "benchmark/benchmark.h"
25	#include "check.h"
26
27	namespace benchmark {
28
// Returns the sum of all samples in `v`, accumulated in double precision.
// An empty sample set yields 0.0.
auto StatisticsSum = [](const std::vector<double>& v) {
  return std::accumulate(v.begin(), v.end(), 0.0);
};
32
33	double StatisticsMean(const std::vector<double>& v) {
34	if (v.empty()) return `0.0`;
35	return StatisticsSum (v) * (`1.0` / static_cast<double>(v.size()));
36	}
37
38	double StatisticsMedian(const std::vector<double>& v) {
39	if (v.size() < `3`) return StatisticsMean(v);
40	std::vector<double> copy(v);
41
42	auto center = copy.begin() + v.size() / `2`;
43	std::nth_element(first: copy.begin(), nth: center, last: copy.end());
44
45	// Did we have an odd number of samples? If yes, then center is the median.
46	// If not, then we are looking for the average between center and the value
47	// before. Instead of resorting, we just look for the max value before it,
48	// which is not necessarily the element immediately preceding `center` Since
49	// `copy` is only partially sorted by `nth_element`.
50	if (v.size() % `2` == `1`) return *center;
51	auto center2 = std::max_element(first: copy.begin(), last: center);
52	return (center + center2) / `2.0`;
53	}
54
// Returns the sum of the squares of the samples in `v`, computed as the inner
// product of the sample set with itself. An empty sample set yields 0.0.
auto SumSquares = [](const std::vector<double>& v) {
  return std::inner_product(v.begin(), v.end(), v.begin(), 0.0);
};
59
// Returns `dat` squared.
auto Sqr = [](const double dat) { return dat * dat; };
// Returns the square root of `dat`, clamping negative inputs to 0.0.
auto Sqrt = [](const double dat) {
  // Avoid NaN due to imprecision in the calculations: a variance that should
  // be zero may come out as a tiny negative number.
  if (dat < 0.0) return 0.0;
  return std::sqrt(dat);
};
66
67	double StatisticsStdDev(const std::vector<double>& v) {
68	const auto mean = StatisticsMean(v);
69	if (v.empty()) return mean;
70
71	// Sample standard deviation is undefined for n = 1
72	if (v.size() == `1`) return `0.0`;
73
74	const double avg_squares =
75	SumSquares (v) * (`1.0` / static_cast<double>(v.size()));
76	return Sqrt (static_cast<double>(v.size()) /
77	(static_cast<double>(v.size()) - `1.0`) *
78	(avg_squares - Sqr (mean)));
79	}
80
81	double StatisticsCV(const std::vector<double>& v) {
82	if (v.size() < `2`) return `0.0`;
83
84	const auto stddev = StatisticsStdDev(v);
85	const auto mean = StatisticsMean(v);
86
87	if (std::fpclassify(x: mean) == FP_ZERO) return `0.0`;
88
89	return stddev / mean;
90	}
91
92	std::vector<BenchmarkReporter::Run> ComputeStats(
93	const std::vector<BenchmarkReporter::Run>& reports) {
94	typedef BenchmarkReporter::Run Run;
95	std::vector<Run> results;
96
97	auto error_count = std::count_if(first: reports.begin(), last: reports.end(),
98	pred: [](Run const& run) { return run.skipped; });
99
100	if (reports.size() - error_count < `2`) {
101	// We don't report aggregated data if there was a single run.
102	return results;
103	}
104
105	// Accumulators.
106	std::vector<double> real_accumulated_time_stat;
107	std::vector<double> cpu_accumulated_time_stat;
108
109	real_accumulated_time_stat.reserve(n: reports.size());
110	cpu_accumulated_time_stat.reserve(n: reports.size());
111
112	// All repetitions should be run with the same number of iterations so we
113	// can take this information from the first benchmark.
114	const IterationCount run_iterations = reports.front().iterations;
115	// create stats for user counters
116	struct CounterStat {
117	Counter c;
118	std::vector<double> s;
119	};
120	std::map<std::string, CounterStat> counter_stats;
121	for (Run const& r : reports) {
122	for (auto const& cnt : r.counters) {
123	auto it = counter_stats.find(x: cnt.first);
124	if (it == counter_stats.end()) {
125	it = counter_stats
126	.emplace(args: cnt.first,
127	args: CounterStat{.c: cnt.second, .s: std::vector<double>{}})
128	.first;
129	it ->second.s.reserve(n: reports.size());
130	} else {
131	BM_CHECK_EQ(it ->second.c.flags, cnt.second.flags);
132	}
133	}
134	}
135
136	// Populate the accumulators.
137	for (Run const& run : reports) {
138	BM_CHECK_EQ(reports[`0`].benchmark_name(), run.benchmark_name());
139	BM_CHECK_EQ(run_iterations, run.iterations);
140	if (run.skipped) continue;
141	real_accumulated_time_stat.emplace_back(args: run.real_accumulated_time);
142	cpu_accumulated_time_stat.emplace_back(args: run.cpu_accumulated_time);
143	// user counters
144	for (auto const& cnt : run.counters) {
145	auto it = counter_stats.find(x: cnt.first);
146	BM_CHECK_NE(it, counter_stats.end());
147	it ->second.s.emplace_back(args: cnt.second);
148	}
149	}
150
151	// Only add label if it is same for all runs
152	std::string report_label = reports [`0`].report_label;
153	for (std::size_t i = `1`; i < reports.size(); i++) {
154	if (reports [i].report_label != report_label) {
155	report_label = "";
156	break;
157	}
158	}
159
160	const double iteration_rescale_factor =
161	double(reports.size()) / double(run_iterations);
162
163	for (const auto& Stat : *reports [`0`].statistics) {
164	// Get the data from the accumulator to BenchmarkReporter::Run's.
165	Run data;
166	data.run_name = reports [`0`].run_name;
167	data.family_index = reports [`0`].family_index;
168	data.per_family_instance_index = reports [`0`].per_family_instance_index;
169	data.run_type = BenchmarkReporter::Run::RT_Aggregate;
170	data.threads = reports [`0`].threads;
171	data.repetitions = reports [`0`].repetitions;
172	data.repetition_index = Run::no_repetition_index;
173	data.aggregate_name = Stat.name_;
174	data.aggregate_unit = Stat.unit_;
175	data.report_label = report_label;
176
177	// It is incorrect to say that an aggregate is computed over
178	// run's iterations, because those iterations already got averaged.
179	// Similarly, if there are N repetitions with 1 iterations each,
180	// an aggregate will be computed over N measurements, not 1.
181	// Thus it is best to simply use the count of separate reports.
182	data.iterations = reports.size();
183
184	data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
185	data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
186
187	if (data.aggregate_unit == StatisticUnit::kTime) {
188	// We will divide these times by data.iterations when reporting, but the
189	// data.iterations is not necessarily the scale of these measurements,
190	// because in each repetition, these timers are sum over all the iters.
191	// And if we want to say that the stats are over N repetitions and not
192	// M iterations, we need to multiply these by (N/M).
193	data.real_accumulated_time *= iteration_rescale_factor;
194	data.cpu_accumulated_time *= iteration_rescale_factor;
195	}
196
197	data.time_unit = reports [`0`].time_unit;
198
199	// user counters
200	for (auto const& kv : counter_stats) {
201	// Do NOT* rescale the custom counters. They are already properly scaled.*
202	const auto uc_stat = Stat.compute_(kv.second.s);
203	auto c = Counter (uc_stat, counter_stats [kv.first].c.flags,
204	counter_stats [kv.first].c.oneK);
205	data.counters [kv.first] = c;
206	}
207
208	results.push_back(x: data);
209	}
210
211	return results;
212	}
213
214	} // end namespace benchmark
215

source code of third-party/benchmark/src/statistics.cc