1// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
2// Copyright 2017 Roman Lebedev. All rights reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
#include "statistics.h"

#include <algorithm>
#include <cmath>
#include <map>
#include <numeric>
#include <string>
#include <utility>
#include <vector>

#include "benchmark/benchmark.h"
#include "check.h"
26
27namespace benchmark {
28
// Returns the sum of all samples in `v`, accumulated front to back in double
// precision. An empty sample set sums to 0.0.
auto StatisticsSum = [](const std::vector<double>& v) {
  return std::accumulate(v.begin(), v.end(), 0.0);
};
32
33double StatisticsMean(const std::vector<double>& v) {
34 if (v.empty()) return 0.0;
35 return StatisticsSum(v) * (1.0 / static_cast<double>(v.size()));
36}
37
38double StatisticsMedian(const std::vector<double>& v) {
39 if (v.size() < 3) return StatisticsMean(v);
40 std::vector<double> copy(v);
41
42 auto center = copy.begin() + v.size() / 2;
43 std::nth_element(first: copy.begin(), nth: center, last: copy.end());
44
45 // Did we have an odd number of samples? If yes, then center is the median.
46 // If not, then we are looking for the average between center and the value
47 // before. Instead of resorting, we just look for the max value before it,
48 // which is not necessarily the element immediately preceding `center` Since
49 // `copy` is only partially sorted by `nth_element`.
50 if (v.size() % 2 == 1) return *center;
51 auto center2 = std::max_element(first: copy.begin(), last: center);
52 return (*center + *center2) / 2.0;
53}
54
55// Return the sum of the squares of this sample set
56auto SumSquares = [](const std::vector<double>& v) {
57 return std::inner_product(first1: v.begin(), last1: v.end(), first2: v.begin(), init: 0.0);
58};
59
60auto Sqr = [](const double dat) { return dat * dat; };
61auto Sqrt = [](const double dat) {
62 // Avoid NaN due to imprecision in the calculations
63 if (dat < 0.0) return 0.0;
64 return std::sqrt(x: dat);
65};
66
67double StatisticsStdDev(const std::vector<double>& v) {
68 const auto mean = StatisticsMean(v);
69 if (v.empty()) return mean;
70
71 // Sample standard deviation is undefined for n = 1
72 if (v.size() == 1) return 0.0;
73
74 const double avg_squares =
75 SumSquares(v) * (1.0 / static_cast<double>(v.size()));
76 return Sqrt(static_cast<double>(v.size()) /
77 (static_cast<double>(v.size()) - 1.0) *
78 (avg_squares - Sqr(mean)));
79}
80
81double StatisticsCV(const std::vector<double>& v) {
82 if (v.size() < 2) return 0.0;
83
84 const auto stddev = StatisticsStdDev(v);
85 const auto mean = StatisticsMean(v);
86
87 if (std::fpclassify(x: mean) == FP_ZERO) return 0.0;
88
89 return stddev / mean;
90}
91
92std::vector<BenchmarkReporter::Run> ComputeStats(
93 const std::vector<BenchmarkReporter::Run>& reports) {
94 typedef BenchmarkReporter::Run Run;
95 std::vector<Run> results;
96
97 auto error_count = std::count_if(first: reports.begin(), last: reports.end(),
98 pred: [](Run const& run) { return run.skipped; });
99
100 if (reports.size() - error_count < 2) {
101 // We don't report aggregated data if there was a single run.
102 return results;
103 }
104
105 // Accumulators.
106 std::vector<double> real_accumulated_time_stat;
107 std::vector<double> cpu_accumulated_time_stat;
108
109 real_accumulated_time_stat.reserve(n: reports.size());
110 cpu_accumulated_time_stat.reserve(n: reports.size());
111
112 // All repetitions should be run with the same number of iterations so we
113 // can take this information from the first benchmark.
114 const IterationCount run_iterations = reports.front().iterations;
115 // create stats for user counters
116 struct CounterStat {
117 Counter c;
118 std::vector<double> s;
119 };
120 std::map<std::string, CounterStat> counter_stats;
121 for (Run const& r : reports) {
122 for (auto const& cnt : r.counters) {
123 auto it = counter_stats.find(x: cnt.first);
124 if (it == counter_stats.end()) {
125 it = counter_stats
126 .emplace(args: cnt.first,
127 args: CounterStat{.c: cnt.second, .s: std::vector<double>{}})
128 .first;
129 it->second.s.reserve(n: reports.size());
130 } else {
131 BM_CHECK_EQ(it->second.c.flags, cnt.second.flags);
132 }
133 }
134 }
135
136 // Populate the accumulators.
137 for (Run const& run : reports) {
138 BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
139 BM_CHECK_EQ(run_iterations, run.iterations);
140 if (run.skipped) continue;
141 real_accumulated_time_stat.emplace_back(args: run.real_accumulated_time);
142 cpu_accumulated_time_stat.emplace_back(args: run.cpu_accumulated_time);
143 // user counters
144 for (auto const& cnt : run.counters) {
145 auto it = counter_stats.find(x: cnt.first);
146 BM_CHECK_NE(it, counter_stats.end());
147 it->second.s.emplace_back(args: cnt.second);
148 }
149 }
150
151 // Only add label if it is same for all runs
152 std::string report_label = reports[0].report_label;
153 for (std::size_t i = 1; i < reports.size(); i++) {
154 if (reports[i].report_label != report_label) {
155 report_label = "";
156 break;
157 }
158 }
159
160 const double iteration_rescale_factor =
161 double(reports.size()) / double(run_iterations);
162
163 for (const auto& Stat : *reports[0].statistics) {
164 // Get the data from the accumulator to BenchmarkReporter::Run's.
165 Run data;
166 data.run_name = reports[0].run_name;
167 data.family_index = reports[0].family_index;
168 data.per_family_instance_index = reports[0].per_family_instance_index;
169 data.run_type = BenchmarkReporter::Run::RT_Aggregate;
170 data.threads = reports[0].threads;
171 data.repetitions = reports[0].repetitions;
172 data.repetition_index = Run::no_repetition_index;
173 data.aggregate_name = Stat.name_;
174 data.aggregate_unit = Stat.unit_;
175 data.report_label = report_label;
176
177 // It is incorrect to say that an aggregate is computed over
178 // run's iterations, because those iterations already got averaged.
179 // Similarly, if there are N repetitions with 1 iterations each,
180 // an aggregate will be computed over N measurements, not 1.
181 // Thus it is best to simply use the count of separate reports.
182 data.iterations = reports.size();
183
184 data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
185 data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
186
187 if (data.aggregate_unit == StatisticUnit::kTime) {
188 // We will divide these times by data.iterations when reporting, but the
189 // data.iterations is not necessarily the scale of these measurements,
190 // because in each repetition, these timers are sum over all the iters.
191 // And if we want to say that the stats are over N repetitions and not
192 // M iterations, we need to multiply these by (N/M).
193 data.real_accumulated_time *= iteration_rescale_factor;
194 data.cpu_accumulated_time *= iteration_rescale_factor;
195 }
196
197 data.time_unit = reports[0].time_unit;
198
199 // user counters
200 for (auto const& kv : counter_stats) {
201 // Do *NOT* rescale the custom counters. They are already properly scaled.
202 const auto uc_stat = Stat.compute_(kv.second.s);
203 auto c = Counter(uc_stat, counter_stats[kv.first].c.flags,
204 counter_stats[kv.first].c.oneK);
205 data.counters[kv.first] = c;
206 }
207
208 results.push_back(x: data);
209 }
210
211 return results;
212}
213
214} // end namespace benchmark
215

source code of third-party/benchmark/src/statistics.cc