1 | use std::path::Path; |
2 | |
3 | use crate::stats::bivariate::regression::Slope; |
4 | use crate::stats::bivariate::Data; |
5 | use crate::stats::univariate::outliers::tukey; |
6 | use crate::stats::univariate::Sample; |
7 | use crate::stats::{Distribution, Tails}; |
8 | |
9 | use crate::benchmark::BenchmarkConfig; |
10 | use crate::connection::OutgoingMessage; |
11 | use crate::estimate::{ |
12 | build_estimates, ConfidenceInterval, Distributions, Estimate, Estimates, PointEstimates, |
13 | }; |
14 | use crate::fs; |
15 | use crate::measurement::Measurement; |
16 | use crate::report::{BenchmarkId, Report, ReportContext}; |
17 | use crate::routine::Routine; |
18 | use crate::{Baseline, Criterion, SavedSample, Throughput}; |
19 | |
/// Evaluates `$block`, logs how long it took (prefixed with `$msg`) at the
/// `info` level, and yields the block's value.
macro_rules! elapsed {
    ($msg:expr, $block:expr) => {{
        let start = ::std::time::Instant::now();
        let value = $block;
        let duration = start.elapsed();

        info!(
            "{} took {}",
            $msg,
            crate::format::time(duration.as_nanos() as f64)
        );

        value
    }};
}
35 | |
36 | mod compare; |
37 | |
// Common analysis procedure
//
// Measures (or loads) the samples for `id`, bootstraps the statistics,
// persists the results under the output directory, compares them against a
// saved baseline when one exists, and notifies the reports at every stage.
pub(crate) fn common<M: Measurement, T: ?Sized>(
    id: &BenchmarkId,
    routine: &mut dyn Routine<M, T>,
    config: &BenchmarkConfig,
    criterion: &Criterion<M>,
    report_context: &ReportContext,
    parameter: &T,
    throughput: Option<Throughput>,
) {
    criterion.report.benchmark_start(id, report_context);

    // Strict comparison mode refuses to run unless the baseline is on disk.
    if let Baseline::CompareStrict = criterion.baseline {
        if !base_dir_exists(
            id,
            &criterion.baseline_directory,
            &criterion.output_directory,
        ) {
            panic!(
                "Baseline '{base}' must exist before comparison is allowed; try --save-baseline {base}",
                base=criterion.baseline_directory,
            );
        }
    }

    let (sampling_mode, iters, times);
    if let Some(baseline) = &criterion.load_baseline {
        // --load-baseline: skip measurement entirely and reuse a saved sample.
        let mut sample_path = criterion.output_directory.clone();
        sample_path.push(id.as_directory_name());
        sample_path.push(baseline);
        sample_path.push("sample.json");
        let loaded = fs::load::<SavedSample, _>(&sample_path);

        match loaded {
            Err(err) => panic!(
                "Baseline '{base}' must exist before it can be loaded; try --save-baseline {base}. Error: {err}",
                base = baseline, err = err
            ),
            Ok(samples) => {
                sampling_mode = samples.sampling_mode;
                iters = samples.iters.into_boxed_slice();
                times = samples.times.into_boxed_slice();
            }
        }
    } else {
        // Run the routine to collect fresh measurements.
        let sample = routine.sample(
            &criterion.measurement,
            id,
            config,
            criterion,
            report_context,
            parameter,
        );
        sampling_mode = sample.0;
        iters = sample.1;
        times = sample.2;

        // When driven over a connection (e.g. by cargo-criterion), hand the
        // raw measurements to the client and let it perform the analysis.
        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::MeasurementComplete {
                id: id.into(),
                iters: &iters,
                times: &times,
                plot_config: (&report_context.plot_config).into(),
                sampling_method: sampling_mode.into(),
                benchmark_config: config.into(),
            })
            .unwrap();

            conn.serve_value_formatter(criterion.measurement.formatter())
                .unwrap();
            return;
        }
    }

    criterion.report.analysis(id, report_context);

    // A zero-time measurement would poison the per-iteration averages and the
    // regression below, so abort the analysis for this benchmark.
    if times.iter().any(|&f| f == 0.0) {
        error!(
            "At least one measurement of benchmark {} took zero time per \
            iteration. This should not be possible. If using iter_custom, please verify \
            that your routine is correctly measured.",
            id.as_title()
        );
        return;
    }

    // Average time per iteration for each sample; this drives the statistics.
    let avg_times = iters
        .iter()
        .zip(times.iter())
        .map(|(&iters, &elapsed)| elapsed / iters)
        .collect::<Vec<f64>>();
    let avg_times = Sample::new(&avg_times);

    // Everything below writes into <output>/<benchmark>/new/.
    if criterion.should_save_baseline() {
        log_if_err!({
            let mut new_dir = criterion.output_directory.clone();
            new_dir.push(id.as_directory_name());
            new_dir.push("new");
            fs::mkdirp(&new_dir)
        });
    }

    let data = Data::new(&iters, &times);
    // Classify outliers among the average times using Tukey's fences.
    let labeled_sample = tukey::classify(avg_times);
    if criterion.should_save_baseline() {
        log_if_err!({
            let mut tukey_file = criterion.output_directory.to_owned();
            tukey_file.push(id.as_directory_name());
            tukey_file.push("new");
            tukey_file.push("tukey.json");
            fs::save(&labeled_sample.fences(), &tukey_file)
        });
    }
    // Bootstrap mean/median/std-dev/MAD estimates from the average times.
    let (mut distributions, mut estimates) = estimates(avg_times, config);
    if sampling_mode.is_linear() {
        // Linear sampling additionally supports a regression-based slope estimate.
        let (distribution, slope) = regression(&data, config);

        estimates.slope = Some(slope);
        distributions.slope = Some(distribution);
    }

    if criterion.should_save_baseline() {
        log_if_err!({
            let mut sample_file = criterion.output_directory.clone();
            sample_file.push(id.as_directory_name());
            sample_file.push("new");
            sample_file.push("sample.json");
            fs::save(
                &SavedSample {
                    sampling_mode,
                    iters: data.x().as_ref().to_vec(),
                    times: data.y().as_ref().to_vec(),
                },
                &sample_file,
            )
        });
        log_if_err!({
            let mut estimates_file = criterion.output_directory.clone();
            estimates_file.push(id.as_directory_name());
            estimates_file.push("new");
            estimates_file.push("estimates.json");
            fs::save(&estimates, &estimates_file)
        });
    }

    // If a baseline exists on disk, run the statistical comparison against it;
    // a comparison failure is logged but does not fail the benchmark.
    let compare_data = if base_dir_exists(
        id,
        &criterion.baseline_directory,
        &criterion.output_directory,
    ) {
        let result = compare::common(id, avg_times, config, criterion);
        match result {
            Ok((
                t_value,
                t_distribution,
                relative_estimates,
                relative_distributions,
                base_iter_counts,
                base_sample_times,
                base_avg_times,
                base_estimates,
            )) => {
                // Two-tailed test: we care about regressions and improvements alike.
                let p_value = t_distribution.p_value(t_value, &Tails::Two);
                Some(crate::report::ComparisonData {
                    p_value,
                    t_distribution,
                    t_value,
                    relative_estimates,
                    relative_distributions,
                    significance_threshold: config.significance_level,
                    noise_threshold: config.noise_threshold,
                    base_iter_counts,
                    base_sample_times,
                    base_avg_times,
                    base_estimates,
                })
            }
            Err(e) => {
                crate::error::log_error(&e);
                None
            }
        }
    } else {
        None
    };

    // Bundle everything up for the reporters (plots, CLI output, etc.).
    let measurement_data = crate::report::MeasurementData {
        data: Data::new(&iters, &times),
        avg_times: labeled_sample,
        absolute_estimates: estimates,
        distributions,
        comparison: compare_data,
        throughput,
    };

    criterion.report.measurement_complete(
        id,
        report_context,
        &measurement_data,
        criterion.measurement.formatter(),
    );

    if criterion.should_save_baseline() {
        log_if_err!({
            let mut benchmark_file = criterion.output_directory.clone();
            benchmark_file.push(id.as_directory_name());
            benchmark_file.push("new");
            benchmark_file.push("benchmark.json");
            fs::save(&id, &benchmark_file)
        });
    }

    // In connection mode the client owns baseline management; otherwise,
    // --save-baseline promotes the "new" results to the named baseline.
    if criterion.connection.is_none() {
        if let Baseline::Save = criterion.baseline {
            copy_new_dir_to_base(
                id.as_directory_name(),
                &criterion.baseline_directory,
                &criterion.output_directory,
            );
        }
    }
}
260 | |
261 | fn base_dir_exists(id: &BenchmarkId, baseline: &str, output_directory: &Path) -> bool { |
262 | let mut base_dir = output_directory.to_owned(); |
263 | base_dir.push(id.as_directory_name()); |
264 | base_dir.push(baseline); |
265 | base_dir.exists() |
266 | } |
267 | |
268 | // Performs a simple linear regression on the sample |
269 | fn regression( |
270 | data: &Data<'_, f64, f64>, |
271 | config: &BenchmarkConfig, |
272 | ) -> (Distribution<f64>, Estimate) { |
273 | let cl = config.confidence_level; |
274 | |
275 | let distribution = elapsed!( |
276 | "Bootstrapped linear regression" , |
277 | data.bootstrap(config.nresamples, |d| (Slope::fit(&d).0,)) |
278 | ) |
279 | .0; |
280 | |
281 | let point = Slope::fit(data); |
282 | let (lb, ub) = distribution.confidence_interval(config.confidence_level); |
283 | let se = distribution.std_dev(None); |
284 | |
285 | ( |
286 | distribution, |
287 | Estimate { |
288 | confidence_interval: ConfidenceInterval { |
289 | confidence_level: cl, |
290 | lower_bound: lb, |
291 | upper_bound: ub, |
292 | }, |
293 | point_estimate: point.0, |
294 | standard_error: se, |
295 | }, |
296 | ) |
297 | } |
298 | |
299 | // Estimates the statistics of the population from the sample |
300 | fn estimates(avg_times: &Sample<f64>, config: &BenchmarkConfig) -> (Distributions, Estimates) { |
301 | fn stats(sample: &Sample<f64>) -> (f64, f64, f64, f64) { |
302 | let mean = sample.mean(); |
303 | let std_dev = sample.std_dev(Some(mean)); |
304 | let median = sample.percentiles().median(); |
305 | let mad = sample.median_abs_dev(Some(median)); |
306 | |
307 | (mean, std_dev, median, mad) |
308 | } |
309 | |
310 | let cl = config.confidence_level; |
311 | let nresamples = config.nresamples; |
312 | |
313 | let (mean, std_dev, median, mad) = stats(avg_times); |
314 | let points = PointEstimates { |
315 | mean, |
316 | median, |
317 | std_dev, |
318 | median_abs_dev: mad, |
319 | }; |
320 | |
321 | let (dist_mean, dist_stddev, dist_median, dist_mad) = elapsed!( |
322 | "Bootstrapping the absolute statistics." , |
323 | avg_times.bootstrap(nresamples, stats) |
324 | ); |
325 | |
326 | let distributions = Distributions { |
327 | mean: dist_mean, |
328 | slope: None, |
329 | median: dist_median, |
330 | median_abs_dev: dist_mad, |
331 | std_dev: dist_stddev, |
332 | }; |
333 | |
334 | let estimates = build_estimates(&distributions, &points, cl); |
335 | |
336 | (distributions, estimates) |
337 | } |
338 | |
339 | fn copy_new_dir_to_base(id: &str, baseline: &str, output_directory: &Path) { |
340 | let root_dir = Path::new(output_directory).join(id); |
341 | let base_dir = root_dir.join(baseline); |
342 | let new_dir = root_dir.join("new" ); |
343 | |
344 | if !new_dir.exists() { |
345 | return; |
346 | }; |
347 | if !base_dir.exists() { |
348 | try_else_return!(fs::mkdirp(&base_dir)); |
349 | } |
350 | |
351 | // TODO: consider using walkdir or similar to generically copy. |
352 | try_else_return!(fs::cp( |
353 | &new_dir.join("estimates.json" ), |
354 | &base_dir.join("estimates.json" ) |
355 | )); |
356 | try_else_return!(fs::cp( |
357 | &new_dir.join("sample.json" ), |
358 | &base_dir.join("sample.json" ) |
359 | )); |
360 | try_else_return!(fs::cp( |
361 | &new_dir.join("tukey.json" ), |
362 | &base_dir.join("tukey.json" ) |
363 | )); |
364 | try_else_return!(fs::cp( |
365 | &new_dir.join("benchmark.json" ), |
366 | &base_dir.join("benchmark.json" ) |
367 | )); |
368 | #[cfg (feature = "csv_output" )] |
369 | try_else_return!(fs::cp(&new_dir.join("raw.csv" ), &base_dir.join("raw.csv" ))); |
370 | } |
371 | |