1 | //! Benchmarking module. |
2 | use super::{ |
3 | event::CompletedTest, |
4 | options::BenchMode, |
5 | test_result::TestResult, |
6 | types::{TestDesc, TestId}, |
7 | Sender, |
8 | }; |
9 | |
10 | use crate::stats; |
11 | use std::cmp; |
12 | use std::io; |
13 | use std::panic::{catch_unwind, AssertUnwindSafe}; |
14 | use std::sync::{Arc, Mutex}; |
15 | use std::time::{Duration, Instant}; |
16 | |
/// An identity function that *__hints__* to the compiler to be maximally pessimistic about what
/// `black_box` could do.
///
/// Prevents the optimizer from const-folding or dead-code-eliminating the
/// benchmarked expression. See [`std::hint::black_box`] for details.
#[inline(always)]
pub fn black_box<T>(dummy: T) -> T {
    std::hint::black_box(dummy)
}
25 | |
/// Manager of the benchmarking runs.
///
/// This is fed into functions marked with `#[bench]` to allow for
/// set-up & tear-down before running a piece of code repeatedly via a
/// call to `iter`.
#[derive(Clone)]
pub struct Bencher {
    // Selects between a single smoke-test run (`Single`) and the full
    // auto-scaling sampling loop (see `Bencher::iter`).
    mode: BenchMode,
    // Statistics recorded by the most recent `iter` call, if any;
    // `None` when `iter` was never called or mode was `Single`.
    summary: Option<stats::Summary>,
    /// Number of bytes processed per iteration; when non-zero, the
    /// benchmark result also reports MB/s throughput.
    pub bytes: u64,
}
37 | |
38 | impl Bencher { |
39 | /// Callback for benchmark functions to run in their body. |
40 | pub fn iter<T, F>(&mut self, mut inner: F) |
41 | where |
42 | F: FnMut() -> T, |
43 | { |
44 | if self.mode == BenchMode::Single { |
45 | ns_iter_inner(&mut inner, k:1); |
46 | return; |
47 | } |
48 | |
49 | self.summary = Some(iter(&mut inner)); |
50 | } |
51 | |
52 | pub fn bench<F>(&mut self, mut f: F) -> Result<Option<stats::Summary>, String> |
53 | where |
54 | F: FnMut(&mut Bencher) -> Result<(), String>, |
55 | { |
56 | let result = f(self); |
57 | result.map(|_| self.summary) |
58 | } |
59 | } |
60 | |
/// The measured result of one benchmark: per-iteration timing statistics
/// plus the derived throughput figure.
#[derive(Debug, Clone, PartialEq)]
pub struct BenchSamples {
    /// Summary statistics of nanoseconds per iteration.
    pub ns_iter_summ: stats::Summary,
    /// Throughput in MB/s; 0 when `Bencher::bytes` was left at 0.
    pub mb_s: usize,
}
66 | |
67 | pub fn fmt_bench_samples(bs: &BenchSamples) -> String { |
68 | use std::fmt::Write; |
69 | let mut output = String::new(); |
70 | |
71 | let median: usize = bs.ns_iter_summ.median as usize; |
72 | let deviation: usize = (bs.ns_iter_summ.max - bs.ns_iter_summ.min) as usize; |
73 | |
74 | write!( |
75 | output, |
76 | "{:>11} ns/iter (+/- {})" , |
77 | fmt_thousands_sep(median, ',' ), |
78 | fmt_thousands_sep(deviation, ',' ) |
79 | ) |
80 | .unwrap(); |
81 | if bs.mb_s != 0 { |
82 | write!(output, " = {} MB/s" , bs.mb_s).unwrap(); |
83 | } |
84 | output |
85 | } |
86 | |
// Format a number with thousands separators.
//
// Works for values of any magnitude: the previous table-driven version
// iterated over powers [9, 6, 3, 0] and therefore stopped inserting
// separators inside the leading group once `n >= 10^12`
// (e.g. 1234567890123 -> "1234,567,890,123").
fn fmt_thousands_sep(n: usize, sep: char) -> String {
    let digits = n.to_string();
    let len = digits.len();
    // Room for the digits plus one separator per complete group of three.
    let mut output = String::with_capacity(len + len / 3);
    for (i, d) in digits.chars().enumerate() {
        // A separator goes before every digit whose distance from the end
        // of the number is a positive multiple of three.
        if i > 0 && (len - i) % 3 == 0 {
            output.push(sep);
        }
        output.push(d);
    }
    output
}
110 | |
/// Times `k` back-to-back calls of `inner`, returning the total elapsed
/// wall-clock time in nanoseconds. Each result is routed through
/// `std::hint::black_box` so the optimizer cannot discard the work.
fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64
where
    F: FnMut() -> T,
{
    let started = Instant::now();
    (0..k).for_each(|_| {
        std::hint::black_box(inner());
    });
    started.elapsed().as_nanos() as u64
}
121 | |
/// Repeatedly benchmarks `inner`, auto-scaling the batch size, and returns
/// summary statistics of the nanoseconds-per-iteration measurements.
///
/// The loop keeps doubling the batch size until either the median has
/// converged, 3 seconds of total runtime have elapsed, or the batch size
/// would overflow — whichever comes first.
pub fn iter<T, F>(inner: &mut F) -> stats::Summary
where
    F: FnMut() -> T,
{
    // Initial bench run to get ballpark figure.
    let ns_single = ns_iter_inner(inner, 1);

    // Try to estimate iter count for 1ms falling back to 1m
    // iterations if first run took < 1ns.
    let ns_target_total = 1_000_000; // 1ms
    let mut n = ns_target_total / cmp::max(1, ns_single);

    // if the first run took more than 1ms we don't want to just
    // be left doing 0 iterations on every loop. The unfortunate
    // side effect of not being able to do as many runs is
    // automatically handled by the statistical analysis below
    // (i.e., larger error bars).
    n = cmp::max(1, n);

    let mut total_run = Duration::new(0, 0);
    // Fixed pool of 50 samples, overwritten in place on every pass.
    let samples: &mut [f64] = &mut [0.0_f64; 50];
    loop {
        let loop_start = Instant::now();

        // 50 samples of the mean ns/iteration at batch size `n`.
        for p in &mut *samples {
            *p = ns_iter_inner(inner, n) as f64 / n as f64;
        }

        // Clamp outliers to the 5th/95th percentiles before summarizing.
        stats::winsorize(samples, 5.0);
        let summ = stats::Summary::new(samples);

        // Second sample set at 5x the batch size, used to test whether
        // the median is stable under a larger batch.
        for p in &mut *samples {
            let ns = ns_iter_inner(inner, 5 * n);
            *p = ns as f64 / (5 * n) as f64;
        }

        stats::winsorize(samples, 5.0);
        let summ5 = stats::Summary::new(samples);

        let loop_run = loop_start.elapsed();

        // If we've run for 100ms and seem to have converged to a
        // stable median.
        if loop_run > Duration::from_millis(100)
            && summ.median_abs_dev_pct < 1.0
            && summ.median - summ5.median < summ5.median_abs_dev
        {
            return summ5;
        }

        total_run += loop_run;
        // Longest we ever run for is 3s.
        if total_run > Duration::from_secs(3) {
            return summ5;
        }

        // If we overflow here just return the results so far. We check a
        // multiplier of 10 because we're about to multiply by 2 and the
        // next iteration of the loop will also multiply by 5 (to calculate
        // the summ5 result)
        n = match n.checked_mul(10) {
            Some(_) => n * 2,
            None => {
                return summ5;
            }
        };
    }
}
190 | |
/// Runs one `#[bench]` function `f`, converts its outcome into a
/// `TestResult`, and reports it (with any captured output) on `monitor_ch`.
///
/// Output capture is enabled unless `nocapture` is set; panics inside the
/// benchmark are caught and reported as `TrFailed` rather than unwinding
/// through the harness.
pub fn benchmark<F>(
    id: TestId,
    desc: TestDesc,
    monitor_ch: Sender<CompletedTest>,
    nocapture: bool,
    f: F,
) where
    F: FnMut(&mut Bencher) -> Result<(), String>,
{
    let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 };

    // Shared buffer that receives the benchmark's captured output.
    let data = Arc::new(Mutex::new(Vec::new()));

    if !nocapture {
        io::set_output_capture(Some(data.clone()));
    }

    // Catch panics so a crashing benchmark becomes a test failure.
    let result = catch_unwind(AssertUnwindSafe(|| bs.bench(f)));

    // Unconditionally restore capture, even if it was never enabled.
    io::set_output_capture(None);

    let test_result = match result {
        Ok(Ok(Some(ns_iter_summ))) => {
            // Clamp to 1 ns so the division below can't divide by zero.
            let ns_iter = cmp::max(ns_iter_summ.median as u64, 1);
            // bytes/iter over ns/iter, scaled by 10^9 ns/s / 10^6 B/MB = 1000.
            let mb_s = bs.bytes * 1000 / ns_iter;

            let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize };
            TestResult::TrBench(bs)
        }
        Ok(Ok(None)) => {
            // iter not called, so no data.
            // FIXME: error in this case?
            let samples: &mut [f64] = &mut [0.0_f64; 1];
            let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 };
            TestResult::TrBench(bs)
        }
        Err(_) => TestResult::TrFailed,
        Ok(Err(_)) => TestResult::TrFailed,
    };

    let stdout = data.lock().unwrap().to_vec();
    let message = CompletedTest::new(id, desc, test_result, None, stdout);
    monitor_ch.send(message).unwrap();
}
236 | |
237 | pub fn run_once<F>(f: F) -> Result<(), String> |
238 | where |
239 | F: FnMut(&mut Bencher) -> Result<(), String>, |
240 | { |
241 | let mut bs: Bencher = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 }; |
242 | bs.bench(f).map(|_| ()) |
243 | } |
244 | |