1 | //! Benchmarking module. |
2 | |
3 | use std::panic::{AssertUnwindSafe, catch_unwind}; |
4 | use std::sync::{Arc, Mutex}; |
5 | use std::time::{Duration, Instant}; |
6 | use std::{cmp, io}; |
7 | |
8 | use super::Sender; |
9 | use super::event::CompletedTest; |
10 | use super::options::BenchMode; |
11 | use super::test_result::TestResult; |
12 | use super::types::{TestDesc, TestId}; |
13 | use crate::stats; |
14 | |
/// An identity function that *__hints__* to the compiler to be maximally pessimistic about what
/// `black_box` could do.
///
/// Exists as a harness-local re-export so `#[bench]` bodies can call it
/// without importing `std::hint`. See [`std::hint::black_box`] for details.
#[inline(always)]
pub fn black_box<T>(dummy: T) -> T {
    std::hint::black_box(dummy)
}
23 | |
/// Manager of the benchmarking runs.
///
/// This is fed into functions marked with `#[bench]` to allow for
/// set-up & tear-down before running a piece of code repeatedly via a
/// call to `iter`.
#[derive(Clone)]
pub struct Bencher {
    // Selects between the full auto-tuned sampling loop and a single pass
    // (see `Bencher::iter`).
    mode: BenchMode,
    // Statistics gathered by the most recent `iter` call, if any.
    summary: Option<stats::Summary>,
    /// Bytes processed per iteration; when non-zero, throughput (MB/s)
    /// is reported alongside the timings (see `benchmark`).
    pub bytes: u64,
}
35 | |
36 | impl Bencher { |
37 | /// Callback for benchmark functions to run in their body. |
38 | pub fn iter<T, F>(&mut self, mut inner: F) |
39 | where |
40 | F: FnMut() -> T, |
41 | { |
42 | if self.mode == BenchMode::Single { |
43 | ns_iter_inner(&mut inner, k:1); |
44 | return; |
45 | } |
46 | |
47 | self.summary = Some(iter(&mut inner)); |
48 | } |
49 | |
50 | pub fn bench<F>(&mut self, mut f: F) -> Result<Option<stats::Summary>, String> |
51 | where |
52 | F: FnMut(&mut Bencher) -> Result<(), String>, |
53 | { |
54 | let result: Result<(), String> = f(self); |
55 | result.map(|_| self.summary) |
56 | } |
57 | } |
58 | |
/// The measured results of one benchmark run.
#[derive(Debug, Clone, PartialEq)]
pub struct BenchSamples {
    /// Summary statistics of the nanoseconds-per-iteration samples.
    pub ns_iter_summ: stats::Summary,
    /// Throughput in MB/s; 0 when the benchmark set no byte count
    /// (see `Bencher::bytes`).
    pub mb_s: usize,
}
64 | |
65 | pub fn fmt_bench_samples(bs: &BenchSamples) -> String { |
66 | use std::fmt::Write; |
67 | let mut output: String = String::new(); |
68 | |
69 | let median: f64 = bs.ns_iter_summ.median; |
70 | let deviation: f64 = bs.ns_iter_summ.max - bs.ns_iter_summ.min; |
71 | |
72 | writeResult<(), Error>!( |
73 | output, |
74 | " {:>14} ns/iter (+/- {})" , |
75 | fmt_thousands_sep(median, ',' ), |
76 | fmt_thousands_sep(deviation, ',' ) |
77 | ) |
78 | .unwrap(); |
79 | if bs.mb_s != 0 { |
80 | write!(output, " = {} MB/s" , bs.mb_s).unwrap(); |
81 | } |
82 | output |
83 | } |
84 | |
// Format a number with thousands separators.
//
// Groups the integer part in threes (e.g. `1,234,567.89`) and always prints
// two decimal places; interior groups after the first are zero-padded to
// width 3 (and the final fractional group to 6) so digits are not dropped.
fn fmt_thousands_sep(mut n: f64, sep: char) -> String {
    use std::fmt::Write;
    let mut output = String::new();
    // Becomes true once the leading (most significant) group is emitted;
    // later groups must then be zero-padded.
    let mut trailing = false;
    for &pow in &[9, 6, 3, 0] {
        let base = 10_usize.pow(pow);
        if pow == 0 || trailing || n / base as f64 >= 1.0 {
            match (pow, trailing) {
                // modern CPUs can execute multiple instructions per nanosecond
                // e.g. benching an ADD takes about 0.25ns.
                (0, true) => write!(output, "{:06.2}", n / base as f64).unwrap(),
                (0, false) => write!(output, "{:.2}", n / base as f64).unwrap(),
                (_, true) => write!(output, "{:03}", n as usize / base).unwrap(),
                _ => write!(output, "{}", n as usize / base).unwrap(),
            }
            if pow != 0 {
                output.push(sep);
            }
            trailing = true;
        }
        // Strip off the group just emitted before handling the next one.
        n %= base as f64;
    }

    output
}
111 | |
/// Returns the total wall-clock time, in nanoseconds, of `k` calls to `inner`.
///
/// Each result is routed through `black_box` so the optimizer cannot
/// discard the benchmarked computation.
fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64
where
    F: FnMut() -> T,
{
    let start = Instant::now();
    for _ in 0..k {
        black_box(inner());
    }
    start.elapsed().as_nanos() as u64
}
122 | |
/// Repeatedly times `inner`, auto-tuning the per-sample iteration count,
/// and returns summary statistics of nanoseconds-per-iteration.
///
/// Runs batches of 50 samples at `n` and `5 * n` iterations until the
/// medians converge (after at least 100ms) or a 3s budget is exhausted.
pub fn iter<T, F>(inner: &mut F) -> stats::Summary
where
    F: FnMut() -> T,
{
    // Initial bench run to get ballpark figure.
    let ns_single = ns_iter_inner(inner, 1);

    // Try to estimate iter count for 1ms falling back to 1m
    // iterations if first run took < 1ns.
    let ns_target_total = 1_000_000; // 1ms
    let mut n = ns_target_total / cmp::max(1, ns_single);

    // if the first run took more than 1ms we don't want to just
    // be left doing 0 iterations on every loop. The unfortunate
    // side effect of not being able to do as many runs is
    // automatically handled by the statistical analysis below
    // (i.e., larger error bars).
    n = cmp::max(1, n);

    let mut total_run = Duration::new(0, 0);
    let samples: &mut [f64] = &mut [0.0_f64; 50];
    loop {
        let loop_start = Instant::now();

        // 50 samples of per-iteration time at `n` iterations each.
        for p in &mut *samples {
            *p = ns_iter_inner(inner, n) as f64 / n as f64;
        }

        stats::winsorize(samples, 5.0);
        let summ = stats::Summary::new(samples);

        // Another 50 samples at 5*n iterations, used to check convergence
        // against the first batch.
        for p in &mut *samples {
            let ns = ns_iter_inner(inner, 5 * n);
            *p = ns as f64 / (5 * n) as f64;
        }

        stats::winsorize(samples, 5.0);
        let summ5 = stats::Summary::new(samples);

        let loop_run = loop_start.elapsed();

        // If we've run for 100ms and seem to have converged to a
        // stable median.
        if loop_run > Duration::from_millis(100)
            && summ.median_abs_dev_pct < 1.0
            && summ.median - summ5.median < summ5.median_abs_dev
        {
            return summ5;
        }

        total_run += loop_run;
        // Longest we ever run for is 3s.
        if total_run > Duration::from_secs(3) {
            return summ5;
        }

        // If we overflow here just return the results so far. We check a
        // multiplier of 10 because we're about to multiply by 2 and the
        // next iteration of the loop will also multiply by 5 (to calculate
        // the summ5 result)
        n = match n.checked_mul(10) {
            Some(_) => n * 2,
            None => {
                return summ5;
            }
        };
    }
}
191 | |
/// Runs benchmark `f` for test `id`, optionally capturing its output, and
/// reports the outcome (timings, or failure) over `monitor_ch`.
pub fn benchmark<F>(
    id: TestId,
    desc: TestDesc,
    monitor_ch: Sender<CompletedTest>,
    nocapture: bool,
    f: F,
) where
    F: FnMut(&mut Bencher) -> Result<(), String>,
{
    let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 };

    // Buffer that collects everything the benchmark prints while captured.
    let data = Arc::new(Mutex::new(Vec::new()));

    if !nocapture {
        io::set_output_capture(Some(data.clone()));
    }

    // A panicking benchmark must not take down the harness itself.
    let result = catch_unwind(AssertUnwindSafe(|| bs.bench(f)));

    io::set_output_capture(None);

    let test_result = match result {
        Ok(Ok(Some(ns_iter_summ))) => {
            // Clamp to 1ns to avoid dividing by zero below.
            let ns_iter = cmp::max(ns_iter_summ.median as u64, 1);
            // bytes per nanosecond * 1000 == bytes per microsecond,
            // numerically equal to MB/s.
            let mb_s = bs.bytes * 1000 / ns_iter;

            let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize };
            TestResult::TrBench(bs)
        }
        Ok(Ok(None)) => {
            // iter not called, so no data.
            // FIXME: error in this case?
            let samples: &mut [f64] = &mut [0.0_f64; 1];
            let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 };
            TestResult::TrBench(bs)
        }
        Err(_) => TestResult::TrFailed,
        Ok(Err(_)) => TestResult::TrFailed,
    };

    let stdout = data.lock().unwrap().to_vec();
    let message = CompletedTest::new(id, desc, test_result, None, stdout);
    monitor_ch.send(message).unwrap();
}
237 | |
238 | pub fn run_once<F>(f: F) -> Result<(), String> |
239 | where |
240 | F: FnMut(&mut Bencher) -> Result<(), String>, |
241 | { |
242 | let mut bs: Bencher = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 }; |
243 | bs.bench(f).map(|_| ()) |
244 | } |
245 | |