1 | //! Benchmarking module. |
2 | |
3 | use super::{ |
4 | event::CompletedTest, options::BenchMode, test_result::TestResult, types::TestDesc, Sender, |
5 | }; |
6 | |
7 | use crate::stats; |
8 | use std::cmp; |
9 | #[cfg (feature = "capture" )] |
10 | use std::io; |
11 | use std::panic::{catch_unwind, AssertUnwindSafe}; |
12 | use std::sync::{Arc, Mutex}; |
13 | use std::time::{Duration, Instant}; |
14 | |
15 | #[cfg (feature = "asm_black_box" )] |
16 | pub use std::hint::black_box; |
17 | |
/// Fallback `black_box` used when the `asm_black_box` feature is disabled.
///
/// Hands `dummy` straight back to the caller. The function is marked
/// `#[inline(never)]` so that the call itself is not inlined.
#[cfg(not(feature = "asm_black_box"))]
#[inline(never)]
pub fn black_box<T>(dummy: T) -> T {
    dummy
}
23 | |
24 | /// Manager of the benchmarking runs. |
25 | /// |
26 | /// This is fed into functions marked with `#[bench]` to allow for |
27 | /// set-up & tear-down before running a piece of code repeatedly via a |
28 | /// call to `iter`. |
29 | #[derive (Clone)] |
30 | pub struct Bencher { |
31 | mode: BenchMode, |
32 | summary: Option<stats::Summary>, |
33 | pub bytes: u64, |
34 | } |
35 | |
36 | impl Bencher { |
37 | /// Callback for benchmark functions to run in their body. |
38 | pub fn iter<T, F>(&mut self, mut inner: F) |
39 | where |
40 | F: FnMut() -> T, |
41 | { |
42 | if self.mode == BenchMode::Single { |
43 | ns_iter_inner(&mut inner, k:1); |
44 | return; |
45 | } |
46 | |
47 | self.summary = Some(iter(&mut inner)); |
48 | } |
49 | |
50 | pub fn bench<F>(&mut self, mut f: F) -> Option<stats::Summary> |
51 | where |
52 | F: FnMut(&mut Bencher), |
53 | { |
54 | f(self); |
55 | self.summary |
56 | } |
57 | } |
58 | |
59 | #[derive (Debug, Clone, PartialEq)] |
60 | pub struct BenchSamples { |
61 | pub ns_iter_summ: stats::Summary, |
62 | pub mb_s: usize, |
63 | } |
64 | |
65 | pub fn fmt_bench_samples(bs: &BenchSamples) -> String { |
66 | use std::fmt::Write; |
67 | let mut output: String = String::new(); |
68 | |
69 | let median: usize = bs.ns_iter_summ.median as usize; |
70 | let deviation: usize = (bs.ns_iter_summ.max - bs.ns_iter_summ.min) as usize; |
71 | |
72 | writeResult<(), Error>!( |
73 | output, |
74 | " {:>11} ns/iter (+/- {})" , |
75 | fmt_thousands_sep(median, ',' ), |
76 | fmt_thousands_sep(deviation, ',' ) |
77 | ) |
78 | .unwrap(); |
79 | if bs.mb_s != 0 { |
80 | write!(output, " = {} MB/s" , bs.mb_s).unwrap(); |
81 | } |
82 | output |
83 | } |
84 | |
/// Formats `n` in decimal with `sep` inserted between groups of three digits.
///
/// Grouping starts from the least significant digit, so `1234567` with
/// `','` becomes `"1,234,567"` and values below 1000 are returned without
/// any separator. Every `usize` value is grouped completely, however many
/// digits it has.
fn fmt_thousands_sep(n: usize, sep: char) -> String {
    let digits = n.to_string();
    let len = digits.len();

    // One separator per complete group of three is an upper bound.
    let mut output = String::with_capacity(len + len / 3);
    for (idx, digit) in digits.chars().enumerate() {
        // A separator goes in front of every digit that has a multiple of
        // three digits remaining (itself included), except the very first.
        if idx != 0 && (len - idx) % 3 == 0 {
            output.push(sep);
        }
        output.push(digit);
    }

    output
}
108 | |
109 | fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64 |
110 | where |
111 | F: FnMut() -> T, |
112 | { |
113 | let start: Instant = Instant::now(); |
114 | for _ in 0..k { |
115 | black_box(dummy:inner()); |
116 | } |
117 | start.elapsed().as_nanos() as u64 |
118 | } |
119 | |
120 | pub fn iter<T, F>(inner: &mut F) -> stats::Summary |
121 | where |
122 | F: FnMut() -> T, |
123 | { |
124 | // Initial bench run to get ballpark figure. |
125 | let ns_single = ns_iter_inner(inner, 1); |
126 | |
127 | // Try to estimate iter count for 1ms falling back to 1m |
128 | // iterations if first run took < 1ns. |
129 | let ns_target_total = 1_000_000; // 1ms |
130 | let mut n = ns_target_total / cmp::max(1, ns_single); |
131 | |
132 | // if the first run took more than 1ms we don't want to just |
133 | // be left doing 0 iterations on every loop. The unfortunate |
134 | // side effect of not being able to do as many runs is |
135 | // automatically handled by the statistical analysis below |
136 | // (i.e., larger error bars). |
137 | n = cmp::max(1, n); |
138 | |
139 | let mut total_run = Duration::new(0, 0); |
140 | let samples: &mut [f64] = &mut [0.0_f64; 50]; |
141 | loop { |
142 | let loop_start = Instant::now(); |
143 | |
144 | for p in &mut *samples { |
145 | *p = ns_iter_inner(inner, n) as f64 / n as f64; |
146 | } |
147 | |
148 | stats::winsorize(samples, 5.0); |
149 | let summ = stats::Summary::new(samples); |
150 | |
151 | for p in &mut *samples { |
152 | let ns = ns_iter_inner(inner, 5 * n); |
153 | *p = ns as f64 / (5 * n) as f64; |
154 | } |
155 | |
156 | stats::winsorize(samples, 5.0); |
157 | let summ5 = stats::Summary::new(samples); |
158 | |
159 | let loop_run = loop_start.elapsed(); |
160 | |
161 | // If we've run for 100ms and seem to have converged to a |
162 | // stable median. |
163 | if loop_run > Duration::from_millis(100) |
164 | && summ.median_abs_dev_pct < 1.0 |
165 | && summ.median - summ5.median < summ5.median_abs_dev |
166 | { |
167 | return summ5; |
168 | } |
169 | |
170 | total_run += loop_run; |
171 | // Longest we ever run for is 3s. |
172 | if total_run > Duration::from_secs(3) { |
173 | return summ5; |
174 | } |
175 | |
176 | // If we overflow here just return the results so far. We check a |
177 | // multiplier of 10 because we're about to multiply by 2 and the |
178 | // next iteration of the loop will also multiply by 5 (to calculate |
179 | // the summ5 result) |
180 | n = match n.checked_mul(10) { |
181 | Some(_) => n * 2, |
182 | None => { |
183 | return summ5; |
184 | } |
185 | }; |
186 | } |
187 | } |
188 | |
189 | pub fn benchmark<F>(desc: TestDesc, monitor_ch: Sender<CompletedTest>, nocapture: bool, f: F) |
190 | where |
191 | F: FnMut(&mut Bencher), |
192 | { |
193 | let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 }; |
194 | |
195 | let data = Arc::new(Mutex::new(Vec::new())); |
196 | |
197 | if !nocapture { |
198 | #[cfg (feature = "capture" )] |
199 | io::set_output_capture(Some(data.clone())); |
200 | } |
201 | |
202 | let result = catch_unwind(AssertUnwindSafe(|| bs.bench(f))); |
203 | |
204 | #[cfg (feature = "capture" )] |
205 | io::set_output_capture(None); |
206 | |
207 | let test_result = match result { |
208 | //bs.bench(f) { |
209 | Ok(Some(ns_iter_summ)) => { |
210 | let ns_iter = cmp::max(ns_iter_summ.median as u64, 1); |
211 | let mb_s = bs.bytes * 1000 / ns_iter; |
212 | |
213 | let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize }; |
214 | TestResult::TrBench(bs) |
215 | } |
216 | Ok(None) => { |
217 | // iter not called, so no data. |
218 | // FIXME: error in this case? |
219 | let samples: &mut [f64] = &mut [0.0_f64; 1]; |
220 | let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 }; |
221 | TestResult::TrBench(bs) |
222 | } |
223 | Err(_) => TestResult::TrFailed, |
224 | }; |
225 | |
226 | let stdout = data.lock().unwrap().to_vec(); |
227 | let message = CompletedTest::new(desc, test_result, None, stdout); |
228 | monitor_ch.send(message).unwrap(); |
229 | } |
230 | |
231 | pub fn run_once<F>(f: F) |
232 | where |
233 | F: FnMut(&mut Bencher), |
234 | { |
235 | let mut bs: Bencher = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 }; |
236 | bs.bench(f); |
237 | } |
238 | |