| 1 | //! Benchmarking module. |
| 2 | |
| 3 | use std::panic::{AssertUnwindSafe, catch_unwind}; |
| 4 | use std::sync::{Arc, Mutex}; |
| 5 | use std::time::{Duration, Instant}; |
| 6 | use std::{cmp, io}; |
| 7 | |
| 8 | use super::Sender; |
| 9 | use super::event::CompletedTest; |
| 10 | use super::options::BenchMode; |
| 11 | use super::test_result::TestResult; |
| 12 | use super::types::{TestDesc, TestId}; |
| 13 | use crate::stats; |
| 14 | |
| 15 | /// An identity function that *__hints__* to the compiler to be maximally pessimistic about what |
| 16 | /// `black_box` could do. |
| 17 | /// |
| 18 | /// See [`std::hint::black_box`] for details. |
| 19 | #[inline (always)] |
| 20 | pub fn black_box<T>(dummy: T) -> T { |
| 21 | std::hint::black_box(dummy) |
| 22 | } |
| 23 | |
| 24 | /// Manager of the benchmarking runs. |
| 25 | /// |
| 26 | /// This is fed into functions marked with `#[bench]` to allow for |
| 27 | /// set-up & tear-down before running a piece of code repeatedly via a |
| 28 | /// call to `iter`. |
| 29 | #[derive (Clone)] |
| 30 | pub struct Bencher { |
| 31 | mode: BenchMode, |
| 32 | summary: Option<stats::Summary>, |
| 33 | pub bytes: u64, |
| 34 | } |
| 35 | |
| 36 | impl Bencher { |
| 37 | /// Callback for benchmark functions to run in their body. |
| 38 | pub fn iter<T, F>(&mut self, mut inner: F) |
| 39 | where |
| 40 | F: FnMut() -> T, |
| 41 | { |
| 42 | if self.mode == BenchMode::Single { |
| 43 | ns_iter_inner(&mut inner, k:1); |
| 44 | return; |
| 45 | } |
| 46 | |
| 47 | self.summary = Some(iter(&mut inner)); |
| 48 | } |
| 49 | |
| 50 | pub fn bench<F>(&mut self, mut f: F) -> Result<Option<stats::Summary>, String> |
| 51 | where |
| 52 | F: FnMut(&mut Bencher) -> Result<(), String>, |
| 53 | { |
| 54 | let result: Result<(), String> = f(self); |
| 55 | result.map(|_| self.summary) |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | #[derive (Debug, Clone, PartialEq)] |
| 60 | pub struct BenchSamples { |
| 61 | pub ns_iter_summ: stats::Summary, |
| 62 | pub mb_s: usize, |
| 63 | } |
| 64 | |
| 65 | pub fn fmt_bench_samples(bs: &BenchSamples) -> String { |
| 66 | use std::fmt::Write; |
| 67 | let mut output: String = String::new(); |
| 68 | |
| 69 | let median: f64 = bs.ns_iter_summ.median; |
| 70 | let deviation: f64 = bs.ns_iter_summ.max - bs.ns_iter_summ.min; |
| 71 | |
| 72 | writeResult<(), Error>!( |
| 73 | output, |
| 74 | " {:>14} ns/iter (+/- {})" , |
| 75 | fmt_thousands_sep(median, ',' ), |
| 76 | fmt_thousands_sep(deviation, ',' ) |
| 77 | ) |
| 78 | .unwrap(); |
| 79 | if bs.mb_s != 0 { |
| 80 | write!(output, " = {} MB/s" , bs.mb_s).unwrap(); |
| 81 | } |
| 82 | output |
| 83 | } |
| 84 | |
| 85 | // Format a number with thousands separators |
| 86 | fn fmt_thousands_sep(mut n: f64, sep: char) -> String { |
| 87 | use std::fmt::Write; |
| 88 | let mut output = String::new(); |
| 89 | let mut trailing = false; |
| 90 | for &pow in &[9, 6, 3, 0] { |
| 91 | let base = 10_usize.pow(pow); |
| 92 | if pow == 0 || trailing || n / base as f64 >= 1.0 { |
| 93 | match (pow, trailing) { |
| 94 | // modern CPUs can execute multiple instructions per nanosecond |
| 95 | // e.g. benching an ADD takes about 0.25ns. |
| 96 | (0, true) => write!(output, " {:06.2}" , n / base as f64).unwrap(), |
| 97 | (0, false) => write!(output, " {:.2}" , n / base as f64).unwrap(), |
| 98 | (_, true) => write!(output, " {:03}" , n as usize / base).unwrap(), |
| 99 | _ => write!(output, " {}" , n as usize / base).unwrap(), |
| 100 | } |
| 101 | if pow != 0 { |
| 102 | output.push(sep); |
| 103 | } |
| 104 | trailing = true; |
| 105 | } |
| 106 | n %= base as f64; |
| 107 | } |
| 108 | |
| 109 | output |
| 110 | } |
| 111 | |
| 112 | fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64 |
| 113 | where |
| 114 | F: FnMut() -> T, |
| 115 | { |
| 116 | let start: Instant = Instant::now(); |
| 117 | for _ in 0..k { |
| 118 | black_box(dummy:inner()); |
| 119 | } |
| 120 | start.elapsed().as_nanos() as u64 |
| 121 | } |
| 122 | |
| 123 | pub fn iter<T, F>(inner: &mut F) -> stats::Summary |
| 124 | where |
| 125 | F: FnMut() -> T, |
| 126 | { |
| 127 | // Initial bench run to get ballpark figure. |
| 128 | let ns_single = ns_iter_inner(inner, 1); |
| 129 | |
| 130 | // Try to estimate iter count for 1ms falling back to 1m |
| 131 | // iterations if first run took < 1ns. |
| 132 | let ns_target_total = 1_000_000; // 1ms |
| 133 | let mut n = ns_target_total / cmp::max(1, ns_single); |
| 134 | |
| 135 | // if the first run took more than 1ms we don't want to just |
| 136 | // be left doing 0 iterations on every loop. The unfortunate |
| 137 | // side effect of not being able to do as many runs is |
| 138 | // automatically handled by the statistical analysis below |
| 139 | // (i.e., larger error bars). |
| 140 | n = cmp::max(1, n); |
| 141 | |
| 142 | let mut total_run = Duration::new(0, 0); |
| 143 | let samples: &mut [f64] = &mut [0.0_f64; 50]; |
| 144 | loop { |
| 145 | let loop_start = Instant::now(); |
| 146 | |
| 147 | for p in &mut *samples { |
| 148 | *p = ns_iter_inner(inner, n) as f64 / n as f64; |
| 149 | } |
| 150 | |
| 151 | stats::winsorize(samples, 5.0); |
| 152 | let summ = stats::Summary::new(samples); |
| 153 | |
| 154 | for p in &mut *samples { |
| 155 | let ns = ns_iter_inner(inner, 5 * n); |
| 156 | *p = ns as f64 / (5 * n) as f64; |
| 157 | } |
| 158 | |
| 159 | stats::winsorize(samples, 5.0); |
| 160 | let summ5 = stats::Summary::new(samples); |
| 161 | |
| 162 | let loop_run = loop_start.elapsed(); |
| 163 | |
| 164 | // If we've run for 100ms and seem to have converged to a |
| 165 | // stable median. |
| 166 | if loop_run > Duration::from_millis(100) |
| 167 | && summ.median_abs_dev_pct < 1.0 |
| 168 | && summ.median - summ5.median < summ5.median_abs_dev |
| 169 | { |
| 170 | return summ5; |
| 171 | } |
| 172 | |
| 173 | total_run += loop_run; |
| 174 | // Longest we ever run for is 3s. |
| 175 | if total_run > Duration::from_secs(3) { |
| 176 | return summ5; |
| 177 | } |
| 178 | |
| 179 | // If we overflow here just return the results so far. We check a |
| 180 | // multiplier of 10 because we're about to multiply by 2 and the |
| 181 | // next iteration of the loop will also multiply by 5 (to calculate |
| 182 | // the summ5 result) |
| 183 | n = match n.checked_mul(10) { |
| 184 | Some(_) => n * 2, |
| 185 | None => { |
| 186 | return summ5; |
| 187 | } |
| 188 | }; |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | pub fn benchmark<F>( |
| 193 | id: TestId, |
| 194 | desc: TestDesc, |
| 195 | monitor_ch: Sender<CompletedTest>, |
| 196 | nocapture: bool, |
| 197 | f: F, |
| 198 | ) where |
| 199 | F: FnMut(&mut Bencher) -> Result<(), String>, |
| 200 | { |
| 201 | let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 }; |
| 202 | |
| 203 | let data = Arc::new(Mutex::new(Vec::new())); |
| 204 | |
| 205 | if !nocapture { |
| 206 | io::set_output_capture(Some(data.clone())); |
| 207 | } |
| 208 | |
| 209 | let result = catch_unwind(AssertUnwindSafe(|| bs.bench(f))); |
| 210 | |
| 211 | io::set_output_capture(None); |
| 212 | |
| 213 | let test_result = match result { |
| 214 | //bs.bench(f) { |
| 215 | Ok(Ok(Some(ns_iter_summ))) => { |
| 216 | let ns_iter = cmp::max(ns_iter_summ.median as u64, 1); |
| 217 | let mb_s = bs.bytes * 1000 / ns_iter; |
| 218 | |
| 219 | let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize }; |
| 220 | TestResult::TrBench(bs) |
| 221 | } |
| 222 | Ok(Ok(None)) => { |
| 223 | // iter not called, so no data. |
| 224 | // FIXME: error in this case? |
| 225 | let samples: &mut [f64] = &mut [0.0_f64; 1]; |
| 226 | let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 }; |
| 227 | TestResult::TrBench(bs) |
| 228 | } |
| 229 | Err(_) => TestResult::TrFailed, |
| 230 | Ok(Err(_)) => TestResult::TrFailed, |
| 231 | }; |
| 232 | |
| 233 | let stdout = data.lock().unwrap().to_vec(); |
| 234 | let message = CompletedTest::new(id, desc, test_result, None, stdout); |
| 235 | monitor_ch.send(message).unwrap(); |
| 236 | } |
| 237 | |
| 238 | pub fn run_once<F>(f: F) -> Result<(), String> |
| 239 | where |
| 240 | F: FnMut(&mut Bencher) -> Result<(), String>, |
| 241 | { |
| 242 | let mut bs: Bencher = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 }; |
| 243 | bs.bench(f).map(|_| ()) |
| 244 | } |
| 245 | |