//! Benchmarking module.

use std::panic::{AssertUnwindSafe, catch_unwind};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use std::{cmp, io};

use super::Sender;
use super::event::CompletedTest;
use super::options::BenchMode;
use super::test_result::TestResult;
use super::types::{TestDesc, TestId};
use crate::stats;

/// An identity function that *__hints__* to the compiler to be maximally pessimistic about what
/// `black_box` could do.
///
/// See [`std::hint::black_box`] for details.
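///
/// # Examples
///
/// Illustrative sketch (`compute` is a hypothetical function and `b` a
/// `Bencher`): keep a result "live" so the optimizer cannot delete the work
/// being measured inside a benchmark loop.
///
/// ```ignore
/// b.iter(|| black_box(compute()));
/// ```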
#[inline(always)]
pub fn black_box<T>(dummy: T) -> T {
    std::hint::black_box(dummy)
}

/// Manager of the benchmarking runs.
///
/// This is fed into functions marked with `#[bench]` to allow for
/// set-up & tear-down before running a piece of code repeatedly via a
/// call to `iter`.
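///
/// # Examples
///
/// An illustrative sketch of a typical `#[bench]` function (the benchmark name
/// and the summing workload are invented for the example; requires the unstable
/// `test` feature on a nightly compiler):
///
/// ```ignore
/// #![feature(test)]
/// extern crate test;
/// use test::{Bencher, black_box};
///
/// #[bench]
/// fn bench_sum(b: &mut Bencher) {
///     // Set-up runs outside of `iter`, so it is not measured.
///     let data = vec![1u64; 1024];
///     // Reporting bytes processed per iteration enables the MB/s figure.
///     b.bytes = (data.len() * std::mem::size_of::<u64>()) as u64;
///     // The closure passed to `iter` is the code that gets timed repeatedly.
///     b.iter(|| black_box(data.iter().copied().sum::<u64>()));
/// }
/// ```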
#[derive(Clone)]
pub struct Bencher {
    mode: BenchMode,
    summary: Option<stats::Summary>,
    pub bytes: u64,
}

impl Bencher {
    /// Callback for benchmark functions to run in their body.
    pub fn iter<T, F>(&mut self, mut inner: F)
    where
        F: FnMut() -> T,
    {
        if self.mode == BenchMode::Single {
            ns_iter_inner(&mut inner, 1);
            return;
        }

        self.summary = Some(iter(&mut inner));
    }

    pub fn bench<F>(&mut self, mut f: F) -> Result<Option<stats::Summary>, String>
    where
        F: FnMut(&mut Bencher) -> Result<(), String>,
    {
        let result: Result<(), String> = f(self);
        result.map(|_| self.summary)
    }
}

#[derive(Debug, Clone, PartialEq)]
pub struct BenchSamples {
    pub ns_iter_summ: stats::Summary,
    pub mb_s: usize,
}

pub fn fmt_bench_samples(bs: &BenchSamples) -> String {
    use std::fmt::Write;
    let mut output: String = String::new();

    let median: f64 = bs.ns_iter_summ.median;
    let deviation: f64 = bs.ns_iter_summ.max - bs.ns_iter_summ.min;

    write!(
        output,
        "{:>14} ns/iter (+/- {})",
        fmt_thousands_sep(median, ','),
        fmt_thousands_sep(deviation, ',')
    )
    .unwrap();
    if bs.mb_s != 0 {
        write!(output, " = {} MB/s", bs.mb_s).unwrap();
    }
    output
}

// Format a number with thousands separators
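//
// Worked examples (derived from the logic below): digits are emitted in groups
// of three separated by `sep`, and the final (ones) group keeps two decimals:
//   fmt_thousands_sep(1_234_567.0, ',') -> "1,234,567.00"
//   fmt_thousands_sep(0.25, ',')        -> "0.25"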
fn fmt_thousands_sep(mut n: f64, sep: char) -> String {
    use std::fmt::Write;
    let mut output = String::new();
    let mut trailing = false;
    for &pow in &[9, 6, 3, 0] {
        let base = 10_usize.pow(pow);
        if pow == 0 || trailing || n / base as f64 >= 1.0 {
            match (pow, trailing) {
                // modern CPUs can execute multiple instructions per nanosecond
                // e.g. benching an ADD takes about 0.25ns.
                (0, true) => write!(output, "{:06.2}", n / base as f64).unwrap(),
                (0, false) => write!(output, "{:.2}", n / base as f64).unwrap(),
                (_, true) => write!(output, "{:03}", n as usize / base).unwrap(),
                _ => write!(output, "{}", n as usize / base).unwrap(),
            }
            if pow != 0 {
                output.push(sep);
            }
            trailing = true;
        }
        n %= base as f64;
    }

    output
}

fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64
where
    F: FnMut() -> T,
{
    let start: Instant = Instant::now();
    for _ in 0..k {
        black_box(inner());
    }
    start.elapsed().as_nanos() as u64
}

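/// Runs `inner` repeatedly and returns summary statistics for the measured
/// time per iteration, in nanoseconds.
///
/// A calibration run sizes the batch count `n` so that one batch takes roughly
/// 1ms. Each pass then collects 50 samples of `n` iterations and 50 samples of
/// `5 * n` iterations, winsorizes both sample sets at 5%, and returns the
/// `5 * n` summary once a pass has run for at least 100ms, the `n`-batch median
/// absolute deviation is below 1%, and the two medians differ by less than the
/// `5 * n` batch's median absolute deviation. Failing that, it returns after
/// about 3 seconds of total sampling, doubling `n` between passes.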
pub fn iter<T, F>(inner: &mut F) -> stats::Summary
where
    F: FnMut() -> T,
{
    // Initial bench run to get ballpark figure.
    let ns_single = ns_iter_inner(inner, 1);

    // Try to estimate iter count for 1ms falling back to 1m
    // iterations if first run took < 1ns.
    let ns_target_total = 1_000_000; // 1ms
    let mut n = ns_target_total / cmp::max(1, ns_single);

    // if the first run took more than 1ms we don't want to just
    // be left doing 0 iterations on every loop. The unfortunate
    // side effect of not being able to do as many runs is
    // automatically handled by the statistical analysis below
    // (i.e., larger error bars).
    n = cmp::max(1, n);

    let mut total_run = Duration::new(0, 0);
    let samples: &mut [f64] = &mut [0.0_f64; 50];
    loop {
        let loop_start = Instant::now();

        for p in &mut *samples {
            *p = ns_iter_inner(inner, n) as f64 / n as f64;
        }

        stats::winsorize(samples, 5.0);
        let summ = stats::Summary::new(samples);

        for p in &mut *samples {
            let ns = ns_iter_inner(inner, 5 * n);
            *p = ns as f64 / (5 * n) as f64;
        }

        stats::winsorize(samples, 5.0);
        let summ5 = stats::Summary::new(samples);

        let loop_run = loop_start.elapsed();

        // If we've run for 100ms and seem to have converged to a
        // stable median.
        if loop_run > Duration::from_millis(100)
            && summ.median_abs_dev_pct < 1.0
            && summ.median - summ5.median < summ5.median_abs_dev
        {
            return summ5;
        }

        total_run += loop_run;
        // Longest we ever run for is 3s.
        if total_run > Duration::from_secs(3) {
            return summ5;
        }

        // If we overflow here just return the results so far. We check a
        // multiplier of 10 because we're about to multiply by 2 and the
        // next iteration of the loop will also multiply by 5 (to calculate
        // the summ5 result)
        n = match n.checked_mul(10) {
            Some(_) => n * 2,
            None => {
                return summ5;
            }
        };
    }
}

pub fn benchmark<F>(
    id: TestId,
    desc: TestDesc,
    monitor_ch: Sender<CompletedTest>,
    nocapture: bool,
    f: F,
) where
    F: FnMut(&mut Bencher) -> Result<(), String>,
{
    let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 };

    let data = Arc::new(Mutex::new(Vec::new()));

    if !nocapture {
        io::set_output_capture(Some(data.clone()));
    }

    let result = catch_unwind(AssertUnwindSafe(|| bs.bench(f)));

    io::set_output_capture(None);

    let test_result = match result {
        Ok(Ok(Some(ns_iter_summ))) => {
            let ns_iter = cmp::max(ns_iter_summ.median as u64, 1);
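            // `bytes` counts bytes processed per iteration and `ns_iter` is
            // nanoseconds per iteration, so `bytes / ns_iter` is bytes per
            // nanosecond; one byte per nanosecond is 1000 MB/s, hence `* 1000`.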
            let mb_s = bs.bytes * 1000 / ns_iter;

            let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize };
            TestResult::TrBench(bs)
        }
        Ok(Ok(None)) => {
            // iter not called, so no data.
            // FIXME: error in this case?
            let samples: &mut [f64] = &mut [0.0_f64; 1];
            let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 };
            TestResult::TrBench(bs)
        }
        Err(_) => TestResult::TrFailed,
        Ok(Err(_)) => TestResult::TrFailed,
    };

    let stdout = data.lock().unwrap().to_vec();
    let message = CompletedTest::new(id, desc, test_result, None, stdout);
    monitor_ch.send(message).unwrap();
}

pub fn run_once<F>(f: F) -> Result<(), String>
where
    F: FnMut(&mut Bencher) -> Result<(), String>,
{
    let mut bs: Bencher = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 };
    bs.bench(f).map(|_| ())
}