use crate::benchmark::BenchmarkConfig;
use crate::connection::OutgoingMessage;
use crate::measurement::Measurement;
use crate::report::{BenchmarkId, Report, ReportContext};
use crate::{black_box, ActualSamplingMode, Bencher, Criterion};
use std::marker::PhantomData;
use std::time::Duration;

/// PRIVATE: Abstracts over the ways a benchmark routine can be executed.
pub(crate) trait Routine<M: Measurement, T: ?Sized> {
    /// PRIVATE: Runs the routine once per entry in `iters`, performing that many
    /// iterations, and returns one measured value per entry.
    fn bench(&mut self, m: &M, iters: &[u64], parameter: &T) -> Vec<f64>;
    /// PRIVATE: Runs the routine repeatedly for approximately `how_long` and
    /// returns the elapsed time in nanoseconds and the total iteration count.
    fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64);

    /// PRIVATE: Runs the routine exactly once, to verify that it completes without panicking.
    fn test(&mut self, m: &M, parameter: &T) {
        self.bench(m, &[1u64], parameter);
    }

    /// Iterates the benchmarked function for a fixed length of time, but takes no measurements.
    /// This keeps the overall benchmark suite runtime constant-ish even when running under a
    /// profiler with an unknown amount of overhead. Since no measurements are taken, it also
    /// reduces the amount of time the execution spends in Criterion.rs code, which should help
    /// show the performance of the benchmarked code more clearly as well.
    fn profile(
        &mut self,
        measurement: &M,
        id: &BenchmarkId,
        criterion: &Criterion<M>,
        report_context: &ReportContext,
        time: Duration,
        parameter: &T,
    ) {
        criterion
            .report
            .profile(id, report_context, time.as_nanos() as f64);

        let mut profile_path = report_context.output_directory.clone();
        if (*crate::CARGO_CRITERION_CONNECTION).is_some() {
            // If connected to cargo-criterion, generate a cargo-criterion-style path.
            // This is kind of a hack.
            profile_path.push("profile");
            profile_path.push(id.as_directory_name());
        } else {
            profile_path.push(id.as_directory_name());
            profile_path.push("profile");
        }
        criterion
            .profiler
            .borrow_mut()
            .start_profiling(id.id(), &profile_path);

        let time = time.as_nanos() as u64;

        // TODO: Some profilers will show the two batches of iterations as
        // being different code-paths even though they aren't really.

        // Get the warmup time for one second
        let (wu_elapsed, wu_iters) = self.warm_up(measurement, Duration::from_secs(1), parameter);
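        // Only run the measured pass if the warm-up hasn't already consumed the
        // entire requested profile time.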
        if wu_elapsed < time {
            // Initial guess for the mean execution time
            let met = wu_elapsed as f64 / wu_iters as f64;

            // Guess how many iterations will be required for the remaining time
            let remaining = (time - wu_elapsed) as f64;

            let iters = remaining / met;
            let iters = iters as u64;

            self.bench(measurement, &[iters], parameter);
        }

        criterion
            .profiler
            .borrow_mut()
            .stop_profiling(id.id(), &profile_path);

        criterion.report.terminated(id, report_context);
    }

    fn sample(
        &mut self,
        measurement: &M,
        id: &BenchmarkId,
        config: &BenchmarkConfig,
        criterion: &Criterion<M>,
        report_context: &ReportContext,
        parameter: &T,
    ) -> (ActualSamplingMode, Box<[f64]>, Box<[f64]>) {
        if config.quick_mode {
            let minimum_bench_duration = Duration::from_millis(100);
            let maximum_bench_duration = config.measurement_time; // default: 5 seconds
            let target_rel_stdev = config.significance_level; // default: 5%, 0.05

            use std::time::Instant;
            let time_start = Instant::now();

            let sq = |val| val * val;
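            // Quick mode doubles the iteration count on each pass and stops as
            // soon as the estimated relative standard deviation drops below the
            // target (or the time budget runs out), rather than taking a fixed
            // number of samples.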
            let mut n = 1;
            let mut t_prev = *self.bench(measurement, &[n], parameter).first().unwrap();

            // Early exit for extremely long running benchmarks:
            if time_start.elapsed() > maximum_bench_duration {
                let iters = vec![n as f64, n as f64].into_boxed_slice();
                // prevent gnuplot bug when all values are equal
                let elapsed = vec![t_prev, t_prev + 0.000001].into_boxed_slice();
                return (ActualSamplingMode::Flat, iters, elapsed);
            }

            // Main data collection loop.
            loop {
                let t_now = *self
                    .bench(measurement, &[n * 2], parameter)
                    .first()
                    .unwrap();
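                // Least-squares estimate of the time for n iterations, fitting
                // t_prev ≈ 1*t and t_now ≈ 2*t: t = (1*t_prev + 2*t_now) / (1² + 2²).
                // The stdev below is the norm of the two fit residuals.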
                let t = (t_prev + 2. * t_now) / 5.;
                let stdev = (sq(t_prev - t) + sq(t_now - 2. * t)).sqrt();
                // println!("Sample: {} {:.2}", n, stdev / t);
                let elapsed = time_start.elapsed();
                if (stdev < target_rel_stdev * t && elapsed > minimum_bench_duration)
                    || elapsed > maximum_bench_duration
                {
                    let iters = vec![n as f64, (n * 2) as f64].into_boxed_slice();
                    let elapsed = vec![t_prev, t_now].into_boxed_slice();
                    return (ActualSamplingMode::Linear, iters, elapsed);
                }
                n *= 2;
                t_prev = t_now;
            }
        }
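
        // Standard sampling path: warm up for the configured time, estimate the
        // mean execution time, then take `sample_size` timed samples.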
        let wu = config.warm_up_time;
        let m_ns = config.measurement_time.as_nanos();

        criterion
            .report
            .warmup(id, report_context, wu.as_nanos() as f64);

        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::Warmup {
                id: id.into(),
                nanos: wu.as_nanos() as f64,
            })
            .unwrap();
        }

        let (wu_elapsed, wu_iters) = self.warm_up(measurement, wu, parameter);
        if crate::debug_enabled() {
            println!(
                "\nCompleted {} iterations in {} nanoseconds, estimated execution time is {} ns",
                wu_iters,
                wu_elapsed,
                wu_elapsed as f64 / wu_iters as f64
            );
        }

        // Initial guess for the mean execution time
        let met = wu_elapsed as f64 / wu_iters as f64;

        let n = config.sample_size as u64;

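        // Pick the sampling mode: Linear grows the iteration count from sample
        // to sample, while Flat uses the same count for every sample.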
        let actual_sampling_mode = config
            .sampling_mode
            .choose_sampling_mode(met, n, m_ns as f64);

        let m_iters = actual_sampling_mode.iteration_counts(met, n, &config.measurement_time);

        let expected_ns = m_iters
            .iter()
            .copied()
            .map(|count| count as f64 * met)
            .sum();

        // Use saturating_add to handle overflow.
        let mut total_iters = 0u64;
        for count in m_iters.iter().copied() {
            total_iters = total_iters.saturating_add(count);
        }

        criterion
            .report
            .measurement_start(id, report_context, n, expected_ns, total_iters);

        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::MeasurementStart {
                id: id.into(),
                sample_count: n,
                estimate_ns: expected_ns,
                iter_count: total_iters,
            })
            .unwrap();
        }

        let m_elapsed = self.bench(measurement, &m_iters, parameter);

        let m_iters_f: Vec<f64> = m_iters.iter().map(|&x| x as f64).collect();

        (
            actual_sampling_mode,
            m_iters_f.into_boxed_slice(),
            m_elapsed.into_boxed_slice(),
        )
    }
}

pub struct Function<M: Measurement, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    f: F,
    // TODO: Is there some way to remove these?
    _phantom: PhantomData<T>,
    _phantom2: PhantomData<M>,
}
impl<M: Measurement, F, T> Function<M, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    pub fn new(f: F) -> Function<M, F, T> {
        Function {
            f,
            _phantom: PhantomData,
            _phantom2: PhantomData,
        }
    }
}

impl<M: Measurement, F, T> Routine<M, T> for Function<M, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    fn bench(&mut self, m: &M, iters: &[u64], parameter: &T) -> Vec<f64> {
        let f = &mut self.f;

        let mut b = Bencher {
            iterated: false,
            iters: 0,
            value: m.zero(),
            measurement: m,
            elapsed_time: Duration::from_millis(0),
        };

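        // Each entry in `iters` produces one sample: run the closure with that
        // iteration count and convert the accumulated measurement to f64.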
        iters
            .iter()
            .map(|iters| {
                b.iters = *iters;
                (*f)(&mut b, black_box(parameter));
                b.assert_iterated();
                m.to_f64(&b.value)
            })
            .collect()
    }

    fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64) {
        let f = &mut self.f;
        let mut b = Bencher {
            iterated: false,
            iters: 1,
            value: m.zero(),
            measurement: m,
            elapsed_time: Duration::from_millis(0),
        };

        let mut total_iters = 0;
        let mut elapsed_time = Duration::from_millis(0);
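        // Keep doubling the iteration count until the accumulated run time
        // exceeds the requested warm-up duration.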
        loop {
            (*f)(&mut b, black_box(parameter));

            b.assert_iterated();

            total_iters += b.iters;
            elapsed_time += b.elapsed_time;
            if elapsed_time > how_long {
                return (elapsed_time.as_nanos() as u64, total_iters);
            }

            b.iters = b.iters.wrapping_mul(2);
        }
    }
}