use crate::benchmark::BenchmarkConfig;
use crate::connection::OutgoingMessage;
use crate::measurement::Measurement;
use crate::report::{BenchmarkId, Report, ReportContext};
use crate::{black_box, ActualSamplingMode, Bencher, Criterion};
use std::marker::PhantomData;
use std::time::Duration;

/// PRIVATE: the internal interface used to run a benchmark target through
/// testing, warm-up, profiling, and sampled measurement.
pub(crate) trait Routine<M: Measurement, T: ?Sized> {
    /// PRIVATE: runs the routine once for each iteration count in `iters` and
    /// returns the measured value (as `f64`) for each batch.
    fn bench(&mut self, m: &M, iters: &[u64], parameter: &T) -> Vec<f64>;
    /// PRIVATE: runs the routine repeatedly for roughly `how_long`; returns the
    /// total elapsed time in nanoseconds and the total number of iterations performed.
    fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64);

    /// PRIVATE: runs the routine for a single iteration as a smoke test.
    fn test(&mut self, m: &M, parameter: &T) {
        self.bench(m, &[1u64], parameter);
    }

    /// Iterates the benchmarked function for a fixed length of time, but takes no measurements.
    /// This keeps the overall benchmark suite runtime constant-ish even when running under a
    /// profiler with an unknown amount of overhead. Since no measurements are taken, it also
    /// reduces the amount of time the execution spends in Criterion.rs code, which should help
    /// show the performance of the benchmarked code more clearly as well.
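    ///
    /// This mode is typically entered by passing `--profile-time <seconds>` on the
    /// benchmark command line, e.g. `cargo bench -- --profile-time 10`.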
    fn profile(
        &mut self,
        measurement: &M,
        id: &BenchmarkId,
        criterion: &Criterion<M>,
        report_context: &ReportContext,
        time: Duration,
        parameter: &T,
    ) {
        criterion
            .report
            .profile(id, report_context, time.as_nanos() as f64);

        let mut profile_path = report_context.output_directory.clone();
        if (*crate::CARGO_CRITERION_CONNECTION).is_some() {
            // If connected to cargo-criterion, generate a cargo-criterion-style path.
            // This is kind of a hack.
            profile_path.push("profile");
            profile_path.push(id.as_directory_name());
        } else {
            profile_path.push(id.as_directory_name());
            profile_path.push("profile");
        }
        criterion
            .profiler
            .borrow_mut()
            .start_profiling(id.id(), &profile_path);

        let time = time.as_nanos() as u64;

        // TODO: Some profilers will show the two batches of iterations as
        // being different code-paths even though they aren't really.

        // Warm up for one second to get an initial estimate of the execution time.
        let (wu_elapsed, wu_iters) = self.warm_up(measurement, Duration::from_secs(1), parameter);
        if wu_elapsed < time {
            // Initial guess for the mean execution time
            let met = wu_elapsed as f64 / wu_iters as f64;

            // Guess how many iterations will be required for the remaining time
            let remaining = (time - wu_elapsed) as f64;

            let iters = remaining / met;
            let iters = iters as u64;

            self.bench(measurement, &[iters], parameter);
        }

        criterion
            .profiler
            .borrow_mut()
            .stop_profiling(id.id(), &profile_path);

        criterion.report.terminated(id, report_context);
    }

    fn sample(
        &mut self,
        measurement: &M,
        id: &BenchmarkId,
        config: &BenchmarkConfig,
        criterion: &Criterion<M>,
        report_context: &ReportContext,
        parameter: &T,
    ) -> (ActualSamplingMode, Box<[f64]>, Box<[f64]>) {
        if config.quick_mode {
            let minimum_bench_duration = Duration::from_millis(100);
            let maximum_bench_duration = config.measurement_time; // default: 5 seconds
            let target_rel_stdev = config.significance_level; // default: 5%, 0.05

            use std::time::Instant;
            let time_start = Instant::now();

            let sq = |val| val * val;
            let mut n = 1;
            let mut t_prev = *self.bench(measurement, &[n], parameter).first().unwrap();

            // Early exit for extremely long-running benchmarks:
            if time_start.elapsed() > maximum_bench_duration {
                let iters = vec![n as f64, n as f64].into_boxed_slice();
                // Offset the second value slightly to prevent a gnuplot bug when all values are equal.
                let elapsed = vec![t_prev, t_prev + 0.000001].into_boxed_slice();
                return (ActualSamplingMode::Flat, iters, elapsed);
            }

            // Main data collection loop.
            loop {
                let t_now = *self
                    .bench(measurement, &[n * 2], parameter)
                    .first()
                    .unwrap();
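                // Least-squares estimate of the time per n iterations: with total
                // t_prev measured over n iterations and t_now over 2n, minimizing
                // (t_prev - t)^2 + (t_now - 2t)^2 over t gives
                // t = (t_prev + 2 * t_now) / (1^2 + 2^2) = (t_prev + 2 * t_now) / 5.
                // `stdev` is the residual norm of that fit.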
                let t = (t_prev + 2. * t_now) / 5.;
                let stdev = (sq(t_prev - t) + sq(t_now - 2. * t)).sqrt();
                // println!("Sample: {} {:.2}", n, stdev / t);
                let elapsed = time_start.elapsed();
                if (stdev < target_rel_stdev * t && elapsed > minimum_bench_duration)
                    || elapsed > maximum_bench_duration
                {
                    let iters = vec![n as f64, (n * 2) as f64].into_boxed_slice();
                    let elapsed = vec![t_prev, t_now].into_boxed_slice();
                    return (ActualSamplingMode::Linear, iters, elapsed);
                }
                n *= 2;
                t_prev = t_now;
            }
        }
        let wu = config.warm_up_time;
        let m_ns = config.measurement_time.as_nanos();

        criterion
            .report
            .warmup(id, report_context, wu.as_nanos() as f64);

        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::Warmup {
                id: id.into(),
                nanos: wu.as_nanos() as f64,
            })
            .unwrap();
        }

        let (wu_elapsed, wu_iters) = self.warm_up(measurement, wu, parameter);
        if crate::debug_enabled() {
            println!(
                "\nCompleted {} iterations in {} nanoseconds, estimated execution time is {} ns",
                wu_iters,
                wu_elapsed,
                wu_elapsed as f64 / wu_iters as f64
            );
        }

        // Initial guess for the mean execution time
        let met = wu_elapsed as f64 / wu_iters as f64;

        let n = config.sample_size as u64;

        let actual_sampling_mode = config
            .sampling_mode
            .choose_sampling_mode(met, n, m_ns as f64);

        let m_iters = actual_sampling_mode.iteration_counts(met, n, &config.measurement_time);
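        // Linear mode spaces the per-sample iteration counts as d, 2d, ..., n*d (for a
        // base count d chosen to fill the measurement time); Flat mode uses the same
        // count for every sample.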

        let expected_ns = m_iters
            .iter()
            .copied()
            .map(|count| count as f64 * met)
            .sum();

        // Use saturating_add to handle overflow.
        let mut total_iters = 0u64;
        for count in m_iters.iter().copied() {
            total_iters = total_iters.saturating_add(count);
        }

        criterion
            .report
            .measurement_start(id, report_context, n, expected_ns, total_iters);

        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::MeasurementStart {
                id: id.into(),
                sample_count: n,
                estimate_ns: expected_ns,
                iter_count: total_iters,
            })
            .unwrap();
        }

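        // One measured value (e.g. total elapsed nanoseconds) per entry in `m_iters`.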
        let m_elapsed = self.bench(measurement, &m_iters, parameter);

        let m_iters_f: Vec<f64> = m_iters.iter().map(|&x| x as f64).collect();

        (
            actual_sampling_mode,
            m_iters_f.into_boxed_slice(),
            m_elapsed.into_boxed_slice(),
        )
    }
}

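/// A `Routine` backed by a user-provided closure, with phantom types tying it to
/// the measurement `M` and the parameter type `T`.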
pub struct Function<M: Measurement, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    f: F,
    // TODO: Is there some way to remove these?
    _phantom: PhantomData<T>,
    _phantom2: PhantomData<M>,
}
impl<M: Measurement, F, T> Function<M, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    pub fn new(f: F) -> Function<M, F, T> {
        Function {
            f,
            _phantom: PhantomData,
            _phantom2: PhantomData,
        }
    }
}
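
// A minimal sketch of a closure that `Function::new` accepts (hypothetical user
// code; `WallTime` is Criterion.rs's default wall-clock measurement):
//
//     use criterion::measurement::WallTime;
//
//     let routine = Function::new(|b: &mut Bencher<'_, WallTime>, n: &u64| {
//         b.iter(|| (0..*n).sum::<u64>())
//     });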

impl<M: Measurement, F, T> Routine<M, T> for Function<M, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    fn bench(&mut self, m: &M, iters: &[u64], parameter: &T) -> Vec<f64> {
        let f = &mut self.f;

        let mut b = Bencher {
            iterated: false,
            iters: 0,
            value: m.zero(),
            measurement: m,
            elapsed_time: Duration::from_millis(0),
        };

        iters
            .iter()
            .map(|iters| {
                b.iters = *iters;
                (*f)(&mut b, black_box(parameter));
                b.assert_iterated();
                m.to_f64(&b.value)
            })
            .collect()
    }

    fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64) {
        let f = &mut self.f;
        let mut b = Bencher {
            iterated: false,
            iters: 1,
            value: m.zero(),
            measurement: m,
            elapsed_time: Duration::from_millis(0),
        };

        let mut total_iters = 0;
        let mut elapsed_time = Duration::from_millis(0);
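        // Double the batch size each pass so the warm-up converges on a suitable
        // iteration count in logarithmically many passes, however fast the routine is.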
        loop {
            (*f)(&mut b, black_box(parameter));

            b.assert_iterated();

            total_iters += b.iters;
            elapsed_time += b.elapsed_time;
            if elapsed_time > how_long {
                return (elapsed_time.as_nanos() as u64, total_iters);
            }

            b.iters = b.iters.wrapping_mul(2);
        }
    }
}
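
// For reference, a minimal sketch of the public path into this module
// (hypothetical user-side code; `bench_function` ultimately wraps the closure
// in a `Function` routine):
//
//     use criterion::{criterion_group, criterion_main, Criterion};
//
//     fn bench(c: &mut Criterion) {
//         c.bench_function("add", |b| b.iter(|| std::hint::black_box(2) + 2));
//     }
//     criterion_group!(benches, bench);
//     criterion_main!(benches);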