1 | use std::{ |
2 | cell::UnsafeCell, |
3 | fmt, |
4 | mem::{self, MaybeUninit}, |
5 | num::NonZeroUsize, |
6 | sync::Barrier, |
7 | }; |
8 | |
9 | use crate::{ |
10 | alloc::{ |
11 | AllocOp, AllocOpMap, AllocTally, ThreadAllocInfo, ThreadAllocTally, TotalAllocTallyMap, |
12 | }, |
13 | black_box, black_box_drop, |
14 | counter::{ |
15 | AnyCounter, AsCountUInt, BytesCount, CharsCount, Counter, CounterCollection, CyclesCount, |
16 | IntoCounter, ItemsCount, KnownCounterKind, MaxCountUInt, |
17 | }, |
18 | divan::SharedContext, |
19 | stats::{RawSample, SampleCollection, Stats, StatsSet, TimeSample}, |
20 | thread_pool::BENCH_POOL, |
21 | time::{FineDuration, Timestamp, UntaggedTimestamp}, |
22 | util::{self, sync::SyncWrap, Unit}, |
23 | }; |
24 | |
#[cfg(test)]
26 | mod tests; |
27 | |
28 | mod args; |
29 | mod defer; |
30 | mod options; |
31 | |
32 | use defer::{DeferSlot, DeferStore}; |
33 | |
34 | pub use self::{ |
35 | args::{BenchArgs, BenchArgsRunner}, |
36 | options::BenchOptions, |
37 | }; |
38 | |
39 | pub(crate) const DEFAULT_SAMPLE_COUNT: u32 = 100; |
40 | |
41 | /// Enables contextual benchmarking in [`#[divan::bench]`](attr.bench.html). |
42 | /// |
43 | /// # Examples |
44 | /// |
45 | /// ``` |
46 | /// use divan::{Bencher, black_box}; |
47 | /// |
48 | /// #[divan::bench] |
49 | /// fn copy_from_slice(bencher: Bencher) { |
50 | /// // Input and output buffers get used in the closure. |
51 | /// let src = (0..100).collect::<Vec<i32>>(); |
52 | /// let mut dst = vec![0; src.len()]; |
53 | /// |
54 | /// bencher.bench_local(|| { |
55 | /// black_box(&mut dst).copy_from_slice(black_box(&src)); |
56 | /// }); |
57 | /// } |
58 | /// ``` |
#[must_use = "a benchmark function must be registered"]
60 | pub struct Bencher<'a, 'b, C = BencherConfig> { |
61 | pub(crate) context: &'a mut BenchContext<'b>, |
62 | pub(crate) config: C, |
63 | } |
64 | |
65 | /// Public-in-private type for statically-typed `Bencher` configuration. |
66 | /// |
67 | /// This enables configuring `Bencher` using the builder pattern with zero |
68 | /// runtime cost. |
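///
/// # Examples
///
/// Each builder call swaps the statically-known configuration type rather
/// than storing options dynamically; for instance, `with_inputs` replaces
/// the `GenI` parameter with the input generator's type:
///
/// ```
/// #[divan::bench]
/// fn bench(bencher: divan::Bencher) {
///     bencher
///         .with_inputs(|| String::from("..."))
///         .bench_values(|s| s.len());
/// }
/// ```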
69 | pub struct BencherConfig<GenI = Unit> { |
70 | gen_input: GenI, |
71 | } |
72 | |
73 | impl<C> fmt::Debug for Bencher<'_, '_, C> { |
74 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
        f.debug_struct("Bencher").finish_non_exhaustive()
76 | } |
77 | } |
78 | |
79 | impl<'a, 'b> Bencher<'a, 'b> { |
    #[inline]
81 | pub(crate) fn new(context: &'a mut BenchContext<'b>) -> Self { |
82 | Self { context, config: BencherConfig { gen_input: Unit } } |
83 | } |
84 | } |
85 | |
86 | impl<'a, 'b> Bencher<'a, 'b> { |
87 | /// Benchmarks a function. |
88 | /// |
89 | /// The function can be benchmarked in parallel using the [`threads` |
90 | /// option](macro@crate::bench#threads). If the function is strictly |
91 | /// single-threaded, use [`Bencher::bench_local`] instead. |
92 | /// |
93 | /// # Examples |
94 | /// |
95 | /// ``` |
96 | /// #[divan::bench] |
97 | /// fn bench(bencher: divan::Bencher) { |
98 | /// bencher.bench(|| { |
99 | /// // Benchmarked code... |
100 | /// }); |
101 | /// } |
102 | /// ``` |
103 | pub fn bench<O, B>(self, benched: B) |
104 | where |
105 | B: Fn() -> O + Sync, |
106 | { |
107 | // Reusing `bench_values` for a zero-sized non-drop input type should |
108 | // have no overhead. |
109 | self.with_inputs(|| ()).bench_values(|_: ()| benched()); |
110 | } |
111 | |
112 | /// Benchmarks a function on the current thread. |
113 | /// |
114 | /// # Examples |
115 | /// |
116 | /// ``` |
117 | /// #[divan::bench] |
118 | /// fn bench(bencher: divan::Bencher) { |
119 | /// bencher.bench_local(|| { |
120 | /// // Benchmarked code... |
121 | /// }); |
122 | /// } |
123 | /// ``` |
124 | pub fn bench_local<O, B>(self, mut benched: B) |
125 | where |
126 | B: FnMut() -> O, |
127 | { |
128 | // Reusing `bench_local_values` for a zero-sized non-drop input type |
129 | // should have no overhead. |
130 | self.with_inputs(|| ()).bench_local_values(|_: ()| benched()); |
131 | } |
132 | |
    /// Generates inputs for the [benchmarked function](#input-bench).
134 | /// |
135 | /// Time spent generating inputs does not affect benchmark timing. |
136 | /// |
137 | /// When [benchmarking in parallel](macro@crate::bench#threads), the input |
138 | /// generator is called on the same thread as the sample loop that uses that |
139 | /// input. |
140 | /// |
141 | /// # Examples |
142 | /// |
143 | /// ``` |
144 | /// #[divan::bench] |
145 | /// fn bench(bencher: divan::Bencher) { |
146 | /// bencher |
147 | /// .with_inputs(|| { |
148 | /// // Generate input: |
    ///             String::from("...")
150 | /// }) |
151 | /// .bench_values(|s| { |
152 | /// // Use input by-value: |
153 | /// s + "123" |
154 | /// }); |
155 | /// } |
156 | /// ``` |
157 | pub fn with_inputs<G>(self, gen_input: G) -> Bencher<'a, 'b, BencherConfig<G>> { |
158 | Bencher { context: self.context, config: BencherConfig { gen_input } } |
159 | } |
160 | } |
161 | |
162 | impl<'a, 'b, GenI> Bencher<'a, 'b, BencherConfig<GenI>> { |
    /// Assigns a [`Counter`] for all iterations of the benchmarked function.
164 | /// |
165 | /// This will either: |
166 | /// - Assign a new counter |
167 | /// - Override an existing counter of the same type |
168 | /// |
169 | /// If the counter depends on [generated inputs](Self::with_inputs), use |
170 | /// [`Bencher::input_counter`] instead. |
171 | /// |
172 | /// If context is not needed, the counter can instead be set via |
173 | /// [`#[divan::bench(counters = ...)]`](macro@crate::bench#counters). |
174 | /// |
175 | /// # Examples |
176 | /// |
177 | /// ``` |
178 | /// use divan::{Bencher, counter::BytesCount}; |
179 | /// |
180 | /// #[divan::bench] |
181 | /// fn char_count(bencher: Bencher) { |
182 | /// let s: String = // ... |
183 | /// # String::new(); |
184 | /// |
185 | /// bencher |
186 | /// .counter(BytesCount::of_str(&s)) |
187 | /// .bench(|| { |
188 | /// divan::black_box(&s).chars().count() |
189 | /// }); |
190 | /// } |
191 | /// ``` |
    #[doc(alias = "throughput")]
193 | pub fn counter<C>(self, counter: C) -> Self |
194 | where |
195 | C: IntoCounter, |
196 | { |
197 | let counter = AnyCounter::new(counter); |
198 | self.context.counters.set_counter(counter); |
199 | self |
200 | } |
201 | } |
202 | |
203 | /// <span id="input-bench"></span> Benchmark over [generated inputs](Self::with_inputs). |
204 | impl<'a, 'b, I, GenI> Bencher<'a, 'b, BencherConfig<GenI>> |
205 | where |
206 | GenI: FnMut() -> I, |
207 | { |
208 | /// Calls a closure to create a [`Counter`] for each input of the |
209 | /// benchmarked function. |
210 | /// |
211 | /// This will either: |
212 | /// - Assign a new counter |
213 | /// - Override an existing counter of the same type |
214 | /// |
215 | /// If the counter is constant, use [`Bencher::counter`] instead. |
216 | /// |
217 | /// When [benchmarking in parallel](macro@crate::bench#threads), the input |
218 | /// counter is called on the same thread as the sample loop that generates |
219 | /// and uses that input. |
220 | /// |
221 | /// # Examples |
222 | /// |
223 | /// The following example emits info for the number of bytes processed when |
224 | /// benchmarking [`char`-counting](std::str::Chars::count). The byte count |
    /// is obtained by calling [`BytesCount::of_str`] on each iteration's input
226 | /// [`String`]. |
227 | /// |
228 | /// ``` |
229 | /// use divan::{Bencher, counter::BytesCount}; |
230 | /// |
231 | /// #[divan::bench] |
232 | /// fn char_count(bencher: Bencher) { |
233 | /// bencher |
234 | /// .with_inputs(|| -> String { |
235 | /// // ... |
236 | /// # String::new() |
237 | /// }) |
238 | /// .input_counter(BytesCount::of_str) |
239 | /// .bench_refs(|s| { |
240 | /// s.chars().count() |
241 | /// }); |
242 | /// } |
243 | /// ``` |
244 | pub fn input_counter<C, F>(self, make_counter: F) -> Self |
245 | where |
246 | F: Fn(&I) -> C + Sync + 'static, |
247 | C: IntoCounter, |
248 | { |
249 | self.context.counters.set_input_counter(make_counter); |
250 | self |
251 | } |
252 | |
253 | /// Creates a [`Counter`] from each input of the benchmarked function. |
254 | /// |
    /// This may be used if the generated input is [`u8`]–[`u64`], [`usize`],
    /// or any nesting of references to those types.
257 | /// |
258 | /// # Examples |
259 | /// |
260 | /// The following example emits info for the number of items processed when |
261 | /// benchmarking [`FromIterator`] from |
262 | /// <code>[Range](std::ops::Range)<[usize]></code> to [`Vec`]. |
263 | /// |
264 | /// ``` |
265 | /// use divan::{Bencher, counter::ItemsCount}; |
266 | /// |
267 | /// #[divan::bench] |
268 | /// fn range_to_vec(bencher: Bencher) { |
269 | /// bencher |
270 | /// .with_inputs(|| -> usize { |
271 | /// // ... |
272 | /// # 0 |
273 | /// }) |
274 | /// .count_inputs_as::<ItemsCount>() |
275 | /// .bench_values(|n| -> Vec<usize> { |
276 | /// (0..n).collect() |
277 | /// }); |
278 | /// } |
279 | /// ``` |
    #[inline]
281 | pub fn count_inputs_as<C>(self) -> Self |
282 | where |
283 | C: Counter, |
284 | I: AsCountUInt, |
285 | { |
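        // Dispatch on the statically-known counter kind; each arm converts
        // the input's count into the matching counter type.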
286 | match KnownCounterKind::of::<C>() { |
287 | KnownCounterKind::Bytes => self.input_counter(|c| BytesCount::from(c)), |
288 | KnownCounterKind::Chars => self.input_counter(|c| CharsCount::from(c)), |
289 | KnownCounterKind::Cycles => self.input_counter(|c| CyclesCount::from(c)), |
290 | KnownCounterKind::Items => self.input_counter(|c| ItemsCount::from(c)), |
291 | } |
292 | } |
293 | |
294 | /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), |
295 | /// provided by-value. |
296 | /// |
297 | /// Per-iteration means the benchmarked function is called exactly once for |
298 | /// each generated input. |
299 | /// |
300 | /// The function can be benchmarked in parallel using the [`threads` |
301 | /// option](macro@crate::bench#threads). If the function is strictly |
302 | /// single-threaded, use [`Bencher::bench_local_values`] instead. |
303 | /// |
304 | /// # Examples |
305 | /// |
306 | /// ``` |
307 | /// #[divan::bench] |
308 | /// fn bench(bencher: divan::Bencher) { |
309 | /// bencher |
310 | /// .with_inputs(|| { |
311 | /// // Generate input: |
    ///             String::from("...")
313 | /// }) |
314 | /// .bench_values(|s| { |
315 | /// // Use input by-value: |
316 | /// s + "123" |
317 | /// }); |
318 | /// } |
319 | /// ``` |
320 | pub fn bench_values<O, B>(self, benched: B) |
321 | where |
322 | B: Fn(I) -> O + Sync, |
323 | GenI: Fn() -> I + Sync, |
324 | { |
325 | self.context.bench_loop_threaded( |
326 | self.config.gen_input, |
327 | |input| { |
328 | // SAFETY: Input is guaranteed to be initialized and not |
329 | // currently referenced by anything else. |
330 | let input = unsafe { input.get().read().assume_init() }; |
331 | |
332 | benched(input) |
333 | }, |
334 | // Input ownership is transferred to `benched`. |
335 | |_input| {}, |
336 | ); |
337 | } |
338 | |
339 | /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), |
340 | /// provided by-value. |
341 | /// |
342 | /// Per-iteration means the benchmarked function is called exactly once for |
343 | /// each generated input. |
344 | /// |
345 | /// # Examples |
346 | /// |
347 | /// ``` |
348 | /// #[divan::bench] |
349 | /// fn bench(bencher: divan::Bencher) { |
350 | /// let mut values = Vec::new(); |
351 | /// bencher |
352 | /// .with_inputs(|| { |
353 | /// // Generate input: |
    ///             String::from("...")
355 | /// }) |
356 | /// .bench_local_values(|s| { |
357 | /// // Use input by-value: |
358 | /// values.push(s); |
359 | /// }); |
360 | /// } |
361 | /// ``` |
362 | pub fn bench_local_values<O, B>(self, mut benched: B) |
363 | where |
364 | B: FnMut(I) -> O, |
365 | { |
366 | self.context.bench_loop_local( |
367 | self.config.gen_input, |
368 | |input| { |
369 | // SAFETY: Input is guaranteed to be initialized and not |
370 | // currently referenced by anything else. |
371 | let input = unsafe { input.get().read().assume_init() }; |
372 | |
373 | benched(input) |
374 | }, |
375 | // Input ownership is transferred to `benched`. |
376 | |_input| {}, |
377 | ); |
378 | } |
379 | |
380 | /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), |
381 | /// provided by-reference. |
382 | /// |
383 | /// Per-iteration means the benchmarked function is called exactly once for |
384 | /// each generated input. |
385 | /// |
386 | /// # Examples |
387 | /// |
388 | /// ``` |
389 | /// #[divan::bench] |
390 | /// fn bench(bencher: divan::Bencher) { |
391 | /// bencher |
392 | /// .with_inputs(|| { |
393 | /// // Generate input: |
    ///             String::from("...")
395 | /// }) |
396 | /// .bench_refs(|s| { |
397 | /// // Use input by-reference: |
    ///             *s += "123";
399 | /// }); |
400 | /// } |
401 | /// ``` |
402 | pub fn bench_refs<O, B>(self, benched: B) |
403 | where |
404 | B: Fn(&mut I) -> O + Sync, |
405 | GenI: Fn() -> I + Sync, |
406 | { |
407 | // TODO: Allow `O` to reference `&mut I` as long as `I` outlives `O`. |
408 | self.context.bench_loop_threaded( |
409 | self.config.gen_input, |
410 | |input| { |
411 | // SAFETY: Input is guaranteed to be initialized and not |
412 | // currently referenced by anything else. |
413 | let input = unsafe { (*input.get()).assume_init_mut() }; |
414 | |
415 | benched(input) |
416 | }, |
417 | // Input ownership was not transferred to `benched`. |
418 | |input| { |
419 | // SAFETY: This function is called after `benched` outputs are |
420 | // dropped, so we have exclusive access. |
421 | unsafe { (*input.get()).assume_init_drop() } |
422 | }, |
423 | ); |
424 | } |
425 | |
426 | /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), |
427 | /// provided by-reference. |
428 | /// |
429 | /// Per-iteration means the benchmarked function is called exactly once for |
430 | /// each generated input. |
431 | /// |
432 | /// # Examples |
433 | /// |
434 | /// ``` |
435 | /// #[divan::bench] |
436 | /// fn bench(bencher: divan::Bencher) { |
437 | /// bencher |
438 | /// .with_inputs(|| { |
439 | /// // Generate input: |
    ///             String::from("...")
441 | /// }) |
442 | /// .bench_local_refs(|s| { |
443 | /// // Use input by-reference: |
    ///             *s += "123";
445 | /// }); |
446 | /// } |
447 | /// ``` |
448 | pub fn bench_local_refs<O, B>(self, mut benched: B) |
449 | where |
450 | B: FnMut(&mut I) -> O, |
451 | { |
452 | // TODO: Allow `O` to reference `&mut I` as long as `I` outlives `O`. |
453 | self.context.bench_loop_local( |
454 | self.config.gen_input, |
455 | |input| { |
456 | // SAFETY: Input is guaranteed to be initialized and not |
457 | // currently referenced by anything else. |
458 | let input = unsafe { (*input.get()).assume_init_mut() }; |
459 | |
460 | benched(input) |
461 | }, |
462 | // Input ownership was not transferred to `benched`. |
463 | |input| { |
464 | // SAFETY: This function is called after `benched` outputs are |
465 | // dropped, so we have exclusive access. |
466 | unsafe { (*input.get()).assume_init_drop() } |
467 | }, |
468 | ); |
469 | } |
470 | } |
471 | |
472 | /// State machine for how the benchmark is being run. |
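///
/// A benchmark typically starts in `Tune { sample_size: 1 }`, doubles the
/// sample size until a sample takes over 100× the timer's precision, and
/// then switches to `Collect` (see `BenchContext::bench_loop_threaded`).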
#[derive(Clone, Copy)]
474 | pub(crate) enum BenchMode { |
475 | /// The benchmark is being run as `--test`. |
476 | /// |
477 | /// Don't collect samples and run exactly once. |
478 | Test, |
479 | |
480 | /// Scale `sample_size` to determine the right size for collecting. |
481 | Tune { sample_size: u32 }, |
482 | |
483 | /// Simply collect samples. |
484 | Collect { sample_size: u32 }, |
485 | } |
486 | |
487 | impl BenchMode { |
    #[inline]
489 | pub fn is_test(self) -> bool { |
490 | matches!(self, Self::Test) |
491 | } |
492 | |
    #[inline]
494 | pub fn is_tune(self) -> bool { |
495 | matches!(self, Self::Tune { .. }) |
496 | } |
497 | |
    #[inline]
499 | pub fn is_collect(self) -> bool { |
500 | matches!(self, Self::Collect { .. }) |
501 | } |
502 | |
    #[inline]
504 | pub fn sample_size(self) -> u32 { |
505 | match self { |
506 | Self::Test => 1, |
            Self::Tune { sample_size, .. } | Self::Collect { sample_size, .. } => sample_size,
508 | } |
509 | } |
510 | } |
511 | |
512 | /// `#[divan::bench]` loop context. |
513 | /// |
514 | /// Functions called within the benchmark loop should be `#[inline(always)]` to |
515 | /// ensure instruction cache locality. |
516 | pub(crate) struct BenchContext<'a> { |
517 | shared_context: &'a SharedContext, |
518 | |
519 | /// User-configured options. |
520 | pub options: &'a BenchOptions<'a>, |
521 | |
522 | /// Whether the benchmark loop was started. |
523 | pub did_run: bool, |
524 | |
525 | /// The number of threads to run the benchmark. The default is 1. |
526 | /// |
527 | /// When set to 1, the benchmark loop is guaranteed to stay on the current |
528 | /// thread and not spawn any threads. |
529 | pub thread_count: NonZeroUsize, |
530 | |
531 | /// Recorded samples. |
532 | samples: SampleCollection, |
533 | |
534 | /// Per-iteration counters grouped by sample. |
535 | counters: CounterCollection, |
536 | } |
537 | |
538 | impl<'a> BenchContext<'a> { |
539 | /// Creates a new benchmarking context. |
540 | pub fn new( |
541 | shared_context: &'a SharedContext, |
542 | options: &'a BenchOptions, |
543 | thread_count: NonZeroUsize, |
544 | ) -> Self { |
545 | Self { |
546 | shared_context, |
547 | options, |
548 | thread_count, |
549 | did_run: false, |
550 | samples: SampleCollection::default(), |
551 | counters: options.counters.to_collection(), |
552 | } |
553 | } |
554 | |
555 | /// Runs the single-threaded loop for benchmarking `benched`. |
556 | /// |
557 | /// # Safety |
558 | /// |
559 | /// See `bench_loop_threaded`. |
560 | pub fn bench_loop_local<I, O>( |
561 | &mut self, |
562 | gen_input: impl FnMut() -> I, |
563 | benched: impl FnMut(&UnsafeCell<MaybeUninit<I>>) -> O, |
564 | drop_input: impl Fn(&UnsafeCell<MaybeUninit<I>>), |
565 | ) { |
566 | // SAFETY: Closures are guaranteed to run on the current thread, so they |
567 | // can safely be mutable and non-`Sync`. |
568 | unsafe { |
569 | let gen_input = SyncWrap::new(UnsafeCell::new(gen_input)); |
570 | let benched = SyncWrap::new(UnsafeCell::new(benched)); |
571 | let drop_input = SyncWrap::new(drop_input); |
572 | |
573 | self.thread_count = NonZeroUsize::MIN; |
574 | self.bench_loop_threaded::<I, O>( |
575 | || (*gen_input.get())(), |
576 | |input| (*benched.get())(input), |
577 | |input| drop_input(input), |
578 | ) |
579 | } |
580 | } |
581 | |
582 | /// Runs the multi-threaded loop for benchmarking `benched`. |
583 | /// |
584 | /// # Safety |
585 | /// |
    /// If `self.thread_count` is 1, the incoming closures will not escape the
587 | /// current thread. This guarantee ensures `bench_loop_local` can soundly |
588 | /// reuse this method with mutable non-`Sync` closures. |
589 | /// |
590 | /// When `benched` is called: |
591 | /// - `I` is guaranteed to be initialized. |
592 | /// - No external `&I` or `&mut I` exists. |
593 | /// |
594 | /// When `drop_input` is called: |
595 | /// - All instances of `O` returned from `benched` have been dropped. |
596 | /// - The same guarantees for `I` apply as in `benched`, unless `benched` |
597 | /// escaped references to `I`. |
598 | fn bench_loop_threaded<I, O>( |
599 | &mut self, |
600 | gen_input: impl Fn() -> I + Sync, |
601 | benched: impl Fn(&UnsafeCell<MaybeUninit<I>>) -> O + Sync, |
602 | drop_input: impl Fn(&UnsafeCell<MaybeUninit<I>>) + Sync, |
603 | ) { |
604 | self.did_run = true; |
605 | |
606 | let mut current_mode = self.initial_mode(); |
607 | let is_test = current_mode.is_test(); |
608 | |
609 | let record_sample = self.sample_recorder(gen_input, benched, drop_input); |
610 | |
611 | let thread_count = self.thread_count.get(); |
612 | let aux_thread_count = thread_count - 1; |
613 | |
614 | let is_single_thread = aux_thread_count == 0; |
615 | |
616 | // Per-thread sample info returned by `record_sample`. These are |
617 | // processed locally to emit user-facing sample info. As a result, this |
        // holds at most `thread_count` elements at a time.
619 | let mut raw_samples = Vec::<Option<RawSample>>::new(); |
620 | |
621 | // The time spent benchmarking, in picoseconds. |
622 | // |
623 | // Unless `skip_ext_time` is set, this includes time external to |
624 | // `benched`, such as time spent generating inputs and running drop. |
625 | let mut elapsed_picos: u128 = 0; |
626 | |
627 | // The minimum time for benchmarking, in picoseconds. |
628 | let min_picos = self.options.min_time().picos; |
629 | |
630 | // The remaining time left for benchmarking, in picoseconds. |
631 | let max_picos = self.options.max_time().picos; |
632 | |
633 | // Don't bother running if user specifies 0 max time or 0 samples. |
634 | if max_picos == 0 || !self.options.has_samples() { |
635 | return; |
636 | } |
637 | |
638 | let timer = self.shared_context.timer; |
639 | let timer_kind = timer.kind(); |
640 | |
641 | let mut rem_samples = if current_mode.is_collect() { |
642 | Some(self.options.sample_count.unwrap_or(DEFAULT_SAMPLE_COUNT)) |
643 | } else { |
644 | None |
645 | }; |
646 | |
647 | // Only measure precision if we need to tune sample size. |
648 | let timer_precision = |
649 | if current_mode.is_tune() { timer.precision() } else { FineDuration::default() }; |
650 | |
651 | if !is_test { |
652 | self.samples.time_samples.reserve(self.options.sample_count.unwrap_or(1) as usize); |
653 | } |
654 | |
655 | let skip_ext_time = self.options.skip_ext_time.unwrap_or_default(); |
656 | let initial_start = if skip_ext_time { None } else { Some(Timestamp::start(timer_kind)) }; |
657 | |
658 | let bench_overheads = timer.bench_overheads(); |
659 | |
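        // `while` over a block: the block below is the loop condition, and
        // sampling continues as long as it evaluates to `true`.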
660 | while { |
661 | // Conditions for when sampling is over: |
662 | if elapsed_picos >= max_picos { |
663 | // Depleted the benchmarking time budget. This is a strict |
664 | // condition regardless of sample count and minimum time. |
665 | false |
666 | } else if rem_samples.unwrap_or(1) > 0 { |
667 | // More samples expected. |
668 | true |
669 | } else { |
670 | // Continue if we haven't reached the time floor. |
671 | elapsed_picos < min_picos |
672 | } |
673 | } { |
674 | let sample_size = current_mode.sample_size(); |
675 | self.samples.sample_size = sample_size; |
676 | |
677 | let barrier = if is_single_thread { None } else { Some(Barrier::new(thread_count)) }; |
678 | |
679 | // Sample loop helper: |
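            // NOTE: This binding shadows the sample recorder defined above,
            // but the `record_sample` call inside the closure still resolves
            // to that outer recorder, because a `let` binding is not in scope
            // within its own initializer.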
680 | let record_sample = || -> RawSample { |
681 | let mut counter_totals: [u128; KnownCounterKind::COUNT] = |
682 | [0; KnownCounterKind::COUNT]; |
683 | |
684 | // Updates per-input counter info for this sample. |
685 | let mut count_input = |input: &I| { |
686 | for counter_kind in KnownCounterKind::ALL { |
687 | // SAFETY: The `I` type cannot change since `with_inputs` |
688 | // cannot be called more than once on the same `Bencher`. |
689 | if let Some(count) = |
690 | unsafe { self.counters.get_input_count(counter_kind, input) } |
691 | { |
692 | let total = &mut counter_totals[counter_kind as usize]; |
693 | *total = (*total).saturating_add(count as u128); |
694 | } |
695 | } |
696 | }; |
697 | |
698 | // Sample loop: |
699 | let ([start, end], alloc_info) = |
700 | record_sample(sample_size as usize, barrier.as_ref(), &mut count_input); |
701 | |
702 | RawSample { start, end, timer, alloc_info, counter_totals } |
703 | }; |
704 | |
            // Record samples, in parallel when multiple threads are requested:
706 | raw_samples.clear(); |
707 | BENCH_POOL.par_extend(&mut raw_samples, aux_thread_count, |_| record_sample()); |
708 | |
            // Convert `&[Option<RawSample>]` to `&[RawSample]`.
710 | let raw_samples: &[RawSample] = { |
711 | if let Some(thread) = raw_samples |
712 | .iter() |
713 | .enumerate() |
714 | .find_map(|(thread, sample)| sample.is_none().then_some(thread)) |
715 | { |
                    panic!("Divan benchmarking thread {thread} panicked");
717 | } |
718 | |
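                // SAFETY: Every element was just checked to be `Some`, and
                // the size assertion below guards the assumption that
                // `Option<RawSample>` niches its discriminant, making
                // `Some(sample)` layout-compatible with `sample`.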
719 | unsafe { |
720 | assert_eq!(mem::size_of::<RawSample>(), mem::size_of::<Option<RawSample>>()); |
721 | std::slice::from_raw_parts(raw_samples.as_ptr().cast(), raw_samples.len()) |
722 | } |
723 | }; |
724 | |
725 | // If testing, exit the benchmarking loop immediately after timing a |
726 | // single run. |
727 | if is_test { |
728 | break; |
729 | } |
730 | |
731 | let slowest_sample = raw_samples.iter().max_by_key(|s| s.duration()).unwrap(); |
732 | let slowest_time = slowest_sample.duration(); |
733 | |
734 | // TODO: Make tuning be less influenced by early runs. Currently if |
735 | // early runs are very quick but later runs are slow, benchmarking |
736 | // will take a very long time. |
737 | // |
738 | // TODO: Make `sample_size` consider time generating inputs and |
739 | // dropping inputs/outputs. Currently benchmarks like |
740 | // `Bencher::bench_refs(String::clear)` take a very long time. |
741 | if current_mode.is_tune() { |
742 | // Clear previous smaller samples. |
743 | self.samples.clear(); |
744 | self.counters.clear_input_counts(); |
745 | |
746 | // If within 100x timer precision, continue tuning. |
747 | let precision_multiple = slowest_time.picos / timer_precision.picos; |
748 | if precision_multiple <= 100 { |
749 | current_mode = BenchMode::Tune { sample_size: sample_size * 2 }; |
750 | } else { |
751 | current_mode = BenchMode::Collect { sample_size }; |
752 | rem_samples = Some(self.options.sample_count.unwrap_or(DEFAULT_SAMPLE_COUNT)); |
753 | } |
754 | } |
755 | |
756 | // Returns the sample's duration adjusted for overhead. |
757 | let sample_duration_sub_overhead = |raw_sample: &RawSample| { |
758 | let overhead = bench_overheads.total_overhead(sample_size, &raw_sample.alloc_info); |
759 | |
760 | FineDuration { |
761 | picos: raw_sample |
762 | .duration() |
763 | .clamp_to(timer_precision) |
764 | .picos |
765 | .saturating_sub(overhead.picos), |
766 | } |
767 | .clamp_to(timer_precision) |
768 | }; |
769 | |
770 | for raw_sample in raw_samples { |
771 | let sample_index = self.samples.time_samples.len(); |
772 | |
773 | self.samples |
774 | .time_samples |
775 | .push(TimeSample { duration: sample_duration_sub_overhead(raw_sample) }); |
776 | |
777 | if !raw_sample.alloc_info.tallies.is_empty() { |
778 | self.samples |
779 | .alloc_info_by_sample |
780 | .insert(sample_index as u32, raw_sample.alloc_info.clone()); |
781 | } |
782 | |
783 | // Insert per-input counter information. |
784 | for counter_kind in KnownCounterKind::ALL { |
785 | if !self.counters.uses_input_counts(counter_kind) { |
786 | continue; |
787 | } |
788 | |
789 | let total_count = raw_sample.counter_totals[counter_kind as usize]; |
790 | |
791 | // Cannot overflow `MaxCountUInt` because `total_count` |
792 | // cannot exceed `MaxCountUInt::MAX * sample_size`. |
793 | let per_iter_count = (total_count / sample_size as u128) as MaxCountUInt; |
794 | |
795 | self.counters.push_counter(AnyCounter::known(counter_kind, per_iter_count)); |
796 | } |
797 | |
798 | if let Some(rem_samples) = &mut rem_samples { |
799 | *rem_samples = rem_samples.saturating_sub(1); |
800 | } |
801 | } |
802 | |
803 | if let Some(initial_start) = initial_start { |
804 | let last_end = raw_samples.iter().map(|s| s.end).max().unwrap(); |
805 | elapsed_picos = last_end.duration_since(initial_start, timer).picos; |
806 | } else { |
807 | // Progress by at least 1ns to prevent extremely fast |
808 | // functions from taking forever when `min_time` is set. |
809 | let progress_picos = slowest_time.picos.max(1_000); |
810 | elapsed_picos = elapsed_picos.saturating_add(progress_picos); |
811 | } |
812 | } |
813 | |
814 | // Reset flag for ignoring allocations. |
815 | crate::alloc::IGNORE_ALLOC.set(false); |
816 | } |
817 | |
    /// Returns a closure that takes the sample size, an optional barrier for
    /// synchronizing threads, and a per-input counter callback, and then
    /// returns a newly recorded sample.
820 | fn sample_recorder<I, O>( |
821 | &self, |
822 | gen_input: impl Fn() -> I, |
823 | benched: impl Fn(&UnsafeCell<MaybeUninit<I>>) -> O, |
824 | drop_input: impl Fn(&UnsafeCell<MaybeUninit<I>>), |
825 | ) -> impl Fn(usize, Option<&Barrier>, &mut dyn FnMut(&I)) -> ([Timestamp; 2], ThreadAllocInfo) |
826 | { |
827 | // We defer: |
828 | // - Usage of `gen_input` values. |
829 | // - Drop destructor for `O`, preventing it from affecting sample |
830 | // measurements. Outputs are stored into a pre-allocated buffer during |
831 | // the sample loop. The allocation is reused between samples to reduce |
832 | // time spent between samples. |
833 | |
834 | let timer_kind = self.shared_context.timer.kind(); |
835 | |
836 | move |sample_size: usize, barrier: Option<&Barrier>, count_input: &mut dyn FnMut(&I)| { |
837 | let mut defer_store = DeferStore::<I, O>::default(); |
838 | |
839 | let mut saved_alloc_info = ThreadAllocInfo::new(); |
840 | let mut save_alloc_info = || { |
841 | if crate::alloc::IGNORE_ALLOC.get() { |
842 | return; |
843 | } |
844 | |
845 | if let Some(alloc_info) = ThreadAllocInfo::try_current() { |
846 | // SAFETY: We have exclusive access. |
847 | saved_alloc_info = unsafe { alloc_info.as_ptr().read() }; |
848 | } |
849 | }; |
850 | |
851 | // Synchronize all threads to start timed section simultaneously and |
852 | // clear every thread's memory profiling info. |
853 | // |
854 | // This ensures work external to the timed section does not affect |
855 | // the timing of other threads. |
856 | let sync_threads = |is_start: bool| { |
857 | sync_impl(barrier, is_start); |
858 | |
859 | // Monomorphize implementation to reduce code size. |
                #[inline(never)]
861 | fn sync_impl(barrier: Option<&Barrier>, is_start: bool) { |
862 | // Ensure benchmarked section has a `ThreadAllocInfo` |
863 | // allocated for the current thread and clear previous info. |
864 | let alloc_info = if is_start { ThreadAllocInfo::current() } else { None }; |
865 | |
                    // Synchronize all threads.
                    //
                    // Since `alloc_info` is `None` when `is_start` is
                    // `false`, this wait is the final synchronization point
                    // at the end of a sample.
869 | if let Some(barrier) = barrier { |
870 | barrier.wait(); |
871 | } |
872 | |
873 | if let Some(mut alloc_info) = alloc_info { |
874 | // SAFETY: We have exclusive access. |
875 | let alloc_info = unsafe { alloc_info.as_mut() }; |
876 | |
877 | alloc_info.clear(); |
878 | |
879 | // Synchronize all threads. |
880 | if let Some(barrier) = barrier { |
881 | barrier.wait(); |
882 | } |
883 | } |
884 | } |
885 | }; |
886 | |
887 | // The following logic chooses how to efficiently sample the |
888 | // benchmark function once and assigns `sample_start`/`sample_end` |
889 | // before/after the sample loop. |
890 | // |
891 | // NOTE: Testing and benchmarking should behave exactly the same |
892 | // when getting the sample time span. We don't want to introduce |
893 | // extra work that may worsen measurement quality for real |
894 | // benchmarking. |
895 | let sample_start: UntaggedTimestamp; |
896 | let sample_end: UntaggedTimestamp; |
897 | |
            if mem::size_of::<I>() == 0
                && (mem::size_of::<O>() == 0 || !mem::needs_drop::<O>())
            {
899 | // Use a range instead of `defer_store` to make the benchmarking |
900 | // loop cheaper. |
901 | |
902 | // Run `gen_input` the expected number of times in case it |
903 | // updates external state used by `benched`. |
904 | for _ in 0..sample_size { |
905 | let input = gen_input(); |
906 | count_input(&input); |
907 | |
908 | // Inputs are consumed/dropped later. |
909 | mem::forget(input); |
910 | } |
911 | |
912 | sync_threads(true); |
913 | sample_start = UntaggedTimestamp::start(timer_kind); |
914 | |
915 | // Sample loop: |
916 | for _ in 0..sample_size { |
917 | // SAFETY: Input is a ZST, so we can construct one out of |
918 | // thin air. |
919 | let input = unsafe { UnsafeCell::new(MaybeUninit::<I>::zeroed()) }; |
920 | |
921 | mem::forget(black_box(benched(&input))); |
922 | } |
923 | |
924 | sample_end = UntaggedTimestamp::end(timer_kind); |
925 | sync_threads(false); |
926 | save_alloc_info(); |
927 | |
928 | // Drop outputs and inputs. |
929 | for _ in 0..sample_size { |
                    // The output only needs to be dropped here if it is a
                    // ZST, since this branch requires non-ZST outputs to not
                    // need drop.
                    if mem::size_of::<O>() == 0 {
932 | // SAFETY: Output is a ZST, so we can construct one out |
933 | // of thin air. |
934 | unsafe { _ = mem::zeroed::<O>() } |
935 | } |
936 | |
937 | if mem::needs_drop::<I>() { |
938 | // SAFETY: Input is a ZST, so we can construct one out |
939 | // of thin air and not worry about aliasing. |
940 | unsafe { drop_input(&UnsafeCell::new(MaybeUninit::<I>::zeroed())) } |
941 | } |
942 | } |
943 | } else { |
944 | defer_store.prepare(sample_size); |
945 | |
946 | match defer_store.slots() { |
947 | // Output needs to be dropped. We defer drop in the sample |
948 | // loop by inserting it into `defer_store`. |
949 | Ok(defer_slots_slice) => { |
950 | // Initialize and store inputs. |
951 | for DeferSlot { input, .. } in defer_slots_slice { |
952 | // SAFETY: We have exclusive access to `input`. |
953 | let input = unsafe { &mut *input.get() }; |
954 | let input = input.write(gen_input()); |
955 | count_input(input); |
956 | |
957 | // Make input opaque to benchmarked function. |
958 | black_box(input); |
959 | } |
960 | |
961 | // Create iterator before the sample timing section to |
962 | // reduce benchmarking overhead. |
963 | let defer_slots_iter = defer_slots_slice.iter(); |
964 | |
965 | sync_threads(true); |
966 | sample_start = UntaggedTimestamp::start(timer_kind); |
967 | |
968 | // Sample loop: |
969 | for defer_slot in defer_slots_iter { |
970 | // SAFETY: All inputs in `defer_store` were |
971 | // initialized and we have exclusive access to the |
972 | // output slot. |
973 | unsafe { |
974 | let output = benched(&defer_slot.input); |
975 | *defer_slot.output.get() = MaybeUninit::new(output); |
976 | } |
977 | } |
978 | |
979 | sample_end = UntaggedTimestamp::end(timer_kind); |
980 | sync_threads(false); |
981 | save_alloc_info(); |
982 | |
983 | // Prevent the optimizer from removing writes to inputs |
984 | // and outputs in the sample loop. |
985 | black_box(defer_slots_slice); |
986 | |
987 | // Drop outputs and inputs. |
988 | for DeferSlot { input, output } in defer_slots_slice { |
989 | // SAFETY: All outputs were initialized in the |
990 | // sample loop and we have exclusive access. |
991 | unsafe { (*output.get()).assume_init_drop() } |
992 | |
993 | if mem::needs_drop::<I>() { |
994 | // SAFETY: The output was dropped and thus we |
995 | // have exclusive access to inputs. |
996 | unsafe { drop_input(input) } |
997 | } |
998 | } |
999 | } |
1000 | |
1001 | // Output does not need to be dropped. |
1002 | Err(defer_inputs_slice) => { |
1003 | // Initialize and store inputs. |
1004 | for input in defer_inputs_slice { |
1005 | // SAFETY: We have exclusive access to `input`. |
1006 | let input = unsafe { &mut *input.get() }; |
1007 | let input = input.write(gen_input()); |
1008 | count_input(input); |
1009 | |
1010 | // Make input opaque to benchmarked function. |
1011 | black_box(input); |
1012 | } |
1013 | |
1014 | // Create iterator before the sample timing section to |
1015 | // reduce benchmarking overhead. |
1016 | let defer_inputs_iter = defer_inputs_slice.iter(); |
1017 | |
1018 | sync_threads(true); |
1019 | sample_start = UntaggedTimestamp::start(timer_kind); |
1020 | |
1021 | // Sample loop: |
1022 | for input in defer_inputs_iter { |
1023 | // SAFETY: All inputs in `defer_store` were |
1024 | // initialized. |
1025 | black_box_drop(unsafe { benched(input) }); |
1026 | } |
1027 | |
1028 | sample_end = UntaggedTimestamp::end(timer_kind); |
1029 | sync_threads(false); |
1030 | save_alloc_info(); |
1031 | |
1032 | // Prevent the optimizer from removing writes to inputs |
1033 | // in the sample loop. |
1034 | black_box(defer_inputs_slice); |
1035 | |
1036 | // Drop inputs. |
1037 | if mem::needs_drop::<I>() { |
1038 | for input in defer_inputs_slice { |
1039 | // SAFETY: We have exclusive access to inputs. |
1040 | unsafe { drop_input(input) } |
1041 | } |
1042 | } |
1043 | } |
1044 | } |
1045 | } |
1046 | |
1047 | // SAFETY: These values are guaranteed to be the correct variant |
1048 | // because they were created from the same `timer_kind`. |
1049 | let interval = unsafe { |
1050 | [sample_start.into_timestamp(timer_kind), sample_end.into_timestamp(timer_kind)] |
1051 | }; |
1052 | |
1053 | (interval, saved_alloc_info) |
1054 | } |
1055 | } |
1056 | |
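    /// Determines the starting [`BenchMode`] from the shared context's action
    /// and the user-configured sample size.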
    #[inline]
1058 | fn initial_mode(&self) -> BenchMode { |
1059 | if self.shared_context.action.is_test() { |
1060 | BenchMode::Test |
1061 | } else if let Some(sample_size) = self.options.sample_size { |
1062 | BenchMode::Collect { sample_size } |
1063 | } else { |
1064 | BenchMode::Tune { sample_size: 1 } |
1065 | } |
1066 | } |
1067 | |
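    /// Computes user-facing statistics (timing, allocation, and counter
    /// stats) from the recorded samples.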
1068 | pub fn compute_stats(&self) -> Stats { |
1069 | let time_samples = &self.samples.time_samples; |
1070 | let alloc_info_by_sample = &self.samples.alloc_info_by_sample; |
1071 | |
1072 | let sample_count = time_samples.len(); |
1073 | let sample_size = self.samples.sample_size; |
1074 | |
1075 | let total_count = self.samples.iter_count(); |
1076 | |
1077 | let total_duration = self.samples.total_duration(); |
1078 | let mean_duration = FineDuration { |
1079 | picos: total_duration.picos.checked_div(total_count as u128).unwrap_or_default(), |
1080 | }; |
1081 | |
1082 | // Samples sorted by duration. |
1083 | let sorted_samples = self.samples.sorted_samples(); |
1084 | let median_samples = util::slice_middle(&sorted_samples); |
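        // `slice_middle` is expected to yield the middle one (odd sample
        // count) or two (even sample count) samples, which the calculations
        // below average.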
1085 | |
1086 | let index_of_sample = |sample: &TimeSample| -> usize { |
1087 | util::slice_ptr_index(&self.samples.time_samples, sample) |
1088 | }; |
1089 | |
1090 | let counter_count_for_sample = |
1091 | |sample: &TimeSample, counter_kind: KnownCounterKind| -> Option<MaxCountUInt> { |
1092 | let counts = self.counters.counts(counter_kind); |
1093 | |
1094 | let index = if self.counters.uses_input_counts(counter_kind) { |
1095 | index_of_sample(sample) |
1096 | } else { |
1097 | 0 |
1098 | }; |
1099 | |
1100 | counts.get(index).copied() |
1101 | }; |
1102 | |
1103 | let min_duration = |
1104 | sorted_samples.first().map(|s| s.duration / sample_size).unwrap_or_default(); |
1105 | let max_duration = |
1106 | sorted_samples.last().map(|s| s.duration / sample_size).unwrap_or_default(); |
1107 | |
1108 | let median_duration = if median_samples.is_empty() { |
1109 | FineDuration::default() |
1110 | } else { |
1111 | let sum: u128 = median_samples.iter().map(|s| s.duration.picos).sum(); |
1112 | FineDuration { picos: sum / median_samples.len() as u128 } / sample_size |
1113 | }; |
1114 | |
1115 | let counts = KnownCounterKind::ALL.map(|counter_kind| { |
1116 | let median: MaxCountUInt = { |
1117 | let mut sum: u128 = 0; |
1118 | |
1119 | for sample in median_samples { |
1120 | let sample_count = counter_count_for_sample(sample, counter_kind)? as u128; |
1121 | |
                    // Saturating add in case `MaxCountUInt > u64`.
1123 | sum = sum.saturating_add(sample_count); |
1124 | } |
1125 | |
                // `.max(1)` avoids dividing by zero when there are no samples.
                (sum / median_samples.len().max(1) as u128) as MaxCountUInt
1127 | }; |
1128 | |
1129 | Some(StatsSet { |
1130 | fastest: sorted_samples |
1131 | .first() |
1132 | .and_then(|s| counter_count_for_sample(s, counter_kind))?, |
1133 | slowest: sorted_samples |
1134 | .last() |
1135 | .and_then(|s| counter_count_for_sample(s, counter_kind))?, |
1136 | median, |
1137 | mean: self.counters.mean_count(counter_kind), |
1138 | }) |
1139 | }); |
1140 | |
1141 | let sample_alloc_info = |sample: Option<&TimeSample>| -> Option<&ThreadAllocInfo> { |
1142 | sample |
1143 | .and_then(|sample| u32::try_from(index_of_sample(sample)).ok()) |
1144 | .and_then(|index| self.samples.alloc_info_by_sample.get(&index)) |
1145 | }; |
1146 | |
1147 | let sample_alloc_tally = |sample: Option<&TimeSample>, op: AllocOp| -> ThreadAllocTally { |
1148 | sample_alloc_info(sample) |
1149 | .map(|alloc_info| alloc_info.tallies.get(op)) |
1150 | .copied() |
1151 | .unwrap_or_default() |
1152 | }; |
1153 | |
1154 | let mut alloc_total_max_count = 0u128; |
1155 | let mut alloc_total_max_size = 0u128; |
1156 | let mut alloc_total_tallies = TotalAllocTallyMap::default(); |
1157 | |
1158 | for alloc_info in alloc_info_by_sample.values() { |
1159 | alloc_total_max_count += alloc_info.max_count as u128; |
1160 | alloc_total_max_size += alloc_info.max_size as u128; |
1161 | alloc_info.tallies.add_to_total(&mut alloc_total_tallies); |
1162 | } |
1163 | |
1164 | let sample_size = f64::from(sample_size); |
1165 | Stats { |
1166 | sample_count: sample_count as u32, |
1167 | iter_count: total_count, |
1168 | time: StatsSet { |
1169 | fastest: min_duration, |
1170 | slowest: max_duration, |
1171 | median: median_duration, |
1172 | mean: mean_duration, |
1173 | }, |
1174 | max_alloc: StatsSet { |
1175 | fastest: { |
1176 | let alloc_info = sample_alloc_info(sorted_samples.first().copied()); |
1177 | |
1178 | AllocTally { |
1179 | count: alloc_info.map(|info| info.max_count as f64).unwrap_or_default() |
1180 | / sample_size, |
1181 | size: alloc_info.map(|info| info.max_size as f64).unwrap_or_default() |
1182 | / sample_size, |
1183 | } |
1184 | }, |
1185 | slowest: { |
1186 | let alloc_info = sample_alloc_info(sorted_samples.last().copied()); |
1187 | |
1188 | AllocTally { |
1189 | count: alloc_info.map(|info| info.max_count as f64).unwrap_or_default() |
1190 | / sample_size, |
1191 | size: alloc_info.map(|info| info.max_size as f64).unwrap_or_default() |
1192 | / sample_size, |
1193 | } |
1194 | }, |
1195 | // TODO: Switch to median of alloc info itself, rather than |
1196 | // basing off of median times. |
1197 | median: { |
1198 | let alloc_info_for_median = |
1199 | |index| sample_alloc_info(median_samples.get(index).copied()); |
1200 | |
1201 | let max_count_for_median = |index: usize| -> f64 { |
1202 | alloc_info_for_median(index) |
1203 | .map(|info| info.max_count as f64) |
1204 | .unwrap_or_default() |
1205 | }; |
1206 | |
1207 | let max_size_for_median = |index: usize| -> f64 { |
1208 | alloc_info_for_median(index) |
1209 | .map(|info| info.max_size as f64) |
1210 | .unwrap_or_default() |
1211 | }; |
1212 | |
1213 | let median_count = median_samples.len().max(1) as f64; |
1214 | |
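                    // With one median sample, index 1 falls back to 0 and
                    // `median_count` is 1; with two, dividing by
                    // `median_count` averages them.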
1215 | let median_max_count = max_count_for_median(0) + max_count_for_median(1); |
1216 | let median_max_size = max_size_for_median(0) + max_size_for_median(1); |
1217 | |
1218 | AllocTally { |
1219 | count: median_max_count / median_count / sample_size, |
1220 | size: median_max_size / median_count / sample_size, |
1221 | } |
1222 | }, |
1223 | mean: AllocTally { |
1224 | count: alloc_total_max_count as f64 / total_count as f64, |
1225 | size: alloc_total_max_size as f64 / total_count as f64, |
1226 | }, |
1227 | } |
1228 | .transpose(), |
1229 | alloc_tallies: AllocOpMap { |
1230 | values: AllocOp::ALL |
1231 | .map(|op| StatsSet { |
1232 | fastest: { |
1233 | let fastest = sample_alloc_tally(sorted_samples.first().copied(), op); |
1234 | |
1235 | AllocTally { |
1236 | count: fastest.count as f64 / sample_size, |
1237 | size: fastest.size as f64 / sample_size, |
1238 | } |
1239 | }, |
1240 | slowest: { |
1241 | let slowest = sample_alloc_tally(sorted_samples.last().copied(), op); |
1242 | |
1243 | AllocTally { |
1244 | count: slowest.count as f64 / sample_size, |
1245 | size: slowest.size as f64 / sample_size, |
1246 | } |
1247 | }, |
1248 | median: { |
1249 | let tally_for_median = |index: usize| -> ThreadAllocTally { |
1250 | sample_alloc_tally(median_samples.get(index).copied(), op) |
1251 | }; |
1252 | |
1253 | let a = tally_for_median(0); |
1254 | let b = tally_for_median(1); |
1255 | |
1256 | let median_count = median_samples.len().max(1) as f64; |
1257 | |
1258 | let avg_count = (a.count as f64 + b.count as f64) / median_count; |
1259 | let avg_size = (a.size as f64 + b.size as f64) / median_count; |
1260 | |
1261 | AllocTally { |
1262 | count: avg_count / sample_size, |
1263 | size: avg_size / sample_size, |
1264 | } |
1265 | }, |
1266 | mean: { |
1267 | let tally = alloc_total_tallies.get(op); |
1268 | AllocTally { |
1269 | count: tally.count as f64 / total_count as f64, |
1270 | size: tally.size as f64 / total_count as f64, |
1271 | } |
1272 | }, |
1273 | }) |
1274 | .map(StatsSet::transpose), |
1275 | }, |
1276 | counts, |
1277 | } |
1278 | } |
1279 | } |
1280 | |
1281 | impl<T> StatsSet<AllocTally<T>> { |
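    /// Swaps the nesting of `StatsSet` and `AllocTally`, turning
    /// per-statistic tallies into per-field (count/size) statistics.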
    #[inline]
1283 | pub fn transpose(self) -> AllocTally<StatsSet<T>> { |
1284 | AllocTally { |
1285 | count: StatsSet { |
1286 | fastest: self.fastest.count, |
1287 | slowest: self.slowest.count, |
1288 | median: self.median.count, |
1289 | mean: self.mean.count, |
1290 | }, |
1291 | size: StatsSet { |
1292 | fastest: self.fastest.size, |
1293 | slowest: self.slowest.size, |
1294 | median: self.median.size, |
1295 | mean: self.mean.size, |
1296 | }, |
1297 | } |
1298 | } |
1299 | } |
1300 | |