1use std::{cmp::Ordering, num::NonZeroU64, sync::OnceLock};
2
3use crate::{
4 alloc::{AllocOp, ThreadAllocInfo},
5 black_box,
6 time::{FineDuration, TscTimestamp, TscUnavailable, UntaggedTimestamp},
7};
8
/// Measures time.
///
/// Defaults to the operating system timer ([`Timer::Os`]).
#[derive(Clone, Copy, Default)]
pub(crate) enum Timer {
    /// Operating system timer.
    #[default]
    Os,

    /// CPU timestamp counter.
    Tsc {
        /// [`TscTimestamp::frequency`].
        frequency: NonZeroU64,
    },
}
22
impl Timer {
    /// The number of timer variants; used to size the per-kind caches below,
    /// which are indexed by `self.kind() as usize`.
    const COUNT: usize = 2;

    /// Returns all available timers.
    #[cfg(test)]
    pub fn available() -> Vec<Self> {
        let mut timers = vec![Self::Os];

        if let Ok(tsc) = Self::get_tsc() {
            timers.push(tsc);
        }

        timers
    }

    /// Attempts to get the CPU timestamp counter.
    #[inline]
    pub fn get_tsc() -> Result<Self, TscUnavailable> {
        Ok(Self::Tsc { frequency: TscTimestamp::frequency()? })
    }

    /// Returns this timer's kind, discarding any associated data.
    #[inline]
    pub fn kind(self) -> TimerKind {
        match self {
            Self::Os => TimerKind::Os,
            Self::Tsc { .. } => TimerKind::Tsc,
        }
    }

    /// Returns the smallest non-zero duration that this timer can measure.
    ///
    /// The result is cached.
    pub fn precision(self) -> FineDuration {
        // One cache slot per timer kind.
        static CACHED: [OnceLock<FineDuration>; Timer::COUNT] = [OnceLock::new(), OnceLock::new()];

        let cached = &CACHED[self.kind() as usize];

        *cached.get_or_init(|| self.measure_precision())
    }

    /// Measures the smallest non-zero duration this timer can produce by
    /// repeatedly taking back-to-back timestamp samples.
    fn measure_precision(self) -> FineDuration {
        let timer_kind = self.kind();

        // Start with the worst possible minimum.
        let mut min_sample = FineDuration::MAX;
        let mut seen_count = 0;

        // If timing in immediate succession fails to produce a non-zero sample,
        // an artificial delay is added by looping. `usize` is intentionally
        // used to make looping cheap.
        let mut delay_len: usize = 0;

        loop {
            // Take samples in batches of 100 before growing the delay.
            for _ in 0..100 {
                // Use `UntaggedTimestamp` to minimize overhead.
                let sample_start: UntaggedTimestamp;
                let sample_end: UntaggedTimestamp;

                if delay_len == 0 {
                    // Immediate succession.
                    sample_start = UntaggedTimestamp::start(timer_kind);
                    sample_end = UntaggedTimestamp::end(timer_kind);
                } else {
                    // Add delay.
                    sample_start = UntaggedTimestamp::start(timer_kind);
                    for n in 0..delay_len {
                        crate::black_box(n);
                    }
                    sample_end = UntaggedTimestamp::end(timer_kind);
                }

                // SAFETY: These values are guaranteed to be the correct variant
                // because they were created from the same `timer_kind`.
                let [sample_start, sample_end] = unsafe {
                    [sample_start.into_timestamp(timer_kind), sample_end.into_timestamp(timer_kind)]
                };

                let sample = sample_end.duration_since(sample_start, self);

                // Discard sample if irrelevant.
                if sample.is_zero() {
                    continue;
                }

                match sample.cmp(&min_sample) {
                    Ordering::Greater => {
                        // If we already delayed a lot, and not hit the seen
                        // count threshold, then use current minimum.
                        if delay_len > 100 {
                            return min_sample;
                        }
                    }
                    Ordering::Equal => {
                        seen_count += 1;

                        // If we've seen this min 100 times, we have high
                        // confidence this is the smallest duration.
                        if seen_count >= 100 {
                            return min_sample;
                        }
                    }
                    Ordering::Less => {
                        // New minimum found; restart the confidence count.
                        min_sample = sample;
                        seen_count = 0;
                    }
                }
            }

            // Grow the artificial delay for the next batch of samples.
            delay_len = delay_len.saturating_add(1);
        }
    }

    /// Returns the overheads added by the benchmarker.
    ///
    /// `min_time` and `max_time` do not consider this as benchmarking time.
    ///
    /// The result is measured once per timer kind and cached.
    pub fn bench_overheads(self) -> &'static TimedOverhead {
        // Miri is slow, so don't waste time on this.
        if cfg!(miri) {
            return &TimedOverhead::ZERO;
        }

        // One cache slot per timer kind.
        static CACHED: [OnceLock<TimedOverhead>; Timer::COUNT] = [OnceLock::new(), OnceLock::new()];

        let cached = &CACHED[self.kind() as usize];

        cached.get_or_init(|| TimedOverhead {
            sample_loop: self.sample_loop_overhead(),
            tally_alloc: self.measure_tally_alloc_overhead(),
            tally_dealloc: self.measure_tally_dealloc_overhead(),
            tally_realloc: self.measure_tally_realloc_overhead(),
        })
    }

    /// Returns the per-iteration overhead of the benchmarking sample loop.
    ///
    /// The result is measured once per timer kind and cached.
    fn sample_loop_overhead(self) -> FineDuration {
        // Miri is slow, so don't waste time on this.
        if cfg!(miri) {
            return FineDuration::default();
        }

        // One cache slot per timer kind.
        static CACHED: [OnceLock<FineDuration>; Timer::COUNT] = [OnceLock::new(), OnceLock::new()];

        let cached = &CACHED[self.kind() as usize];

        *cached.get_or_init(|| self.measure_sample_loop_overhead())
    }

    /// Calculates the per-iteration overhead of the benchmarking sample loop.
    fn measure_sample_loop_overhead(self) -> FineDuration {
        let timer_kind = self.kind();

        let sample_count: usize = 100;
        let sample_size: usize = 10_000;

        // The minimum non-zero sample.
        let mut min_sample = FineDuration::default();

        for _ in 0..sample_count {
            let start = UntaggedTimestamp::start(timer_kind);

            // An empty loop body that only `black_box`es the counter, to
            // approximate the loop's own cost.
            for i in 0..sample_size {
                _ = crate::black_box(i);
            }

            let end = UntaggedTimestamp::end(timer_kind);

            // SAFETY: These values are guaranteed to be the correct variant because
            // they were created from the same `timer_kind`.
            let [start, end] =
                unsafe { [start.into_timestamp(timer_kind), end.into_timestamp(timer_kind)] };

            // Average the measured time over the iterations.
            let mut sample = end.duration_since(start, self);
            sample.picos /= sample_size as u128;

            min_sample = min_sample.clamp_to_min(sample);
        }

        min_sample
    }

    /// Measures the overhead of tallying a single allocation.
    fn measure_tally_alloc_overhead(self) -> FineDuration {
        let size = black_box(0);
        self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_alloc(size))
    }

    /// Measures the overhead of tallying a single deallocation.
    fn measure_tally_dealloc_overhead(self) -> FineDuration {
        let size = black_box(0);
        self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_dealloc(size))
    }

    /// Measures the overhead of tallying a single reallocation.
    fn measure_tally_realloc_overhead(self) -> FineDuration {
        let new_size = black_box(0);
        let old_size = black_box(0);
        self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_realloc(old_size, new_size))
    }

    /// Measures the minimum time taken by `operation` on the current thread's
    /// allocation info, clearing the tallies it produced afterwards.
    //
    // SAFETY: This function is not reentrant. Calling it within `operation`
    // would cause aliasing of `ThreadAllocInfo::current`.
    fn measure_alloc_info_overhead(self, operation: impl Fn(&mut ThreadAllocInfo)) -> FineDuration {
        // Initialize the current thread's alloc info.
        let alloc_info = ThreadAllocInfo::current();

        let sample_count = 100;
        let sample_size = 50_000;

        let result = self.measure_min_time(sample_count, sample_size, || {
            if let Some(mut alloc_info) = ThreadAllocInfo::try_current() {
                // SAFETY: We have exclusive access.
                operation(unsafe { alloc_info.as_mut() });
            }
        });

        // Clear alloc info.
        if let Some(mut alloc_info) = alloc_info {
            // SAFETY: We have exclusive access.
            let alloc_info = unsafe { alloc_info.as_mut() };

            alloc_info.clear();
        }

        result
    }

    /// Calculates the smallest non-zero time to perform an operation.
    fn measure_min_time(
        self,
        sample_count: usize,
        sample_size: usize,
        operation: impl Fn(),
    ) -> FineDuration {
        let timer_kind = self.kind();

        let loop_overhead = self.sample_loop_overhead();
        let mut min_sample = FineDuration::default();

        for _ in 0..sample_count {
            let start = UntaggedTimestamp::start(timer_kind);

            for _ in 0..sample_size {
                operation();
            }

            let end = UntaggedTimestamp::end(timer_kind);

            // SAFETY: These values are guaranteed to be the correct variant
            // because they were created from the same `timer_kind`.
            let [start, end] =
                unsafe { [start.into_timestamp(timer_kind), end.into_timestamp(timer_kind)] };

            // Average the measured time over the iterations.
            let mut sample = end.duration_since(start, self);
            sample.picos /= sample_size as u128;

            // Remove benchmarking loop overhead.
            sample.picos = sample.picos.saturating_sub(loop_overhead.picos);

            min_sample = min_sample.clamp_to_min(sample);
        }

        min_sample
    }
}
284
/// [`Timer`] kind.
///
/// Also used (via `as usize`) to index the per-timer caches in
/// [`Timer::precision`] and related methods.
#[derive(Clone, Copy, Default)]
pub(crate) enum TimerKind {
    /// Operating system timer.
    #[default]
    Os,

    /// CPU timestamp counter.
    Tsc,
}
295
/// The measured overhead of various benchmarking operations.
pub(crate) struct TimedOverhead {
    /// Per-iteration overhead of the benchmarking sample loop.
    pub sample_loop: FineDuration,
    /// Overhead of tallying a single allocation.
    pub tally_alloc: FineDuration,
    /// Overhead of tallying a single deallocation.
    pub tally_dealloc: FineDuration,
    /// Overhead of tallying a single reallocation.
    pub tally_realloc: FineDuration,
}
303
304impl TimedOverhead {
305 pub const ZERO: Self = Self {
306 sample_loop: FineDuration::ZERO,
307 tally_alloc: FineDuration::ZERO,
308 tally_dealloc: FineDuration::ZERO,
309 tally_realloc: FineDuration::ZERO,
310 };
311
312 pub fn total_overhead(&self, sample_size: u32, alloc_info: &ThreadAllocInfo) -> FineDuration {
313 let sample_loop_overhead = self.sample_loop.picos.saturating_mul(sample_size as u128);
314
315 let tally_alloc_overhead = self
316 .tally_alloc
317 .picos
318 .saturating_mul(alloc_info.tallies.get(AllocOp::Alloc).count as u128);
319
320 let tally_dealloc_overhead = self
321 .tally_dealloc
322 .picos
323 .saturating_mul(alloc_info.tallies.get(AllocOp::Dealloc).count as u128);
324
325 let tally_realloc_overhead = self.tally_realloc.picos.saturating_mul(
326 alloc_info.tallies.get(AllocOp::Grow).count as u128
327 + alloc_info.tallies.get(AllocOp::Shrink).count as u128,
328 );
329
330 FineDuration {
331 picos: sample_loop_overhead
332 .saturating_add(tally_alloc_overhead)
333 .saturating_add(tally_dealloc_overhead)
334 .saturating_add(tally_realloc_overhead),
335 }
336 }
337}
338
// Benchmarks of the timer's own measurement routines, compiled only with the
// `internal_benches` feature.
#[cfg(feature = "internal_benches")]
mod benches {
    use super::*;

    #[crate::bench(crate = crate)]
    fn get_tsc() -> Result<Timer, TscUnavailable> {
        Timer::get_tsc()
    }

    // Benchmarks the `measure_*` routines against the OS timer.
    mod measure {
        use super::*;

        #[crate::bench(crate = crate)]
        fn precision() -> FineDuration {
            Timer::Os.measure_precision()
        }

        #[crate::bench(crate = crate)]
        fn sample_loop_overhead() -> FineDuration {
            Timer::Os.measure_sample_loop_overhead()
        }

        #[crate::bench(crate = crate)]
        fn tally_alloc_overhead() -> FineDuration {
            Timer::Os.measure_tally_alloc_overhead()
        }

        #[crate::bench(crate = crate)]
        fn tally_dealloc_overhead() -> FineDuration {
            Timer::Os.measure_tally_dealloc_overhead()
        }

        #[crate::bench(crate = crate)]
        fn tally_realloc_overhead() -> FineDuration {
            Timer::Os.measure_tally_realloc_overhead()
        }
    }
}
377