use std::{cmp::Ordering, num::NonZeroU64, sync::OnceLock};

use crate::{
    alloc::{AllocOp, ThreadAllocInfo},
    black_box,
    time::{FineDuration, TscTimestamp, TscUnavailable, UntaggedTimestamp},
};

/// Measures time.
#[derive(Clone, Copy, Default)]
pub(crate) enum Timer {
    /// Operating system timer.
    #[default]
    Os,

    /// CPU timestamp counter.
    Tsc {
        /// [`TscTimestamp::frequency`].
        frequency: NonZeroU64,
    },
}

impl Timer {
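    /// The number of [`TimerKind`] variants, used to size the per-kind caches
    /// below.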
    const COUNT: usize = 2;

    /// Returns all available timers.
    #[cfg(test)]
    pub fn available() -> Vec<Self> {
        let mut timers = vec![Self::Os];

        if let Ok(tsc) = Self::get_tsc() {
            timers.push(tsc);
        }

        timers
    }

    /// Attempts to get the CPU timestamp counter.
    #[inline]
    pub fn get_tsc() -> Result<Self, TscUnavailable> {
        Ok(Self::Tsc { frequency: TscTimestamp::frequency()? })
    }

    #[inline]
    pub fn kind(self) -> TimerKind {
        match self {
            Self::Os => TimerKind::Os,
            Self::Tsc { .. } => TimerKind::Tsc,
        }
    }

    /// Returns the smallest non-zero duration that this timer can measure.
    ///
    /// The result is cached.
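    ///
    /// A minimal usage sketch (illustrative only; this API is crate-internal
    /// and the measured value is machine-dependent):
    ///
    /// ```ignore
    /// let timer = Timer::default();
    ///
    /// // The smallest non-zero duration this timer was observed to measure.
    /// let precision = timer.precision();
    /// assert!(!precision.is_zero());
    ///
    /// // Subsequent calls return the cached value.
    /// assert!(timer.precision() == precision);
    /// ```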
    pub fn precision(self) -> FineDuration {
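        // One cache slot per timer kind, indexed by the `TimerKind` discriminant.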
        static CACHED: [OnceLock<FineDuration>; Timer::COUNT] = [OnceLock::new(), OnceLock::new()];

        let cached = &CACHED[self.kind() as usize];

        *cached.get_or_init(|| self.measure_precision())
    }

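    /// Measures precision by repeatedly timing back-to-back timestamp reads,
    /// adding an artificial delay if needed, and keeping the smallest non-zero
    /// duration once it has been observed consistently.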
    fn measure_precision(self) -> FineDuration {
        let timer_kind = self.kind();

        // Start with the worst possible minimum.
        let mut min_sample = FineDuration::MAX;
        let mut seen_count = 0;

        // If timing in immediate succession fails to produce a non-zero sample,
        // an artificial delay is added by looping. `usize` is intentionally
        // used to make looping cheap.
        let mut delay_len: usize = 0;

        loop {
            for _ in 0..100 {
                // Use `UntaggedTimestamp` to minimize overhead.
                let sample_start: UntaggedTimestamp;
                let sample_end: UntaggedTimestamp;

                if delay_len == 0 {
                    // Immediate succession.
                    sample_start = UntaggedTimestamp::start(timer_kind);
                    sample_end = UntaggedTimestamp::end(timer_kind);
                } else {
                    // Add delay.
                    sample_start = UntaggedTimestamp::start(timer_kind);
                    for n in 0..delay_len {
                        crate::black_box(n);
                    }
                    sample_end = UntaggedTimestamp::end(timer_kind);
                }

                // SAFETY: These values are guaranteed to be the correct variant
                // because they were created from the same `timer_kind`.
                let [sample_start, sample_end] = unsafe {
                    [sample_start.into_timestamp(timer_kind), sample_end.into_timestamp(timer_kind)]
                };

                let sample = sample_end.duration_since(sample_start, self);

                // Discard sample if irrelevant.
                if sample.is_zero() {
                    continue;
                }

                match sample.cmp(&min_sample) {
                    Ordering::Greater => {
                        // If we've already delayed a lot and still haven't hit
                        // the seen-count threshold, settle for the current
                        // minimum.
                        if delay_len > 100 {
                            return min_sample;
                        }
                    }
                    Ordering::Equal => {
                        seen_count += 1;

                        // If we've seen this min 100 times, we have high
                        // confidence this is the smallest duration.
                        if seen_count >= 100 {
                            return min_sample;
                        }
                    }
                    Ordering::Less => {
                        min_sample = sample;
                        seen_count = 0;
                    }
                }
            }

            delay_len = delay_len.saturating_add(1);
        }
    }

    /// Returns the overheads added by the benchmarker itself.
    ///
    /// The `min_time` and `max_time` options do not count this overhead as
    /// benchmarking time.
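    ///
    /// An illustrative sketch of how a caller might subtract this overhead
    /// from a raw sample (the `timer`, `raw_elapsed`, `sample_size`, and
    /// `alloc_info` values here are hypothetical):
    ///
    /// ```ignore
    /// let overhead = timer.bench_overheads().total_overhead(sample_size, &alloc_info);
    /// let adjusted_picos = raw_elapsed.picos.saturating_sub(overhead.picos);
    /// ```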
    pub fn bench_overheads(self) -> &'static TimedOverhead {
        // Miri is slow, so don't waste time on this.
        if cfg!(miri) {
            return &TimedOverhead::ZERO;
        }

        static CACHED: [OnceLock<TimedOverhead>; Timer::COUNT] = [OnceLock::new(), OnceLock::new()];

        let cached = &CACHED[self.kind() as usize];

        cached.get_or_init(|| TimedOverhead {
            sample_loop: self.sample_loop_overhead(),
            tally_alloc: self.measure_tally_alloc_overhead(),
            tally_dealloc: self.measure_tally_dealloc_overhead(),
            tally_realloc: self.measure_tally_realloc_overhead(),
        })
    }

    /// Returns the per-iteration overhead of the benchmarking sample loop.
    fn sample_loop_overhead(self) -> FineDuration {
        // Miri is slow, so don't waste time on this.
        if cfg!(miri) {
            return FineDuration::default();
        }

        static CACHED: [OnceLock<FineDuration>; Timer::COUNT] = [OnceLock::new(), OnceLock::new()];

        let cached = &CACHED[self.kind() as usize];

        *cached.get_or_init(|| self.measure_sample_loop_overhead())
    }

    /// Calculates the per-iteration overhead of the benchmarking sample loop.
    fn measure_sample_loop_overhead(self) -> FineDuration {
        let timer_kind = self.kind();

        let sample_count: usize = 100;
        let sample_size: usize = 10_000;

        // The minimum non-zero sample.
        let mut min_sample = FineDuration::default();

        for _ in 0..sample_count {
            let start = UntaggedTimestamp::start(timer_kind);

            for i in 0..sample_size {
                _ = crate::black_box(i);
            }

            let end = UntaggedTimestamp::end(timer_kind);

            // SAFETY: These values are guaranteed to be the correct variant because
            // they were created from the same `timer_kind`.
            let [start, end] =
                unsafe { [start.into_timestamp(timer_kind), end.into_timestamp(timer_kind)] };

            let mut sample = end.duration_since(start, self);
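            // Amortize the elapsed time across the loop iterations.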
            sample.picos /= sample_size as u128;

            min_sample = min_sample.clamp_to_min(sample);
        }

        min_sample
    }

    fn measure_tally_alloc_overhead(self) -> FineDuration {
        let size = black_box(0);
        self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_alloc(size))
    }

    fn measure_tally_dealloc_overhead(self) -> FineDuration {
        let size = black_box(0);
        self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_dealloc(size))
    }

    fn measure_tally_realloc_overhead(self) -> FineDuration {
        let new_size = black_box(0);
        let old_size = black_box(0);
        self.measure_alloc_info_overhead(|alloc_info| alloc_info.tally_realloc(old_size, new_size))
    }

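    /// Measures the per-call cost of updating the current thread's
    /// [`ThreadAllocInfo`] tallies via `operation`.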
    // SAFETY: This function is not reentrant. Calling it within `operation`
    // would cause aliasing of `ThreadAllocInfo::current`.
    fn measure_alloc_info_overhead(self, operation: impl Fn(&mut ThreadAllocInfo)) -> FineDuration {
        // Initialize the current thread's alloc info.
        let alloc_info = ThreadAllocInfo::current();

        let sample_count = 100;
        let sample_size = 50_000;

        let result = self.measure_min_time(sample_count, sample_size, || {
            if let Some(mut alloc_info) = ThreadAllocInfo::try_current() {
                // SAFETY: We have exclusive access.
                operation(unsafe { alloc_info.as_mut() });
            }
        });

        // Clear alloc info.
        if let Some(mut alloc_info) = alloc_info {
            // SAFETY: We have exclusive access.
            let alloc_info = unsafe { alloc_info.as_mut() };

            alloc_info.clear();
        }

        result
    }

    /// Calculates the smallest non-zero time to perform an operation.
    fn measure_min_time(
        self,
        sample_count: usize,
        sample_size: usize,
        operation: impl Fn(),
    ) -> FineDuration {
        let timer_kind = self.kind();

        let loop_overhead = self.sample_loop_overhead();
        let mut min_sample = FineDuration::default();

        for _ in 0..sample_count {
            let start = UntaggedTimestamp::start(timer_kind);

            for _ in 0..sample_size {
                operation();
            }

            let end = UntaggedTimestamp::end(timer_kind);

            // SAFETY: These values are guaranteed to be the correct variant
            // because they were created from the same `timer_kind`.
            let [start, end] =
                unsafe { [start.into_timestamp(timer_kind), end.into_timestamp(timer_kind)] };

            let mut sample = end.duration_since(start, self);
            sample.picos /= sample_size as u128;

            // Remove benchmarking loop overhead.
            sample.picos = sample.picos.saturating_sub(loop_overhead.picos);

            min_sample = min_sample.clamp_to_min(sample);
        }

        min_sample
    }
}

/// [`Timer`] kind.
#[derive(Clone, Copy, Default)]
pub(crate) enum TimerKind {
    /// Operating system timer.
    #[default]
    Os,

    /// CPU timestamp counter.
    Tsc,
}

/// The measured overhead of various benchmarking operations.
pub(crate) struct TimedOverhead {
    pub sample_loop: FineDuration,
    pub tally_alloc: FineDuration,
    pub tally_dealloc: FineDuration,
    pub tally_realloc: FineDuration,
}

impl TimedOverhead {
    pub const ZERO: Self = Self {
        sample_loop: FineDuration::ZERO,
        tally_alloc: FineDuration::ZERO,
        tally_dealloc: FineDuration::ZERO,
        tally_realloc: FineDuration::ZERO,
    };

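    /// Estimates the total overhead for a sample of `sample_size` iterations,
    /// given the allocation operations recorded in `alloc_info`. Roughly:
    ///
    /// ```text
    /// total = sample_loop * sample_size
    ///       + tally_alloc * alloc_count
    ///       + tally_dealloc * dealloc_count
    ///       + tally_realloc * (grow_count + shrink_count)
    /// ```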
    pub fn total_overhead(&self, sample_size: u32, alloc_info: &ThreadAllocInfo) -> FineDuration {
        let sample_loop_overhead = self.sample_loop.picos.saturating_mul(sample_size as u128);

        let tally_alloc_overhead = self
            .tally_alloc
            .picos
            .saturating_mul(alloc_info.tallies.get(AllocOp::Alloc).count as u128);

        let tally_dealloc_overhead = self
            .tally_dealloc
            .picos
            .saturating_mul(alloc_info.tallies.get(AllocOp::Dealloc).count as u128);

        let tally_realloc_overhead = self.tally_realloc.picos.saturating_mul(
            alloc_info.tallies.get(AllocOp::Grow).count as u128
                + alloc_info.tallies.get(AllocOp::Shrink).count as u128,
        );

        FineDuration {
            picos: sample_loop_overhead
                .saturating_add(tally_alloc_overhead)
                .saturating_add(tally_dealloc_overhead)
                .saturating_add(tally_realloc_overhead),
        }
    }
}

#[cfg(feature = "internal_benches")]
mod benches {
    use super::*;

    #[crate::bench(crate = crate)]
    fn get_tsc() -> Result<Timer, TscUnavailable> {
        Timer::get_tsc()
    }

    mod measure {
        use super::*;

        #[crate::bench(crate = crate)]
        fn precision() -> FineDuration {
            Timer::Os.measure_precision()
        }

        #[crate::bench(crate = crate)]
        fn sample_loop_overhead() -> FineDuration {
            Timer::Os.measure_sample_loop_overhead()
        }

        #[crate::bench(crate = crate)]
        fn tally_alloc_overhead() -> FineDuration {
            Timer::Os.measure_tally_alloc_overhead()
        }

        #[crate::bench(crate = crate)]
        fn tally_dealloc_overhead() -> FineDuration {
            Timer::Os.measure_tally_dealloc_overhead()
        }

        #[crate::bench(crate = crate)]
        fn tally_realloc_overhead() -> FineDuration {
            Timer::Os.measure_tally_realloc_overhead()
        }
    }
}