x86.rs source code [crates/divan/src/time/timestamp/tsc/x86.rs]

1	#[cfg(target_arch = "x86")]
2	use std::arch::x86;
3
4	#[cfg(target_arch = "x86_64")]
5	use std::arch::x86_64 as x86;
6
7	use std::time::{Duration, Instant};
8
9	use crate::time::{fence, TscUnavailable};
10
11	#[inline(always)]
12	pub(crate) fn start_timestamp() -> u64 {
13	// Serialize previous operations before `rdtsc` to ensure they are not
14	// inside the timed section.
15	util::lfence();
16
17	let tsc: u64 = util::rdtsc();
18
19	// Serialize `rdtsc` before any measured code.
20	util::lfence();
21
22	tsc
23	}
24
25	#[inline(always)]
26	pub(crate) fn end_timestamp() -> u64 {
27	// `rdtscp` is serialized after all previous operations.
28	let tsc: u64 = util::rdtscp();
29
30	// Serialize `rdtscp` before any subsequent code.
31	util::lfence();
32
33	tsc
34	}
35
36	pub(crate) fn frequency() -> Result<u64, TscUnavailable> {
37	if !util::tsc_is_available() {
38	return Err(TscUnavailable::MissingInstructions);
39	}
40
41	if !util::tsc_is_invariant() {
42	return Err(TscUnavailable::VariableFrequency);
43	}
44
45	let nominal = nominal_frequency();
46	let measured = measure::measure_frequency();
47
48	// Use the nominal frequency if within 0.1% of the measured frequency.
49	//
50	// The nominal frequency is used for getting an exact value if the measured
51	// frequency is slightly off. It is not blindly trusted because it may not
52	// match the TSC frequency.
53	if let Some(nominal) = nominal {
54	if measured * `0.999` < nominal && nominal < measured * `1.001` {
55	return Ok(nominal.round() as u64);
56	}
57	}
58
59	Ok(measured.round() as u64)
60	}
61
62	/// Parses the CPU frequency in the brand name, e.g. "2.50GHz".
63	fn nominal_frequency() -> Option<f64> {
64	let name = util::cpu_name()?;
65	let name = {
66	let len = name.iter().position(\|&ch\| ch == `0`).unwrap_or(name.len());
67	std::str::from_utf8(&name[..len]).ok()?
68	};
69
70	#[rustfmt::skip]
71	let frequencies = [
72	("MHz", `1e6`),
73	("GHz", `1e9`),
74	("THz", `1e12`),
75	];
76
77	for (unit, scale) in frequencies {
78	let Some(unit_start) = name.find(unit) else {
79	continue;
80	};
81
82	let pre_unit = &name[..unit_start];
83	let num = match pre_unit.rsplit_once(' ') {
84	Some((_, num)) => num,
85	None => pre_unit,
86	};
87
88	if let Ok(num) = num.parse::<f64>() {
89	return Some(num * scale);
90	};
91	}
92
93	None
94	}
95
96	mod util {
97	use super::*;
98
99	#[inline(always)]
100	pub fn rdtsc() -> u64 {
101	fence::compiler_fence();
102
103	// SAFETY: Reading the TSC is memory safe.
104	let tsc = unsafe { x86::_rdtsc() };
105
106	fence::compiler_fence();
107	tsc
108	}
109
110	#[inline(always)]
111	pub fn rdtscp() -> u64 {
112	fence::compiler_fence();
113
114	// SAFETY: Reading the TSC is memory safe.
115	let tsc = unsafe { x86::__rdtscp(&mut `0`) };
116
117	fence::compiler_fence();
118	tsc
119	}
120
121	#[inline(always)]
122	pub fn lfence() {
123	// SAFETY: A load fence is memory safe.
124	unsafe { x86::_mm_lfence() }
125	}
126
127	#[inline]
128	fn cpuid(leaf: u32) -> x86::CpuidResult {
129	// SAFETY: `cpuid` is never unsafe to call.
130	unsafe { x86::__cpuid(leaf) }
131	}
132
133	/// Invokes CPUID and converts its output registers to an ordered array.
134	#[inline]
135	fn cpuid_array(leaf: u32) -> [u32; `4`] {
136	let cpuid = cpuid(leaf);
137	[cpuid.eax, cpuid.ebx, cpuid.ecx, cpuid.edx]
138	}
139
140	/// Returns `true` if the given CPUID leaf is available.
141	#[inline]
142	fn cpuid_has_leaf(leaf: u32) -> bool {
143	cpuid(`0x8000_0000`).eax >= leaf
144	}
145
146	/// Returns `true` if CPUID indicates that the `rdtsc` and `rdtscp`
147	/// instructions are available.
148	#[inline]
149	pub fn tsc_is_available() -> bool {
150	let bits = cpuid(`0x8000_0001`).edx;
151
152	let rdtsc = `1` << `4`;
153	let rdtscp = `1` << `27`;
154
155	bits & (rdtsc \| rdtscp) != `0`
156	}
157
158	/// Returns `true` if CPUID indicates that the timestamp counter has a
159	/// constant frequency.
160	#[inline]
161	pub fn tsc_is_invariant() -> bool {
162	let leaf = `0x8000_0007`;
163
164	if !cpuid_has_leaf(leaf) {
165	return `false`;
166	}
167
168	cpuid(leaf).edx & (`1` << `8`) != `0`
169	}
170
171	/// Returns the processor model name as a null-terminated ASCII string.
172	pub fn cpu_name() -> Option<[u8; `48`]> {
173	if !cpuid_has_leaf(`0x8000_0004`) {
174	return None;
175	}
176
177	#[rustfmt::skip]
178	let result = [
179	cpuid_array(`0x8000_0002`),
180	cpuid_array(`0x8000_0003`),
181	cpuid_array(`0x8000_0004`),
182	];
183
184	// SAFETY: Converting from `u32` to bytes.
185	Some(unsafe { std::mem::transmute(result) })
186	}
187	}
188
189	mod measure {
190	use super::*;
191
192	/// Returns the TSC frequency by measuring it.
193	pub fn measure_frequency() -> f64 {
194	const TRIES: usize = `8`;
195
196	// Start with delay of 1ms up to 256ms (2^TRIES).
197	let mut delay_ms = `1`;
198
199	let mut prev_measure = f64::NEG_INFINITY;
200	let mut measures = [`0.0`; TRIES];
201
202	for slot in &mut measures {
203	let measure = measure_frequency_once(Duration::from_millis(delay_ms));
204
205	// This measurement is sufficiently accurate if within 0.1% of the
206	// previous.
207	if measure * `0.999` < prev_measure && prev_measure < measure * `1.001` {
208	return measure;
209	}
210
211	*slot = measure;
212	prev_measure = measure;
213
214	delay_ms *= `2`;
215	}
216
217	// If no frequencies were within 0.1% of each other, find the frequency
218	// with the smallest delta.
219	let mut min_delta = f64::INFINITY;
220	let mut result_index = `0`;
221
222	for i in `0`..TRIES {
223	for j in (i + `1`)..TRIES {
224	let delta = (measures[i] - measures[j]).abs();
225
226	if delta < min_delta {
227	min_delta = delta;
228	result_index = i;
229	}
230	}
231	}
232
233	measures[result_index]
234	}
235
236	fn measure_frequency_once(delay: Duration) -> f64 {
237	let (start_tsc, start_instant) = tsc_instant_pair();
238	std::thread::sleep(delay);
239	let (end_tsc, end_instant) = tsc_instant_pair();
240
241	let elapsed_tsc = end_tsc.saturating_sub(start_tsc);
242	let elapsed_duration = end_instant.duration_since(start_instant);
243
244	(elapsed_tsc as f64 / elapsed_duration.as_nanos() as f64) * `1e9`
245	}
246
247	/// Returns a timestamp/instant pair that has a small latency between
248	/// getting the two values.
249	fn tsc_instant_pair() -> (u64, Instant) {
250	let mut best_latency = Duration::MAX;
251	let mut best_pair = (`0`, Instant::now());
252
253	// Make up to 100 attempts to get a low latency pair.
254	for _ in `0`..`100` {
255	let instant = Instant::now();
256	let tsc = util::rdtsc();
257	let latency = instant.elapsed();
258
259	let pair = (tsc, instant);
260
261	if latency.is_zero() {
262	return pair;
263	}
264
265	if latency < best_latency {
266	best_latency = latency;
267	best_pair = pair;
268	}
269	}
270
271	best_pair
272	}
273	}
274