1#[cfg(target_arch = "x86")]
2use std::arch::x86;
3
4#[cfg(target_arch = "x86_64")]
5use std::arch::x86_64 as x86;
6
7use std::time::{Duration, Instant};
8
9use crate::time::{fence, TscUnavailable};
10
11#[inline(always)]
12pub(crate) fn start_timestamp() -> u64 {
13 // Serialize previous operations before `rdtsc` to ensure they are not
14 // inside the timed section.
15 util::lfence();
16
17 let tsc: u64 = util::rdtsc();
18
19 // Serialize `rdtsc` before any measured code.
20 util::lfence();
21
22 tsc
23}
24
25#[inline(always)]
26pub(crate) fn end_timestamp() -> u64 {
27 // `rdtscp` is serialized after all previous operations.
28 let tsc: u64 = util::rdtscp();
29
30 // Serialize `rdtscp` before any subsequent code.
31 util::lfence();
32
33 tsc
34}
35
36pub(crate) fn frequency() -> Result<u64, TscUnavailable> {
37 if !util::tsc_is_available() {
38 return Err(TscUnavailable::MissingInstructions);
39 }
40
41 if !util::tsc_is_invariant() {
42 return Err(TscUnavailable::VariableFrequency);
43 }
44
45 let nominal = nominal_frequency();
46 let measured = measure::measure_frequency();
47
48 // Use the nominal frequency if within 0.1% of the measured frequency.
49 //
50 // The nominal frequency is used for getting an exact value if the measured
51 // frequency is slightly off. It is not blindly trusted because it may not
52 // match the TSC frequency.
53 if let Some(nominal) = nominal {
54 if measured * 0.999 < nominal && nominal < measured * 1.001 {
55 return Ok(nominal.round() as u64);
56 }
57 }
58
59 Ok(measured.round() as u64)
60}
61
62/// Parses the CPU frequency in the brand name, e.g. "2.50GHz".
63fn nominal_frequency() -> Option<f64> {
64 let name = util::cpu_name()?;
65 let name = {
66 let len = name.iter().position(|&ch| ch == 0).unwrap_or(name.len());
67 std::str::from_utf8(&name[..len]).ok()?
68 };
69
70 #[rustfmt::skip]
71 let frequencies = [
72 ("MHz", 1e6),
73 ("GHz", 1e9),
74 ("THz", 1e12),
75 ];
76
77 for (unit, scale) in frequencies {
78 let Some(unit_start) = name.find(unit) else {
79 continue;
80 };
81
82 let pre_unit = &name[..unit_start];
83 let num = match pre_unit.rsplit_once(' ') {
84 Some((_, num)) => num,
85 None => pre_unit,
86 };
87
88 if let Ok(num) = num.parse::<f64>() {
89 return Some(num * scale);
90 };
91 }
92
93 None
94}
95
96mod util {
97 use super::*;
98
99 #[inline(always)]
100 pub fn rdtsc() -> u64 {
101 fence::compiler_fence();
102
103 // SAFETY: Reading the TSC is memory safe.
104 let tsc = unsafe { x86::_rdtsc() };
105
106 fence::compiler_fence();
107 tsc
108 }
109
110 #[inline(always)]
111 pub fn rdtscp() -> u64 {
112 fence::compiler_fence();
113
114 // SAFETY: Reading the TSC is memory safe.
115 let tsc = unsafe { x86::__rdtscp(&mut 0) };
116
117 fence::compiler_fence();
118 tsc
119 }
120
121 #[inline(always)]
122 pub fn lfence() {
123 // SAFETY: A load fence is memory safe.
124 unsafe { x86::_mm_lfence() }
125 }
126
127 #[inline]
128 fn cpuid(leaf: u32) -> x86::CpuidResult {
129 // SAFETY: `cpuid` is never unsafe to call.
130 unsafe { x86::__cpuid(leaf) }
131 }
132
133 /// Invokes CPUID and converts its output registers to an ordered array.
134 #[inline]
135 fn cpuid_array(leaf: u32) -> [u32; 4] {
136 let cpuid = cpuid(leaf);
137 [cpuid.eax, cpuid.ebx, cpuid.ecx, cpuid.edx]
138 }
139
140 /// Returns `true` if the given CPUID leaf is available.
141 #[inline]
142 fn cpuid_has_leaf(leaf: u32) -> bool {
143 cpuid(0x8000_0000).eax >= leaf
144 }
145
146 /// Returns `true` if CPUID indicates that the `rdtsc` and `rdtscp`
147 /// instructions are available.
148 #[inline]
149 pub fn tsc_is_available() -> bool {
150 let bits = cpuid(0x8000_0001).edx;
151
152 let rdtsc = 1 << 4;
153 let rdtscp = 1 << 27;
154
155 bits & (rdtsc | rdtscp) != 0
156 }
157
158 /// Returns `true` if CPUID indicates that the timestamp counter has a
159 /// constant frequency.
160 #[inline]
161 pub fn tsc_is_invariant() -> bool {
162 let leaf = 0x8000_0007;
163
164 if !cpuid_has_leaf(leaf) {
165 return false;
166 }
167
168 cpuid(leaf).edx & (1 << 8) != 0
169 }
170
171 /// Returns the processor model name as a null-terminated ASCII string.
172 pub fn cpu_name() -> Option<[u8; 48]> {
173 if !cpuid_has_leaf(0x8000_0004) {
174 return None;
175 }
176
177 #[rustfmt::skip]
178 let result = [
179 cpuid_array(0x8000_0002),
180 cpuid_array(0x8000_0003),
181 cpuid_array(0x8000_0004),
182 ];
183
184 // SAFETY: Converting from `u32` to bytes.
185 Some(unsafe { std::mem::transmute(result) })
186 }
187}
188
189mod measure {
190 use super::*;
191
192 /// Returns the TSC frequency by measuring it.
193 pub fn measure_frequency() -> f64 {
194 const TRIES: usize = 8;
195
196 // Start with delay of 1ms up to 256ms (2^TRIES).
197 let mut delay_ms = 1;
198
199 let mut prev_measure = f64::NEG_INFINITY;
200 let mut measures = [0.0; TRIES];
201
202 for slot in &mut measures {
203 let measure = measure_frequency_once(Duration::from_millis(delay_ms));
204
205 // This measurement is sufficiently accurate if within 0.1% of the
206 // previous.
207 if measure * 0.999 < prev_measure && prev_measure < measure * 1.001 {
208 return measure;
209 }
210
211 *slot = measure;
212 prev_measure = measure;
213
214 delay_ms *= 2;
215 }
216
217 // If no frequencies were within 0.1% of each other, find the frequency
218 // with the smallest delta.
219 let mut min_delta = f64::INFINITY;
220 let mut result_index = 0;
221
222 for i in 0..TRIES {
223 for j in (i + 1)..TRIES {
224 let delta = (measures[i] - measures[j]).abs();
225
226 if delta < min_delta {
227 min_delta = delta;
228 result_index = i;
229 }
230 }
231 }
232
233 measures[result_index]
234 }
235
236 fn measure_frequency_once(delay: Duration) -> f64 {
237 let (start_tsc, start_instant) = tsc_instant_pair();
238 std::thread::sleep(delay);
239 let (end_tsc, end_instant) = tsc_instant_pair();
240
241 let elapsed_tsc = end_tsc.saturating_sub(start_tsc);
242 let elapsed_duration = end_instant.duration_since(start_instant);
243
244 (elapsed_tsc as f64 / elapsed_duration.as_nanos() as f64) * 1e9
245 }
246
247 /// Returns a timestamp/instant pair that has a small latency between
248 /// getting the two values.
249 fn tsc_instant_pair() -> (u64, Instant) {
250 let mut best_latency = Duration::MAX;
251 let mut best_pair = (0, Instant::now());
252
253 // Make up to 100 attempts to get a low latency pair.
254 for _ in 0..100 {
255 let instant = Instant::now();
256 let tsc = util::rdtsc();
257 let latency = instant.elapsed();
258
259 let pair = (tsc, instant);
260
261 if latency.is_zero() {
262 return pair;
263 }
264
265 if latency < best_latency {
266 best_latency = latency;
267 best_pair = pair;
268 }
269 }
270
271 best_pair
272 }
273}
274