1 | #[cfg (target_arch = "x86" )] |
2 | use std::arch::x86; |
3 | |
4 | #[cfg (target_arch = "x86_64" )] |
5 | use std::arch::x86_64 as x86; |
6 | |
7 | use std::time::{Duration, Instant}; |
8 | |
9 | use crate::time::{fence, TscUnavailable}; |
10 | |
11 | #[inline (always)] |
12 | pub(crate) fn start_timestamp() -> u64 { |
13 | // Serialize previous operations before `rdtsc` to ensure they are not |
14 | // inside the timed section. |
15 | util::lfence(); |
16 | |
17 | let tsc: u64 = util::rdtsc(); |
18 | |
19 | // Serialize `rdtsc` before any measured code. |
20 | util::lfence(); |
21 | |
22 | tsc |
23 | } |
24 | |
25 | #[inline (always)] |
26 | pub(crate) fn end_timestamp() -> u64 { |
27 | // `rdtscp` is serialized after all previous operations. |
28 | let tsc: u64 = util::rdtscp(); |
29 | |
30 | // Serialize `rdtscp` before any subsequent code. |
31 | util::lfence(); |
32 | |
33 | tsc |
34 | } |
35 | |
36 | pub(crate) fn frequency() -> Result<u64, TscUnavailable> { |
37 | if !util::tsc_is_available() { |
38 | return Err(TscUnavailable::MissingInstructions); |
39 | } |
40 | |
41 | if !util::tsc_is_invariant() { |
42 | return Err(TscUnavailable::VariableFrequency); |
43 | } |
44 | |
45 | let nominal = nominal_frequency(); |
46 | let measured = measure::measure_frequency(); |
47 | |
48 | // Use the nominal frequency if within 0.1% of the measured frequency. |
49 | // |
50 | // The nominal frequency is used for getting an exact value if the measured |
51 | // frequency is slightly off. It is not blindly trusted because it may not |
52 | // match the TSC frequency. |
53 | if let Some(nominal) = nominal { |
54 | if measured * 0.999 < nominal && nominal < measured * 1.001 { |
55 | return Ok(nominal.round() as u64); |
56 | } |
57 | } |
58 | |
59 | Ok(measured.round() as u64) |
60 | } |
61 | |
62 | /// Parses the CPU frequency in the brand name, e.g. "2.50GHz". |
63 | fn nominal_frequency() -> Option<f64> { |
64 | let name = util::cpu_name()?; |
65 | let name = { |
66 | let len = name.iter().position(|&ch| ch == 0).unwrap_or(name.len()); |
67 | std::str::from_utf8(&name[..len]).ok()? |
68 | }; |
69 | |
70 | #[rustfmt::skip] |
71 | let frequencies = [ |
72 | ("MHz" , 1e6), |
73 | ("GHz" , 1e9), |
74 | ("THz" , 1e12), |
75 | ]; |
76 | |
77 | for (unit, scale) in frequencies { |
78 | let Some(unit_start) = name.find(unit) else { |
79 | continue; |
80 | }; |
81 | |
82 | let pre_unit = &name[..unit_start]; |
83 | let num = match pre_unit.rsplit_once(' ' ) { |
84 | Some((_, num)) => num, |
85 | None => pre_unit, |
86 | }; |
87 | |
88 | if let Ok(num) = num.parse::<f64>() { |
89 | return Some(num * scale); |
90 | }; |
91 | } |
92 | |
93 | None |
94 | } |
95 | |
96 | mod util { |
97 | use super::*; |
98 | |
99 | #[inline (always)] |
100 | pub fn rdtsc() -> u64 { |
101 | fence::compiler_fence(); |
102 | |
103 | // SAFETY: Reading the TSC is memory safe. |
104 | let tsc = unsafe { x86::_rdtsc() }; |
105 | |
106 | fence::compiler_fence(); |
107 | tsc |
108 | } |
109 | |
110 | #[inline (always)] |
111 | pub fn rdtscp() -> u64 { |
112 | fence::compiler_fence(); |
113 | |
114 | // SAFETY: Reading the TSC is memory safe. |
115 | let tsc = unsafe { x86::__rdtscp(&mut 0) }; |
116 | |
117 | fence::compiler_fence(); |
118 | tsc |
119 | } |
120 | |
121 | #[inline (always)] |
122 | pub fn lfence() { |
123 | // SAFETY: A load fence is memory safe. |
124 | unsafe { x86::_mm_lfence() } |
125 | } |
126 | |
127 | #[inline ] |
128 | fn cpuid(leaf: u32) -> x86::CpuidResult { |
129 | // SAFETY: `cpuid` is never unsafe to call. |
130 | unsafe { x86::__cpuid(leaf) } |
131 | } |
132 | |
133 | /// Invokes CPUID and converts its output registers to an ordered array. |
134 | #[inline ] |
135 | fn cpuid_array(leaf: u32) -> [u32; 4] { |
136 | let cpuid = cpuid(leaf); |
137 | [cpuid.eax, cpuid.ebx, cpuid.ecx, cpuid.edx] |
138 | } |
139 | |
140 | /// Returns `true` if the given CPUID leaf is available. |
141 | #[inline ] |
142 | fn cpuid_has_leaf(leaf: u32) -> bool { |
143 | cpuid(0x8000_0000).eax >= leaf |
144 | } |
145 | |
146 | /// Returns `true` if CPUID indicates that the `rdtsc` and `rdtscp` |
147 | /// instructions are available. |
148 | #[inline ] |
149 | pub fn tsc_is_available() -> bool { |
150 | let bits = cpuid(0x8000_0001).edx; |
151 | |
152 | let rdtsc = 1 << 4; |
153 | let rdtscp = 1 << 27; |
154 | |
155 | bits & (rdtsc | rdtscp) != 0 |
156 | } |
157 | |
158 | /// Returns `true` if CPUID indicates that the timestamp counter has a |
159 | /// constant frequency. |
160 | #[inline ] |
161 | pub fn tsc_is_invariant() -> bool { |
162 | let leaf = 0x8000_0007; |
163 | |
164 | if !cpuid_has_leaf(leaf) { |
165 | return false; |
166 | } |
167 | |
168 | cpuid(leaf).edx & (1 << 8) != 0 |
169 | } |
170 | |
171 | /// Returns the processor model name as a null-terminated ASCII string. |
172 | pub fn cpu_name() -> Option<[u8; 48]> { |
173 | if !cpuid_has_leaf(0x8000_0004) { |
174 | return None; |
175 | } |
176 | |
177 | #[rustfmt::skip] |
178 | let result = [ |
179 | cpuid_array(0x8000_0002), |
180 | cpuid_array(0x8000_0003), |
181 | cpuid_array(0x8000_0004), |
182 | ]; |
183 | |
184 | // SAFETY: Converting from `u32` to bytes. |
185 | Some(unsafe { std::mem::transmute(result) }) |
186 | } |
187 | } |
188 | |
189 | mod measure { |
190 | use super::*; |
191 | |
192 | /// Returns the TSC frequency by measuring it. |
193 | pub fn measure_frequency() -> f64 { |
194 | const TRIES: usize = 8; |
195 | |
196 | // Start with delay of 1ms up to 256ms (2^TRIES). |
197 | let mut delay_ms = 1; |
198 | |
199 | let mut prev_measure = f64::NEG_INFINITY; |
200 | let mut measures = [0.0; TRIES]; |
201 | |
202 | for slot in &mut measures { |
203 | let measure = measure_frequency_once(Duration::from_millis(delay_ms)); |
204 | |
205 | // This measurement is sufficiently accurate if within 0.1% of the |
206 | // previous. |
207 | if measure * 0.999 < prev_measure && prev_measure < measure * 1.001 { |
208 | return measure; |
209 | } |
210 | |
211 | *slot = measure; |
212 | prev_measure = measure; |
213 | |
214 | delay_ms *= 2; |
215 | } |
216 | |
217 | // If no frequencies were within 0.1% of each other, find the frequency |
218 | // with the smallest delta. |
219 | let mut min_delta = f64::INFINITY; |
220 | let mut result_index = 0; |
221 | |
222 | for i in 0..TRIES { |
223 | for j in (i + 1)..TRIES { |
224 | let delta = (measures[i] - measures[j]).abs(); |
225 | |
226 | if delta < min_delta { |
227 | min_delta = delta; |
228 | result_index = i; |
229 | } |
230 | } |
231 | } |
232 | |
233 | measures[result_index] |
234 | } |
235 | |
236 | fn measure_frequency_once(delay: Duration) -> f64 { |
237 | let (start_tsc, start_instant) = tsc_instant_pair(); |
238 | std::thread::sleep(delay); |
239 | let (end_tsc, end_instant) = tsc_instant_pair(); |
240 | |
241 | let elapsed_tsc = end_tsc.saturating_sub(start_tsc); |
242 | let elapsed_duration = end_instant.duration_since(start_instant); |
243 | |
244 | (elapsed_tsc as f64 / elapsed_duration.as_nanos() as f64) * 1e9 |
245 | } |
246 | |
247 | /// Returns a timestamp/instant pair that has a small latency between |
248 | /// getting the two values. |
249 | fn tsc_instant_pair() -> (u64, Instant) { |
250 | let mut best_latency = Duration::MAX; |
251 | let mut best_pair = (0, Instant::now()); |
252 | |
253 | // Make up to 100 attempts to get a low latency pair. |
254 | for _ in 0..100 { |
255 | let instant = Instant::now(); |
256 | let tsc = util::rdtsc(); |
257 | let latency = instant.elapsed(); |
258 | |
259 | let pair = (tsc, instant); |
260 | |
261 | if latency.is_zero() { |
262 | return pair; |
263 | } |
264 | |
265 | if latency < best_latency { |
266 | best_latency = latency; |
267 | best_pair = pair; |
268 | } |
269 | } |
270 | |
271 | best_pair |
272 | } |
273 | } |
274 | |