| 1 | #[cfg (target_arch = "x86" )] |
| 2 | use std::arch::x86; |
| 3 | |
| 4 | #[cfg (target_arch = "x86_64" )] |
| 5 | use std::arch::x86_64 as x86; |
| 6 | |
| 7 | use std::time::{Duration, Instant}; |
| 8 | |
| 9 | use crate::time::{fence, TscUnavailable}; |
| 10 | |
| 11 | #[inline (always)] |
| 12 | pub(crate) fn start_timestamp() -> u64 { |
| 13 | // Serialize previous operations before `rdtsc` to ensure they are not |
| 14 | // inside the timed section. |
| 15 | util::lfence(); |
| 16 | |
| 17 | let tsc: u64 = util::rdtsc(); |
| 18 | |
| 19 | // Serialize `rdtsc` before any measured code. |
| 20 | util::lfence(); |
| 21 | |
| 22 | tsc |
| 23 | } |
| 24 | |
| 25 | #[inline (always)] |
| 26 | pub(crate) fn end_timestamp() -> u64 { |
| 27 | // `rdtscp` is serialized after all previous operations. |
| 28 | let tsc: u64 = util::rdtscp(); |
| 29 | |
| 30 | // Serialize `rdtscp` before any subsequent code. |
| 31 | util::lfence(); |
| 32 | |
| 33 | tsc |
| 34 | } |
| 35 | |
| 36 | pub(crate) fn frequency() -> Result<u64, TscUnavailable> { |
| 37 | if !util::tsc_is_available() { |
| 38 | return Err(TscUnavailable::MissingInstructions); |
| 39 | } |
| 40 | |
| 41 | if !util::tsc_is_invariant() { |
| 42 | return Err(TscUnavailable::VariableFrequency); |
| 43 | } |
| 44 | |
| 45 | let nominal = nominal_frequency(); |
| 46 | let measured = measure::measure_frequency(); |
| 47 | |
| 48 | // Use the nominal frequency if within 0.1% of the measured frequency. |
| 49 | // |
| 50 | // The nominal frequency is used for getting an exact value if the measured |
| 51 | // frequency is slightly off. It is not blindly trusted because it may not |
| 52 | // match the TSC frequency. |
| 53 | if let Some(nominal) = nominal { |
| 54 | if measured * 0.999 < nominal && nominal < measured * 1.001 { |
| 55 | return Ok(nominal.round() as u64); |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | Ok(measured.round() as u64) |
| 60 | } |
| 61 | |
| 62 | /// Parses the CPU frequency in the brand name, e.g. "2.50GHz". |
| 63 | fn nominal_frequency() -> Option<f64> { |
| 64 | let name = util::cpu_name()?; |
| 65 | let name = { |
| 66 | let len = name.iter().position(|&ch| ch == 0).unwrap_or(name.len()); |
| 67 | std::str::from_utf8(&name[..len]).ok()? |
| 68 | }; |
| 69 | |
| 70 | #[rustfmt::skip] |
| 71 | let frequencies = [ |
| 72 | ("MHz" , 1e6), |
| 73 | ("GHz" , 1e9), |
| 74 | ("THz" , 1e12), |
| 75 | ]; |
| 76 | |
| 77 | for (unit, scale) in frequencies { |
| 78 | let Some(unit_start) = name.find(unit) else { |
| 79 | continue; |
| 80 | }; |
| 81 | |
| 82 | let pre_unit = &name[..unit_start]; |
| 83 | let num = match pre_unit.rsplit_once(' ' ) { |
| 84 | Some((_, num)) => num, |
| 85 | None => pre_unit, |
| 86 | }; |
| 87 | |
| 88 | if let Ok(num) = num.parse::<f64>() { |
| 89 | return Some(num * scale); |
| 90 | }; |
| 91 | } |
| 92 | |
| 93 | None |
| 94 | } |
| 95 | |
| 96 | mod util { |
| 97 | use super::*; |
| 98 | |
| 99 | #[inline (always)] |
| 100 | pub fn rdtsc() -> u64 { |
| 101 | fence::compiler_fence(); |
| 102 | |
| 103 | // SAFETY: Reading the TSC is memory safe. |
| 104 | let tsc = unsafe { x86::_rdtsc() }; |
| 105 | |
| 106 | fence::compiler_fence(); |
| 107 | tsc |
| 108 | } |
| 109 | |
| 110 | #[inline (always)] |
| 111 | pub fn rdtscp() -> u64 { |
| 112 | fence::compiler_fence(); |
| 113 | |
| 114 | // SAFETY: Reading the TSC is memory safe. |
| 115 | let tsc = unsafe { x86::__rdtscp(&mut 0) }; |
| 116 | |
| 117 | fence::compiler_fence(); |
| 118 | tsc |
| 119 | } |
| 120 | |
| 121 | #[inline (always)] |
| 122 | pub fn lfence() { |
| 123 | // SAFETY: A load fence is memory safe. |
| 124 | unsafe { x86::_mm_lfence() } |
| 125 | } |
| 126 | |
| 127 | #[inline ] |
| 128 | fn cpuid(leaf: u32) -> x86::CpuidResult { |
| 129 | // SAFETY: `cpuid` is never unsafe to call. |
| 130 | unsafe { x86::__cpuid(leaf) } |
| 131 | } |
| 132 | |
| 133 | /// Invokes CPUID and converts its output registers to an ordered array. |
| 134 | #[inline ] |
| 135 | fn cpuid_array(leaf: u32) -> [u32; 4] { |
| 136 | let cpuid = cpuid(leaf); |
| 137 | [cpuid.eax, cpuid.ebx, cpuid.ecx, cpuid.edx] |
| 138 | } |
| 139 | |
| 140 | /// Returns `true` if the given CPUID leaf is available. |
| 141 | #[inline ] |
| 142 | fn cpuid_has_leaf(leaf: u32) -> bool { |
| 143 | cpuid(0x8000_0000).eax >= leaf |
| 144 | } |
| 145 | |
| 146 | /// Returns `true` if CPUID indicates that the `rdtsc` and `rdtscp` |
| 147 | /// instructions are available. |
| 148 | #[inline ] |
| 149 | pub fn tsc_is_available() -> bool { |
| 150 | let bits = cpuid(0x8000_0001).edx; |
| 151 | |
| 152 | let rdtsc = 1 << 4; |
| 153 | let rdtscp = 1 << 27; |
| 154 | |
| 155 | bits & (rdtsc | rdtscp) != 0 |
| 156 | } |
| 157 | |
| 158 | /// Returns `true` if CPUID indicates that the timestamp counter has a |
| 159 | /// constant frequency. |
| 160 | #[inline ] |
| 161 | pub fn tsc_is_invariant() -> bool { |
| 162 | let leaf = 0x8000_0007; |
| 163 | |
| 164 | if !cpuid_has_leaf(leaf) { |
| 165 | return false; |
| 166 | } |
| 167 | |
| 168 | cpuid(leaf).edx & (1 << 8) != 0 |
| 169 | } |
| 170 | |
| 171 | /// Returns the processor model name as a null-terminated ASCII string. |
| 172 | pub fn cpu_name() -> Option<[u8; 48]> { |
| 173 | if !cpuid_has_leaf(0x8000_0004) { |
| 174 | return None; |
| 175 | } |
| 176 | |
| 177 | #[rustfmt::skip] |
| 178 | let result = [ |
| 179 | cpuid_array(0x8000_0002), |
| 180 | cpuid_array(0x8000_0003), |
| 181 | cpuid_array(0x8000_0004), |
| 182 | ]; |
| 183 | |
| 184 | // SAFETY: Converting from `u32` to bytes. |
| 185 | Some(unsafe { std::mem::transmute(result) }) |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | mod measure { |
| 190 | use super::*; |
| 191 | |
| 192 | /// Returns the TSC frequency by measuring it. |
| 193 | pub fn measure_frequency() -> f64 { |
| 194 | const TRIES: usize = 8; |
| 195 | |
| 196 | // Start with delay of 1ms up to 256ms (2^TRIES). |
| 197 | let mut delay_ms = 1; |
| 198 | |
| 199 | let mut prev_measure = f64::NEG_INFINITY; |
| 200 | let mut measures = [0.0; TRIES]; |
| 201 | |
| 202 | for slot in &mut measures { |
| 203 | let measure = measure_frequency_once(Duration::from_millis(delay_ms)); |
| 204 | |
| 205 | // This measurement is sufficiently accurate if within 0.1% of the |
| 206 | // previous. |
| 207 | if measure * 0.999 < prev_measure && prev_measure < measure * 1.001 { |
| 208 | return measure; |
| 209 | } |
| 210 | |
| 211 | *slot = measure; |
| 212 | prev_measure = measure; |
| 213 | |
| 214 | delay_ms *= 2; |
| 215 | } |
| 216 | |
| 217 | // If no frequencies were within 0.1% of each other, find the frequency |
| 218 | // with the smallest delta. |
| 219 | let mut min_delta = f64::INFINITY; |
| 220 | let mut result_index = 0; |
| 221 | |
| 222 | for i in 0..TRIES { |
| 223 | for j in (i + 1)..TRIES { |
| 224 | let delta = (measures[i] - measures[j]).abs(); |
| 225 | |
| 226 | if delta < min_delta { |
| 227 | min_delta = delta; |
| 228 | result_index = i; |
| 229 | } |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | measures[result_index] |
| 234 | } |
| 235 | |
| 236 | fn measure_frequency_once(delay: Duration) -> f64 { |
| 237 | let (start_tsc, start_instant) = tsc_instant_pair(); |
| 238 | std::thread::sleep(delay); |
| 239 | let (end_tsc, end_instant) = tsc_instant_pair(); |
| 240 | |
| 241 | let elapsed_tsc = end_tsc.saturating_sub(start_tsc); |
| 242 | let elapsed_duration = end_instant.duration_since(start_instant); |
| 243 | |
| 244 | (elapsed_tsc as f64 / elapsed_duration.as_nanos() as f64) * 1e9 |
| 245 | } |
| 246 | |
| 247 | /// Returns a timestamp/instant pair that has a small latency between |
| 248 | /// getting the two values. |
| 249 | fn tsc_instant_pair() -> (u64, Instant) { |
| 250 | let mut best_latency = Duration::MAX; |
| 251 | let mut best_pair = (0, Instant::now()); |
| 252 | |
| 253 | // Make up to 100 attempts to get a low latency pair. |
| 254 | for _ in 0..100 { |
| 255 | let instant = Instant::now(); |
| 256 | let tsc = util::rdtsc(); |
| 257 | let latency = instant.elapsed(); |
| 258 | |
| 259 | let pair = (tsc, instant); |
| 260 | |
| 261 | if latency.is_zero() { |
| 262 | return pair; |
| 263 | } |
| 264 | |
| 265 | if latency < best_latency { |
| 266 | best_latency = latency; |
| 267 | best_pair = pair; |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | best_pair |
| 272 | } |
| 273 | } |
| 274 | |