| 1 | #![stable (feature = "core_hint" , since = "1.27.0" )] |
| 2 | |
//! Hints to the compiler that affect how code should be emitted or optimized.
| 4 | //! |
| 5 | //! Hints may be compile time or runtime. |
| 6 | |
| 7 | use crate::marker::Destruct; |
| 8 | use crate::mem::MaybeUninit; |
| 9 | use crate::{intrinsics, ub_checks}; |
| 10 | |
| 11 | /// Informs the compiler that the site which is calling this function is not |
| 12 | /// reachable, possibly enabling further optimizations. |
| 13 | /// |
| 14 | /// # Safety |
| 15 | /// |
| 16 | /// Reaching this function is *Undefined Behavior*. |
| 17 | /// |
| 18 | /// As the compiler assumes that all forms of Undefined Behavior can never |
| 19 | /// happen, it will eliminate all branches in the surrounding code that it can |
| 20 | /// determine will invariably lead to a call to `unreachable_unchecked()`. |
| 21 | /// |
| 22 | /// If the assumptions embedded in using this function turn out to be wrong - |
| 23 | /// that is, if the site which is calling `unreachable_unchecked()` is actually |
| 24 | /// reachable at runtime - the compiler may have generated nonsensical machine |
| 25 | /// instructions for this situation, including in seemingly unrelated code, |
| 26 | /// causing difficult-to-debug problems. |
| 27 | /// |
| 28 | /// Use this function sparingly. Consider using the [`unreachable!`] macro, |
| 29 | /// which may prevent some optimizations but will safely panic in case it is |
| 30 | /// actually reached at runtime. Benchmark your code to find out if using |
| 31 | /// `unreachable_unchecked()` comes with a performance benefit. |
| 32 | /// |
| 33 | /// # Examples |
| 34 | /// |
| 35 | /// `unreachable_unchecked()` can be used in situations where the compiler |
| 36 | /// can't prove invariants that were previously established. Such situations |
| 37 | /// have a higher chance of occurring if those invariants are upheld by |
| 38 | /// external code that the compiler can't analyze. |
| 39 | /// ``` |
| 40 | /// fn prepare_inputs(divisors: &mut Vec<u32>) { |
| 41 | /// // Note to future-self when making changes: The invariant established |
| 42 | /// // here is NOT checked in `do_computation()`; if this changes, you HAVE |
| 43 | /// // to change `do_computation()`. |
| 44 | /// divisors.retain(|divisor| *divisor != 0) |
| 45 | /// } |
| 46 | /// |
| 47 | /// /// # Safety |
/// /// All elements of `divisors` must be non-zero.
| 49 | /// unsafe fn do_computation(i: u32, divisors: &[u32]) -> u32 { |
| 50 | /// divisors.iter().fold(i, |acc, divisor| { |
| 51 | /// // Convince the compiler that a division by zero can't happen here |
| 52 | /// // and a check is not needed below. |
| 53 | /// if *divisor == 0 { |
| 54 | /// // Safety: `divisor` can't be zero because of `prepare_inputs`, |
| 55 | /// // but the compiler does not know about this. We *promise* |
| 56 | /// // that we always call `prepare_inputs`. |
| 57 | /// unsafe { std::hint::unreachable_unchecked() } |
| 58 | /// } |
| 59 | /// // The compiler would normally introduce a check here that prevents |
| 60 | /// // a division by zero. However, if `divisor` was zero, the branch |
| 61 | /// // above would reach what we explicitly marked as unreachable. |
| 62 | /// // The compiler concludes that `divisor` can't be zero at this point |
| 63 | /// // and removes the - now proven useless - check. |
| 64 | /// acc / divisor |
| 65 | /// }) |
| 66 | /// } |
| 67 | /// |
| 68 | /// let mut divisors = vec![2, 0, 4]; |
| 69 | /// prepare_inputs(&mut divisors); |
| 70 | /// let result = unsafe { |
/// // Safety: prepare_inputs() guarantees that all elements of `divisors` are non-zero
| 72 | /// do_computation(100, &divisors) |
| 73 | /// }; |
| 74 | /// assert_eq!(result, 12); |
| 75 | /// |
| 76 | /// ``` |
| 77 | /// |
| 78 | /// While using `unreachable_unchecked()` is perfectly sound in the following |
| 79 | /// example, as the compiler is able to prove that a division by zero is not |
| 80 | /// possible, benchmarking reveals that `unreachable_unchecked()` provides |
| 81 | /// no benefit over using [`unreachable!`], while the latter does not introduce |
| 82 | /// the possibility of Undefined Behavior. |
| 83 | /// |
| 84 | /// ``` |
| 85 | /// fn div_1(a: u32, b: u32) -> u32 { |
| 86 | /// use std::hint::unreachable_unchecked; |
| 87 | /// |
| 88 | /// // `b.saturating_add(1)` is always positive (not zero), |
| 89 | /// // hence `checked_div` will never return `None`. |
| 90 | /// // Therefore, the else branch is unreachable. |
| 91 | /// a.checked_div(b.saturating_add(1)) |
| 92 | /// .unwrap_or_else(|| unsafe { unreachable_unchecked() }) |
| 93 | /// } |
| 94 | /// |
| 95 | /// assert_eq!(div_1(7, 0), 7); |
| 96 | /// assert_eq!(div_1(9, 1), 4); |
| 97 | /// assert_eq!(div_1(11, u32::MAX), 0); |
| 98 | /// ``` |
#[inline]
#[stable(feature = "unreachable", since = "1.27.0")]
#[rustc_const_stable(feature = "const_unreachable_unchecked", since = "1.57.0")]
#[track_caller]
pub const unsafe fn unreachable_unchecked() -> ! {
    // When UB checks are enabled (e.g. in builds of core with debug
    // assertions), turn the immediate UB into a diagnosable panic: the
    // unconditional `false` means that merely *reaching* this call already
    // violates the safety contract.
    ub_checks::assert_unsafe_precondition!(
        check_language_ub,
        "hint::unreachable_unchecked must never be reached",
        () => false
    );
    // SAFETY: the safety contract for `intrinsics::unreachable` must
    // be upheld by the caller.
    unsafe { intrinsics::unreachable() }
}
| 113 | |
| 114 | /// Makes a *soundness* promise to the compiler that `cond` holds. |
| 115 | /// |
| 116 | /// This may allow the optimizer to simplify things, but it might also make the generated code |
| 117 | /// slower. Either way, calling it will most likely make compilation take longer. |
| 118 | /// |
| 119 | /// You may know this from other places as |
| 120 | /// [`llvm.assume`](https://llvm.org/docs/LangRef.html#llvm-assume-intrinsic) or, in C, |
| 121 | /// [`__builtin_assume`](https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume). |
| 122 | /// |
| 123 | /// This promotes a correctness requirement to a soundness requirement. Don't do that without |
| 124 | /// very good reason. |
| 125 | /// |
| 126 | /// # Usage |
| 127 | /// |
| 128 | /// This is a situational tool for micro-optimization, and is allowed to do nothing. Any use |
| 129 | /// should come with a repeatable benchmark to show the value, with the expectation to drop it |
| 130 | /// later should the optimizer get smarter and no longer need it. |
| 131 | /// |
| 132 | /// The more complicated the condition, the less likely this is to be useful. For example, |
| 133 | /// `assert_unchecked(foo.is_sorted())` is a complex enough value that the compiler is unlikely |
| 134 | /// to be able to take advantage of it. |
| 135 | /// |
| 136 | /// There's also no need to `assert_unchecked` basic properties of things. For example, the |
| 137 | /// compiler already knows the range of `count_ones`, so there is no benefit to |
| 138 | /// `let n = u32::count_ones(x); assert_unchecked(n <= u32::BITS);`. |
| 139 | /// |
| 140 | /// `assert_unchecked` is logically equivalent to `if !cond { unreachable_unchecked(); }`. If |
| 141 | /// ever you are tempted to write `assert_unchecked(false)`, you should instead use |
| 142 | /// [`unreachable_unchecked()`] directly. |
| 143 | /// |
| 144 | /// # Safety |
| 145 | /// |
| 146 | /// `cond` must be `true`. It is immediate UB to call this with `false`. |
| 147 | /// |
| 148 | /// # Example |
| 149 | /// |
| 150 | /// ``` |
| 151 | /// use core::hint; |
| 152 | /// |
| 153 | /// /// # Safety |
| 154 | /// /// |
| 155 | /// /// `p` must be nonnull and valid |
| 156 | /// pub unsafe fn next_value(p: *const i32) -> i32 { |
| 157 | /// // SAFETY: caller invariants guarantee that `p` is not null |
| 158 | /// unsafe { hint::assert_unchecked(!p.is_null()) } |
| 159 | /// |
| 160 | /// if p.is_null() { |
| 161 | /// return -1; |
| 162 | /// } else { |
| 163 | /// // SAFETY: caller invariants guarantee that `p` is valid |
| 164 | /// unsafe { *p + 1 } |
| 165 | /// } |
| 166 | /// } |
| 167 | /// ``` |
| 168 | /// |
| 169 | /// Without the `assert_unchecked`, the above function produces the following with optimizations |
| 170 | /// enabled: |
| 171 | /// |
| 172 | /// ```asm |
| 173 | /// next_value: |
| 174 | /// test rdi, rdi |
| 175 | /// je .LBB0_1 |
| 176 | /// mov eax, dword ptr [rdi] |
| 177 | /// inc eax |
| 178 | /// ret |
| 179 | /// .LBB0_1: |
| 180 | /// mov eax, -1 |
| 181 | /// ret |
| 182 | /// ``` |
| 183 | /// |
| 184 | /// Adding the assertion allows the optimizer to remove the extra check: |
| 185 | /// |
| 186 | /// ```asm |
| 187 | /// next_value: |
| 188 | /// mov eax, dword ptr [rdi] |
| 189 | /// inc eax |
| 190 | /// ret |
| 191 | /// ``` |
| 192 | /// |
| 193 | /// This example is quite unlike anything that would be used in the real world: it is redundant |
| 194 | /// to put an assertion right next to code that checks the same thing, and dereferencing a |
| 195 | /// pointer already has the builtin assumption that it is nonnull. However, it illustrates the |
| 196 | /// kind of changes the optimizer can make even when the behavior is less obviously related. |
#[track_caller]
#[inline(always)]
#[doc(alias = "assume")]
#[stable(feature = "hint_assert_unchecked", since = "1.81.0")]
#[rustc_const_stable(feature = "hint_assert_unchecked", since = "1.81.0")]
pub const unsafe fn assert_unchecked(cond: bool) {
    // SAFETY: The caller promised `cond` is true.
    unsafe {
        // When UB checks are enabled, report a violated contract (a false
        // `cond`) as a panic here rather than feeding it to `assume` below,
        // where it would be immediate UB.
        ub_checks::assert_unsafe_precondition!(
            check_language_ub,
            "hint::assert_unchecked must never be called when the condition is false",
            (cond: bool = cond) => cond,
        );
        crate::intrinsics::assume(cond);
    }
}
| 213 | |
| 214 | /// Emits a machine instruction to signal the processor that it is running in |
| 215 | /// a busy-wait spin-loop ("spin lock"). |
| 216 | /// |
| 217 | /// Upon receiving the spin-loop signal the processor can optimize its behavior by, |
| 218 | /// for example, saving power or switching hyper-threads. |
| 219 | /// |
| 220 | /// This function is different from [`thread::yield_now`] which directly |
| 221 | /// yields to the system's scheduler, whereas `spin_loop` does not interact |
| 222 | /// with the operating system. |
| 223 | /// |
| 224 | /// A common use case for `spin_loop` is implementing bounded optimistic |
| 225 | /// spinning in a CAS loop in synchronization primitives. To avoid problems |
| 226 | /// like priority inversion, it is strongly recommended that the spin loop is |
| 227 | /// terminated after a finite amount of iterations and an appropriate blocking |
| 228 | /// syscall is made. |
| 229 | /// |
| 230 | /// **Note**: On platforms that do not support receiving spin-loop hints this |
| 231 | /// function does not do anything at all. |
| 232 | /// |
| 233 | /// # Examples |
| 234 | /// |
| 235 | /// ```ignore-wasm |
| 236 | /// use std::sync::atomic::{AtomicBool, Ordering}; |
| 237 | /// use std::sync::Arc; |
| 238 | /// use std::{hint, thread}; |
| 239 | /// |
| 240 | /// // A shared atomic value that threads will use to coordinate |
| 241 | /// let live = Arc::new(AtomicBool::new(false)); |
| 242 | /// |
| 243 | /// // In a background thread we'll eventually set the value |
| 244 | /// let bg_work = { |
| 245 | /// let live = live.clone(); |
| 246 | /// thread::spawn(move || { |
| 247 | /// // Do some work, then make the value live |
| 248 | /// do_some_work(); |
| 249 | /// live.store(true, Ordering::Release); |
| 250 | /// }) |
| 251 | /// }; |
| 252 | /// |
| 253 | /// // Back on our current thread, we wait for the value to be set |
| 254 | /// while !live.load(Ordering::Acquire) { |
| 255 | /// // The spin loop is a hint to the CPU that we're waiting, but probably |
| 256 | /// // not for very long |
| 257 | /// hint::spin_loop(); |
| 258 | /// } |
| 259 | /// |
| 260 | /// // The value is now set |
| 261 | /// # fn do_some_work() {} |
| 262 | /// do_some_work(); |
| 263 | /// bg_work.join()?; |
| 264 | /// # Ok::<(), Box<dyn core::any::Any + Send + 'static>>(()) |
| 265 | /// ``` |
| 266 | /// |
| 267 | /// [`thread::yield_now`]: ../../std/thread/fn.yield_now.html |
#[inline(always)]
#[stable(feature = "renamed_spin_loop", since = "1.49.0")]
pub fn spin_loop() {
    // Dispatch at compile time to the target-specific spin-wait hint
    // instruction; the trailing `_` arm makes this a no-op on targets
    // without one.
    crate::cfg_select! {
        miri => {
            unsafe extern "Rust" {
                safe fn miri_spin_loop();
            }

            // Miri does support some of the intrinsics that are called below, but to guarantee
            // consistent behavior across targets, this custom function is used.
            miri_spin_loop();
        }
        target_arch = "x86" => {
            // SAFETY: the `cfg` attr ensures that we only execute this on x86 targets.
            crate::arch::x86::_mm_pause()
        }
        target_arch = "x86_64" => {
            // SAFETY: the `cfg` attr ensures that we only execute this on x86_64 targets.
            crate::arch::x86_64::_mm_pause()
        }
        target_arch = "riscv32" => crate::arch::riscv32::pause(),
        target_arch = "riscv64" => crate::arch::riscv64::pause(),
        any(target_arch = "aarch64", target_arch = "arm64ec") => {
            // SAFETY: the `cfg` attr ensures that we only execute this on aarch64 targets.
            unsafe { crate::arch::aarch64::__isb(crate::arch::aarch64::SY) }
        }
        all(
            target_arch = "arm",
            any(
                all(target_feature = "v6k", not(target_feature = "thumb-mode")),
                target_feature = "v6t2",
                all(target_feature = "v6", target_feature = "mclass"),
            )
        ) => {
            // SAFETY: the `cfg` attr ensures that we only execute this on arm
            // targets with support for this feature. On ARMv6 in Thumb
            // mode, T2 is required (see Arm DDI0406C Section A8.8.427),
            // otherwise ARMv6-M or ARMv6K is enough
            unsafe { crate::arch::arm::__yield() }
        }
        target_arch = "loongarch32" => crate::arch::loongarch32::ibar::<0>(),
        target_arch = "loongarch64" => crate::arch::loongarch64::ibar::<0>(),
        _ => { /* do nothing */ }
    }
}
| 314 | |
| 315 | /// An identity function that *__hints__* to the compiler to be maximally pessimistic about what |
| 316 | /// `black_box` could do. |
| 317 | /// |
| 318 | /// Unlike [`std::convert::identity`], a Rust compiler is encouraged to assume that `black_box` can |
| 319 | /// use `dummy` in any possible valid way that Rust code is allowed to without introducing undefined |
| 320 | /// behavior in the calling code. This property makes `black_box` useful for writing code in which |
| 321 | /// certain optimizations are not desired, such as benchmarks. |
| 322 | /// |
| 323 | /// <div class="warning"> |
| 324 | /// |
| 325 | /// Note however, that `black_box` is only (and can only be) provided on a "best-effort" basis. The |
| 326 | /// extent to which it can block optimisations may vary depending upon the platform and code-gen |
| 327 | /// backend used. Programs cannot rely on `black_box` for *correctness*, beyond it behaving as the |
| 328 | /// identity function. As such, it **must not be relied upon to control critical program behavior.** |
| 329 | /// This also means that this function does not offer any guarantees for cryptographic or security |
| 330 | /// purposes. |
| 331 | /// |
| 332 | /// This limitation is not specific to `black_box`; there is no mechanism in the entire Rust |
| 333 | /// language that can provide the guarantees required for constant-time cryptography. |
| 334 | /// (There is also no such mechanism in LLVM, so the same is true for every other LLVM-based compiler.) |
| 335 | /// |
| 336 | /// </div> |
| 337 | /// |
| 338 | /// [`std::convert::identity`]: crate::convert::identity |
| 339 | /// |
| 340 | /// # When is this useful? |
| 341 | /// |
| 342 | /// While not suitable in those mission-critical cases, `black_box`'s functionality can generally be |
| 343 | /// relied upon for benchmarking, and should be used there. It will try to ensure that the |
| 344 | /// compiler doesn't optimize away part of the intended test code based on context. For |
| 345 | /// example: |
| 346 | /// |
| 347 | /// ``` |
| 348 | /// fn contains(haystack: &[&str], needle: &str) -> bool { |
| 349 | /// haystack.iter().any(|x| x == &needle) |
| 350 | /// } |
| 351 | /// |
| 352 | /// pub fn benchmark() { |
| 353 | /// let haystack = vec!["abc" , "def" , "ghi" , "jkl" , "mno" ]; |
| 354 | /// let needle = "ghi" ; |
| 355 | /// for _ in 0..10 { |
| 356 | /// contains(&haystack, needle); |
| 357 | /// } |
| 358 | /// } |
| 359 | /// ``` |
| 360 | /// |
| 361 | /// The compiler could theoretically make optimizations like the following: |
| 362 | /// |
| 363 | /// - The `needle` and `haystack` do not change, move the call to `contains` outside the loop and |
| 364 | /// delete the loop |
| 365 | /// - Inline `contains` |
| 366 | /// - `needle` and `haystack` have values known at compile time, `contains` is always true. Remove |
| 367 | /// the call and replace with `true` |
| 368 | /// - Nothing is done with the result of `contains`: delete this function call entirely |
| 369 | /// - `benchmark` now has no purpose: delete this function |
| 370 | /// |
| 371 | /// It is not likely that all of the above happens, but the compiler is definitely able to make some |
| 372 | /// optimizations that could result in a very inaccurate benchmark. This is where `black_box` comes |
| 373 | /// in: |
| 374 | /// |
| 375 | /// ``` |
| 376 | /// use std::hint::black_box; |
| 377 | /// |
| 378 | /// // Same `contains` function. |
| 379 | /// fn contains(haystack: &[&str], needle: &str) -> bool { |
| 380 | /// haystack.iter().any(|x| x == &needle) |
| 381 | /// } |
| 382 | /// |
| 383 | /// pub fn benchmark() { |
| 384 | /// let haystack = vec!["abc" , "def" , "ghi" , "jkl" , "mno" ]; |
| 385 | /// let needle = "ghi" ; |
| 386 | /// for _ in 0..10 { |
| 387 | /// // Force the compiler to run `contains`, even though it is a pure function whose |
| 388 | /// // results are unused. |
| 389 | /// black_box(contains( |
| 390 | /// // Prevent the compiler from making assumptions about the input. |
| 391 | /// black_box(&haystack), |
| 392 | /// black_box(needle), |
| 393 | /// )); |
| 394 | /// } |
| 395 | /// } |
| 396 | /// ``` |
| 397 | /// |
| 398 | /// This essentially tells the compiler to block optimizations across any calls to `black_box`. So, |
| 399 | /// it now: |
| 400 | /// |
| 401 | /// - Treats both arguments to `contains` as unpredictable: the body of `contains` can no longer be |
| 402 | /// optimized based on argument values |
| 403 | /// - Treats the call to `contains` and its result as volatile: the body of `benchmark` cannot |
| 404 | /// optimize this away |
| 405 | /// |
| 406 | /// This makes our benchmark much more realistic to how the function would actually be used, where |
| 407 | /// arguments are usually not known at compile time and the result is used in some way. |
| 408 | /// |
| 409 | /// # How to use this |
| 410 | /// |
| 411 | /// In practice, `black_box` serves two purposes: |
| 412 | /// |
| 413 | /// 1. It prevents the compiler from making optimizations related to the value returned by `black_box` |
| 414 | /// 2. It forces the value passed to `black_box` to be calculated, even if the return value of `black_box` is unused |
| 415 | /// |
| 416 | /// ``` |
| 417 | /// use std::hint::black_box; |
| 418 | /// |
| 419 | /// let zero = 0; |
| 420 | /// let five = 5; |
| 421 | /// |
| 422 | /// // The compiler will see this and remove the `* five` call, because it knows that multiplying |
| 423 | /// // any integer by 0 will result in 0. |
| 424 | /// let c = zero * five; |
| 425 | /// |
| 426 | /// // Adding `black_box` here disables the compiler's ability to reason about the first operand in the multiplication. |
| 427 | /// // It is forced to assume that it can be any possible number, so it cannot remove the `* five` |
| 428 | /// // operation. |
| 429 | /// let c = black_box(zero) * five; |
| 430 | /// ``` |
| 431 | /// |
| 432 | /// While most cases will not be as clear-cut as the above example, it still illustrates how |
| 433 | /// `black_box` can be used. When benchmarking a function, you usually want to wrap its inputs in |
| 434 | /// `black_box` so the compiler cannot make optimizations that would be unrealistic in real-life |
| 435 | /// use. |
| 436 | /// |
| 437 | /// ``` |
| 438 | /// use std::hint::black_box; |
| 439 | /// |
| 440 | /// // This is a simple function that increments its input by 1. Note that it is pure, meaning it |
| 441 | /// // has no side-effects. This function has no effect if its result is unused. (An example of a |
| 442 | /// // function *with* side-effects is `println!()`.) |
| 443 | /// fn increment(x: u8) -> u8 { |
| 444 | /// x + 1 |
| 445 | /// } |
| 446 | /// |
| 447 | /// // Here, we call `increment` but discard its result. The compiler, seeing this and knowing that |
| 448 | /// // `increment` is pure, will eliminate this function call entirely. This may not be desired, |
| 449 | /// // though, especially if we're trying to track how much time `increment` takes to execute. |
| 450 | /// let _ = increment(black_box(5)); |
| 451 | /// |
| 452 | /// // Here, we force `increment` to be executed. This is because the compiler treats `black_box` |
| 453 | /// // as if it has side-effects, and thus must compute its input. |
| 454 | /// let _ = black_box(increment(black_box(5))); |
| 455 | /// ``` |
| 456 | /// |
| 457 | /// There may be additional situations where you want to wrap the result of a function in |
| 458 | /// `black_box` to force its execution. This is situational though, and may not have any effect |
| 459 | /// (such as when the function returns a zero-sized type such as [`()` unit][unit]). |
| 460 | /// |
| 461 | /// Note that `black_box` has no effect on how its input is treated, only its output. As such, |
| 462 | /// expressions passed to `black_box` may still be optimized: |
| 463 | /// |
| 464 | /// ``` |
| 465 | /// use std::hint::black_box; |
| 466 | /// |
| 467 | /// // The compiler sees this... |
| 468 | /// let y = black_box(5 * 10); |
| 469 | /// |
| 470 | /// // ...as this. As such, it will likely simplify `5 * 10` to just `50`. |
| 471 | /// let _0 = 5 * 10; |
| 472 | /// let y = black_box(_0); |
| 473 | /// ``` |
| 474 | /// |
| 475 | /// In the above example, the `5 * 10` expression is considered distinct from the `black_box` call, |
| 476 | /// and thus is still optimized by the compiler. You can prevent this by moving the multiplication |
| 477 | /// operation outside of `black_box`: |
| 478 | /// |
| 479 | /// ``` |
| 480 | /// use std::hint::black_box; |
| 481 | /// |
| 482 | /// // No assumptions can be made about either operand, so the multiplication is not optimized out. |
| 483 | /// let y = black_box(5) * black_box(10); |
| 484 | /// ``` |
| 485 | /// |
| 486 | /// During constant evaluation, `black_box` is treated as a no-op. |
#[inline]
#[stable(feature = "bench_black_box", since = "1.66.0")]
#[rustc_const_stable(feature = "const_black_box", since = "1.86.0")]
pub const fn black_box<T>(dummy: T) -> T {
    // Thin wrapper: the intrinsic is an identity function that the backend
    // is asked to treat as maximally opaque (see the doc comment above for
    // the best-effort caveats). During const evaluation it is a no-op.
    crate::intrinsics::black_box(dummy)
}
| 493 | |
| 494 | /// An identity function that causes an `unused_must_use` warning to be |
| 495 | /// triggered if the given value is not used (returned, stored in a variable, |
| 496 | /// etc) by the caller. |
| 497 | /// |
| 498 | /// This is primarily intended for use in macro-generated code, in which a |
| 499 | /// [`#[must_use]` attribute][must_use] either on a type or a function would not |
| 500 | /// be convenient. |
| 501 | /// |
| 502 | /// [must_use]: https://doc.rust-lang.org/reference/attributes/diagnostics.html#the-must_use-attribute |
| 503 | /// |
| 504 | /// # Example |
| 505 | /// |
| 506 | /// ``` |
| 507 | /// #![feature(hint_must_use)] |
| 508 | /// |
| 509 | /// use core::fmt; |
| 510 | /// |
| 511 | /// pub struct Error(/* ... */); |
| 512 | /// |
| 513 | /// #[macro_export] |
| 514 | /// macro_rules! make_error { |
| 515 | /// ($($args:expr),*) => { |
| 516 | /// core::hint::must_use({ |
| 517 | /// let error = $crate::make_error(core::format_args!($($args),*)); |
| 518 | /// error |
| 519 | /// }) |
| 520 | /// }; |
| 521 | /// } |
| 522 | /// |
| 523 | /// // Implementation detail of make_error! macro. |
| 524 | /// #[doc(hidden)] |
| 525 | /// pub fn make_error(args: fmt::Arguments<'_>) -> Error { |
| 526 | /// Error(/* ... */) |
| 527 | /// } |
| 528 | /// |
| 529 | /// fn demo() -> Option<Error> { |
| 530 | /// if true { |
| 531 | /// // Oops, meant to write `return Some(make_error!("..."));` |
| 532 | /// Some(make_error!("..." )); |
| 533 | /// } |
| 534 | /// None |
| 535 | /// } |
| 536 | /// # |
| 537 | /// # // Make rustdoc not wrap the whole snippet in fn main, so that $crate::make_error works |
| 538 | /// # fn main() {} |
| 539 | /// ``` |
| 540 | /// |
| 541 | /// In the above example, we'd like an `unused_must_use` lint to apply to the |
| 542 | /// value created by `make_error!`. However, neither `#[must_use]` on a struct |
| 543 | /// nor `#[must_use]` on a function is appropriate here, so the macro expands |
| 544 | /// using `core::hint::must_use` instead. |
| 545 | /// |
| 546 | /// - We wouldn't want `#[must_use]` on the `struct Error` because that would |
| 547 | /// make the following unproblematic code trigger a warning: |
| 548 | /// |
| 549 | /// ``` |
| 550 | /// # struct Error; |
| 551 | /// # |
| 552 | /// fn f(arg: &str) -> Result<(), Error> |
| 553 | /// # { Ok(()) } |
| 554 | /// |
| 555 | /// #[test] |
| 556 | /// fn t() { |
| 557 | /// // Assert that `f` returns error if passed an empty string. |
| 558 | /// // A value of type `Error` is unused here but that's not a problem. |
| 559 | /// f("" ).unwrap_err(); |
| 560 | /// } |
| 561 | /// ``` |
| 562 | /// |
| 563 | /// - Using `#[must_use]` on `fn make_error` can't help because the return value |
| 564 | /// *is* used, as the right-hand side of a `let` statement. The `let` |
| 565 | /// statement looks useless but is in fact necessary for ensuring that |
| 566 | /// temporaries within the `format_args` expansion are not kept alive past the |
| 567 | /// creation of the `Error`, as keeping them alive past that point can cause |
| 568 | /// autotrait issues in async code: |
| 569 | /// |
| 570 | /// ``` |
| 571 | /// # #![feature(hint_must_use)] |
| 572 | /// # |
| 573 | /// # struct Error; |
| 574 | /// # |
| 575 | /// # macro_rules! make_error { |
| 576 | /// # ($($args:expr),*) => { |
| 577 | /// # core::hint::must_use({ |
| 578 | /// # // If `let` isn't used, then `f()` produces a non-Send future. |
| 579 | /// # let error = make_error(core::format_args!($($args),*)); |
| 580 | /// # error |
| 581 | /// # }) |
| 582 | /// # }; |
| 583 | /// # } |
| 584 | /// # |
| 585 | /// # fn make_error(args: core::fmt::Arguments<'_>) -> Error { |
| 586 | /// # Error |
| 587 | /// # } |
| 588 | /// # |
| 589 | /// async fn f() { |
| 590 | /// // Using `let` inside the make_error expansion causes temporaries like |
| 591 | /// // `unsync()` to drop at the semicolon of that `let` statement, which |
| 592 | /// // is prior to the await point. They would otherwise stay around until |
| 593 | /// // the semicolon on *this* statement, which is after the await point, |
| 594 | /// // and the enclosing Future would not implement Send. |
| 595 | /// log(make_error!("look: {:p}" , unsync())).await; |
| 596 | /// } |
| 597 | /// |
| 598 | /// async fn log(error: Error) {/* ... */} |
| 599 | /// |
| 600 | /// // Returns something without a Sync impl. |
| 601 | /// fn unsync() -> *const () { |
| 602 | /// 0 as *const () |
| 603 | /// } |
| 604 | /// # |
| 605 | /// # fn test() { |
| 606 | /// # fn assert_send(_: impl Send) {} |
| 607 | /// # assert_send(f()); |
| 608 | /// # } |
| 609 | /// ``` |
#[unstable(feature = "hint_must_use", issue = "94745")]
#[must_use] // <-- :)
#[inline(always)]
pub const fn must_use<T>(value: T) -> T {
    // Pure identity function: the `#[must_use]` attribute on this function is
    // the entire mechanism, propagating the `unused_must_use` lint to the
    // caller's expression without annotating a type or the producing function.
    value
}
| 616 | |
| 617 | /// Hints to the compiler that a branch condition is likely to be true. |
| 618 | /// Returns the value passed to it. |
| 619 | /// |
| 620 | /// It can be used with `if` or boolean `match` expressions. |
| 621 | /// |
| 622 | /// When used outside of a branch condition, it may still influence a nearby branch, but |
| 623 | /// probably will not have any effect. |
| 624 | /// |
| 625 | /// It can also be applied to parts of expressions, such as `likely(a) && unlikely(b)`, or to |
| 626 | /// compound expressions, such as `likely(a && b)`. When applied to compound expressions, it has |
| 627 | /// the following effect: |
| 628 | /// ```text |
| 629 | /// likely(!a) => !unlikely(a) |
| 630 | /// likely(a && b) => likely(a) && likely(b) |
| 631 | /// likely(a || b) => a || likely(b) |
| 632 | /// ``` |
| 633 | /// |
| 634 | /// See also the function [`cold_path()`] which may be more appropriate for idiomatic Rust code. |
| 635 | /// |
| 636 | /// # Examples |
| 637 | /// |
| 638 | /// ``` |
| 639 | /// #![feature(likely_unlikely)] |
| 640 | /// use core::hint::likely; |
| 641 | /// |
| 642 | /// fn foo(x: i32) { |
| 643 | /// if likely(x > 0) { |
| 644 | /// println!("this branch is likely to be taken" ); |
| 645 | /// } else { |
| 646 | /// println!("this branch is unlikely to be taken" ); |
| 647 | /// } |
| 648 | /// |
| 649 | /// match likely(x > 0) { |
| 650 | /// true => println!("this branch is likely to be taken" ), |
| 651 | /// false => println!("this branch is unlikely to be taken" ), |
| 652 | /// } |
| 653 | /// |
| 654 | /// // Use outside of a branch condition may still influence a nearby branch |
| 655 | /// let cond = likely(x != 0); |
| 656 | /// if cond { |
| 657 | /// println!("this branch is likely to be taken" ); |
| 658 | /// } |
| 659 | /// } |
| 660 | /// ``` |
#[unstable(feature = "likely_unlikely", issue = "151619")]
#[inline(always)]
pub const fn likely(b: bool) -> bool {
    // Thin wrapper: returns `b` unchanged while hinting the optimizer that
    // `b` is expected to be true.
    crate::intrinsics::likely(b)
}
| 666 | |
| 667 | /// Hints to the compiler that a branch condition is unlikely to be true. |
| 668 | /// Returns the value passed to it. |
| 669 | /// |
| 670 | /// It can be used with `if` or boolean `match` expressions. |
| 671 | /// |
| 672 | /// When used outside of a branch condition, it may still influence a nearby branch, but |
| 673 | /// probably will not have any effect. |
| 674 | /// |
| 675 | /// It can also be applied to parts of expressions, such as `likely(a) && unlikely(b)`, or to |
| 676 | /// compound expressions, such as `unlikely(a && b)`. When applied to compound expressions, it has |
| 677 | /// the following effect: |
| 678 | /// ```text |
| 679 | /// unlikely(!a) => !likely(a) |
| 680 | /// unlikely(a && b) => a && unlikely(b) |
| 681 | /// unlikely(a || b) => unlikely(a) || unlikely(b) |
| 682 | /// ``` |
| 683 | /// |
| 684 | /// See also the function [`cold_path()`] which may be more appropriate for idiomatic Rust code. |
| 685 | /// |
| 686 | /// # Examples |
| 687 | /// |
| 688 | /// ``` |
| 689 | /// #![feature(likely_unlikely)] |
| 690 | /// use core::hint::unlikely; |
| 691 | /// |
| 692 | /// fn foo(x: i32) { |
| 693 | /// if unlikely(x > 0) { |
| 694 | /// println!("this branch is unlikely to be taken" ); |
| 695 | /// } else { |
| 696 | /// println!("this branch is likely to be taken" ); |
| 697 | /// } |
| 698 | /// |
| 699 | /// match unlikely(x > 0) { |
| 700 | /// true => println!("this branch is unlikely to be taken" ), |
| 701 | /// false => println!("this branch is likely to be taken" ), |
| 702 | /// } |
| 703 | /// |
| 704 | /// // Use outside of a branch condition may still influence a nearby branch |
| 705 | /// let cond = unlikely(x != 0); |
| 706 | /// if cond { |
| 707 | /// println!("this branch is likely to be taken" ); |
| 708 | /// } |
| 709 | /// } |
| 710 | /// ``` |
| 711 | #[unstable (feature = "likely_unlikely" , issue = "151619" )] |
| 712 | #[inline (always)] |
| 713 | pub const fn unlikely(b: bool) -> bool { |
| 714 | crate::intrinsics::unlikely(b) |
| 715 | } |
| 716 | |
| 717 | /// Hints to the compiler that given path is cold, i.e., unlikely to be taken. The compiler may |
| 718 | /// choose to optimize paths that are not cold at the expense of paths that are cold. |
| 719 | /// |
| 720 | /// Note that like all hints, the exact effect to codegen is not guaranteed. Using `cold_path` |
| 721 | /// can actually *decrease* performance if the branch is called more than expected. It is advisable |
| 722 | /// to perform benchmarks to tell if this function is useful. |
| 723 | /// |
| 724 | /// # Examples |
| 725 | /// |
| 726 | /// ``` |
| 727 | /// use core::hint::cold_path; |
| 728 | /// |
| 729 | /// fn foo(x: &[i32]) { |
| 730 | /// if let Some(first) = x.get(0) { |
| 731 | /// // this is the fast path |
| 732 | /// } else { |
| 733 | /// // this path is unlikely |
| 734 | /// cold_path(); |
| 735 | /// } |
| 736 | /// } |
| 737 | /// |
| 738 | /// fn bar(x: i32) -> i32 { |
| 739 | /// match x { |
| 740 | /// 1 => 10, |
| 741 | /// 2 => 100, |
| 742 | /// 3 => { cold_path(); 1000 }, // this branch is unlikely |
| 743 | /// _ => { cold_path(); 10000 }, // this is also unlikely |
| 744 | /// } |
| 745 | /// } |
| 746 | /// ``` |
| 747 | /// |
| 748 | /// This can also be used to implement `likely` and `unlikely` helpers to hint the condition rather |
| 749 | /// than the branch: |
| 750 | /// |
| 751 | /// ``` |
| 752 | /// use core::hint::cold_path; |
| 753 | /// |
| 754 | /// #[inline(always)] |
| 755 | /// pub const fn likely(b: bool) -> bool { |
| 756 | /// if !b { |
| 757 | /// cold_path(); |
| 758 | /// } |
| 759 | /// b |
| 760 | /// } |
| 761 | /// |
| 762 | /// #[inline(always)] |
| 763 | /// pub const fn unlikely(b: bool) -> bool { |
| 764 | /// if b { |
| 765 | /// cold_path(); |
| 766 | /// } |
| 767 | /// b |
| 768 | /// } |
| 769 | /// |
| 770 | /// fn foo(x: i32) { |
| 771 | /// if likely(x > 0) { |
| 772 | /// println!("this branch is likely to be taken" ); |
| 773 | /// } else { |
| 774 | /// println!("this branch is unlikely to be taken" ); |
| 775 | /// } |
| 776 | /// } |
| 777 | /// ``` |
| 778 | #[stable (feature = "cold_path" , since = "CURRENT_RUSTC_VERSION" )] |
| 779 | #[rustc_const_stable (feature = "cold_path" , since = "CURRENT_RUSTC_VERSION" )] |
| 780 | #[inline (always)] |
| 781 | pub const fn cold_path() { |
| 782 | crate::intrinsics::cold_path() |
| 783 | } |
| 784 | |
/// Returns either `true_val` or `false_val` depending on the value of
/// `condition`, with a hint to the compiler that `condition` is unlikely to be
/// correctly predicted by a CPU’s branch predictor.
///
/// This method is functionally equivalent to
/// ```ignore (this is just for illustrative purposes)
/// fn select_unpredictable<T>(b: bool, true_val: T, false_val: T) -> T {
///     if b { true_val } else { false_val }
/// }
/// ```
/// but might generate different assembly. In particular, on platforms with
/// a conditional move or select instruction (like `cmov` on x86 or `csel`
/// on ARM) the optimizer might use these instructions to avoid branches,
/// which can benefit performance if the branch predictor is struggling
/// with predicting `condition`, such as in an implementation of binary
/// search.
///
/// Note however that this lowering is not guaranteed (on any platform) and
/// should not be relied upon when trying to write cryptographic constant-time
/// code. Also be aware that this lowering might *decrease* performance if
/// `condition` is well-predictable. It is advisable to perform benchmarks to
/// tell if this function is useful.
///
/// # Examples
///
/// Distribute values evenly between two buckets:
/// ```
/// use std::hash::BuildHasher;
/// use std::hint;
///
/// fn append<H: BuildHasher>(hasher: &H, v: i32, bucket_one: &mut Vec<i32>, bucket_two: &mut Vec<i32>) {
///     let hash = hasher.hash_one(&v);
///     let bucket = hint::select_unpredictable(hash % 2 == 0, bucket_one, bucket_two);
///     bucket.push(v);
/// }
/// # let hasher = std::collections::hash_map::RandomState::new();
/// # let mut bucket_one = Vec::new();
/// # let mut bucket_two = Vec::new();
/// # append(&hasher, 42, &mut bucket_one, &mut bucket_two);
/// # assert_eq!(bucket_one.len() + bucket_two.len(), 1);
/// ```
#[inline(always)]
#[stable(feature = "select_unpredictable", since = "1.88.0")]
#[rustc_const_unstable(feature = "const_select_unpredictable", issue = "145938")]
pub const fn select_unpredictable<T>(condition: bool, true_val: T, false_val: T) -> T
where
    T: [const] Destruct,
{
    // FIXME(https://github.com/rust-lang/unsafe-code-guidelines/issues/245):
    // Change this to use ManuallyDrop instead.
    // Both values are wrapped in `MaybeUninit` so the intrinsic can select
    // between raw pointers while this function manages the two drops manually:
    // the intrinsic itself never drops the unselected value.
    let mut true_val = MaybeUninit::new(true_val);
    let mut false_val = MaybeUninit::new(false_val);

    // Panic guard: its `Drop` impl runs only if dropping the unselected value
    // below unwinds, ensuring the selected value is not leaked in that case.
    struct DropOnPanic<T> {
        // Invariant: valid pointer and points to an initialized value that is not further used,
        // i.e. it can be dropped by this guard.
        inner: *mut T,
    }

    impl<T> Drop for DropOnPanic<T> {
        fn drop(&mut self) {
            // SAFETY: Must be guaranteed on construction of local type `DropOnPanic`.
            unsafe { self.inner.drop_in_place() }
        }
    }

    let true_ptr = true_val.as_mut_ptr();
    let false_ptr = false_val.as_mut_ptr();

    // SAFETY: The value that is not selected is dropped, and the selected one
    // is returned. This is necessary because the intrinsic doesn't drop the
    // value that is not selected.
    unsafe {
        // Extract the selected value first, ensure it is dropped as well if dropping the unselected
        // value panics. We construct a temporary by-pointer guard around the selected value while
        // dropping the unselected value. Arguments overlap here, so we can not use mutable
        // reference for these arguments.
        // (`guard` points at the selected value, `drop` at the unselected one;
        // the local `drop` is a raw pointer and shadows the prelude `drop` fn.)
        let guard = crate::intrinsics::select_unpredictable(condition, true_ptr, false_ptr);
        let drop = crate::intrinsics::select_unpredictable(condition, false_ptr, true_ptr);

        // SAFETY: both pointers are well-aligned and point to initialized values inside a
        // `MaybeUninit` each. In both possible values for `condition` the pointer `guard` and
        // `drop` do not alias (even though the two argument pairs we have selected from did alias
        // each other).
        let guard = DropOnPanic { inner: guard };
        drop.drop_in_place();
        crate::mem::forget(guard);

        // Note that it is important to use the values here. Reading from the pointer we got makes
        // LLVM forget the !unpredictable annotation sometimes (in tests, integer sized values in
        // particular seemed to confuse it, also observed in llvm/llvm-project #82340).
        crate::intrinsics::select_unpredictable(condition, true_val, false_val).assume_init()
    }
}
| 879 | |
/// The expected temporal locality of a memory prefetch operation.
///
/// Locality expresses how likely the prefetched data is to be reused soon,
/// and therefore which level of cache it should be brought into.
///
/// The locality is just a hint, and may be ignored on some targets or by the hardware.
///
/// Used with functions like [`prefetch_read`] and [`prefetch_write`].
///
/// [`prefetch_read`]: crate::hint::prefetch_read
/// [`prefetch_write`]: crate::hint::prefetch_write
#[unstable(feature = "hint_prefetch", issue = "146941")]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// The Rust-side discriminants carry no meaning; `Locality::to_llvm` maps each
// variant to the constant the LLVM prefetch intrinsics expect. Variants are
// listed from least (L3) to most (L1) temporal locality.
pub enum Locality {
    /// Data is expected to be reused eventually.
    ///
    /// Typically prefetches into L3 cache (if the CPU supports it).
    L3,
    /// Data is expected to be reused in the near future.
    ///
    /// Typically prefetches into L2 cache.
    L2,
    /// Data is expected to be reused very soon.
    ///
    /// Typically prefetches into L1 cache.
    L1,
}
| 908 | |
| 909 | impl Locality { |
| 910 | /// Convert to the constant that LLVM associates with a locality. |
| 911 | const fn to_llvm(self) -> i32 { |
| 912 | match self { |
| 913 | Self::L3 => 1, |
| 914 | Self::L2 => 2, |
| 915 | Self::L1 => 3, |
| 916 | } |
| 917 | } |
| 918 | } |
| 919 | |
| 920 | /// Prefetch the cache line containing `ptr` for a future read. |
| 921 | /// |
| 922 | /// A strategically placed prefetch can reduce cache miss latency if the data is accessed |
| 923 | /// soon after, but may also increase bandwidth usage or evict other cache lines. |
| 924 | /// |
| 925 | /// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware. |
| 926 | /// |
| 927 | /// Passing a dangling or invalid pointer is permitted: the memory will not |
| 928 | /// actually be dereferenced, and no faults are raised. |
| 929 | /// |
| 930 | /// # Examples |
| 931 | /// |
| 932 | /// ``` |
| 933 | /// #![feature(hint_prefetch)] |
| 934 | /// use std::hint::{Locality, prefetch_read}; |
| 935 | /// use std::mem::size_of_val; |
| 936 | /// |
| 937 | /// // Prefetch all of `slice` into the L1 cache. |
| 938 | /// fn prefetch_slice<T>(slice: &[T]) { |
| 939 | /// // On most systems the cache line size is 64 bytes. |
| 940 | /// for offset in (0..size_of_val(slice)).step_by(64) { |
| 941 | /// prefetch_read(slice.as_ptr().wrapping_add(offset), Locality::L1); |
| 942 | /// } |
| 943 | /// } |
| 944 | /// ``` |
| 945 | #[inline (always)] |
| 946 | #[unstable (feature = "hint_prefetch" , issue = "146941" )] |
| 947 | pub const fn prefetch_read<T>(ptr: *const T, locality: Locality) { |
| 948 | match locality { |
| 949 | Locality::L3 => intrinsics::prefetch_read_data::<T, { Locality::L3.to_llvm() }>(ptr), |
| 950 | Locality::L2 => intrinsics::prefetch_read_data::<T, { Locality::L2.to_llvm() }>(ptr), |
| 951 | Locality::L1 => intrinsics::prefetch_read_data::<T, { Locality::L1.to_llvm() }>(ptr), |
| 952 | } |
| 953 | } |
| 954 | |
| 955 | /// Prefetch the cache line containing `ptr` for a single future read, but attempt to avoid |
| 956 | /// polluting the cache. |
| 957 | /// |
| 958 | /// A strategically placed prefetch can reduce cache miss latency if the data is accessed |
| 959 | /// soon after, but may also increase bandwidth usage or evict other cache lines. |
| 960 | /// |
| 961 | /// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware. |
| 962 | /// |
| 963 | /// Passing a dangling or invalid pointer is permitted: the memory will not |
| 964 | /// actually be dereferenced, and no faults are raised. |
| 965 | #[inline (always)] |
| 966 | #[unstable (feature = "hint_prefetch" , issue = "146941" )] |
| 967 | pub const fn prefetch_read_non_temporal<T>(ptr: *const T, locality: Locality) { |
| 968 | // The LLVM intrinsic does not currently support specifying the locality. |
| 969 | let _ = locality; |
| 970 | intrinsics::prefetch_read_data::<T, 0>(ptr) |
| 971 | } |
| 972 | |
| 973 | /// Prefetch the cache line containing `ptr` for a future write. |
| 974 | /// |
| 975 | /// A strategically placed prefetch can reduce cache miss latency if the data is accessed |
| 976 | /// soon after, but may also increase bandwidth usage or evict other cache lines. |
| 977 | /// |
| 978 | /// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware. |
| 979 | /// |
| 980 | /// Passing a dangling or invalid pointer is permitted: the memory will not |
| 981 | /// actually be dereferenced, and no faults are raised. |
| 982 | #[inline (always)] |
| 983 | #[unstable (feature = "hint_prefetch" , issue = "146941" )] |
| 984 | pub const fn prefetch_write<T>(ptr: *mut T, locality: Locality) { |
| 985 | match locality { |
| 986 | Locality::L3 => intrinsics::prefetch_write_data::<T, { Locality::L3.to_llvm() }>(ptr), |
| 987 | Locality::L2 => intrinsics::prefetch_write_data::<T, { Locality::L2.to_llvm() }>(ptr), |
| 988 | Locality::L1 => intrinsics::prefetch_write_data::<T, { Locality::L1.to_llvm() }>(ptr), |
| 989 | } |
| 990 | } |
| 991 | |
| 992 | /// Prefetch the cache line containing `ptr` for a single future write, but attempt to avoid |
| 993 | /// polluting the cache. |
| 994 | /// |
| 995 | /// A strategically placed prefetch can reduce cache miss latency if the data is accessed |
| 996 | /// soon after, but may also increase bandwidth usage or evict other cache lines. |
| 997 | /// |
| 998 | /// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware. |
| 999 | /// |
| 1000 | /// Passing a dangling or invalid pointer is permitted: the memory will not |
| 1001 | /// actually be dereferenced, and no faults are raised. |
| 1002 | #[inline (always)] |
| 1003 | #[unstable (feature = "hint_prefetch" , issue = "146941" )] |
| 1004 | pub const fn prefetch_write_non_temporal<T>(ptr: *const T, locality: Locality) { |
| 1005 | // The LLVM intrinsic does not currently support specifying the locality. |
| 1006 | let _ = locality; |
| 1007 | intrinsics::prefetch_write_data::<T, 0>(ptr) |
| 1008 | } |
| 1009 | |
| 1010 | /// Prefetch the cache line containing `ptr` into the instruction cache for a future read. |
| 1011 | /// |
| 1012 | /// A strategically placed prefetch can reduce cache miss latency if the instructions are |
| 1013 | /// accessed soon after, but may also increase bandwidth usage or evict other cache lines. |
| 1014 | /// |
| 1015 | /// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware. |
| 1016 | /// |
| 1017 | /// Passing a dangling or invalid pointer is permitted: the memory will not |
| 1018 | /// actually be dereferenced, and no faults are raised. |
| 1019 | #[inline (always)] |
| 1020 | #[unstable (feature = "hint_prefetch" , issue = "146941" )] |
| 1021 | pub const fn prefetch_read_instruction<T>(ptr: *const T, locality: Locality) { |
| 1022 | match locality { |
| 1023 | Locality::L3 => intrinsics::prefetch_read_instruction::<T, { Locality::L3.to_llvm() }>(data:ptr), |
| 1024 | Locality::L2 => intrinsics::prefetch_read_instruction::<T, { Locality::L2.to_llvm() }>(data:ptr), |
| 1025 | Locality::L1 => intrinsics::prefetch_read_instruction::<T, { Locality::L1.to_llvm() }>(data:ptr), |
| 1026 | } |
| 1027 | } |
| 1028 | |