| 1 | //! Benchmark the overhead that the synchronization of `OnceCell::get` causes. |
| 2 | //! We do some other operations that write to memory to get an imprecise but somewhat realistic |
| 3 | //! measurement. |
| 4 | |
| 5 | use once_cell::sync::OnceCell; |
| 6 | use std::sync::atomic::{AtomicUsize, Ordering}; |
| 7 | |
/// Number of worker threads that `main` spawns.
const N_THREADS: usize = 16;
/// Number of outer-loop iterations each worker performs in `thread_main`.
const N_ROUNDS: usize = 1_000_000;

/// The cell under test; every round of every worker calls `get_or_init` on it.
static CELL: OnceCell<usize> = OnceCell::new();
/// Shared counter that the workers update with relaxed `fetch_add`; `main` prints its final value.
static OTHER: AtomicUsize = AtomicUsize::new(0);
| 13 | |
| 14 | fn main() { |
| 15 | let start = std::time::Instant::now(); |
| 16 | let threads = |
| 17 | (0..N_THREADS).map(|i| std::thread::spawn(move || thread_main(i))).collect::<Vec<_>>(); |
| 18 | for thread in threads { |
| 19 | thread.join().unwrap(); |
| 20 | } |
| 21 | println!("{:?}" , start.elapsed()); |
| 22 | println!("{:?}" , OTHER.load(Ordering::Relaxed)); |
| 23 | } |
| 24 | |
| 25 | #[inline (never)] |
| 26 | fn thread_main(i: usize) { |
| 27 | // The operations we do here don't really matter, as long as we do multiple writes, and |
| 28 | // everything is messy enough to prevent the compiler from optimizing the loop away. |
| 29 | let mut data = [i; 128]; |
| 30 | let mut accum = 0usize; |
| 31 | for _ in 0..N_ROUNDS { |
| 32 | let _value = CELL.get_or_init(|| i + 1); |
| 33 | let k = OTHER.fetch_add(data[accum & 0x7F] as usize, Ordering::Relaxed); |
| 34 | for j in data.iter_mut() { |
| 35 | *j = (*j).wrapping_add(accum); |
| 36 | accum = accum.wrapping_add(k); |
| 37 | } |
| 38 | } |
| 39 | } |
| 40 | |