//! Benchmark the overhead that the synchronization of `OnceCell::get` causes.
//! We do some other operations that write to memory to get an imprecise but somewhat realistic
//! measurement.
4 | |
5 | use once_cell::sync::OnceCell; |
6 | use std::sync::atomic::{AtomicUsize, Ordering}; |
7 | |
/// Number of worker threads contending on `CELL`.
const N_THREADS: usize = 16;
/// Iterations per thread; large enough that loop work dominates thread startup.
const N_ROUNDS: usize = 1_000_000;

/// The cell whose `get`/`get_or_init` fast-path overhead is being measured.
static CELL: OnceCell<usize> = OnceCell::new();
/// Shared counter the threads hammer to generate extra memory traffic;
/// printed at the end of `main` so the work stays observable.
static OTHER: AtomicUsize = AtomicUsize::new(0);
13 | |
14 | fn main() { |
15 | let start = std::time::Instant::now(); |
16 | let threads = |
17 | (0..N_THREADS).map(|i| std::thread::spawn(move || thread_main(i))).collect::<Vec<_>>(); |
18 | for thread in threads { |
19 | thread.join().unwrap(); |
20 | } |
21 | println!("{:?}" , start.elapsed()); |
22 | println!("{:?}" , OTHER.load(Ordering::Relaxed)); |
23 | } |
24 | |
25 | #[inline (never)] |
26 | fn thread_main(i: usize) { |
27 | // The operations we do here don't really matter, as long as we do multiple writes, and |
28 | // everything is messy enough to prevent the compiler from optimizing the loop away. |
29 | let mut data = [i; 128]; |
30 | let mut accum = 0usize; |
31 | for _ in 0..N_ROUNDS { |
32 | let _value = CELL.get_or_init(|| i + 1); |
33 | let k = OTHER.fetch_add(data[accum & 0x7F] as usize, Ordering::Relaxed); |
34 | for j in data.iter_mut() { |
35 | *j = (*j).wrapping_add(accum); |
36 | accum = accum.wrapping_add(k); |
37 | } |
38 | } |
39 | } |
40 | |