bench_acquire.rs - Codebrowser

//! Benchmark the overhead that the synchronization of `OnceCell::get` causes.
//! We do some other operations that write to memory to get an imprecise but somewhat realistic
//! measurement.
4
5	use once_cell::sync::OnceCell;
6	use std::sync::atomic::{AtomicUsize, Ordering};
7
8	const N_THREADS: usize = `16`;
9	const N_ROUNDS: usize = `1_000_000`;
10
11	static CELL: OnceCell<usize> = OnceCell::new();
12	static OTHER: AtomicUsize = AtomicUsize::new(`0`);
13
14	fn main() {
15	let start = std::time::Instant::now();
16	let threads =
17	(`0`..N_THREADS).map(\|i\| std::thread::spawn(move \|\| thread_main(i))).collect::<Vec<_>>();
18	for thread in threads {
19	thread.join().unwrap();
20	}
21	println!("{:?}", start.elapsed());
22	println!("{:?}", OTHER.load(Ordering::Relaxed));
23	}
24
25	#[inline(never)]
26	fn thread_main(i: usize) {
27	// The operations we do here don't really matter, as long as we do multiple writes, and
28	// everything is messy enough to prevent the compiler from optimizing the loop away.
29	let mut data = [i; `128`];
30	let mut accum = `0usize`;
31	for _ in `0`..N_ROUNDS {
32	let _value = CELL.get_or_init(\|\| i + `1`);
33	let k = OTHER.fetch_add(data[accum & `0x7F`] as usize, Ordering::Relaxed);
34	for j in data.iter_mut() {
35	j = (j).wrapping_add(accum);
36	accum = accum.wrapping_add(k);
37	}
38	}
39	}
40