// SPDX-License-Identifier: Apache-2.0 OR MIT

// Fallback implementation using global locks.
//
// The global locks are implemented as sequence locks (seqlocks).
//
// This is based on the global locks in crossbeam-utils's `AtomicCell`,
// but the seqlock is implemented in a way that does not depend on UB
// (see the comments in the optimistic_read method in the atomic! macro for details).
//
// Note that we cannot embed a lock in each atomic object, since the in-memory representation of
// the atomic type and the underlying value type must be the same.
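//
// As a rough sketch of the protocol used below (illustrative only: `write_chunks` and
// `read_chunks` are hypothetical helpers standing in for the chunk-wise atomic stores/loads;
// the real code is in the `load`/`store`/`swap` methods of the `atomic!` macro):
//
//     // Writer: take the per-address seqlock, then store the value chunk by chunk.
//     let guard = lock(addr).write();
//     write_chunks(val);
//     drop(guard); // releasing the guard increments the stamp
//
//     // Reader: read optimistically, then validate that no write intervened.
//     if let Some(stamp) = lock(addr).optimistic_read() {
//         let val = read_chunks(); // may observe a torn value
//         if lock(addr).validate_read(stamp) {
//             return val; // the stamp did not change, so `val` is consistent
//         }
//     }
//     // Otherwise, fall back to reading the value while holding the write lock.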

#![cfg_attr(
    any(
        all(
            target_arch = "x86_64",
            not(portable_atomic_no_cmpxchg16b_target_feature),
            not(portable_atomic_no_outline_atomics),
            not(any(target_env = "sgx", miri)),
        ),
        all(
            target_arch = "powerpc64",
            feature = "fallback",
            not(portable_atomic_no_outline_atomics),
            portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
            any(
                all(
                    target_os = "linux",
                    any(
                        target_env = "gnu",
                        all(
                            any(target_env = "musl", target_env = "ohos"),
                            not(target_feature = "crt-static"),
                        ),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "android",
                target_os = "freebsd",
            ),
            not(any(miri, portable_atomic_sanitize_thread)),
        ),
        all(
            target_arch = "arm",
            not(portable_atomic_no_asm),
            any(target_os = "linux", target_os = "android"),
            not(portable_atomic_no_outline_atomics),
        ),
    ),
    allow(dead_code)
)]

#[macro_use]
pub(crate) mod utils;

// Use the "wide" sequence lock if the pointer width <= 32, to prevent its counter from wrapping
// around.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
// vulnerable to wrap around. But that is mostly okay, since on such primitive hardware the
// counter will not be incremented that fast.
//
// Some 64-bit architectures have an ABI with 32-bit pointer width (e.g., the x86_64 X32 ABI,
// the aarch64 ILP32 ABI, and the mips64 N32 ABI). On those targets, AtomicU64 is available and
// fast, so use it to implement a normal sequence lock.
cfg_has_fast_atomic_64! {
    mod seq_lock;
}
cfg_no_fast_atomic_64! {
    #[path = "seq_lock_wide.rs"]
    mod seq_lock;
}
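
// To see the wrap-around hazard that the "wide" variant guards against, consider the following
// illustrative (and in practice extremely unlikely) interleaving with a narrow stamp. This is a
// scenario sketch based on the reader code in the `atomic!` macro below, not code from
// seq_lock_wide.rs:
//
//     let stamp = lock.optimistic_read().unwrap(); // reader observes stamp S
//     // ... the reader is preempted while writers perform enough writes for the
//     // 32-bit stamp to wrap all the way back to S ...
//     let val = self.optimistic_read();            // may observe a torn value
//     lock.validate_read(stamp)                    // still returns true, so the torn value escapes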

use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};

use seq_lock::{SeqLock, SeqLockWriteGuard};
use utils::CachePadded;

// Some 64-bit architectures have an ABI with 32-bit pointer width (e.g., the x86_64 X32 ABI,
// the aarch64 ILP32 ABI, and the mips64 N32 ABI). On those targets, AtomicU64 is fast, so use
// it to reduce the number of chunks in the byte-wise atomic memcpy.
use seq_lock::{AtomicChunk, Chunk};

// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
    // The number of locks is a prime number because we want to make sure `addr % LEN` gets
    // dispersed across all locks.
    //
    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
    // so the actual concurrency level will be smaller.
    const LEN: usize = 67;
    #[allow(clippy::declare_interior_mutable_const)]
    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
    static LOCKS: [CachePadded<SeqLock>; LEN] = [
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L,
    ];

    // Since the modulus is a constant, the compiler strength-reduces this into a short sequence
    // of cheap arithmetic operations (multiplications, shifts, and a subtraction) rather than
    // using the slow modulo instruction.
    &LOCKS[addr % LEN]
}
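
// Illustrative use of `lock` (this is what the methods generated by the `atomic!` macro below
// do): the address of the atomic itself is the key, so every operation on a given atomic agrees
// on a single lock, while atomics at different addresses are usually dispersed across different
// locks:
//
//     let a = AtomicU128::new(0);
//     let guard = lock(a.as_ptr() as usize).write();
//     // ... read and/or write the 16 bytes of `a` while holding `guard` ...
//     drop(guard);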

macro_rules! atomic {
    ($atomic_type:ident, $int_type:ident, $align:literal) => {
        #[repr(C, align($align))]
        pub(crate) struct $atomic_type {
            v: UnsafeCell<$int_type>,
        }

        impl $atomic_type {
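            // The value is loaded and stored as `LEN` chunks; e.g., 2 for a 128-bit atomic
            // assuming a 64-bit `Chunk`, or 4 assuming a 32-bit `Chunk`.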
            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();

            #[inline]
            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
                static_assert!($atomic_type::LEN > 1);
                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);

                // SAFETY: the caller must uphold the safety contract for `chunks`.
                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
            }

            #[inline]
            fn optimistic_read(&self) -> $int_type {
                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
                // SAFETY:
                // - There are no threads that perform non-atomic concurrent write operations.
                // - There is no writer that updates the value using atomic operations of different granularity.
                //
                // If the atomic operation is not used here, it will cause a data race
                // when `write` performs a concurrent write operation.
                // Such a data race is sometimes considered virtually unproblematic
                // in SeqLock implementations:
                //
                // - https://github.com/Amanieu/seqlock/issues/2
                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
                //
                // However, in our use case, the implementation that loads/stores the value as
                // chunks of usize is fast enough and sound, so we use that implementation.
                //
                // See also atomic-memcpy crate, a generic implementation of this pattern:
                // https://github.com/taiki-e/atomic-memcpy
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    dst[i] = chunks[i].load(Ordering::Relaxed);
                }
                // SAFETY: integers are plain old data types so we can always transmute to them.
                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
            }

            #[inline]
            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
                // This calls optimistic_read, which can return a torn value, but the resulting
                // value is guaranteed not to be torn because we hold the write lock.
                self.optimistic_read()
            }

            #[inline]
            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
                // SAFETY:
                // - The guard guarantees that we hold the lock to write.
                // - There are no threads that perform non-atomic concurrent read or write operations.
                //
                // See optimistic_read for the reason that atomic operations are used here.
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    chunks[i].store(val[i], Ordering::Relaxed);
                }
            }
        }

        // Send is implicitly implemented.
        // SAFETY: any data races are prevented by the lock and atomic operation.
        unsafe impl Sync for $atomic_type {}

        impl_default_no_fetch_ops!($atomic_type, $int_type);
        impl_default_bit_opts!($atomic_type, $int_type);
        impl $atomic_type {
            #[inline]
            pub(crate) const fn new(v: $int_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

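            // The fallback implementation is always lock-based, so it never reports itself
            // as lock-free.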
            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::is_always_lock_free()
            }
            #[inline]
            pub(crate) const fn is_always_lock_free() -> bool {
                false
            }

            #[inline]
            pub(crate) fn get_mut(&mut self) -> &mut $int_type {
                // SAFETY: the mutable reference guarantees unique ownership.
                // (UnsafeCell::get_mut requires Rust 1.50)
                unsafe { &mut *self.v.get() }
            }

            #[inline]
            pub(crate) fn into_inner(self) -> $int_type {
                self.v.into_inner()
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $int_type {
                crate::utils::assert_load_ordering(order);
                let lock = lock(self.v.get() as usize);

                // Try doing an optimistic read first.
                if let Some(stamp) = lock.optimistic_read() {
                    let val = self.optimistic_read();

                    if lock.validate_read(stamp) {
                        return val;
                    }
                }

                // Grab a regular write lock so that writers don't starve this load.
                let guard = lock.write();
                let val = self.read(&guard);
                // The value hasn't been changed. Drop the guard without incrementing the stamp.
                guard.abort();
                val
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let guard = lock(self.v.get() as usize).write();
                self.write(val, &guard)
            }

            #[inline]
            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(val, &guard);
                prev
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                crate::utils::assert_compare_exchange_ordering(success, failure);
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                if prev == current {
                    self.write(new, &guard);
                    Ok(prev)
                } else {
                    // The value hasn't been changed. Drop the guard without incrementing the stamp.
                    guard.abort();
                    Err(prev)
                }
            }

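            // Spurious failures are impossible when the whole operation is performed under a
            // lock, so the weak variant simply forwards to `compare_exchange`.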
            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange_weak(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                self.compare_exchange(current, new, success, failure)
            }

            #[inline]
            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_add(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_sub(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev & val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!(prev & val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev | val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev ^ val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::max(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::min(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!prev, &guard);
                prev
            }
            #[inline]
            pub(crate) fn not(&self, order: Ordering) {
                self.fetch_not(order);
            }

            #[inline]
            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_neg(), &guard);
                prev
            }
            #[inline]
            pub(crate) fn neg(&self, order: Ordering) {
                self.fetch_neg(order);
            }

            #[inline]
            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
                self.v.get()
            }
        }
    };
}

#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(test, not(target_has_atomic = "64")))
)]
cfg_no_fast_atomic_64! {
    atomic!(AtomicI64, i64, 8);
    atomic!(AtomicU64, u64, 8);
}

atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);
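
// A minimal usage sketch of the types generated above (illustrative only; on targets without
// native 128-bit atomics, the public `portable_atomic::AtomicU128` is expected to be backed by
// this fallback):
//
//     let x = AtomicU128::new(1);
//     x.store(2, Ordering::SeqCst);
//     assert_eq!(x.swap(3, Ordering::SeqCst), 2);
//     assert_eq!(x.compare_exchange(3, 4, Ordering::SeqCst, Ordering::SeqCst), Ok(3));
//     assert_eq!(x.load(Ordering::SeqCst), 4);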

#[cfg(test)]
mod tests {
    use super::*;

    cfg_no_fast_atomic_64! {
        test_atomic_int!(i64);
        test_atomic_int!(u64);
    }
    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    cfg_no_fast_atomic_64! {
        stress_test!(u64);
    }
    stress_test!(u128);
}