// SPDX-License-Identifier: Apache-2.0 OR MIT

/*
Fallback implementation using global locks.

This implementation uses a sequence lock (seqlock) for the global locks.

This is basically based on the global locks in crossbeam-utils's `AtomicCell`,
but the seqlock is implemented in a way that does not depend on UB
(see the comments in the optimistic_read method in the atomic! macro for details).

Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
type and the value type must be the same.
*/
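// As a rough, illustrative sketch of how the pieces below fit together (the stamp management
// itself lives in seq_lock.rs / seq_lock_wide.rs; this is not code from this module, just an
// outline of the read/write paths implemented in the atomic! macro):
//
//     // reader (see `load`):
//     if let Some(stamp) = lock.optimistic_read() { // None when an optimistic read cannot start
//         let val = self.optimistic_read();         // chunk-wise atomic read; may be torn
//         if lock.validate_read(stamp) {            // stamp unchanged => `val` is consistent
//             return val;
//         }
//     }
//     // ... otherwise fall back to taking the write lock and reading under it
//
//     // writer (see `store`):
//     let guard = lock.write();  // acquire the global lock for this address
//     self.write(val, &guard);   // chunk-wise atomic write
//     // dropping the guard releases the lock and increments the stamp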

#![cfg_attr(
    any(
        all(
            target_arch = "x86_64",
            not(portable_atomic_no_outline_atomics),
            not(any(target_env = "sgx", miri)),
        ),
        all(
            target_arch = "powerpc64",
            feature = "fallback",
            not(portable_atomic_no_outline_atomics),
            any(
                all(
                    target_os = "linux",
                    any(
                        all(
                            target_env = "gnu",
                            any(target_endian = "little", not(target_feature = "crt-static")),
                        ),
                        all(
                            any(target_env = "musl", target_env = "ohos", target_env = "uclibc"),
                            not(target_feature = "crt-static"),
                        ),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "android",
                target_os = "freebsd",
                target_os = "openbsd",
            ),
            not(any(miri, portable_atomic_sanitize_thread)),
        ),
        all(
            target_arch = "riscv32",
            not(any(miri, portable_atomic_sanitize_thread)),
            not(portable_atomic_no_asm),
            any(
                target_feature = "experimental-zacas",
                portable_atomic_target_feature = "experimental-zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default
                    any(target_os = "linux", target_os = "android"),
                ),
            ),
        ),
        all(
            target_arch = "riscv64",
            not(portable_atomic_no_asm),
            any(
                target_feature = "experimental-zacas",
                portable_atomic_target_feature = "experimental-zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default
                    any(target_os = "linux", target_os = "android"),
                    not(any(miri, portable_atomic_sanitize_thread)),
                ),
            ),
        ),
        all(
            target_arch = "arm",
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(target_os = "linux", target_os = "android"),
            not(portable_atomic_no_outline_atomics),
        ),
    ),
    allow(dead_code)
)]

#[macro_use]
pub(crate) mod utils;

// Use "wide" sequence lock if the pointer width <= 32 to prevent its counter from wrapping
// around.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
// vulnerable to wrap-around. But it's mostly okay, since on such primitive hardware the
// counter will not be incremented that fast.
//
// Some 64-bit architectures have ABIs with a 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
// so use it to implement the normal sequence lock.
cfg_has_fast_atomic_64! {
    mod seq_lock;
}
cfg_no_fast_atomic_64! {
    #[path = "seq_lock_wide.rs"]
    mod seq_lock;
}

use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};

use self::{
    seq_lock::{SeqLock, SeqLockWriteGuard},
    utils::CachePadded,
};

// Some 64-bit architectures have ABIs with a 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast, so use it to
// reduce the number of chunks used by the byte-wise atomic memcpy.
use self::seq_lock::{AtomicChunk, Chunk};

// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
    // The number of locks is a prime number because we want to make sure `addr % LEN` gets
    // dispersed across all locks.
    //
    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
    // so the actual concurrency level will be smaller.
    const LEN: usize = 67;
    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
    static LOCKS: [CachePadded<SeqLock>; LEN] = [
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L,
    ];

    // If the modulus is a constant number, the compiler will use crazy math to transform this into
    // a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
    &LOCKS[addr % LEN]
}

macro_rules! atomic {
    ($atomic_type:ident, $int_type:ident, $align:literal) => {
        #[repr(C, align($align))]
        pub(crate) struct $atomic_type {
            v: UnsafeCell<$int_type>,
        }

        impl $atomic_type {
            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();

            #[inline]
            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
                static_assert!($atomic_type::LEN > 1);
                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);

                // SAFETY: the caller must uphold the safety contract for `chunks`.
                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
            }

            #[inline]
            fn optimistic_read(&self) -> $int_type {
                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
                // SAFETY:
                // - There are no threads that perform non-atomic concurrent write operations.
                // - There is no writer that updates the value using atomic operations of different granularity.
                //
                // If atomic operations were not used here, this would cause a data race
                // when `write` performs a concurrent write operation.
                // Such a data race is sometimes considered virtually unproblematic
                // in SeqLock implementations:
                //
                // - https://github.com/Amanieu/seqlock/issues/2
                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
                //
                // However, in our use case, the implementation that loads/stores the value as
                // chunks of usize is fast enough and sound, so we use that implementation.
                //
                // See also the atomic-memcpy crate, a generic implementation of this pattern:
                // https://github.com/taiki-e/atomic-memcpy
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    dst[i] = chunks[i].load(Ordering::Relaxed);
                }
                // SAFETY: integers are plain old data types so we can always transmute to them.
                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
            }

            #[inline]
            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
                // This calls optimistic_read, which can return a torn value, but the resulting
                // value is guaranteed not to be torn because we hold the lock to write.
                self.optimistic_read()
            }

            #[inline]
            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
                // SAFETY:
                // - The guard guarantees that we hold the lock to write.
                // - There are no threads that perform non-atomic concurrent read or write operations.
                //
                // See optimistic_read for the reason that atomic operations are used here.
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    chunks[i].store(val[i], Ordering::Relaxed);
                }
            }
        }

        // Send is implicitly implemented.
        // SAFETY: any data races are prevented by the lock and atomic operation.
        unsafe impl Sync for $atomic_type {}

        impl_default_no_fetch_ops!($atomic_type, $int_type);
        impl_default_bit_opts!($atomic_type, $int_type);
        impl $atomic_type {
            #[inline]
            pub(crate) const fn new(v: $int_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::IS_ALWAYS_LOCK_FREE
            }
            pub(crate) const IS_ALWAYS_LOCK_FREE: bool = false;

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $int_type {
                crate::utils::assert_load_ordering(order);
                let lock = lock(self.v.get() as usize);

                // Try doing an optimistic read first.
                if let Some(stamp) = lock.optimistic_read() {
                    let val = self.optimistic_read();

                    if lock.validate_read(stamp) {
                        return val;
                    }
                }

                // Grab a regular write lock so that writers don't starve this load.
                let guard = lock.write();
                let val = self.read(&guard);
                // The value hasn't been changed. Drop the guard without incrementing the stamp.
                guard.abort();
                val
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let guard = lock(self.v.get() as usize).write();
                self.write(val, &guard)
            }

            #[inline]
            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(val, &guard);
                prev
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                crate::utils::assert_compare_exchange_ordering(success, failure);
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                if prev == current {
                    self.write(new, &guard);
                    Ok(prev)
                } else {
                    // The value hasn't been changed. Drop the guard without incrementing the stamp.
                    guard.abort();
                    Err(prev)
                }
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange_weak(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                self.compare_exchange(current, new, success, failure)
            }

            #[inline]
            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_add(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_sub(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev & val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!(prev & val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev | val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev ^ val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::max(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::min(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!prev, &guard);
                prev
            }
            #[inline]
            pub(crate) fn not(&self, order: Ordering) {
                self.fetch_not(order);
            }

            #[inline]
            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_neg(), &guard);
                prev
            }
            #[inline]
            pub(crate) fn neg(&self, order: Ordering) {
                self.fetch_neg(order);
            }

            #[inline]
            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
                self.v.get()
            }
        }
    };
}

#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(
        test,
        not(any(
            target_has_atomic = "64",
            all(
                target_arch = "riscv32",
                not(any(miri, portable_atomic_sanitize_thread)),
                not(portable_atomic_no_asm),
                any(
                    target_feature = "experimental-zacas",
                    portable_atomic_target_feature = "experimental-zacas",
                ),
            ),
        ))
    ))
)]
cfg_no_fast_atomic_64! {
    atomic!(AtomicI64, i64, 8);
    atomic!(AtomicU64, u64, 8);
}

atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);

#[cfg(test)]
mod tests {
    use super::*;

    cfg_no_fast_atomic_64! {
        test_atomic_int!(i64);
        test_atomic_int!(u64);
    }
    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    cfg_no_fast_atomic_64! {
        stress_test!(u64);
    }
    stress_test!(u128);
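
    // A minimal hand-written smoke test in addition to the macro-generated suites above.
    // This is only an illustrative sketch of how the fallback type is used; it relies solely
    // on methods defined in this module (new/load/store/swap/compare_exchange/fetch_add) and
    // on is_lock_free always reporting false for this lock-based implementation.
    #[test]
    fn fallback_u128_smoke() {
        use core::sync::atomic::Ordering;

        let a = AtomicU128::new(1);
        assert!(!AtomicU128::is_lock_free());
        assert!(!AtomicU128::IS_ALWAYS_LOCK_FREE);
        assert_eq!(a.load(Ordering::SeqCst), 1);
        a.store(2, Ordering::SeqCst);
        assert_eq!(a.swap(3, Ordering::SeqCst), 2);
        assert_eq!(a.compare_exchange(3, 4, Ordering::SeqCst, Ordering::Relaxed), Ok(3));
        assert_eq!(a.compare_exchange(3, 5, Ordering::SeqCst, Ordering::Relaxed), Err(4));
        assert_eq!(a.fetch_add(10, Ordering::SeqCst), 4);
        assert_eq!(a.load(Ordering::SeqCst), 14);
    }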
}