// SPDX-License-Identifier: Apache-2.0 OR MIT

/*
Fallback implementation using global locks.

This implementation uses a seqlock for the global locks.

This is basically based on the global locks in crossbeam-utils's `AtomicCell`,
but the seqlock is implemented in a way that does not depend on UB
(see the comments in the optimistic_read method in the atomic! macro for details).

Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
type and the value type must be the same.
*/

#![cfg_attr(
    any(
        all(
            target_arch = "x86_64",
            not(portable_atomic_no_outline_atomics),
            not(any(target_env = "sgx", miri)),
        ),
        all(
            target_arch = "powerpc64",
            feature = "fallback",
            not(portable_atomic_no_outline_atomics),
            any(
                all(
                    target_os = "linux",
                    any(
                        all(
                            target_env = "gnu",
                            any(target_endian = "little", not(target_feature = "crt-static")),
                        ),
                        all(
                            any(target_env = "musl", target_env = "ohos", target_env = "uclibc"),
                            not(target_feature = "crt-static"),
                        ),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "android",
                target_os = "freebsd",
                target_os = "openbsd",
            ),
            not(any(miri, portable_atomic_sanitize_thread)),
        ),
        all(
            target_arch = "riscv32",
            not(any(miri, portable_atomic_sanitize_thread)),
            not(portable_atomic_no_asm),
            any(
                target_feature = "experimental-zacas",
                portable_atomic_target_feature = "experimental-zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default
                    any(target_os = "linux", target_os = "android"),
                ),
            ),
        ),
        all(
            target_arch = "riscv64",
            not(portable_atomic_no_asm),
            any(
                target_feature = "experimental-zacas",
                portable_atomic_target_feature = "experimental-zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default
                    any(target_os = "linux", target_os = "android"),
                    not(any(miri, portable_atomic_sanitize_thread)),
                ),
            ),
        ),
        all(
            target_arch = "arm",
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(target_os = "linux", target_os = "android"),
            not(portable_atomic_no_outline_atomics),
        ),
    ),
    allow(dead_code)
)]

#[macro_use]
pub(crate) mod utils;

// Use the "wide" sequence lock if the pointer width is <= 32, to prevent its counter from
// wrapping around.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
// vulnerable to wrap-around. But that is mostly okay, since on such primitive hardware, the
// counter will not be incremented that fast.
//
// Some 64-bit architectures have an ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
// so use it to implement the normal sequence lock.
cfg_has_fast_atomic_64! {
    mod seq_lock;
}
cfg_no_fast_atomic_64! {
    #[path = "seq_lock_wide.rs"]
    mod seq_lock;
}

use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};

use self::{
    seq_lock::{SeqLock, SeqLockWriteGuard},
    utils::CachePadded,
};

// Some 64-bit architectures have an ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast,
// so use it to reduce the number of chunks in the byte-wise atomic memcpy.
use self::seq_lock::{AtomicChunk, Chunk};

// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
    // The number of locks is a prime number because we want to make sure `addr % LEN` gets
    // dispersed across all locks.
    //
    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
    // so the actual concurrency level will be smaller.
    const LEN: usize = 67;
    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
    static LOCKS: [CachePadded<SeqLock>; LEN] = [
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L,
    ];

    // If the modulus is a constant number, the compiler will use crazy math to transform this into
    // a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
    &LOCKS[addr % LEN]
}
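
// A test-only sketch added for illustration (the module and test names are hypothetical):
// the same address always resolves to the same lock, and addresses whose difference is
// not a multiple of LEN map to different locks.
#[cfg(test)]
mod lock_dispersion_sketch {
    use super::lock;

    #[test]
    fn addr_to_lock_mapping() {
        // The same address must always resolve to the same lock entry.
        assert!(core::ptr::eq(lock(0x1000), lock(0x1000)));
        // 0x1000 and 0x1001 differ by 1 (not a multiple of 67), so they hit different entries.
        assert!(!core::ptr::eq(lock(0x1000), lock(0x1001)));
    }
}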

macro_rules! atomic {
    ($atomic_type:ident, $int_type:ident, $align:literal) => {
        #[repr(C, align($align))]
        pub(crate) struct $atomic_type {
            v: UnsafeCell<$int_type>,
        }

        impl $atomic_type {
            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();

            #[inline]
            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
                static_assert!($atomic_type::LEN > 1);
                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);

                // SAFETY: the caller must uphold the safety contract for `chunks`.
                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
            }

            #[inline]
            fn optimistic_read(&self) -> $int_type {
                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
                // SAFETY:
                // - There are no threads that perform non-atomic concurrent write operations.
                // - There is no writer that updates the value using atomic operations of different granularity.
                //
                // If atomic operations were not used here, this would cause a data race
                // when `write` performs a concurrent write operation.
                // Such a data race is sometimes considered virtually unproblematic
                // in SeqLock implementations:
                //
                // - https://github.com/Amanieu/seqlock/issues/2
                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
                //
                // However, in our use case, the implementation that loads/stores the value as
                // chunks of usize is fast enough and sound, so we use that implementation.
                //
                // See also the atomic-memcpy crate, a generic implementation of this pattern:
                // https://github.com/taiki-e/atomic-memcpy
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    dst[i] = chunks[i].load(Ordering::Relaxed);
                }
                // SAFETY: integers are plain old data types so we can always transmute to them.
                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
            }

            #[inline]
            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
                // This calls optimistic_read, which can return a torn value, but the resulting
                // value is guaranteed not to be torn because we hold the lock to write.
                self.optimistic_read()
            }

            #[inline]
            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
                // SAFETY:
                // - The guard guarantees that we hold the lock to write.
                // - There are no threads that perform non-atomic concurrent read or write operations.
                //
                // See optimistic_read for the reason that atomic operations are used here.
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    chunks[i].store(val[i], Ordering::Relaxed);
                }
            }
        }

        // Send is implicitly implemented.
        // SAFETY: any data races are prevented by the lock and atomic operation.
        unsafe impl Sync for $atomic_type {}

        impl_default_no_fetch_ops!($atomic_type, $int_type);
        impl_default_bit_opts!($atomic_type, $int_type);
        impl $atomic_type {
            #[inline]
            pub(crate) const fn new(v: $int_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::IS_ALWAYS_LOCK_FREE
            }
            pub(crate) const IS_ALWAYS_LOCK_FREE: bool = false;

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $int_type {
                crate::utils::assert_load_ordering(order);
                let lock = lock(self.v.get() as usize);

                // Try doing an optimistic read first.
                if let Some(stamp) = lock.optimistic_read() {
                    let val = self.optimistic_read();

                    if lock.validate_read(stamp) {
                        return val;
                    }
                }

                // Grab a regular write lock so that writers don't starve this load.
                let guard = lock.write();
                let val = self.read(&guard);
                // The value hasn't been changed. Drop the guard without incrementing the stamp.
                guard.abort();
                val
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let guard = lock(self.v.get() as usize).write();
                self.write(val, &guard)
            }

            #[inline]
            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(val, &guard);
                prev
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                crate::utils::assert_compare_exchange_ordering(success, failure);
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                if prev == current {
                    self.write(new, &guard);
                    Ok(prev)
                } else {
                    // The value hasn't been changed. Drop the guard without incrementing the stamp.
                    guard.abort();
                    Err(prev)
                }
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange_weak(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                self.compare_exchange(current, new, success, failure)
            }

            #[inline]
            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_add(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_sub(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev & val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!(prev & val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev | val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev ^ val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::max(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::min(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!prev, &guard);
                prev
            }
            #[inline]
            pub(crate) fn not(&self, order: Ordering) {
                self.fetch_not(order);
            }

            #[inline]
            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_neg(), &guard);
                prev
            }
            #[inline]
            pub(crate) fn neg(&self, order: Ordering) {
                self.fetch_neg(order);
            }

            #[inline]
            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
                self.v.get()
            }
        }
    };
}

#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(
        test,
        not(any(
            target_has_atomic = "64",
            all(
                target_arch = "riscv32",
                not(any(miri, portable_atomic_sanitize_thread)),
                not(portable_atomic_no_asm),
                any(
                    target_feature = "experimental-zacas",
                    portable_atomic_target_feature = "experimental-zacas",
                ),
            ),
        ))
    ))
)]
cfg_no_fast_atomic_64! {
    atomic!(AtomicI64, i64, 8);
    atomic!(AtomicU64, u64, 8);
}

atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);
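
// A small test-only sketch added for illustration (the module name is hypothetical): this
// fallback is never lock-free, and both the constant and the runtime query report that.
#[cfg(test)]
mod lock_free_reporting_sketch {
    use super::AtomicU128;

    #[test]
    fn fallback_is_not_lock_free() {
        assert!(!AtomicU128::IS_ALWAYS_LOCK_FREE);
        assert!(!AtomicU128::is_lock_free());
    }
}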

#[cfg(test)]
mod tests {
    use super::*;

    cfg_no_fast_atomic_64! {
        test_atomic_int!(i64);
        test_atomic_int!(u64);
    }
    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // The load/store/swap implementation is not affected by signedness, so it is
    // enough to test only the unsigned types.
    cfg_no_fast_atomic_64! {
        stress_test!(u64);
    }
    stress_test!(u128);
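
    // A minimal usage sketch (not part of the original test suite; added for illustration):
    // exercises the seqlock-backed AtomicU128 through the same API surface as core's atomics.
    #[test]
    fn fallback_u128_usage_sketch() {
        use core::sync::atomic::Ordering;

        let a = AtomicU128::new(1);
        assert_eq!(a.load(Ordering::SeqCst), 1);
        a.store(2, Ordering::SeqCst);
        assert_eq!(a.swap(5, Ordering::SeqCst), 2);
        // compare_exchange returns the previous value in both the Ok and Err cases.
        assert_eq!(a.compare_exchange(5, 10, Ordering::SeqCst, Ordering::SeqCst), Ok(5));
        assert_eq!(a.compare_exchange(5, 12, Ordering::SeqCst, Ordering::SeqCst), Err(10));
        assert_eq!(a.fetch_add(1, Ordering::SeqCst), 10);
        assert_eq!(a.load(Ordering::SeqCst), 11);
    }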
}