1 | // SPDX-License-Identifier: Apache-2.0 OR MIT |
2 | |
3 | /* |
4 | Atomic operations implementation on x86/x86_64. |
5 | |
6 | This module provides atomic operations not supported by LLVM or optimizes |
7 | cases where LLVM code generation is not optimal. |
8 | |
Note: Miri and ThreadSanitizer do not support inline assembly, so on them we don't use
this module and use a CAS loop instead.
11 | |
12 | Refs: |
13 | - x86 and amd64 instruction reference https://www.felixcloutier.com/x86 |
14 | |
15 | Generated asm: |
16 | - x86_64 https://godbolt.org/z/ETa1MGTP3 |
17 | */ |
18 | |
19 | #[cfg (not(portable_atomic_no_asm))] |
20 | use core::arch::asm; |
21 | use core::sync::atomic::Ordering; |
22 | |
23 | use super::core_atomic::{ |
24 | AtomicI8, AtomicI16, AtomicI32, AtomicI64, AtomicIsize, AtomicU8, AtomicU16, AtomicU32, |
25 | AtomicU64, AtomicUsize, |
26 | }; |
27 | |
// Template modifier spliced after the pointer operand name in the `asm!`
// templates of this module (`[{dst<modifier>}]`).
//
// With 32-bit pointers the operand is formatted with `:e`, which selects the
// 32-bit register name (e.g. `eax`); with 64-bit pointers the operand is used
// without a modifier.
#[cfg(target_pointer_width = "32")]
macro_rules! ptr_modifier {
    () => (":e");
}
#[cfg(target_pointer_width = "64")]
macro_rules! ptr_modifier {
    () => ("");
}
40 | |
// Implements in-place atomic `not` and `neg` for `$atomic_type`, each as a
// single `lock`-prefixed instruction operating directly on memory.
//
// `$ptr_size` is the memory operand size keyword (`"byte"`, `"word"`,
// `"dword"` or `"qword"`) that is spliced into the instruction template.
macro_rules! atomic_int {
    ($atomic_type:ident, $ptr_size:tt) => {
        impl $atomic_type {
            // Atomically replaces the stored value with its bitwise complement.
            // The previous value is not returned and `_order` is not consulted.
            #[inline]
            pub(crate) fn not(&self, _order: Ordering) {
                // SAFETY: the pointer is derived from `&self`, so it is valid, and
                // the `lock`-prefixed instruction accesses it atomically, so no
                // data race can occur.
                //
                // https://www.felixcloutier.com/x86/not
                unsafe {
                    // The ordering argument is ignored: atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock not ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}]"),
                        dst = in(reg) self.as_ptr(),
                        // NOT does not modify any flags, so `preserves_flags` is fine here.
                        options(nostack, preserves_flags),
                    );
                }
            }
            // Atomically replaces the stored value with its two's-complement
            // negation. The previous value is not returned and `_order` is not
            // consulted.
            #[inline]
            pub(crate) fn neg(&self, _order: Ordering) {
                // SAFETY: the pointer is derived from `&self`, so it is valid, and
                // the `lock`-prefixed instruction accesses it atomically, so no
                // data race can occur.
                //
                // https://www.felixcloutier.com/x86/neg
                unsafe {
                    // The ordering argument is ignored: atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock neg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}]"),
                        dst = in(reg) self.as_ptr(),
                        // `preserves_flags` must not be used: NEG modifies the
                        // CF, OF, SF, ZF, AF, and PF flags.
                        options(nostack),
                    );
                }
            }
        }
    };
}
80 | |
81 | atomic_int!(AtomicI8, "byte" ); |
82 | atomic_int!(AtomicU8, "byte" ); |
83 | atomic_int!(AtomicI16, "word" ); |
84 | atomic_int!(AtomicU16, "word" ); |
85 | atomic_int!(AtomicI32, "dword" ); |
86 | atomic_int!(AtomicU32, "dword" ); |
87 | #[cfg (target_arch = "x86_64" )] |
88 | atomic_int!(AtomicI64, "qword" ); |
89 | #[cfg (target_arch = "x86_64" )] |
90 | atomic_int!(AtomicU64, "qword" ); |
91 | #[cfg (target_pointer_width = "32" )] |
92 | atomic_int!(AtomicIsize, "dword" ); |
93 | #[cfg (target_pointer_width = "32" )] |
94 | atomic_int!(AtomicUsize, "dword" ); |
95 | #[cfg (target_pointer_width = "64" )] |
96 | atomic_int!(AtomicIsize, "qword" ); |
97 | #[cfg (target_pointer_width = "64" )] |
98 | atomic_int!(AtomicUsize, "qword" ); |
99 | |
// On x86 the 64-bit signed type does not use the asm-based implementation:
// `not`/`neg` forward to the corresponding `fetch_*` operation and throw away
// the previous value it returns.
#[cfg(target_arch = "x86")]
impl AtomicI64 {
    // Atomically complements the value with the given ordering; the old value
    // is discarded.
    #[inline]
    pub(crate) fn not(&self, order: Ordering) {
        let _ = self.fetch_not(order);
    }
    // Atomically negates the value with the given ordering; the old value is
    // discarded.
    #[inline]
    pub(crate) fn neg(&self, order: Ordering) {
        let _ = self.fetch_neg(order);
    }
}
// On x86 the 64-bit unsigned type does not use the asm-based implementation:
// `not`/`neg` forward to the corresponding `fetch_*` operation and throw away
// the previous value it returns.
#[cfg(target_arch = "x86")]
impl AtomicU64 {
    // Atomically complements the value with the given ordering; the old value
    // is discarded.
    #[inline]
    pub(crate) fn not(&self, order: Ordering) {
        let _ = self.fetch_not(order);
    }
    // Atomically negates the value with the given ordering; the old value is
    // discarded.
    #[inline]
    pub(crate) fn neg(&self, order: Ordering) {
        let _ = self.fetch_neg(order);
    }
}
122 | |
// Implements `bit_set`, `bit_clear` and `bit_toggle` for `$atomic_type`.
//
// - `$int_type`: the integer type stored in `$atomic_type`.
// - `$val_modifier`: `asm!` template modifier applied to the bit-index operand
//   so that its register width matches the memory operand.
// - `$ptr_size`: memory operand size keyword (`"word"`, `"dword"`, `"qword"`).
macro_rules! atomic_bit_opts {
    ($atomic_type:ident, $int_type:ident, $val_modifier:tt, $ptr_size:tt) => {
        // Which implementation is used depends on what LLVM can generate:
        // - LLVM 14 and older cannot generate `lock bt{s,r,c}` at all.
        // - LLVM 15 generates `lock bt{s,r,c}` only for immediate bit offsets.
        // - LLVM 16+ generates `lock bt{s,r,c}` for immediate and register bit offsets.
        // https://godbolt.org/z/TGhr5z4ds
        // Hence the fetch_* based implementations are used on LLVM 16+, and the
        // asm based implementations below are used otherwise.
        #[cfg(not(portable_atomic_pre_llvm_16))]
        impl_default_bit_opts!($atomic_type, $int_type);
        #[cfg(portable_atomic_pre_llvm_16)]
        impl $atomic_type {
            // Width of `$int_type` in bits, computed by hand because
            // `<integer>::BITS` requires Rust 1.53.
            const BITS: u32 = (core::mem::size_of::<$int_type>() * 8) as u32;

            // Atomically sets bit `bit` (taken modulo `BITS`) and returns whether
            // that bit was set before the operation. `_order` is not consulted.
            #[inline]
            pub(crate) fn bit_set(&self, bit: u32, _order: Ordering) -> bool {
                // Masking with the bit size of the type keeps the index inside the operand.
                let idx = (bit & (Self::BITS - 1)) as $int_type;
                let ptr = self.as_ptr();
                let carry: u8;
                // SAFETY: `ptr` is derived from `&self`, so it is valid; the
                // `lock`-prefixed instruction accesses it atomically, so no data
                // race can occur; and `idx` has been masked so that the access
                // stays in bounds.
                //
                // https://www.felixcloutier.com/x86/bts
                unsafe {
                    // The ordering argument is ignored: atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock bts ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
                        "setb {r}",
                        dst = in(reg) ptr,
                        bit = in(reg) idx,
                        r = out(reg_byte) carry,
                        // `preserves_flags` must not be used: BTS modifies the CF flag.
                        options(nostack),
                    );
                    // `setb` only ever produces 0 or 1; stating this may help remove an extra test.
                    crate::utils::assert_unchecked(carry == 0 || carry == 1);
                }
                carry != 0
            }

            // Atomically clears bit `bit` (taken modulo `BITS`) and returns whether
            // that bit was set before the operation. `_order` is not consulted.
            #[inline]
            pub(crate) fn bit_clear(&self, bit: u32, _order: Ordering) -> bool {
                // Masking with the bit size of the type keeps the index inside the operand.
                let idx = (bit & (Self::BITS - 1)) as $int_type;
                let ptr = self.as_ptr();
                let carry: u8;
                // SAFETY: `ptr` is derived from `&self`, so it is valid; the
                // `lock`-prefixed instruction accesses it atomically, so no data
                // race can occur; and `idx` has been masked so that the access
                // stays in bounds.
                //
                // https://www.felixcloutier.com/x86/btr
                unsafe {
                    // The ordering argument is ignored: atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock btr ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
                        "setb {r}",
                        dst = in(reg) ptr,
                        bit = in(reg) idx,
                        r = out(reg_byte) carry,
                        // `preserves_flags` must not be used: BTR modifies the CF flag.
                        options(nostack),
                    );
                    // `setb` only ever produces 0 or 1; stating this may help remove an extra test.
                    crate::utils::assert_unchecked(carry == 0 || carry == 1);
                }
                carry != 0
            }

            // Atomically flips bit `bit` (taken modulo `BITS`) and returns whether
            // that bit was set before the operation. `_order` is not consulted.
            #[inline]
            pub(crate) fn bit_toggle(&self, bit: u32, _order: Ordering) -> bool {
                // Masking with the bit size of the type keeps the index inside the operand.
                let idx = (bit & (Self::BITS - 1)) as $int_type;
                let ptr = self.as_ptr();
                let carry: u8;
                // SAFETY: `ptr` is derived from `&self`, so it is valid; the
                // `lock`-prefixed instruction accesses it atomically, so no data
                // race can occur; and `idx` has been masked so that the access
                // stays in bounds.
                //
                // https://www.felixcloutier.com/x86/btc
                unsafe {
                    // The ordering argument is ignored: atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock btc ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
                        "setb {r}",
                        dst = in(reg) ptr,
                        bit = in(reg) idx,
                        r = out(reg_byte) carry,
                        // `preserves_flags` must not be used: BTC modifies the CF flag.
                        options(nostack),
                    );
                    // `setb` only ever produces 0 or 1; stating this may help remove an extra test.
                    crate::utils::assert_unchecked(carry == 0 || carry == 1);
                }
                carry != 0
            }
        }
    };
}
214 | |
215 | impl_default_bit_opts!(AtomicI8, i8); |
216 | impl_default_bit_opts!(AtomicU8, u8); |
217 | atomic_bit_opts!(AtomicI16, i16, ":x" , "word" ); |
218 | atomic_bit_opts!(AtomicU16, u16, ":x" , "word" ); |
219 | atomic_bit_opts!(AtomicI32, i32, ":e" , "dword" ); |
220 | atomic_bit_opts!(AtomicU32, u32, ":e" , "dword" ); |
221 | #[cfg (target_arch = "x86_64" )] |
222 | atomic_bit_opts!(AtomicI64, i64, "" , "qword" ); |
223 | #[cfg (target_arch = "x86_64" )] |
224 | atomic_bit_opts!(AtomicU64, u64, "" , "qword" ); |
225 | #[cfg (target_arch = "x86" )] |
226 | impl_default_bit_opts!(AtomicI64, i64); |
227 | #[cfg (target_arch = "x86" )] |
228 | impl_default_bit_opts!(AtomicU64, u64); |
229 | #[cfg (target_pointer_width = "32" )] |
230 | atomic_bit_opts!(AtomicIsize, isize, ":e" , "dword" ); |
231 | #[cfg (target_pointer_width = "32" )] |
232 | atomic_bit_opts!(AtomicUsize, usize, ":e" , "dword" ); |
233 | #[cfg (target_pointer_width = "64" )] |
234 | atomic_bit_opts!(AtomicIsize, isize, "" , "qword" ); |
235 | #[cfg (target_pointer_width = "64" )] |
236 | atomic_bit_opts!(AtomicUsize, usize, "" , "qword" ); |
237 | |