//! The half-lock structure
//!
//! We need a way to protect the structure with configured hooks ‒ a signal may happen in an
//! arbitrary thread and needs to read them while another thread might be manipulating the
//! structure.
//!
//! Under ordinary circumstances we would be happy to just use `Mutex<HashMap<c_int, _>>`. However,
//! as we use it in the signal handler, we are severely limited in what we can use. So we choose to
//! implement a kind of spin-lock thing with atomics.
//!
//! On the reader side the structure is simply locked and then unlocked, making sure it doesn't
//! disappear while in use.
//!
//! The writer has a separate mutex (that prevents other writers; this is used outside of the
//! signal handler), makes a copy of the data and swaps an atomic pointer to the data structure.
//! But it waits until everything is unlocked (no signal handler holds the old data) before
//! dropping the old instance. There's a generation trick to make sure a newly arriving signal
//! locks the other slot.
//!
//! The downside is, this is an active spin lock at the writer end. However, we assume that:
//!
//! * Signals are a one-time setup before we actually have threads. We just need to make *sure* we
//!   are safe even if this is not true.
//! * Signals are rare, and one happening at the same time as a write is even rarer.
//! * Signals are short, as there is mostly nothing allowed inside them anyway.
//! * Our tool box is severely limited.
//!
//! Therefore this is hopefully a reasonable trade-off.
//!
//! # Atomic orderings
//!
//! The whole code uses SeqCst conservatively. Atomics are not used here for performance and their
//! cost is a minor price in the context of signals anyway. But the comments state which orderings
//! should be enough in practice, in case someone wants to get inspired (but do make your own check
//! through them anyway).
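//!
//! A rough usage sketch (the plain `usize` payload here is only an illustration; the real payload
//! in this crate is the table of configured hooks):
//!
//! ```ignore
//! let lock = HalfLock::new(0usize);
//!
//! // Reader side (e.g. inside the signal handler): lock by `read()`, unlock when the guard drops.
//! let current = *lock.read();
//!
//! // Writer side (ordinary threads only): grab the writer mutex and publish a new copy.
//! let mut writer = lock.write();
//! writer.store(current + 1);
//! ```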

use std::isize;
use std::marker::PhantomData;
use std::ops::Deref;
use std::sync::atomic::{self, AtomicPtr, AtomicUsize, Ordering};
use std::sync::{Mutex, MutexGuard, PoisonError};
use std::thread;

use libc;

const YIELD_EVERY: usize = 16;
const MAX_GUARDS: usize = (isize::MAX) as usize;

pub(crate) struct ReadGuard<'a, T: 'a> {
    data: &'a T,
    lock: &'a AtomicUsize,
}

impl<'a, T> Deref for ReadGuard<'a, T> {
    type Target = T;
    fn deref(&self) -> &T {
        self.data
    }
}

impl<'a, T> Drop for ReadGuard<'a, T> {
    fn drop(&mut self) {
        // We effectively unlock; Release would be enough.
        self.lock.fetch_sub(1, Ordering::SeqCst);
    }
}

pub(crate) struct WriteGuard<'a, T: 'a> {
    _guard: MutexGuard<'a, ()>,
    lock: &'a HalfLock<T>,
    data: &'a T,
}

impl<'a, T> WriteGuard<'a, T> {
    pub(crate) fn store(&mut self, val: T) {
        // Move to the heap and convert to raw pointer for AtomicPtr.
        let new: *mut T = Box::into_raw(Box::new(val));

        self.data = unsafe { &*new };

        // We can just put the new value in here safely; we only worry about dropping the old one.
        // Release might (?) be enough, to "upload" the data.
        let old: *mut T = self.lock.data.swap(new, Ordering::SeqCst);

        // Now we make sure there's no reader having the old data.
        self.lock.write_barrier();

        drop(unsafe { Box::from_raw(old) });
    }
}

impl<'a, T> Deref for WriteGuard<'a, T> {
    type Target = T;
    fn deref(&self) -> &T {
        // Protected by that mutex
        self.data
    }
}

pub(crate) struct HalfLock<T> {
    // We conceptually contain an instance of T
    _t: PhantomData<T>,
    // The actual data as a pointer.
    data: AtomicPtr<T>,
    // The generation of the data. Influences which slot of the lock counter we use.
    generation: AtomicUsize,
    // How many active locks are there?
    lock: [AtomicUsize; 2],
    // Mutex for the writers; only one writer.
    write_mutex: Mutex<()>,
}

impl<T> HalfLock<T> {
    pub(crate) fn new(data: T) -> Self {
        // Move to the heap so we can safely point there. Then convert to raw pointer as AtomicPtr
        // operates on raw pointers. The AtomicPtr effectively acts like Box for us semantically.
        let ptr = Box::into_raw(Box::new(data));
        Self {
            _t: PhantomData,
            data: AtomicPtr::new(ptr),
            generation: AtomicUsize::new(0),
            lock: [AtomicUsize::new(0), AtomicUsize::new(0)],
            write_mutex: Mutex::new(()),
        }
    }

    pub(crate) fn read(&self) -> ReadGuard<T> {
        // Relaxed should be enough; we only pick one or the other slot and the writer observes
        // that both were 0 at some time. So the actual value doesn't really matter for safety;
        // the switching only improves performance.
        let gen = self.generation.load(Ordering::SeqCst);
        let lock = &self.lock[gen % 2];
        // Effectively locking something, acquire should be enough.
        let guard_cnt = lock.fetch_add(1, Ordering::SeqCst);

        // This is to prevent overflowing the counter in some degenerate cases, which could lead to
        // UB (freeing data while still in use). However, as this data structure is used only
        // internally, it's not possible to leak the guard and the guard itself takes some memory,
        // so it should be practically impossible to trigger this case. Still, we include it out of
        // an abundance of caution.
        //
        // This technically is not fully correct, as enough threads sitting between here and the
        // abort below could still overflow it and the data could get freed for some *other*
        // thread, but that would mean having too many active threads to fit into RAM anyway and is
        // an even more absurd corner case than the above.
        if guard_cnt > MAX_GUARDS {
            unsafe { libc::abort() };
        }

        // Acquire should be enough; we need to "download" the data, paired with the swap on the
        // same pointer.
        let data = self.data.load(Ordering::SeqCst);
        // Safe:
        // * It did point to valid data when put in.
        // * Protected by lock, so still valid.
        let data = unsafe { &*data };

        ReadGuard { data, lock }
    }

    fn update_seen(&self, seen_zero: &mut [bool; 2]) {
        for (seen, slot) in seen_zero.iter_mut().zip(&self.lock) {
            *seen = *seen || slot.load(Ordering::SeqCst) == 0;
        }
    }

    fn write_barrier(&self) {
        // Do a first check of seeing zeroes before we switch the generation. At least one of them
        // should be zero by now, due to having drained the generation before leaving the previous
        // writer.
        let mut seen_zero = [false; 2];
        self.update_seen(&mut seen_zero);
        // By switching the generation to the other slot, we make sure the currently active slot
        // starts draining while the other starts filling up.
        self.generation.fetch_add(1, Ordering::SeqCst); // Overflow is fine.

        let mut iter = 0usize;
        while !seen_zero.iter().all(|s| *s) {
            iter = iter.wrapping_add(1);

            // Be somewhat less aggressive while looping, switch to the other threads if possible.
            if cfg!(not(miri)) {
                if iter % YIELD_EVERY == 0 {
                    thread::yield_now();
                } else {
                    // Replaced by hint::spin_loop, but we want to support older compilers.
                    #[allow(deprecated)]
                    atomic::spin_loop_hint();
                }
            }

            self.update_seen(&mut seen_zero);
        }
    }

    pub(crate) fn write(&self) -> WriteGuard<T> {
        // While it's possible the user code panics, our code in store doesn't and the data gets
        // swapped atomically. So if user code panics, nothing gets changed and poisoning is of no
        // interest here.
        let guard = self
            .write_mutex
            .lock()
            .unwrap_or_else(PoisonError::into_inner);

        // Relaxed should be enough, as we are under the same mutex that was used to get the data
        // in.
        let data = self.data.load(Ordering::SeqCst);
        // Safe:
        // * Stored as valid data
        // * Only this method, protected by mutex, can change the pointer, so it didn't go away.
        let data = unsafe { &*data };

        WriteGuard {
            data,
            _guard: guard,
            lock: self,
        }
    }
}

impl<T> Drop for HalfLock<T> {
    fn drop(&mut self) {
        // During drop we are sure there are no other borrows of the data so we are free to just
        // drop it. Also, the drop impl won't be called in practice in our case, as it is used
        // solely as a global variable, but we provide it for completeness and tests anyway.
        //
        // unsafe: the pointer in there is always valid, we just take the last instance out.
        unsafe {
            // Acquire should be enough.
            let data = Box::from_raw(self.data.load(Ordering::SeqCst));
            drop(data);
        }
    }
}
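
// A minimal sanity check of the lock itself: a value published through `write().store()` must be
// what later `read()` calls observe. This is only an illustrative sketch; the actual
// signal-handler scenario can't be exercised in a plain unit test.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn store_then_read() {
        let lock = HalfLock::new(0usize);
        assert_eq!(*lock.read(), 0);

        // The read guard above was only a temporary, so the write barrier has nothing to wait for.
        let mut writer = lock.write();
        writer.store(42);
        assert_eq!(*writer, 42);
        drop(writer);

        assert_eq!(*lock.read(), 42);
    }
}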