| 1 | use std::{ |
| 2 | cell::UnsafeCell, |
| 3 | mem::{ManuallyDrop, MaybeUninit}, |
| 4 | }; |
| 5 | |
/// Defers input usage and output drop during benchmarking.
///
/// To reduce memory usage, this only allocates storage for inputs if outputs do
/// not need deferred drop.
///
/// No discriminant is stored: which field is active is decided at compile time
/// by `Self::ONLY_INPUTS`, and every access to a field must be guarded by that
/// constant.
///
/// Both fields are wrapped in `ManuallyDrop` because union fields are never
/// dropped automatically; the `Drop` impl releases whichever `Vec` is active.
pub(crate) union DeferStore<I, O> {
    /// The variant used if outputs need to be dropped.
    ///
    /// Inputs are stored contiguously with outputs in memory. This improves
    /// performance by:
    /// - Removing the overhead of `zip` between two separate buffers.
    /// - Improving cache locality and cache prefetching. Input is strategically
    ///   placed before output because iteration is from low to high addresses,
    ///   so doing this makes memory access patterns very predictable.
    slots: ManuallyDrop<Vec<DeferSlot<I, O>>>,

    /// The variant used if `Self::ONLY_INPUTS`, i.e. outputs do not need to be
    /// dropped.
    inputs: ManuallyDrop<Vec<DeferSlotItem<I>>>,
}
| 25 | |
| 26 | impl<I, O> Drop for DeferStore<I, O> { |
| 27 | #[inline ] |
| 28 | fn drop(&mut self) { |
| 29 | // SAFETY: The correct variant is used based on `ONLY_INPUTS`. |
| 30 | unsafe { |
| 31 | if Self::ONLY_INPUTS { |
| 32 | ManuallyDrop::drop(&mut self.inputs) |
| 33 | } else { |
| 34 | ManuallyDrop::drop(&mut self.slots) |
| 35 | } |
| 36 | } |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | impl<I, O> Default for DeferStore<I, O> { |
| 41 | #[inline ] |
| 42 | fn default() -> Self { |
| 43 | // SAFETY: The correct variant is used based on `ONLY_INPUTS`. |
| 44 | unsafe { |
| 45 | if Self::ONLY_INPUTS { |
| 46 | Self { inputs: ManuallyDrop::new(Vec::new()) } |
| 47 | } else { |
| 48 | Self { slots: ManuallyDrop::new(Vec::new()) } |
| 49 | } |
| 50 | } |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | impl<I, O> DeferStore<I, O> { |
| 55 | /// Whether only inputs need to be deferred. |
| 56 | /// |
| 57 | /// If `true`, outputs do not get inserted into `DeferStore`. |
| 58 | const ONLY_INPUTS: bool = !std::mem::needs_drop::<O>(); |
| 59 | |
| 60 | /// Prepares storage for iterating over `DeferSlot`s for a sample. |
| 61 | #[inline ] |
| 62 | pub fn prepare(&mut self, sample_size: usize) { |
| 63 | // Common implementation regardless of `Vec` item type. |
| 64 | macro_rules! imp { |
| 65 | ($vec:expr) => {{ |
| 66 | $vec.clear(); |
| 67 | $vec.reserve_exact(sample_size); |
| 68 | |
| 69 | // SAFETY: `Vec` only contains `MaybeUninit` fields, so values |
| 70 | // may be safely created from uninitialized memory. |
| 71 | unsafe { $vec.set_len(sample_size) } |
| 72 | }}; |
| 73 | } |
| 74 | |
| 75 | // SAFETY: The correct variant is used based on `ONLY_INPUTS`. |
| 76 | unsafe { |
| 77 | if Self::ONLY_INPUTS { |
| 78 | imp!(self.inputs) |
| 79 | } else { |
| 80 | imp!(self.slots) |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | /// Returns the sample's slots for iteration. |
| 86 | /// |
| 87 | /// The caller is expected to use the returned slice to initialize inputs |
| 88 | /// for the sample loop. |
| 89 | /// |
| 90 | /// This returns `Err` containing only input slots if `O` does not need |
| 91 | /// deferred drop. Ideally this would be implemented directly on `DeferSlot` |
| 92 | /// but there's no way to change its size based on `needs_drop::<O>()`. |
| 93 | #[inline (always)] |
| 94 | pub fn slots(&self) -> Result<&[DeferSlot<I, O>], &[DeferSlotItem<I>]> { |
| 95 | unsafe { |
| 96 | if Self::ONLY_INPUTS { |
| 97 | Err(&self.inputs) |
| 98 | } else { |
| 99 | Ok(&self.slots) |
| 100 | } |
| 101 | } |
| 102 | } |
| 103 | } |
| 104 | |
/// Storage for a single iteration within a sample.
///
/// Input is stored before output to improve cache prefetching since iteration
/// progresses from low to high addresses.
///
/// # UnsafeCell
///
/// `UnsafeCell` is used to allow `output` to safely refer to `input`. Although
/// `output` itself is never aliased, it is also stored as `UnsafeCell` in order
/// to get mutable access through a shared `&DeferSlot`.
///
/// # Safety
///
/// All fields **must** be `MaybeUninit`. This allows us to safely set the
/// length of `Vec<DeferSlot>` within the allocated capacity.
// `repr(C)` lays the fields out in declaration order, which is what places
// `input` at the lower address.
#[repr (C)]
pub(crate) struct DeferSlot<I, O> {
    /// Input for this iteration. Uninitialized until a value is written
    /// through the cell.
    pub input: DeferSlotItem<I>,
    /// Output for this iteration. Uninitialized until a value is written
    /// through the cell; it may hold a reference into `input`.
    pub output: DeferSlotItem<O>,
}
| 125 | |
/// A single deferred value: possibly-uninitialized storage for `T` that can be
/// written through a shared reference.
type DeferSlotItem<T> = UnsafeCell<MaybeUninit<T>>;
| 127 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// A `DeferSlot` backed by uninitialized memory may be referenced and then
    /// filled in field by field, because every field is itself `MaybeUninit`.
    #[test]
    fn access_uninit_slot() {
        let mut storage = MaybeUninit::<DeferSlot<String, String>>::uninit();

        // SAFETY: `DeferSlot` consists solely of `MaybeUninit` fields, so
        // uninitialized memory is a valid value for it.
        let uninit_slot = unsafe { storage.assume_init_mut() };
        uninit_slot.input = UnsafeCell::new(MaybeUninit::new(String::new()));
        uninit_slot.output = UnsafeCell::new(MaybeUninit::new(String::new()));

        // SAFETY: Both fields were assigned initialized values above.
        unsafe {
            let filled = storage.assume_init();
            assert_eq!(filled.input.into_inner().assume_init(), "");
            assert_eq!(filled.output.into_inner().assume_init(), "");
        }
    }

    /// An output stored in a slot may hold a mutable reference to the input of
    /// the same slot, because `input` lives inside an `UnsafeCell`.
    #[test]
    fn access_aliased_input() {
        /// Output that mutates its borrowed input when dropped.
        struct Output<'i> {
            input: &'i mut String,
        }

        impl Drop for Output<'_> {
            fn drop(&mut self) {
                assert_eq!(self.input, "hello");
                self.input.push_str(" world");
            }
        }

        let storage = MaybeUninit::<DeferSlot<String, Output>>::uninit();

        // SAFETY: `DeferSlot` consists solely of `MaybeUninit` fields, so
        // uninitialized memory is a valid value for it.
        let shared_slot = unsafe { storage.assume_init_ref() };

        // Repeat to check that earlier rounds leave the slot reusable.
        for _ in 0..5 {
            let input = shared_slot.input.get().cast::<String>();
            let output = shared_slot.output.get().cast::<Output>();

            // SAFETY: Both pointers come from `UnsafeCell::get` on a live slot.
            // Each value is written before it is read and dropped exactly
            // once, with the output dropped before the input it borrows.
            unsafe {
                // Fill the slot: input first, then an output borrowing it.
                input.write("hello".to_owned());
                output.write(Output { input: &mut *input });

                // Consume the output; its destructor appends to the input.
                assert_eq!((*output).input, "hello");
                output.drop_in_place();
                assert_eq!(&*input, "hello world");

                // Finally release the input.
                input.drop_in_place();
            }
        }
    }
}
| 189 | |