1 | use std::{ |
2 | cell::UnsafeCell, |
3 | mem::{ManuallyDrop, MaybeUninit}, |
4 | }; |
5 | |
6 | /// Defers input usage and output drop during benchmarking. |
7 | /// |
8 | /// To reduce memory usage, this only allocates storage for inputs if outputs do |
9 | /// not need deferred drop. |
10 | pub(crate) union DeferStore<I, O> { |
11 | /// The variant used if outputs need to be dropped. |
12 | /// |
13 | /// Inputs are stored are stored contiguously with outputs in memory. This |
14 | /// improves performance by: |
15 | /// - Removing the overhead of `zip` between two separate buffers. |
16 | /// - Improving cache locality and cache prefetching. Input is strategically |
17 | /// placed before output because iteration is from low to high addresses, |
18 | /// so doing this makes memory access patterns very predictable. |
19 | slots: ManuallyDrop<Vec<DeferSlot<I, O>>>, |
20 | |
21 | /// The variant used if `Self::ONLY_INPUTS`, i.e. outputs do not need to be |
22 | /// dropped. |
23 | inputs: ManuallyDrop<Vec<DeferSlotItem<I>>>, |
24 | } |
25 | |
26 | impl<I, O> Drop for DeferStore<I, O> { |
27 | #[inline ] |
28 | fn drop(&mut self) { |
29 | // SAFETY: The correct variant is used based on `ONLY_INPUTS`. |
30 | unsafe { |
31 | if Self::ONLY_INPUTS { |
32 | ManuallyDrop::drop(&mut self.inputs) |
33 | } else { |
34 | ManuallyDrop::drop(&mut self.slots) |
35 | } |
36 | } |
37 | } |
38 | } |
39 | |
40 | impl<I, O> Default for DeferStore<I, O> { |
41 | #[inline ] |
42 | fn default() -> Self { |
43 | // SAFETY: The correct variant is used based on `ONLY_INPUTS`. |
44 | unsafe { |
45 | if Self::ONLY_INPUTS { |
46 | Self { inputs: ManuallyDrop::new(Vec::new()) } |
47 | } else { |
48 | Self { slots: ManuallyDrop::new(Vec::new()) } |
49 | } |
50 | } |
51 | } |
52 | } |
53 | |
54 | impl<I, O> DeferStore<I, O> { |
55 | /// Whether only inputs need to be deferred. |
56 | /// |
57 | /// If `true`, outputs do not get inserted into `DeferStore`. |
58 | const ONLY_INPUTS: bool = !std::mem::needs_drop::<O>(); |
59 | |
60 | /// Prepares storage for iterating over `DeferSlot`s for a sample. |
61 | #[inline ] |
62 | pub fn prepare(&mut self, sample_size: usize) { |
63 | // Common implementation regardless of `Vec` item type. |
64 | macro_rules! imp { |
65 | ($vec:expr) => {{ |
66 | $vec.clear(); |
67 | $vec.reserve_exact(sample_size); |
68 | |
69 | // SAFETY: `Vec` only contains `MaybeUninit` fields, so values |
70 | // may be safely created from uninitialized memory. |
71 | unsafe { $vec.set_len(sample_size) } |
72 | }}; |
73 | } |
74 | |
75 | // SAFETY: The correct variant is used based on `ONLY_INPUTS`. |
76 | unsafe { |
77 | if Self::ONLY_INPUTS { |
78 | imp!(self.inputs) |
79 | } else { |
80 | imp!(self.slots) |
81 | } |
82 | } |
83 | } |
84 | |
85 | /// Returns the sample's slots for iteration. |
86 | /// |
87 | /// The caller is expected to use the returned slice to initialize inputs |
88 | /// for the sample loop. |
89 | /// |
90 | /// This returns `Err` containing only input slots if `O` does not need |
91 | /// deferred drop. Ideally this would be implemented directly on `DeferSlot` |
92 | /// but there's no way to change its size based on `needs_drop::<O>()`. |
93 | #[inline (always)] |
94 | pub fn slots(&self) -> Result<&[DeferSlot<I, O>], &[DeferSlotItem<I>]> { |
95 | unsafe { |
96 | if Self::ONLY_INPUTS { |
97 | Err(&self.inputs) |
98 | } else { |
99 | Ok(&self.slots) |
100 | } |
101 | } |
102 | } |
103 | } |
104 | |
/// Storage for one deferred value: possibly-uninitialized memory that can be
/// written to through a shared reference.
type DeferSlotItem<T> = UnsafeCell<MaybeUninit<T>>;

/// Storage for a single iteration within a sample.
///
/// The layout is `#[repr(C)]` with the input placed ahead of the output.
/// Iteration progresses from low to high addresses, so this ordering is meant
/// to improve cache prefetching.
///
/// # UnsafeCell
///
/// `UnsafeCell` is used to allow `output` to safely refer to `input`. Although
/// `output` itself is never aliased, it is also stored as `UnsafeCell` in order
/// to get mutable access through a shared `&DeferSlot`.
///
/// # Safety
///
/// All fields **must** be `MaybeUninit`. This allows us to safely set the
/// length of `Vec<DeferSlot>` within the allocated capacity.
#[repr(C)]
pub(crate) struct DeferSlot<I, O> {
    /// Storage for the input of this iteration.
    pub input: DeferSlotItem<I>,
    /// Storage for the output of this iteration.
    pub output: DeferSlotItem<O>,
}
127 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Tests that accessing an uninitialized `DeferSlot` is safe due to all of
    /// its fields being `MaybeUninit`.
    #[test]
    fn access_uninit_slot() {
        let mut storage = MaybeUninit::<DeferSlot<String, String>>::uninit();

        // SAFETY: `DeferSlot` consists solely of `MaybeUninit` fields, which
        // are valid while uninitialized.
        let slot = unsafe { storage.assume_init_mut() };
        slot.input = UnsafeCell::new(MaybeUninit::new(String::new()));
        slot.output = UnsafeCell::new(MaybeUninit::new(String::new()));

        // SAFETY: Both fields were initialized just above.
        unsafe {
            let DeferSlot { input, output } = storage.assume_init();
            assert_eq!(input.into_inner().assume_init(), "");
            assert_eq!(output.into_inner().assume_init(), "");
        }
    }

    /// Tests that accessing `DeferSlot.input` through an aliased reference in
    /// `DeferSlot.output` is safe due to `input` being an `UnsafeCell`.
    #[test]
    fn access_aliased_input() {
        /// An output that mutably borrows its input and checks and mutates it
        /// when dropped.
        struct Output<'i> {
            input: &'i mut String,
        }

        impl Drop for Output<'_> {
            fn drop(&mut self) {
                assert_eq!(self.input, "hello");
                self.input.push_str(" world");
            }
        }

        let storage: MaybeUninit<DeferSlot<String, Output>> = MaybeUninit::uninit();

        // SAFETY: `DeferSlot` consists solely of `MaybeUninit` fields, which
        // are valid while uninitialized.
        let slot = unsafe { storage.assume_init_ref() };

        // Loop to ensure previous iterations don't affect later uses of the
        // same entry slot.
        for _ in 0..5 {
            let input = slot.input.get().cast::<String>();
            let output = slot.output.get().cast::<Output>();

            // SAFETY: Both pointers come from `UnsafeCell`s inside `slot`, and
            // each value is written before it is read or dropped.
            unsafe {
                // Initialize input and output.
                input.write("hello".to_owned());
                output.write(Output { input: &mut *input });

                // Use and discard output.
                assert_eq!((*output).input, "hello");
                output.drop_in_place();
                assert_eq!(&*input, "hello world");

                // Discard input.
                input.drop_in_place();
            }
        }
    }
}
189 | |