1use std::{
2 cell::UnsafeCell,
3 mem::{ManuallyDrop, MaybeUninit},
4};
5
6/// Defers input usage and output drop during benchmarking.
7///
8/// To reduce memory usage, this only allocates storage for inputs if outputs do
9/// not need deferred drop.
10pub(crate) union DeferStore<I, O> {
11 /// The variant used if outputs need to be dropped.
12 ///
13 /// Inputs are stored are stored contiguously with outputs in memory. This
14 /// improves performance by:
15 /// - Removing the overhead of `zip` between two separate buffers.
16 /// - Improving cache locality and cache prefetching. Input is strategically
17 /// placed before output because iteration is from low to high addresses,
18 /// so doing this makes memory access patterns very predictable.
19 slots: ManuallyDrop<Vec<DeferSlot<I, O>>>,
20
21 /// The variant used if `Self::ONLY_INPUTS`, i.e. outputs do not need to be
22 /// dropped.
23 inputs: ManuallyDrop<Vec<DeferSlotItem<I>>>,
24}
25
26impl<I, O> Drop for DeferStore<I, O> {
27 #[inline]
28 fn drop(&mut self) {
29 // SAFETY: The correct variant is used based on `ONLY_INPUTS`.
30 unsafe {
31 if Self::ONLY_INPUTS {
32 ManuallyDrop::drop(&mut self.inputs)
33 } else {
34 ManuallyDrop::drop(&mut self.slots)
35 }
36 }
37 }
38}
39
40impl<I, O> Default for DeferStore<I, O> {
41 #[inline]
42 fn default() -> Self {
43 // SAFETY: The correct variant is used based on `ONLY_INPUTS`.
44 unsafe {
45 if Self::ONLY_INPUTS {
46 Self { inputs: ManuallyDrop::new(Vec::new()) }
47 } else {
48 Self { slots: ManuallyDrop::new(Vec::new()) }
49 }
50 }
51 }
52}
53
54impl<I, O> DeferStore<I, O> {
55 /// Whether only inputs need to be deferred.
56 ///
57 /// If `true`, outputs do not get inserted into `DeferStore`.
58 const ONLY_INPUTS: bool = !std::mem::needs_drop::<O>();
59
60 /// Prepares storage for iterating over `DeferSlot`s for a sample.
61 #[inline]
62 pub fn prepare(&mut self, sample_size: usize) {
63 // Common implementation regardless of `Vec` item type.
64 macro_rules! imp {
65 ($vec:expr) => {{
66 $vec.clear();
67 $vec.reserve_exact(sample_size);
68
69 // SAFETY: `Vec` only contains `MaybeUninit` fields, so values
70 // may be safely created from uninitialized memory.
71 unsafe { $vec.set_len(sample_size) }
72 }};
73 }
74
75 // SAFETY: The correct variant is used based on `ONLY_INPUTS`.
76 unsafe {
77 if Self::ONLY_INPUTS {
78 imp!(self.inputs)
79 } else {
80 imp!(self.slots)
81 }
82 }
83 }
84
85 /// Returns the sample's slots for iteration.
86 ///
87 /// The caller is expected to use the returned slice to initialize inputs
88 /// for the sample loop.
89 ///
90 /// This returns `Err` containing only input slots if `O` does not need
91 /// deferred drop. Ideally this would be implemented directly on `DeferSlot`
92 /// but there's no way to change its size based on `needs_drop::<O>()`.
93 #[inline(always)]
94 pub fn slots(&self) -> Result<&[DeferSlot<I, O>], &[DeferSlotItem<I>]> {
95 unsafe {
96 if Self::ONLY_INPUTS {
97 Err(&self.inputs)
98 } else {
99 Ok(&self.slots)
100 }
101 }
102 }
103}
104
/// A possibly-uninitialized value that can be written through a shared
/// reference.
type DeferSlotItem<T> = UnsafeCell<MaybeUninit<T>>;

/// The input/output pair belonging to one iteration of a sample.
///
/// `#[repr(C)]` keeps `input` ahead of `output` in memory. Iteration moves from
/// low to high addresses, so this ordering is friendlier to cache prefetching.
///
/// # UnsafeCell
///
/// Wrapping `input` in `UnsafeCell` is what makes it sound for `output` to hold
/// a reference into `input`. `output` is never aliased itself; it is wrapped in
/// `UnsafeCell` only so that it can be mutated through a shared `&DeferSlot`.
///
/// # Safety
///
/// Every field **must** be `MaybeUninit`. That is what allows the length of a
/// `Vec<DeferSlot>` to be set anywhere within its allocated capacity without
/// initializing the new elements.
#[repr(C)]
pub(crate) struct DeferSlot<I, O> {
    pub input: DeferSlotItem<I>,
    pub output: DeferSlotItem<O>,
}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that an uninitialized `DeferSlot` may be accessed, which is
    /// sound because every one of its fields is `MaybeUninit`.
    #[test]
    fn access_uninit_slot() {
        let mut storage = MaybeUninit::<DeferSlot<String, String>>::uninit();

        // SAFETY: `DeferSlot` consists solely of `MaybeUninit` fields, so an
        // uninitialized slot is already a valid value.
        let slot = unsafe { storage.assume_init_mut() };
        slot.input = UnsafeCell::new(MaybeUninit::new(String::new()));
        slot.output = UnsafeCell::new(MaybeUninit::new(String::new()));

        // SAFETY: See above; additionally both fields were just written.
        let DeferSlot { input, output } = unsafe { storage.assume_init() };

        // SAFETY: Both fields hold the empty strings stored above.
        unsafe {
            assert_eq!(input.into_inner().assume_init(), "");
            assert_eq!(output.into_inner().assume_init(), "");
        }
    }

    /// Checks that `DeferSlot.input` may be reached through an aliasing
    /// reference held by `DeferSlot.output`, which is sound due to `input`
    /// being an `UnsafeCell`.
    #[test]
    fn access_aliased_input() {
        /// An output that borrows its input and mutates it when dropped.
        struct Output<'i> {
            input: &'i mut String,
        }

        impl Drop for Output<'_> {
            fn drop(&mut self) {
                assert_eq!(self.input, "hello");
                self.input.push_str(" world");
            }
        }

        let storage: MaybeUninit<DeferSlot<String, Output>> = MaybeUninit::uninit();

        // SAFETY: `DeferSlot` consists solely of `MaybeUninit` fields.
        let slot = unsafe { storage.assume_init_ref() };

        // Repeat to make sure earlier rounds leave nothing behind that affects
        // later uses of the same slot.
        for _ in 0..5 {
            // SAFETY: Each pointer is written before it is read, and the
            // output is dropped before the input it borrows from.
            unsafe {
                let input = slot.input.get().cast::<String>();
                let output = slot.output.get().cast::<Output>();

                // Fill in the input, then an output that refers to it.
                input.write("hello".to_owned());
                output.write(Output { input: &mut *input });

                // Read through the output and then destroy it.
                assert_eq!((*output).input, "hello");
                output.drop_in_place();
                assert_eq!(&*input, "hello world");

                // Destroy the input.
                input.drop_in_place();
            }
        }
    }
}
189