1 | //! A small module giving you a simple container that allows easy and cheap |
2 | //! replacement of parts of its content, with the ability to prevent changing |
3 | //! the same parts multiple times. |
4 | |
5 | use anyhow::{anyhow, ensure, Error}; |
6 | use std::rc::Rc; |
7 | |
8 | #[derive (Debug, Clone, PartialEq, Eq)] |
9 | enum State { |
10 | Initial, |
11 | Replaced(Rc<[u8]>), |
12 | Inserted(Rc<[u8]>), |
13 | } |
14 | |
15 | impl State { |
16 | fn is_inserted(&self) -> bool { |
17 | matches!(*self, State::Inserted(..)) |
18 | } |
19 | } |
20 | |
21 | #[derive (Debug, Clone, PartialEq, Eq)] |
22 | struct Span { |
23 | /// Start of this span in parent data |
24 | start: usize, |
25 | /// up to end including |
26 | end: usize, |
27 | data: State, |
28 | } |
29 | |
30 | /// A container that allows easily replacing chunks of its data |
31 | #[derive (Debug, Clone, Default)] |
32 | pub struct Data { |
33 | original: Vec<u8>, |
34 | parts: Vec<Span>, |
35 | } |
36 | |
37 | impl Data { |
38 | /// Create a new data container from a slice of bytes |
39 | pub fn new(data: &[u8]) -> Self { |
40 | Data { |
41 | original: data.into(), |
42 | parts: vec![Span { |
43 | data: State::Initial, |
44 | start: 0, |
45 | end: data.len().saturating_sub(1), |
46 | }], |
47 | } |
48 | } |
49 | |
50 | /// Render this data as a vector of bytes |
51 | pub fn to_vec(&self) -> Vec<u8> { |
52 | if self.original.is_empty() { |
53 | return Vec::new(); |
54 | } |
55 | |
56 | self.parts.iter().fold(Vec::new(), |mut acc, d| { |
57 | match d.data { |
58 | State::Initial => acc.extend_from_slice(&self.original[d.start..=d.end]), |
59 | State::Replaced(ref d) | State::Inserted(ref d) => acc.extend_from_slice(d), |
60 | }; |
61 | acc |
62 | }) |
63 | } |
64 | |
65 | /// Replace a chunk of data with the given slice, erroring when this part |
66 | /// was already changed previously. |
67 | pub fn replace_range( |
68 | &mut self, |
69 | from: usize, |
70 | up_to_and_including: usize, |
71 | data: &[u8], |
72 | ) -> Result<(), Error> { |
73 | let exclusive_end = up_to_and_including + 1; |
74 | |
75 | ensure!( |
76 | from <= exclusive_end, |
77 | "Invalid range {}... {}, start is larger than end" , |
78 | from, |
79 | up_to_and_including |
80 | ); |
81 | |
82 | ensure!( |
83 | up_to_and_including <= self.original.len(), |
84 | "Invalid range {}... {} given, original data is only {} byte long" , |
85 | from, |
86 | up_to_and_including, |
87 | self.original.len() |
88 | ); |
89 | |
90 | let insert_only = from == exclusive_end; |
91 | |
92 | // Since we error out when replacing an already replaced chunk of data, |
93 | // we can take some shortcuts here. For example, there can be no |
94 | // overlapping replacements -- we _always_ split a chunk of 'initial' |
95 | // data into three[^empty] parts, and there can't ever be two 'initial' |
96 | // parts touching. |
97 | // |
98 | // [^empty]: Leading and trailing ones might be empty if we replace |
99 | // the whole chunk. As an optimization and without loss of generality we |
100 | // don't add empty parts. |
101 | let new_parts = { |
102 | let index_of_part_to_split = self |
103 | .parts |
104 | .iter() |
105 | .position(|p| { |
106 | !p.data.is_inserted() && p.start <= from && p.end >= up_to_and_including |
107 | }) |
108 | .ok_or_else(|| { |
109 | use log::Level::Debug; |
110 | if log_enabled!(Debug) { |
111 | let slices = self |
112 | .parts |
113 | .iter() |
114 | .map(|p| { |
115 | ( |
116 | p.start, |
117 | p.end, |
118 | match p.data { |
119 | State::Initial => "initial" , |
120 | State::Replaced(..) => "replaced" , |
121 | State::Inserted(..) => "inserted" , |
122 | }, |
123 | ) |
124 | }) |
125 | .collect::<Vec<_>>(); |
126 | debug!( |
127 | "no single slice covering {}... {}, current slices: {:?}" , |
128 | from, up_to_and_including, slices, |
129 | ); |
130 | } |
131 | |
132 | anyhow!( |
133 | "Could not replace range {}... {} in file \ |
134 | -- maybe parts of it were already replaced?" , |
135 | from, |
136 | up_to_and_including |
137 | ) |
138 | })?; |
139 | |
140 | let part_to_split = &self.parts[index_of_part_to_split]; |
141 | |
142 | // If this replacement matches exactly the part that we would |
143 | // otherwise split then we ignore this for now. This means that you |
144 | // can replace the exact same range with the exact same content |
145 | // multiple times and we'll process and allow it. |
146 | // |
147 | // This is currently done to alleviate issues like |
148 | // rust-lang/rust#51211 although this clause likely wants to be |
149 | // removed if that's fixed deeper in the compiler. |
150 | if part_to_split.start == from && part_to_split.end == up_to_and_including { |
151 | if let State::Replaced(ref replacement) = part_to_split.data { |
152 | if &**replacement == data { |
153 | return Ok(()); |
154 | } |
155 | } |
156 | } |
157 | |
158 | ensure!( |
159 | part_to_split.data == State::Initial, |
160 | "Cannot replace slice of data that was already replaced" |
161 | ); |
162 | |
163 | let mut new_parts = Vec::with_capacity(self.parts.len() + 2); |
164 | |
165 | // Previous parts |
166 | if let Some(ps) = self.parts.get(..index_of_part_to_split) { |
167 | new_parts.extend_from_slice(ps); |
168 | } |
169 | |
170 | // Keep initial data on left side of part |
171 | if from > part_to_split.start { |
172 | new_parts.push(Span { |
173 | start: part_to_split.start, |
174 | end: from.saturating_sub(1), |
175 | data: State::Initial, |
176 | }); |
177 | } |
178 | |
179 | // New part |
180 | new_parts.push(Span { |
181 | start: from, |
182 | end: up_to_and_including, |
183 | data: if insert_only { |
184 | State::Inserted(data.into()) |
185 | } else { |
186 | State::Replaced(data.into()) |
187 | }, |
188 | }); |
189 | |
190 | // Keep initial data on right side of part |
191 | if up_to_and_including < part_to_split.end { |
192 | new_parts.push(Span { |
193 | start: up_to_and_including + 1, |
194 | end: part_to_split.end, |
195 | data: State::Initial, |
196 | }); |
197 | } |
198 | |
199 | // Following parts |
200 | if let Some(ps) = self.parts.get(index_of_part_to_split + 1..) { |
201 | new_parts.extend_from_slice(ps); |
202 | } |
203 | |
204 | new_parts |
205 | }; |
206 | |
207 | self.parts = new_parts; |
208 | |
209 | Ok(()) |
210 | } |
211 | } |
212 | |
213 | #[cfg (test)] |
214 | mod tests { |
215 | use super::*; |
216 | use proptest::prelude::*; |
217 | |
218 | fn str(i: &[u8]) -> &str { |
219 | ::std::str::from_utf8(i).unwrap() |
220 | } |
221 | |
222 | #[test ] |
223 | fn replace_some_stuff() { |
224 | let mut d = Data::new(b"foo bar baz" ); |
225 | d.replace_range(4, 6, b"lol" ).unwrap(); |
226 | assert_eq!("foo lol baz" , str(&d.to_vec())); |
227 | } |
228 | |
229 | #[test ] |
230 | fn replace_a_single_char() { |
231 | let mut d = Data::new(b"let y = true;" ); |
232 | d.replace_range(4, 4, b"mut y" ).unwrap(); |
233 | assert_eq!("let mut y = true;" , str(&d.to_vec())); |
234 | } |
235 | |
236 | #[test ] |
237 | fn replace_multiple_lines() { |
238 | let mut d = Data::new(b"lorem \nipsum \ndolor" ); |
239 | |
240 | d.replace_range(6, 10, b"lol" ).unwrap(); |
241 | assert_eq!("lorem \nlol \ndolor" , str(&d.to_vec())); |
242 | |
243 | d.replace_range(12, 16, b"lol" ).unwrap(); |
244 | assert_eq!("lorem \nlol \nlol" , str(&d.to_vec())); |
245 | } |
246 | |
247 | #[test ] |
248 | fn replace_multiple_lines_with_insert_only() { |
249 | let mut d = Data::new(b"foo!" ); |
250 | |
251 | d.replace_range(3, 2, b"bar" ).unwrap(); |
252 | assert_eq!("foobar!" , str(&d.to_vec())); |
253 | |
254 | d.replace_range(0, 2, b"baz" ).unwrap(); |
255 | assert_eq!("bazbar!" , str(&d.to_vec())); |
256 | |
257 | d.replace_range(3, 3, b"?" ).unwrap(); |
258 | assert_eq!("bazbar?" , str(&d.to_vec())); |
259 | } |
260 | |
261 | #[test ] |
262 | fn replace_invalid_range() { |
263 | let mut d = Data::new(b"foo!" ); |
264 | |
265 | assert!(d.replace_range(2, 0, b"bar" ).is_err()); |
266 | assert!(d.replace_range(0, 2, b"bar" ).is_ok()); |
267 | } |
268 | |
269 | #[test ] |
270 | fn empty_to_vec_roundtrip() { |
271 | let s = "" ; |
272 | assert_eq!(s.as_bytes(), Data::new(s.as_bytes()).to_vec().as_slice()); |
273 | } |
274 | |
275 | #[test ] |
276 | #[should_panic (expected = "Cannot replace slice of data that was already replaced" )] |
277 | fn replace_overlapping_stuff_errs() { |
278 | let mut d = Data::new(b"foo bar baz" ); |
279 | |
280 | d.replace_range(4, 6, b"lol" ).unwrap(); |
281 | assert_eq!("foo lol baz" , str(&d.to_vec())); |
282 | |
283 | d.replace_range(4, 6, b"lol2" ).unwrap(); |
284 | } |
285 | |
286 | #[test ] |
287 | #[should_panic (expected = "original data is only 3 byte long" )] |
288 | fn broken_replacements() { |
289 | let mut d = Data::new(b"foo" ); |
290 | d.replace_range(4, 7, b"lol" ).unwrap(); |
291 | } |
292 | |
293 | #[test ] |
294 | fn replace_same_twice() { |
295 | let mut d = Data::new(b"foo" ); |
296 | d.replace_range(0, 0, b"b" ).unwrap(); |
297 | d.replace_range(0, 0, b"b" ).unwrap(); |
298 | assert_eq!("boo" , str(&d.to_vec())); |
299 | } |
300 | |
301 | proptest! { |
302 | #[test] |
303 | #[ignore] |
304 | fn new_to_vec_roundtrip(ref s in " \\PC*" ) { |
305 | assert_eq!(s.as_bytes(), Data::new(s.as_bytes()).to_vec().as_slice()); |
306 | } |
307 | |
308 | #[test] |
309 | #[ignore] |
310 | fn replace_random_chunks( |
311 | ref data in " \\PC*" , |
312 | ref replacements in prop::collection::vec( |
313 | (any::<::std::ops::Range<usize>>(), any::<Vec<u8>>()), |
314 | 1..1337, |
315 | ) |
316 | ) { |
317 | let mut d = Data::new(data.as_bytes()); |
318 | for &(ref range, ref bytes) in replacements { |
319 | let _ = d.replace_range(range.start, range.end, bytes); |
320 | } |
321 | } |
322 | } |
323 | } |
324 | |