1//! A small module giving you a simple container that allows easy and cheap
2//! replacement of parts of its content, with the ability to prevent changing
3//! the same parts multiple times.
4
5use anyhow::{anyhow, ensure, Error};
6use std::rc::Rc;
7
8#[derive(Debug, Clone, PartialEq, Eq)]
9enum State {
10 Initial,
11 Replaced(Rc<[u8]>),
12 Inserted(Rc<[u8]>),
13}
14
15impl State {
16 fn is_inserted(&self) -> bool {
17 matches!(*self, State::Inserted(..))
18 }
19}
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22struct Span {
23 /// Start of this span in parent data
24 start: usize,
25 /// up to end including
26 end: usize,
27 data: State,
28}
29
30/// A container that allows easily replacing chunks of its data
31#[derive(Debug, Clone, Default)]
32pub struct Data {
33 original: Vec<u8>,
34 parts: Vec<Span>,
35}
36
37impl Data {
38 /// Create a new data container from a slice of bytes
39 pub fn new(data: &[u8]) -> Self {
40 Data {
41 original: data.into(),
42 parts: vec![Span {
43 data: State::Initial,
44 start: 0,
45 end: data.len().saturating_sub(1),
46 }],
47 }
48 }
49
50 /// Render this data as a vector of bytes
51 pub fn to_vec(&self) -> Vec<u8> {
52 if self.original.is_empty() {
53 return Vec::new();
54 }
55
56 self.parts.iter().fold(Vec::new(), |mut acc, d| {
57 match d.data {
58 State::Initial => acc.extend_from_slice(&self.original[d.start..=d.end]),
59 State::Replaced(ref d) | State::Inserted(ref d) => acc.extend_from_slice(d),
60 };
61 acc
62 })
63 }
64
65 /// Replace a chunk of data with the given slice, erroring when this part
66 /// was already changed previously.
67 pub fn replace_range(
68 &mut self,
69 from: usize,
70 up_to_and_including: usize,
71 data: &[u8],
72 ) -> Result<(), Error> {
73 let exclusive_end = up_to_and_including + 1;
74
75 ensure!(
76 from <= exclusive_end,
77 "Invalid range {}...{}, start is larger than end",
78 from,
79 up_to_and_including
80 );
81
82 ensure!(
83 up_to_and_including <= self.original.len(),
84 "Invalid range {}...{} given, original data is only {} byte long",
85 from,
86 up_to_and_including,
87 self.original.len()
88 );
89
90 let insert_only = from == exclusive_end;
91
92 // Since we error out when replacing an already replaced chunk of data,
93 // we can take some shortcuts here. For example, there can be no
94 // overlapping replacements -- we _always_ split a chunk of 'initial'
95 // data into three[^empty] parts, and there can't ever be two 'initial'
96 // parts touching.
97 //
98 // [^empty]: Leading and trailing ones might be empty if we replace
99 // the whole chunk. As an optimization and without loss of generality we
100 // don't add empty parts.
101 let new_parts = {
102 let index_of_part_to_split = self
103 .parts
104 .iter()
105 .position(|p| {
106 !p.data.is_inserted() && p.start <= from && p.end >= up_to_and_including
107 })
108 .ok_or_else(|| {
109 use log::Level::Debug;
110 if log_enabled!(Debug) {
111 let slices = self
112 .parts
113 .iter()
114 .map(|p| {
115 (
116 p.start,
117 p.end,
118 match p.data {
119 State::Initial => "initial",
120 State::Replaced(..) => "replaced",
121 State::Inserted(..) => "inserted",
122 },
123 )
124 })
125 .collect::<Vec<_>>();
126 debug!(
127 "no single slice covering {}...{}, current slices: {:?}",
128 from, up_to_and_including, slices,
129 );
130 }
131
132 anyhow!(
133 "Could not replace range {}...{} in file \
134 -- maybe parts of it were already replaced?",
135 from,
136 up_to_and_including
137 )
138 })?;
139
140 let part_to_split = &self.parts[index_of_part_to_split];
141
142 // If this replacement matches exactly the part that we would
143 // otherwise split then we ignore this for now. This means that you
144 // can replace the exact same range with the exact same content
145 // multiple times and we'll process and allow it.
146 //
147 // This is currently done to alleviate issues like
148 // rust-lang/rust#51211 although this clause likely wants to be
149 // removed if that's fixed deeper in the compiler.
150 if part_to_split.start == from && part_to_split.end == up_to_and_including {
151 if let State::Replaced(ref replacement) = part_to_split.data {
152 if &**replacement == data {
153 return Ok(());
154 }
155 }
156 }
157
158 ensure!(
159 part_to_split.data == State::Initial,
160 "Cannot replace slice of data that was already replaced"
161 );
162
163 let mut new_parts = Vec::with_capacity(self.parts.len() + 2);
164
165 // Previous parts
166 if let Some(ps) = self.parts.get(..index_of_part_to_split) {
167 new_parts.extend_from_slice(ps);
168 }
169
170 // Keep initial data on left side of part
171 if from > part_to_split.start {
172 new_parts.push(Span {
173 start: part_to_split.start,
174 end: from.saturating_sub(1),
175 data: State::Initial,
176 });
177 }
178
179 // New part
180 new_parts.push(Span {
181 start: from,
182 end: up_to_and_including,
183 data: if insert_only {
184 State::Inserted(data.into())
185 } else {
186 State::Replaced(data.into())
187 },
188 });
189
190 // Keep initial data on right side of part
191 if up_to_and_including < part_to_split.end {
192 new_parts.push(Span {
193 start: up_to_and_including + 1,
194 end: part_to_split.end,
195 data: State::Initial,
196 });
197 }
198
199 // Following parts
200 if let Some(ps) = self.parts.get(index_of_part_to_split + 1..) {
201 new_parts.extend_from_slice(ps);
202 }
203
204 new_parts
205 };
206
207 self.parts = new_parts;
208
209 Ok(())
210 }
211}
212
213#[cfg(test)]
214mod tests {
215 use super::*;
216 use proptest::prelude::*;
217
218 fn str(i: &[u8]) -> &str {
219 ::std::str::from_utf8(i).unwrap()
220 }
221
222 #[test]
223 fn replace_some_stuff() {
224 let mut d = Data::new(b"foo bar baz");
225 d.replace_range(4, 6, b"lol").unwrap();
226 assert_eq!("foo lol baz", str(&d.to_vec()));
227 }
228
229 #[test]
230 fn replace_a_single_char() {
231 let mut d = Data::new(b"let y = true;");
232 d.replace_range(4, 4, b"mut y").unwrap();
233 assert_eq!("let mut y = true;", str(&d.to_vec()));
234 }
235
236 #[test]
237 fn replace_multiple_lines() {
238 let mut d = Data::new(b"lorem\nipsum\ndolor");
239
240 d.replace_range(6, 10, b"lol").unwrap();
241 assert_eq!("lorem\nlol\ndolor", str(&d.to_vec()));
242
243 d.replace_range(12, 16, b"lol").unwrap();
244 assert_eq!("lorem\nlol\nlol", str(&d.to_vec()));
245 }
246
247 #[test]
248 fn replace_multiple_lines_with_insert_only() {
249 let mut d = Data::new(b"foo!");
250
251 d.replace_range(3, 2, b"bar").unwrap();
252 assert_eq!("foobar!", str(&d.to_vec()));
253
254 d.replace_range(0, 2, b"baz").unwrap();
255 assert_eq!("bazbar!", str(&d.to_vec()));
256
257 d.replace_range(3, 3, b"?").unwrap();
258 assert_eq!("bazbar?", str(&d.to_vec()));
259 }
260
261 #[test]
262 fn replace_invalid_range() {
263 let mut d = Data::new(b"foo!");
264
265 assert!(d.replace_range(2, 0, b"bar").is_err());
266 assert!(d.replace_range(0, 2, b"bar").is_ok());
267 }
268
269 #[test]
270 fn empty_to_vec_roundtrip() {
271 let s = "";
272 assert_eq!(s.as_bytes(), Data::new(s.as_bytes()).to_vec().as_slice());
273 }
274
275 #[test]
276 #[should_panic(expected = "Cannot replace slice of data that was already replaced")]
277 fn replace_overlapping_stuff_errs() {
278 let mut d = Data::new(b"foo bar baz");
279
280 d.replace_range(4, 6, b"lol").unwrap();
281 assert_eq!("foo lol baz", str(&d.to_vec()));
282
283 d.replace_range(4, 6, b"lol2").unwrap();
284 }
285
286 #[test]
287 #[should_panic(expected = "original data is only 3 byte long")]
288 fn broken_replacements() {
289 let mut d = Data::new(b"foo");
290 d.replace_range(4, 7, b"lol").unwrap();
291 }
292
293 #[test]
294 fn replace_same_twice() {
295 let mut d = Data::new(b"foo");
296 d.replace_range(0, 0, b"b").unwrap();
297 d.replace_range(0, 0, b"b").unwrap();
298 assert_eq!("boo", str(&d.to_vec()));
299 }
300
301 proptest! {
302 #[test]
303 #[ignore]
304 fn new_to_vec_roundtrip(ref s in "\\PC*") {
305 assert_eq!(s.as_bytes(), Data::new(s.as_bytes()).to_vec().as_slice());
306 }
307
308 #[test]
309 #[ignore]
310 fn replace_random_chunks(
311 ref data in "\\PC*",
312 ref replacements in prop::collection::vec(
313 (any::<::std::ops::Range<usize>>(), any::<Vec<u8>>()),
314 1..1337,
315 )
316 ) {
317 let mut d = Data::new(data.as_bytes());
318 for &(ref range, ref bytes) in replacements {
319 let _ = d.replace_range(range.start, range.end, bytes);
320 }
321 }
322 }
323}
324