1 | //! LCS diff algorithm. |
2 | //! |
3 | //! * time: `O((NM)D log (M)D)` |
4 | //! * space `O(MN)` |
5 | use std::collections::BTreeMap; |
6 | use std::ops::{Index, Range}; |
7 | use std::time::Instant; |
8 | |
9 | use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range}; |
10 | use crate::algorithms::DiffHook; |
11 | |
12 | /// LCS diff algorithm. |
13 | /// |
14 | /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. |
15 | /// |
16 | /// This diff is done with an optional deadline that defines the maximal |
17 | /// execution time permitted before it bails and falls back to an very bad |
18 | /// approximation. Deadlines with LCS do not make a lot of sense and should |
19 | /// not be used. |
20 | pub fn diff<Old, New, D>( |
21 | d: &mut D, |
22 | old: &Old, |
23 | old_range: Range<usize>, |
24 | new: &New, |
25 | new_range: Range<usize>, |
26 | ) -> Result<(), D::Error> |
27 | where |
28 | Old: Index<usize> + ?Sized, |
29 | New: Index<usize> + ?Sized, |
30 | D: DiffHook, |
31 | New::Output: PartialEq<Old::Output>, |
32 | { |
33 | diff_deadline(d, old, old_range, new, new_range, deadline:None) |
34 | } |
35 | |
36 | /// LCS diff algorithm. |
37 | /// |
38 | /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. |
39 | /// |
40 | /// This diff is done with an optional deadline that defines the maximal |
41 | /// execution time permitted before it bails and falls back to an approximation. |
42 | pub fn diff_deadline<Old, New, D>( |
43 | d: &mut D, |
44 | old: &Old, |
45 | old_range: Range<usize>, |
46 | new: &New, |
47 | new_range: Range<usize>, |
48 | deadline: Option<Instant>, |
49 | ) -> Result<(), D::Error> |
50 | where |
51 | Old: Index<usize> + ?Sized, |
52 | New: Index<usize> + ?Sized, |
53 | D: DiffHook, |
54 | New::Output: PartialEq<Old::Output>, |
55 | { |
56 | if is_empty_range(&new_range) { |
57 | d.delete(old_range.start, old_range.len(), new_range.start)?; |
58 | d.finish()?; |
59 | return Ok(()); |
60 | } else if is_empty_range(&old_range) { |
61 | d.insert(old_range.start, new_range.start, new_range.len())?; |
62 | d.finish()?; |
63 | return Ok(()); |
64 | } |
65 | |
66 | let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); |
67 | let common_suffix_len = common_suffix_len( |
68 | old, |
69 | old_range.start + common_prefix_len..old_range.end, |
70 | new, |
71 | new_range.start + common_prefix_len..new_range.end, |
72 | ); |
73 | |
74 | // If the sequences are not different then we're done |
75 | if common_prefix_len == old_range.len() && (old_range.len() == new_range.len()) { |
76 | d.equal(0, 0, old_range.len())?; |
77 | d.finish()?; |
78 | return Ok(()); |
79 | } |
80 | |
81 | let maybe_table = make_table( |
82 | old, |
83 | common_prefix_len..(old_range.len() - common_suffix_len), |
84 | new, |
85 | common_prefix_len..(new_range.len() - common_suffix_len), |
86 | deadline, |
87 | ); |
88 | let mut old_idx = 0; |
89 | let mut new_idx = 0; |
90 | let new_len = new_range.len() - common_prefix_len - common_suffix_len; |
91 | let old_len = old_range.len() - common_prefix_len - common_suffix_len; |
92 | |
93 | if common_prefix_len > 0 { |
94 | d.equal(old_range.start, new_range.start, common_prefix_len)?; |
95 | } |
96 | |
97 | if let Some(table) = maybe_table { |
98 | while new_idx < new_len && old_idx < old_len { |
99 | let old_orig_idx = old_range.start + common_prefix_len + old_idx; |
100 | let new_orig_idx = new_range.start + common_prefix_len + new_idx; |
101 | |
102 | if new[new_orig_idx] == old[old_orig_idx] { |
103 | d.equal(old_orig_idx, new_orig_idx, 1)?; |
104 | old_idx += 1; |
105 | new_idx += 1; |
106 | } else if table.get(&(new_idx, old_idx + 1)).unwrap_or(&0) |
107 | >= table.get(&(new_idx + 1, old_idx)).unwrap_or(&0) |
108 | { |
109 | d.delete(old_orig_idx, 1, new_orig_idx)?; |
110 | old_idx += 1; |
111 | } else { |
112 | d.insert(old_orig_idx, new_orig_idx, 1)?; |
113 | new_idx += 1; |
114 | } |
115 | } |
116 | } else { |
117 | let old_orig_idx = old_range.start + common_prefix_len + old_idx; |
118 | let new_orig_idx = new_range.start + common_prefix_len + new_idx; |
119 | d.delete(old_orig_idx, old_len, new_orig_idx)?; |
120 | d.insert(old_orig_idx, new_orig_idx, new_len)?; |
121 | } |
122 | |
123 | if old_idx < old_len { |
124 | d.delete( |
125 | old_range.start + common_prefix_len + old_idx, |
126 | old_len - old_idx, |
127 | new_range.start + common_prefix_len + new_idx, |
128 | )?; |
129 | old_idx += old_len - old_idx; |
130 | } |
131 | |
132 | if new_idx < new_len { |
133 | d.insert( |
134 | old_range.start + common_prefix_len + old_idx, |
135 | new_range.start + common_prefix_len + new_idx, |
136 | new_len - new_idx, |
137 | )?; |
138 | } |
139 | |
140 | if common_suffix_len > 0 { |
141 | d.equal( |
142 | old_range.start + old_len + common_prefix_len, |
143 | new_range.start + new_len + common_prefix_len, |
144 | common_suffix_len, |
145 | )?; |
146 | } |
147 | |
148 | d.finish() |
149 | } |
150 | |
151 | fn make_table<Old, New>( |
152 | old: &Old, |
153 | old_range: Range<usize>, |
154 | new: &New, |
155 | new_range: Range<usize>, |
156 | deadline: Option<Instant>, |
157 | ) -> Option<BTreeMap<(usize, usize), u32>> |
158 | where |
159 | Old: Index<usize> + ?Sized, |
160 | New: Index<usize> + ?Sized, |
161 | New::Output: PartialEq<Old::Output>, |
162 | { |
163 | let old_len = old_range.len(); |
164 | let new_len = new_range.len(); |
165 | let mut table = BTreeMap::new(); |
166 | |
167 | for i in (0..new_len).rev() { |
168 | // are we running for too long? give up on the table |
169 | if let Some(deadline) = deadline { |
170 | if Instant::now() > deadline { |
171 | return None; |
172 | } |
173 | } |
174 | |
175 | for j in (0..old_len).rev() { |
176 | let val = if new[i] == old[j] { |
177 | table.get(&(i + 1, j + 1)).unwrap_or(&0) + 1 |
178 | } else { |
179 | *table |
180 | .get(&(i + 1, j)) |
181 | .unwrap_or(&0) |
182 | .max(table.get(&(i, j + 1)).unwrap_or(&0)) |
183 | }; |
184 | if val > 0 { |
185 | table.insert((i, j), val); |
186 | } |
187 | } |
188 | } |
189 | |
190 | Some(table) |
191 | } |
192 | |
193 | #[test ] |
194 | fn test_table() { |
195 | let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap(); |
196 | let expected = { |
197 | let mut m = BTreeMap::new(); |
198 | m.insert((1, 0), 1); |
199 | m.insert((0, 0), 1); |
200 | m.insert((2, 0), 1); |
201 | m |
202 | }; |
203 | assert_eq!(table, expected); |
204 | } |
205 | |
206 | #[test ] |
207 | fn test_diff() { |
208 | let a: &[usize] = &[0, 1, 2, 3, 4]; |
209 | let b: &[usize] = &[0, 1, 2, 9, 4]; |
210 | |
211 | let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); |
212 | diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); |
213 | insta::assert_debug_snapshot!(d.into_inner().ops()); |
214 | } |
215 | |
216 | #[test ] |
217 | fn test_contiguous() { |
218 | let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; |
219 | let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7]; |
220 | |
221 | let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); |
222 | diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); |
223 | insta::assert_debug_snapshot!(d.into_inner().ops()); |
224 | } |
225 | |
226 | #[test ] |
227 | fn test_pat() { |
228 | let a: &[usize] = &[0, 1, 3, 4, 5]; |
229 | let b: &[usize] = &[0, 1, 4, 5, 8, 9]; |
230 | |
231 | let mut d = crate::algorithms::Capture::new(); |
232 | diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); |
233 | insta::assert_debug_snapshot!(d.ops()); |
234 | } |
235 | |
236 | #[test ] |
237 | fn test_same() { |
238 | let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; |
239 | let b: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; |
240 | |
241 | let mut d = crate::algorithms::Capture::new(); |
242 | diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); |
243 | insta::assert_debug_snapshot!(d.ops()); |
244 | } |
245 | |
246 | #[test ] |
247 | fn test_finish_called() { |
248 | struct HasRunFinish(bool); |
249 | |
250 | impl DiffHook for HasRunFinish { |
251 | type Error = (); |
252 | fn finish(&mut self) -> Result<(), Self::Error> { |
253 | self.0 = true; |
254 | Ok(()) |
255 | } |
256 | } |
257 | |
258 | let mut d = HasRunFinish(false); |
259 | let slice = &[1, 2]; |
260 | let slice2 = &[1, 2, 3]; |
261 | diff(&mut d, slice, 0..slice.len(), slice2, 0..slice2.len()).unwrap(); |
262 | assert!(d.0); |
263 | |
264 | let mut d = HasRunFinish(false); |
265 | let slice = &[1, 2]; |
266 | diff(&mut d, slice, 0..slice.len(), slice, 0..slice.len()).unwrap(); |
267 | assert!(d.0); |
268 | |
269 | let mut d = HasRunFinish(false); |
270 | let slice: &[u8] = &[]; |
271 | diff(&mut d, slice, 0..slice.len(), slice, 0..slice.len()).unwrap(); |
272 | assert!(d.0); |
273 | } |
274 | |
275 | #[test ] |
276 | fn test_bad_range_regression() { |
277 | use crate::algorithms::Capture; |
278 | use crate::DiffOp; |
279 | let mut d = Capture::new(); |
280 | diff(&mut d, &[0], 0..1, &[0, 0], 0..2).unwrap(); |
281 | assert_eq!( |
282 | d.into_ops(), |
283 | vec![ |
284 | DiffOp::Equal { |
285 | old_index: 0, |
286 | new_index: 0, |
287 | len: 1 |
288 | }, |
289 | DiffOp::Insert { |
290 | old_index: 1, |
291 | new_index: 1, |
292 | new_len: 1 |
293 | } |
294 | ] |
295 | ); |
296 | } |
297 | |