1 | //! Utilities for common diff related operations. |
2 | //! |
3 | //! This module provides specialized utilities and simplified diff operations |
4 | //! for common operations. It's useful when you want to work with text diffs |
5 | //! and you're interested in getting vectors of these changes directly. |
6 | //! |
7 | //! # Slice Remapping |
8 | //! |
9 | //! When working with [`TextDiff`] it's common that one takes advantage of the |
10 | //! built-in tokenization of the differ. This for instance lets you do |
11 | //! grapheme level diffs. This is implemented by the differ generating rather |
12 | //! small slices of strings and running a diff algorithm over them. |
13 | //! |
14 | //! The downside of this is that all the [`DiffOp`] objects produced by the |
15 | //! diffing algorithm encode operations on these rather small slices. For |
16 | //! a lot of use cases this is not what one wants which can make this very |
17 | //! inconvenient. This module provides a [`TextDiffRemapper`] which lets you |
18 | //! map from the ranges that the [`TextDiff`] returns to the original input |
19 | //! strings. For more information see [`TextDiffRemapper`]. |
20 | //! |
21 | //! # Simple Diff Functions |
22 | //! |
//! This module provides a range of common text diff functions that will
24 | //! produce vectors of `(change_tag, value)` tuples. They will automatically |
25 | //! optimize towards returning the most useful slice that one would expect for |
26 | //! the type of diff performed. |
27 | |
28 | use std::hash::Hash; |
29 | use std::ops::{Index, Range}; |
30 | |
31 | use crate::{ |
32 | capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff, |
33 | }; |
34 | |
/// Maps ranges of token indexes back onto slices of the original source.
///
/// `indexes[i]` is the offset range the `i`-th token occupies in `source`,
/// as accumulated from the token lengths by `SliceRemapper::new`.
struct SliceRemapper<'x, T: ?Sized> {
    /// The original, untokenized input that slices are taken from.
    source: &'x T,
    /// Offset range covered by each token, in token order.
    indexes: Vec<Range<usize>>,
}
39 | |
40 | impl<'x, T: DiffableStr + ?Sized> SliceRemapper<'x, T> { |
41 | fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> { |
42 | let indexes: Vec> = slicesimpl Iterator- >
|
43 | .iter() |
44 | .scan(initial_state:0, |state: &mut usize, item: &&T| { |
45 | let start: usize = *state; |
46 | let end: usize = start + item.len(); |
47 | *state = end; |
48 | Some(start..end) |
49 | }) |
50 | .collect(); |
51 | SliceRemapper { source, indexes } |
52 | } |
53 | |
54 | fn slice(&self, range: Range<usize>) -> Option<&'x T> { |
55 | let start: usize = self.indexes.get(index:range.start)?.start; |
56 | let end: usize = self.indexes.get(index:range.end - 1)?.end; |
57 | Some(self.source.slice(rng:start..end)) |
58 | } |
59 | } |
60 | |
61 | impl<'x, T: DiffableStr + ?Sized> Index<Range<usize>> for SliceRemapper<'x, T> { |
62 | type Output = T; |
63 | |
64 | fn index(&self, range: Range<usize>) -> &Self::Output { |
65 | self.slice(range).expect(msg:"out of bounds" ) |
66 | } |
67 | } |
68 | |
/// A remapper that can remap diff ops to the original slices.
///
/// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from
/// two strings and the internal tokenization is used, this remapper can take
/// a range in the tokenized sequences and remap it to the original string.
/// This is particularly useful when you want to do things like character or
/// grapheme level diffs but you do not want to iterate over many small
/// sequences and would rather work with large consecutive ones from the source.
///
/// ```rust
/// use similar::{ChangeTag, TextDiff};
/// use similar::utils::TextDiffRemapper;
///
/// let old = "yo! foo bar baz";
/// let new = "yo! foo bor baz";
/// let diff = TextDiff::from_words(old, new);
/// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
/// let changes: Vec<_> = diff.ops()
///     .iter()
///     .flat_map(move |x| remapper.iter_slices(x))
///     .collect();
///
/// assert_eq!(changes, vec![
///     (ChangeTag::Equal, "yo! foo "),
///     (ChangeTag::Delete, "bar"),
///     (ChangeTag::Insert, "bor"),
///     (ChangeTag::Equal, " baz")
/// ]);
/// ```
pub struct TextDiffRemapper<'x, T: ?Sized> {
    // Remapper over the old input and its token slices.
    old: SliceRemapper<'x, T>,
    // Remapper over the new input and its token slices.
    new: SliceRemapper<'x, T>,
}
101 | |
102 | impl<'x, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> { |
103 | /// Creates a new remapper from strings and slices. |
104 | pub fn new( |
105 | old_slices: &[&'x T], |
106 | new_slices: &[&'x T], |
107 | old: &'x T, |
108 | new: &'x T, |
109 | ) -> TextDiffRemapper<'x, T> { |
110 | TextDiffRemapper { |
111 | old: SliceRemapper::new(old, old_slices), |
112 | new: SliceRemapper::new(new, new_slices), |
113 | } |
114 | } |
115 | |
116 | /// Creates a new remapper from a text diff and the original strings. |
117 | pub fn from_text_diff<'old, 'new, 'bufs>( |
118 | diff: &TextDiff<'old, 'new, 'bufs, T>, |
119 | old: &'x T, |
120 | new: &'x T, |
121 | ) -> TextDiffRemapper<'x, T> |
122 | where |
123 | 'old: 'x, |
124 | 'new: 'x, |
125 | { |
126 | TextDiffRemapper { |
127 | old: SliceRemapper::new(old, diff.old_slices()), |
128 | new: SliceRemapper::new(new, diff.new_slices()), |
129 | } |
130 | } |
131 | |
132 | /// Slices into the old string. |
133 | pub fn slice_old(&self, range: Range<usize>) -> Option<&'x T> { |
134 | self.old.slice(range) |
135 | } |
136 | |
137 | /// Slices into the new string. |
138 | pub fn slice_new(&self, range: Range<usize>) -> Option<&'x T> { |
139 | self.new.slice(range) |
140 | } |
141 | |
142 | /// Given a diffop yields the changes it encodes against the original strings. |
143 | /// |
144 | /// This is the same as the [`DiffOp::iter_slices`] method. |
145 | /// |
146 | /// ## Panics |
147 | /// |
148 | /// This method can panic if the input strings passed to the constructor |
149 | /// are incompatible with the input strings passed to the diffing algorithm. |
150 | pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator<Item = (ChangeTag, &'x T)> { |
151 | // note: this is equivalent to the code in `DiffOp::iter_slices`. It is |
152 | // a copy/paste because the slicing currently cannot be well abstracted |
153 | // because of lifetime issues caused by the `Index` trait. |
154 | match *op { |
155 | DiffOp::Equal { old_index, len, .. } => { |
156 | Some((ChangeTag::Equal, self.old.slice(old_index..old_index + len))) |
157 | .into_iter() |
158 | .chain(None) |
159 | } |
160 | DiffOp::Insert { |
161 | new_index, new_len, .. |
162 | } => Some(( |
163 | ChangeTag::Insert, |
164 | self.new.slice(new_index..new_index + new_len), |
165 | )) |
166 | .into_iter() |
167 | .chain(None), |
168 | DiffOp::Delete { |
169 | old_index, old_len, .. |
170 | } => Some(( |
171 | ChangeTag::Delete, |
172 | self.old.slice(old_index..old_index + old_len), |
173 | )) |
174 | .into_iter() |
175 | .chain(None), |
176 | DiffOp::Replace { |
177 | old_index, |
178 | old_len, |
179 | new_index, |
180 | new_len, |
181 | } => Some(( |
182 | ChangeTag::Delete, |
183 | self.old.slice(old_index..old_index + old_len), |
184 | )) |
185 | .into_iter() |
186 | .chain(Some(( |
187 | ChangeTag::Insert, |
188 | self.new.slice(new_index..new_index + new_len), |
189 | ))), |
190 | } |
191 | .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds" ))) |
192 | } |
193 | } |
194 | |
195 | /// Shortcut for diffing two slices. |
196 | /// |
197 | /// This function produces the diff of two slices and returns a vector |
198 | /// with the changes. |
199 | /// |
200 | /// ```rust |
201 | /// use similar::{Algorithm, ChangeTag}; |
202 | /// use similar::utils::diff_slices; |
203 | /// |
204 | /// let old = "foo \nbar \nbaz" .lines().collect::<Vec<_>>(); |
205 | /// let new = "foo \nbar \nBAZ" .lines().collect::<Vec<_>>(); |
206 | /// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![ |
207 | /// (ChangeTag::Equal, &["foo" , "bar" ][..]), |
208 | /// (ChangeTag::Delete, &["baz" ][..]), |
209 | /// (ChangeTag::Insert, &["BAZ" ][..]), |
210 | /// ]); |
211 | /// ``` |
212 | pub fn diff_slices<'x, T: PartialEq + Hash + Ord>( |
213 | alg: Algorithm, |
214 | old: &'x [T], |
215 | new: &'x [T], |
216 | ) -> Vec<(ChangeTag, &'x [T])> { |
217 | capture_diff_slicesimpl Iterator (alg, old, new) |
218 | .iter() |
219 | .flat_map(|op: &DiffOp| op.iter_slices(old, new)) |
220 | .collect() |
221 | } |
222 | |
223 | /// Shortcut for making a character level diff. |
224 | /// |
225 | /// This function produces the diff of two strings and returns a vector |
226 | /// with the changes. It returns connected slices into the original string |
227 | /// rather than character level slices. |
228 | /// |
229 | /// ```rust |
230 | /// use similar::{Algorithm, ChangeTag}; |
231 | /// use similar::utils::diff_chars; |
232 | /// |
233 | /// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz" , "fooBARbaz" ), vec![ |
234 | /// (ChangeTag::Equal, "foo" ), |
235 | /// (ChangeTag::Delete, "bar" ), |
236 | /// (ChangeTag::Insert, "BAR" ), |
237 | /// (ChangeTag::Equal, "baz" ), |
238 | /// ]); |
239 | /// ``` |
240 | pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>( |
241 | alg: Algorithm, |
242 | old: &'x T, |
243 | new: &'x T, |
244 | ) -> Vec<(ChangeTag, &'x T::Output)> { |
245 | let old: &::Output = old.as_diffable_str(); |
246 | let new: &::Output = new.as_diffable_str(); |
247 | let diff: TextDiff<'_, '_, '_, ::Output> = TextDiff::configure().algorithm(alg).diff_chars(old, new); |
248 | let remapper: TextDiffRemapper<'_, ::Output> = TextDiffRemapper::from_text_diff(&diff, old, new); |
249 | diffimpl Iterator .ops() |
250 | .iter() |
251 | .flat_map(move |x: &DiffOp| remapper.iter_slices(op:x)) |
252 | .collect() |
253 | } |
254 | |
255 | /// Shortcut for making a word level diff. |
256 | /// |
257 | /// This function produces the diff of two strings and returns a vector |
258 | /// with the changes. It returns connected slices into the original string |
259 | /// rather than word level slices. |
260 | /// |
261 | /// ```rust |
262 | /// use similar::{Algorithm, ChangeTag}; |
263 | /// use similar::utils::diff_words; |
264 | /// |
265 | /// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz" , "foo bor baz" ), vec![ |
266 | /// (ChangeTag::Equal, "foo " ), |
267 | /// (ChangeTag::Delete, "bar" ), |
268 | /// (ChangeTag::Insert, "bor" ), |
269 | /// (ChangeTag::Equal, " baz" ), |
270 | /// ]); |
271 | /// ``` |
272 | pub fn diff_words<'x, T: DiffableStrRef + ?Sized>( |
273 | alg: Algorithm, |
274 | old: &'x T, |
275 | new: &'x T, |
276 | ) -> Vec<(ChangeTag, &'x T::Output)> { |
277 | let old: &::Output = old.as_diffable_str(); |
278 | let new: &::Output = new.as_diffable_str(); |
279 | let diff: TextDiff<'_, '_, '_, ::Output> = TextDiff::configure().algorithm(alg).diff_words(old, new); |
280 | let remapper: TextDiffRemapper<'_, ::Output> = TextDiffRemapper::from_text_diff(&diff, old, new); |
281 | diffimpl Iterator .ops() |
282 | .iter() |
283 | .flat_map(move |x: &DiffOp| remapper.iter_slices(op:x)) |
284 | .collect() |
285 | } |
286 | |
/// Shortcut for making a unicode word level diff.
///
/// Diffs the two strings on unicode word tokens and returns the changes as
/// a vector. The values are connected slices of the original strings rather
/// than individual word level slices.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_unicode_words;
///
/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?";
/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "),
///     (ChangeTag::Delete, "32.3"),
///     (ChangeTag::Insert, "9.84"),
///     (ChangeTag::Equal, " "),
///     (ChangeTag::Delete, "feet"),
///     (ChangeTag::Insert, "meters"),
///     (ChangeTag::Equal, ", right?")
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let (old, new) = (old.as_diffable_str(), new.as_diffable_str());
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_unicode_words(old, new);
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);

    // Collect the remapped changes of every op, in op order.
    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
328 | |
/// Shortcut for making a grapheme level diff.
///
/// Diffs the two strings on grapheme tokens and returns the changes as a
/// vector. The values are connected slices of the original strings rather
/// than individual grapheme level slices.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_graphemes;
///
/// let old = "The flag of Austria is 🇦🇹";
/// let new = "The flag of Albania is 🇦🇱";
/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The flag of A"),
///     (ChangeTag::Delete, "ustr"),
///     (ChangeTag::Insert, "lban"),
///     (ChangeTag::Equal, "ia is "),
///     (ChangeTag::Delete, "🇦🇹"),
///     (ChangeTag::Insert, "🇦🇱"),
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let (old, new) = (old.as_diffable_str(), new.as_diffable_str());
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_graphemes(old, new);
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);

    // Collect the remapped changes of every op, in op order.
    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
369 | |
370 | /// Shortcut for making a line diff. |
371 | /// |
372 | /// This function produces the diff of two slices and returns a vector |
373 | /// with the changes. Unlike [`diff_chars`] or [`diff_slices`] it returns a |
374 | /// change tag for each line. |
375 | /// |
376 | /// ```rust |
377 | /// use similar::{Algorithm, ChangeTag}; |
378 | /// use similar::utils::diff_lines; |
379 | /// |
380 | /// assert_eq!(diff_lines(Algorithm::Myers, "foo \nbar \nbaz \nblah" , "foo \nbar \nbaz \nblurgh" ), vec![ |
381 | /// (ChangeTag::Equal, "foo \n" ), |
382 | /// (ChangeTag::Equal, "bar \n" ), |
383 | /// (ChangeTag::Equal, "baz \n" ), |
384 | /// (ChangeTag::Delete, "blah" ), |
385 | /// (ChangeTag::Insert, "blurgh" ), |
386 | /// ]); |
387 | /// ``` |
388 | pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>( |
389 | alg: Algorithm, |
390 | old: &'x T, |
391 | new: &'x T, |
392 | ) -> Vec<(ChangeTag, &'x T::Output)> { |
393 | TextDiffimpl Iterator ::configure() |
394 | .algorithm(alg) |
395 | .diff_lines(old, new) |
396 | .iter_all_changes() |
397 | .map(|change: Change<&::Output>| (change.tag(), change.value())) |
398 | .collect() |
399 | } |
400 | |
/// Checks that token ranges are mapped back to slices of the source string
/// and that a range reaching past the last token yields `None`.
#[test]
fn test_remapper() {
    let a = "foo bar baz";
    let words = a.tokenize_words();
    let remap = SliceRemapper::new(a, &words);
    assert_eq!(remap.slice(0..3), Some("foo bar"));
    assert_eq!(remap.slice(1..3), Some(" bar"));
    assert_eq!(remap.slice(0..1), Some("foo"));
    // Per these two assertions the input splits into five tokens, so `0..5`
    // spans the whole string and `0..6` is out of range.
    assert_eq!(remap.slice(0..5), Some("foo bar baz"));
    assert_eq!(remap.slice(0..6), None);
}
413 | |