1 | use std::fmt; |
2 | use std::ops::{Index, Range}; |
3 | |
4 | use crate::algorithms::utils::is_empty_range; |
5 | use crate::algorithms::DiffHook; |
6 | use crate::iter::ChangesIter; |
7 | |
8 | /// An enum representing a diffing algorithm. |
9 | #[derive (Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] |
10 | #[cfg_attr ( |
11 | feature = "serde" , |
12 | derive(serde::Serialize, serde::Deserialize), |
13 | serde(rename_all = "snake_case" ) |
14 | )] |
15 | pub enum Algorithm { |
16 | /// Picks the myers algorithm from [`crate::algorithms::myers`] |
17 | Myers, |
18 | /// Picks the patience algorithm from [`crate::algorithms::patience`] |
19 | Patience, |
20 | /// Picks the LCS algorithm from [`crate::algorithms::lcs`] |
21 | Lcs, |
22 | } |
23 | |
24 | impl Default for Algorithm { |
25 | /// Returns the default algorithm ([`Algorithm::Myers`]). |
26 | fn default() -> Algorithm { |
27 | Algorithm::Myers |
28 | } |
29 | } |
30 | |
31 | /// The tag of a change. |
32 | #[derive (Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] |
33 | #[cfg_attr ( |
34 | feature = "serde" , |
35 | derive(serde::Serialize, serde::Deserialize), |
36 | serde(rename_all = "snake_case" ) |
37 | )] |
38 | pub enum ChangeTag { |
39 | /// The change indicates equality (not a change) |
40 | Equal, |
41 | /// The change indicates deleted text. |
42 | Delete, |
43 | /// The change indicates inserted text. |
44 | Insert, |
45 | } |
46 | |
47 | impl fmt::Display for ChangeTag { |
48 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
49 | write!( |
50 | f, |
51 | " {}" , |
52 | match &self { |
53 | ChangeTag::Equal => ' ' , |
54 | ChangeTag::Delete => '-' , |
55 | ChangeTag::Insert => '+' , |
56 | } |
57 | ) |
58 | } |
59 | } |
60 | |
61 | /// Represents the expanded [`DiffOp`] change. |
62 | /// |
63 | /// This type is returned from [`DiffOp::iter_changes`] and |
64 | /// [`TextDiff::iter_changes`](crate::text::TextDiff::iter_changes). |
65 | /// |
66 | /// It exists so that it's more convenient to work with textual differences as |
67 | /// the underlying [`DiffOp`] encodes a group of changes. |
68 | /// |
69 | /// This type has additional methods that are only available for types |
70 | /// implementing [`DiffableStr`](crate::text::DiffableStr). |
71 | #[derive (Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] |
72 | #[cfg_attr (feature = "serde" , derive(serde::Serialize))] |
73 | pub struct Change<T> { |
74 | pub(crate) tag: ChangeTag, |
75 | pub(crate) old_index: Option<usize>, |
76 | pub(crate) new_index: Option<usize>, |
77 | pub(crate) value: T, |
78 | } |
79 | |
80 | /// These methods are available for all change types. |
81 | impl<T: Clone> Change<T> { |
82 | /// Returns the change tag. |
83 | pub fn tag(&self) -> ChangeTag { |
84 | self.tag |
85 | } |
86 | |
87 | /// Returns the old index if available. |
88 | pub fn old_index(&self) -> Option<usize> { |
89 | self.old_index |
90 | } |
91 | |
92 | /// Returns the new index if available. |
93 | pub fn new_index(&self) -> Option<usize> { |
94 | self.new_index |
95 | } |
96 | |
97 | /// Returns the underlying changed value. |
98 | /// |
99 | /// Depending on the type of the underlying [`crate::text::DiffableStr`] |
100 | /// this value is more or less useful. If you always want to have a utf-8 |
101 | /// string it's best to use the [`Change::as_str`] and |
102 | /// [`Change::to_string_lossy`] methods. |
103 | pub fn value(&self) -> T { |
104 | self.value.clone() |
105 | } |
106 | |
107 | /// Returns the underlying changed value as reference. |
108 | pub fn value_ref(&self) -> &T { |
109 | &self.value |
110 | } |
111 | |
112 | /// Returns the underlying changed value as mutable reference. |
113 | pub fn value_mut(&mut self) -> &mut T { |
114 | &mut self.value |
115 | } |
116 | } |
117 | |
118 | /// Utility enum to capture a diff operation. |
119 | /// |
120 | /// This is used by [`Capture`](crate::algorithms::Capture). |
121 | #[derive (Debug, PartialEq, Eq, Hash, Clone, Copy)] |
122 | #[cfg_attr ( |
123 | feature = "serde" , |
124 | derive(serde::Serialize, serde::Deserialize), |
125 | serde(rename_all = "snake_case" , tag = "op" ) |
126 | )] |
127 | pub enum DiffOp { |
128 | /// A segment is equal (see [`DiffHook::equal`]) |
129 | Equal { |
130 | /// The starting index in the old sequence. |
131 | old_index: usize, |
132 | /// The starting index in the new sequence. |
133 | new_index: usize, |
134 | /// The length of the segment. |
135 | len: usize, |
136 | }, |
137 | /// A segment was deleted (see [`DiffHook::delete`]) |
138 | Delete { |
139 | /// The starting index in the old sequence. |
140 | old_index: usize, |
141 | /// The length of the old segment. |
142 | old_len: usize, |
143 | /// The starting index in the new sequence. |
144 | new_index: usize, |
145 | }, |
146 | /// A segment was inserted (see [`DiffHook::insert`]) |
147 | Insert { |
148 | /// The starting index in the old sequence. |
149 | old_index: usize, |
150 | /// The starting index in the new sequence. |
151 | new_index: usize, |
152 | /// The length of the new segment. |
153 | new_len: usize, |
154 | }, |
155 | /// A segment was replaced (see [`DiffHook::replace`]) |
156 | Replace { |
157 | /// The starting index in the old sequence. |
158 | old_index: usize, |
159 | /// The length of the old segment. |
160 | old_len: usize, |
161 | /// The starting index in the new sequence. |
162 | new_index: usize, |
163 | /// The length of the new segment. |
164 | new_len: usize, |
165 | }, |
166 | } |
167 | |
168 | /// The tag of a diff operation. |
169 | #[derive (Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] |
170 | #[cfg_attr ( |
171 | feature = "serde" , |
172 | derive(serde::Serialize, serde::Deserialize), |
173 | serde(rename_all = "snake_case" ) |
174 | )] |
175 | pub enum DiffTag { |
176 | /// The diff op encodes an equal segment. |
177 | Equal, |
178 | /// The diff op encodes a deleted segment. |
179 | Delete, |
180 | /// The diff op encodes an inserted segment. |
181 | Insert, |
182 | /// The diff op encodes a replaced segment. |
183 | Replace, |
184 | } |
185 | |
186 | impl DiffOp { |
187 | /// Returns the tag of the operation. |
188 | pub fn tag(self) -> DiffTag { |
189 | self.as_tag_tuple().0 |
190 | } |
191 | |
192 | /// Returns the old range. |
193 | pub fn old_range(&self) -> Range<usize> { |
194 | self.as_tag_tuple().1 |
195 | } |
196 | |
197 | /// Returns the new range. |
198 | pub fn new_range(&self) -> Range<usize> { |
199 | self.as_tag_tuple().2 |
200 | } |
201 | |
202 | /// Transform the op into a tuple of diff tag and ranges. |
203 | /// |
204 | /// This is useful when operating on slices. The returned format is |
205 | /// `(tag, i1..i2, j1..j2)`: |
206 | /// |
207 | /// * `Replace`: `a[i1..i2]` should be replaced by `b[j1..j2]` |
208 | /// * `Delete`: `a[i1..i2]` should be deleted (`j1 == j2` in this case). |
209 | /// * `Insert`: `b[j1..j2]` should be inserted at `a[i1..i2]` (`i1 == i2` in this case). |
210 | /// * `Equal`: `a[i1..i2]` is equal to `b[j1..j2]`. |
211 | pub fn as_tag_tuple(&self) -> (DiffTag, Range<usize>, Range<usize>) { |
212 | match *self { |
213 | DiffOp::Equal { |
214 | old_index, |
215 | new_index, |
216 | len, |
217 | } => ( |
218 | DiffTag::Equal, |
219 | old_index..old_index + len, |
220 | new_index..new_index + len, |
221 | ), |
222 | DiffOp::Delete { |
223 | old_index, |
224 | new_index, |
225 | old_len, |
226 | } => ( |
227 | DiffTag::Delete, |
228 | old_index..old_index + old_len, |
229 | new_index..new_index, |
230 | ), |
231 | DiffOp::Insert { |
232 | old_index, |
233 | new_index, |
234 | new_len, |
235 | } => ( |
236 | DiffTag::Insert, |
237 | old_index..old_index, |
238 | new_index..new_index + new_len, |
239 | ), |
240 | DiffOp::Replace { |
241 | old_index, |
242 | old_len, |
243 | new_index, |
244 | new_len, |
245 | } => ( |
246 | DiffTag::Replace, |
247 | old_index..old_index + old_len, |
248 | new_index..new_index + new_len, |
249 | ), |
250 | } |
251 | } |
252 | |
253 | /// Apply this operation to a diff hook. |
254 | pub fn apply_to_hook<D: DiffHook>(&self, d: &mut D) -> Result<(), D::Error> { |
255 | match *self { |
256 | DiffOp::Equal { |
257 | old_index, |
258 | new_index, |
259 | len, |
260 | } => d.equal(old_index, new_index, len), |
261 | DiffOp::Delete { |
262 | old_index, |
263 | old_len, |
264 | new_index, |
265 | } => d.delete(old_index, old_len, new_index), |
266 | DiffOp::Insert { |
267 | old_index, |
268 | new_index, |
269 | new_len, |
270 | } => d.insert(old_index, new_index, new_len), |
271 | DiffOp::Replace { |
272 | old_index, |
273 | old_len, |
274 | new_index, |
275 | new_len, |
276 | } => d.replace(old_index, old_len, new_index, new_len), |
277 | } |
278 | } |
279 | |
280 | /// Iterates over all changes encoded in the diff op against old and new |
281 | /// sequences. |
282 | /// |
283 | /// `old` and `new` are two indexable objects like the types you pass to |
284 | /// the diffing algorithm functions. |
285 | /// |
286 | /// ```rust |
287 | /// use similar::{ChangeTag, Algorithm}; |
288 | /// use similar::capture_diff_slices; |
289 | /// let old = vec!["foo" , "bar" , "baz" ]; |
290 | /// let new = vec!["foo" , "bar" , "blah" ]; |
291 | /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new); |
292 | /// let changes: Vec<_> = ops |
293 | /// .iter() |
294 | /// .flat_map(|x| x.iter_changes(&old, &new)) |
295 | /// .map(|x| (x.tag(), x.value())) |
296 | /// .collect(); |
297 | /// assert_eq!(changes, vec![ |
298 | /// (ChangeTag::Equal, "foo" ), |
299 | /// (ChangeTag::Equal, "bar" ), |
300 | /// (ChangeTag::Delete, "baz" ), |
301 | /// (ChangeTag::Insert, "blah" ), |
302 | /// ]); |
303 | /// ``` |
304 | pub fn iter_changes<'lookup, Old, New, T>( |
305 | &self, |
306 | old: &'lookup Old, |
307 | new: &'lookup New, |
308 | ) -> ChangesIter<'lookup, Old, New, T> |
309 | where |
310 | Old: Index<usize, Output = T> + ?Sized, |
311 | New: Index<usize, Output = T> + ?Sized, |
312 | { |
313 | ChangesIter::new(old, new, *self) |
314 | } |
315 | |
316 | /// Given a diffop yields the changes it encodes against the given slices. |
317 | /// |
318 | /// This is similar to [`DiffOp::iter_changes`] but instead of yielding the |
319 | /// individual changes it yields consequitive changed slices. |
320 | /// |
321 | /// This will only ever yield a single tuple or two tuples in case a |
322 | /// [`DiffOp::Replace`] operation is passed. |
323 | /// |
324 | /// ```rust |
325 | /// use similar::{ChangeTag, Algorithm}; |
326 | /// use similar::capture_diff_slices; |
327 | /// let old = vec!["foo" , "bar" , "baz" ]; |
328 | /// let new = vec!["foo" , "bar" , "blah" ]; |
329 | /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new); |
330 | /// let changes: Vec<_> = ops.iter().flat_map(|x| x.iter_slices(&old, &new)).collect(); |
331 | /// assert_eq!(changes, vec![ |
332 | /// (ChangeTag::Equal, &["foo" , "bar" ][..]), |
333 | /// (ChangeTag::Delete, &["baz" ][..]), |
334 | /// (ChangeTag::Insert, &["blah" ][..]), |
335 | /// ]); |
336 | /// ``` |
337 | /// |
338 | /// Due to lifetime restrictions it's currently impossible for the |
339 | /// returned slices to outlive the lookup. |
340 | pub fn iter_slices<'lookup, Old, New, T>( |
341 | &self, |
342 | old: &'lookup Old, |
343 | new: &'lookup New, |
344 | ) -> impl Iterator<Item = (ChangeTag, &'lookup T)> |
345 | where |
346 | T: 'lookup + ?Sized, |
347 | Old: Index<Range<usize>, Output = T> + ?Sized, |
348 | New: Index<Range<usize>, Output = T> + ?Sized, |
349 | { |
350 | match *self { |
351 | DiffOp::Equal { old_index, len, .. } => { |
352 | Some((ChangeTag::Equal, &old[old_index..old_index + len])) |
353 | .into_iter() |
354 | .chain(None) |
355 | } |
356 | DiffOp::Insert { |
357 | new_index, new_len, .. |
358 | } => Some((ChangeTag::Insert, &new[new_index..new_index + new_len])) |
359 | .into_iter() |
360 | .chain(None), |
361 | DiffOp::Delete { |
362 | old_index, old_len, .. |
363 | } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len])) |
364 | .into_iter() |
365 | .chain(None), |
366 | DiffOp::Replace { |
367 | old_index, |
368 | old_len, |
369 | new_index, |
370 | new_len, |
371 | } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len])) |
372 | .into_iter() |
373 | .chain(Some(( |
374 | ChangeTag::Insert, |
375 | &new[new_index..new_index + new_len], |
376 | ))), |
377 | } |
378 | } |
379 | |
380 | pub(crate) fn is_empty(&self) -> bool { |
381 | let (_, old, new) = self.as_tag_tuple(); |
382 | is_empty_range(&old) && is_empty_range(&new) |
383 | } |
384 | |
385 | pub(crate) fn shift_left(&mut self, adjust: usize) { |
386 | self.adjust((adjust, true), (0, false)); |
387 | } |
388 | |
389 | pub(crate) fn shift_right(&mut self, adjust: usize) { |
390 | self.adjust((adjust, false), (0, false)); |
391 | } |
392 | |
393 | pub(crate) fn grow_left(&mut self, adjust: usize) { |
394 | self.adjust((adjust, true), (adjust, false)); |
395 | } |
396 | |
397 | pub(crate) fn grow_right(&mut self, adjust: usize) { |
398 | self.adjust((0, false), (adjust, false)); |
399 | } |
400 | |
401 | pub(crate) fn shrink_left(&mut self, adjust: usize) { |
402 | self.adjust((0, false), (adjust, true)); |
403 | } |
404 | |
405 | pub(crate) fn shrink_right(&mut self, adjust: usize) { |
406 | self.adjust((adjust, false), (adjust, true)); |
407 | } |
408 | |
409 | fn adjust(&mut self, adjust_offset: (usize, bool), adjust_len: (usize, bool)) { |
410 | #[inline (always)] |
411 | fn modify(val: &mut usize, adj: (usize, bool)) { |
412 | if adj.1 { |
413 | *val -= adj.0; |
414 | } else { |
415 | *val += adj.0; |
416 | } |
417 | } |
418 | |
419 | match self { |
420 | DiffOp::Equal { |
421 | old_index, |
422 | new_index, |
423 | len, |
424 | } => { |
425 | modify(old_index, adjust_offset); |
426 | modify(new_index, adjust_offset); |
427 | modify(len, adjust_len); |
428 | } |
429 | DiffOp::Delete { |
430 | old_index, |
431 | old_len, |
432 | new_index, |
433 | } => { |
434 | modify(old_index, adjust_offset); |
435 | modify(old_len, adjust_len); |
436 | modify(new_index, adjust_offset); |
437 | } |
438 | DiffOp::Insert { |
439 | old_index, |
440 | new_index, |
441 | new_len, |
442 | } => { |
443 | modify(old_index, adjust_offset); |
444 | modify(new_index, adjust_offset); |
445 | modify(new_len, adjust_len); |
446 | } |
447 | DiffOp::Replace { |
448 | old_index, |
449 | old_len, |
450 | new_index, |
451 | new_len, |
452 | } => { |
453 | modify(old_index, adjust_offset); |
454 | modify(old_len, adjust_len); |
455 | modify(new_index, adjust_offset); |
456 | modify(new_len, adjust_len); |
457 | } |
458 | } |
459 | } |
460 | } |
461 | |
462 | #[cfg (feature = "text" )] |
463 | mod text_additions { |
464 | use super::*; |
465 | use crate::text::DiffableStr; |
466 | use std::borrow::Cow; |
467 | |
468 | /// The text interface can produce changes over [`DiffableStr`] implementing |
469 | /// values. As those are generic interfaces for different types of strings |
470 | /// utility methods to make working with standard rust strings more enjoyable. |
471 | impl<'s, T: DiffableStr + ?Sized> Change<&'s T> { |
472 | /// Returns the value as string if it is utf-8. |
473 | pub fn as_str(&self) -> Option<&'s str> { |
474 | T::as_str(self.value) |
475 | } |
476 | |
477 | /// Returns the value (lossy) decoded as utf-8 string. |
478 | pub fn to_string_lossy(&self) -> Cow<'s, str> { |
479 | T::to_string_lossy(self.value) |
480 | } |
481 | |
482 | /// Returns `true` if this change does not end in a newline and must be |
483 | /// followed up by one if line based diffs are used. |
484 | /// |
485 | /// The [`std::fmt::Display`] implementation of [`Change`] will automatically |
486 | /// insert a newline after the value if this is true. |
487 | pub fn missing_newline(&self) -> bool { |
488 | !T::ends_with_newline(self.value) |
489 | } |
490 | } |
491 | |
492 | impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<&'s T> { |
493 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
494 | write!( |
495 | f, |
496 | " {}{}" , |
497 | self.to_string_lossy(), |
498 | if self.missing_newline() { " \n" } else { "" } |
499 | ) |
500 | } |
501 | } |
502 | } |
503 | |