| 1 | use std::fmt; |
| 2 | use std::ops::{Index, Range}; |
| 3 | |
| 4 | use crate::algorithms::utils::is_empty_range; |
| 5 | use crate::algorithms::DiffHook; |
| 6 | use crate::iter::ChangesIter; |
| 7 | |
| 8 | /// An enum representing a diffing algorithm. |
| 9 | #[derive (Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] |
| 10 | #[cfg_attr ( |
| 11 | feature = "serde" , |
| 12 | derive(serde::Serialize, serde::Deserialize), |
| 13 | serde(rename_all = "snake_case" ) |
| 14 | )] |
| 15 | pub enum Algorithm { |
| 16 | /// Picks the myers algorithm from [`crate::algorithms::myers`] |
| 17 | Myers, |
| 18 | /// Picks the patience algorithm from [`crate::algorithms::patience`] |
| 19 | Patience, |
| 20 | /// Picks the LCS algorithm from [`crate::algorithms::lcs`] |
| 21 | Lcs, |
| 22 | } |
| 23 | |
| 24 | impl Default for Algorithm { |
| 25 | /// Returns the default algorithm ([`Algorithm::Myers`]). |
| 26 | fn default() -> Algorithm { |
| 27 | Algorithm::Myers |
| 28 | } |
| 29 | } |
| 30 | |
| 31 | /// The tag of a change. |
| 32 | #[derive (Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] |
| 33 | #[cfg_attr ( |
| 34 | feature = "serde" , |
| 35 | derive(serde::Serialize, serde::Deserialize), |
| 36 | serde(rename_all = "snake_case" ) |
| 37 | )] |
| 38 | pub enum ChangeTag { |
| 39 | /// The change indicates equality (not a change) |
| 40 | Equal, |
| 41 | /// The change indicates deleted text. |
| 42 | Delete, |
| 43 | /// The change indicates inserted text. |
| 44 | Insert, |
| 45 | } |
| 46 | |
| 47 | impl fmt::Display for ChangeTag { |
| 48 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 49 | write!( |
| 50 | f, |
| 51 | " {}" , |
| 52 | match &self { |
| 53 | ChangeTag::Equal => ' ' , |
| 54 | ChangeTag::Delete => '-' , |
| 55 | ChangeTag::Insert => '+' , |
| 56 | } |
| 57 | ) |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | /// Represents the expanded [`DiffOp`] change. |
| 62 | /// |
| 63 | /// This type is returned from [`DiffOp::iter_changes`] and |
| 64 | /// [`TextDiff::iter_changes`](crate::text::TextDiff::iter_changes). |
| 65 | /// |
| 66 | /// It exists so that it's more convenient to work with textual differences as |
| 67 | /// the underlying [`DiffOp`] encodes a group of changes. |
| 68 | /// |
| 69 | /// This type has additional methods that are only available for types |
| 70 | /// implementing [`DiffableStr`](crate::text::DiffableStr). |
| 71 | #[derive (Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] |
| 72 | #[cfg_attr (feature = "serde" , derive(serde::Serialize))] |
| 73 | pub struct Change<T> { |
| 74 | pub(crate) tag: ChangeTag, |
| 75 | pub(crate) old_index: Option<usize>, |
| 76 | pub(crate) new_index: Option<usize>, |
| 77 | pub(crate) value: T, |
| 78 | } |
| 79 | |
| 80 | /// These methods are available for all change types. |
| 81 | impl<T: Clone> Change<T> { |
| 82 | /// Returns the change tag. |
| 83 | pub fn tag(&self) -> ChangeTag { |
| 84 | self.tag |
| 85 | } |
| 86 | |
| 87 | /// Returns the old index if available. |
| 88 | pub fn old_index(&self) -> Option<usize> { |
| 89 | self.old_index |
| 90 | } |
| 91 | |
| 92 | /// Returns the new index if available. |
| 93 | pub fn new_index(&self) -> Option<usize> { |
| 94 | self.new_index |
| 95 | } |
| 96 | |
| 97 | /// Returns the underlying changed value. |
| 98 | /// |
| 99 | /// Depending on the type of the underlying [`crate::text::DiffableStr`] |
| 100 | /// this value is more or less useful. If you always want to have a utf-8 |
| 101 | /// string it's best to use the [`Change::as_str`] and |
| 102 | /// [`Change::to_string_lossy`] methods. |
| 103 | pub fn value(&self) -> T { |
| 104 | self.value.clone() |
| 105 | } |
| 106 | |
| 107 | /// Returns the underlying changed value as reference. |
| 108 | pub fn value_ref(&self) -> &T { |
| 109 | &self.value |
| 110 | } |
| 111 | |
| 112 | /// Returns the underlying changed value as mutable reference. |
| 113 | pub fn value_mut(&mut self) -> &mut T { |
| 114 | &mut self.value |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | /// Utility enum to capture a diff operation. |
| 119 | /// |
| 120 | /// This is used by [`Capture`](crate::algorithms::Capture). |
| 121 | #[derive (Debug, PartialEq, Eq, Hash, Clone, Copy)] |
| 122 | #[cfg_attr ( |
| 123 | feature = "serde" , |
| 124 | derive(serde::Serialize, serde::Deserialize), |
| 125 | serde(rename_all = "snake_case" , tag = "op" ) |
| 126 | )] |
| 127 | pub enum DiffOp { |
| 128 | /// A segment is equal (see [`DiffHook::equal`]) |
| 129 | Equal { |
| 130 | /// The starting index in the old sequence. |
| 131 | old_index: usize, |
| 132 | /// The starting index in the new sequence. |
| 133 | new_index: usize, |
| 134 | /// The length of the segment. |
| 135 | len: usize, |
| 136 | }, |
| 137 | /// A segment was deleted (see [`DiffHook::delete`]) |
| 138 | Delete { |
| 139 | /// The starting index in the old sequence. |
| 140 | old_index: usize, |
| 141 | /// The length of the old segment. |
| 142 | old_len: usize, |
| 143 | /// The starting index in the new sequence. |
| 144 | new_index: usize, |
| 145 | }, |
| 146 | /// A segment was inserted (see [`DiffHook::insert`]) |
| 147 | Insert { |
| 148 | /// The starting index in the old sequence. |
| 149 | old_index: usize, |
| 150 | /// The starting index in the new sequence. |
| 151 | new_index: usize, |
| 152 | /// The length of the new segment. |
| 153 | new_len: usize, |
| 154 | }, |
| 155 | /// A segment was replaced (see [`DiffHook::replace`]) |
| 156 | Replace { |
| 157 | /// The starting index in the old sequence. |
| 158 | old_index: usize, |
| 159 | /// The length of the old segment. |
| 160 | old_len: usize, |
| 161 | /// The starting index in the new sequence. |
| 162 | new_index: usize, |
| 163 | /// The length of the new segment. |
| 164 | new_len: usize, |
| 165 | }, |
| 166 | } |
| 167 | |
| 168 | /// The tag of a diff operation. |
| 169 | #[derive (Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] |
| 170 | #[cfg_attr ( |
| 171 | feature = "serde" , |
| 172 | derive(serde::Serialize, serde::Deserialize), |
| 173 | serde(rename_all = "snake_case" ) |
| 174 | )] |
| 175 | pub enum DiffTag { |
| 176 | /// The diff op encodes an equal segment. |
| 177 | Equal, |
| 178 | /// The diff op encodes a deleted segment. |
| 179 | Delete, |
| 180 | /// The diff op encodes an inserted segment. |
| 181 | Insert, |
| 182 | /// The diff op encodes a replaced segment. |
| 183 | Replace, |
| 184 | } |
| 185 | |
| 186 | impl DiffOp { |
| 187 | /// Returns the tag of the operation. |
| 188 | pub fn tag(self) -> DiffTag { |
| 189 | self.as_tag_tuple().0 |
| 190 | } |
| 191 | |
| 192 | /// Returns the old range. |
| 193 | pub fn old_range(&self) -> Range<usize> { |
| 194 | self.as_tag_tuple().1 |
| 195 | } |
| 196 | |
| 197 | /// Returns the new range. |
| 198 | pub fn new_range(&self) -> Range<usize> { |
| 199 | self.as_tag_tuple().2 |
| 200 | } |
| 201 | |
| 202 | /// Transform the op into a tuple of diff tag and ranges. |
| 203 | /// |
| 204 | /// This is useful when operating on slices. The returned format is |
| 205 | /// `(tag, i1..i2, j1..j2)`: |
| 206 | /// |
| 207 | /// * `Replace`: `a[i1..i2]` should be replaced by `b[j1..j2]` |
| 208 | /// * `Delete`: `a[i1..i2]` should be deleted (`j1 == j2` in this case). |
| 209 | /// * `Insert`: `b[j1..j2]` should be inserted at `a[i1..i2]` (`i1 == i2` in this case). |
| 210 | /// * `Equal`: `a[i1..i2]` is equal to `b[j1..j2]`. |
| 211 | pub fn as_tag_tuple(&self) -> (DiffTag, Range<usize>, Range<usize>) { |
| 212 | match *self { |
| 213 | DiffOp::Equal { |
| 214 | old_index, |
| 215 | new_index, |
| 216 | len, |
| 217 | } => ( |
| 218 | DiffTag::Equal, |
| 219 | old_index..old_index + len, |
| 220 | new_index..new_index + len, |
| 221 | ), |
| 222 | DiffOp::Delete { |
| 223 | old_index, |
| 224 | new_index, |
| 225 | old_len, |
| 226 | } => ( |
| 227 | DiffTag::Delete, |
| 228 | old_index..old_index + old_len, |
| 229 | new_index..new_index, |
| 230 | ), |
| 231 | DiffOp::Insert { |
| 232 | old_index, |
| 233 | new_index, |
| 234 | new_len, |
| 235 | } => ( |
| 236 | DiffTag::Insert, |
| 237 | old_index..old_index, |
| 238 | new_index..new_index + new_len, |
| 239 | ), |
| 240 | DiffOp::Replace { |
| 241 | old_index, |
| 242 | old_len, |
| 243 | new_index, |
| 244 | new_len, |
| 245 | } => ( |
| 246 | DiffTag::Replace, |
| 247 | old_index..old_index + old_len, |
| 248 | new_index..new_index + new_len, |
| 249 | ), |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | /// Apply this operation to a diff hook. |
| 254 | pub fn apply_to_hook<D: DiffHook>(&self, d: &mut D) -> Result<(), D::Error> { |
| 255 | match *self { |
| 256 | DiffOp::Equal { |
| 257 | old_index, |
| 258 | new_index, |
| 259 | len, |
| 260 | } => d.equal(old_index, new_index, len), |
| 261 | DiffOp::Delete { |
| 262 | old_index, |
| 263 | old_len, |
| 264 | new_index, |
| 265 | } => d.delete(old_index, old_len, new_index), |
| 266 | DiffOp::Insert { |
| 267 | old_index, |
| 268 | new_index, |
| 269 | new_len, |
| 270 | } => d.insert(old_index, new_index, new_len), |
| 271 | DiffOp::Replace { |
| 272 | old_index, |
| 273 | old_len, |
| 274 | new_index, |
| 275 | new_len, |
| 276 | } => d.replace(old_index, old_len, new_index, new_len), |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | /// Iterates over all changes encoded in the diff op against old and new |
| 281 | /// sequences. |
| 282 | /// |
| 283 | /// `old` and `new` are two indexable objects like the types you pass to |
| 284 | /// the diffing algorithm functions. |
| 285 | /// |
| 286 | /// ```rust |
| 287 | /// use similar::{ChangeTag, Algorithm}; |
| 288 | /// use similar::capture_diff_slices; |
| 289 | /// let old = vec!["foo" , "bar" , "baz" ]; |
| 290 | /// let new = vec!["foo" , "bar" , "blah" ]; |
| 291 | /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new); |
| 292 | /// let changes: Vec<_> = ops |
| 293 | /// .iter() |
| 294 | /// .flat_map(|x| x.iter_changes(&old, &new)) |
| 295 | /// .map(|x| (x.tag(), x.value())) |
| 296 | /// .collect(); |
| 297 | /// assert_eq!(changes, vec![ |
| 298 | /// (ChangeTag::Equal, "foo" ), |
| 299 | /// (ChangeTag::Equal, "bar" ), |
| 300 | /// (ChangeTag::Delete, "baz" ), |
| 301 | /// (ChangeTag::Insert, "blah" ), |
| 302 | /// ]); |
| 303 | /// ``` |
| 304 | pub fn iter_changes<'lookup, Old, New, T>( |
| 305 | &self, |
| 306 | old: &'lookup Old, |
| 307 | new: &'lookup New, |
| 308 | ) -> ChangesIter<'lookup, Old, New, T> |
| 309 | where |
| 310 | Old: Index<usize, Output = T> + ?Sized, |
| 311 | New: Index<usize, Output = T> + ?Sized, |
| 312 | { |
| 313 | ChangesIter::new(old, new, *self) |
| 314 | } |
| 315 | |
| 316 | /// Given a diffop yields the changes it encodes against the given slices. |
| 317 | /// |
| 318 | /// This is similar to [`DiffOp::iter_changes`] but instead of yielding the |
| 319 | /// individual changes it yields consequitive changed slices. |
| 320 | /// |
| 321 | /// This will only ever yield a single tuple or two tuples in case a |
| 322 | /// [`DiffOp::Replace`] operation is passed. |
| 323 | /// |
| 324 | /// ```rust |
| 325 | /// use similar::{ChangeTag, Algorithm}; |
| 326 | /// use similar::capture_diff_slices; |
| 327 | /// let old = vec!["foo" , "bar" , "baz" ]; |
| 328 | /// let new = vec!["foo" , "bar" , "blah" ]; |
| 329 | /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new); |
| 330 | /// let changes: Vec<_> = ops.iter().flat_map(|x| x.iter_slices(&old, &new)).collect(); |
| 331 | /// assert_eq!(changes, vec![ |
| 332 | /// (ChangeTag::Equal, &["foo" , "bar" ][..]), |
| 333 | /// (ChangeTag::Delete, &["baz" ][..]), |
| 334 | /// (ChangeTag::Insert, &["blah" ][..]), |
| 335 | /// ]); |
| 336 | /// ``` |
| 337 | /// |
| 338 | /// Due to lifetime restrictions it's currently impossible for the |
| 339 | /// returned slices to outlive the lookup. |
| 340 | pub fn iter_slices<'lookup, Old, New, T>( |
| 341 | &self, |
| 342 | old: &'lookup Old, |
| 343 | new: &'lookup New, |
| 344 | ) -> impl Iterator<Item = (ChangeTag, &'lookup T)> |
| 345 | where |
| 346 | T: 'lookup + ?Sized, |
| 347 | Old: Index<Range<usize>, Output = T> + ?Sized, |
| 348 | New: Index<Range<usize>, Output = T> + ?Sized, |
| 349 | { |
| 350 | match *self { |
| 351 | DiffOp::Equal { old_index, len, .. } => { |
| 352 | Some((ChangeTag::Equal, &old[old_index..old_index + len])) |
| 353 | .into_iter() |
| 354 | .chain(None) |
| 355 | } |
| 356 | DiffOp::Insert { |
| 357 | new_index, new_len, .. |
| 358 | } => Some((ChangeTag::Insert, &new[new_index..new_index + new_len])) |
| 359 | .into_iter() |
| 360 | .chain(None), |
| 361 | DiffOp::Delete { |
| 362 | old_index, old_len, .. |
| 363 | } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len])) |
| 364 | .into_iter() |
| 365 | .chain(None), |
| 366 | DiffOp::Replace { |
| 367 | old_index, |
| 368 | old_len, |
| 369 | new_index, |
| 370 | new_len, |
| 371 | } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len])) |
| 372 | .into_iter() |
| 373 | .chain(Some(( |
| 374 | ChangeTag::Insert, |
| 375 | &new[new_index..new_index + new_len], |
| 376 | ))), |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | pub(crate) fn is_empty(&self) -> bool { |
| 381 | let (_, old, new) = self.as_tag_tuple(); |
| 382 | is_empty_range(&old) && is_empty_range(&new) |
| 383 | } |
| 384 | |
| 385 | pub(crate) fn shift_left(&mut self, adjust: usize) { |
| 386 | self.adjust((adjust, true), (0, false)); |
| 387 | } |
| 388 | |
| 389 | pub(crate) fn shift_right(&mut self, adjust: usize) { |
| 390 | self.adjust((adjust, false), (0, false)); |
| 391 | } |
| 392 | |
| 393 | pub(crate) fn grow_left(&mut self, adjust: usize) { |
| 394 | self.adjust((adjust, true), (adjust, false)); |
| 395 | } |
| 396 | |
| 397 | pub(crate) fn grow_right(&mut self, adjust: usize) { |
| 398 | self.adjust((0, false), (adjust, false)); |
| 399 | } |
| 400 | |
| 401 | pub(crate) fn shrink_left(&mut self, adjust: usize) { |
| 402 | self.adjust((0, false), (adjust, true)); |
| 403 | } |
| 404 | |
| 405 | pub(crate) fn shrink_right(&mut self, adjust: usize) { |
| 406 | self.adjust((adjust, false), (adjust, true)); |
| 407 | } |
| 408 | |
| 409 | fn adjust(&mut self, adjust_offset: (usize, bool), adjust_len: (usize, bool)) { |
| 410 | #[inline (always)] |
| 411 | fn modify(val: &mut usize, adj: (usize, bool)) { |
| 412 | if adj.1 { |
| 413 | *val -= adj.0; |
| 414 | } else { |
| 415 | *val += adj.0; |
| 416 | } |
| 417 | } |
| 418 | |
| 419 | match self { |
| 420 | DiffOp::Equal { |
| 421 | old_index, |
| 422 | new_index, |
| 423 | len, |
| 424 | } => { |
| 425 | modify(old_index, adjust_offset); |
| 426 | modify(new_index, adjust_offset); |
| 427 | modify(len, adjust_len); |
| 428 | } |
| 429 | DiffOp::Delete { |
| 430 | old_index, |
| 431 | old_len, |
| 432 | new_index, |
| 433 | } => { |
| 434 | modify(old_index, adjust_offset); |
| 435 | modify(old_len, adjust_len); |
| 436 | modify(new_index, adjust_offset); |
| 437 | } |
| 438 | DiffOp::Insert { |
| 439 | old_index, |
| 440 | new_index, |
| 441 | new_len, |
| 442 | } => { |
| 443 | modify(old_index, adjust_offset); |
| 444 | modify(new_index, adjust_offset); |
| 445 | modify(new_len, adjust_len); |
| 446 | } |
| 447 | DiffOp::Replace { |
| 448 | old_index, |
| 449 | old_len, |
| 450 | new_index, |
| 451 | new_len, |
| 452 | } => { |
| 453 | modify(old_index, adjust_offset); |
| 454 | modify(old_len, adjust_len); |
| 455 | modify(new_index, adjust_offset); |
| 456 | modify(new_len, adjust_len); |
| 457 | } |
| 458 | } |
| 459 | } |
| 460 | } |
| 461 | |
| 462 | #[cfg (feature = "text" )] |
| 463 | mod text_additions { |
| 464 | use super::*; |
| 465 | use crate::text::DiffableStr; |
| 466 | use std::borrow::Cow; |
| 467 | |
| 468 | /// The text interface can produce changes over [`DiffableStr`] implementing |
| 469 | /// values. As those are generic interfaces for different types of strings |
| 470 | /// utility methods to make working with standard rust strings more enjoyable. |
| 471 | impl<'s, T: DiffableStr + ?Sized> Change<&'s T> { |
| 472 | /// Returns the value as string if it is utf-8. |
| 473 | pub fn as_str(&self) -> Option<&'s str> { |
| 474 | T::as_str(self.value) |
| 475 | } |
| 476 | |
| 477 | /// Returns the value (lossy) decoded as utf-8 string. |
| 478 | pub fn to_string_lossy(&self) -> Cow<'s, str> { |
| 479 | T::to_string_lossy(self.value) |
| 480 | } |
| 481 | |
| 482 | /// Returns `true` if this change does not end in a newline and must be |
| 483 | /// followed up by one if line based diffs are used. |
| 484 | /// |
| 485 | /// The [`std::fmt::Display`] implementation of [`Change`] will automatically |
| 486 | /// insert a newline after the value if this is true. |
| 487 | pub fn missing_newline(&self) -> bool { |
| 488 | !T::ends_with_newline(self.value) |
| 489 | } |
| 490 | } |
| 491 | |
| 492 | impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<&'s T> { |
| 493 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 494 | write!( |
| 495 | f, |
| 496 | " {}{}" , |
| 497 | self.to_string_lossy(), |
| 498 | if self.missing_newline() { " \n" } else { "" } |
| 499 | ) |
| 500 | } |
| 501 | } |
| 502 | } |
| 503 | |