1 | //! Text diffing utilities. |
2 | use std::borrow::Cow; |
3 | use std::cmp::Reverse; |
4 | use std::collections::BinaryHeap; |
5 | use std::time::{Duration, Instant}; |
6 | |
7 | mod abstraction; |
8 | #[cfg (feature = "inline" )] |
9 | mod inline; |
10 | mod utils; |
11 | |
12 | pub use self::abstraction::{DiffableStr, DiffableStrRef}; |
13 | #[cfg (feature = "inline" )] |
14 | pub use self::inline::InlineChange; |
15 | |
16 | use self::utils::{upper_seq_ratio, QuickSeqRatio}; |
17 | use crate::algorithms::IdentifyDistinct; |
18 | use crate::iter::{AllChangesIter, ChangesIter}; |
19 | use crate::udiff::UnifiedDiff; |
20 | use crate::{capture_diff_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp}; |
21 | |
/// A time limit for a diff, stored either as a fixed instant or as a
/// duration that is only anchored to the clock once the diff starts.
#[derive(Debug, Clone, Copy)]
enum Deadline {
    /// A fixed point in time.
    Absolute(Instant),
    /// A time budget, measured from the moment the deadline is resolved.
    Relative(Duration),
}

impl Deadline {
    /// Resolves the deadline into a concrete [`Instant`].
    ///
    /// A relative deadline is anchored to `Instant::now()` at the time of
    /// this call, not at the time the deadline was configured.
    fn into_instant(self) -> Instant {
        match self {
            Deadline::Absolute(instant) => instant,
            Deadline::Relative(duration) => Instant::now() + duration,
        }
    }
}
36 | |
37 | /// A builder type config for more complex uses of [`TextDiff`]. |
38 | /// |
39 | /// Requires the `text` feature. |
40 | #[derive (Clone, Debug, Default)] |
41 | pub struct TextDiffConfig { |
42 | algorithm: Algorithm, |
43 | newline_terminated: Option<bool>, |
44 | deadline: Option<Deadline>, |
45 | } |
46 | |
47 | impl TextDiffConfig { |
48 | /// Changes the algorithm. |
49 | /// |
50 | /// The default algorithm is [`Algorithm::Myers`]. |
51 | pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self { |
52 | self.algorithm = alg; |
53 | self |
54 | } |
55 | |
56 | /// Sets a deadline for the diff operation. |
57 | /// |
58 | /// By default a diff will take as long as it takes. For certain diff |
59 | /// algorithms like Myer's and Patience a maximum running time can be |
60 | /// defined after which the algorithm gives up and approximates. |
61 | pub fn deadline(&mut self, deadline: Instant) -> &mut Self { |
62 | self.deadline = Some(Deadline::Absolute(deadline)); |
63 | self |
64 | } |
65 | |
66 | /// Sets a timeout for thediff operation. |
67 | /// |
68 | /// This is like [`deadline`](Self::deadline) but accepts a duration. |
69 | pub fn timeout(&mut self, timeout: Duration) -> &mut Self { |
70 | self.deadline = Some(Deadline::Relative(timeout)); |
71 | self |
72 | } |
73 | |
74 | /// Changes the newline termination flag. |
75 | /// |
76 | /// The default is automatic based on input. This flag controls the |
77 | /// behavior of [`TextDiff::iter_changes`] and unified diff generation |
78 | /// with regards to newlines. When the flag is set to `false` (which |
79 | /// is the default) then newlines are added. Otherwise the newlines |
80 | /// from the source sequences are reused. |
81 | pub fn newline_terminated(&mut self, yes: bool) -> &mut Self { |
82 | self.newline_terminated = Some(yes); |
83 | self |
84 | } |
85 | |
86 | /// Creates a diff of lines. |
87 | /// |
88 | /// This splits the text `old` and `new` into lines preserving newlines |
89 | /// in the input. Line diffs are very common and because of that enjoy |
90 | /// special handling in similar. When a line diff is created with this |
91 | /// method the `newline_terminated` flag is flipped to `true` and will |
92 | /// influence the behavior of unified diff generation. |
93 | /// |
94 | /// ```rust |
95 | /// use similar::{TextDiff, ChangeTag}; |
96 | /// |
97 | /// let diff = TextDiff::configure().diff_lines("a \nb \nc" , "a \nb \nC" ); |
98 | /// let changes: Vec<_> = diff |
99 | /// .iter_all_changes() |
100 | /// .map(|x| (x.tag(), x.value())) |
101 | /// .collect(); |
102 | /// |
103 | /// assert_eq!(changes, vec![ |
104 | /// (ChangeTag::Equal, "a \n" ), |
105 | /// (ChangeTag::Equal, "b \n" ), |
106 | /// (ChangeTag::Delete, "c" ), |
107 | /// (ChangeTag::Insert, "C" ), |
108 | /// ]); |
109 | /// ``` |
110 | pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( |
111 | &self, |
112 | old: &'old T, |
113 | new: &'new T, |
114 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
115 | self.diff( |
116 | Cow::Owned(old.as_diffable_str().tokenize_lines()), |
117 | Cow::Owned(new.as_diffable_str().tokenize_lines()), |
118 | true, |
119 | ) |
120 | } |
121 | |
122 | /// Creates a diff of words. |
123 | /// |
124 | /// This splits the text into words and whitespace. |
125 | /// |
126 | /// Note on word diffs: because the text differ will tokenize the strings |
127 | /// into small segments it can be inconvenient to work with the results |
128 | /// depending on the use case. You might also want to combine word level |
129 | /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) |
130 | /// which lets you remap the diffs back to the original input strings. |
131 | /// |
132 | /// ```rust |
133 | /// use similar::{TextDiff, ChangeTag}; |
134 | /// |
135 | /// let diff = TextDiff::configure().diff_words("foo bar baz" , "foo BAR baz" ); |
136 | /// let changes: Vec<_> = diff |
137 | /// .iter_all_changes() |
138 | /// .map(|x| (x.tag(), x.value())) |
139 | /// .collect(); |
140 | /// |
141 | /// assert_eq!(changes, vec![ |
142 | /// (ChangeTag::Equal, "foo" ), |
143 | /// (ChangeTag::Equal, " " ), |
144 | /// (ChangeTag::Delete, "bar" ), |
145 | /// (ChangeTag::Insert, "BAR" ), |
146 | /// (ChangeTag::Equal, " " ), |
147 | /// (ChangeTag::Equal, "baz" ), |
148 | /// ]); |
149 | /// ``` |
150 | pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( |
151 | &self, |
152 | old: &'old T, |
153 | new: &'new T, |
154 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
155 | self.diff( |
156 | Cow::Owned(old.as_diffable_str().tokenize_words()), |
157 | Cow::Owned(new.as_diffable_str().tokenize_words()), |
158 | false, |
159 | ) |
160 | } |
161 | |
162 | /// Creates a diff of characters. |
163 | /// |
164 | /// Note on character diffs: because the text differ will tokenize the strings |
165 | /// into small segments it can be inconvenient to work with the results |
166 | /// depending on the use case. You might also want to combine word level |
167 | /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) |
168 | /// which lets you remap the diffs back to the original input strings. |
169 | /// |
170 | /// ```rust |
171 | /// use similar::{TextDiff, ChangeTag}; |
172 | /// |
173 | /// let diff = TextDiff::configure().diff_chars("abcdef" , "abcDDf" ); |
174 | /// let changes: Vec<_> = diff |
175 | /// .iter_all_changes() |
176 | /// .map(|x| (x.tag(), x.value())) |
177 | /// .collect(); |
178 | /// |
179 | /// assert_eq!(changes, vec![ |
180 | /// (ChangeTag::Equal, "a" ), |
181 | /// (ChangeTag::Equal, "b" ), |
182 | /// (ChangeTag::Equal, "c" ), |
183 | /// (ChangeTag::Delete, "d" ), |
184 | /// (ChangeTag::Delete, "e" ), |
185 | /// (ChangeTag::Insert, "D" ), |
186 | /// (ChangeTag::Insert, "D" ), |
187 | /// (ChangeTag::Equal, "f" ), |
188 | /// ]); |
189 | /// ``` |
190 | pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( |
191 | &self, |
192 | old: &'old T, |
193 | new: &'new T, |
194 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
195 | self.diff( |
196 | Cow::Owned(old.as_diffable_str().tokenize_chars()), |
197 | Cow::Owned(new.as_diffable_str().tokenize_chars()), |
198 | false, |
199 | ) |
200 | } |
201 | |
202 | /// Creates a diff of unicode words. |
203 | /// |
204 | /// This splits the text into words according to unicode rules. This is |
205 | /// generally recommended over [`TextDiffConfig::diff_words`] but |
206 | /// requires a dependency. |
207 | /// |
208 | /// This requires the `unicode` feature. |
209 | /// |
210 | /// Note on word diffs: because the text differ will tokenize the strings |
211 | /// into small segments it can be inconvenient to work with the results |
212 | /// depending on the use case. You might also want to combine word level |
213 | /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) |
214 | /// which lets you remap the diffs back to the original input strings. |
215 | /// |
216 | /// ```rust |
217 | /// use similar::{TextDiff, ChangeTag}; |
218 | /// |
219 | /// let diff = TextDiff::configure().diff_unicode_words("ah(be)ce", "ah(ah)ce"); |
220 | /// let changes: Vec<_> = diff |
221 | /// .iter_all_changes() |
222 | /// .map(|x| (x.tag(), x.value())) |
223 | /// .collect(); |
224 | /// |
225 | /// assert_eq!(changes, vec![ |
226 | /// (ChangeTag::Equal, "ah"), |
227 | /// (ChangeTag::Equal, "("), |
228 | /// (ChangeTag::Delete, "be"), |
229 | /// (ChangeTag::Insert, "ah"), |
230 | /// (ChangeTag::Equal, ")"), |
231 | /// (ChangeTag::Equal, "ce"), |
232 | /// ]); |
233 | /// ``` |
234 | #[cfg (feature = "unicode" )] |
235 | pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( |
236 | &self, |
237 | old: &'old T, |
238 | new: &'new T, |
239 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
240 | self.diff( |
241 | Cow::Owned(old.as_diffable_str().tokenize_unicode_words()), |
242 | Cow::Owned(new.as_diffable_str().tokenize_unicode_words()), |
243 | false, |
244 | ) |
245 | } |
246 | |
247 | /// Creates a diff of graphemes. |
248 | /// |
249 | /// This requires the `unicode` feature. |
250 | /// |
251 | /// Note on grapheme diffs: because the text differ will tokenize the strings |
252 | /// into small segments it can be inconvenient to work with the results |
253 | /// depending on the use case. You might also want to combine word level |
254 | /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) |
255 | /// which lets you remap the diffs back to the original input strings. |
256 | /// |
257 | /// ```rust |
258 | /// use similar::{TextDiff, ChangeTag}; |
259 | /// |
260 | /// let diff = TextDiff::configure().diff_graphemes("💩🇦🇹🦠", "💩🇦🇱❄️"); |
261 | /// let changes: Vec<_> = diff |
262 | /// .iter_all_changes() |
263 | /// .map(|x| (x.tag(), x.value())) |
264 | /// .collect(); |
265 | /// |
266 | /// assert_eq!(changes, vec![ |
267 | /// (ChangeTag::Equal, "💩"), |
268 | /// (ChangeTag::Delete, "🇦🇹"), |
269 | /// (ChangeTag::Delete, "🦠"), |
270 | /// (ChangeTag::Insert, "🇦🇱"), |
271 | /// (ChangeTag::Insert, "❄️"), |
272 | /// ]); |
273 | /// ``` |
274 | #[cfg (feature = "unicode" )] |
275 | pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( |
276 | &self, |
277 | old: &'old T, |
278 | new: &'new T, |
279 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
280 | self.diff( |
281 | Cow::Owned(old.as_diffable_str().tokenize_graphemes()), |
282 | Cow::Owned(new.as_diffable_str().tokenize_graphemes()), |
283 | false, |
284 | ) |
285 | } |
286 | |
287 | /// Creates a diff of arbitrary slices. |
288 | /// |
289 | /// ```rust |
290 | /// use similar::{TextDiff, ChangeTag}; |
291 | /// |
292 | /// let old = &["foo" , "bar" , "baz" ]; |
293 | /// let new = &["foo" , "BAR" , "baz" ]; |
294 | /// let diff = TextDiff::configure().diff_slices(old, new); |
295 | /// let changes: Vec<_> = diff |
296 | /// .iter_all_changes() |
297 | /// .map(|x| (x.tag(), x.value())) |
298 | /// .collect(); |
299 | /// |
300 | /// assert_eq!(changes, vec![ |
301 | /// (ChangeTag::Equal, "foo" ), |
302 | /// (ChangeTag::Delete, "bar" ), |
303 | /// (ChangeTag::Insert, "BAR" ), |
304 | /// (ChangeTag::Equal, "baz" ), |
305 | /// ]); |
306 | /// ``` |
307 | pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( |
308 | &self, |
309 | old: &'bufs [&'old T], |
310 | new: &'bufs [&'new T], |
311 | ) -> TextDiff<'old, 'new, 'bufs, T> { |
312 | self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false) |
313 | } |
314 | |
315 | fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( |
316 | &self, |
317 | old: Cow<'bufs, [&'old T]>, |
318 | new: Cow<'bufs, [&'new T]>, |
319 | newline_terminated: bool, |
320 | ) -> TextDiff<'old, 'new, 'bufs, T> { |
321 | let deadline = self.deadline.map(|x| x.into_instant()); |
322 | let ops = if old.len() > 100 || new.len() > 100 { |
323 | let ih = IdentifyDistinct::<u32>::new(&old[..], 0..old.len(), &new[..], 0..new.len()); |
324 | capture_diff_deadline( |
325 | self.algorithm, |
326 | ih.old_lookup(), |
327 | ih.old_range(), |
328 | ih.new_lookup(), |
329 | ih.new_range(), |
330 | deadline, |
331 | ) |
332 | } else { |
333 | capture_diff_deadline( |
334 | self.algorithm, |
335 | &old[..], |
336 | 0..old.len(), |
337 | &new[..], |
338 | 0..new.len(), |
339 | deadline, |
340 | ) |
341 | }; |
342 | TextDiff { |
343 | old, |
344 | new, |
345 | ops, |
346 | newline_terminated: self.newline_terminated.unwrap_or(newline_terminated), |
347 | algorithm: self.algorithm, |
348 | } |
349 | } |
350 | } |
351 | |
352 | /// Captures diff op codes for textual diffs. |
353 | /// |
354 | /// The exact diff behavior is depending on the underlying [`DiffableStr`]. |
355 | /// For instance diffs on bytes and strings are slightly different. You can |
356 | /// create a text diff from constructors such as [`TextDiff::from_lines`] or |
357 | /// the [`TextDiffConfig`] created by [`TextDiff::configure`]. |
358 | /// |
359 | /// Requires the `text` feature. |
360 | pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> { |
361 | old: Cow<'bufs, [&'old T]>, |
362 | new: Cow<'bufs, [&'new T]>, |
363 | ops: Vec<DiffOp>, |
364 | newline_terminated: bool, |
365 | algorithm: Algorithm, |
366 | } |
367 | |
368 | impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> { |
369 | /// Configures a text differ before diffing. |
370 | pub fn configure() -> TextDiffConfig { |
371 | TextDiffConfig::default() |
372 | } |
373 | |
374 | /// Creates a diff of lines. |
375 | /// |
376 | /// For more information see [`TextDiffConfig::diff_lines`]. |
377 | pub fn from_lines<T: DiffableStrRef + ?Sized>( |
378 | old: &'old T, |
379 | new: &'new T, |
380 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
381 | TextDiff::configure().diff_lines(old, new) |
382 | } |
383 | |
384 | /// Creates a diff of words. |
385 | /// |
386 | /// For more information see [`TextDiffConfig::diff_words`]. |
387 | pub fn from_words<T: DiffableStrRef + ?Sized>( |
388 | old: &'old T, |
389 | new: &'new T, |
390 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
391 | TextDiff::configure().diff_words(old, new) |
392 | } |
393 | |
394 | /// Creates a diff of chars. |
395 | /// |
396 | /// For more information see [`TextDiffConfig::diff_chars`]. |
397 | pub fn from_chars<T: DiffableStrRef + ?Sized>( |
398 | old: &'old T, |
399 | new: &'new T, |
400 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
401 | TextDiff::configure().diff_chars(old, new) |
402 | } |
403 | |
404 | /// Creates a diff of unicode words. |
405 | /// |
406 | /// For more information see [`TextDiffConfig::diff_unicode_words`]. |
407 | /// |
408 | /// This requires the `unicode` feature. |
409 | #[cfg (feature = "unicode" )] |
410 | pub fn from_unicode_words<T: DiffableStrRef + ?Sized>( |
411 | old: &'old T, |
412 | new: &'new T, |
413 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
414 | TextDiff::configure().diff_unicode_words(old, new) |
415 | } |
416 | |
417 | /// Creates a diff of graphemes. |
418 | /// |
419 | /// For more information see [`TextDiffConfig::diff_graphemes`]. |
420 | /// |
421 | /// This requires the `unicode` feature. |
422 | #[cfg (feature = "unicode" )] |
423 | pub fn from_graphemes<T: DiffableStrRef + ?Sized>( |
424 | old: &'old T, |
425 | new: &'new T, |
426 | ) -> TextDiff<'old, 'new, 'bufs, T::Output> { |
427 | TextDiff::configure().diff_graphemes(old, new) |
428 | } |
429 | } |
430 | |
431 | impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> { |
432 | /// Creates a diff of arbitrary slices. |
433 | /// |
434 | /// For more information see [`TextDiffConfig::diff_slices`]. |
435 | pub fn from_slices( |
436 | old: &'bufs [&'old T], |
437 | new: &'bufs [&'new T], |
438 | ) -> TextDiff<'old, 'new, 'bufs, T> { |
439 | TextDiff::configure().diff_slices(old, new) |
440 | } |
441 | |
442 | /// The name of the algorithm that created the diff. |
443 | pub fn algorithm(&self) -> Algorithm { |
444 | self.algorithm |
445 | } |
446 | |
447 | /// Returns `true` if items in the slice are newline terminated. |
448 | /// |
449 | /// This flag is used by the unified diff writer to determine if extra |
450 | /// newlines have to be added. |
451 | pub fn newline_terminated(&self) -> bool { |
452 | self.newline_terminated |
453 | } |
454 | |
455 | /// Returns all old slices. |
456 | pub fn old_slices(&self) -> &[&'old T] { |
457 | &self.old |
458 | } |
459 | |
460 | /// Returns all new slices. |
461 | pub fn new_slices(&self) -> &[&'new T] { |
462 | &self.new |
463 | } |
464 | |
465 | /// Return a measure of the sequences' similarity in the range `0..=1`. |
466 | /// |
467 | /// A ratio of `1.0` means the two sequences are a complete match, a |
468 | /// ratio of `0.0` would indicate completely distinct sequences. |
469 | /// |
470 | /// ```rust |
471 | /// # use similar::TextDiff; |
472 | /// let diff = TextDiff::from_chars("abcd" , "bcde" ); |
473 | /// assert_eq!(diff.ratio(), 0.75); |
474 | /// ``` |
475 | pub fn ratio(&self) -> f32 { |
476 | get_diff_ratio(self.ops(), self.old.len(), self.new.len()) |
477 | } |
478 | |
479 | /// Iterates over the changes the op expands to. |
480 | /// |
481 | /// This method is a convenient way to automatically resolve the different |
482 | /// ways in which a change could be encoded (insert/delete vs replace), look |
483 | /// up the value from the appropriate slice and also handle correct index |
484 | /// handling. |
485 | pub fn iter_changes<'x, 'slf>( |
486 | &'slf self, |
487 | op: &DiffOp, |
488 | ) -> ChangesIter<'slf, [&'x T], [&'x T], &'x T> |
489 | where |
490 | 'x: 'slf, |
491 | 'old: 'x, |
492 | 'new: 'x, |
493 | { |
494 | op.iter_changes(self.old_slices(), self.new_slices()) |
495 | } |
496 | |
497 | /// Returns the captured diff ops. |
498 | pub fn ops(&self) -> &[DiffOp] { |
499 | &self.ops |
500 | } |
501 | |
502 | /// Isolate change clusters by eliminating ranges with no changes. |
503 | /// |
504 | /// This is equivalent to calling [`group_diff_ops`] on [`TextDiff::ops`]. |
505 | pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> { |
506 | group_diff_ops(self.ops().to_vec(), n) |
507 | } |
508 | |
509 | /// Flattens out the diff into all changes. |
510 | /// |
511 | /// This is a shortcut for combining [`TextDiff::ops`] with |
512 | /// [`TextDiff::iter_changes`]. |
513 | pub fn iter_all_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T> |
514 | where |
515 | 'x: 'slf + 'old + 'new, |
516 | 'old: 'x, |
517 | 'new: 'x, |
518 | { |
519 | AllChangesIter::new(&self.old[..], &self.new[..], self.ops()) |
520 | } |
521 | |
522 | /// Utility to return a unified diff formatter. |
523 | pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> { |
524 | UnifiedDiff::from_text_diff(self) |
525 | } |
526 | |
527 | /// Iterates over the changes the op expands to with inline emphasis. |
528 | /// |
529 | /// This is very similar to [`TextDiff::iter_changes`] but it performs a second |
530 | /// level diff on adjacent line replacements. The exact behavior of |
531 | /// this function with regards to how it detects those inline changes |
532 | /// is currently not defined and will likely change over time. |
533 | /// |
534 | /// This method has a hardcoded 500ms deadline which is often not ideal. For |
535 | /// fine tuning use [`iter_inline_changes_deadline`](Self::iter_inline_changes_deadline). |
536 | /// |
537 | /// As of similar 1.2.0 the behavior of this function changes depending on |
538 | /// if the `unicode` feature is enabled or not. It will prefer unicode word |
539 | /// splitting over word splitting depending on the feature flag. |
540 | /// |
541 | /// Requires the `inline` feature. |
542 | #[cfg (feature = "inline" )] |
543 | pub fn iter_inline_changes<'slf>( |
544 | &'slf self, |
545 | op: &DiffOp, |
546 | ) -> impl Iterator<Item = InlineChange<'slf, T>> + '_ |
547 | where |
548 | 'slf: 'old + 'new, |
549 | { |
550 | inline::iter_inline_changes(self, op, Some(Instant::now() + Duration::from_millis(500))) |
551 | } |
552 | |
553 | /// Iterates over the changes the op expands to with inline emphasis with a deadline. |
554 | /// |
555 | /// Like [`iter_inline_changes`](Self::iter_inline_changes) but with an explicit deadline. |
556 | #[cfg (feature = "inline" )] |
557 | pub fn iter_inline_changes_deadline<'slf>( |
558 | &'slf self, |
559 | op: &DiffOp, |
560 | deadline: Option<Instant>, |
561 | ) -> impl Iterator<Item = InlineChange<'slf, T>> + '_ |
562 | where |
563 | 'slf: 'old + 'new, |
564 | { |
565 | inline::iter_inline_changes(self, op, deadline) |
566 | } |
567 | } |
568 | |
569 | /// Use the text differ to find `n` close matches. |
570 | /// |
571 | /// `cutoff` defines the threshold which needs to be reached for a word |
572 | /// to be considered similar. See [`TextDiff::ratio`] for more information. |
573 | /// |
574 | /// ``` |
575 | /// # use similar::get_close_matches; |
576 | /// let matches = get_close_matches( |
577 | /// "appel" , |
578 | /// &["ape" , "apple" , "peach" , "puppy" ][..], |
579 | /// 3, |
580 | /// 0.6 |
581 | /// ); |
582 | /// assert_eq!(matches, vec!["apple" , "ape" ]); |
583 | /// ``` |
584 | /// |
585 | /// Requires the `text` feature. |
586 | pub fn get_close_matches<'a, T: DiffableStr + ?Sized>( |
587 | word: &T, |
588 | possibilities: &[&'a T], |
589 | n: usize, |
590 | cutoff: f32, |
591 | ) -> Vec<&'a T> { |
592 | let mut matches = BinaryHeap::new(); |
593 | let seq1 = word.tokenize_chars(); |
594 | let quick_ratio = QuickSeqRatio::new(&seq1); |
595 | |
596 | for &possibility in possibilities { |
597 | let seq2 = possibility.tokenize_chars(); |
598 | |
599 | if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff { |
600 | continue; |
601 | } |
602 | |
603 | let diff = TextDiff::from_slices(&seq1, &seq2); |
604 | let ratio = diff.ratio(); |
605 | if ratio >= cutoff { |
606 | // we're putting the word itself in reverse in so that matches with |
607 | // the same ratio are ordered lexicographically. |
608 | matches.push(((ratio * u32::MAX as f32) as u32, Reverse(possibility))); |
609 | } |
610 | } |
611 | |
612 | let mut rv = vec![]; |
613 | for _ in 0..n { |
614 | if let Some((_, elt)) = matches.pop() { |
615 | rv.push(elt.0); |
616 | } else { |
617 | break; |
618 | } |
619 | } |
620 | |
621 | rv |
622 | } |
623 | |
// Snapshots the raw ops captured for a small line diff.
#[test]
fn test_captured_ops() {
    let old = "Hello World \nsome stuff here \nsome more stuff here \n";
    let new = "Hello World \nsome amazing stuff here \nsome more stuff here \n";
    let diff = TextDiff::from_lines(old, new);
    insta::assert_debug_snapshot!(&diff.ops());
}
632 | |
// Snapshots the changes every op of a word diff expands to.
#[test]
fn test_captured_word_ops() {
    let old = "Hello World \nsome stuff here \nsome more stuff here \n";
    let new = "Hello World \nsome amazing stuff here \nsome more stuff here \n";
    let diff = TextDiff::from_words(old, new);
    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_changes(op))
        .collect();
    insta::assert_debug_snapshot!(&changes);
}
646 | |
// Snapshots the unified diff rendering of a line diff.
#[test]
fn test_unified_diff() {
    let old = "Hello World \nsome stuff here \nsome more stuff here \n";
    let new = "Hello World \nsome amazing stuff here \nsome more stuff here \n";
    let diff = TextDiff::from_lines(old, new);
    // Line diffs turn the flag on by default.
    assert!(diff.newline_terminated());
    insta::assert_snapshot!(&diff
        .unified_diff()
        .context_radius(3)
        .header("old", "new")
        .to_string());
}
660 | |
// Snapshots the changes of a line diff and, with the `bytes` feature, checks
// that the byte based diff yields the same text.
#[test]
fn test_line_ops() {
    let a = "Hello World \nsome stuff here \nsome more stuff here \n";
    let b = "Hello World \nsome amazing stuff here \nsome more stuff here \n";
    let diff = TextDiff::from_lines(a, b);
    assert!(diff.newline_terminated());
    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_changes(op))
        .collect();
    insta::assert_debug_snapshot!(&changes);

    #[cfg(feature = "bytes")]
    {
        let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
        let byte_changes: Vec<_> = byte_diff
            .ops()
            .iter()
            .flat_map(|op| byte_diff.iter_changes(op))
            .collect();
        // Compare the string changes with the byte changes pairwise.
        for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
            assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy());
        }
    }
}
687 | |
// Snapshots a line diff where only one side ends in a newline.
#[test]
fn test_virtual_newlines() {
    let old = "a \nb";
    let new = "a \nc \n";
    let diff = TextDiff::from_lines(old, new);
    assert!(diff.newline_terminated());
    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_changes(op))
        .collect();
    insta::assert_debug_snapshot!(&changes);
}
699 | |
// Snapshots the ops of a char diff and, with the `bytes` feature, checks
// that the byte based diff captures the same ops.
#[test]
fn test_char_diff() {
    let old = "Hello World";
    let new = "Hallo Welt";
    let diff = TextDiff::from_chars(old, new);
    insta::assert_debug_snapshot!(diff.ops());

    #[cfg(feature = "bytes")]
    {
        let byte_diff = TextDiff::from_chars(old.as_bytes(), new.as_bytes());
        assert_eq!(diff.ops(), byte_diff.ops());
    }
}
711 | |
// Checks the similarity ratio for a partial match and for two empty inputs.
#[test]
fn test_ratio() {
    let partial = TextDiff::from_chars("abcd", "bcde");
    assert_eq!(partial.ratio(), 0.75);

    let empty = TextDiff::from_chars("", "");
    assert_eq!(empty.ratio(), 1.0);
}
719 | |
// Checks ordering and cutoff handling of `get_close_matches`.
#[test]
fn test_get_close_matches() {
    let fruits = ["ape", "apple", "peach", "puppy"];
    let matches = get_close_matches("appel", &fruits[..], 3, 0.6);
    assert_eq!(matches, vec!["apple", "ape"]);

    let words = [
        "hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo",
    ];
    let matches = get_close_matches("hulo", &words[..], 5, 0.7);
    assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]);
}
734 | |
// Makes sure the changes from `iter_all_changes` can outlive the `TextDiff`
// they came from, as long as the inputs are still alive.
#[test]
fn test_lifetimes_on_iter() {
    use crate::Change;

    // Returns changes that borrow from the inputs while the diff itself is
    // only a temporary inside this helper.
    fn diff_lines<'x, T>(old: &'x T, new: &'x T) -> Vec<Change<&'x T::Output>>
    where
        T: DiffableStrRef + ?Sized,
    {
        TextDiff::from_lines(old, new).iter_all_changes().collect()
    }

    let a = String::from("1 \n2 \n3 \n");
    let b = String::from("1 \n99 \n3 \n");
    let changes = diff_lines(&a, &b);
    insta::assert_debug_snapshot!(&changes);
}
751 | |
// Snapshots the JSON serialization of the changes of a line diff.
#[test]
#[cfg(feature = "serde")]
fn test_serde() {
    let old =
        "Hello World \nsome stuff here \nsome more stuff here \n\nAha stuff here \nand more stuff";
    let new = "Stuff \nHello World \nsome amazing stuff here \nsome more stuff here \n";
    let diff = TextDiff::from_lines(old, new);
    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_changes(op))
        .collect();
    let json = serde_json::to_string_pretty(&changes).unwrap();
    insta::assert_snapshot!(&json);
}
767 | |
// Snapshots the JSON serialization of the raw ops of a line diff.
#[test]
#[cfg(feature = "serde")]
fn test_serde_ops() {
    let old =
        "Hello World \nsome stuff here \nsome more stuff here \n\nAha stuff here \nand more stuff";
    let new = "Stuff \nHello World \nsome amazing stuff here \nsome more stuff here \n";
    let diff = TextDiff::from_lines(old, new);
    let changes = diff.ops();
    let json = serde_json::to_string_pretty(&changes).unwrap();
    insta::assert_snapshot!(&json);
}
779 | |
// Regression test: renders a unified diff with a context radius of zero for
// inputs made up of control characters and bare line endings.
//
// NOTE(review): the spaces in front of `\n` / `\r` / `\u{18}` in the literals
// below look like extraction artifacts (the expected hunk headers end in
// "@@ " with a trailing space) -- confirm against the upstream fixture.
#[test]
fn test_regression_issue_37() {
    let config = TextDiffConfig::default();
    let old = " \u{18}\n\n";
    let new = " \n\n\r";
    let diff = config.diff_lines(old, new);
    let mut output = diff.unified_diff();
    assert_eq!(
        output.context_radius(0).to_string(),
        "@@ -1 +1,0 @@ \n- \u{18}\n@@ -2,0 +2,2 @@ \n+ \n+ \r"
    );
}
790 | |