| 1 | //! [![github]](https://github.com/dtolnay/dissimilar) [![crates-io]](https://crates.io/crates/dissimilar) [![docs-rs]](https://docs.rs/dissimilar) | 
| 2 | //! | 
|---|
| 3 | //! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github | 
|---|
| 4 | //! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust | 
|---|
| 5 | //! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs | 
|---|
| 6 | //! | 
|---|
| 7 | //! <br> | 
|---|
| 8 | //! | 
|---|
| 9 | //! ## Diff library with semantic cleanup, based on Google's diff-match-patch | 
|---|
| 10 | //! | 
|---|
| 11 | //! This library is a port of the Diff component of [Diff Match Patch] to Rust. | 
|---|
| 12 | //! The diff implementation is based on [Myers' diff algorithm] but includes | 
|---|
| 13 | //! some [semantic cleanups] to increase human readability by factoring out | 
|---|
| 14 | //! commonalities which are likely to be coincidental. | 
|---|
| 15 | //! | 
|---|
| 16 | //! Diff Match Patch was originally built in 2006 to power Google Docs. | 
|---|
| 17 | //! | 
|---|
| 18 | //! # Interface | 
|---|
| 19 | //! | 
|---|
| 20 | //! Here is the entire API of the Rust implementation. It operates on borrowed | 
|---|
| 21 | //! strings and the return value of the diff algorithm is a vector of chunks | 
|---|
| 22 | //! pointing into slices of those input strings. | 
|---|
| 23 | //! | 
|---|
| 24 | //! ``` | 
|---|
| 25 | //! pub enum Chunk<'a> { | 
|---|
| 26 | //!     Equal(&'a str), | 
|---|
| 27 | //!     Delete(&'a str), | 
|---|
| 28 | //!     Insert(&'a str), | 
|---|
| 29 | //! } | 
|---|
| 30 | //! | 
|---|
| 31 | //! # const IGNORE: &str = stringify! { | 
|---|
| 32 | //! pub fn diff(text1: &str, text2: &str) -> Vec<Chunk>; | 
|---|
| 33 | //! # }; | 
|---|
| 34 | //! ``` | 
|---|
| 35 | //! | 
|---|
| 36 | //! [Diff Match Patch]: https://github.com/google/diff-match-patch | 
|---|
| 37 | //! [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf | 
|---|
| 38 | //! [semantic cleanups]: https://neil.fraser.name/writing/diff/ | 
|---|
| 39 |  | 
|---|
| 40 | #![ doc(html_root_url = "https://docs.rs/dissimilar/1.0.9")] | 
|---|
| 41 | #![ allow( | 
|---|
| 42 | clippy::blocks_in_conditions, | 
|---|
| 43 | clippy::bool_to_int_with_if, | 
|---|
| 44 | clippy::cast_possible_wrap, | 
|---|
| 45 | clippy::cast_sign_loss, | 
|---|
| 46 | clippy::cloned_instead_of_copied, // https://github.com/rust-lang/rust-clippy/issues/7127 | 
|---|
| 47 | clippy::collapsible_else_if, | 
|---|
| 48 | clippy::comparison_chain, | 
|---|
| 49 | clippy::implied_bounds_in_impls, | 
|---|
| 50 | clippy::items_after_test_module, // https://github.com/rust-lang/rust-clippy/issues/10713 | 
|---|
| 51 | clippy::let_underscore_untyped, | 
|---|
| 52 | clippy::match_same_arms, | 
|---|
| 53 | clippy::module_name_repetitions, | 
|---|
| 54 | clippy::must_use_candidate, | 
|---|
| 55 | clippy::new_without_default, | 
|---|
| 56 | clippy::octal_escapes, | 
|---|
| 57 | clippy::shadow_unrelated, | 
|---|
| 58 | clippy::similar_names, | 
|---|
| 59 | clippy::too_many_lines, | 
|---|
| 60 | clippy::unseparated_literal_suffix, | 
|---|
| 61 | unused_parens, // false positive on Some(&(mut diff)) pattern | 
|---|
| 62 | )] | 
|---|
| 63 |  | 
|---|
| 64 | mod find; | 
|---|
| 65 | mod range; | 
|---|
| 66 |  | 
|---|
| 67 | #[ cfg(test)] | 
|---|
| 68 | mod tests; | 
|---|
| 69 |  | 
|---|
| 70 | use crate::range::{slice, Range}; | 
|---|
| 71 | use std::cmp; | 
|---|
| 72 | use std::collections::VecDeque; | 
|---|
| 73 | use std::fmt::{self, Debug, Display, Write}; | 
|---|
| 74 |  | 
|---|
/// One contiguous piece of a diff result.
///
/// Every variant borrows its text from one of the two strings that were passed
/// to [`diff`]; nothing is copied.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Chunk<'a> {
    /// Text found in both inputs (sliced out of the first input).
    Equal(&'a str),
    /// Text found only in the first input.
    Delete(&'a str),
    /// Text found only in the second input.
    Insert(&'a str),
}
|---|
| 81 |  | 
|---|
| 82 | #[ derive(Copy, Clone)] | 
|---|
| 83 | enum Diff<'a, 'b> { | 
|---|
| 84 | Equal(Range<'a>, Range<'b>), | 
|---|
| 85 | Delete(Range<'a>), | 
|---|
| 86 | Insert(Range<'b>), | 
|---|
| 87 | } | 
|---|
| 88 |  | 
|---|
| 89 | impl<'tmp, 'a: 'tmp, 'b: 'tmp> Diff<'a, 'b> { | 
|---|
| 90 | fn text(&self) -> Range<'tmp> { | 
|---|
| 91 | match *self { | 
|---|
| 92 | Diff::Equal(range, _) | Diff::Delete(range) | Diff::Insert(range) => range, | 
|---|
| 93 | } | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 | fn grow_left(&mut self, increment: usize) { | 
|---|
| 97 | self.for_each(|range| { | 
|---|
| 98 | range.offset -= increment; | 
|---|
| 99 | range.len += increment; | 
|---|
| 100 | }); | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | fn grow_right(&mut self, increment: usize) { | 
|---|
| 104 | self.for_each(|range| range.len += increment); | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 | fn shift_left(&mut self, increment: usize) { | 
|---|
| 108 | self.for_each(|range| range.offset -= increment); | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | fn shift_right(&mut self, increment: usize) { | 
|---|
| 112 | self.for_each(|range| range.offset += increment); | 
|---|
| 113 | } | 
|---|
| 114 |  | 
|---|
| 115 | fn for_each(&mut self, f: impl Fn(&mut Range)) { | 
|---|
| 116 | match self { | 
|---|
| 117 | Diff::Equal(range1, range2) => { | 
|---|
| 118 | f(range1); | 
|---|
| 119 | f(range2); | 
|---|
| 120 | } | 
|---|
| 121 | Diff::Delete(range) => f(range), | 
|---|
| 122 | Diff::Insert(range) => f(range), | 
|---|
| 123 | } | 
|---|
| 124 | } | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 | pub fn diff<'a>(text1: &'a str, text2: &'a str) -> Vec<Chunk<'a>> { | 
|---|
| 128 | let chars1: Vec<char> = text1.chars().collect(); | 
|---|
| 129 | let chars2: Vec<char> = text2.chars().collect(); | 
|---|
| 130 | let range1 = Range::new(&chars1, ..); | 
|---|
| 131 | let range2 = Range::new(&chars2, ..); | 
|---|
| 132 |  | 
|---|
| 133 | let mut solution = main(range1, range2); | 
|---|
| 134 | cleanup_char_boundary(&mut solution); | 
|---|
| 135 | cleanup_semantic(&mut solution); | 
|---|
| 136 | cleanup_merge(&mut solution); | 
|---|
| 137 |  | 
|---|
| 138 | let mut chunks = Vec::new(); | 
|---|
| 139 | let mut pos1 = 0; | 
|---|
| 140 | let mut pos2 = 0; | 
|---|
| 141 | for diff in solution.diffs { | 
|---|
| 142 | chunks.push(match diff { | 
|---|
| 143 | Diff::Equal(range, _) => { | 
|---|
| 144 | let len = range.len_bytes(); | 
|---|
| 145 | let chunk = Chunk::Equal(&text1[pos1..pos1 + len]); | 
|---|
| 146 | pos1 += len; | 
|---|
| 147 | pos2 += len; | 
|---|
| 148 | chunk | 
|---|
| 149 | } | 
|---|
| 150 | Diff::Delete(range) => { | 
|---|
| 151 | let len = range.len_bytes(); | 
|---|
| 152 | let chunk = Chunk::Delete(&text1[pos1..pos1 + len]); | 
|---|
| 153 | pos1 += len; | 
|---|
| 154 | chunk | 
|---|
| 155 | } | 
|---|
| 156 | Diff::Insert(range) => { | 
|---|
| 157 | let len = range.len_bytes(); | 
|---|
| 158 | let chunk = Chunk::Insert(&text2[pos2..pos2 + len]); | 
|---|
| 159 | pos2 += len; | 
|---|
| 160 | chunk | 
|---|
| 161 | } | 
|---|
| 162 | }); | 
|---|
| 163 | } | 
|---|
| 164 | chunks | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
| 167 | struct Solution<'a, 'b> { | 
|---|
| 168 | text1: Range<'a>, | 
|---|
| 169 | text2: Range<'b>, | 
|---|
| 170 | diffs: Vec<Diff<'a, 'b>>, | 
|---|
| 171 | } | 
|---|
| 172 |  | 
|---|
| 173 | fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> { | 
|---|
| 174 | let whole1 = text1; | 
|---|
| 175 | let whole2 = text2; | 
|---|
| 176 |  | 
|---|
| 177 | // Trim off common prefix. | 
|---|
| 178 | let common_prefix_len = common_prefix(text1, text2); | 
|---|
| 179 | let common_prefix = Diff::Equal( | 
|---|
| 180 | text1.substring(..common_prefix_len), | 
|---|
| 181 | text2.substring(..common_prefix_len), | 
|---|
| 182 | ); | 
|---|
| 183 | text1 = text1.substring(common_prefix_len..); | 
|---|
| 184 | text2 = text2.substring(common_prefix_len..); | 
|---|
| 185 |  | 
|---|
| 186 | // Trim off common suffix. | 
|---|
| 187 | let common_suffix_len = common_suffix(text1, text2); | 
|---|
| 188 | let common_suffix = Diff::Equal( | 
|---|
| 189 | text1.substring(text1.len - common_suffix_len..), | 
|---|
| 190 | text2.substring(text2.len - common_suffix_len..), | 
|---|
| 191 | ); | 
|---|
| 192 | text1 = text1.substring(..text1.len - common_suffix_len); | 
|---|
| 193 | text2 = text2.substring(..text2.len - common_suffix_len); | 
|---|
| 194 |  | 
|---|
| 195 | // Compute the diff on the middle block. | 
|---|
| 196 | let mut solution = Solution { | 
|---|
| 197 | text1: whole1, | 
|---|
| 198 | text2: whole2, | 
|---|
| 199 | diffs: compute(text1, text2), | 
|---|
| 200 | }; | 
|---|
| 201 |  | 
|---|
| 202 | // Restore the prefix and suffix. | 
|---|
| 203 | if common_prefix_len > 0 { | 
|---|
| 204 | solution.diffs.insert(0, common_prefix); | 
|---|
| 205 | } | 
|---|
| 206 | if common_suffix_len > 0 { | 
|---|
| 207 | solution.diffs.push(common_suffix); | 
|---|
| 208 | } | 
|---|
| 209 |  | 
|---|
| 210 | cleanup_merge(&mut solution); | 
|---|
| 211 |  | 
|---|
| 212 | solution | 
|---|
| 213 | } | 
|---|
| 214 |  | 
|---|
| 215 | // Find the differences between two texts. Assumes that the texts do not have | 
|---|
| 216 | // any common prefix or suffix. | 
|---|
| 217 | fn compute<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> { | 
|---|
| 218 | match (text1.is_empty(), text2.is_empty()) { | 
|---|
| 219 | (true, true) => return Vec::new(), | 
|---|
| 220 | (true, false) => return vec![Diff::Insert(text2)], | 
|---|
| 221 | (false, true) => return vec![Diff::Delete(text1)], | 
|---|
| 222 | (false, false) => {} | 
|---|
| 223 | } | 
|---|
| 224 |  | 
|---|
| 225 | // Check for entire shorter text inside the longer text. | 
|---|
| 226 | if text1.len > text2.len { | 
|---|
| 227 | if let Some(i) = text1.find(text2) { | 
|---|
| 228 | return vec![ | 
|---|
| 229 | Diff::Delete(text1.substring(..i)), | 
|---|
| 230 | Diff::Equal(text1.substring(i..i + text2.len), text2), | 
|---|
| 231 | Diff::Delete(text1.substring(i + text2.len..)), | 
|---|
| 232 | ]; | 
|---|
| 233 | } | 
|---|
| 234 | } else { | 
|---|
| 235 | if let Some(i) = text2.find(text1) { | 
|---|
| 236 | return vec![ | 
|---|
| 237 | Diff::Insert(text2.substring(..i)), | 
|---|
| 238 | Diff::Equal(text1, text2.substring(i..i + text1.len)), | 
|---|
| 239 | Diff::Insert(text2.substring(i + text1.len..)), | 
|---|
| 240 | ]; | 
|---|
| 241 | } | 
|---|
| 242 | } | 
|---|
| 243 |  | 
|---|
| 244 | if text1.len == 1 || text2.len == 1 { | 
|---|
| 245 | // Single character string. | 
|---|
| 246 | // After the previous check, the character can't be an equality. | 
|---|
| 247 | return vec![Diff::Delete(text1), Diff::Insert(text2)]; | 
|---|
| 248 | } | 
|---|
| 249 |  | 
|---|
| 250 | bisect(text1, text2) | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | // Find the 'middle snake' of a diff, split the problem in two and return the | 
|---|
| 254 | // recursively constructed diff. | 
|---|
| 255 | // | 
|---|
| 256 | // See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. | 
|---|
| 257 | fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> { | 
|---|
| 258 | let max_d = (text1.len + text2.len + 1) / 2; | 
|---|
| 259 | let v_offset = max_d; | 
|---|
| 260 | let v_len = 2 * max_d; | 
|---|
| 261 | let mut v1 = vec![-1isize; v_len]; | 
|---|
| 262 | let mut v2 = vec![-1isize; v_len]; | 
|---|
| 263 | v1[v_offset + 1] = 0; | 
|---|
| 264 | v2[v_offset + 1] = 0; | 
|---|
| 265 | let delta = text1.len as isize - text2.len as isize; | 
|---|
| 266 | // If the total number of characters is odd, then the front path will | 
|---|
| 267 | // collide with the reverse path. | 
|---|
| 268 | let front = delta % 2 != 0; | 
|---|
| 269 | // Offsets for start and end of k loop. | 
|---|
| 270 | // Prevents mapping of space beyond the grid. | 
|---|
| 271 | let mut k1start = 0; | 
|---|
| 272 | let mut k1end = 0; | 
|---|
| 273 | let mut k2start = 0; | 
|---|
| 274 | let mut k2end = 0; | 
|---|
| 275 | for d in 0..max_d as isize { | 
|---|
| 276 | // Walk the front path one step. | 
|---|
| 277 | let mut k1 = -d + k1start; | 
|---|
| 278 | while k1 <= d - k1end { | 
|---|
| 279 | let k1_offset = (v_offset as isize + k1) as usize; | 
|---|
| 280 | let mut x1 = if k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) { | 
|---|
| 281 | v1[k1_offset + 1] | 
|---|
| 282 | } else { | 
|---|
| 283 | v1[k1_offset - 1] + 1 | 
|---|
| 284 | } as usize; | 
|---|
| 285 | let mut y1 = (x1 as isize - k1) as usize; | 
|---|
| 286 | if let (Some(s1), Some(s2)) = (text1.get(x1..), text2.get(y1..)) { | 
|---|
| 287 | let advance = common_prefix(s1, s2); | 
|---|
| 288 | x1 += advance; | 
|---|
| 289 | y1 += advance; | 
|---|
| 290 | } | 
|---|
| 291 | v1[k1_offset] = x1 as isize; | 
|---|
| 292 | if x1 > text1.len { | 
|---|
| 293 | // Ran off the right of the graph. | 
|---|
| 294 | k1end += 2; | 
|---|
| 295 | } else if y1 > text2.len { | 
|---|
| 296 | // Ran off the bottom of the graph. | 
|---|
| 297 | k1start += 2; | 
|---|
| 298 | } else if front { | 
|---|
| 299 | let k2_offset = v_offset as isize + delta - k1; | 
|---|
| 300 | if k2_offset >= 0 && k2_offset < v_len as isize && v2[k2_offset as usize] != -1 { | 
|---|
| 301 | // Mirror x2 onto top-left coordinate system. | 
|---|
| 302 | let x2 = text1.len as isize - v2[k2_offset as usize]; | 
|---|
| 303 | if x1 as isize >= x2 { | 
|---|
| 304 | // Overlap detected. | 
|---|
| 305 | return bisect_split(text1, text2, x1, y1); | 
|---|
| 306 | } | 
|---|
| 307 | } | 
|---|
| 308 | } | 
|---|
| 309 | k1 += 2; | 
|---|
| 310 | } | 
|---|
| 311 |  | 
|---|
| 312 | // Walk the reverse path one step. | 
|---|
| 313 | let mut k2 = -d + k2start; | 
|---|
| 314 | while k2 <= d - k2end { | 
|---|
| 315 | let k2_offset = (v_offset as isize + k2) as usize; | 
|---|
| 316 | let mut x2 = if k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) { | 
|---|
| 317 | v2[k2_offset + 1] | 
|---|
| 318 | } else { | 
|---|
| 319 | v2[k2_offset - 1] + 1 | 
|---|
| 320 | } as usize; | 
|---|
| 321 | let mut y2 = (x2 as isize - k2) as usize; | 
|---|
| 322 | if x2 < text1.len && y2 < text2.len { | 
|---|
| 323 | let advance = common_suffix( | 
|---|
| 324 | text1.substring(..text1.len - x2), | 
|---|
| 325 | text2.substring(..text2.len - y2), | 
|---|
| 326 | ); | 
|---|
| 327 | x2 += advance; | 
|---|
| 328 | y2 += advance; | 
|---|
| 329 | } | 
|---|
| 330 | v2[k2_offset] = x2 as isize; | 
|---|
| 331 | if x2 > text1.len { | 
|---|
| 332 | // Ran off the left of the graph. | 
|---|
| 333 | k2end += 2; | 
|---|
| 334 | } else if y2 > text2.len { | 
|---|
| 335 | // Ran off the top of the graph. | 
|---|
| 336 | k2start += 2; | 
|---|
| 337 | } else if !front { | 
|---|
| 338 | let k1_offset = v_offset as isize + delta - k2; | 
|---|
| 339 | if k1_offset >= 0 && k1_offset < v_len as isize && v1[k1_offset as usize] != -1 { | 
|---|
| 340 | let x1 = v1[k1_offset as usize] as usize; | 
|---|
| 341 | let y1 = v_offset + x1 - k1_offset as usize; | 
|---|
| 342 | // Mirror x2 onto top-left coordinate system. | 
|---|
| 343 | x2 = text1.len - x2; | 
|---|
| 344 | if x1 >= x2 { | 
|---|
| 345 | // Overlap detected. | 
|---|
| 346 | return bisect_split(text1, text2, x1, y1); | 
|---|
| 347 | } | 
|---|
| 348 | } | 
|---|
| 349 | } | 
|---|
| 350 | k2 += 2; | 
|---|
| 351 | } | 
|---|
| 352 | } | 
|---|
| 353 | // Number of diffs equals number of characters, no commonality at all. | 
|---|
| 354 | vec![Diff::Delete(text1), Diff::Insert(text2)] | 
|---|
| 355 | } | 
|---|
| 356 |  | 
|---|
| 357 | // Given the location of the 'middle snake', split the diff in two parts and | 
|---|
| 358 | // recurse. | 
|---|
| 359 | fn bisect_split<'a, 'b>( | 
|---|
| 360 | text1: Range<'a>, | 
|---|
| 361 | text2: Range<'b>, | 
|---|
| 362 | x: usize, | 
|---|
| 363 | y: usize, | 
|---|
| 364 | ) -> Vec<Diff<'a, 'b>> { | 
|---|
| 365 | let (text1a: Range<'_>, text1b: Range<'_>) = text1.split_at(mid:x); | 
|---|
| 366 | let (text2a: Range<'_>, text2b: Range<'_>) = text2.split_at(mid:y); | 
|---|
| 367 |  | 
|---|
| 368 | // Compute both diffs serially. | 
|---|
| 369 | let mut diffs: Vec> = main(text1:text1a, text2:text2a).diffs; | 
|---|
| 370 | diffs.extend(iter:main(text1:text1b, text2:text2b).diffs); | 
|---|
| 371 |  | 
|---|
| 372 | diffs | 
|---|
| 373 | } | 
|---|
| 374 |  | 
|---|
| 375 | // Determine the length of the common prefix of two strings. | 
|---|
| 376 | fn common_prefix(text1: Range, text2: Range) -> usize { | 
|---|
| 377 | for (i: usize, (b1: char, b2: char)) in text1.chars().zip(text2.chars()).enumerate() { | 
|---|
| 378 | if b1 != b2 { | 
|---|
| 379 | return i; | 
|---|
| 380 | } | 
|---|
| 381 | } | 
|---|
| 382 | cmp::min(v1:text1.len, v2:text2.len) | 
|---|
| 383 | } | 
|---|
| 384 |  | 
|---|
| 385 | // Determine the length of the common suffix of two strings. | 
|---|
| 386 | fn common_suffix(text1: Range, text2: Range) -> usize { | 
|---|
| 387 | for (i: usize, (b1: char, b2: char)) in text1.chars().rev().zip(text2.chars().rev()).enumerate() { | 
|---|
| 388 | if b1 != b2 { | 
|---|
| 389 | return i; | 
|---|
| 390 | } | 
|---|
| 391 | } | 
|---|
| 392 | cmp::min(v1:text1.len, v2:text2.len) | 
|---|
| 393 | } | 
|---|
| 394 |  | 
|---|
| 395 | // Determine if the suffix of one string is the prefix of another. | 
|---|
| 396 | // | 
|---|
| 397 | // Returns the number of characters common to the end of the first string and | 
|---|
| 398 | // the start of the second string. | 
|---|
| 399 | fn common_overlap(mut text1: Range, mut text2: Range) -> usize { | 
|---|
| 400 | // Eliminate the null case. | 
|---|
| 401 | if text1.is_empty() || text2.is_empty() { | 
|---|
| 402 | return 0; | 
|---|
| 403 | } | 
|---|
| 404 | // Truncate the longer string. | 
|---|
| 405 | if text1.len > text2.len { | 
|---|
| 406 | text1 = text1.substring(text1.len - text2.len..); | 
|---|
| 407 | } else if text1.len < text2.len { | 
|---|
| 408 | text2 = text2.substring(..text1.len); | 
|---|
| 409 | } | 
|---|
| 410 | // Quick check for the worst case. | 
|---|
| 411 | if slice(text1) == slice(text2) { | 
|---|
| 412 | return text1.len; | 
|---|
| 413 | } | 
|---|
| 414 |  | 
|---|
| 415 | // Start by looking for a single character match | 
|---|
| 416 | // and increase length until no match is found. | 
|---|
| 417 | // Performance analysis: https://neil.fraser.name/news/2010/11/04/ | 
|---|
| 418 | let mut best = 0; | 
|---|
| 419 | let mut length = 1; | 
|---|
| 420 | loop { | 
|---|
| 421 | let pattern = text1.substring(text1.len - length..); | 
|---|
| 422 | let found = match text2.find(pattern) { | 
|---|
| 423 | Some(found) => found, | 
|---|
| 424 | None => return best, | 
|---|
| 425 | }; | 
|---|
| 426 | length += found; | 
|---|
| 427 | if found == 0 | 
|---|
| 428 | || slice(text1.substring(text1.len - length..)) == slice(text2.substring(..length)) | 
|---|
| 429 | { | 
|---|
| 430 | best = length; | 
|---|
| 431 | length += 1; | 
|---|
| 432 | } | 
|---|
| 433 | } | 
|---|
| 434 | } | 
|---|
| 435 |  | 
|---|
| 436 | fn cleanup_char_boundary(solution: &mut Solution) { | 
|---|
| 437 | fn is_segmentation_boundary(doc: &[char], pos: usize) -> bool { | 
|---|
| 438 | // FIXME: use unicode-segmentation crate? | 
|---|
| 439 | let _ = doc; | 
|---|
| 440 | let _ = pos; | 
|---|
| 441 | true | 
|---|
| 442 | } | 
|---|
| 443 |  | 
|---|
| 444 | fn boundary_down(doc: &[char], pos: usize) -> usize { | 
|---|
| 445 | let mut adjust = 0; | 
|---|
| 446 | while !is_segmentation_boundary(doc, pos - adjust) { | 
|---|
| 447 | adjust += 1; | 
|---|
| 448 | } | 
|---|
| 449 | adjust | 
|---|
| 450 | } | 
|---|
| 451 |  | 
|---|
| 452 | fn boundary_up(doc: &[char], pos: usize) -> usize { | 
|---|
| 453 | let mut adjust = 0; | 
|---|
| 454 | while !is_segmentation_boundary(doc, pos + adjust) { | 
|---|
| 455 | adjust += 1; | 
|---|
| 456 | } | 
|---|
| 457 | adjust | 
|---|
| 458 | } | 
|---|
| 459 |  | 
|---|
| 460 | fn skip_overlap<'a>(prev: &Range<'a>, range: &mut Range<'a>) { | 
|---|
| 461 | let prev_end = prev.offset + prev.len; | 
|---|
| 462 | if prev_end > range.offset { | 
|---|
| 463 | let delta = cmp::min(prev_end - range.offset, range.len); | 
|---|
| 464 | range.offset += delta; | 
|---|
| 465 | range.len -= delta; | 
|---|
| 466 | } | 
|---|
| 467 | } | 
|---|
| 468 |  | 
|---|
| 469 | let mut read = 0; | 
|---|
| 470 | let mut retain = 0; | 
|---|
| 471 | let mut last_delete = Range::empty(); | 
|---|
| 472 | let mut last_insert = Range::empty(); | 
|---|
| 473 | while let Some(&(mut diff)) = solution.diffs.get(read) { | 
|---|
| 474 | read += 1; | 
|---|
| 475 | match &mut diff { | 
|---|
| 476 | Diff::Equal(range1, range2) => { | 
|---|
| 477 | let adjust = boundary_up(range1.doc, range1.offset); | 
|---|
| 478 | // If the whole range is sub-character, skip it. | 
|---|
| 479 | if range1.len <= adjust { | 
|---|
| 480 | continue; | 
|---|
| 481 | } | 
|---|
| 482 | range1.offset += adjust; | 
|---|
| 483 | range1.len -= adjust; | 
|---|
| 484 | range2.offset += adjust; | 
|---|
| 485 | range2.len -= adjust; | 
|---|
| 486 | let adjust = boundary_down(range1.doc, range1.offset + range1.len); | 
|---|
| 487 | range1.len -= adjust; | 
|---|
| 488 | range2.len -= adjust; | 
|---|
| 489 | last_delete = Range::empty(); | 
|---|
| 490 | last_insert = Range::empty(); | 
|---|
| 491 | } | 
|---|
| 492 | Diff::Delete(range) => { | 
|---|
| 493 | skip_overlap(&last_delete, range); | 
|---|
| 494 | if range.len == 0 { | 
|---|
| 495 | continue; | 
|---|
| 496 | } | 
|---|
| 497 | let adjust = boundary_down(range.doc, range.offset); | 
|---|
| 498 | range.offset -= adjust; | 
|---|
| 499 | range.len += adjust; | 
|---|
| 500 | let adjust = boundary_up(range.doc, range.offset + range.len); | 
|---|
| 501 | range.len += adjust; | 
|---|
| 502 | last_delete = *range; | 
|---|
| 503 | } | 
|---|
| 504 | Diff::Insert(range) => { | 
|---|
| 505 | skip_overlap(&last_insert, range); | 
|---|
| 506 | if range.len == 0 { | 
|---|
| 507 | continue; | 
|---|
| 508 | } | 
|---|
| 509 | let adjust = boundary_down(range.doc, range.offset); | 
|---|
| 510 | range.offset -= adjust; | 
|---|
| 511 | range.len += adjust; | 
|---|
| 512 | let adjust = boundary_up(range.doc, range.offset + range.len); | 
|---|
| 513 | range.len += adjust; | 
|---|
| 514 | last_insert = *range; | 
|---|
| 515 | } | 
|---|
| 516 | } | 
|---|
| 517 | solution.diffs[retain] = diff; | 
|---|
| 518 | retain += 1; | 
|---|
| 519 | } | 
|---|
| 520 |  | 
|---|
| 521 | solution.diffs.truncate(retain); | 
|---|
| 522 | } | 
|---|
| 523 |  | 
|---|
| 524 | // Reduce the number of edits by eliminating semantically trivial equalities. | 
|---|
| 525 | fn cleanup_semantic(solution: &mut Solution) { | 
|---|
| 526 | let mut diffs = &mut solution.diffs; | 
|---|
| 527 | if diffs.is_empty() { | 
|---|
| 528 | return; | 
|---|
| 529 | } | 
|---|
| 530 |  | 
|---|
| 531 | let mut changes = false; | 
|---|
| 532 | let mut equalities = VecDeque::new(); // Double-ended queue of equalities. | 
|---|
| 533 | let mut last_equality = None; // Always equal to equalities.peek().text | 
|---|
| 534 | let mut pointer = 0; | 
|---|
| 535 | // Number of characters that changed prior to the equality. | 
|---|
| 536 | let mut len_insertions1 = 0; | 
|---|
| 537 | let mut len_deletions1 = 0; | 
|---|
| 538 | // Number of characters that changed after the equality. | 
|---|
| 539 | let mut len_insertions2 = 0; | 
|---|
| 540 | let mut len_deletions2 = 0; | 
|---|
| 541 | while let Some(&this_diff) = diffs.get(pointer) { | 
|---|
| 542 | match this_diff { | 
|---|
| 543 | Diff::Equal(text1, text2) => { | 
|---|
| 544 | equalities.push_back(pointer); | 
|---|
| 545 | len_insertions1 = len_insertions2; | 
|---|
| 546 | len_deletions1 = len_deletions2; | 
|---|
| 547 | len_insertions2 = 0; | 
|---|
| 548 | len_deletions2 = 0; | 
|---|
| 549 | last_equality = Some((text1, text2)); | 
|---|
| 550 | pointer += 1; | 
|---|
| 551 | continue; | 
|---|
| 552 | } | 
|---|
| 553 | Diff::Delete(text) => len_deletions2 += text.len, | 
|---|
| 554 | Diff::Insert(text) => len_insertions2 += text.len, | 
|---|
| 555 | } | 
|---|
| 556 | // Eliminate an equality that is smaller or equal to the edits on both | 
|---|
| 557 | // sides of it. | 
|---|
| 558 | if last_equality.map_or(false, |(last_equality, _)| { | 
|---|
| 559 | last_equality.len <= cmp::max(len_insertions1, len_deletions1) | 
|---|
| 560 | && last_equality.len <= cmp::max(len_insertions2, len_deletions2) | 
|---|
| 561 | }) { | 
|---|
| 562 | // Jump back to offending equality. | 
|---|
| 563 | pointer = equalities.pop_back().unwrap(); | 
|---|
| 564 |  | 
|---|
| 565 | // Replace equality with a delete. | 
|---|
| 566 | diffs[pointer] = Diff::Delete(last_equality.unwrap().0); | 
|---|
| 567 | // Insert a corresponding insert. | 
|---|
| 568 | diffs.insert(pointer + 1, Diff::Insert(last_equality.unwrap().1)); | 
|---|
| 569 |  | 
|---|
| 570 | len_insertions1 = 0; // Reset the counters. | 
|---|
| 571 | len_insertions2 = 0; | 
|---|
| 572 | len_deletions1 = 0; | 
|---|
| 573 | len_deletions2 = 0; | 
|---|
| 574 | last_equality = None; | 
|---|
| 575 | changes = true; | 
|---|
| 576 |  | 
|---|
| 577 | // Throw away the previous equality (it needs to be reevaluated). | 
|---|
| 578 | equalities.pop_back(); | 
|---|
| 579 | if let Some(back) = equalities.back() { | 
|---|
| 580 | // There is a safe equality we can fall back to. | 
|---|
| 581 | pointer = *back; | 
|---|
| 582 | } else { | 
|---|
| 583 | // There are no previous equalities, jump back to the start. | 
|---|
| 584 | pointer = 0; | 
|---|
| 585 | continue; | 
|---|
| 586 | } | 
|---|
| 587 | } | 
|---|
| 588 | pointer += 1; | 
|---|
| 589 | } | 
|---|
| 590 |  | 
|---|
| 591 | // Normalize the diff. | 
|---|
| 592 | if changes { | 
|---|
| 593 | cleanup_merge(solution); | 
|---|
| 594 | } | 
|---|
| 595 | cleanup_semantic_lossless(solution); | 
|---|
| 596 | diffs = &mut solution.diffs; | 
|---|
| 597 |  | 
|---|
| 598 | // Find any overlaps between deletions and insertions. | 
|---|
| 599 | // e.g: <del>abcxxx</del><ins>xxxdef</ins> | 
|---|
| 600 | //   -> <del>abc</del>xxx<ins>def</ins> | 
|---|
| 601 | // e.g: <del>xxxabc</del><ins>defxxx</ins> | 
|---|
| 602 | //   -> <ins>def</ins>xxx<del>abc</del> | 
|---|
| 603 | // Only extract an overlap if it is as big as the edit ahead or behind it. | 
|---|
| 604 | let mut pointer = 1; | 
|---|
| 605 | while let Some(&this_diff) = diffs.get(pointer) { | 
|---|
| 606 | let prev_diff = diffs[pointer - 1]; | 
|---|
| 607 | if let (Diff::Delete(deletion), Diff::Insert(insertion)) = (prev_diff, this_diff) { | 
|---|
| 608 | let overlap_len1 = common_overlap(deletion, insertion); | 
|---|
| 609 | let overlap_len2 = common_overlap(insertion, deletion); | 
|---|
| 610 | let overlap_min = cmp::min(deletion.len, insertion.len); | 
|---|
| 611 | if overlap_len1 >= overlap_len2 && 2 * overlap_len1 >= overlap_min { | 
|---|
| 612 | // Overlap found. Insert an equality and trim the surrounding edits. | 
|---|
| 613 | diffs.insert( | 
|---|
| 614 | pointer, | 
|---|
| 615 | Diff::Equal( | 
|---|
| 616 | deletion.substring(deletion.len - overlap_len1..deletion.len), | 
|---|
| 617 | insertion.substring(..overlap_len1), | 
|---|
| 618 | ), | 
|---|
| 619 | ); | 
|---|
| 620 | diffs[pointer - 1] = | 
|---|
| 621 | Diff::Delete(deletion.substring(..deletion.len - overlap_len1)); | 
|---|
| 622 | diffs[pointer + 1] = Diff::Insert(insertion.substring(overlap_len1..)); | 
|---|
| 623 | } else if overlap_len1 < overlap_len2 && 2 * overlap_len2 >= overlap_min { | 
|---|
| 624 | // Reverse overlap found. | 
|---|
| 625 | // Insert an equality and swap and trim the surrounding edits. | 
|---|
| 626 | diffs.insert( | 
|---|
| 627 | pointer, | 
|---|
| 628 | Diff::Equal( | 
|---|
| 629 | deletion.substring(..overlap_len2), | 
|---|
| 630 | insertion.substring(insertion.len - overlap_len2..insertion.len), | 
|---|
| 631 | ), | 
|---|
| 632 | ); | 
|---|
| 633 | diffs[pointer - 1] = | 
|---|
| 634 | Diff::Insert(insertion.substring(..insertion.len - overlap_len2)); | 
|---|
| 635 | diffs[pointer + 1] = Diff::Delete(deletion.substring(overlap_len2..)); | 
|---|
| 636 | } | 
|---|
| 637 | pointer += 1; | 
|---|
| 638 | } | 
|---|
| 639 | pointer += 1; | 
|---|
| 640 | } | 
|---|
| 641 | } | 
|---|
| 642 |  | 
|---|
| 643 | // Look for single edits surrounded on both sides by equalities which can be | 
|---|
| 644 | // shifted sideways to align the edit to a word boundary. | 
|---|
| 645 | // | 
|---|
| 646 | // e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. | 
|---|
| 647 | fn cleanup_semantic_lossless(solution: &mut Solution) { | 
|---|
| 648 | let diffs = &mut solution.diffs; | 
|---|
| 649 | let mut pointer = 1; | 
|---|
| 650 | while let Some(&next_diff) = diffs.get(pointer + 1) { | 
|---|
| 651 | let prev_diff = diffs[pointer - 1]; | 
|---|
| 652 | if let ( | 
|---|
| 653 | Diff::Equal(mut prev_equal1, mut prev_equal2), | 
|---|
| 654 | Diff::Equal(mut next_equal1, mut next_equal2), | 
|---|
| 655 | ) = (prev_diff, next_diff) | 
|---|
| 656 | { | 
|---|
| 657 | // This is a single edit surrounded by equalities. | 
|---|
| 658 | let mut edit = diffs[pointer]; | 
|---|
| 659 |  | 
|---|
| 660 | // First, shift the edit as far left as possible. | 
|---|
| 661 | let common_offset = common_suffix(prev_equal1, edit.text()); | 
|---|
| 662 | let original_prev_len = prev_equal1.len; | 
|---|
| 663 | prev_equal1.len -= common_offset; | 
|---|
| 664 | prev_equal2.len -= common_offset; | 
|---|
| 665 | edit.shift_left(common_offset); | 
|---|
| 666 | next_equal1.offset -= common_offset; | 
|---|
| 667 | next_equal1.len += common_offset; | 
|---|
| 668 | next_equal2.offset -= common_offset; | 
|---|
| 669 | next_equal2.len += common_offset; | 
|---|
| 670 |  | 
|---|
| 671 | // Second, step character by character right, looking for the best fit. | 
|---|
| 672 | let mut best_prev_equal = (prev_equal1, prev_equal2); | 
|---|
| 673 | let mut best_edit = edit; | 
|---|
| 674 | let mut best_next_equal = (next_equal1, next_equal2); | 
|---|
| 675 | let mut best_score = cleanup_semantic_score(prev_equal1, edit.text()) | 
|---|
| 676 | + cleanup_semantic_score(edit.text(), next_equal1); | 
|---|
| 677 | while !edit.text().is_empty() | 
|---|
| 678 | && !next_equal1.is_empty() | 
|---|
| 679 | && edit.text().chars().next().unwrap() == next_equal1.chars().next().unwrap() | 
|---|
| 680 | { | 
|---|
| 681 | prev_equal1.len += 1; | 
|---|
| 682 | prev_equal2.len += 1; | 
|---|
| 683 | edit.shift_right(1); | 
|---|
| 684 | next_equal1.offset += 1; | 
|---|
| 685 | next_equal1.len -= 1; | 
|---|
| 686 | next_equal2.offset += 1; | 
|---|
| 687 | next_equal2.len -= 1; | 
|---|
| 688 | let score = cleanup_semantic_score(prev_equal1, edit.text()) | 
|---|
| 689 | + cleanup_semantic_score(edit.text(), next_equal1); | 
|---|
| 690 | // The >= encourages trailing rather than leading whitespace on edits. | 
|---|
| 691 | if score >= best_score { | 
|---|
| 692 | best_score = score; | 
|---|
| 693 | best_prev_equal = (prev_equal1, prev_equal2); | 
|---|
| 694 | best_edit = edit; | 
|---|
| 695 | best_next_equal = (next_equal1, next_equal2); | 
|---|
| 696 | } | 
|---|
| 697 | } | 
|---|
| 698 |  | 
|---|
| 699 | if original_prev_len != best_prev_equal.0.len { | 
|---|
| 700 | // We have an improvement, save it back to the diff. | 
|---|
| 701 | if best_next_equal.0.is_empty() { | 
|---|
| 702 | diffs.remove(pointer + 1); | 
|---|
| 703 | } else { | 
|---|
| 704 | diffs[pointer + 1] = Diff::Equal(best_next_equal.0, best_next_equal.1); | 
|---|
| 705 | } | 
|---|
| 706 | diffs[pointer] = best_edit; | 
|---|
| 707 | if best_prev_equal.0.is_empty() { | 
|---|
| 708 | diffs.remove(pointer - 1); | 
|---|
| 709 | pointer -= 1; | 
|---|
| 710 | } else { | 
|---|
| 711 | diffs[pointer - 1] = Diff::Equal(best_prev_equal.0, best_prev_equal.1); | 
|---|
| 712 | } | 
|---|
| 713 | } | 
|---|
| 714 | } | 
|---|
| 715 | pointer += 1; | 
|---|
| 716 | } | 
|---|
| 717 | } | 
|---|
| 718 |  | 
|---|
| 719 | // Given two strings, compute a score representing whether the internal boundary | 
|---|
| 720 | // falls on logical boundaries. | 
|---|
| 721 | // | 
|---|
| 722 | // Scores range from 6 (best) to 0 (worst). | 
|---|
| 723 | fn cleanup_semantic_score(one: Range, two: Range) -> usize { | 
|---|
| 724 | if one.is_empty() || two.is_empty() { | 
|---|
| 725 | // Edges are the best. | 
|---|
| 726 | return 6; | 
|---|
| 727 | } | 
|---|
| 728 |  | 
|---|
| 729 | // Each port of this function behaves slightly differently due to subtle | 
|---|
| 730 | // differences in each language's definition of things like 'whitespace'. | 
|---|
| 731 | // Since this function's purpose is largely cosmetic, the choice has been | 
|---|
| 732 | // made to use each language's native features rather than force total | 
|---|
| 733 | // conformity. | 
|---|
| 734 | let char1 = one.chars().next_back().unwrap(); | 
|---|
| 735 | let char2 = two.chars().next().unwrap(); | 
|---|
| 736 | let non_alphanumeric1 = !char1.is_ascii_alphanumeric(); | 
|---|
| 737 | let non_alphanumeric2 = !char2.is_ascii_alphanumeric(); | 
|---|
| 738 | let whitespace1 = non_alphanumeric1 && char1.is_ascii_whitespace(); | 
|---|
| 739 | let whitespace2 = non_alphanumeric2 && char2.is_ascii_whitespace(); | 
|---|
| 740 | let line_break1 = whitespace1 && char1.is_control(); | 
|---|
| 741 | let line_break2 = whitespace2 && char2.is_control(); | 
|---|
| 742 | let blank_line1 = | 
|---|
| 743 | line_break1 && (one.ends_with([ '\n ', '\n ']) || one.ends_with([ '\n ', '\r ', '\n '])); | 
|---|
| 744 | let blank_line2 = | 
|---|
| 745 | line_break2 && (two.starts_with([ '\n ', '\n ']) || two.starts_with([ '\r ', '\n ', '\r ', '\n '])); | 
|---|
| 746 |  | 
|---|
| 747 | if blank_line1 || blank_line2 { | 
|---|
| 748 | // Five points for blank lines. | 
|---|
| 749 | 5 | 
|---|
| 750 | } else if line_break1 || line_break2 { | 
|---|
| 751 | // Four points for line breaks. | 
|---|
| 752 | 4 | 
|---|
| 753 | } else if non_alphanumeric1 && !whitespace1 && whitespace2 { | 
|---|
| 754 | // Three points for end of sentences. | 
|---|
| 755 | 3 | 
|---|
| 756 | } else if whitespace1 || whitespace2 { | 
|---|
| 757 | // Two points for whitespace. | 
|---|
| 758 | 2 | 
|---|
| 759 | } else if non_alphanumeric1 || non_alphanumeric2 { | 
|---|
| 760 | // One point for non-alphanumeric. | 
|---|
| 761 | 1 | 
|---|
| 762 | } else { | 
|---|
| 763 | 0 | 
|---|
| 764 | } | 
|---|
| 765 | } | 
|---|
| 766 |  | 
|---|
| 767 | // Reorder and merge like edit sections. Merge equalities. Any edit section can | 
|---|
| 768 | // move as long as it doesn't cross an equality. | 
|---|
| 769 | fn cleanup_merge(solution: &mut Solution) { | 
|---|
| 770 | let diffs = &mut solution.diffs; | 
|---|
| 771 | while !diffs.is_empty() { | 
|---|
| 772 | diffs.push(Diff::Equal( | 
|---|
| 773 | solution.text1.substring(solution.text1.len..), | 
|---|
| 774 | solution.text2.substring(solution.text2.len..), | 
|---|
| 775 | )); // Add a dummy entry at the end. | 
|---|
| 776 | let mut pointer = 0; | 
|---|
| 777 | let mut count_delete = 0; | 
|---|
| 778 | let mut count_insert = 0; | 
|---|
| 779 | let mut text_delete = Range::empty(); | 
|---|
| 780 | let mut text_insert = Range::empty(); | 
|---|
| 781 | while let Some(&this_diff) = diffs.get(pointer) { | 
|---|
| 782 | match this_diff { | 
|---|
| 783 | Diff::Insert(text) => { | 
|---|
| 784 | count_insert += 1; | 
|---|
| 785 | if text_insert.is_empty() { | 
|---|
| 786 | text_insert = text; | 
|---|
| 787 | } else { | 
|---|
| 788 | text_insert.len += text.len; | 
|---|
| 789 | } | 
|---|
| 790 | } | 
|---|
| 791 | Diff::Delete(text) => { | 
|---|
| 792 | count_delete += 1; | 
|---|
| 793 | if text_delete.is_empty() { | 
|---|
| 794 | text_delete = text; | 
|---|
| 795 | } else { | 
|---|
| 796 | text_delete.len += text.len; | 
|---|
| 797 | } | 
|---|
| 798 | } | 
|---|
| 799 | Diff::Equal(text, _) => { | 
|---|
| 800 | let count_both = count_delete + count_insert; | 
|---|
| 801 | if count_both > 1 { | 
|---|
| 802 | let both_types = count_delete != 0 && count_insert != 0; | 
|---|
| 803 | // Delete the offending records. | 
|---|
| 804 | diffs.drain(pointer - count_both..pointer); | 
|---|
| 805 | pointer -= count_both; | 
|---|
| 806 | if both_types { | 
|---|
| 807 | // Factor out any common prefix. | 
|---|
| 808 | let common_length = common_prefix(text_insert, text_delete); | 
|---|
| 809 | if common_length != 0 { | 
|---|
| 810 | if pointer > 0 { | 
|---|
| 811 | match &mut diffs[pointer - 1] { | 
|---|
| 812 | Diff::Equal(this_diff1, this_diff2) => { | 
|---|
| 813 | this_diff1.len += common_length; | 
|---|
| 814 | this_diff2.len += common_length; | 
|---|
| 815 | } | 
|---|
| 816 | _ => unreachable!( | 
|---|
| 817 | "previous diff should have been an equality" | 
|---|
| 818 | ), | 
|---|
| 819 | } | 
|---|
| 820 | } else { | 
|---|
| 821 | diffs.insert( | 
|---|
| 822 | pointer, | 
|---|
| 823 | Diff::Equal( | 
|---|
| 824 | text_delete.substring(..common_length), | 
|---|
| 825 | text_insert.substring(..common_length), | 
|---|
| 826 | ), | 
|---|
| 827 | ); | 
|---|
| 828 | pointer += 1; | 
|---|
| 829 | } | 
|---|
| 830 | text_insert = text_insert.substring(common_length..); | 
|---|
| 831 | text_delete = text_delete.substring(common_length..); | 
|---|
| 832 | } | 
|---|
| 833 | // Factor out any common suffix. | 
|---|
| 834 | let common_length = common_suffix(text_insert, text_delete); | 
|---|
| 835 | if common_length != 0 { | 
|---|
| 836 | diffs[pointer].grow_left(common_length); | 
|---|
| 837 | text_insert.len -= common_length; | 
|---|
| 838 | text_delete.len -= common_length; | 
|---|
| 839 | } | 
|---|
| 840 | } | 
|---|
| 841 | // Insert the merged records. | 
|---|
| 842 | if !text_delete.is_empty() { | 
|---|
| 843 | diffs.insert(pointer, Diff::Delete(text_delete)); | 
|---|
| 844 | pointer += 1; | 
|---|
| 845 | } | 
|---|
| 846 | if !text_insert.is_empty() { | 
|---|
| 847 | diffs.insert(pointer, Diff::Insert(text_insert)); | 
|---|
| 848 | pointer += 1; | 
|---|
| 849 | } | 
|---|
| 850 | } else if pointer > 0 { | 
|---|
| 851 | if let Some(Diff::Equal(prev_equal1, prev_equal2)) = | 
|---|
| 852 | diffs.get_mut(pointer - 1) | 
|---|
| 853 | { | 
|---|
| 854 | // Merge this equality with the previous one. | 
|---|
| 855 | prev_equal1.len += text.len; | 
|---|
| 856 | prev_equal2.len += text.len; | 
|---|
| 857 | diffs.remove(pointer); | 
|---|
| 858 | pointer -= 1; | 
|---|
| 859 | } | 
|---|
| 860 | } | 
|---|
| 861 | count_insert = 0; | 
|---|
| 862 | count_delete = 0; | 
|---|
| 863 | text_delete = Range::empty(); | 
|---|
| 864 | text_insert = Range::empty(); | 
|---|
| 865 | } | 
|---|
| 866 | } | 
|---|
| 867 | pointer += 1; | 
|---|
| 868 | } | 
|---|
| 869 | if diffs.last().unwrap().text().is_empty() { | 
|---|
| 870 | diffs.pop(); // Remove the dummy entry at the end. | 
|---|
| 871 | } | 
|---|
| 872 |  | 
|---|
| 873 | // Second pass: look for single edits surrounded on both sides by equalities | 
|---|
| 874 | // which can be shifted sideways to eliminate an equality. | 
|---|
| 875 | // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC | 
|---|
| 876 | let mut changes = false; | 
|---|
| 877 | let mut pointer = 1; | 
|---|
| 878 | // Intentionally ignore the first and last element (don't need checking). | 
|---|
| 879 | while let Some(&next_diff) = diffs.get(pointer + 1) { | 
|---|
| 880 | let prev_diff = diffs[pointer - 1]; | 
|---|
| 881 | let this_diff = diffs[pointer]; | 
|---|
| 882 | if let (Diff::Equal(prev_diff, _), Diff::Equal(next_diff, _)) = (prev_diff, next_diff) { | 
|---|
| 883 | // This is a single edit surrounded by equalities. | 
|---|
| 884 | if this_diff.text().ends_with(prev_diff) { | 
|---|
| 885 | // Shift the edit over the previous equality. | 
|---|
| 886 | diffs[pointer].shift_left(prev_diff.len); | 
|---|
| 887 | diffs[pointer + 1].grow_left(prev_diff.len); | 
|---|
| 888 | diffs.remove(pointer - 1); // Delete prev_diff. | 
|---|
| 889 | changes = true; | 
|---|
| 890 | } else if this_diff.text().starts_with(next_diff) { | 
|---|
| 891 | // Shift the edit over the next equality. | 
|---|
| 892 | diffs[pointer - 1].grow_right(next_diff.len); | 
|---|
| 893 | diffs[pointer].shift_right(next_diff.len); | 
|---|
| 894 | diffs.remove(pointer + 1); // Delete next_diff. | 
|---|
| 895 | changes = true; | 
|---|
| 896 | } | 
|---|
| 897 | } | 
|---|
| 898 | pointer += 1; | 
|---|
| 899 | } | 
|---|
| 900 | // If shifts were made, the diff needs reordering and another shift sweep. | 
|---|
| 901 | if !changes { | 
|---|
| 902 | return; | 
|---|
| 903 | } | 
|---|
| 904 | } | 
|---|
| 905 | } | 
|---|
| 906 |  | 
|---|
| 907 | impl Debug for Chunk<'_> { | 
|---|
| 908 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { | 
|---|
| 909 | let (name: &'static str, text: &str) = match *self { | 
|---|
| 910 | Chunk::Equal(text: &str) => ( "Equal", text), | 
|---|
| 911 | Chunk::Delete(text: &str) => ( "Delete", text), | 
|---|
| 912 | Chunk::Insert(text: &str) => ( "Insert", text), | 
|---|
| 913 | }; | 
|---|
| 914 | write!(formatter, "{} ({:?} )", name, text) | 
|---|
| 915 | } | 
|---|
| 916 | } | 
|---|
| 917 |  | 
|---|
| 918 | impl Debug for Diff<'_, '_> { | 
|---|
| 919 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { | 
|---|
| 920 | let (name: &'static str, range: Range<'_>) = match *self { | 
|---|
| 921 | Diff::Equal(range: Range<'_>, _) => ( "Equal", range), | 
|---|
| 922 | Diff::Delete(range: Range<'_>) => ( "Delete", range), | 
|---|
| 923 | Diff::Insert(range: Range<'_>) => ( "Insert", range), | 
|---|
| 924 | }; | 
|---|
| 925 | formatter.write_str(data:name)?; | 
|---|
| 926 | formatter.write_str(data: "(\" ")?; | 
|---|
| 927 | for ch: char in range.chars() { | 
|---|
| 928 | if ch == '\' '{ | 
|---|
| 929 | // escape_debug turns this into "\'" which is unnecessary. | 
|---|
| 930 | formatter.write_char(ch)?; | 
|---|
| 931 | } else { | 
|---|
| 932 | Display::fmt(&ch.escape_debug(), f:formatter)?; | 
|---|
| 933 | } | 
|---|
| 934 | } | 
|---|
| 935 | formatter.write_str(data: "\" )")?; | 
|---|
| 936 | Ok(()) | 
|---|
| 937 | } | 
|---|
| 938 | } | 
|---|
| 939 |  | 
|---|