// lib.rs source code [crates/dissimilar/src/lib.rs]

//! [![github]](https://github.com/dtolnay/dissimilar)&ensp;[![crates-io]](https://crates.io/crates/dissimilar)&ensp;[![docs-rs]](https://docs.rs/dissimilar)
//!
//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
//!
//! <br>
//!
//! ## Diff library with semantic cleanup, based on Google's diff-match-patch
//!
//! This library is a port of the Diff component of [Diff Match Patch] to Rust.
//! The diff implementation is based on [Myers' diff algorithm] but includes
//! some [semantic cleanups] to increase human readability by factoring out
//! commonalities which are likely to be coincidental.
//!
//! Diff Match Patch was originally built in 2006 to power Google Docs.
//!
//! # Interface
//!
//! Here is the entire API of the Rust implementation. It operates on borrowed
//! strings and the return value of the diff algorithm is a vector of chunks
//! pointing into slices of those input strings.
//!
//! ```
//! pub enum Chunk<'a> {
//!     Equal(&'a str),
//!     Delete(&'a str),
//!     Insert(&'a str),
//! }
//!
//! # const IGNORE: &str = stringify! {
//! pub fn diff(text1: &str, text2: &str) -> Vec<Chunk>;
//! # };
//! ```
//!
//! [Diff Match Patch]: https://github.com/google/diff-match-patch
//! [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf
//! [semantic cleanups]: https://neil.fraser.name/writing/diff/
39
40	#![doc(html_root_url = "https://docs.rs/dissimilar/1.0.10")]
41	#![allow(
42	clippy::blocks_in_conditions,
43	clippy::bool_to_int_with_if,
44	clippy::cast_possible_wrap,
45	clippy::cast_sign_loss,
46	clippy::cloned_instead_of_copied, // https://github.com/rust-lang/rust-clippy/issues/7127
47	clippy::collapsible_else_if,
48	clippy::comparison_chain,
49	clippy::implied_bounds_in_impls,
50	clippy::items_after_test_module, // https://github.com/rust-lang/rust-clippy/issues/10713
51	clippy::let_underscore_untyped,
52	clippy::match_same_arms,
53	clippy::module_name_repetitions,
54	clippy::must_use_candidate,
55	clippy::new_without_default,
56	clippy::octal_escapes,
57	clippy::shadow_unrelated,
58	clippy::similar_names,
59	clippy::too_many_lines,
60	clippy::unseparated_literal_suffix,
61	unused_parens, // false positive on Some(&(mut diff)) pattern
62	)]
63
64	mod find;
65	mod range;
66
67	#[cfg(test)]
68	mod tests;
69
70	use crate::range::{slice, Range};
71	use std::cmp;
72	use std::collections::VecDeque;
73	use std::fmt::{self, Debug, Display, Write};
74
/// One piece of a diff, borrowing its text from one of the two inputs passed
/// to [`diff`].
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Chunk<'a> {
    /// Text present in both inputs (sliced from the first input).
    Equal(&'a str),
    /// Text present only in the first input.
    Delete(&'a str),
    /// Text present only in the second input.
    Insert(&'a str),
}
81
82	#[derive(Copy, Clone)]
83	enum Diff<'a, 'b> {
84	Equal(Range<'a>, Range<'b>),
85	Delete(Range<'a>),
86	Insert(Range<'b>),
87	}
88
89	impl<'tmp, 'a: 'tmp, 'b: 'tmp> Diff<'a, 'b> {
90	fn text(&self) -> Range<'tmp> {
91	match *self {
92	Diff::Equal(range, _) \| Diff::Delete(range) \| Diff::Insert(range) => range,
93	}
94	}
95
96	fn grow_left(&mut self, increment: usize) {
97	self.for_each(\|range\| {
98	range.offset -= increment;
99	range.len += increment;
100	});
101	}
102
103	fn grow_right(&mut self, increment: usize) {
104	self.for_each(\|range\| range.len += increment);
105	}
106
107	fn shift_left(&mut self, increment: usize) {
108	self.for_each(\|range\| range.offset -= increment);
109	}
110
111	fn shift_right(&mut self, increment: usize) {
112	self.for_each(\|range\| range.offset += increment);
113	}
114
115	fn for_each(&mut self, f: impl Fn(&mut Range)) {
116	match self {
117	Diff::Equal(range1, range2) => {
118	f(range1);
119	f(range2);
120	}
121	Diff::Delete(range) => f(range),
122	Diff::Insert(range) => f(range),
123	}
124	}
125	}
126
127	pub fn diff<'a>(text1: &'a str, text2: &'a str) -> Vec<Chunk<'a>> {
128	let chars1: Vec<char> = text1.chars().collect();
129	let chars2: Vec<char> = text2.chars().collect();
130	let range1 = Range::new(&chars1, ..);
131	let range2 = Range::new(&chars2, ..);
132
133	let mut solution = main(range1, range2);
134	cleanup_char_boundary(&mut solution);
135	cleanup_semantic(&mut solution);
136	cleanup_merge(&mut solution);
137
138	let mut chunks = Vec::new();
139	let mut pos1 = `0`;
140	let mut pos2 = `0`;
141	for diff in solution.diffs {
142	chunks.push(match diff {
143	Diff::Equal(range, _) => {
144	let len = range.len_bytes();
145	let chunk = Chunk::Equal(&text1[pos1..pos1 + len]);
146	pos1 += len;
147	pos2 += len;
148	chunk
149	}
150	Diff::Delete(range) => {
151	let len = range.len_bytes();
152	let chunk = Chunk::Delete(&text1[pos1..pos1 + len]);
153	pos1 += len;
154	chunk
155	}
156	Diff::Insert(range) => {
157	let len = range.len_bytes();
158	let chunk = Chunk::Insert(&text2[pos2..pos2 + len]);
159	pos2 += len;
160	chunk
161	}
162	});
163	}
164	chunks
165	}
166
167	struct Solution<'a, 'b> {
168	text1: Range<'a>,
169	text2: Range<'b>,
170	diffs: Vec<Diff<'a, 'b>>,
171	}
172
173	fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> {
174	let whole1 = text1;
175	let whole2 = text2;
176
177	// Trim off common prefix.
178	let common_prefix_len = common_prefix(text1, text2);
179	let common_prefix = Diff::Equal(
180	text1.substring(..common_prefix_len),
181	text2.substring(..common_prefix_len),
182	);
183	text1 = text1.substring(common_prefix_len..);
184	text2 = text2.substring(common_prefix_len..);
185
186	// Trim off common suffix.
187	let common_suffix_len = common_suffix(text1, text2);
188	let common_suffix = Diff::Equal(
189	text1.substring(text1.len - common_suffix_len..),
190	text2.substring(text2.len - common_suffix_len..),
191	);
192	text1 = text1.substring(..text1.len - common_suffix_len);
193	text2 = text2.substring(..text2.len - common_suffix_len);
194
195	// Compute the diff on the middle block.
196	let mut solution = Solution {
197	text1: whole1,
198	text2: whole2,
199	diffs: compute(text1, text2),
200	};
201
202	// Restore the prefix and suffix.
203	if common_prefix_len > `0` {
204	solution.diffs.insert(`0`, common_prefix);
205	}
206	if common_suffix_len > `0` {
207	solution.diffs.push(common_suffix);
208	}
209
210	cleanup_merge(&mut solution);
211
212	solution
213	}
214
215	// Find the differences between two texts. Assumes that the texts do not have
216	// any common prefix or suffix.
217	fn compute<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
218	match (text1.is_empty(), text2.is_empty()) {
219	(`true`, `true`) => return Vec::new(),
220	(`true`, `false`) => return vec![Diff::Insert(text2)],
221	(`false`, `true`) => return vec![Diff::Delete(text1)],
222	(`false`, `false`) => {}
223	}
224
225	// Check for entire shorter text inside the longer text.
226	if text1.len > text2.len {
227	if let Some(i) = text1.find(text2) {
228	return vec![
229	Diff::Delete(text1.substring(..i)),
230	Diff::Equal(text1.substring(i..i + text2.len), text2),
231	Diff::Delete(text1.substring(i + text2.len..)),
232	];
233	}
234	} else {
235	if let Some(i) = text2.find(text1) {
236	return vec![
237	Diff::Insert(text2.substring(..i)),
238	Diff::Equal(text1, text2.substring(i..i + text1.len)),
239	Diff::Insert(text2.substring(i + text1.len..)),
240	];
241	}
242	}
243
244	if text1.len == `1` \|\| text2.len == `1` {
245	// Single character string.
246	// After the previous check, the character can't be an equality.
247	return vec![Diff::Delete(text1), Diff::Insert(text2)];
248	}
249
250	bisect(text1, text2)
251	}
252
253	// Find the 'middle snake' of a diff, split the problem in two and return the
254	// recursively constructed diff.
255	//
256	// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
257	fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
258	let max_d = (text1.len + text2.len + `1`) / `2`;
259	let v_offset = max_d;
260	let v_len = `2` * max_d;
261	let mut v1 = vec![`-1isize`; v_len];
262	let mut v2 = vec![`-1isize`; v_len];
263	v1[v_offset + `1`] = `0`;
264	v2[v_offset + `1`] = `0`;
265	let delta = text1.len as isize - text2.len as isize;
266	// If the total number of characters is odd, then the front path will
267	// collide with the reverse path.
268	let front = delta % `2` != `0`;
269	// Offsets for start and end of k loop.
270	// Prevents mapping of space beyond the grid.
271	let mut k1start = `0`;
272	let mut k1end = `0`;
273	let mut k2start = `0`;
274	let mut k2end = `0`;
275	for d in `0`..max_d as isize {
276	// Walk the front path one step.
277	let mut k1 = -d + k1start;
278	while k1 <= d - k1end {
279	let k1_offset = (v_offset as isize + k1) as usize;
280	let mut x1 = if k1 == -d \|\| (k1 != d && v1[k1_offset - `1`] < v1[k1_offset + `1`]) {
281	v1[k1_offset + `1`]
282	} else {
283	v1[k1_offset - `1`] + `1`
284	} as usize;
285	let mut y1 = (x1 as isize - k1) as usize;
286	if let (Some(s1), Some(s2)) = (text1.get(x1..), text2.get(y1..)) {
287	let advance = common_prefix(s1, s2);
288	x1 += advance;
289	y1 += advance;
290	}
291	v1[k1_offset] = x1 as isize;
292	if x1 > text1.len {
293	// Ran off the right of the graph.
294	k1end += `2`;
295	} else if y1 > text2.len {
296	// Ran off the bottom of the graph.
297	k1start += `2`;
298	} else if front {
299	let k2_offset = v_offset as isize + delta - k1;
300	if k2_offset >= `0` && k2_offset < v_len as isize && v2[k2_offset as usize] != `-1` {
301	// Mirror x2 onto top-left coordinate system.
302	let x2 = text1.len as isize - v2[k2_offset as usize];
303	if x1 as isize >= x2 {
304	// Overlap detected.
305	return bisect_split(text1, text2, x1, y1);
306	}
307	}
308	}
309	k1 += `2`;
310	}
311
312	// Walk the reverse path one step.
313	let mut k2 = -d + k2start;
314	while k2 <= d - k2end {
315	let k2_offset = (v_offset as isize + k2) as usize;
316	let mut x2 = if k2 == -d \|\| (k2 != d && v2[k2_offset - `1`] < v2[k2_offset + `1`]) {
317	v2[k2_offset + `1`]
318	} else {
319	v2[k2_offset - `1`] + `1`
320	} as usize;
321	let mut y2 = (x2 as isize - k2) as usize;
322	if x2 < text1.len && y2 < text2.len {
323	let advance = common_suffix(
324	text1.substring(..text1.len - x2),
325	text2.substring(..text2.len - y2),
326	);
327	x2 += advance;
328	y2 += advance;
329	}
330	v2[k2_offset] = x2 as isize;
331	if x2 > text1.len {
332	// Ran off the left of the graph.
333	k2end += `2`;
334	} else if y2 > text2.len {
335	// Ran off the top of the graph.
336	k2start += `2`;
337	} else if !front {
338	let k1_offset = v_offset as isize + delta - k2;
339	if k1_offset >= `0` && k1_offset < v_len as isize && v1[k1_offset as usize] != `-1` {
340	let x1 = v1[k1_offset as usize] as usize;
341	let y1 = v_offset + x1 - k1_offset as usize;
342	// Mirror x2 onto top-left coordinate system.
343	x2 = text1.len - x2;
344	if x1 >= x2 {
345	// Overlap detected.
346	return bisect_split(text1, text2, x1, y1);
347	}
348	}
349	}
350	k2 += `2`;
351	}
352	}
353	// Number of diffs equals number of characters, no commonality at all.
354	vec![Diff::Delete(text1), Diff::Insert(text2)]
355	}
356
357	// Given the location of the 'middle snake', split the diff in two parts and
358	// recurse.
359	fn bisect_split<'a, 'b>(
360	text1: Range<'a>,
361	text2: Range<'b>,
362	x: usize,
363	y: usize,
364	) -> Vec<Diff<'a, 'b>> {
365	let (text1a: Range<'_>, text1b: Range<'_>) = text1.split_at(mid:x);
366	let (text2a: Range<'_>, text2b: Range<'_>) = text2.split_at(mid:y);
367
368	// Compute both diffs serially.
369	let mut diffs: Vec> = main(text1:text1a, text2:text2a).diffs;
370	diffs.extend(iter:main(text1:text1b, text2:text2b).diffs);
371
372	diffs
373	}
374
375	// Determine the length of the common prefix of two strings.
376	fn common_prefix(text1: Range, text2: Range) -> usize {
377	for (i: usize, (b1: char, b2: char)) in text1.chars().zip(text2.chars()).enumerate() {
378	if b1 != b2 {
379	return i;
380	}
381	}
382	cmp::min(v1:text1.len, v2:text2.len)
383	}
384
385	// Determine the length of the common suffix of two strings.
386	fn common_suffix(text1: Range, text2: Range) -> usize {
387	for (i: usize, (b1: char, b2: char)) in text1.chars().rev().zip(text2.chars().rev()).enumerate() {
388	if b1 != b2 {
389	return i;
390	}
391	}
392	cmp::min(v1:text1.len, v2:text2.len)
393	}
394
395	// Determine if the suffix of one string is the prefix of another.
396	//
397	// Returns the number of characters common to the end of the first string and
398	// the start of the second string.
399	fn common_overlap(mut text1: Range, mut text2: Range) -> usize {
400	// Eliminate the null case.
401	if text1.is_empty() \|\| text2.is_empty() {
402	return `0`;
403	}
404	// Truncate the longer string.
405	if text1.len > text2.len {
406	text1 = text1.substring(text1.len - text2.len..);
407	} else if text1.len < text2.len {
408	text2 = text2.substring(..text1.len);
409	}
410	// Quick check for the worst case.
411	if slice(text1) == slice(text2) {
412	return text1.len;
413	}
414
415	// Start by looking for a single character match
416	// and increase length until no match is found.
417	// Performance analysis: https://neil.fraser.name/news/2010/11/04/
418	let mut best = `0`;
419	let mut length = `1`;
420	loop {
421	let pattern = text1.substring(text1.len - length..);
422	let found = match text2.find(pattern) {
423	Some(found) => found,
424	None => return best,
425	};
426	length += found;
427	if found == `0`
428	\|\| slice(text1.substring(text1.len - length..)) == slice(text2.substring(..length))
429	{
430	best = length;
431	length += `1`;
432	}
433	}
434	}
435
436	fn cleanup_char_boundary(solution: &mut Solution) {
437	fn is_segmentation_boundary(doc: &[char], pos: usize) -> bool {
438	// FIXME: use unicode-segmentation crate?
439	let _ = doc;
440	let _ = pos;
441	`true`
442	}
443
444	fn boundary_down(doc: &[char], pos: usize) -> usize {
445	let mut adjust = `0`;
446	while !is_segmentation_boundary(doc, pos - adjust) {
447	adjust += `1`;
448	}
449	adjust
450	}
451
452	fn boundary_up(doc: &[char], pos: usize) -> usize {
453	let mut adjust = `0`;
454	while !is_segmentation_boundary(doc, pos + adjust) {
455	adjust += `1`;
456	}
457	adjust
458	}
459
460	fn skip_overlap<'a>(prev: &Range<'a>, range: &mut Range<'a>) {
461	let prev_end = prev.offset + prev.len;
462	if prev_end > range.offset {
463	let delta = cmp::min(prev_end - range.offset, range.len);
464	range.offset += delta;
465	range.len -= delta;
466	}
467	}
468
469	let mut read = `0`;
470	let mut retain = `0`;
471	let mut last_delete = Range::empty();
472	let mut last_insert = Range::empty();
473	while let Some(&(mut diff)) = solution.diffs.get(read) {
474	read += `1`;
475	match &mut diff {
476	Diff::Equal(range1, range2) => {
477	let adjust = boundary_up(range1.doc, range1.offset);
478	// If the whole range is sub-character, skip it.
479	if range1.len <= adjust {
480	continue;
481	}
482	range1.offset += adjust;
483	range1.len -= adjust;
484	range2.offset += adjust;
485	range2.len -= adjust;
486	let adjust = boundary_down(range1.doc, range1.offset + range1.len);
487	range1.len -= adjust;
488	range2.len -= adjust;
489	last_delete = Range::empty();
490	last_insert = Range::empty();
491	}
492	Diff::Delete(range) => {
493	skip_overlap(&last_delete, range);
494	if range.len == `0` {
495	continue;
496	}
497	let adjust = boundary_down(range.doc, range.offset);
498	range.offset -= adjust;
499	range.len += adjust;
500	let adjust = boundary_up(range.doc, range.offset + range.len);
501	range.len += adjust;
502	last_delete = *range;
503	}
504	Diff::Insert(range) => {
505	skip_overlap(&last_insert, range);
506	if range.len == `0` {
507	continue;
508	}
509	let adjust = boundary_down(range.doc, range.offset);
510	range.offset -= adjust;
511	range.len += adjust;
512	let adjust = boundary_up(range.doc, range.offset + range.len);
513	range.len += adjust;
514	last_insert = *range;
515	}
516	}
517	solution.diffs[retain] = diff;
518	retain += `1`;
519	}
520
521	solution.diffs.truncate(retain);
522	}
523
524	// Reduce the number of edits by eliminating semantically trivial equalities.
525	fn cleanup_semantic(solution: &mut Solution) {
526	let mut diffs = &mut solution.diffs;
527	if diffs.is_empty() {
528	return;
529	}
530
531	let mut changes = `false`;
532	let mut equalities = VecDeque::new(); // Double-ended queue of equalities.
533	let mut last_equality = None; // Always equal to equalities.peek().text
534	let mut pointer = `0`;
535	// Number of characters that changed prior to the equality.
536	let mut len_insertions1 = `0`;
537	let mut len_deletions1 = `0`;
538	// Number of characters that changed after the equality.
539	let mut len_insertions2 = `0`;
540	let mut len_deletions2 = `0`;
541	while let Some(&this_diff) = diffs.get(pointer) {
542	match this_diff {
543	Diff::Equal(text1, text2) => {
544	equalities.push_back(pointer);
545	len_insertions1 = len_insertions2;
546	len_deletions1 = len_deletions2;
547	len_insertions2 = `0`;
548	len_deletions2 = `0`;
549	last_equality = Some((text1, text2));
550	pointer += `1`;
551	continue;
552	}
553	Diff::Delete(text) => len_deletions2 += text.len,
554	Diff::Insert(text) => len_insertions2 += text.len,
555	}
556	// Eliminate an equality that is smaller or equal to the edits on both
557	// sides of it.
558	if last_equality.map_or(`false`, \|(last_equality, _)\| {
559	last_equality.len <= cmp::max(len_insertions1, len_deletions1)
560	&& last_equality.len <= cmp::max(len_insertions2, len_deletions2)
561	}) {
562	// Jump back to offending equality.
563	pointer = equalities.pop_back().unwrap();
564
565	// Replace equality with a delete.
566	diffs[pointer] = Diff::Delete(last_equality.unwrap().0);
567	// Insert a corresponding insert.
568	diffs.insert(pointer + `1`, Diff::Insert(last_equality.unwrap().1));
569
570	len_insertions1 = `0`; // Reset the counters.
571	len_insertions2 = `0`;
572	len_deletions1 = `0`;
573	len_deletions2 = `0`;
574	last_equality = None;
575	changes = `true`;
576
577	// Throw away the previous equality (it needs to be reevaluated).
578	equalities.pop_back();
579	if let Some(back) = equalities.back() {
580	// There is a safe equality we can fall back to.
581	pointer = *back;
582	} else {
583	// There are no previous equalities, jump back to the start.
584	pointer = `0`;
585	continue;
586	}
587	}
588	pointer += `1`;
589	}
590
591	// Normalize the diff.
592	if changes {
593	cleanup_merge(solution);
594	}
595	cleanup_semantic_lossless(solution);
596	diffs = &mut solution.diffs;
597
598	// Find any overlaps between deletions and insertions.
599	// e.g: <del>abcxxx</del><ins>xxxdef</ins>
600	// -> <del>abc</del>xxx<ins>def</ins>
601	// e.g: <del>xxxabc</del><ins>defxxx</ins>
602	// -> <ins>def</ins>xxx<del>abc</del>
603	// Only extract an overlap if it is as big as the edit ahead or behind it.
604	let mut pointer = `1`;
605	while let Some(&this_diff) = diffs.get(pointer) {
606	let prev_diff = diffs[pointer - `1`];
607	if let (Diff::Delete(deletion), Diff::Insert(insertion)) = (prev_diff, this_diff) {
608	let overlap_len1 = common_overlap(deletion, insertion);
609	let overlap_len2 = common_overlap(insertion, deletion);
610	let overlap_min = cmp::min(deletion.len, insertion.len);
611	if overlap_len1 >= overlap_len2 && `2` * overlap_len1 >= overlap_min {
612	// Overlap found. Insert an equality and trim the surrounding edits.
613	diffs.insert(
614	pointer,
615	Diff::Equal(
616	deletion.substring(deletion.len - overlap_len1..deletion.len),
617	insertion.substring(..overlap_len1),
618	),
619	);
620	diffs[pointer - `1`] =
621	Diff::Delete(deletion.substring(..deletion.len - overlap_len1));
622	diffs[pointer + `1`] = Diff::Insert(insertion.substring(overlap_len1..));
623	} else if overlap_len1 < overlap_len2 && `2` * overlap_len2 >= overlap_min {
624	// Reverse overlap found.
625	// Insert an equality and swap and trim the surrounding edits.
626	diffs.insert(
627	pointer,
628	Diff::Equal(
629	deletion.substring(..overlap_len2),
630	insertion.substring(insertion.len - overlap_len2..insertion.len),
631	),
632	);
633	diffs[pointer - `1`] =
634	Diff::Insert(insertion.substring(..insertion.len - overlap_len2));
635	diffs[pointer + `1`] = Diff::Delete(deletion.substring(overlap_len2..));
636	}
637	pointer += `1`;
638	}
639	pointer += `1`;
640	}
641	}
642
643	// Look for single edits surrounded on both sides by equalities which can be
644	// shifted sideways to align the edit to a word boundary.
645	//
646	// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
647	fn cleanup_semantic_lossless(solution: &mut Solution) {
648	let diffs = &mut solution.diffs;
649	let mut pointer = `1`;
650	while let Some(&next_diff) = diffs.get(pointer + `1`) {
651	let prev_diff = diffs[pointer - `1`];
652	if let (
653	Diff::Equal(mut prev_equal1, mut prev_equal2),
654	Diff::Equal(mut next_equal1, mut next_equal2),
655	) = (prev_diff, next_diff)
656	{
657	// This is a single edit surrounded by equalities.
658	let mut edit = diffs[pointer];
659
660	// First, shift the edit as far left as possible.
661	let common_offset = common_suffix(prev_equal1, edit.text());
662	let original_prev_len = prev_equal1.len;
663	prev_equal1.len -= common_offset;
664	prev_equal2.len -= common_offset;
665	edit.shift_left(common_offset);
666	next_equal1.offset -= common_offset;
667	next_equal1.len += common_offset;
668	next_equal2.offset -= common_offset;
669	next_equal2.len += common_offset;
670
671	// Second, step character by character right, looking for the best fit.
672	let mut best_prev_equal = (prev_equal1, prev_equal2);
673	let mut best_edit = edit;
674	let mut best_next_equal = (next_equal1, next_equal2);
675	let mut best_score = cleanup_semantic_score(prev_equal1, edit.text())
676	+ cleanup_semantic_score(edit.text(), next_equal1);
677	while !edit.text().is_empty()
678	&& !next_equal1.is_empty()
679	&& edit.text().chars().next().unwrap() == next_equal1.chars().next().unwrap()
680	{
681	prev_equal1.len += `1`;
682	prev_equal2.len += `1`;
683	edit.shift_right(`1`);
684	next_equal1.offset += `1`;
685	next_equal1.len -= `1`;
686	next_equal2.offset += `1`;
687	next_equal2.len -= `1`;
688	let score = cleanup_semantic_score(prev_equal1, edit.text())
689	+ cleanup_semantic_score(edit.text(), next_equal1);
690	// The >= encourages trailing rather than leading whitespace on edits.
691	if score >= best_score {
692	best_score = score;
693	best_prev_equal = (prev_equal1, prev_equal2);
694	best_edit = edit;
695	best_next_equal = (next_equal1, next_equal2);
696	}
697	}
698
699	if original_prev_len != best_prev_equal.0.len {
700	// We have an improvement, save it back to the diff.
701	if best_next_equal.0.is_empty() {
702	diffs.remove(pointer + `1`);
703	} else {
704	diffs[pointer + `1`] = Diff::Equal(best_next_equal.0, best_next_equal.1);
705	}
706	diffs[pointer] = best_edit;
707	if best_prev_equal.0.is_empty() {
708	diffs.remove(pointer - `1`);
709	pointer -= `1`;
710	} else {
711	diffs[pointer - `1`] = Diff::Equal(best_prev_equal.0, best_prev_equal.1);
712	}
713	}
714	}
715	pointer += `1`;
716	}
717	}
718
719	// Given two strings, compute a score representing whether the internal boundary
720	// falls on logical boundaries.
721	//
722	// Scores range from 6 (best) to 0 (worst).
723	fn cleanup_semantic_score(one: Range, two: Range) -> usize {
724	if one.is_empty() \|\| two.is_empty() {
725	// Edges are the best.
726	return `6`;
727	}
728
729	// Each port of this function behaves slightly differently due to subtle
730	// differences in each language's definition of things like 'whitespace'.
731	// Since this function's purpose is largely cosmetic, the choice has been
732	// made to use each language's native features rather than force total
733	// conformity.
734	let char1 = one.chars().next_back().unwrap();
735	let char2 = two.chars().next().unwrap();
736	let non_alphanumeric1 = !char1.is_ascii_alphanumeric();
737	let non_alphanumeric2 = !char2.is_ascii_alphanumeric();
738	let whitespace1 = non_alphanumeric1 && char1.is_ascii_whitespace();
739	let whitespace2 = non_alphanumeric2 && char2.is_ascii_whitespace();
740	let line_break1 = whitespace1 && char1.is_control();
741	let line_break2 = whitespace2 && char2.is_control();
742	let blank_line1 =
743	line_break1 && (one.ends_with(['`\n`', '`\n`']) \|\| one.ends_with(['`\n`', '`\r`', '`\n`']));
744	let blank_line2 =
745	line_break2 && (two.starts_with(['`\n`', '`\n`']) \|\| two.starts_with(['`\r`', '`\n`', '`\r`', '`\n`']));
746
747	if blank_line1 \|\| blank_line2 {
748	// Five points for blank lines.
749	`5`
750	} else if line_break1 \|\| line_break2 {
751	// Four points for line breaks.
752	`4`
753	} else if non_alphanumeric1 && !whitespace1 && whitespace2 {
754	// Three points for end of sentences.
755	`3`
756	} else if whitespace1 \|\| whitespace2 {
757	// Two points for whitespace.
758	`2`
759	} else if non_alphanumeric1 \|\| non_alphanumeric2 {
760	// One point for non-alphanumeric.
761	`1`
762	} else {
763	`0`
764	}
765	}
766
767	// Reorder and merge like edit sections. Merge equalities. Any edit section can
768	// move as long as it doesn't cross an equality.
769	fn cleanup_merge(solution: &mut Solution) {
770	let diffs = &mut solution.diffs;
771	while !diffs.is_empty() {
772	diffs.push(Diff::Equal(
773	solution.text1.substring(solution.text1.len..),
774	solution.text2.substring(solution.text2.len..),
775	)); // Add a dummy entry at the end.
776	let mut pointer = `0`;
777	let mut count_delete = `0`;
778	let mut count_insert = `0`;
779	let mut text_delete = Range::empty();
780	let mut text_insert = Range::empty();
781	while let Some(&this_diff) = diffs.get(pointer) {
782	match this_diff {
783	Diff::Insert(text) => {
784	count_insert += `1`;
785	if text_insert.is_empty() {
786	text_insert = text;
787	} else {
788	text_insert.len += text.len;
789	}
790	}
791	Diff::Delete(text) => {
792	count_delete += `1`;
793	if text_delete.is_empty() {
794	text_delete = text;
795	} else {
796	text_delete.len += text.len;
797	}
798	}
799	Diff::Equal(text, _) => {
800	let count_both = count_delete + count_insert;
801	if count_both > `1` {
802	let both_types = count_delete != `0` && count_insert != `0`;
803	// Delete the offending records.
804	diffs.drain(pointer - count_both..pointer);
805	pointer -= count_both;
806	if both_types {
807	// Factor out any common prefix.
808	let common_length = common_prefix(text_insert, text_delete);
809	if common_length != `0` {
810	if pointer > `0` {
811	match &mut diffs[pointer - `1`] {
812	Diff::Equal(this_diff1, this_diff2) => {
813	this_diff1.len += common_length;
814	this_diff2.len += common_length;
815	}
816	_ => unreachable!(
817	"previous diff should have been an equality"
818	),
819	}
820	} else {
821	diffs.insert(
822	pointer,
823	Diff::Equal(
824	text_delete.substring(..common_length),
825	text_insert.substring(..common_length),
826	),
827	);
828	pointer += `1`;
829	}
830	text_insert = text_insert.substring(common_length..);
831	text_delete = text_delete.substring(common_length..);
832	}
833	// Factor out any common suffix.
834	let common_length = common_suffix(text_insert, text_delete);
835	if common_length != `0` {
836	diffs[pointer].grow_left(common_length);
837	text_insert.len -= common_length;
838	text_delete.len -= common_length;
839	}
840	}
841	// Insert the merged records.
842	if !text_delete.is_empty() {
843	diffs.insert(pointer, Diff::Delete(text_delete));
844	pointer += `1`;
845	}
846	if !text_insert.is_empty() {
847	diffs.insert(pointer, Diff::Insert(text_insert));
848	pointer += `1`;
849	}
850	} else if pointer > `0` {
851	if let Some(Diff::Equal(prev_equal1, prev_equal2)) =
852	diffs.get_mut(pointer - `1`)
853	{
854	// Merge this equality with the previous one.
855	prev_equal1.len += text.len;
856	prev_equal2.len += text.len;
857	diffs.remove(pointer);
858	pointer -= `1`;
859	}
860	}
861	count_insert = `0`;
862	count_delete = `0`;
863	text_delete = Range::empty();
864	text_insert = Range::empty();
865	}
866	}
867	pointer += `1`;
868	}
869	if diffs.last().unwrap().text().is_empty() {
870	diffs.pop(); // Remove the dummy entry at the end.
871	}
872
873	// Second pass: look for single edits surrounded on both sides by equalities
874	// which can be shifted sideways to eliminate an equality.
875	// e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
876	let mut changes = `false`;
877	let mut pointer = `1`;
878	// Intentionally ignore the first and last element (don't need checking).
879	while let Some(&next_diff) = diffs.get(pointer + `1`) {
880	let prev_diff = diffs[pointer - `1`];
881	let this_diff = diffs[pointer];
882	if let (Diff::Equal(prev_diff, _), Diff::Equal(next_diff, _)) = (prev_diff, next_diff) {
883	// This is a single edit surrounded by equalities.
884	if this_diff.text().ends_with(prev_diff) {
885	// Shift the edit over the previous equality.
886	diffs[pointer].shift_left(prev_diff.len);
887	diffs[pointer + `1`].grow_left(prev_diff.len);
888	diffs.remove(pointer - `1`); // Delete prev_diff.
889	changes = `true`;
890	} else if this_diff.text().starts_with(next_diff) {
891	// Shift the edit over the next equality.
892	diffs[pointer - `1`].grow_right(next_diff.len);
893	diffs[pointer].shift_right(next_diff.len);
894	diffs.remove(pointer + `1`); // Delete next_diff.
895	changes = `true`;
896	}
897	}
898	pointer += `1`;
899	}
900	// If shifts were made, the diff needs reordering and another shift sweep.
901	if !changes {
902	return;
903	}
904	}
905	}
906
907	impl Debug for Chunk<'_> {
908	fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
909	let (name: &'static str, text: &str) = match *self {
910	Chunk::Equal(text: &str) => ("Equal", text),
911	Chunk::Delete(text: &str) => ("Delete", text),
912	Chunk::Insert(text: &str) => ("Insert", text),
913	};
914	write!(formatter, "{}({:?})", name, text)
915	}
916	}
917
918	impl Debug for Diff<'_, '_> {
919	fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
920	let (name: &'static str, range: Range<'_>) = match *self {
921	Diff::Equal(range: Range<'_>, _) => ("Equal", range),
922	Diff::Delete(range: Range<'_>) => ("Delete", range),
923	Diff::Insert(range: Range<'_>) => ("Insert", range),
924	};
925	formatter.write_str(data:name)?;
926	formatter.write_str(data:"(`\"`")?;
927	for ch: char in range.chars() {
928	if ch == '`\'`' {
929	// escape_debug turns this into "\'" which is unnecessary.
930	formatter.write_char(ch)?;
931	} else {
932	Display::fmt(&ch.escape_debug(), f:formatter)?;
933	}
934	}
935	formatter.write_str(data:"`\"`)")?;
936	Ok(())
937	}
938	}
939