lib.rs source code [crates/dissimilar-1.0.6/src/lib.rs]

1	//! [![github]](https://github.com/dtolnay/dissimilar)&ensp;[![crates-io]](https://crates.io/crates/dissimilar)&ensp;[![docs-rs]](https://docs.rs/dissimilar)
2	//!
3	//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
4	//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
5	//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
6	//!
7	//! <br>
8	//!
9	//! ## Diff library with semantic cleanup, based on Google's diff-match-patch
10	//!
11	//! This library is a port of the Diff component of [Diff Match Patch] to Rust.
12	//! The diff implementation is based on [Myers' diff algorithm] but includes
13	//! some [semantic cleanups] to increase human readability by factoring out
14	//! commonalities which are likely to be coincidental.
15	//!
16	//! Diff Match Patch was originally built in 2006 to power Google Docs.
17	//!
18	//! # Interface
19	//!
20	//! Here is the entire API of the Rust implementation. It operates on borrowed
21	//! strings and the return value of the diff algorithm is a vector of chunks
22	//! pointing into slices of those input strings.
23	//!
24	//! ```
25	//! pub enum Chunk<'a> {
26	//! Equal(&'a str),
27	//! Delete(&'a str),
28	//! Insert(&'a str),
29	//! }
30	//!
31	//! # const IGNORE: &str = stringify! {
32	//! pub fn diff(text1: &str, text2: &str) -> Vec<Chunk>;
33	//! # };
34	//! ```
35	//!
36	//! [Diff Match Patch]: https://github.com/google/diff-match-patch
37	//! [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf
38	//! [semantic cleanups]: https://neil.fraser.name/writing/diff/
39
40	#![doc(html_root_url = "https://docs.rs/dissimilar/1.0.6")]
41	#![allow(
42	clippy::blocks_in_if_conditions,
43	clippy::bool_to_int_with_if,
44	clippy::cast_possible_wrap,
45	clippy::cast_sign_loss,
46	clippy::cloned_instead_of_copied, // https://github.com/rust-lang/rust-clippy/issues/7127
47	clippy::collapsible_else_if,
48	clippy::comparison_chain,
49	clippy::match_same_arms,
50	clippy::module_name_repetitions,
51	clippy::must_use_candidate,
52	clippy::new_without_default,
53	clippy::octal_escapes,
54	clippy::shadow_unrelated,
55	clippy::similar_names,
56	clippy::too_many_lines,
57	clippy::unseparated_literal_suffix,
58	unused_parens, // false positive on Some(&(mut diff)) pattern
59	)]
60
61	mod find;
62	mod range;
63
64	#[cfg(test)]
65	mod tests;
66
67	use crate::range::{slice, Range};
68	use std::cmp;
69	use std::collections::VecDeque;
70	use std::fmt::{self, Debug, Display, Write};
71
72	#[derive(Copy, Clone, PartialEq, Eq)]
73	pub enum Chunk<'a> {
74	Equal(&'a str),
75	Delete(&'a str),
76	Insert(&'a str),
77	}
78
79	#[derive(Copy, Clone)]
80	enum Diff<'a, 'b> {
81	Equal(Range<'a>, Range<'b>),
82	Delete(Range<'a>),
83	Insert(Range<'b>),
84	}
85
86	impl<'tmp, 'a: 'tmp, 'b: 'tmp> Diff<'a, 'b> {
87	fn text(&self) -> Range<'tmp> {
88	match *self {
89	Diff::Equal(range, _) \| Diff::Delete(range) \| Diff::Insert(range) => range,
90	}
91	}
92
93	fn grow_left(&mut self, increment: usize) {
94	self.for_each(\|range\| {
95	range.offset -= increment;
96	range.len += increment;
97	});
98	}
99
100	fn grow_right(&mut self, increment: usize) {
101	self.for_each(\|range\| range.len += increment);
102	}
103
104	fn shift_left(&mut self, increment: usize) {
105	self.for_each(\|range\| range.offset -= increment);
106	}
107
108	fn shift_right(&mut self, increment: usize) {
109	self.for_each(\|range\| range.offset += increment);
110	}
111
112	fn for_each(&mut self, f: impl Fn(&mut Range)) {
113	match self {
114	Diff::Equal(range1, range2) => {
115	f(range1);
116	f(range2);
117	}
118	Diff::Delete(range) => f(range),
119	Diff::Insert(range) => f(range),
120	}
121	}
122	}
123
124	pub fn diff<'a>(text1: &'a str, text2: &'a str) -> Vec<Chunk<'a>> {
125	let chars1: Vec<char> = text1.chars().collect();
126	let chars2: Vec<char> = text2.chars().collect();
127	let range1 = Range::new(&chars1, ..);
128	let range2 = Range::new(&chars2, ..);
129
130	let mut solution = main(range1, range2);
131	cleanup_char_boundary(&mut solution);
132	cleanup_semantic(&mut solution);
133	cleanup_merge(&mut solution);
134
135	let mut chunks = Vec::new();
136	let mut pos1 = `0`;
137	let mut pos2 = `0`;
138	for diff in solution.diffs {
139	chunks.push(match diff {
140	Diff::Equal(range, _) => {
141	let len = range.len_bytes();
142	let chunk = Chunk::Equal(&text1[pos1..pos1 + len]);
143	pos1 += len;
144	pos2 += len;
145	chunk
146	}
147	Diff::Delete(range) => {
148	let len = range.len_bytes();
149	let chunk = Chunk::Delete(&text1[pos1..pos1 + len]);
150	pos1 += len;
151	chunk
152	}
153	Diff::Insert(range) => {
154	let len = range.len_bytes();
155	let chunk = Chunk::Insert(&text2[pos2..pos2 + len]);
156	pos2 += len;
157	chunk
158	}
159	});
160	}
161	chunks
162	}
163
164	struct Solution<'a, 'b> {
165	text1: Range<'a>,
166	text2: Range<'b>,
167	diffs: Vec<Diff<'a, 'b>>,
168	}
169
170	fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> {
171	let whole1 = text1;
172	let whole2 = text2;
173
174	// Trim off common prefix.
175	let common_prefix_len = common_prefix(text1, text2);
176	let common_prefix = Diff::Equal(
177	text1.substring(..common_prefix_len),
178	text2.substring(..common_prefix_len),
179	);
180	text1 = text1.substring(common_prefix_len..);
181	text2 = text2.substring(common_prefix_len..);
182
183	// Trim off common suffix.
184	let common_suffix_len = common_suffix(text1, text2);
185	let common_suffix = Diff::Equal(
186	text1.substring(text1.len - common_suffix_len..),
187	text2.substring(text2.len - common_suffix_len..),
188	);
189	text1 = text1.substring(..text1.len - common_suffix_len);
190	text2 = text2.substring(..text2.len - common_suffix_len);
191
192	// Compute the diff on the middle block.
193	let mut solution = Solution {
194	text1: whole1,
195	text2: whole2,
196	diffs: compute(text1, text2),
197	};
198
199	// Restore the prefix and suffix.
200	if common_prefix_len > `0` {
201	solution.diffs.insert(`0`, common_prefix);
202	}
203	if common_suffix_len > `0` {
204	solution.diffs.push(common_suffix);
205	}
206
207	cleanup_merge(&mut solution);
208
209	solution
210	}
211
212	// Find the differences between two texts. Assumes that the texts do not have
213	// any common prefix or suffix.
214	fn compute<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
215	match (text1.is_empty(), text2.is_empty()) {
216	(`true`, `true`) => return Vec::new(),
217	(`true`, `false`) => return vec![Diff::Insert(text2)],
218	(`false`, `true`) => return vec![Diff::Delete(text1)],
219	(`false`, `false`) => {}
220	}
221
222	// Check for entire shorter text inside the longer text.
223	if text1.len > text2.len {
224	if let Some(i) = text1.find(text2) {
225	return vec![
226	Diff::Delete(text1.substring(..i)),
227	Diff::Equal(text1.substring(i..i + text2.len), text2),
228	Diff::Delete(text1.substring(i + text2.len..)),
229	];
230	}
231	} else {
232	if let Some(i) = text2.find(text1) {
233	return vec![
234	Diff::Insert(text2.substring(..i)),
235	Diff::Equal(text1, text2.substring(i..i + text1.len)),
236	Diff::Insert(text2.substring(i + text1.len..)),
237	];
238	}
239	}
240
241	if text1.len == `1` \|\| text2.len == `1` {
242	// Single character string.
243	// After the previous check, the character can't be an equality.
244	return vec![Diff::Delete(text1), Diff::Insert(text2)];
245	}
246
247	bisect(text1, text2)
248	}
249
250	// Find the 'middle snake' of a diff, split the problem in two and return the
251	// recursively constructed diff.
252	//
253	// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
254	fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
255	let max_d = (text1.len + text2.len + `1`) / `2`;
256	let v_offset = max_d;
257	let v_len = `2` * max_d;
258	let mut v1 = vec![`-1isize`; v_len];
259	let mut v2 = vec![`-1isize`; v_len];
260	v1[v_offset + `1`] = `0`;
261	v2[v_offset + `1`] = `0`;
262	let delta = text1.len as isize - text2.len as isize;
263	// If the total number of characters is odd, then the front path will
264	// collide with the reverse path.
265	let front = delta % `2` != `0`;
266	// Offsets for start and end of k loop.
267	// Prevents mapping of space beyond the grid.
268	let mut k1start = `0`;
269	let mut k1end = `0`;
270	let mut k2start = `0`;
271	let mut k2end = `0`;
272	for d in `0`..max_d as isize {
273	// Walk the front path one step.
274	let mut k1 = -d + k1start;
275	while k1 <= d - k1end {
276	let k1_offset = (v_offset as isize + k1) as usize;
277	let mut x1 = if k1 == -d \|\| (k1 != d && v1[k1_offset - `1`] < v1[k1_offset + `1`]) {
278	v1[k1_offset + `1`]
279	} else {
280	v1[k1_offset - `1`] + `1`
281	} as usize;
282	let mut y1 = (x1 as isize - k1) as usize;
283	if let (Some(s1), Some(s2)) = (text1.get(x1..), text2.get(y1..)) {
284	let advance = common_prefix(s1, s2);
285	x1 += advance;
286	y1 += advance;
287	}
288	v1[k1_offset] = x1 as isize;
289	if x1 > text1.len {
290	// Ran off the right of the graph.
291	k1end += `2`;
292	} else if y1 > text2.len {
293	// Ran off the bottom of the graph.
294	k1start += `2`;
295	} else if front {
296	let k2_offset = v_offset as isize + delta - k1;
297	if k2_offset >= `0` && k2_offset < v_len as isize && v2[k2_offset as usize] != `-1` {
298	// Mirror x2 onto top-left coordinate system.
299	let x2 = text1.len as isize - v2[k2_offset as usize];
300	if x1 as isize >= x2 {
301	// Overlap detected.
302	return bisect_split(text1, text2, x1, y1);
303	}
304	}
305	}
306	k1 += `2`;
307	}
308
309	// Walk the reverse path one step.
310	let mut k2 = -d + k2start;
311	while k2 <= d - k2end {
312	let k2_offset = (v_offset as isize + k2) as usize;
313	let mut x2 = if k2 == -d \|\| (k2 != d && v2[k2_offset - `1`] < v2[k2_offset + `1`]) {
314	v2[k2_offset + `1`]
315	} else {
316	v2[k2_offset - `1`] + `1`
317	} as usize;
318	let mut y2 = (x2 as isize - k2) as usize;
319	if x2 < text1.len && y2 < text2.len {
320	let advance = common_suffix(
321	text1.substring(..text1.len - x2),
322	text2.substring(..text2.len - y2),
323	);
324	x2 += advance;
325	y2 += advance;
326	}
327	v2[k2_offset] = x2 as isize;
328	if x2 > text1.len {
329	// Ran off the left of the graph.
330	k2end += `2`;
331	} else if y2 > text2.len {
332	// Ran off the top of the graph.
333	k2start += `2`;
334	} else if !front {
335	let k1_offset = v_offset as isize + delta - k2;
336	if k1_offset >= `0` && k1_offset < v_len as isize && v1[k1_offset as usize] != `-1` {
337	let x1 = v1[k1_offset as usize] as usize;
338	let y1 = v_offset + x1 - k1_offset as usize;
339	// Mirror x2 onto top-left coordinate system.
340	x2 = text1.len - x2;
341	if x1 >= x2 {
342	// Overlap detected.
343	return bisect_split(text1, text2, x1, y1);
344	}
345	}
346	}
347	k2 += `2`;
348	}
349	}
350	// Number of diffs equals number of characters, no commonality at all.
351	vec![Diff::Delete(text1), Diff::Insert(text2)]
352	}
353
354	// Given the location of the 'middle snake', split the diff in two parts and
355	// recurse.
356	fn bisect_split<'a, 'b>(
357	text1: Range<'a>,
358	text2: Range<'b>,
359	x: usize,
360	y: usize,
361	) -> Vec<Diff<'a, 'b>> {
362	let (text1a: Range<'_>, text1b: Range<'_>) = text1.split_at(mid:x);
363	let (text2a: Range<'_>, text2b: Range<'_>) = text2.split_at(mid:y);
364
365	// Compute both diffs serially.
366	let mut diffs: Vec> = main(text1:text1a, text2:text2a).diffs;
367	diffs.extend(iter:main(text1:text1b, text2:text2b).diffs);
368
369	diffs
370	}
371
372	// Determine the length of the common prefix of two strings.
373	fn common_prefix(text1: Range, text2: Range) -> usize {
374	for (i: usize, (b1: char, b2: char)) in text1.chars().zip(text2.chars()).enumerate() {
375	if b1 != b2 {
376	return i;
377	}
378	}
379	cmp::min(v1:text1.len, v2:text2.len)
380	}
381
382	// Determine the length of the common suffix of two strings.
383	fn common_suffix(text1: Range, text2: Range) -> usize {
384	for (i: usize, (b1: char, b2: char)) in text1.chars().rev().zip(text2.chars().rev()).enumerate() {
385	if b1 != b2 {
386	return i;
387	}
388	}
389	cmp::min(v1:text1.len, v2:text2.len)
390	}
391
392	// Determine if the suffix of one string is the prefix of another.
393	//
394	// Returns the number of characters common to the end of the first string and
395	// the start of the second string.
396	fn common_overlap(mut text1: Range, mut text2: Range) -> usize {
397	// Eliminate the null case.
398	if text1.is_empty() \|\| text2.is_empty() {
399	return `0`;
400	}
401	// Truncate the longer string.
402	if text1.len > text2.len {
403	text1 = text1.substring(text1.len - text2.len..);
404	} else if text1.len < text2.len {
405	text2 = text2.substring(..text1.len);
406	}
407	// Quick check for the worst case.
408	if slice(text1) == slice(text2) {
409	return text1.len;
410	}
411
412	// Start by looking for a single character match
413	// and increase length until no match is found.
414	// Performance analysis: https://neil.fraser.name/news/2010/11/04/
415	let mut best = `0`;
416	let mut length = `1`;
417	loop {
418	let pattern = text1.substring(text1.len - length..);
419	let found = match text2.find(pattern) {
420	Some(found) => found,
421	None => return best,
422	};
423	length += found;
424	if found == `0`
425	\|\| slice(text1.substring(text1.len - length..)) == slice(text2.substring(..length))
426	{
427	best = length;
428	length += `1`;
429	}
430	}
431	}
432
433	fn cleanup_char_boundary(solution: &mut Solution) {
434	fn is_segmentation_boundary(doc: &[char], pos: usize) -> bool {
435	// FIXME: use unicode-segmentation crate?
436	let _ = doc;
437	let _ = pos;
438	`true`
439	}
440
441	fn boundary_down(doc: &[char], pos: usize) -> usize {
442	let mut adjust = `0`;
443	while !is_segmentation_boundary(doc, pos - adjust) {
444	adjust += `1`;
445	}
446	adjust
447	}
448
449	fn boundary_up(doc: &[char], pos: usize) -> usize {
450	let mut adjust = `0`;
451	while !is_segmentation_boundary(doc, pos + adjust) {
452	adjust += `1`;
453	}
454	adjust
455	}
456
457	fn skip_overlap<'a>(prev: &Range<'a>, range: &mut Range<'a>) {
458	let prev_end = prev.offset + prev.len;
459	if prev_end > range.offset {
460	let delta = cmp::min(prev_end - range.offset, range.len);
461	range.offset += delta;
462	range.len -= delta;
463	}
464	}
465
466	let mut read = `0`;
467	let mut retain = `0`;
468	let mut last_delete = Range::empty();
469	let mut last_insert = Range::empty();
470	while let Some(&(mut diff)) = solution.diffs.get(read) {
471	read += `1`;
472	match &mut diff {
473	Diff::Equal(range1, range2) => {
474	let adjust = boundary_up(range1.doc, range1.offset);
475	// If the whole range is sub-character, skip it.
476	if range1.len <= adjust {
477	continue;
478	}
479	range1.offset += adjust;
480	range1.len -= adjust;
481	range2.offset += adjust;
482	range2.len -= adjust;
483	let adjust = boundary_down(range1.doc, range1.offset + range1.len);
484	range1.len -= adjust;
485	range2.len -= adjust;
486	last_delete = Range::empty();
487	last_insert = Range::empty();
488	}
489	Diff::Delete(range) => {
490	skip_overlap(&last_delete, range);
491	if range.len == `0` {
492	continue;
493	}
494	let adjust = boundary_down(range.doc, range.offset);
495	range.offset -= adjust;
496	range.len += adjust;
497	let adjust = boundary_up(range.doc, range.offset + range.len);
498	range.len += adjust;
499	last_delete = *range;
500	}
501	Diff::Insert(range) => {
502	skip_overlap(&last_insert, range);
503	if range.len == `0` {
504	continue;
505	}
506	let adjust = boundary_down(range.doc, range.offset);
507	range.offset -= adjust;
508	range.len += adjust;
509	let adjust = boundary_up(range.doc, range.offset + range.len);
510	range.len += adjust;
511	last_insert = *range;
512	}
513	}
514	solution.diffs[retain] = diff;
515	retain += `1`;
516	}
517
518	solution.diffs.truncate(retain);
519	}
520
521	// Reduce the number of edits by eliminating semantically trivial equalities.
522	fn cleanup_semantic(solution: &mut Solution) {
523	let mut diffs = &mut solution.diffs;
524	if diffs.is_empty() {
525	return;
526	}
527
528	let mut changes = `false`;
529	let mut equalities = VecDeque::new(); // Double-ended queue of equalities.
530	let mut last_equality = None; // Always equal to equalities.peek().text
531	let mut pointer = `0`;
532	// Number of characters that changed prior to the equality.
533	let mut len_insertions1 = `0`;
534	let mut len_deletions1 = `0`;
535	// Number of characters that changed after the equality.
536	let mut len_insertions2 = `0`;
537	let mut len_deletions2 = `0`;
538	while let Some(&this_diff) = diffs.get(pointer) {
539	match this_diff {
540	Diff::Equal(text1, text2) => {
541	equalities.push_back(pointer);
542	len_insertions1 = len_insertions2;
543	len_deletions1 = len_deletions2;
544	len_insertions2 = `0`;
545	len_deletions2 = `0`;
546	last_equality = Some((text1, text2));
547	pointer += `1`;
548	continue;
549	}
550	Diff::Delete(text) => len_deletions2 += text.len,
551	Diff::Insert(text) => len_insertions2 += text.len,
552	}
553	// Eliminate an equality that is smaller or equal to the edits on both
554	// sides of it.
555	if last_equality.map_or(`false`, \|(last_equality, _)\| {
556	last_equality.len <= cmp::max(len_insertions1, len_deletions1)
557	&& last_equality.len <= cmp::max(len_insertions2, len_deletions2)
558	}) {
559	// Jump back to offending equality.
560	pointer = equalities.pop_back().unwrap();
561
562	// Replace equality with a delete.
563	diffs[pointer] = Diff::Delete(last_equality.unwrap().0);
564	// Insert a corresponding insert.
565	diffs.insert(pointer + `1`, Diff::Insert(last_equality.unwrap().1));
566
567	len_insertions1 = `0`; // Reset the counters.
568	len_insertions2 = `0`;
569	len_deletions1 = `0`;
570	len_deletions2 = `0`;
571	last_equality = None;
572	changes = `true`;
573
574	// Throw away the previous equality (it needs to be reevaluated).
575	equalities.pop_back();
576	if let Some(back) = equalities.back() {
577	// There is a safe equality we can fall back to.
578	pointer = *back;
579	} else {
580	// There are no previous equalities, jump back to the start.
581	pointer = `0`;
582	continue;
583	}
584	}
585	pointer += `1`;
586	}
587
588	// Normalize the diff.
589	if changes {
590	cleanup_merge(solution);
591	}
592	cleanup_semantic_lossless(solution);
593	diffs = &mut solution.diffs;
594
595	// Find any overlaps between deletions and insertions.
596	// e.g: <del>abcxxx</del><ins>xxxdef</ins>
597	// -> <del>abc</del>xxx<ins>def</ins>
598	// e.g: <del>xxxabc</del><ins>defxxx</ins>
599	// -> <ins>def</ins>xxx<del>abc</del>
600	// Only extract an overlap if it is as big as the edit ahead or behind it.
601	let mut pointer = `1`;
602	while let Some(&this_diff) = diffs.get(pointer) {
603	let prev_diff = diffs[pointer - `1`];
604	if let (Diff::Delete(deletion), Diff::Insert(insertion)) = (prev_diff, this_diff) {
605	let overlap_len1 = common_overlap(deletion, insertion);
606	let overlap_len2 = common_overlap(insertion, deletion);
607	let overlap_min = cmp::min(deletion.len, insertion.len);
608	if overlap_len1 >= overlap_len2 && `2` * overlap_len1 >= overlap_min {
609	// Overlap found. Insert an equality and trim the surrounding edits.
610	diffs.insert(
611	pointer,
612	Diff::Equal(
613	deletion.substring(deletion.len - overlap_len1..deletion.len),
614	insertion.substring(..overlap_len1),
615	),
616	);
617	diffs[pointer - `1`] =
618	Diff::Delete(deletion.substring(..deletion.len - overlap_len1));
619	diffs[pointer + `1`] = Diff::Insert(insertion.substring(overlap_len1..));
620	} else if overlap_len1 < overlap_len2 && `2` * overlap_len2 >= overlap_min {
621	// Reverse overlap found.
622	// Insert an equality and swap and trim the surrounding edits.
623	diffs.insert(
624	pointer,
625	Diff::Equal(
626	deletion.substring(..overlap_len2),
627	insertion.substring(insertion.len - overlap_len2..insertion.len),
628	),
629	);
630	diffs[pointer - `1`] =
631	Diff::Insert(insertion.substring(..insertion.len - overlap_len2));
632	diffs[pointer + `1`] = Diff::Delete(deletion.substring(overlap_len2..));
633	}
634	pointer += `1`;
635	}
636	pointer += `1`;
637	}
638	}
639
640	// Look for single edits surrounded on both sides by equalities which can be
641	// shifted sideways to align the edit to a word boundary.
642	//
643	// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
644	fn cleanup_semantic_lossless(solution: &mut Solution) {
645	let diffs = &mut solution.diffs;
646	let mut pointer = `1`;
647	while let Some(&next_diff) = diffs.get(pointer + `1`) {
648	let prev_diff = diffs[pointer - `1`];
649	if let (
650	Diff::Equal(mut prev_equal1, mut prev_equal2),
651	Diff::Equal(mut next_equal1, mut next_equal2),
652	) = (prev_diff, next_diff)
653	{
654	// This is a single edit surrounded by equalities.
655	let mut edit = diffs[pointer];
656
657	// First, shift the edit as far left as possible.
658	let common_offset = common_suffix(prev_equal1, edit.text());
659	let original_prev_len = prev_equal1.len;
660	prev_equal1.len -= common_offset;
661	prev_equal2.len -= common_offset;
662	edit.shift_left(common_offset);
663	next_equal1.offset -= common_offset;
664	next_equal1.len += common_offset;
665	next_equal2.offset -= common_offset;
666	next_equal2.len += common_offset;
667
668	// Second, step character by character right, looking for the best fit.
669	let mut best_prev_equal = (prev_equal1, prev_equal2);
670	let mut best_edit = edit;
671	let mut best_next_equal = (next_equal1, next_equal2);
672	let mut best_score = cleanup_semantic_score(prev_equal1, edit.text())
673	+ cleanup_semantic_score(edit.text(), next_equal1);
674	while !edit.text().is_empty()
675	&& !next_equal1.is_empty()
676	&& edit.text().chars().next().unwrap() == next_equal1.chars().next().unwrap()
677	{
678	prev_equal1.len += `1`;
679	prev_equal2.len += `1`;
680	edit.shift_right(`1`);
681	next_equal1.offset += `1`;
682	next_equal1.len -= `1`;
683	next_equal2.offset += `1`;
684	next_equal2.len -= `1`;
685	let score = cleanup_semantic_score(prev_equal1, edit.text())
686	+ cleanup_semantic_score(edit.text(), next_equal1);
687	// The >= encourages trailing rather than leading whitespace on edits.
688	if score >= best_score {
689	best_score = score;
690	best_prev_equal = (prev_equal1, prev_equal2);
691	best_edit = edit;
692	best_next_equal = (next_equal1, next_equal2);
693	}
694	}
695
696	if original_prev_len != best_prev_equal.0.len {
697	// We have an improvement, save it back to the diff.
698	if best_next_equal.0.is_empty() {
699	diffs.remove(pointer + `1`);
700	} else {
701	diffs[pointer + `1`] = Diff::Equal(best_next_equal.0, best_next_equal.1);
702	}
703	diffs[pointer] = best_edit;
704	if best_prev_equal.0.is_empty() {
705	diffs.remove(pointer - `1`);
706	pointer -= `1`;
707	} else {
708	diffs[pointer - `1`] = Diff::Equal(best_prev_equal.0, best_prev_equal.1);
709	}
710	}
711	}
712	pointer += `1`;
713	}
714	}
715
716	// Given two strings, compute a score representing whether the internal boundary
717	// falls on logical boundaries.
718	//
719	// Scores range from 6 (best) to 0 (worst).
720	fn cleanup_semantic_score(one: Range, two: Range) -> usize {
721	if one.is_empty() \|\| two.is_empty() {
722	// Edges are the best.
723	return `6`;
724	}
725
726	// Each port of this function behaves slightly differently due to subtle
727	// differences in each language's definition of things like 'whitespace'.
728	// Since this function's purpose is largely cosmetic, the choice has been
729	// made to use each language's native features rather than force total
730	// conformity.
731	let char1 = one.chars().next_back().unwrap();
732	let char2 = two.chars().next().unwrap();
733	let non_alphanumeric1 = !char1.is_ascii_alphanumeric();
734	let non_alphanumeric2 = !char2.is_ascii_alphanumeric();
735	let whitespace1 = non_alphanumeric1 && char1.is_ascii_whitespace();
736	let whitespace2 = non_alphanumeric2 && char2.is_ascii_whitespace();
737	let line_break1 = whitespace1 && char1.is_control();
738	let line_break2 = whitespace2 && char2.is_control();
739	let blank_line1 =
740	line_break1 && (one.ends_with(['`\n`', '`\n`']) \|\| one.ends_with(['`\n`', '`\r`', '`\n`']));
741	let blank_line2 =
742	line_break2 && (two.starts_with(['`\n`', '`\n`']) \|\| two.starts_with(['`\r`', '`\n`', '`\r`', '`\n`']));
743
744	if blank_line1 \|\| blank_line2 {
745	// Five points for blank lines.
746	`5`
747	} else if line_break1 \|\| line_break2 {
748	// Four points for line breaks.
749	`4`
750	} else if non_alphanumeric1 && !whitespace1 && whitespace2 {
751	// Three points for end of sentences.
752	`3`
753	} else if whitespace1 \|\| whitespace2 {
754	// Two points for whitespace.
755	`2`
756	} else if non_alphanumeric1 \|\| non_alphanumeric2 {
757	// One point for non-alphanumeric.
758	`1`
759	} else {
760	`0`
761	}
762	}
763
764	// Reorder and merge like edit sections. Merge equalities. Any edit section can
765	// move as long as it doesn't cross an equality.
766	fn cleanup_merge(solution: &mut Solution) {
767	let diffs = &mut solution.diffs;
768	while !diffs.is_empty() {
769	diffs.push(Diff::Equal(
770	solution.text1.substring(solution.text1.len..),
771	solution.text2.substring(solution.text2.len..),
772	)); // Add a dummy entry at the end.
773	let mut pointer = `0`;
774	let mut count_delete = `0`;
775	let mut count_insert = `0`;
776	let mut text_delete = Range::empty();
777	let mut text_insert = Range::empty();
778	while let Some(&this_diff) = diffs.get(pointer) {
779	match this_diff {
780	Diff::Insert(text) => {
781	count_insert += `1`;
782	if text_insert.is_empty() {
783	text_insert = text;
784	} else {
785	text_insert.len += text.len;
786	}
787	}
788	Diff::Delete(text) => {
789	count_delete += `1`;
790	if text_delete.is_empty() {
791	text_delete = text;
792	} else {
793	text_delete.len += text.len;
794	}
795	}
796	Diff::Equal(text, _) => {
797	let count_both = count_delete + count_insert;
798	if count_both > `1` {
799	let both_types = count_delete != `0` && count_insert != `0`;
800	// Delete the offending records.
801	diffs.splice(pointer - count_both..pointer, None);
802	pointer -= count_both;
803	if both_types {
804	// Factor out any common prefix.
805	let common_length = common_prefix(text_insert, text_delete);
806	if common_length != `0` {
807	if pointer > `0` {
808	match &mut diffs[pointer - `1`] {
809	Diff::Equal(this_diff1, this_diff2) => {
810	this_diff1.len += common_length;
811	this_diff2.len += common_length;
812	}
813	_ => unreachable!(
814	"previous diff should have been an equality"
815	),
816	}
817	} else {
818	diffs.insert(
819	pointer,
820	Diff::Equal(
821	text_delete.substring(..common_length),
822	text_insert.substring(..common_length),
823	),
824	);
825	pointer += `1`;
826	}
827	text_insert = text_insert.substring(common_length..);
828	text_delete = text_delete.substring(common_length..);
829	}
830	// Factor out any common suffix.
831	let common_length = common_suffix(text_insert, text_delete);
832	if common_length != `0` {
833	diffs[pointer].grow_left(common_length);
834	text_insert.len -= common_length;
835	text_delete.len -= common_length;
836	}
837	}
838	// Insert the merged records.
839	if !text_delete.is_empty() {
840	diffs.insert(pointer, Diff::Delete(text_delete));
841	pointer += `1`;
842	}
843	if !text_insert.is_empty() {
844	diffs.insert(pointer, Diff::Insert(text_insert));
845	pointer += `1`;
846	}
847	} else if pointer > `0` {
848	if let Some(Diff::Equal(prev_equal1, prev_equal2)) =
849	diffs.get_mut(pointer - `1`)
850	{
851	// Merge this equality with the previous one.
852	prev_equal1.len += text.len;
853	prev_equal2.len += text.len;
854	diffs.remove(pointer);
855	pointer -= `1`;
856	}
857	}
858	count_insert = `0`;
859	count_delete = `0`;
860	text_delete = Range::empty();
861	text_insert = Range::empty();
862	}
863	}
864	pointer += `1`;
865	}
866	if diffs.last().unwrap().text().is_empty() {
867	diffs.pop(); // Remove the dummy entry at the end.
868	}
869
870	// Second pass: look for single edits surrounded on both sides by equalities
871	// which can be shifted sideways to eliminate an equality.
872	// e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
873	let mut changes = `false`;
874	let mut pointer = `1`;
875	// Intentionally ignore the first and last element (don't need checking).
876	while let Some(&next_diff) = diffs.get(pointer + `1`) {
877	let prev_diff = diffs[pointer - `1`];
878	let this_diff = diffs[pointer];
879	if let (Diff::Equal(prev_diff, _), Diff::Equal(next_diff, _)) = (prev_diff, next_diff) {
880	// This is a single edit surrounded by equalities.
881	if this_diff.text().ends_with(prev_diff) {
882	// Shift the edit over the previous equality.
883	diffs[pointer].shift_left(prev_diff.len);
884	diffs[pointer + `1`].grow_left(prev_diff.len);
885	diffs.remove(pointer - `1`); // Delete prev_diff.
886	changes = `true`;
887	} else if this_diff.text().starts_with(next_diff) {
888	// Shift the edit over the next equality.
889	diffs[pointer - `1`].grow_right(next_diff.len);
890	diffs[pointer].shift_right(next_diff.len);
891	diffs.remove(pointer + `1`); // Delete next_diff.
892	changes = `true`;
893	}
894	}
895	pointer += `1`;
896	}
897	// If shifts were made, the diff needs reordering and another shift sweep.
898	if !changes {
899	return;
900	}
901	}
902	}
903
904	impl Debug for Chunk<'_> {
905	fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
906	let (name: &str, text: &str) = match *self {
907	Chunk::Equal(text: &str) => ("Equal", text),
908	Chunk::Delete(text: &str) => ("Delete", text),
909	Chunk::Insert(text: &str) => ("Insert", text),
910	};
911	write!(formatter, "{}({:?})", name, text)
912	}
913	}
914
915	impl Debug for Diff<'_, '_> {
916	fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
917	let (name: &str, range: Range<'_>) = match *self {
918	Diff::Equal(range: Range<'_>, _) => ("Equal", range),
919	Diff::Delete(range: Range<'_>) => ("Delete", range),
920	Diff::Insert(range: Range<'_>) => ("Insert", range),
921	};
922	formatter.write_str(data:name)?;
923	formatter.write_str(data:"(`\"`")?;
924	for ch: char in range.chars() {
925	if ch == '`\'`' {
926	// escape_debug turns this into "\'" which is unnecessary.
927	formatter.write_char(ch)?;
928	} else {
929	Display::fmt(&ch.escape_debug(), f:formatter)?;
930	}
931	}
932	formatter.write_str(data:"`\"`)")?;
933	Ok(())
934	}
935	}
936