lib.rs source code [crates/dissimilar-1.0.7/src/lib.rs]

1	//! [![github]](https://github.com/dtolnay/dissimilar)&ensp;[![crates-io]](https://crates.io/crates/dissimilar)&ensp;[![docs-rs]](https://docs.rs/dissimilar)
2	//!
3	//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
4	//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
5	//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
6	//!
7	//! <br>
8	//!
9	//! ## Diff library with semantic cleanup, based on Google's diff-match-patch
10	//!
11	//! This library is a port of the Diff component of [Diff Match Patch] to Rust.
12	//! The diff implementation is based on [Myers' diff algorithm] but includes
13	//! some [semantic cleanups] to increase human readability by factoring out
14	//! commonalities which are likely to be coincidental.
15	//!
16	//! Diff Match Patch was originally built in 2006 to power Google Docs.
17	//!
18	//! # Interface
19	//!
20	//! Here is the entire API of the Rust implementation. It operates on borrowed
21	//! strings and the return value of the diff algorithm is a vector of chunks
22	//! pointing into slices of those input strings.
23	//!
24	//! ```
25	//! pub enum Chunk<'a> {
26	//! Equal(&'a str),
27	//! Delete(&'a str),
28	//! Insert(&'a str),
29	//! }
30	//!
31	//! # const IGNORE: &str = stringify! {
32	//! pub fn diff(text1: &str, text2: &str) -> Vec<Chunk>;
33	//! # };
34	//! ```
35	//!
36	//! [Diff Match Patch]: https://github.com/google/diff-match-patch
37	//! [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf
38	//! [semantic cleanups]: https://neil.fraser.name/writing/diff/
39
40	#![doc(html_root_url = "https://docs.rs/dissimilar/1.0.7")]
41	#![allow(
42	clippy::blocks_in_if_conditions,
43	clippy::bool_to_int_with_if,
44	clippy::cast_possible_wrap,
45	clippy::cast_sign_loss,
46	clippy::cloned_instead_of_copied, // https://github.com/rust-lang/rust-clippy/issues/7127
47	clippy::collapsible_else_if,
48	clippy::comparison_chain,
49	clippy::items_after_test_module, // https://github.com/rust-lang/rust-clippy/issues/10713
50	clippy::let_underscore_untyped,
51	clippy::match_same_arms,
52	clippy::module_name_repetitions,
53	clippy::must_use_candidate,
54	clippy::new_without_default,
55	clippy::octal_escapes,
56	clippy::shadow_unrelated,
57	clippy::similar_names,
58	clippy::too_many_lines,
59	clippy::unseparated_literal_suffix,
60	unused_parens, // false positive on Some(&(mut diff)) pattern
61	)]
62
63	mod find;
64	mod range;
65
66	#[cfg(test)]
67	mod tests;
68
69	use crate::range::{slice, Range};
70	use std::cmp;
71	use std::collections::VecDeque;
72	use std::fmt::{self, Debug, Display, Write};
73
74	#[derive(Copy, Clone, PartialEq, Eq)]
75	pub enum Chunk<'a> {
76	Equal(&'a str),
77	Delete(&'a str),
78	Insert(&'a str),
79	}
80
81	#[derive(Copy, Clone)]
82	enum Diff<'a, 'b> {
83	Equal(Range<'a>, Range<'b>),
84	Delete(Range<'a>),
85	Insert(Range<'b>),
86	}
87
88	impl<'tmp, 'a: 'tmp, 'b: 'tmp> Diff<'a, 'b> {
89	fn text(&self) -> Range<'tmp> {
90	match *self {
91	Diff::Equal(range, _) \| Diff::Delete(range) \| Diff::Insert(range) => range,
92	}
93	}
94
95	fn grow_left(&mut self, increment: usize) {
96	self.for_each(\|range\| {
97	range.offset -= increment;
98	range.len += increment;
99	});
100	}
101
102	fn grow_right(&mut self, increment: usize) {
103	self.for_each(\|range\| range.len += increment);
104	}
105
106	fn shift_left(&mut self, increment: usize) {
107	self.for_each(\|range\| range.offset -= increment);
108	}
109
110	fn shift_right(&mut self, increment: usize) {
111	self.for_each(\|range\| range.offset += increment);
112	}
113
114	fn for_each(&mut self, f: impl Fn(&mut Range)) {
115	match self {
116	Diff::Equal(range1, range2) => {
117	f(range1);
118	f(range2);
119	}
120	Diff::Delete(range) => f(range),
121	Diff::Insert(range) => f(range),
122	}
123	}
124	}
125
126	pub fn diff<'a>(text1: &'a str, text2: &'a str) -> Vec<Chunk<'a>> {
127	let chars1: Vec<char> = text1.chars().collect();
128	let chars2: Vec<char> = text2.chars().collect();
129	let range1 = Range::new(&chars1, ..);
130	let range2 = Range::new(&chars2, ..);
131
132	let mut solution = main(range1, range2);
133	cleanup_char_boundary(&mut solution);
134	cleanup_semantic(&mut solution);
135	cleanup_merge(&mut solution);
136
137	let mut chunks = Vec::new();
138	let mut pos1 = `0`;
139	let mut pos2 = `0`;
140	for diff in solution.diffs {
141	chunks.push(match diff {
142	Diff::Equal(range, _) => {
143	let len = range.len_bytes();
144	let chunk = Chunk::Equal(&text1[pos1..pos1 + len]);
145	pos1 += len;
146	pos2 += len;
147	chunk
148	}
149	Diff::Delete(range) => {
150	let len = range.len_bytes();
151	let chunk = Chunk::Delete(&text1[pos1..pos1 + len]);
152	pos1 += len;
153	chunk
154	}
155	Diff::Insert(range) => {
156	let len = range.len_bytes();
157	let chunk = Chunk::Insert(&text2[pos2..pos2 + len]);
158	pos2 += len;
159	chunk
160	}
161	});
162	}
163	chunks
164	}
165
166	struct Solution<'a, 'b> {
167	text1: Range<'a>,
168	text2: Range<'b>,
169	diffs: Vec<Diff<'a, 'b>>,
170	}
171
172	fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> {
173	let whole1 = text1;
174	let whole2 = text2;
175
176	// Trim off common prefix.
177	let common_prefix_len = common_prefix(text1, text2);
178	let common_prefix = Diff::Equal(
179	text1.substring(..common_prefix_len),
180	text2.substring(..common_prefix_len),
181	);
182	text1 = text1.substring(common_prefix_len..);
183	text2 = text2.substring(common_prefix_len..);
184
185	// Trim off common suffix.
186	let common_suffix_len = common_suffix(text1, text2);
187	let common_suffix = Diff::Equal(
188	text1.substring(text1.len - common_suffix_len..),
189	text2.substring(text2.len - common_suffix_len..),
190	);
191	text1 = text1.substring(..text1.len - common_suffix_len);
192	text2 = text2.substring(..text2.len - common_suffix_len);
193
194	// Compute the diff on the middle block.
195	let mut solution = Solution {
196	text1: whole1,
197	text2: whole2,
198	diffs: compute(text1, text2),
199	};
200
201	// Restore the prefix and suffix.
202	if common_prefix_len > `0` {
203	solution.diffs.insert(`0`, common_prefix);
204	}
205	if common_suffix_len > `0` {
206	solution.diffs.push(common_suffix);
207	}
208
209	cleanup_merge(&mut solution);
210
211	solution
212	}
213
214	// Find the differences between two texts. Assumes that the texts do not have
215	// any common prefix or suffix.
216	fn compute<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
217	match (text1.is_empty(), text2.is_empty()) {
218	(`true`, `true`) => return Vec::new(),
219	(`true`, `false`) => return vec![Diff::Insert(text2)],
220	(`false`, `true`) => return vec![Diff::Delete(text1)],
221	(`false`, `false`) => {}
222	}
223
224	// Check for entire shorter text inside the longer text.
225	if text1.len > text2.len {
226	if let Some(i) = text1.find(text2) {
227	return vec![
228	Diff::Delete(text1.substring(..i)),
229	Diff::Equal(text1.substring(i..i + text2.len), text2),
230	Diff::Delete(text1.substring(i + text2.len..)),
231	];
232	}
233	} else {
234	if let Some(i) = text2.find(text1) {
235	return vec![
236	Diff::Insert(text2.substring(..i)),
237	Diff::Equal(text1, text2.substring(i..i + text1.len)),
238	Diff::Insert(text2.substring(i + text1.len..)),
239	];
240	}
241	}
242
243	if text1.len == `1` \|\| text2.len == `1` {
244	// Single character string.
245	// After the previous check, the character can't be an equality.
246	return vec![Diff::Delete(text1), Diff::Insert(text2)];
247	}
248
249	bisect(text1, text2)
250	}
251
252	// Find the 'middle snake' of a diff, split the problem in two and return the
253	// recursively constructed diff.
254	//
255	// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
256	fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
257	let max_d = (text1.len + text2.len + `1`) / `2`;
258	let v_offset = max_d;
259	let v_len = `2` * max_d;
260	let mut v1 = vec![`-1isize`; v_len];
261	let mut v2 = vec![`-1isize`; v_len];
262	v1[v_offset + `1`] = `0`;
263	v2[v_offset + `1`] = `0`;
264	let delta = text1.len as isize - text2.len as isize;
265	// If the total number of characters is odd, then the front path will
266	// collide with the reverse path.
267	let front = delta % `2` != `0`;
268	// Offsets for start and end of k loop.
269	// Prevents mapping of space beyond the grid.
270	let mut k1start = `0`;
271	let mut k1end = `0`;
272	let mut k2start = `0`;
273	let mut k2end = `0`;
274	for d in `0`..max_d as isize {
275	// Walk the front path one step.
276	let mut k1 = -d + k1start;
277	while k1 <= d - k1end {
278	let k1_offset = (v_offset as isize + k1) as usize;
279	let mut x1 = if k1 == -d \|\| (k1 != d && v1[k1_offset - `1`] < v1[k1_offset + `1`]) {
280	v1[k1_offset + `1`]
281	} else {
282	v1[k1_offset - `1`] + `1`
283	} as usize;
284	let mut y1 = (x1 as isize - k1) as usize;
285	if let (Some(s1), Some(s2)) = (text1.get(x1..), text2.get(y1..)) {
286	let advance = common_prefix(s1, s2);
287	x1 += advance;
288	y1 += advance;
289	}
290	v1[k1_offset] = x1 as isize;
291	if x1 > text1.len {
292	// Ran off the right of the graph.
293	k1end += `2`;
294	} else if y1 > text2.len {
295	// Ran off the bottom of the graph.
296	k1start += `2`;
297	} else if front {
298	let k2_offset = v_offset as isize + delta - k1;
299	if k2_offset >= `0` && k2_offset < v_len as isize && v2[k2_offset as usize] != `-1` {
300	// Mirror x2 onto top-left coordinate system.
301	let x2 = text1.len as isize - v2[k2_offset as usize];
302	if x1 as isize >= x2 {
303	// Overlap detected.
304	return bisect_split(text1, text2, x1, y1);
305	}
306	}
307	}
308	k1 += `2`;
309	}
310
311	// Walk the reverse path one step.
312	let mut k2 = -d + k2start;
313	while k2 <= d - k2end {
314	let k2_offset = (v_offset as isize + k2) as usize;
315	let mut x2 = if k2 == -d \|\| (k2 != d && v2[k2_offset - `1`] < v2[k2_offset + `1`]) {
316	v2[k2_offset + `1`]
317	} else {
318	v2[k2_offset - `1`] + `1`
319	} as usize;
320	let mut y2 = (x2 as isize - k2) as usize;
321	if x2 < text1.len && y2 < text2.len {
322	let advance = common_suffix(
323	text1.substring(..text1.len - x2),
324	text2.substring(..text2.len - y2),
325	);
326	x2 += advance;
327	y2 += advance;
328	}
329	v2[k2_offset] = x2 as isize;
330	if x2 > text1.len {
331	// Ran off the left of the graph.
332	k2end += `2`;
333	} else if y2 > text2.len {
334	// Ran off the top of the graph.
335	k2start += `2`;
336	} else if !front {
337	let k1_offset = v_offset as isize + delta - k2;
338	if k1_offset >= `0` && k1_offset < v_len as isize && v1[k1_offset as usize] != `-1` {
339	let x1 = v1[k1_offset as usize] as usize;
340	let y1 = v_offset + x1 - k1_offset as usize;
341	// Mirror x2 onto top-left coordinate system.
342	x2 = text1.len - x2;
343	if x1 >= x2 {
344	// Overlap detected.
345	return bisect_split(text1, text2, x1, y1);
346	}
347	}
348	}
349	k2 += `2`;
350	}
351	}
352	// Number of diffs equals number of characters, no commonality at all.
353	vec![Diff::Delete(text1), Diff::Insert(text2)]
354	}
355
356	// Given the location of the 'middle snake', split the diff in two parts and
357	// recurse.
358	fn bisect_split<'a, 'b>(
359	text1: Range<'a>,
360	text2: Range<'b>,
361	x: usize,
362	y: usize,
363	) -> Vec<Diff<'a, 'b>> {
364	let (text1a: Range<'_>, text1b: Range<'_>) = text1.split_at(mid:x);
365	let (text2a: Range<'_>, text2b: Range<'_>) = text2.split_at(mid:y);
366
367	// Compute both diffs serially.
368	let mut diffs: Vec> = main(text1:text1a, text2:text2a).diffs;
369	diffs.extend(iter:main(text1:text1b, text2:text2b).diffs);
370
371	diffs
372	}
373
374	// Determine the length of the common prefix of two strings.
375	fn common_prefix(text1: Range, text2: Range) -> usize {
376	for (i: usize, (b1: char, b2: char)) in text1.chars().zip(text2.chars()).enumerate() {
377	if b1 != b2 {
378	return i;
379	}
380	}
381	cmp::min(v1:text1.len, v2:text2.len)
382	}
383
384	// Determine the length of the common suffix of two strings.
385	fn common_suffix(text1: Range, text2: Range) -> usize {
386	for (i: usize, (b1: char, b2: char)) in text1.chars().rev().zip(text2.chars().rev()).enumerate() {
387	if b1 != b2 {
388	return i;
389	}
390	}
391	cmp::min(v1:text1.len, v2:text2.len)
392	}
393
394	// Determine if the suffix of one string is the prefix of another.
395	//
396	// Returns the number of characters common to the end of the first string and
397	// the start of the second string.
398	fn common_overlap(mut text1: Range, mut text2: Range) -> usize {
399	// Eliminate the null case.
400	if text1.is_empty() \|\| text2.is_empty() {
401	return `0`;
402	}
403	// Truncate the longer string.
404	if text1.len > text2.len {
405	text1 = text1.substring(text1.len - text2.len..);
406	} else if text1.len < text2.len {
407	text2 = text2.substring(..text1.len);
408	}
409	// Quick check for the worst case.
410	if slice(text1) == slice(text2) {
411	return text1.len;
412	}
413
414	// Start by looking for a single character match
415	// and increase length until no match is found.
416	// Performance analysis: https://neil.fraser.name/news/2010/11/04/
417	let mut best = `0`;
418	let mut length = `1`;
419	loop {
420	let pattern = text1.substring(text1.len - length..);
421	let found = match text2.find(pattern) {
422	Some(found) => found,
423	None => return best,
424	};
425	length += found;
426	if found == `0`
427	\|\| slice(text1.substring(text1.len - length..)) == slice(text2.substring(..length))
428	{
429	best = length;
430	length += `1`;
431	}
432	}
433	}
434
435	fn cleanup_char_boundary(solution: &mut Solution) {
436	fn is_segmentation_boundary(doc: &[char], pos: usize) -> bool {
437	// FIXME: use unicode-segmentation crate?
438	let _ = doc;
439	let _ = pos;
440	`true`
441	}
442
443	fn boundary_down(doc: &[char], pos: usize) -> usize {
444	let mut adjust = `0`;
445	while !is_segmentation_boundary(doc, pos - adjust) {
446	adjust += `1`;
447	}
448	adjust
449	}
450
451	fn boundary_up(doc: &[char], pos: usize) -> usize {
452	let mut adjust = `0`;
453	while !is_segmentation_boundary(doc, pos + adjust) {
454	adjust += `1`;
455	}
456	adjust
457	}
458
459	fn skip_overlap<'a>(prev: &Range<'a>, range: &mut Range<'a>) {
460	let prev_end = prev.offset + prev.len;
461	if prev_end > range.offset {
462	let delta = cmp::min(prev_end - range.offset, range.len);
463	range.offset += delta;
464	range.len -= delta;
465	}
466	}
467
468	let mut read = `0`;
469	let mut retain = `0`;
470	let mut last_delete = Range::empty();
471	let mut last_insert = Range::empty();
472	while let Some(&(mut diff)) = solution.diffs.get(read) {
473	read += `1`;
474	match &mut diff {
475	Diff::Equal(range1, range2) => {
476	let adjust = boundary_up(range1.doc, range1.offset);
477	// If the whole range is sub-character, skip it.
478	if range1.len <= adjust {
479	continue;
480	}
481	range1.offset += adjust;
482	range1.len -= adjust;
483	range2.offset += adjust;
484	range2.len -= adjust;
485	let adjust = boundary_down(range1.doc, range1.offset + range1.len);
486	range1.len -= adjust;
487	range2.len -= adjust;
488	last_delete = Range::empty();
489	last_insert = Range::empty();
490	}
491	Diff::Delete(range) => {
492	skip_overlap(&last_delete, range);
493	if range.len == `0` {
494	continue;
495	}
496	let adjust = boundary_down(range.doc, range.offset);
497	range.offset -= adjust;
498	range.len += adjust;
499	let adjust = boundary_up(range.doc, range.offset + range.len);
500	range.len += adjust;
501	last_delete = *range;
502	}
503	Diff::Insert(range) => {
504	skip_overlap(&last_insert, range);
505	if range.len == `0` {
506	continue;
507	}
508	let adjust = boundary_down(range.doc, range.offset);
509	range.offset -= adjust;
510	range.len += adjust;
511	let adjust = boundary_up(range.doc, range.offset + range.len);
512	range.len += adjust;
513	last_insert = *range;
514	}
515	}
516	solution.diffs[retain] = diff;
517	retain += `1`;
518	}
519
520	solution.diffs.truncate(retain);
521	}
522
523	// Reduce the number of edits by eliminating semantically trivial equalities.
524	fn cleanup_semantic(solution: &mut Solution) {
525	let mut diffs = &mut solution.diffs;
526	if diffs.is_empty() {
527	return;
528	}
529
530	let mut changes = `false`;
531	let mut equalities = VecDeque::new(); // Double-ended queue of equalities.
532	let mut last_equality = None; // Always equal to equalities.peek().text
533	let mut pointer = `0`;
534	// Number of characters that changed prior to the equality.
535	let mut len_insertions1 = `0`;
536	let mut len_deletions1 = `0`;
537	// Number of characters that changed after the equality.
538	let mut len_insertions2 = `0`;
539	let mut len_deletions2 = `0`;
540	while let Some(&this_diff) = diffs.get(pointer) {
541	match this_diff {
542	Diff::Equal(text1, text2) => {
543	equalities.push_back(pointer);
544	len_insertions1 = len_insertions2;
545	len_deletions1 = len_deletions2;
546	len_insertions2 = `0`;
547	len_deletions2 = `0`;
548	last_equality = Some((text1, text2));
549	pointer += `1`;
550	continue;
551	}
552	Diff::Delete(text) => len_deletions2 += text.len,
553	Diff::Insert(text) => len_insertions2 += text.len,
554	}
555	// Eliminate an equality that is smaller or equal to the edits on both
556	// sides of it.
557	if last_equality.map_or(`false`, \|(last_equality, _)\| {
558	last_equality.len <= cmp::max(len_insertions1, len_deletions1)
559	&& last_equality.len <= cmp::max(len_insertions2, len_deletions2)
560	}) {
561	// Jump back to offending equality.
562	pointer = equalities.pop_back().unwrap();
563
564	// Replace equality with a delete.
565	diffs[pointer] = Diff::Delete(last_equality.unwrap().0);
566	// Insert a corresponding insert.
567	diffs.insert(pointer + `1`, Diff::Insert(last_equality.unwrap().1));
568
569	len_insertions1 = `0`; // Reset the counters.
570	len_insertions2 = `0`;
571	len_deletions1 = `0`;
572	len_deletions2 = `0`;
573	last_equality = None;
574	changes = `true`;
575
576	// Throw away the previous equality (it needs to be reevaluated).
577	equalities.pop_back();
578	if let Some(back) = equalities.back() {
579	// There is a safe equality we can fall back to.
580	pointer = *back;
581	} else {
582	// There are no previous equalities, jump back to the start.
583	pointer = `0`;
584	continue;
585	}
586	}
587	pointer += `1`;
588	}
589
590	// Normalize the diff.
591	if changes {
592	cleanup_merge(solution);
593	}
594	cleanup_semantic_lossless(solution);
595	diffs = &mut solution.diffs;
596
597	// Find any overlaps between deletions and insertions.
598	// e.g: <del>abcxxx</del><ins>xxxdef</ins>
599	// -> <del>abc</del>xxx<ins>def</ins>
600	// e.g: <del>xxxabc</del><ins>defxxx</ins>
601	// -> <ins>def</ins>xxx<del>abc</del>
602	// Only extract an overlap if it is as big as the edit ahead or behind it.
603	let mut pointer = `1`;
604	while let Some(&this_diff) = diffs.get(pointer) {
605	let prev_diff = diffs[pointer - `1`];
606	if let (Diff::Delete(deletion), Diff::Insert(insertion)) = (prev_diff, this_diff) {
607	let overlap_len1 = common_overlap(deletion, insertion);
608	let overlap_len2 = common_overlap(insertion, deletion);
609	let overlap_min = cmp::min(deletion.len, insertion.len);
610	if overlap_len1 >= overlap_len2 && `2` * overlap_len1 >= overlap_min {
611	// Overlap found. Insert an equality and trim the surrounding edits.
612	diffs.insert(
613	pointer,
614	Diff::Equal(
615	deletion.substring(deletion.len - overlap_len1..deletion.len),
616	insertion.substring(..overlap_len1),
617	),
618	);
619	diffs[pointer - `1`] =
620	Diff::Delete(deletion.substring(..deletion.len - overlap_len1));
621	diffs[pointer + `1`] = Diff::Insert(insertion.substring(overlap_len1..));
622	} else if overlap_len1 < overlap_len2 && `2` * overlap_len2 >= overlap_min {
623	// Reverse overlap found.
624	// Insert an equality and swap and trim the surrounding edits.
625	diffs.insert(
626	pointer,
627	Diff::Equal(
628	deletion.substring(..overlap_len2),
629	insertion.substring(insertion.len - overlap_len2..insertion.len),
630	),
631	);
632	diffs[pointer - `1`] =
633	Diff::Insert(insertion.substring(..insertion.len - overlap_len2));
634	diffs[pointer + `1`] = Diff::Delete(deletion.substring(overlap_len2..));
635	}
636	pointer += `1`;
637	}
638	pointer += `1`;
639	}
640	}
641
642	// Look for single edits surrounded on both sides by equalities which can be
643	// shifted sideways to align the edit to a word boundary.
644	//
645	// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
646	fn cleanup_semantic_lossless(solution: &mut Solution) {
647	let diffs = &mut solution.diffs;
648	let mut pointer = `1`;
649	while let Some(&next_diff) = diffs.get(pointer + `1`) {
650	let prev_diff = diffs[pointer - `1`];
651	if let (
652	Diff::Equal(mut prev_equal1, mut prev_equal2),
653	Diff::Equal(mut next_equal1, mut next_equal2),
654	) = (prev_diff, next_diff)
655	{
656	// This is a single edit surrounded by equalities.
657	let mut edit = diffs[pointer];
658
659	// First, shift the edit as far left as possible.
660	let common_offset = common_suffix(prev_equal1, edit.text());
661	let original_prev_len = prev_equal1.len;
662	prev_equal1.len -= common_offset;
663	prev_equal2.len -= common_offset;
664	edit.shift_left(common_offset);
665	next_equal1.offset -= common_offset;
666	next_equal1.len += common_offset;
667	next_equal2.offset -= common_offset;
668	next_equal2.len += common_offset;
669
670	// Second, step character by character right, looking for the best fit.
671	let mut best_prev_equal = (prev_equal1, prev_equal2);
672	let mut best_edit = edit;
673	let mut best_next_equal = (next_equal1, next_equal2);
674	let mut best_score = cleanup_semantic_score(prev_equal1, edit.text())
675	+ cleanup_semantic_score(edit.text(), next_equal1);
676	while !edit.text().is_empty()
677	&& !next_equal1.is_empty()
678	&& edit.text().chars().next().unwrap() == next_equal1.chars().next().unwrap()
679	{
680	prev_equal1.len += `1`;
681	prev_equal2.len += `1`;
682	edit.shift_right(`1`);
683	next_equal1.offset += `1`;
684	next_equal1.len -= `1`;
685	next_equal2.offset += `1`;
686	next_equal2.len -= `1`;
687	let score = cleanup_semantic_score(prev_equal1, edit.text())
688	+ cleanup_semantic_score(edit.text(), next_equal1);
689	// The >= encourages trailing rather than leading whitespace on edits.
690	if score >= best_score {
691	best_score = score;
692	best_prev_equal = (prev_equal1, prev_equal2);
693	best_edit = edit;
694	best_next_equal = (next_equal1, next_equal2);
695	}
696	}
697
698	if original_prev_len != best_prev_equal.0.len {
699	// We have an improvement, save it back to the diff.
700	if best_next_equal.0.is_empty() {
701	diffs.remove(pointer + `1`);
702	} else {
703	diffs[pointer + `1`] = Diff::Equal(best_next_equal.0, best_next_equal.1);
704	}
705	diffs[pointer] = best_edit;
706	if best_prev_equal.0.is_empty() {
707	diffs.remove(pointer - `1`);
708	pointer -= `1`;
709	} else {
710	diffs[pointer - `1`] = Diff::Equal(best_prev_equal.0, best_prev_equal.1);
711	}
712	}
713	}
714	pointer += `1`;
715	}
716	}
717
718	// Given two strings, compute a score representing whether the internal boundary
719	// falls on logical boundaries.
720	//
721	// Scores range from 6 (best) to 0 (worst).
722	fn cleanup_semantic_score(one: Range, two: Range) -> usize {
723	if one.is_empty() \|\| two.is_empty() {
724	// Edges are the best.
725	return `6`;
726	}
727
728	// Each port of this function behaves slightly differently due to subtle
729	// differences in each language's definition of things like 'whitespace'.
730	// Since this function's purpose is largely cosmetic, the choice has been
731	// made to use each language's native features rather than force total
732	// conformity.
733	let char1 = one.chars().next_back().unwrap();
734	let char2 = two.chars().next().unwrap();
735	let non_alphanumeric1 = !char1.is_ascii_alphanumeric();
736	let non_alphanumeric2 = !char2.is_ascii_alphanumeric();
737	let whitespace1 = non_alphanumeric1 && char1.is_ascii_whitespace();
738	let whitespace2 = non_alphanumeric2 && char2.is_ascii_whitespace();
739	let line_break1 = whitespace1 && char1.is_control();
740	let line_break2 = whitespace2 && char2.is_control();
741	let blank_line1 =
742	line_break1 && (one.ends_with(['`\n`', '`\n`']) \|\| one.ends_with(['`\n`', '`\r`', '`\n`']));
743	let blank_line2 =
744	line_break2 && (two.starts_with(['`\n`', '`\n`']) \|\| two.starts_with(['`\r`', '`\n`', '`\r`', '`\n`']));
745
746	if blank_line1 \|\| blank_line2 {
747	// Five points for blank lines.
748	`5`
749	} else if line_break1 \|\| line_break2 {
750	// Four points for line breaks.
751	`4`
752	} else if non_alphanumeric1 && !whitespace1 && whitespace2 {
753	// Three points for end of sentences.
754	`3`
755	} else if whitespace1 \|\| whitespace2 {
756	// Two points for whitespace.
757	`2`
758	} else if non_alphanumeric1 \|\| non_alphanumeric2 {
759	// One point for non-alphanumeric.
760	`1`
761	} else {
762	`0`
763	}
764	}
765
766	// Reorder and merge like edit sections. Merge equalities. Any edit section can
767	// move as long as it doesn't cross an equality.
768	fn cleanup_merge(solution: &mut Solution) {
769	let diffs = &mut solution.diffs;
770	while !diffs.is_empty() {
771	diffs.push(Diff::Equal(
772	solution.text1.substring(solution.text1.len..),
773	solution.text2.substring(solution.text2.len..),
774	)); // Add a dummy entry at the end.
775	let mut pointer = `0`;
776	let mut count_delete = `0`;
777	let mut count_insert = `0`;
778	let mut text_delete = Range::empty();
779	let mut text_insert = Range::empty();
780	while let Some(&this_diff) = diffs.get(pointer) {
781	match this_diff {
782	Diff::Insert(text) => {
783	count_insert += `1`;
784	if text_insert.is_empty() {
785	text_insert = text;
786	} else {
787	text_insert.len += text.len;
788	}
789	}
790	Diff::Delete(text) => {
791	count_delete += `1`;
792	if text_delete.is_empty() {
793	text_delete = text;
794	} else {
795	text_delete.len += text.len;
796	}
797	}
798	Diff::Equal(text, _) => {
799	let count_both = count_delete + count_insert;
800	if count_both > `1` {
801	let both_types = count_delete != `0` && count_insert != `0`;
802	// Delete the offending records.
803	diffs.splice(pointer - count_both..pointer, None);
804	pointer -= count_both;
805	if both_types {
806	// Factor out any common prefix.
807	let common_length = common_prefix(text_insert, text_delete);
808	if common_length != `0` {
809	if pointer > `0` {
810	match &mut diffs[pointer - `1`] {
811	Diff::Equal(this_diff1, this_diff2) => {
812	this_diff1.len += common_length;
813	this_diff2.len += common_length;
814	}
815	_ => unreachable!(
816	"previous diff should have been an equality"
817	),
818	}
819	} else {
820	diffs.insert(
821	pointer,
822	Diff::Equal(
823	text_delete.substring(..common_length),
824	text_insert.substring(..common_length),
825	),
826	);
827	pointer += `1`;
828	}
829	text_insert = text_insert.substring(common_length..);
830	text_delete = text_delete.substring(common_length..);
831	}
832	// Factor out any common suffix.
833	let common_length = common_suffix(text_insert, text_delete);
834	if common_length != `0` {
835	diffs[pointer].grow_left(common_length);
836	text_insert.len -= common_length;
837	text_delete.len -= common_length;
838	}
839	}
840	// Insert the merged records.
841	if !text_delete.is_empty() {
842	diffs.insert(pointer, Diff::Delete(text_delete));
843	pointer += `1`;
844	}
845	if !text_insert.is_empty() {
846	diffs.insert(pointer, Diff::Insert(text_insert));
847	pointer += `1`;
848	}
849	} else if pointer > `0` {
850	if let Some(Diff::Equal(prev_equal1, prev_equal2)) =
851	diffs.get_mut(pointer - `1`)
852	{
853	// Merge this equality with the previous one.
854	prev_equal1.len += text.len;
855	prev_equal2.len += text.len;
856	diffs.remove(pointer);
857	pointer -= `1`;
858	}
859	}
860	count_insert = `0`;
861	count_delete = `0`;
862	text_delete = Range::empty();
863	text_insert = Range::empty();
864	}
865	}
866	pointer += `1`;
867	}
868	if diffs.last().unwrap().text().is_empty() {
869	diffs.pop(); // Remove the dummy entry at the end.
870	}
871
872	// Second pass: look for single edits surrounded on both sides by equalities
873	// which can be shifted sideways to eliminate an equality.
874	// e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
875	let mut changes = `false`;
876	let mut pointer = `1`;
877	// Intentionally ignore the first and last element (don't need checking).
878	while let Some(&next_diff) = diffs.get(pointer + `1`) {
879	let prev_diff = diffs[pointer - `1`];
880	let this_diff = diffs[pointer];
881	if let (Diff::Equal(prev_diff, _), Diff::Equal(next_diff, _)) = (prev_diff, next_diff) {
882	// This is a single edit surrounded by equalities.
883	if this_diff.text().ends_with(prev_diff) {
884	// Shift the edit over the previous equality.
885	diffs[pointer].shift_left(prev_diff.len);
886	diffs[pointer + `1`].grow_left(prev_diff.len);
887	diffs.remove(pointer - `1`); // Delete prev_diff.
888	changes = `true`;
889	} else if this_diff.text().starts_with(next_diff) {
890	// Shift the edit over the next equality.
891	diffs[pointer - `1`].grow_right(next_diff.len);
892	diffs[pointer].shift_right(next_diff.len);
893	diffs.remove(pointer + `1`); // Delete next_diff.
894	changes = `true`;
895	}
896	}
897	pointer += `1`;
898	}
899	// If shifts were made, the diff needs reordering and another shift sweep.
900	if !changes {
901	return;
902	}
903	}
904	}
905
906	impl Debug for Chunk<'_> {
907	fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
908	let (name: &str, text: &str) = match *self {
909	Chunk::Equal(text: &str) => ("Equal", text),
910	Chunk::Delete(text: &str) => ("Delete", text),
911	Chunk::Insert(text: &str) => ("Insert", text),
912	};
913	write!(formatter, "{}({:?})", name, text)
914	}
915	}
916
917	impl Debug for Diff<'_, '_> {
918	fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
919	let (name: &str, range: Range<'_>) = match *self {
920	Diff::Equal(range: Range<'_>, _) => ("Equal", range),
921	Diff::Delete(range: Range<'_>) => ("Delete", range),
922	Diff::Insert(range: Range<'_>) => ("Insert", range),
923	};
924	formatter.write_str(data:name)?;
925	formatter.write_str(data:"(`\"`")?;
926	for ch: char in range.chars() {
927	if ch == '`\'`' {
928	// escape_debug turns this into "\'" which is unnecessary.
929	formatter.write_char(ch)?;
930	} else {
931	Display::fmt(&ch.escape_debug(), f:formatter)?;
932	}
933	}
934	formatter.write_str(data:"`\"`)")?;
935	Ok(())
936	}
937	}
938

Provided by KDAB

Definitions