1use sequencematcher::SequenceMatcher;
2use std::cmp;
3use utils::{count_leading, str_with_similar_chars};
4
5#[derive(Default)]
6pub struct Differ {
7 pub line_junk: Option<fn(&&str) -> bool>,
8 pub char_junk: Option<fn(&char) -> bool>,
9}
10
11impl Differ {
12 pub fn new() -> Differ {
13 Differ {
14 line_junk: None,
15 char_junk: None,
16 }
17 }
18
19 pub fn compare(&self, first_sequence: &[&str], second_sequence: &[&str]) -> Vec<String> {
20 let mut matcher = SequenceMatcher::new(first_sequence, second_sequence);
21 matcher.set_is_junk(self.line_junk);
22 let mut res = Vec::new();
23 for opcode in matcher.get_opcodes() {
24 let mut gen = Vec::new();
25 match opcode.tag.as_ref() {
26 "replace" => {
27 gen = self.fancy_replace(
28 first_sequence,
29 opcode.first_start,
30 opcode.first_end,
31 second_sequence,
32 opcode.second_start,
33 opcode.second_end,
34 )
35 }
36 "delete" => {
37 gen = self.dump("-", first_sequence, opcode.first_start, opcode.first_end)
38 }
39 "insert" => {
40 gen = self.dump("+", second_sequence, opcode.second_start, opcode.second_end)
41 }
42 "equal" => {
43 gen = self.dump(" ", first_sequence, opcode.first_start, opcode.first_end)
44 }
45 _ => {}
46 }
47 for i in gen {
48 res.push(i);
49 }
50 }
51 res
52 }
53
54 fn dump(&self, tag: &str, sequence: &[&str], start: usize, end: usize) -> Vec<String> {
55 let mut res = Vec::new();
56 for i in start..end {
57 if let Some(s) = sequence.get(i) {
58 res.push(format!("{} {}", tag, s))
59 }
60 }
61 res
62 }
63
64 fn plain_replace(
65 &self,
66 first_sequence: &[&str],
67 first_start: usize,
68 first_end: usize,
69 second_sequence: &[&str],
70 second_start: usize,
71 second_end: usize,
72 ) -> Vec<String> {
73 if !(first_start < first_end && second_start < second_end) {
74 return Vec::new();
75 }
76 let (mut first, second) = if second_end - second_start < first_end - first_start {
77 (
78 self.dump("+", second_sequence, second_start, second_end),
79 self.dump("-", first_sequence, first_start, first_end),
80 )
81 } else {
82 (
83 self.dump("-", first_sequence, first_start, first_end),
84 self.dump("+", second_sequence, second_start, second_end),
85 )
86 };
87 for s in second {
88 first.push(s);
89 }
90 first
91 }
92
93 fn fancy_replace(
94 &self,
95 first_sequence: &[&str],
96 first_start: usize,
97 first_end: usize,
98 second_sequence: &[&str],
99 second_start: usize,
100 second_end: usize,
101 ) -> Vec<String> {
102 let mut res = Vec::new();
103 let (mut best_ratio, cutoff) = (0.74, 0.75);
104 let (mut best_i, mut best_j) = (0, 0);
105 let mut eqi: Option<usize> = None;
106 let mut eqj: Option<usize> = None;
107 for (j, second_sequence_str) in second_sequence
108 .iter()
109 .enumerate()
110 .take(second_end)
111 .skip(second_start)
112 {
113 for (i, first_sequence_str) in first_sequence
114 .iter()
115 .enumerate()
116 .take(second_end)
117 .skip(second_start)
118 {
119 if first_sequence_str == second_sequence_str {
120 if eqi.is_none() {
121 eqi = Some(i);
122 eqj = Some(j);
123 }
124 continue;
125 }
126 let (first_sequence_chars, second_sequence_chars) = (
127 first_sequence_str.chars().collect::<Vec<char>>(),
128 second_sequence_str.chars().collect::<Vec<char>>(),
129 );
130 let mut cruncher =
131 SequenceMatcher::new(&first_sequence_chars, &second_sequence_chars);
132 cruncher.set_is_junk(self.char_junk);
133 if cruncher.ratio() > best_ratio {
134 best_ratio = cruncher.ratio();
135 best_i = i;
136 best_j = j;
137 }
138 }
139 }
140 if best_ratio < cutoff {
141 if eqi.is_none() {
142 res.extend(
143 self.plain_replace(
144 first_sequence,
145 first_start,
146 first_end,
147 second_sequence,
148 second_start,
149 second_end,
150 ).iter()
151 .cloned(),
152 );
153 return res;
154 }
155 best_i = eqi.unwrap();
156 best_j = eqj.unwrap();
157 } else {
158 eqi = None;
159 }
160 res.extend(
161 self.fancy_helper(
162 first_sequence,
163 first_start,
164 best_i,
165 second_sequence,
166 second_start,
167 best_j,
168 ).iter()
169 .cloned(),
170 );
171 let first_element = &first_sequence[best_i];
172 let second_element = &second_sequence[best_j];
173 if eqi.is_none() {
174 let (mut first_tag, mut second_tag) = (String::new(), String::new());
175 let first_element_chars: Vec<char> = first_element.chars().collect();
176 let second_element_chars: Vec<char> = second_element.chars().collect();
177 let mut cruncher = SequenceMatcher::new(&first_element_chars, &second_element_chars);
178 cruncher.set_is_junk(self.char_junk);
179 for opcode in &cruncher.get_opcodes() {
180 let (first_length, second_length) = (
181 opcode.first_end - opcode.first_start,
182 opcode.second_end - opcode.second_start,
183 );
184 match opcode.tag.as_ref() {
185 "replace" => {
186 first_tag.push_str(&str_with_similar_chars('^', first_length));
187 second_tag.push_str(&str_with_similar_chars('^', second_length));
188 }
189 "delete" => {
190 first_tag.push_str(&str_with_similar_chars('-', first_length));
191 }
192 "insert" => {
193 second_tag.push_str(&str_with_similar_chars('+', second_length));
194 }
195 "equal" => {
196 first_tag.push_str(&str_with_similar_chars(' ', first_length));
197 second_tag.push_str(&str_with_similar_chars(' ', second_length));
198 }
199 _ => {}
200 }
201 }
202 res.extend(
203 self.qformat(&first_element, &second_element, &first_tag, &second_tag)
204 .iter()
205 .cloned(),
206 );
207 } else {
208 let mut s = String::from(" ");
209 s.push_str(&first_element);
210 res.push(s);
211 }
212 res.extend(
213 self.fancy_helper(
214 first_sequence,
215 best_i + 1,
216 first_end,
217 second_sequence,
218 best_j + 1,
219 second_end,
220 ).iter()
221 .cloned(),
222 );
223 res
224 }
225
226 fn fancy_helper(
227 &self,
228 first_sequence: &[&str],
229 first_start: usize,
230 first_end: usize,
231 second_sequence: &[&str],
232 second_start: usize,
233 second_end: usize,
234 ) -> Vec<String> {
235 let mut res = Vec::new();
236 if first_start < first_end {
237 if second_start < second_end {
238 res = self.fancy_replace(
239 first_sequence,
240 first_start,
241 first_end,
242 second_sequence,
243 second_start,
244 second_end,
245 );
246 } else {
247 res = self.dump("-", first_sequence, first_start, first_end);
248 }
249 } else if second_start < second_end {
250 res = self.dump("+", second_sequence, second_start, second_end);
251 }
252 res
253 }
254
255 fn qformat(
256 &self,
257 first_line: &str,
258 second_line: &str,
259 first_tags: &str,
260 second_tags: &str,
261 ) -> Vec<String> {
262 let mut res = Vec::new();
263 let mut first_tags = first_tags;
264 let mut second_tags = second_tags;
265 let mut common = cmp::min(
266 count_leading(first_line, '\t'),
267 count_leading(second_line, '\t'),
268 );
269 common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
270 common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
271 first_tags = first_tags.split_at(common).1.trim_right();
272 second_tags = second_tags.split_at(common).1.trim_right();
273 let mut s = format!("- {}", first_line);
274 res.push(s);
275 if first_tags != "" {
276 s = format!("? {}{}\n", str_with_similar_chars('\t', common), first_tags);
277 res.push(s);
278 }
279 s = format!("+ {}", second_line);
280 res.push(s);
281 if second_tags != "" {
282 s = format!(
283 "? {}{}\n",
284 str_with_similar_chars('\t', common),
285 second_tags
286 );
287 res.push(s);
288 }
289 res
290 }
291
292 pub fn restore(delta: &[String], which: usize) -> Vec<String> {
293 if !(which == 1 || which == 2) {
294 panic!("Second parameter must be 1 or 2");
295 }
296 let mut res = Vec::new();
297 let tag = if which == 1 { "- " } else { "+ " }.to_string();
298 let prefixes = vec![tag, " ".to_string()];
299 for line in delta {
300 for prefix in &prefixes {
301 if line.starts_with(prefix) {
302 res.push(line.split_at(2).1.to_string());
303 }
304 }
305 }
306 res
307 }
308}
309
310#[test]
311fn test_fancy_replace() {
312 let differ = Differ::new();
313 let result = differ
314 .fancy_replace(&vec!["abcDefghiJkl\n"], 0, 1, &vec!["abcdefGhijkl\n"], 0, 1)
315 .join("");
316 assert_eq!(
317 result,
318 "- abcDefghiJkl\n? ^ ^ ^\n+ abcdefGhijkl\n? ^ ^ ^\n"
319 );
320}
321
322#[test]
323fn test_qformat() {
324 let differ = Differ::new();
325 let result = differ.qformat(
326 "\tabcDefghiJkl\n",
327 "\tabcdefGhijkl\n",
328 " ^ ^ ^ ",
329 " ^ ^ ^ ",
330 );
331 assert_eq!(
332 result,
333 vec![
334 "- \tabcDefghiJkl\n",
335 "? \t ^ ^ ^\n",
336 "+ \tabcdefGhijkl\n",
337 "? \t ^ ^ ^\n",
338 ]
339 );
340}
341