1 | use sequencematcher::SequenceMatcher; |
2 | use std::cmp; |
3 | use utils::{count_leading, str_with_similar_chars}; |
4 | |
5 | #[derive(Default)] |
6 | pub struct Differ { |
7 | pub line_junk: Option<fn(&&str) -> bool>, |
8 | pub char_junk: Option<fn(&char) -> bool>, |
9 | } |
10 | |
11 | impl Differ { |
12 | pub fn new() -> Differ { |
13 | Differ { |
14 | line_junk: None, |
15 | char_junk: None, |
16 | } |
17 | } |
18 | |
19 | pub fn compare(&self, first_sequence: &[&str], second_sequence: &[&str]) -> Vec<String> { |
20 | let mut matcher = SequenceMatcher::new(first_sequence, second_sequence); |
21 | matcher.set_is_junk(self.line_junk); |
22 | let mut res = Vec::new(); |
23 | for opcode in matcher.get_opcodes() { |
24 | let mut gen = Vec::new(); |
25 | match opcode.tag.as_ref() { |
26 | "replace" => { |
27 | gen = self.fancy_replace( |
28 | first_sequence, |
29 | opcode.first_start, |
30 | opcode.first_end, |
31 | second_sequence, |
32 | opcode.second_start, |
33 | opcode.second_end, |
34 | ) |
35 | } |
36 | "delete" => { |
37 | gen = self.dump("-" , first_sequence, opcode.first_start, opcode.first_end) |
38 | } |
39 | "insert" => { |
40 | gen = self.dump("+" , second_sequence, opcode.second_start, opcode.second_end) |
41 | } |
42 | "equal" => { |
43 | gen = self.dump(" " , first_sequence, opcode.first_start, opcode.first_end) |
44 | } |
45 | _ => {} |
46 | } |
47 | for i in gen { |
48 | res.push(i); |
49 | } |
50 | } |
51 | res |
52 | } |
53 | |
54 | fn dump(&self, tag: &str, sequence: &[&str], start: usize, end: usize) -> Vec<String> { |
55 | let mut res = Vec::new(); |
56 | for i in start..end { |
57 | if let Some(s) = sequence.get(i) { |
58 | res.push(format!("{} {}" , tag, s)) |
59 | } |
60 | } |
61 | res |
62 | } |
63 | |
64 | fn plain_replace( |
65 | &self, |
66 | first_sequence: &[&str], |
67 | first_start: usize, |
68 | first_end: usize, |
69 | second_sequence: &[&str], |
70 | second_start: usize, |
71 | second_end: usize, |
72 | ) -> Vec<String> { |
73 | if !(first_start < first_end && second_start < second_end) { |
74 | return Vec::new(); |
75 | } |
76 | let (mut first, second) = if second_end - second_start < first_end - first_start { |
77 | ( |
78 | self.dump("+" , second_sequence, second_start, second_end), |
79 | self.dump("-" , first_sequence, first_start, first_end), |
80 | ) |
81 | } else { |
82 | ( |
83 | self.dump("-" , first_sequence, first_start, first_end), |
84 | self.dump("+" , second_sequence, second_start, second_end), |
85 | ) |
86 | }; |
87 | for s in second { |
88 | first.push(s); |
89 | } |
90 | first |
91 | } |
92 | |
93 | fn fancy_replace( |
94 | &self, |
95 | first_sequence: &[&str], |
96 | first_start: usize, |
97 | first_end: usize, |
98 | second_sequence: &[&str], |
99 | second_start: usize, |
100 | second_end: usize, |
101 | ) -> Vec<String> { |
102 | let mut res = Vec::new(); |
103 | let (mut best_ratio, cutoff) = (0.74, 0.75); |
104 | let (mut best_i, mut best_j) = (0, 0); |
105 | let mut eqi: Option<usize> = None; |
106 | let mut eqj: Option<usize> = None; |
107 | for (j, second_sequence_str) in second_sequence |
108 | .iter() |
109 | .enumerate() |
110 | .take(second_end) |
111 | .skip(second_start) |
112 | { |
113 | for (i, first_sequence_str) in first_sequence |
114 | .iter() |
115 | .enumerate() |
116 | .take(second_end) |
117 | .skip(second_start) |
118 | { |
119 | if first_sequence_str == second_sequence_str { |
120 | if eqi.is_none() { |
121 | eqi = Some(i); |
122 | eqj = Some(j); |
123 | } |
124 | continue; |
125 | } |
126 | let (first_sequence_chars, second_sequence_chars) = ( |
127 | first_sequence_str.chars().collect::<Vec<char>>(), |
128 | second_sequence_str.chars().collect::<Vec<char>>(), |
129 | ); |
130 | let mut cruncher = |
131 | SequenceMatcher::new(&first_sequence_chars, &second_sequence_chars); |
132 | cruncher.set_is_junk(self.char_junk); |
133 | if cruncher.ratio() > best_ratio { |
134 | best_ratio = cruncher.ratio(); |
135 | best_i = i; |
136 | best_j = j; |
137 | } |
138 | } |
139 | } |
140 | if best_ratio < cutoff { |
141 | if eqi.is_none() { |
142 | res.extend( |
143 | self.plain_replace( |
144 | first_sequence, |
145 | first_start, |
146 | first_end, |
147 | second_sequence, |
148 | second_start, |
149 | second_end, |
150 | ).iter() |
151 | .cloned(), |
152 | ); |
153 | return res; |
154 | } |
155 | best_i = eqi.unwrap(); |
156 | best_j = eqj.unwrap(); |
157 | } else { |
158 | eqi = None; |
159 | } |
160 | res.extend( |
161 | self.fancy_helper( |
162 | first_sequence, |
163 | first_start, |
164 | best_i, |
165 | second_sequence, |
166 | second_start, |
167 | best_j, |
168 | ).iter() |
169 | .cloned(), |
170 | ); |
171 | let first_element = &first_sequence[best_i]; |
172 | let second_element = &second_sequence[best_j]; |
173 | if eqi.is_none() { |
174 | let (mut first_tag, mut second_tag) = (String::new(), String::new()); |
175 | let first_element_chars: Vec<char> = first_element.chars().collect(); |
176 | let second_element_chars: Vec<char> = second_element.chars().collect(); |
177 | let mut cruncher = SequenceMatcher::new(&first_element_chars, &second_element_chars); |
178 | cruncher.set_is_junk(self.char_junk); |
179 | for opcode in &cruncher.get_opcodes() { |
180 | let (first_length, second_length) = ( |
181 | opcode.first_end - opcode.first_start, |
182 | opcode.second_end - opcode.second_start, |
183 | ); |
184 | match opcode.tag.as_ref() { |
185 | "replace" => { |
186 | first_tag.push_str(&str_with_similar_chars('^' , first_length)); |
187 | second_tag.push_str(&str_with_similar_chars('^' , second_length)); |
188 | } |
189 | "delete" => { |
190 | first_tag.push_str(&str_with_similar_chars('-' , first_length)); |
191 | } |
192 | "insert" => { |
193 | second_tag.push_str(&str_with_similar_chars('+' , second_length)); |
194 | } |
195 | "equal" => { |
196 | first_tag.push_str(&str_with_similar_chars(' ' , first_length)); |
197 | second_tag.push_str(&str_with_similar_chars(' ' , second_length)); |
198 | } |
199 | _ => {} |
200 | } |
201 | } |
202 | res.extend( |
203 | self.qformat(&first_element, &second_element, &first_tag, &second_tag) |
204 | .iter() |
205 | .cloned(), |
206 | ); |
207 | } else { |
208 | let mut s = String::from(" " ); |
209 | s.push_str(&first_element); |
210 | res.push(s); |
211 | } |
212 | res.extend( |
213 | self.fancy_helper( |
214 | first_sequence, |
215 | best_i + 1, |
216 | first_end, |
217 | second_sequence, |
218 | best_j + 1, |
219 | second_end, |
220 | ).iter() |
221 | .cloned(), |
222 | ); |
223 | res |
224 | } |
225 | |
226 | fn fancy_helper( |
227 | &self, |
228 | first_sequence: &[&str], |
229 | first_start: usize, |
230 | first_end: usize, |
231 | second_sequence: &[&str], |
232 | second_start: usize, |
233 | second_end: usize, |
234 | ) -> Vec<String> { |
235 | let mut res = Vec::new(); |
236 | if first_start < first_end { |
237 | if second_start < second_end { |
238 | res = self.fancy_replace( |
239 | first_sequence, |
240 | first_start, |
241 | first_end, |
242 | second_sequence, |
243 | second_start, |
244 | second_end, |
245 | ); |
246 | } else { |
247 | res = self.dump("-" , first_sequence, first_start, first_end); |
248 | } |
249 | } else if second_start < second_end { |
250 | res = self.dump("+" , second_sequence, second_start, second_end); |
251 | } |
252 | res |
253 | } |
254 | |
255 | fn qformat( |
256 | &self, |
257 | first_line: &str, |
258 | second_line: &str, |
259 | first_tags: &str, |
260 | second_tags: &str, |
261 | ) -> Vec<String> { |
262 | let mut res = Vec::new(); |
263 | let mut first_tags = first_tags; |
264 | let mut second_tags = second_tags; |
265 | let mut common = cmp::min( |
266 | count_leading(first_line, ' \t' ), |
267 | count_leading(second_line, ' \t' ), |
268 | ); |
269 | common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' ' )); |
270 | common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' ' )); |
271 | first_tags = first_tags.split_at(common).1.trim_right(); |
272 | second_tags = second_tags.split_at(common).1.trim_right(); |
273 | let mut s = format!("- {}" , first_line); |
274 | res.push(s); |
275 | if first_tags != "" { |
276 | s = format!("? {}{} \n" , str_with_similar_chars(' \t' , common), first_tags); |
277 | res.push(s); |
278 | } |
279 | s = format!("+ {}" , second_line); |
280 | res.push(s); |
281 | if second_tags != "" { |
282 | s = format!( |
283 | "? {}{} \n" , |
284 | str_with_similar_chars(' \t' , common), |
285 | second_tags |
286 | ); |
287 | res.push(s); |
288 | } |
289 | res |
290 | } |
291 | |
292 | pub fn restore(delta: &[String], which: usize) -> Vec<String> { |
293 | if !(which == 1 || which == 2) { |
294 | panic!("Second parameter must be 1 or 2" ); |
295 | } |
296 | let mut res = Vec::new(); |
297 | let tag = if which == 1 { "- " } else { "+ " }.to_string(); |
298 | let prefixes = vec![tag, " " .to_string()]; |
299 | for line in delta { |
300 | for prefix in &prefixes { |
301 | if line.starts_with(prefix) { |
302 | res.push(line.split_at(2).1.to_string()); |
303 | } |
304 | } |
305 | } |
306 | res |
307 | } |
308 | } |
309 | |
310 | #[test] |
311 | fn test_fancy_replace() { |
312 | let differ = Differ::new(); |
313 | let result = differ |
314 | .fancy_replace(&vec!["abcDefghiJkl \n" ], 0, 1, &vec!["abcdefGhijkl \n" ], 0, 1) |
315 | .join("" ); |
316 | assert_eq!( |
317 | result, |
318 | "- abcDefghiJkl \n? ^ ^ ^ \n+ abcdefGhijkl \n? ^ ^ ^ \n" |
319 | ); |
320 | } |
321 | |
322 | #[test] |
323 | fn test_qformat() { |
324 | let differ = Differ::new(); |
325 | let result = differ.qformat( |
326 | " \tabcDefghiJkl \n" , |
327 | " \tabcdefGhijkl \n" , |
328 | " ^ ^ ^ " , |
329 | " ^ ^ ^ " , |
330 | ); |
331 | assert_eq!( |
332 | result, |
333 | vec![ |
334 | "- \tabcDefghiJkl \n" , |
335 | "? \t ^ ^ ^ \n" , |
336 | "+ \tabcdefGhijkl \n" , |
337 | "? \t ^ ^ ^ \n" , |
338 | ] |
339 | ); |
340 | } |
341 | |