| 1 | use sequencematcher::SequenceMatcher; |
| 2 | use std::cmp; |
| 3 | use utils::{count_leading, str_with_similar_chars}; |
| 4 | |
| 5 | #[derive(Default)] |
| 6 | pub struct Differ { |
| 7 | pub line_junk: Option<fn(&&str) -> bool>, |
| 8 | pub char_junk: Option<fn(&char) -> bool>, |
| 9 | } |
| 10 | |
| 11 | impl Differ { |
| 12 | pub fn new() -> Differ { |
| 13 | Differ { |
| 14 | line_junk: None, |
| 15 | char_junk: None, |
| 16 | } |
| 17 | } |
| 18 | |
| 19 | pub fn compare(&self, first_sequence: &[&str], second_sequence: &[&str]) -> Vec<String> { |
| 20 | let mut matcher = SequenceMatcher::new(first_sequence, second_sequence); |
| 21 | matcher.set_is_junk(self.line_junk); |
| 22 | let mut res = Vec::new(); |
| 23 | for opcode in matcher.get_opcodes() { |
| 24 | let mut gen = Vec::new(); |
| 25 | match opcode.tag.as_ref() { |
| 26 | "replace" => { |
| 27 | gen = self.fancy_replace( |
| 28 | first_sequence, |
| 29 | opcode.first_start, |
| 30 | opcode.first_end, |
| 31 | second_sequence, |
| 32 | opcode.second_start, |
| 33 | opcode.second_end, |
| 34 | ) |
| 35 | } |
| 36 | "delete" => { |
| 37 | gen = self.dump("-" , first_sequence, opcode.first_start, opcode.first_end) |
| 38 | } |
| 39 | "insert" => { |
| 40 | gen = self.dump("+" , second_sequence, opcode.second_start, opcode.second_end) |
| 41 | } |
| 42 | "equal" => { |
| 43 | gen = self.dump(" " , first_sequence, opcode.first_start, opcode.first_end) |
| 44 | } |
| 45 | _ => {} |
| 46 | } |
| 47 | for i in gen { |
| 48 | res.push(i); |
| 49 | } |
| 50 | } |
| 51 | res |
| 52 | } |
| 53 | |
| 54 | fn dump(&self, tag: &str, sequence: &[&str], start: usize, end: usize) -> Vec<String> { |
| 55 | let mut res = Vec::new(); |
| 56 | for i in start..end { |
| 57 | if let Some(s) = sequence.get(i) { |
| 58 | res.push(format!("{} {}" , tag, s)) |
| 59 | } |
| 60 | } |
| 61 | res |
| 62 | } |
| 63 | |
| 64 | fn plain_replace( |
| 65 | &self, |
| 66 | first_sequence: &[&str], |
| 67 | first_start: usize, |
| 68 | first_end: usize, |
| 69 | second_sequence: &[&str], |
| 70 | second_start: usize, |
| 71 | second_end: usize, |
| 72 | ) -> Vec<String> { |
| 73 | if !(first_start < first_end && second_start < second_end) { |
| 74 | return Vec::new(); |
| 75 | } |
| 76 | let (mut first, second) = if second_end - second_start < first_end - first_start { |
| 77 | ( |
| 78 | self.dump("+" , second_sequence, second_start, second_end), |
| 79 | self.dump("-" , first_sequence, first_start, first_end), |
| 80 | ) |
| 81 | } else { |
| 82 | ( |
| 83 | self.dump("-" , first_sequence, first_start, first_end), |
| 84 | self.dump("+" , second_sequence, second_start, second_end), |
| 85 | ) |
| 86 | }; |
| 87 | for s in second { |
| 88 | first.push(s); |
| 89 | } |
| 90 | first |
| 91 | } |
| 92 | |
| 93 | fn fancy_replace( |
| 94 | &self, |
| 95 | first_sequence: &[&str], |
| 96 | first_start: usize, |
| 97 | first_end: usize, |
| 98 | second_sequence: &[&str], |
| 99 | second_start: usize, |
| 100 | second_end: usize, |
| 101 | ) -> Vec<String> { |
| 102 | let mut res = Vec::new(); |
| 103 | let (mut best_ratio, cutoff) = (0.74, 0.75); |
| 104 | let (mut best_i, mut best_j) = (0, 0); |
| 105 | let mut eqi: Option<usize> = None; |
| 106 | let mut eqj: Option<usize> = None; |
| 107 | for (j, second_sequence_str) in second_sequence |
| 108 | .iter() |
| 109 | .enumerate() |
| 110 | .take(second_end) |
| 111 | .skip(second_start) |
| 112 | { |
| 113 | for (i, first_sequence_str) in first_sequence |
| 114 | .iter() |
| 115 | .enumerate() |
| 116 | .take(second_end) |
| 117 | .skip(second_start) |
| 118 | { |
| 119 | if first_sequence_str == second_sequence_str { |
| 120 | if eqi.is_none() { |
| 121 | eqi = Some(i); |
| 122 | eqj = Some(j); |
| 123 | } |
| 124 | continue; |
| 125 | } |
| 126 | let (first_sequence_chars, second_sequence_chars) = ( |
| 127 | first_sequence_str.chars().collect::<Vec<char>>(), |
| 128 | second_sequence_str.chars().collect::<Vec<char>>(), |
| 129 | ); |
| 130 | let mut cruncher = |
| 131 | SequenceMatcher::new(&first_sequence_chars, &second_sequence_chars); |
| 132 | cruncher.set_is_junk(self.char_junk); |
| 133 | if cruncher.ratio() > best_ratio { |
| 134 | best_ratio = cruncher.ratio(); |
| 135 | best_i = i; |
| 136 | best_j = j; |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | if best_ratio < cutoff { |
| 141 | if eqi.is_none() { |
| 142 | res.extend( |
| 143 | self.plain_replace( |
| 144 | first_sequence, |
| 145 | first_start, |
| 146 | first_end, |
| 147 | second_sequence, |
| 148 | second_start, |
| 149 | second_end, |
| 150 | ).iter() |
| 151 | .cloned(), |
| 152 | ); |
| 153 | return res; |
| 154 | } |
| 155 | best_i = eqi.unwrap(); |
| 156 | best_j = eqj.unwrap(); |
| 157 | } else { |
| 158 | eqi = None; |
| 159 | } |
| 160 | res.extend( |
| 161 | self.fancy_helper( |
| 162 | first_sequence, |
| 163 | first_start, |
| 164 | best_i, |
| 165 | second_sequence, |
| 166 | second_start, |
| 167 | best_j, |
| 168 | ).iter() |
| 169 | .cloned(), |
| 170 | ); |
| 171 | let first_element = &first_sequence[best_i]; |
| 172 | let second_element = &second_sequence[best_j]; |
| 173 | if eqi.is_none() { |
| 174 | let (mut first_tag, mut second_tag) = (String::new(), String::new()); |
| 175 | let first_element_chars: Vec<char> = first_element.chars().collect(); |
| 176 | let second_element_chars: Vec<char> = second_element.chars().collect(); |
| 177 | let mut cruncher = SequenceMatcher::new(&first_element_chars, &second_element_chars); |
| 178 | cruncher.set_is_junk(self.char_junk); |
| 179 | for opcode in &cruncher.get_opcodes() { |
| 180 | let (first_length, second_length) = ( |
| 181 | opcode.first_end - opcode.first_start, |
| 182 | opcode.second_end - opcode.second_start, |
| 183 | ); |
| 184 | match opcode.tag.as_ref() { |
| 185 | "replace" => { |
| 186 | first_tag.push_str(&str_with_similar_chars('^' , first_length)); |
| 187 | second_tag.push_str(&str_with_similar_chars('^' , second_length)); |
| 188 | } |
| 189 | "delete" => { |
| 190 | first_tag.push_str(&str_with_similar_chars('-' , first_length)); |
| 191 | } |
| 192 | "insert" => { |
| 193 | second_tag.push_str(&str_with_similar_chars('+' , second_length)); |
| 194 | } |
| 195 | "equal" => { |
| 196 | first_tag.push_str(&str_with_similar_chars(' ' , first_length)); |
| 197 | second_tag.push_str(&str_with_similar_chars(' ' , second_length)); |
| 198 | } |
| 199 | _ => {} |
| 200 | } |
| 201 | } |
| 202 | res.extend( |
| 203 | self.qformat(&first_element, &second_element, &first_tag, &second_tag) |
| 204 | .iter() |
| 205 | .cloned(), |
| 206 | ); |
| 207 | } else { |
| 208 | let mut s = String::from(" " ); |
| 209 | s.push_str(&first_element); |
| 210 | res.push(s); |
| 211 | } |
| 212 | res.extend( |
| 213 | self.fancy_helper( |
| 214 | first_sequence, |
| 215 | best_i + 1, |
| 216 | first_end, |
| 217 | second_sequence, |
| 218 | best_j + 1, |
| 219 | second_end, |
| 220 | ).iter() |
| 221 | .cloned(), |
| 222 | ); |
| 223 | res |
| 224 | } |
| 225 | |
| 226 | fn fancy_helper( |
| 227 | &self, |
| 228 | first_sequence: &[&str], |
| 229 | first_start: usize, |
| 230 | first_end: usize, |
| 231 | second_sequence: &[&str], |
| 232 | second_start: usize, |
| 233 | second_end: usize, |
| 234 | ) -> Vec<String> { |
| 235 | let mut res = Vec::new(); |
| 236 | if first_start < first_end { |
| 237 | if second_start < second_end { |
| 238 | res = self.fancy_replace( |
| 239 | first_sequence, |
| 240 | first_start, |
| 241 | first_end, |
| 242 | second_sequence, |
| 243 | second_start, |
| 244 | second_end, |
| 245 | ); |
| 246 | } else { |
| 247 | res = self.dump("-" , first_sequence, first_start, first_end); |
| 248 | } |
| 249 | } else if second_start < second_end { |
| 250 | res = self.dump("+" , second_sequence, second_start, second_end); |
| 251 | } |
| 252 | res |
| 253 | } |
| 254 | |
| 255 | fn qformat( |
| 256 | &self, |
| 257 | first_line: &str, |
| 258 | second_line: &str, |
| 259 | first_tags: &str, |
| 260 | second_tags: &str, |
| 261 | ) -> Vec<String> { |
| 262 | let mut res = Vec::new(); |
| 263 | let mut first_tags = first_tags; |
| 264 | let mut second_tags = second_tags; |
| 265 | let mut common = cmp::min( |
| 266 | count_leading(first_line, ' \t' ), |
| 267 | count_leading(second_line, ' \t' ), |
| 268 | ); |
| 269 | common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' ' )); |
| 270 | common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' ' )); |
| 271 | first_tags = first_tags.split_at(common).1.trim_right(); |
| 272 | second_tags = second_tags.split_at(common).1.trim_right(); |
| 273 | let mut s = format!("- {}" , first_line); |
| 274 | res.push(s); |
| 275 | if first_tags != "" { |
| 276 | s = format!("? {}{} \n" , str_with_similar_chars(' \t' , common), first_tags); |
| 277 | res.push(s); |
| 278 | } |
| 279 | s = format!("+ {}" , second_line); |
| 280 | res.push(s); |
| 281 | if second_tags != "" { |
| 282 | s = format!( |
| 283 | "? {}{} \n" , |
| 284 | str_with_similar_chars(' \t' , common), |
| 285 | second_tags |
| 286 | ); |
| 287 | res.push(s); |
| 288 | } |
| 289 | res |
| 290 | } |
| 291 | |
| 292 | pub fn restore(delta: &[String], which: usize) -> Vec<String> { |
| 293 | if !(which == 1 || which == 2) { |
| 294 | panic!("Second parameter must be 1 or 2" ); |
| 295 | } |
| 296 | let mut res = Vec::new(); |
| 297 | let tag = if which == 1 { "- " } else { "+ " }.to_string(); |
| 298 | let prefixes = vec![tag, " " .to_string()]; |
| 299 | for line in delta { |
| 300 | for prefix in &prefixes { |
| 301 | if line.starts_with(prefix) { |
| 302 | res.push(line.split_at(2).1.to_string()); |
| 303 | } |
| 304 | } |
| 305 | } |
| 306 | res |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | #[test] |
| 311 | fn test_fancy_replace() { |
| 312 | let differ = Differ::new(); |
| 313 | let result = differ |
| 314 | .fancy_replace(&vec!["abcDefghiJkl \n" ], 0, 1, &vec!["abcdefGhijkl \n" ], 0, 1) |
| 315 | .join("" ); |
| 316 | assert_eq!( |
| 317 | result, |
| 318 | "- abcDefghiJkl \n? ^ ^ ^ \n+ abcdefGhijkl \n? ^ ^ ^ \n" |
| 319 | ); |
| 320 | } |
| 321 | |
| 322 | #[test] |
| 323 | fn test_qformat() { |
| 324 | let differ = Differ::new(); |
| 325 | let result = differ.qformat( |
| 326 | " \tabcDefghiJkl \n" , |
| 327 | " \tabcdefGhijkl \n" , |
| 328 | " ^ ^ ^ " , |
| 329 | " ^ ^ ^ " , |
| 330 | ); |
| 331 | assert_eq!( |
| 332 | result, |
| 333 | vec![ |
| 334 | "- \tabcDefghiJkl \n" , |
| 335 | "? \t ^ ^ ^ \n" , |
| 336 | "+ \tabcdefGhijkl \n" , |
| 337 | "? \t ^ ^ ^ \n" , |
| 338 | ] |
| 339 | ); |
| 340 | } |
| 341 | |