1 | use std::cmp::min; |
2 | |
3 | /// Calculate the levenshtein distance between two strings. |
4 | pub(crate) fn distance(s1: &str, s2: &str) -> usize { |
5 | let v1: Vec<char> = s1.chars().collect(); |
6 | let v2: Vec<char> = s2.chars().collect(); |
7 | |
8 | let l_v1 = v1.len(); |
9 | let l_v2 = v2.len(); |
10 | |
11 | if l_v1 == 0 { |
12 | return l_v2; |
13 | } |
14 | if l_v2 == 0 { |
15 | return l_v1; |
16 | } |
17 | if l_v1 > l_v2 { |
18 | return distance(s2, s1); |
19 | } |
20 | |
21 | let mut col: Vec<usize> = (0..=l_v1).collect(); |
22 | |
23 | for i in 1..=l_v2 { |
24 | let mut last_diag = col[0]; |
25 | col[0] += 1; |
26 | for j in 1..=l_v1 { |
27 | let last_diag_temp = col[j]; |
28 | if v1[j-1] == v2[i-1] { |
29 | col[j] = last_diag; |
30 | } else { |
31 | col[j] = min(last_diag, min(col[j-1], col[j])) + 1; |
32 | } |
33 | last_diag = last_diag_temp; |
34 | } |
35 | } |
36 | |
37 | col[l_v1] |
38 | } |
39 | |
40 | |
41 | #[cfg (test)] |
42 | mod tests { |
43 | use crate::levenshtein::*; |
44 | #[test ] |
45 | fn test1() { |
46 | assert_eq!(distance("kitten" , "sitting" ), 3); |
47 | } |
48 | |
49 | #[test ] |
50 | fn test_diff_len() { |
51 | assert_eq!(distance("kit" , "kitten" ), 3); |
52 | } |
53 | |
54 | #[test ] |
55 | fn test_equal() { |
56 | assert_eq!(distance("test" , "test" ), 0); |
57 | assert_eq!(distance("" , "" ), 0); |
58 | assert_eq!(distance("long string with space" , "long string with space" ), 0); |
59 | assert_eq!(distance("unicode 😀" , "unicode 😀" ), 0); |
60 | } |
61 | |
62 | #[test ] |
63 | fn test_one_empty() { |
64 | assert_eq!(distance("test" , "" ), 4); |
65 | assert_eq!(distance("" , "test" ), 4); |
66 | assert_eq!(distance("" , "" ), 0); |
67 | assert_eq!(distance("long string" , "" ), 11); |
68 | assert_eq!(distance("😀" , "" ), 1); |
69 | } |
70 | } |
71 | |