| 1 | use std::cmp::min; |
| 2 | |
| 3 | /// Calculate the levenshtein distance between two strings. |
| 4 | pub(crate) fn distance(s1: &str, s2: &str) -> usize { |
| 5 | let v1: Vec<char> = s1.chars().collect(); |
| 6 | let v2: Vec<char> = s2.chars().collect(); |
| 7 | |
| 8 | let l_v1 = v1.len(); |
| 9 | let l_v2 = v2.len(); |
| 10 | |
| 11 | if l_v1 == 0 { |
| 12 | return l_v2; |
| 13 | } |
| 14 | if l_v2 == 0 { |
| 15 | return l_v1; |
| 16 | } |
| 17 | if l_v1 > l_v2 { |
| 18 | return distance(s2, s1); |
| 19 | } |
| 20 | |
| 21 | let mut col: Vec<usize> = (0..=l_v1).collect(); |
| 22 | |
| 23 | for i in 1..=l_v2 { |
| 24 | let mut last_diag = col[0]; |
| 25 | col[0] += 1; |
| 26 | for j in 1..=l_v1 { |
| 27 | let last_diag_temp = col[j]; |
| 28 | if v1[j-1] == v2[i-1] { |
| 29 | col[j] = last_diag; |
| 30 | } else { |
| 31 | col[j] = min(last_diag, min(col[j-1], col[j])) + 1; |
| 32 | } |
| 33 | last_diag = last_diag_temp; |
| 34 | } |
| 35 | } |
| 36 | |
| 37 | col[l_v1] |
| 38 | } |
| 39 | |
| 40 | |
| 41 | #[cfg (test)] |
| 42 | mod tests { |
| 43 | use crate::levenshtein::*; |
| 44 | #[test ] |
| 45 | fn test1() { |
| 46 | assert_eq!(distance("kitten" , "sitting" ), 3); |
| 47 | } |
| 48 | |
| 49 | #[test ] |
| 50 | fn test_diff_len() { |
| 51 | assert_eq!(distance("kit" , "kitten" ), 3); |
| 52 | } |
| 53 | |
| 54 | #[test ] |
| 55 | fn test_equal() { |
| 56 | assert_eq!(distance("test" , "test" ), 0); |
| 57 | assert_eq!(distance("" , "" ), 0); |
| 58 | assert_eq!(distance("long string with space" , "long string with space" ), 0); |
| 59 | assert_eq!(distance("unicode 😀" , "unicode 😀" ), 0); |
| 60 | } |
| 61 | |
| 62 | #[test ] |
| 63 | fn test_one_empty() { |
| 64 | assert_eq!(distance("test" , "" ), 4); |
| 65 | assert_eq!(distance("" , "test" ), 4); |
| 66 | assert_eq!(distance("" , "" ), 0); |
| 67 | assert_eq!(distance("long string" , "" ), 11); |
| 68 | assert_eq!(distance("😀" , "" ), 1); |
| 69 | } |
| 70 | } |
| 71 | |