| 1 | use std::collections::HashMap; |
| 2 | use std::hash::Hash; |
| 3 | |
| 4 | use super::DiffableStrRef; |
| 5 | |
| 6 | // quick and dirty way to get an upper sequence ratio. |
| 7 | pub fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 { |
| 8 | let n: usize = seq1.len() + seq2.len(); |
| 9 | if n == 0 { |
| 10 | 1.0 |
| 11 | } else { |
| 12 | 2.0 * seq1.len().min(seq2.len()) as f32 / n as f32 |
| 13 | } |
| 14 | } |
| 15 | |
| 16 | /// Internal utility to calculate an upper bound for a ratio for |
| 17 | /// [`get_close_matches`]. This is based on Python's difflib approach |
| 18 | /// of considering the two sets to be multisets. |
| 19 | /// |
| 20 | /// It counts the number of matches without regard to order, which is an |
| 21 | /// obvious upper bound. |
| 22 | pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>); |
| 23 | |
| 24 | impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> { |
| 25 | pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> { |
| 26 | let mut counts = HashMap::new(); |
| 27 | for &word in seq { |
| 28 | *counts.entry(word).or_insert(0) += 1; |
| 29 | } |
| 30 | QuickSeqRatio(counts) |
| 31 | } |
| 32 | |
| 33 | pub fn calc(&self, seq: &[&T]) -> f32 { |
| 34 | let n = self.0.len() + seq.len(); |
| 35 | if n == 0 { |
| 36 | return 1.0; |
| 37 | } |
| 38 | |
| 39 | let mut available = HashMap::new(); |
| 40 | let mut matches = 0; |
| 41 | for &word in seq { |
| 42 | let x = if let Some(count) = available.get(&word) { |
| 43 | *count |
| 44 | } else { |
| 45 | self.0.get(&word).copied().unwrap_or(0) |
| 46 | }; |
| 47 | available.insert(word, x - 1); |
| 48 | if x > 0 { |
| 49 | matches += 1; |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | 2.0 * matches as f32 / n as f32 |
| 54 | } |
| 55 | } |
| 56 | |