1 | use std::collections::HashMap; |
2 | use std::hash::Hash; |
3 | |
4 | use super::DiffableStrRef; |
5 | |
6 | // quick and dirty way to get an upper sequence ratio. |
7 | pub fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 { |
8 | let n: usize = seq1.len() + seq2.len(); |
9 | if n == 0 { |
10 | 1.0 |
11 | } else { |
12 | 2.0 * seq1.len().min(seq2.len()) as f32 / n as f32 |
13 | } |
14 | } |
15 | |
16 | /// Internal utility to calculate an upper bound for a ratio for |
17 | /// [`get_close_matches`]. This is based on Python's difflib approach |
18 | /// of considering the two sets to be multisets. |
19 | /// |
20 | /// It counts the number of matches without regard to order, which is an |
21 | /// obvious upper bound. |
22 | pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>); |
23 | |
24 | impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> { |
25 | pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> { |
26 | let mut counts = HashMap::new(); |
27 | for &word in seq { |
28 | *counts.entry(word).or_insert(0) += 1; |
29 | } |
30 | QuickSeqRatio(counts) |
31 | } |
32 | |
33 | pub fn calc(&self, seq: &[&T]) -> f32 { |
34 | let n = self.0.len() + seq.len(); |
35 | if n == 0 { |
36 | return 1.0; |
37 | } |
38 | |
39 | let mut available = HashMap::new(); |
40 | let mut matches = 0; |
41 | for &word in seq { |
42 | let x = if let Some(count) = available.get(&word) { |
43 | *count |
44 | } else { |
45 | self.0.get(&word).copied().unwrap_or(0) |
46 | }; |
47 | available.insert(word, x - 1); |
48 | if x > 0 { |
49 | matches += 1; |
50 | } |
51 | } |
52 | |
53 | 2.0 * matches as f32 / n as f32 |
54 | } |
55 | } |
56 | |