utils.rs source code [crates/similar/src/text/utils.rs]

1	use std::collections::HashMap;
2	use std::hash::Hash;
3
4	use super::DiffableStrRef;
5
6	// quick and dirty way to get an upper sequence ratio.
7	pub fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
8	let n: usize = seq1.len() + seq2.len();
9	if n == `0` {
10	`1.0`
11	} else {
12	`2.0` * seq1.len().min(seq2.len()) as f32 / n as f32
13	}
14	}
15
16	/// Internal utility to calculate an upper bound for a ratio for
17	/// [`get_close_matches`]. This is based on Python's difflib approach
18	/// of considering the two sets to be multisets.
19	///
20	/// It counts the number of matches without regard to order, which is an
21	/// obvious upper bound.
22	pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
23
24	impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
25	pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
26	let mut counts = HashMap::new();
27	for &word in seq {
28	*counts.entry(word).or_insert(`0`) += `1`;
29	}
30	QuickSeqRatio(counts)
31	}
32
33	pub fn calc(&self, seq: &[&T]) -> f32 {
34	let n = self.0.len() + seq.len();
35	if n == `0` {
36	return `1.0`;
37	}
38
39	let mut available = HashMap::new();
40	let mut matches = `0`;
41	for &word in seq {
42	let x = if let Some(count) = available.get(&word) {
43	*count
44	} else {
45	self.0.get(&word).copied().unwrap_or(`0`)
46	};
47	available.insert(word, x - `1`);
48	if x > `0` {
49	matches += `1`;
50	}
51	}
52
53	`2.0` * matches as f32 / n as f32
54	}
55	}
56