lib.rs source code [crates/phf_generator/src/lib.rs]

1	#![doc(html_root_url = "https://docs.rs/phf_generator/0.9")]
2	use phf_shared::{HashKey, PhfHash};
3	use rand::distributions::Standard;
4	use rand::rngs::SmallRng;
5	use rand::{Rng, SeedableRng};
6
7	const DEFAULT_LAMBDA: usize = `5`;
8
9	const FIXED_SEED: u64 = `1234567890`;
10
11	pub struct HashState {
12	pub key: HashKey,
13	pub disps: Vec<(u32, u32)>,
14	pub map: Vec<usize>,
15	}
16
17	pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
18	SmallRng::seed_from_u64(FIXED_SEED)
19	.sample_iter(Standard)
20	.find_map(\|key\| try_generate_hash(entries, key))
21	.expect(msg:"failed to solve PHF")
22	}
23
24	fn try_generate_hash<H: PhfHash>(entries: &[H], key: HashKey) -> Option<HashState> {
25	struct Bucket {
26	idx: usize,
27	keys: Vec<usize>,
28	}
29
30	let hashes: Vec<_> = entries
31	.iter()
32	.map(\|entry\| phf_shared::hash(entry, &key))
33	.collect();
34
35	let buckets_len = (hashes.len() + DEFAULT_LAMBDA - `1`) / DEFAULT_LAMBDA;
36	let mut buckets = (`0`..buckets_len)
37	.map(\|i\| Bucket {
38	idx: i,
39	keys: vec![],
40	})
41	.collect::<Vec<_>>();
42
43	for (i, hash) in hashes.iter().enumerate() {
44	buckets[(hash.g % (buckets_len as u32)) as usize]
45	.keys
46	.push(i);
47	}
48
49	// Sort descending
50	buckets.sort_by(\|a, b\| a.keys.len().cmp(&b.keys.len()).reverse());
51
52	let table_len = hashes.len();
53	let mut map = vec![None; table_len];
54	let mut disps = vec![(`0u32`, `0u32`); buckets_len];
55
56	// store whether an element from the bucket being placed is
57	// located at a certain position, to allow for efficient overlap
58	// checks. It works by storing the generation in each cell and
59	// each new placement-attempt is a new generation, so you can tell
60	// if this is legitimately full by checking that the generations
61	// are equal. (A u64 is far too large to overflow in a reasonable
62	// time for current hardware.)
63	let mut try_map = vec![`0u64`; table_len];
64	let mut generation = `0u64`;
65
66	// the actual values corresponding to the markers above, as
67	// (index, key) pairs, for adding to the main map once we've
68	// chosen the right disps.
69	let mut values_to_add = vec![];
70
71	'buckets: for bucket in &buckets {
72	for d1 in `0`..(table_len as u32) {
73	'disps: for d2 in `0`..(table_len as u32) {
74	values_to_add.clear();
75	generation += `1`;
76
77	for &key in &bucket.keys {
78	let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2)
79	% (table_len as u32)) as usize;
80	if map[idx].is_some() \|\| try_map[idx] == generation {
81	continue 'disps;
82	}
83	try_map[idx] = generation;
84	values_to_add.push((idx, key));
85	}
86
87	// We've picked a good set of disps
88	disps[bucket.idx] = (d1, d2);
89	for &(idx, key) in &values_to_add {
90	map[idx] = Some(key);
91	}
92	continue 'buckets;
93	}
94	}
95
96	// Unable to find displacements for a bucket
97	return None;
98	}
99
100	Some(HashState {
101	key,
102	disps,
103	map: map.into_iter().map(\|i\| i.unwrap()).collect(),
104	})
105	}
106