1#![doc(html_root_url = "https://docs.rs/phf_generator/0.9")]
2use phf_shared::{HashKey, PhfHash};
3use rand::distributions::Standard;
4use rand::rngs::SmallRng;
5use rand::{Rng, SeedableRng};
6
7const DEFAULT_LAMBDA: usize = 5;
8
9const FIXED_SEED: u64 = 1234567890;
10
11pub struct HashState {
12 pub key: HashKey,
13 pub disps: Vec<(u32, u32)>,
14 pub map: Vec<usize>,
15}
16
17pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
18 SmallRng::seed_from_u64(FIXED_SEED)
19 .sample_iter(Standard)
20 .find_map(|key| try_generate_hash(entries, key))
21 .expect(msg:"failed to solve PHF")
22}
23
24fn try_generate_hash<H: PhfHash>(entries: &[H], key: HashKey) -> Option<HashState> {
25 struct Bucket {
26 idx: usize,
27 keys: Vec<usize>,
28 }
29
30 let hashes: Vec<_> = entries
31 .iter()
32 .map(|entry| phf_shared::hash(entry, &key))
33 .collect();
34
35 let buckets_len = (hashes.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
36 let mut buckets = (0..buckets_len)
37 .map(|i| Bucket {
38 idx: i,
39 keys: vec![],
40 })
41 .collect::<Vec<_>>();
42
43 for (i, hash) in hashes.iter().enumerate() {
44 buckets[(hash.g % (buckets_len as u32)) as usize]
45 .keys
46 .push(i);
47 }
48
49 // Sort descending
50 buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());
51
52 let table_len = hashes.len();
53 let mut map = vec![None; table_len];
54 let mut disps = vec![(0u32, 0u32); buckets_len];
55
56 // store whether an element from the bucket being placed is
57 // located at a certain position, to allow for efficient overlap
58 // checks. It works by storing the generation in each cell and
59 // each new placement-attempt is a new generation, so you can tell
60 // if this is legitimately full by checking that the generations
61 // are equal. (A u64 is far too large to overflow in a reasonable
62 // time for current hardware.)
63 let mut try_map = vec![0u64; table_len];
64 let mut generation = 0u64;
65
66 // the actual values corresponding to the markers above, as
67 // (index, key) pairs, for adding to the main map once we've
68 // chosen the right disps.
69 let mut values_to_add = vec![];
70
71 'buckets: for bucket in &buckets {
72 for d1 in 0..(table_len as u32) {
73 'disps: for d2 in 0..(table_len as u32) {
74 values_to_add.clear();
75 generation += 1;
76
77 for &key in &bucket.keys {
78 let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2)
79 % (table_len as u32)) as usize;
80 if map[idx].is_some() || try_map[idx] == generation {
81 continue 'disps;
82 }
83 try_map[idx] = generation;
84 values_to_add.push((idx, key));
85 }
86
87 // We've picked a good set of disps
88 disps[bucket.idx] = (d1, d2);
89 for &(idx, key) in &values_to_add {
90 map[idx] = Some(key);
91 }
92 continue 'buckets;
93 }
94 }
95
96 // Unable to find displacements for a bucket
97 return None;
98 }
99
100 Some(HashState {
101 key,
102 disps,
103 map: map.into_iter().map(|i| i.unwrap()).collect(),
104 })
105}
106