1 | use alloc::vec::Vec; |
2 | |
3 | #[cfg (feature = "std" )] |
4 | type IndexSet<K> = indexmap::IndexSet<K>; |
5 | #[cfg (not(feature = "std" ))] |
6 | type IndexSet<K> = indexmap::IndexSet<K, hashbrown::hash_map::DefaultHashBuilder>; |
7 | |
8 | /// An identifier for an entry in a string table. |
9 | #[derive (Debug, Clone, Copy, PartialEq, Eq)] |
10 | pub struct StringId(usize); |
11 | |
12 | #[derive (Debug, Default)] |
13 | pub(crate) struct StringTable<'a> { |
14 | strings: IndexSet<&'a [u8]>, |
15 | offsets: Vec<usize>, |
16 | } |
17 | |
18 | impl<'a> StringTable<'a> { |
19 | /// Add a string to the string table. |
20 | /// |
21 | /// Panics if the string table has already been written, or |
22 | /// if the string contains a null byte. |
23 | pub fn add(&mut self, string: &'a [u8]) -> StringId { |
24 | assert!(self.offsets.is_empty()); |
25 | assert!(!string.contains(&0)); |
26 | let id = self.strings.insert_full(string).0; |
27 | StringId(id) |
28 | } |
29 | |
30 | /// Return the id of the given string. |
31 | /// |
32 | /// Panics if the string is not in the string table. |
33 | pub fn get_id(&self, string: &[u8]) -> StringId { |
34 | let id = self.strings.get_index_of(string).unwrap(); |
35 | StringId(id) |
36 | } |
37 | |
38 | /// Return the string for the given id. |
39 | /// |
40 | /// Panics if the string is not in the string table. |
41 | pub fn get_string(&self, id: StringId) -> &'a [u8] { |
42 | self.strings.get_index(id.0).unwrap() |
43 | } |
44 | |
45 | /// Return the offset of the given string. |
46 | /// |
47 | /// Panics if the string table has not been written, or |
48 | /// if the string is not in the string table. |
49 | pub fn get_offset(&self, id: StringId) -> usize { |
50 | self.offsets[id.0] |
51 | } |
52 | |
53 | /// Append the string table to the given `Vec`, and |
54 | /// calculate the list of string offsets. |
55 | /// |
56 | /// `base` is the initial string table offset. For example, |
57 | /// this should be 1 for ELF, to account for the initial |
58 | /// null byte (which must have been written by the caller). |
59 | pub fn write(&mut self, base: usize, w: &mut Vec<u8>) { |
60 | assert!(self.offsets.is_empty()); |
61 | |
62 | let mut ids: Vec<_> = (0..self.strings.len()).collect(); |
63 | sort(&mut ids, 1, &self.strings); |
64 | |
65 | self.offsets = vec![0; ids.len()]; |
66 | let mut offset = base; |
67 | let mut previous = &[][..]; |
68 | for id in ids { |
69 | let string = self.strings.get_index(id).unwrap(); |
70 | if previous.ends_with(string) { |
71 | self.offsets[id] = offset - string.len() - 1; |
72 | } else { |
73 | self.offsets[id] = offset; |
74 | w.extend_from_slice(string); |
75 | w.push(0); |
76 | offset += string.len() + 1; |
77 | previous = string; |
78 | } |
79 | } |
80 | } |
81 | } |
82 | |
83 | // Multi-key quicksort. |
84 | // |
85 | // Ordering is such that if a string is a suffix of at least one other string, |
86 | // then it is placed immediately after one of those strings. That is: |
87 | // - comparison starts at the end of the string |
88 | // - shorter strings come later |
89 | // |
90 | // Based on the implementation in LLVM. |
91 | fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) { |
92 | loop { |
93 | if ids.len() <= 1 { |
94 | return; |
95 | } |
96 | |
97 | let pivot = byte(ids[0], pos, strings); |
98 | let mut lower = 0; |
99 | let mut upper = ids.len(); |
100 | let mut i = 1; |
101 | while i < upper { |
102 | let b = byte(ids[i], pos, strings); |
103 | if b > pivot { |
104 | ids.swap(lower, i); |
105 | lower += 1; |
106 | i += 1; |
107 | } else if b < pivot { |
108 | upper -= 1; |
109 | ids.swap(upper, i); |
110 | } else { |
111 | i += 1; |
112 | } |
113 | } |
114 | |
115 | sort(&mut ids[..lower], pos, strings); |
116 | sort(&mut ids[upper..], pos, strings); |
117 | |
118 | if pivot == 0 { |
119 | return; |
120 | } |
121 | ids = &mut ids[lower..upper]; |
122 | pos += 1; |
123 | } |
124 | } |
125 | |
126 | fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 { |
127 | let string: &&[u8] = strings.get_index(id).unwrap(); |
128 | let len: usize = string.len(); |
129 | if len >= pos { |
130 | string[len - pos] |
131 | } else { |
132 | // We know the strings don't contain null bytes. |
133 | 0 |
134 | } |
135 | } |
136 | |
137 | #[cfg (test)] |
138 | mod tests { |
139 | use super::*; |
140 | |
141 | #[test ] |
142 | fn string_table() { |
143 | let mut table = StringTable::default(); |
144 | let id0 = table.add(b"" ); |
145 | let id1 = table.add(b"foo" ); |
146 | let id2 = table.add(b"bar" ); |
147 | let id3 = table.add(b"foobar" ); |
148 | |
149 | let mut data = Vec::new(); |
150 | data.push(0); |
151 | table.write(1, &mut data); |
152 | assert_eq!(data, b" \0foobar \0foo \0" ); |
153 | |
154 | assert_eq!(table.get_offset(id0), 11); |
155 | assert_eq!(table.get_offset(id1), 8); |
156 | assert_eq!(table.get_offset(id2), 4); |
157 | assert_eq!(table.get_offset(id3), 1); |
158 | } |
159 | } |
160 | |