1 | use alloc::vec::Vec; |
2 | |
3 | #[cfg (feature = "write_std" )] |
4 | type IndexSet<K> = indexmap::IndexSet<K>; |
5 | #[cfg (not(feature = "write_std" ))] |
6 | type IndexSet<K> = indexmap::IndexSet<K, hashbrown::DefaultHashBuilder>; |
7 | |
8 | /// An identifier for an entry in a string table. |
9 | #[derive (Debug, Clone, Copy, PartialEq, Eq)] |
10 | pub struct StringId(usize); |
11 | |
12 | #[derive (Debug, Default)] |
13 | pub(crate) struct StringTable<'a> { |
14 | strings: IndexSet<&'a [u8]>, |
15 | offsets: Vec<usize>, |
16 | } |
17 | |
18 | impl<'a> StringTable<'a> { |
19 | /// Add a string to the string table. |
20 | /// |
21 | /// Panics if the string table has already been written, or |
22 | /// if the string contains a null byte. |
23 | pub fn add(&mut self, string: &'a [u8]) -> StringId { |
24 | assert!(self.offsets.is_empty()); |
25 | assert!(!string.contains(&0)); |
26 | let id = self.strings.insert_full(string).0; |
27 | StringId(id) |
28 | } |
29 | |
30 | /// Return the id of the given string. |
31 | /// |
32 | /// Panics if the string is not in the string table. |
33 | #[allow (dead_code)] |
34 | pub fn get_id(&self, string: &[u8]) -> StringId { |
35 | let id = self.strings.get_index_of(string).unwrap(); |
36 | StringId(id) |
37 | } |
38 | |
39 | /// Return the string for the given id. |
40 | /// |
41 | /// Panics if the string is not in the string table. |
42 | #[allow (dead_code)] |
43 | pub fn get_string(&self, id: StringId) -> &'a [u8] { |
44 | self.strings.get_index(id.0).unwrap() |
45 | } |
46 | |
47 | /// Return the offset of the given string. |
48 | /// |
49 | /// Panics if the string table has not been written, or |
50 | /// if the string is not in the string table. |
51 | pub fn get_offset(&self, id: StringId) -> usize { |
52 | self.offsets[id.0] |
53 | } |
54 | |
55 | /// Append the string table to the given `Vec`, and |
56 | /// calculate the list of string offsets. |
57 | /// |
58 | /// `base` is the initial string table offset. For example, |
59 | /// this should be 1 for ELF, to account for the initial |
60 | /// null byte (which must have been written by the caller). |
61 | /// |
62 | /// Panics if the string table has already been written. |
63 | pub fn write(&mut self, base: usize, w: &mut Vec<u8>) { |
64 | assert!(self.offsets.is_empty()); |
65 | |
66 | let mut ids: Vec<_> = (0..self.strings.len()).collect(); |
67 | sort(&mut ids, 1, &self.strings); |
68 | |
69 | self.offsets = vec![0; ids.len()]; |
70 | let mut offset = base; |
71 | let mut previous = &[][..]; |
72 | for id in ids { |
73 | let string = self.strings.get_index(id).unwrap(); |
74 | if previous.ends_with(string) { |
75 | self.offsets[id] = offset - string.len() - 1; |
76 | } else { |
77 | self.offsets[id] = offset; |
78 | w.extend_from_slice(string); |
79 | w.push(0); |
80 | offset += string.len() + 1; |
81 | previous = string; |
82 | } |
83 | } |
84 | } |
85 | |
86 | /// Calculate the size in bytes of the string table. |
87 | /// |
88 | /// `base` is the initial string table offset. For example, |
89 | /// this should be 1 for ELF, to account for the initial |
90 | /// null byte. |
91 | #[allow (dead_code)] |
92 | pub fn size(&self, base: usize) -> usize { |
93 | // TODO: cache this result? |
94 | let mut ids: Vec<_> = (0..self.strings.len()).collect(); |
95 | sort(&mut ids, 1, &self.strings); |
96 | |
97 | let mut size = base; |
98 | let mut previous = &[][..]; |
99 | for id in ids { |
100 | let string = self.strings.get_index(id).unwrap(); |
101 | if !previous.ends_with(string) { |
102 | size += string.len() + 1; |
103 | previous = string; |
104 | } |
105 | } |
106 | size |
107 | } |
108 | } |
109 | |
110 | // Multi-key quicksort. |
111 | // |
112 | // Ordering is such that if a string is a suffix of at least one other string, |
113 | // then it is placed immediately after one of those strings. That is: |
114 | // - comparison starts at the end of the string |
115 | // - shorter strings come later |
116 | // |
117 | // Based on the implementation in LLVM. |
118 | fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) { |
119 | loop { |
120 | if ids.len() <= 1 { |
121 | return; |
122 | } |
123 | |
124 | let pivot = byte(ids[0], pos, strings); |
125 | let mut lower = 0; |
126 | let mut upper = ids.len(); |
127 | let mut i = 1; |
128 | while i < upper { |
129 | let b = byte(ids[i], pos, strings); |
130 | if b > pivot { |
131 | ids.swap(lower, i); |
132 | lower += 1; |
133 | i += 1; |
134 | } else if b < pivot { |
135 | upper -= 1; |
136 | ids.swap(upper, i); |
137 | } else { |
138 | i += 1; |
139 | } |
140 | } |
141 | |
142 | sort(&mut ids[..lower], pos, strings); |
143 | sort(&mut ids[upper..], pos, strings); |
144 | |
145 | if pivot == 0 { |
146 | return; |
147 | } |
148 | ids = &mut ids[lower..upper]; |
149 | pos += 1; |
150 | } |
151 | } |
152 | |
153 | fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 { |
154 | let string: &&[u8] = strings.get_index(id).unwrap(); |
155 | let len: usize = string.len(); |
156 | if len >= pos { |
157 | string[len - pos] |
158 | } else { |
159 | // We know the strings don't contain null bytes. |
160 | 0 |
161 | } |
162 | } |
163 | |
164 | #[cfg (test)] |
165 | mod tests { |
166 | use super::*; |
167 | |
168 | #[test ] |
169 | fn string_table() { |
170 | let mut table = StringTable::default(); |
171 | let id0 = table.add(b"" ); |
172 | let id1 = table.add(b"foo" ); |
173 | let id2 = table.add(b"bar" ); |
174 | let id3 = table.add(b"foobar" ); |
175 | |
176 | let mut data = Vec::new(); |
177 | data.push(0); |
178 | table.write(1, &mut data); |
179 | assert_eq!(data, b" \0foobar \0foo \0" ); |
180 | |
181 | assert_eq!(table.get_offset(id0), 11); |
182 | assert_eq!(table.get_offset(id1), 8); |
183 | assert_eq!(table.get_offset(id2), 4); |
184 | assert_eq!(table.get_offset(id3), 1); |
185 | } |
186 | } |
187 | |