1use alloc::vec::Vec;
2
3#[cfg(feature = "std")]
4type IndexSet<K> = indexmap::IndexSet<K>;
5#[cfg(not(feature = "std"))]
6type IndexSet<K> = indexmap::IndexSet<K, hashbrown::hash_map::DefaultHashBuilder>;
7
8/// An identifier for an entry in a string table.
9#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10pub struct StringId(usize);
11
12#[derive(Debug, Default)]
13pub(crate) struct StringTable<'a> {
14 strings: IndexSet<&'a [u8]>,
15 offsets: Vec<usize>,
16}
17
18impl<'a> StringTable<'a> {
19 /// Add a string to the string table.
20 ///
21 /// Panics if the string table has already been written, or
22 /// if the string contains a null byte.
23 pub fn add(&mut self, string: &'a [u8]) -> StringId {
24 assert!(self.offsets.is_empty());
25 assert!(!string.contains(&0));
26 let id = self.strings.insert_full(string).0;
27 StringId(id)
28 }
29
30 /// Return the id of the given string.
31 ///
32 /// Panics if the string is not in the string table.
33 pub fn get_id(&self, string: &[u8]) -> StringId {
34 let id = self.strings.get_index_of(string).unwrap();
35 StringId(id)
36 }
37
38 /// Return the string for the given id.
39 ///
40 /// Panics if the string is not in the string table.
41 pub fn get_string(&self, id: StringId) -> &'a [u8] {
42 self.strings.get_index(id.0).unwrap()
43 }
44
45 /// Return the offset of the given string.
46 ///
47 /// Panics if the string table has not been written, or
48 /// if the string is not in the string table.
49 pub fn get_offset(&self, id: StringId) -> usize {
50 self.offsets[id.0]
51 }
52
53 /// Append the string table to the given `Vec`, and
54 /// calculate the list of string offsets.
55 ///
56 /// `base` is the initial string table offset. For example,
57 /// this should be 1 for ELF, to account for the initial
58 /// null byte (which must have been written by the caller).
59 pub fn write(&mut self, base: usize, w: &mut Vec<u8>) {
60 assert!(self.offsets.is_empty());
61
62 let mut ids: Vec<_> = (0..self.strings.len()).collect();
63 sort(&mut ids, 1, &self.strings);
64
65 self.offsets = vec![0; ids.len()];
66 let mut offset = base;
67 let mut previous = &[][..];
68 for id in ids {
69 let string = self.strings.get_index(id).unwrap();
70 if previous.ends_with(string) {
71 self.offsets[id] = offset - string.len() - 1;
72 } else {
73 self.offsets[id] = offset;
74 w.extend_from_slice(string);
75 w.push(0);
76 offset += string.len() + 1;
77 previous = string;
78 }
79 }
80 }
81}
82
83// Multi-key quicksort.
84//
85// Ordering is such that if a string is a suffix of at least one other string,
86// then it is placed immediately after one of those strings. That is:
87// - comparison starts at the end of the string
88// - shorter strings come later
89//
90// Based on the implementation in LLVM.
91fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) {
92 loop {
93 if ids.len() <= 1 {
94 return;
95 }
96
97 let pivot = byte(ids[0], pos, strings);
98 let mut lower = 0;
99 let mut upper = ids.len();
100 let mut i = 1;
101 while i < upper {
102 let b = byte(ids[i], pos, strings);
103 if b > pivot {
104 ids.swap(lower, i);
105 lower += 1;
106 i += 1;
107 } else if b < pivot {
108 upper -= 1;
109 ids.swap(upper, i);
110 } else {
111 i += 1;
112 }
113 }
114
115 sort(&mut ids[..lower], pos, strings);
116 sort(&mut ids[upper..], pos, strings);
117
118 if pivot == 0 {
119 return;
120 }
121 ids = &mut ids[lower..upper];
122 pos += 1;
123 }
124}
125
126fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 {
127 let string: &&[u8] = strings.get_index(id).unwrap();
128 let len: usize = string.len();
129 if len >= pos {
130 string[len - pos]
131 } else {
132 // We know the strings don't contain null bytes.
133 0
134 }
135}
136
137#[cfg(test)]
138mod tests {
139 use super::*;
140
141 #[test]
142 fn string_table() {
143 let mut table = StringTable::default();
144 let id0 = table.add(b"");
145 let id1 = table.add(b"foo");
146 let id2 = table.add(b"bar");
147 let id3 = table.add(b"foobar");
148
149 let mut data = Vec::new();
150 data.push(0);
151 table.write(1, &mut data);
152 assert_eq!(data, b"\0foobar\0foo\0");
153
154 assert_eq!(table.get_offset(id0), 11);
155 assert_eq!(table.get_offset(id1), 8);
156 assert_eq!(table.get_offset(id2), 4);
157 assert_eq!(table.get_offset(id3), 1);
158 }
159}
160