| 1 | use alloc::vec::Vec; |
| 2 | |
| 3 | #[cfg (feature = "write_std" )] |
| 4 | type IndexSet<K> = indexmap::IndexSet<K>; |
| 5 | #[cfg (not(feature = "write_std" ))] |
| 6 | type IndexSet<K> = indexmap::IndexSet<K, hashbrown::DefaultHashBuilder>; |
| 7 | |
| 8 | /// An identifier for an entry in a string table. |
| 9 | #[derive (Debug, Clone, Copy, PartialEq, Eq)] |
| 10 | pub struct StringId(usize); |
| 11 | |
| 12 | #[derive (Debug, Default)] |
| 13 | pub(crate) struct StringTable<'a> { |
| 14 | strings: IndexSet<&'a [u8]>, |
| 15 | offsets: Vec<usize>, |
| 16 | } |
| 17 | |
| 18 | impl<'a> StringTable<'a> { |
| 19 | /// Add a string to the string table. |
| 20 | /// |
| 21 | /// Panics if the string table has already been written, or |
| 22 | /// if the string contains a null byte. |
| 23 | pub fn add(&mut self, string: &'a [u8]) -> StringId { |
| 24 | assert!(self.offsets.is_empty()); |
| 25 | assert!(!string.contains(&0)); |
| 26 | let id = self.strings.insert_full(string).0; |
| 27 | StringId(id) |
| 28 | } |
| 29 | |
| 30 | /// Return the id of the given string. |
| 31 | /// |
| 32 | /// Panics if the string is not in the string table. |
| 33 | #[allow (dead_code)] |
| 34 | pub fn get_id(&self, string: &[u8]) -> StringId { |
| 35 | let id = self.strings.get_index_of(string).unwrap(); |
| 36 | StringId(id) |
| 37 | } |
| 38 | |
| 39 | /// Return the string for the given id. |
| 40 | /// |
| 41 | /// Panics if the string is not in the string table. |
| 42 | #[allow (dead_code)] |
| 43 | pub fn get_string(&self, id: StringId) -> &'a [u8] { |
| 44 | self.strings.get_index(id.0).unwrap() |
| 45 | } |
| 46 | |
| 47 | /// Return the offset of the given string. |
| 48 | /// |
| 49 | /// Panics if the string table has not been written, or |
| 50 | /// if the string is not in the string table. |
| 51 | pub fn get_offset(&self, id: StringId) -> usize { |
| 52 | self.offsets[id.0] |
| 53 | } |
| 54 | |
| 55 | /// Append the string table to the given `Vec`, and |
| 56 | /// calculate the list of string offsets. |
| 57 | /// |
| 58 | /// `base` is the initial string table offset. For example, |
| 59 | /// this should be 1 for ELF, to account for the initial |
| 60 | /// null byte (which must have been written by the caller). |
| 61 | /// |
| 62 | /// Panics if the string table has already been written. |
| 63 | pub fn write(&mut self, base: usize, w: &mut Vec<u8>) { |
| 64 | assert!(self.offsets.is_empty()); |
| 65 | |
| 66 | let mut ids: Vec<_> = (0..self.strings.len()).collect(); |
| 67 | sort(&mut ids, 1, &self.strings); |
| 68 | |
| 69 | self.offsets = vec![0; ids.len()]; |
| 70 | let mut offset = base; |
| 71 | let mut previous = &[][..]; |
| 72 | for id in ids { |
| 73 | let string = self.strings.get_index(id).unwrap(); |
| 74 | if previous.ends_with(string) { |
| 75 | self.offsets[id] = offset - string.len() - 1; |
| 76 | } else { |
| 77 | self.offsets[id] = offset; |
| 78 | w.extend_from_slice(string); |
| 79 | w.push(0); |
| 80 | offset += string.len() + 1; |
| 81 | previous = string; |
| 82 | } |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | /// Calculate the size in bytes of the string table. |
| 87 | /// |
| 88 | /// `base` is the initial string table offset. For example, |
| 89 | /// this should be 1 for ELF, to account for the initial |
| 90 | /// null byte. |
| 91 | #[allow (dead_code)] |
| 92 | pub fn size(&self, base: usize) -> usize { |
| 93 | // TODO: cache this result? |
| 94 | let mut ids: Vec<_> = (0..self.strings.len()).collect(); |
| 95 | sort(&mut ids, 1, &self.strings); |
| 96 | |
| 97 | let mut size = base; |
| 98 | let mut previous = &[][..]; |
| 99 | for id in ids { |
| 100 | let string = self.strings.get_index(id).unwrap(); |
| 101 | if !previous.ends_with(string) { |
| 102 | size += string.len() + 1; |
| 103 | previous = string; |
| 104 | } |
| 105 | } |
| 106 | size |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | // Multi-key quicksort. |
| 111 | // |
| 112 | // Ordering is such that if a string is a suffix of at least one other string, |
| 113 | // then it is placed immediately after one of those strings. That is: |
| 114 | // - comparison starts at the end of the string |
| 115 | // - shorter strings come later |
| 116 | // |
| 117 | // Based on the implementation in LLVM. |
| 118 | fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) { |
| 119 | loop { |
| 120 | if ids.len() <= 1 { |
| 121 | return; |
| 122 | } |
| 123 | |
| 124 | let pivot = byte(ids[0], pos, strings); |
| 125 | let mut lower = 0; |
| 126 | let mut upper = ids.len(); |
| 127 | let mut i = 1; |
| 128 | while i < upper { |
| 129 | let b = byte(ids[i], pos, strings); |
| 130 | if b > pivot { |
| 131 | ids.swap(lower, i); |
| 132 | lower += 1; |
| 133 | i += 1; |
| 134 | } else if b < pivot { |
| 135 | upper -= 1; |
| 136 | ids.swap(upper, i); |
| 137 | } else { |
| 138 | i += 1; |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | sort(&mut ids[..lower], pos, strings); |
| 143 | sort(&mut ids[upper..], pos, strings); |
| 144 | |
| 145 | if pivot == 0 { |
| 146 | return; |
| 147 | } |
| 148 | ids = &mut ids[lower..upper]; |
| 149 | pos += 1; |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 { |
| 154 | let string: &&[u8] = strings.get_index(id).unwrap(); |
| 155 | let len: usize = string.len(); |
| 156 | if len >= pos { |
| 157 | string[len - pos] |
| 158 | } else { |
| 159 | // We know the strings don't contain null bytes. |
| 160 | 0 |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | #[cfg (test)] |
| 165 | mod tests { |
| 166 | use super::*; |
| 167 | |
| 168 | #[test ] |
| 169 | fn string_table() { |
| 170 | let mut table = StringTable::default(); |
| 171 | let id0 = table.add(b"" ); |
| 172 | let id1 = table.add(b"foo" ); |
| 173 | let id2 = table.add(b"bar" ); |
| 174 | let id3 = table.add(b"foobar" ); |
| 175 | |
| 176 | let mut data = Vec::new(); |
| 177 | data.push(0); |
| 178 | table.write(1, &mut data); |
| 179 | assert_eq!(data, b" \0foobar \0foo \0" ); |
| 180 | |
| 181 | assert_eq!(table.get_offset(id0), 11); |
| 182 | assert_eq!(table.get_offset(id1), 8); |
| 183 | assert_eq!(table.get_offset(id2), 4); |
| 184 | assert_eq!(table.get_offset(id3), 1); |
| 185 | } |
| 186 | } |
| 187 | |