1 | pub extern crate object; |
2 | |
3 | use std::{ |
4 | borrow::Cow, |
5 | collections::HashSet, |
6 | fmt, |
7 | path::{Path, PathBuf}, |
8 | }; |
9 | |
10 | use gimli::{EndianSlice, Reader}; |
11 | use object::{write::Object as WritableObject, FileKind, Object, ObjectSection}; |
12 | use tracing::{debug, trace}; |
13 | |
14 | use crate::{ |
15 | error::Result, |
16 | ext::EndianityExt, |
17 | index::Bucketable, |
18 | package::{dwo_identifier_of_unit, DwarfObject, InProgressDwarfPackage}, |
19 | relocate::{add_relocations, Relocate, RelocationMap}, |
20 | }; |
21 | |
22 | mod error; |
23 | mod ext; |
24 | mod index; |
25 | mod package; |
26 | mod relocate; |
27 | mod strings; |
28 | |
29 | pub use crate::error::Error; |
30 | |
/// Data-management hooks implemented by users of `thorin`, so that the caller decides how
/// buffers are stored rather than `thorin` keeping arenas internally.
pub trait Session<Relocations> {
    /// Takes ownership of `data` and returns a reference to its contents with lifetime
    /// `'session`.
    fn alloc_data<'session>(&'session self, data: Vec<u8>) -> &'session [u8];

    /// Returns a reference to the contents of `data` with lifetime `'input`.
    ///
    /// A borrowed `Cow` already holds an `'input` reference, which is returned unchanged. An
    /// owned `Cow` is handed to `alloc_data`; the resulting `'session` reference can be returned
    /// because `'session` is required to outlive `'input`.
    fn alloc_owned_cow<'input, 'session: 'input>(
        &'session self,
        data: Cow<'input, [u8]>,
    ) -> &'input [u8] {
        let owned = match data {
            Cow::Borrowed(borrowed) => return borrowed,
            Cow::Owned(owned) => owned,
        };
        self.alloc_data(owned)
    }

    /// Takes ownership of the relocations in `data` and returns a reference to them with
    /// lifetime `'session`.
    fn alloc_relocation<'session>(&'session self, data: Relocations) -> &'session Relocations;

    /// Returns a reference to the contents of the file at `path` with lifetime `'session`, or
    /// the I/O error encountered while reading it.
    fn read_input<'session>(&'session self, path: &Path) -> std::io::Result<&'session [u8]>;
}
58 | |
/// Policy for DWARF objects that an executable references but that cannot be found: skip them,
/// or report an error?
///
/// Whichever is chosen, referenced objects that are still missing when the DWARF package is
/// finished will result in an error.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum MissingReferencedObjectBehaviour {
    /// Skip missing referenced DWARF objects - useful when this is expected, i.e. the path in
    /// the executable is wrong, but the referenced object will be found because it is an input.
    Skip,
    /// Report an error when a referenced DWARF object is missing.
    Error,
}

impl MissingReferencedObjectBehaviour {
    /// Returns `true` if missing referenced objects should be skipped, and `false` if they
    /// should produce an error.
    pub fn skip_missing(&self) -> bool {
        matches!(self, Self::Skip)
    }
}
81 | |
82 | /// Builder for DWARF packages, add input objects/packages with `add_input_object` or input objects |
83 | /// referenced by an executable with `add_executable` before accessing the completed object with |
84 | /// `finish`. |
85 | pub struct DwarfPackage<'output, 'session: 'output, Sess: Session<RelocationMap>> { |
86 | sess: &'session Sess, |
87 | maybe_in_progress: Option<InProgressDwarfPackage<'output>>, |
88 | targets: HashSet<DwarfObject>, |
89 | } |
90 | |
91 | impl<'output, 'session: 'output, Sess> fmt::Debug for DwarfPackage<'output, 'session, Sess> |
92 | where |
93 | Sess: Session<RelocationMap>, |
94 | { |
95 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
96 | f&mut DebugStruct<'_, '_>.debug_struct("DwarfPackage" ) |
97 | .field("in_progress" , &self.maybe_in_progress) |
98 | .field(name:"target_count" , &self.targets.len()) |
99 | .finish() |
100 | } |
101 | } |
102 | |
103 | impl<'output, 'session: 'output, Sess> DwarfPackage<'output, 'session, Sess> |
104 | where |
105 | Sess: Session<RelocationMap>, |
106 | { |
107 | /// Create a new `DwarfPackage` with the provided `Session` implementation. |
108 | pub fn new(sess: &'session Sess) -> Self { |
109 | Self { sess, maybe_in_progress: None, targets: HashSet::new() } |
110 | } |
111 | |
112 | /// Add an input object to the in-progress package. |
113 | #[tracing::instrument (level = "trace" , skip(obj))] |
114 | fn process_input_object<'input>(&mut self, obj: &'input object::File<'input>) -> Result<()> { |
115 | if self.maybe_in_progress.is_none() { |
116 | self.maybe_in_progress = |
117 | Some(InProgressDwarfPackage::new(obj.architecture(), obj.endianness())); |
118 | } |
119 | |
120 | let encoding = if let Some(section) = obj.section_by_name(".debug_info.dwo" ) { |
121 | let data = section.compressed_data()?.decompress()?; |
122 | let data_ref = self.sess.alloc_owned_cow(data); |
123 | let debug_info = gimli::DebugInfo::new(data_ref, obj.endianness().as_runtime_endian()); |
124 | debug_info |
125 | .units() |
126 | .next() |
127 | .map_err(Error::ParseUnitHeader)? |
128 | .map(|root_header| root_header.encoding()) |
129 | .ok_or(Error::NoCompilationUnits)? |
130 | } else { |
131 | debug!("no `.debug_info.dwo` in input dwarf object" ); |
132 | return Ok(()); |
133 | }; |
134 | |
135 | let sess = self.sess; |
136 | self.maybe_in_progress |
137 | .as_mut() |
138 | .expect("`process_input_object` is broken" ) |
139 | .add_input_object(sess, obj, encoding) |
140 | } |
141 | |
142 | /// Add input objects referenced by executable to the DWARF package. |
143 | #[tracing::instrument (level = "trace" )] |
144 | pub fn add_executable( |
145 | &mut self, |
146 | path: &Path, |
147 | missing_behaviour: MissingReferencedObjectBehaviour, |
148 | ) -> Result<()> { |
149 | let data = self.sess.read_input(path).map_err(Error::ReadInput)?; |
150 | let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?; |
151 | |
152 | let mut load_section = |id: gimli::SectionId| -> Result<_> { |
153 | let mut relocations = RelocationMap::default(); |
154 | let data = match obj.section_by_name(&id.name()) { |
155 | Some(ref section) => { |
156 | add_relocations(&mut relocations, &obj, section)?; |
157 | section.compressed_data()?.decompress()? |
158 | } |
159 | // Use a non-zero capacity so that `ReaderOffsetId`s are unique. |
160 | None => Cow::Owned(Vec::with_capacity(1)), |
161 | }; |
162 | |
163 | let data_ref = self.sess.alloc_owned_cow(data); |
164 | let reader = EndianSlice::new(data_ref, obj.endianness().as_runtime_endian()); |
165 | let section = reader; |
166 | let relocations = self.sess.alloc_relocation(relocations); |
167 | Ok(Relocate { relocations, section, reader }) |
168 | }; |
169 | |
170 | let dwarf = gimli::Dwarf::load(&mut load_section)?; |
171 | |
172 | let mut iter = dwarf.units(); |
173 | while let Some(header) = iter.next().map_err(Error::ParseUnitHeader)? { |
174 | let unit = dwarf.unit(header).map_err(Error::ParseUnit)?; |
175 | |
176 | let target = match dwo_identifier_of_unit(&dwarf.debug_abbrev, &unit.header)? { |
177 | Some(target) => target, |
178 | None => { |
179 | debug!("no target" ); |
180 | continue; |
181 | } |
182 | }; |
183 | |
184 | let dwo_name = { |
185 | let mut cursor = unit.header.entries(&unit.abbreviations); |
186 | cursor.next_dfs()?; |
187 | let root = cursor.current().expect("unit w/out root debugging information entry" ); |
188 | |
189 | let dwo_name = if let Some(val) = root.attr_value(gimli::DW_AT_dwo_name)? { |
190 | // DWARF 5 |
191 | val |
192 | } else if let Some(val) = root.attr_value(gimli::DW_AT_GNU_dwo_name)? { |
193 | // GNU Extension |
194 | val |
195 | } else { |
196 | return Err(Error::MissingDwoName(target.index())); |
197 | }; |
198 | |
199 | dwarf.attr_string(&unit, dwo_name)?.to_string()?.into_owned() |
200 | }; |
201 | |
202 | // Prepend the compilation directory if it exists. |
203 | let mut path = if let Some(comp_dir) = &unit.comp_dir { |
204 | PathBuf::from(comp_dir.to_string()?.into_owned()) |
205 | } else { |
206 | PathBuf::new() |
207 | }; |
208 | path.push(dwo_name); |
209 | |
210 | // Only add `DwoId`s to the targets, not `DebugTypeSignature`s. There doesn't |
211 | // appear to be a "skeleton type unit" to find the corresponding unit of (there are |
212 | // normal type units in an executable, but should we expect to find a corresponding |
213 | // split type unit for those?). |
214 | if matches!(target, DwarfObject::Compilation(_)) { |
215 | // Input objects are processed first, if a DWARF object referenced by this |
216 | // executable was already found then don't add it to the target and try to add it |
217 | // again. |
218 | if let Some(package) = &self.maybe_in_progress { |
219 | if package.contained_units().contains(&target) { |
220 | continue; |
221 | } |
222 | } |
223 | |
224 | debug!(?target, "adding target" ); |
225 | self.targets.insert(target); |
226 | } |
227 | |
228 | match self.add_input_object(&path) { |
229 | Ok(()) => (), |
230 | Err(Error::ReadInput(..)) if missing_behaviour.skip_missing() => (), |
231 | Err(e) => return Err(e), |
232 | } |
233 | } |
234 | |
235 | Ok(()) |
236 | } |
237 | |
238 | /// Add an input object to the DWARF package. |
239 | /// |
240 | /// Input object must be an archive or an elf object. |
241 | #[tracing::instrument (level = "trace" )] |
242 | pub fn add_input_object(&mut self, path: &Path) -> Result<()> { |
243 | let data = self.sess.read_input(&path).map_err(Error::ReadInput)?; |
244 | |
245 | let kind = FileKind::parse(data).map_err(Error::ParseFileKind)?; |
246 | trace!(?kind); |
247 | match kind { |
248 | FileKind::Archive => { |
249 | let archive = object::read::archive::ArchiveFile::parse(data) |
250 | .map_err(Error::ParseArchiveFile)?; |
251 | |
252 | for member in archive.members() { |
253 | let member = member.map_err(Error::ParseArchiveMember)?; |
254 | let data = member.data(data)?; |
255 | |
256 | let kind = if let Ok(kind) = FileKind::parse(data) { |
257 | kind |
258 | } else { |
259 | trace!("skipping non-elf archive member" ); |
260 | continue; |
261 | }; |
262 | |
263 | trace!(?kind, "archive member" ); |
264 | match kind { |
265 | FileKind::Elf32 | FileKind::Elf64 => { |
266 | let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?; |
267 | self.process_input_object(&obj)?; |
268 | } |
269 | _ => { |
270 | trace!("skipping non-elf archive member" ); |
271 | } |
272 | } |
273 | } |
274 | |
275 | Ok(()) |
276 | } |
277 | FileKind::Elf32 | FileKind::Elf64 => { |
278 | let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?; |
279 | self.process_input_object(&obj) |
280 | } |
281 | _ => Err(Error::InvalidInputKind), |
282 | } |
283 | } |
284 | |
285 | /// Returns the `object::write::Object` containing the created DWARF package. |
286 | /// |
287 | /// Returns an `Error::MissingReferencedUnit` if DWARF objects referenced by executables were |
288 | /// not subsequently found. |
289 | /// Returns an `Error::NoOutputObjectCreated` if no input objects or executables were provided. |
290 | #[tracing::instrument (level = "trace" )] |
291 | pub fn finish(self) -> Result<WritableObject<'output>> { |
292 | match self.maybe_in_progress { |
293 | Some(package) => { |
294 | if let Some(missing) = self.targets.difference(package.contained_units()).next() { |
295 | return Err(Error::MissingReferencedUnit(missing.index())); |
296 | } |
297 | |
298 | package.finish() |
299 | } |
300 | None if !self.targets.is_empty() => { |
301 | let first_missing_unit = self |
302 | .targets |
303 | .iter() |
304 | .next() |
305 | .copied() |
306 | .expect("non-empty map doesn't have first element" ); |
307 | Err(Error::MissingReferencedUnit(first_missing_unit.index())) |
308 | } |
309 | None => Err(Error::NoOutputObjectCreated), |
310 | } |
311 | } |
312 | } |
313 | |