1 | use alloc::borrow::Cow; |
2 | use alloc::vec::Vec; |
3 | |
4 | use crate::read::{ |
5 | self, Architecture, CodeView, ComdatKind, CompressedData, CompressedFileRange, Export, |
6 | FileFlags, Import, ObjectKind, ObjectMap, Relocation, Result, SectionFlags, SectionIndex, |
7 | SectionKind, SegmentFlags, SubArchitecture, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, |
8 | SymbolMapName, SymbolScope, SymbolSection, |
9 | }; |
10 | use crate::Endianness; |
11 | |
12 | /// An object file. |
13 | /// |
14 | /// This is the primary trait for the unified read API. |
15 | pub trait Object<'data: 'file, 'file>: read::private::Sealed { |
16 | /// A loadable segment in the object file. |
17 | type Segment: ObjectSegment<'data>; |
18 | |
19 | /// An iterator for the loadable segments in the object file. |
20 | type SegmentIterator: Iterator<Item = Self::Segment>; |
21 | |
22 | /// A section in the object file. |
23 | type Section: ObjectSection<'data>; |
24 | |
25 | /// An iterator for the sections in the object file. |
26 | type SectionIterator: Iterator<Item = Self::Section>; |
27 | |
28 | /// A COMDAT section group in the object file. |
29 | type Comdat: ObjectComdat<'data>; |
30 | |
31 | /// An iterator for the COMDAT section groups in the object file. |
32 | type ComdatIterator: Iterator<Item = Self::Comdat>; |
33 | |
34 | /// A symbol in the object file. |
35 | type Symbol: ObjectSymbol<'data>; |
36 | |
37 | /// An iterator for symbols in the object file. |
38 | type SymbolIterator: Iterator<Item = Self::Symbol>; |
39 | |
40 | /// A symbol table in the object file. |
41 | type SymbolTable: ObjectSymbolTable< |
42 | 'data, |
43 | Symbol = Self::Symbol, |
44 | SymbolIterator = Self::SymbolIterator, |
45 | >; |
46 | |
47 | /// An iterator for the dynamic relocations in the file. |
48 | /// |
49 | /// The first field in the item tuple is the address |
50 | /// that the relocation applies to. |
51 | type DynamicRelocationIterator: Iterator<Item = (u64, Relocation)>; |
52 | |
53 | /// Get the architecture type of the file. |
54 | fn architecture(&self) -> Architecture; |
55 | |
56 | /// Get the sub-architecture type of the file if known. |
57 | /// |
58 | /// A value of `None` has a range of meanings: the file supports all |
59 | /// sub-architectures, the file does not explicitly specify a |
60 | /// sub-architecture, or the sub-architecture is currently unrecognized. |
61 | fn sub_architecture(&self) -> Option<SubArchitecture> { |
62 | None |
63 | } |
64 | |
65 | /// Get the endianness of the file. |
66 | #[inline ] |
67 | fn endianness(&self) -> Endianness { |
68 | if self.is_little_endian() { |
69 | Endianness::Little |
70 | } else { |
71 | Endianness::Big |
72 | } |
73 | } |
74 | |
75 | /// Return true if the file is little endian, false if it is big endian. |
76 | fn is_little_endian(&self) -> bool; |
77 | |
78 | /// Return true if the file can contain 64-bit addresses. |
79 | fn is_64(&self) -> bool; |
80 | |
81 | /// Return the kind of this object. |
82 | fn kind(&self) -> ObjectKind; |
83 | |
84 | /// Get an iterator for the loadable segments in the file. |
85 | /// |
86 | /// For ELF, this is program headers with type [`PT_LOAD`](crate::elf::PT_LOAD). |
87 | /// For Mach-O, this is load commands with type [`LC_SEGMENT`](crate::macho::LC_SEGMENT) |
88 | /// or [`LC_SEGMENT_64`](crate::macho::LC_SEGMENT_64). |
89 | /// For PE, this is all sections. |
90 | fn segments(&'file self) -> Self::SegmentIterator; |
91 | |
92 | /// Get the section named `section_name`, if such a section exists. |
93 | /// |
94 | /// If `section_name` starts with a '.' then it is treated as a system section name, |
95 | /// and is compared using the conventions specific to the object file format. This |
96 | /// includes: |
97 | /// - if ".debug_str_offsets" is requested for a Mach-O object file, then the actual |
98 | /// section name that is searched for is "__debug_str_offs". |
99 | /// - if ".debug_info" is requested for an ELF object file, then |
100 | /// ".zdebug_info" may be returned (and similarly for other debug sections). |
101 | /// |
102 | /// For some object files, multiple segments may contain sections with the same |
103 | /// name. In this case, the first matching section will be used. |
104 | /// |
105 | /// This method skips over sections with invalid names. |
106 | fn section_by_name(&'file self, section_name: &str) -> Option<Self::Section> { |
107 | self.section_by_name_bytes(section_name.as_bytes()) |
108 | } |
109 | |
110 | /// Like [`Self::section_by_name`], but allows names that are not UTF-8. |
111 | fn section_by_name_bytes(&'file self, section_name: &[u8]) -> Option<Self::Section>; |
112 | |
113 | /// Get the section at the given index. |
114 | /// |
115 | /// The meaning of the index depends on the object file. |
116 | /// |
117 | /// For some object files, this requires iterating through all sections. |
118 | /// |
119 | /// Returns an error if the index is invalid. |
120 | fn section_by_index(&'file self, index: SectionIndex) -> Result<Self::Section>; |
121 | |
122 | /// Get an iterator for the sections in the file. |
123 | fn sections(&'file self) -> Self::SectionIterator; |
124 | |
125 | /// Get an iterator for the COMDAT section groups in the file. |
126 | fn comdats(&'file self) -> Self::ComdatIterator; |
127 | |
128 | /// Get the debugging symbol table, if any. |
129 | fn symbol_table(&'file self) -> Option<Self::SymbolTable>; |
130 | |
131 | /// Get the debugging symbol at the given index. |
132 | /// |
133 | /// The meaning of the index depends on the object file. |
134 | /// |
135 | /// Returns an error if the index is invalid. |
136 | fn symbol_by_index(&'file self, index: SymbolIndex) -> Result<Self::Symbol>; |
137 | |
138 | /// Get an iterator for the debugging symbols in the file. |
139 | /// |
140 | /// This may skip over symbols that are malformed or unsupported. |
141 | /// |
142 | /// For Mach-O files, this does not include STAB entries. |
143 | fn symbols(&'file self) -> Self::SymbolIterator; |
144 | |
145 | /// Get the symbol named `symbol_name`, if the symbol exists. |
146 | fn symbol_by_name(&'file self, symbol_name: &str) -> Option<Self::Symbol> { |
147 | self.symbol_by_name_bytes(symbol_name.as_bytes()) |
148 | } |
149 | |
150 | /// Like [`Self::symbol_by_name`], but allows names that are not UTF-8. |
151 | fn symbol_by_name_bytes(&'file self, symbol_name: &[u8]) -> Option<Self::Symbol> { |
152 | self.symbols() |
153 | .find(|sym| sym.name_bytes() == Ok(symbol_name)) |
154 | } |
155 | |
156 | /// Get the dynamic linking symbol table, if any. |
157 | /// |
158 | /// Only ELF has a separate dynamic linking symbol table. |
159 | /// Consider using [`Self::exports`] or [`Self::imports`] instead. |
160 | fn dynamic_symbol_table(&'file self) -> Option<Self::SymbolTable>; |
161 | |
162 | /// Get an iterator for the dynamic linking symbols in the file. |
163 | /// |
164 | /// This may skip over symbols that are malformed or unsupported. |
165 | /// |
166 | /// Only ELF has dynamic linking symbols. |
167 | /// Other file formats will return an empty iterator. |
168 | /// Consider using [`Self::exports`] or [`Self::imports`] instead. |
169 | fn dynamic_symbols(&'file self) -> Self::SymbolIterator; |
170 | |
171 | /// Get the dynamic relocations for this file. |
172 | /// |
173 | /// Symbol indices in these relocations refer to the dynamic symbol table. |
174 | /// |
175 | /// Only ELF has dynamic relocations. |
176 | fn dynamic_relocations(&'file self) -> Option<Self::DynamicRelocationIterator>; |
177 | |
178 | /// Construct a map from addresses to symbol names. |
179 | /// |
180 | /// The map will only contain defined text and data symbols. |
181 | /// The dynamic symbol table will only be used if there are no debugging symbols. |
182 | fn symbol_map(&'file self) -> SymbolMap<SymbolMapName<'data>> { |
183 | let mut symbols = Vec::new(); |
184 | if let Some(table) = self.symbol_table().or_else(|| self.dynamic_symbol_table()) { |
185 | // Sometimes symbols share addresses. Collect them all then choose the "best". |
186 | let mut all_symbols = Vec::new(); |
187 | for symbol in table.symbols() { |
188 | // Must have an address. |
189 | if !symbol.is_definition() { |
190 | continue; |
191 | } |
192 | // Must have a name. |
193 | let name = match symbol.name() { |
194 | Ok(name) => name, |
195 | _ => continue, |
196 | }; |
197 | if name.is_empty() { |
198 | continue; |
199 | } |
200 | |
201 | // Lower is better. |
202 | let mut priority = 0u32; |
203 | |
204 | // Prefer known kind. |
205 | match symbol.kind() { |
206 | SymbolKind::Text | SymbolKind::Data => {} |
207 | SymbolKind::Unknown => priority += 1, |
208 | _ => continue, |
209 | } |
210 | priority *= 2; |
211 | |
212 | // Prefer global visibility. |
213 | priority += match symbol.scope() { |
214 | SymbolScope::Unknown => 3, |
215 | SymbolScope::Compilation => 2, |
216 | SymbolScope::Linkage => 1, |
217 | SymbolScope::Dynamic => 0, |
218 | }; |
219 | priority *= 4; |
220 | |
221 | // Prefer later entries (earlier symbol is likely to be less specific). |
222 | let index = !0 - symbol.index().0; |
223 | |
224 | // Tuple is ordered for sort. |
225 | all_symbols.push((symbol.address(), priority, index, name)); |
226 | } |
227 | // Unstable sort is okay because tuple includes index. |
228 | all_symbols.sort_unstable(); |
229 | |
230 | let mut previous_address = !0; |
231 | for (address, _priority, _index, name) in all_symbols { |
232 | if address != previous_address { |
233 | symbols.push(SymbolMapName::new(address, name)); |
234 | previous_address = address; |
235 | } |
236 | } |
237 | } |
238 | SymbolMap::new(symbols) |
239 | } |
240 | |
241 | /// Construct a map from addresses to symbol names and object file names. |
242 | /// |
243 | /// This is derived from Mach-O STAB entries. |
244 | fn object_map(&'file self) -> ObjectMap<'data> { |
245 | ObjectMap::default() |
246 | } |
247 | |
248 | /// Get the imported symbols. |
249 | fn imports(&self) -> Result<Vec<Import<'data>>>; |
250 | |
251 | /// Get the exported symbols that expose both a name and an address. |
252 | /// |
253 | /// Some file formats may provide other kinds of symbols that can be retrieved using |
254 | /// the low level API. |
255 | fn exports(&self) -> Result<Vec<Export<'data>>>; |
256 | |
257 | /// Return true if the file contains DWARF debug information sections, false if not. |
258 | fn has_debug_symbols(&self) -> bool; |
259 | |
260 | /// The UUID from a Mach-O [`LC_UUID`](crate::macho::LC_UUID) load command. |
261 | #[inline ] |
262 | fn mach_uuid(&self) -> Result<Option<[u8; 16]>> { |
263 | Ok(None) |
264 | } |
265 | |
266 | /// The build ID from an ELF [`NT_GNU_BUILD_ID`](crate::elf::NT_GNU_BUILD_ID) note. |
267 | #[inline ] |
268 | fn build_id(&self) -> Result<Option<&'data [u8]>> { |
269 | Ok(None) |
270 | } |
271 | |
272 | /// The filename and CRC from a `.gnu_debuglink` section. |
273 | #[inline ] |
274 | fn gnu_debuglink(&self) -> Result<Option<(&'data [u8], u32)>> { |
275 | Ok(None) |
276 | } |
277 | |
278 | /// The filename and build ID from a `.gnu_debugaltlink` section. |
279 | #[inline ] |
280 | fn gnu_debugaltlink(&self) -> Result<Option<(&'data [u8], &'data [u8])>> { |
281 | Ok(None) |
282 | } |
283 | |
284 | /// The filename and GUID from the PE CodeView section. |
285 | #[inline ] |
286 | fn pdb_info(&self) -> Result<Option<CodeView<'_>>> { |
287 | Ok(None) |
288 | } |
289 | |
290 | /// Get the base address used for relative virtual addresses. |
291 | /// |
292 | /// Currently this is only non-zero for PE. |
293 | fn relative_address_base(&'file self) -> u64; |
294 | |
295 | /// Get the virtual address of the entry point of the binary. |
296 | fn entry(&'file self) -> u64; |
297 | |
298 | /// File flags that are specific to each file format. |
299 | fn flags(&self) -> FileFlags; |
300 | } |
301 | |
302 | /// A loadable segment in an [`Object`]. |
303 | /// |
304 | /// This trait is part of the unified read API. |
305 | pub trait ObjectSegment<'data>: read::private::Sealed { |
306 | /// Returns the virtual address of the segment. |
307 | fn address(&self) -> u64; |
308 | |
309 | /// Returns the size of the segment in memory. |
310 | fn size(&self) -> u64; |
311 | |
312 | /// Returns the alignment of the segment in memory. |
313 | fn align(&self) -> u64; |
314 | |
315 | /// Returns the offset and size of the segment in the file. |
316 | fn file_range(&self) -> (u64, u64); |
317 | |
318 | /// Returns a reference to the file contents of the segment. |
319 | /// |
320 | /// The length of this data may be different from the size of the |
321 | /// segment in memory. |
322 | fn data(&self) -> Result<&'data [u8]>; |
323 | |
324 | /// Return the segment data in the given range. |
325 | /// |
326 | /// Returns `Ok(None)` if the segment does not contain the given range. |
327 | fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>>; |
328 | |
329 | /// Returns the name of the segment. |
330 | fn name_bytes(&self) -> Result<Option<&[u8]>>; |
331 | |
332 | /// Returns the name of the segment. |
333 | /// |
334 | /// Returns an error if the name is not UTF-8. |
335 | fn name(&self) -> Result<Option<&str>>; |
336 | |
337 | /// Return the flags of segment. |
338 | fn flags(&self) -> SegmentFlags; |
339 | } |
340 | |
341 | /// A section in an [`Object`]. |
342 | /// |
343 | /// This trait is part of the unified read API. |
344 | pub trait ObjectSection<'data>: read::private::Sealed { |
345 | /// An iterator for the relocations for a section. |
346 | /// |
347 | /// The first field in the item tuple is the section offset |
348 | /// that the relocation applies to. |
349 | type RelocationIterator: Iterator<Item = (u64, Relocation)>; |
350 | |
351 | /// Returns the section index. |
352 | fn index(&self) -> SectionIndex; |
353 | |
354 | /// Returns the address of the section. |
355 | fn address(&self) -> u64; |
356 | |
357 | /// Returns the size of the section in memory. |
358 | fn size(&self) -> u64; |
359 | |
360 | /// Returns the alignment of the section in memory. |
361 | fn align(&self) -> u64; |
362 | |
363 | /// Returns offset and size of on-disk segment (if any). |
364 | fn file_range(&self) -> Option<(u64, u64)>; |
365 | |
366 | /// Returns the raw contents of the section. |
367 | /// |
368 | /// The length of this data may be different from the size of the |
369 | /// section in memory. |
370 | /// |
371 | /// This does not do any decompression. |
372 | fn data(&self) -> Result<&'data [u8]>; |
373 | |
374 | /// Return the raw contents of the section data in the given range. |
375 | /// |
376 | /// This does not do any decompression. |
377 | /// |
378 | /// Returns `Ok(None)` if the section does not contain the given range. |
379 | fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>>; |
380 | |
381 | /// Returns the potentially compressed file range of the section, |
382 | /// along with information about the compression. |
383 | fn compressed_file_range(&self) -> Result<CompressedFileRange>; |
384 | |
385 | /// Returns the potentially compressed contents of the section, |
386 | /// along with information about the compression. |
387 | fn compressed_data(&self) -> Result<CompressedData<'data>>; |
388 | |
389 | /// Returns the uncompressed contents of the section. |
390 | /// |
391 | /// The length of this data may be different from the size of the |
392 | /// section in memory. |
393 | /// |
394 | /// If no compression is detected, then returns the data unchanged. |
395 | /// Returns `Err` if decompression fails. |
396 | fn uncompressed_data(&self) -> Result<Cow<'data, [u8]>> { |
397 | self.compressed_data()?.decompress() |
398 | } |
399 | |
400 | /// Returns the name of the section. |
401 | fn name_bytes(&self) -> Result<&[u8]>; |
402 | |
403 | /// Returns the name of the section. |
404 | /// |
405 | /// Returns an error if the name is not UTF-8. |
406 | fn name(&self) -> Result<&str>; |
407 | |
408 | /// Returns the name of the segment for this section. |
409 | fn segment_name_bytes(&self) -> Result<Option<&[u8]>>; |
410 | |
411 | /// Returns the name of the segment for this section. |
412 | /// |
413 | /// Returns an error if the name is not UTF-8. |
414 | fn segment_name(&self) -> Result<Option<&str>>; |
415 | |
416 | /// Return the kind of this section. |
417 | fn kind(&self) -> SectionKind; |
418 | |
419 | /// Get the relocations for this section. |
420 | fn relocations(&self) -> Self::RelocationIterator; |
421 | |
422 | /// Section flags that are specific to each file format. |
423 | fn flags(&self) -> SectionFlags; |
424 | } |
425 | |
426 | /// A COMDAT section group in an [`Object`]. |
427 | /// |
428 | /// This trait is part of the unified read API. |
429 | pub trait ObjectComdat<'data>: read::private::Sealed { |
430 | /// An iterator for the sections in the section group. |
431 | type SectionIterator: Iterator<Item = SectionIndex>; |
432 | |
433 | /// Returns the COMDAT selection kind. |
434 | fn kind(&self) -> ComdatKind; |
435 | |
436 | /// Returns the index of the symbol used for the name of COMDAT section group. |
437 | fn symbol(&self) -> SymbolIndex; |
438 | |
439 | /// Returns the name of the COMDAT section group. |
440 | fn name_bytes(&self) -> Result<&[u8]>; |
441 | |
442 | /// Returns the name of the COMDAT section group. |
443 | /// |
444 | /// Returns an error if the name is not UTF-8. |
445 | fn name(&self) -> Result<&str>; |
446 | |
447 | /// Get the sections in this section group. |
448 | fn sections(&self) -> Self::SectionIterator; |
449 | } |
450 | |
451 | /// A symbol table in an [`Object`]. |
452 | /// |
453 | /// This trait is part of the unified read API. |
454 | pub trait ObjectSymbolTable<'data>: read::private::Sealed { |
455 | /// A symbol table entry. |
456 | type Symbol: ObjectSymbol<'data>; |
457 | |
458 | /// An iterator for the symbols in a symbol table. |
459 | type SymbolIterator: Iterator<Item = Self::Symbol>; |
460 | |
461 | /// Get an iterator for the symbols in the table. |
462 | /// |
463 | /// This may skip over symbols that are malformed or unsupported. |
464 | fn symbols(&self) -> Self::SymbolIterator; |
465 | |
466 | /// Get the symbol at the given index. |
467 | /// |
468 | /// The meaning of the index depends on the object file. |
469 | /// |
470 | /// Returns an error if the index is invalid. |
471 | fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol>; |
472 | } |
473 | |
474 | /// A symbol table entry in an [`Object`]. |
475 | /// |
476 | /// This trait is part of the unified read API. |
477 | pub trait ObjectSymbol<'data>: read::private::Sealed { |
478 | /// The index of the symbol. |
479 | fn index(&self) -> SymbolIndex; |
480 | |
481 | /// The name of the symbol. |
482 | fn name_bytes(&self) -> Result<&'data [u8]>; |
483 | |
484 | /// The name of the symbol. |
485 | /// |
486 | /// Returns an error if the name is not UTF-8. |
487 | fn name(&self) -> Result<&'data str>; |
488 | |
489 | /// The address of the symbol. May be zero if the address is unknown. |
490 | fn address(&self) -> u64; |
491 | |
492 | /// The size of the symbol. May be zero if the size is unknown. |
493 | fn size(&self) -> u64; |
494 | |
495 | /// Return the kind of this symbol. |
496 | fn kind(&self) -> SymbolKind; |
497 | |
498 | /// Returns the section where the symbol is defined. |
499 | fn section(&self) -> SymbolSection; |
500 | |
501 | /// Returns the section index for the section containing this symbol. |
502 | /// |
503 | /// May return `None` if the symbol is not defined in a section. |
504 | fn section_index(&self) -> Option<SectionIndex> { |
505 | self.section().index() |
506 | } |
507 | |
508 | /// Return true if the symbol is undefined. |
509 | fn is_undefined(&self) -> bool; |
510 | |
511 | /// Return true if the symbol is a definition of a function or data object |
512 | /// that has a known address. |
513 | /// |
514 | /// This is primarily used to implement [`Object::symbol_map`]. |
515 | fn is_definition(&self) -> bool; |
516 | |
517 | /// Return true if the symbol is common data. |
518 | /// |
519 | /// Note: does not check for [`SymbolSection::Section`] with [`SectionKind::Common`]. |
520 | fn is_common(&self) -> bool; |
521 | |
522 | /// Return true if the symbol is weak. |
523 | fn is_weak(&self) -> bool; |
524 | |
525 | /// Returns the symbol scope. |
526 | fn scope(&self) -> SymbolScope; |
527 | |
528 | /// Return true if the symbol visible outside of the compilation unit. |
529 | /// |
530 | /// This treats [`SymbolScope::Unknown`] as global. |
531 | fn is_global(&self) -> bool; |
532 | |
533 | /// Return true if the symbol is only visible within the compilation unit. |
534 | fn is_local(&self) -> bool; |
535 | |
536 | /// Symbol flags that are specific to each file format. |
537 | fn flags(&self) -> SymbolFlags<SectionIndex, SymbolIndex>; |
538 | } |
539 | |
540 | /// An iterator for files that don't have dynamic relocations. |
541 | #[derive (Debug)] |
542 | pub struct NoDynamicRelocationIterator; |
543 | |
544 | impl Iterator for NoDynamicRelocationIterator { |
545 | type Item = (u64, Relocation); |
546 | |
547 | #[inline ] |
548 | fn next(&mut self) -> Option<Self::Item> { |
549 | None |
550 | } |
551 | } |
552 | |