1 | //===- EhFrame.h ------------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_MACHO_EH_FRAME_H |
10 | #define LLD_MACHO_EH_FRAME_H |
11 | |
12 | #include "InputSection.h" |
13 | #include "Relocations.h" |
14 | |
15 | #include "lld/Common/LLVM.h" |
16 | #include "llvm/ADT/ArrayRef.h" |
17 | #include "llvm/ADT/PointerUnion.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | |
20 | /* |
21 | * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it |
22 | * is closely coupled with other file parsing logic; EhFrame.h just contains a |
23 | * few helpers. |
24 | */ |
25 | |
26 | /* |
27 | * === The EH frame format === |
28 | * |
29 | * EH frames can either be Common Information Entries (CIEs) or Frame |
30 | * Description Entries (FDEs). CIEs contain information that is common amongst |
31 | * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame |
32 | * entries together form a forest of two-level trees, with CIEs as the roots |
33 | * and FDEs as the leaves. Note that a CIE must precede the FDEs which point |
34 | * to it. |
35 | * |
36 | * A CIE comprises the following fields in order: |
37 | * 1. Length of the entry (4 or 12 bytes) |
38 | * 2. CIE offset (4 bytes; always 0 for CIEs) |
39 | * 3. CIE version (byte) |
40 | * 4. Null-terminated augmentation string |
41 | * 5-8. LEB128 values that we don't care about |
42 | * 9. Augmentation data, to be interpreted using the aug string |
43 | * 10. DWARF instructions (ignored by LLD) |
44 | * |
45 | * An FDE comprises the following fields in order: |
46 | * 1. Length of the entry (4 or 12 bytes) |
47 | * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE) |
48 | * 3. Function address (pointer-sized pcrel offset) |
49 | * 4. (Optional) Augmentation data length |
50 | * 5. (Optional) LSDA address (pointer-sized pcrel offset) |
51 | * 6. DWARF instructions (ignored by LLD) |
52 | */ |
53 | namespace lld::macho { |
54 | |
55 | class EhReader { |
56 | public: |
57 | EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff) |
58 | : file(file), data(data), dataOff(dataOff) {} |
59 | size_t size() const { return data.size(); } |
60 | // Read and validate the length field. |
61 | uint64_t readLength(size_t *off) const; |
62 | // Skip the length field without doing validation. |
63 | void skipValidLength(size_t *off) const; |
64 | uint8_t readByte(size_t *off) const; |
65 | uint32_t readU32(size_t *off) const; |
66 | uint64_t readPointer(size_t *off, uint8_t size) const; |
67 | StringRef readString(size_t *off) const; |
68 | void skipLeb128(size_t *off) const; |
69 | void failOn(size_t errOff, const Twine &msg) const; |
70 | |
71 | private: |
72 | const ObjFile *file; |
73 | ArrayRef<uint8_t> data; |
74 | // The offset of the data array within its section. Used only for error |
75 | // reporting. |
76 | const size_t dataOff; |
77 | }; |
78 | |
79 | // The EH frame format, when emitted by llvm-mc, consists of a number of |
80 | // "abs-ified" relocations, i.e. relocations that are implicitly encoded as |
81 | // pcrel offsets in the section data. The offsets refer to the locations of |
82 | // symbols in the input object file. When we ingest these EH frames, we convert |
83 | // these implicit relocations into explicit Relocs. |
84 | // |
85 | // These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4. |
86 | // However, we need this operation to be cross-platform, and ARM does not have a |
87 | // similar relocation that is applicable. We therefore use the more verbose (but |
88 | // more generic) subtractor relocation to encode these pcrel values. ld64 |
89 | // appears to do something similar -- its `-r` output contains these explicit |
90 | // subtractor relocations. |
91 | class EhRelocator { |
92 | public: |
93 | EhRelocator(InputSection *isec) : isec(isec) {} |
94 | |
95 | // For the next two methods, let `PC` denote `isec address + off`. |
96 | // Create relocs writing the value of target - PC to PC. |
97 | void makePcRel(uint64_t off, |
98 | llvm::PointerUnion<Symbol *, InputSection *> target, |
99 | uint8_t length); |
100 | // Create relocs writing the value of PC - target to PC. |
101 | void makeNegativePcRel(uint64_t off, |
102 | llvm::PointerUnion<Symbol *, InputSection *> target, |
103 | uint8_t length); |
104 | // Insert the new relocations into isec->relocs. |
105 | void commit(); |
106 | |
107 | private: |
108 | InputSection *isec; |
109 | // Insert new relocs here so that we don't invalidate iterators into the |
110 | // existing relocs vector. |
111 | SmallVector<Reloc, 6> newRelocs; |
112 | }; |
113 | |
114 | } // namespace lld::macho |
115 | |
116 | #endif |
117 | |