1 | //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H |
10 | #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H |
11 | |
12 | #include "llvm/ADT/StringRef.h" |
13 | #include "llvm/BinaryFormat/XCOFF.h" |
14 | #include "llvm/MC/MCDisassembler/MCSymbolizer.h" |
15 | #include "llvm/Support/Error.h" |
16 | #include <cstdint> |
17 | #include <memory> |
18 | #include <vector> |
19 | |
20 | namespace llvm { |
21 | |
22 | struct XCOFFSymbolInfoTy { |
23 | std::optional<XCOFF::StorageMappingClass> StorageMappingClass; |
24 | std::optional<uint32_t> Index; |
25 | bool IsLabel = false; |
26 | bool operator<(const XCOFFSymbolInfoTy &SymInfo) const; |
27 | }; |
28 | |
29 | struct SymbolInfoTy { |
30 | uint64_t Addr; |
31 | StringRef Name; |
32 | // XCOFF uses XCOFFSymInfo. Other targets use Type. |
33 | XCOFFSymbolInfoTy XCOFFSymInfo; |
34 | uint8_t Type; |
35 | // Used by ELF to describe a mapping symbol that is usually not displayed. |
36 | bool IsMappingSymbol; |
37 | |
38 | private: |
39 | bool IsXCOFF; |
40 | bool HasType; |
41 | |
42 | public: |
43 | SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr, |
44 | StringRef Name, std::optional<uint32_t> Idx, bool Label) |
45 | : Addr(Addr), Name(Name), XCOFFSymInfo{.StorageMappingClass: Smc, .Index: Idx, .IsLabel: Label}, Type(0), |
46 | IsMappingSymbol(false), IsXCOFF(true), HasType(false) {} |
47 | SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, |
48 | bool IsMappingSymbol = false, bool IsXCOFF = false) |
49 | : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol), |
50 | IsXCOFF(IsXCOFF), HasType(true) {} |
51 | bool isXCOFF() const { return IsXCOFF; } |
52 | |
53 | private: |
54 | friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { |
55 | assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) && |
56 | "The value of IsXCOFF and HasType in P1 and P2 should be the same " |
57 | "respectively." ); |
58 | |
59 | if (P1.IsXCOFF && P1.HasType) |
60 | return std::tie(args: P1.Addr, args: P1.Type, args: P1.Name) < |
61 | std::tie(args: P2.Addr, args: P2.Type, args: P2.Name); |
62 | |
63 | if (P1.IsXCOFF) |
64 | return std::tie(args: P1.Addr, args: P1.XCOFFSymInfo, args: P1.Name) < |
65 | std::tie(args: P2.Addr, args: P2.XCOFFSymInfo, args: P2.Name); |
66 | |
67 | // With the same address, place mapping symbols first. |
68 | bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol; |
69 | return std::tie(args: P1.Addr, args&: MS1, args: P1.Name, args: P1.Type) < |
70 | std::tie(args: P2.Addr, args&: MS2, args: P2.Name, args: P2.Type); |
71 | } |
72 | }; |
73 | |
74 | using SectionSymbolsTy = std::vector<SymbolInfoTy>; |
75 | |
76 | template <typename T> class ArrayRef; |
77 | class MCContext; |
78 | class MCInst; |
79 | class MCSubtargetInfo; |
80 | class raw_ostream; |
81 | |
82 | /// Superclass for all disassemblers. Consumes a memory region and provides an |
83 | /// array of assembly instructions. |
84 | class MCDisassembler { |
85 | public: |
86 | /// Ternary decode status. Most backends will just use Fail and |
87 | /// Success, however some have a concept of an instruction with |
88 | /// understandable semantics but which is architecturally |
89 | /// incorrect. An example of this is ARM UNPREDICTABLE instructions |
90 | /// which are disassemblable but cause undefined behaviour. |
91 | /// |
92 | /// Because it makes sense to disassemble these instructions, there |
93 | /// is a "soft fail" failure mode that indicates the MCInst& is |
94 | /// valid but architecturally incorrect. |
95 | /// |
96 | /// The enum numbers are deliberately chosen such that reduction |
97 | /// from Success->SoftFail ->Fail can be done with a simple |
98 | /// bitwise-AND: |
99 | /// |
100 | /// LEFT & TOP = | Success Unpredictable Fail |
101 | /// --------------+----------------------------------- |
102 | /// Success | Success Unpredictable Fail |
103 | /// Unpredictable | Unpredictable Unpredictable Fail |
104 | /// Fail | Fail Fail Fail |
105 | /// |
106 | /// An easy way of encoding this is as 0b11, 0b01, 0b00 for |
107 | /// Success, SoftFail, Fail respectively. |
108 | enum DecodeStatus { |
109 | Fail = 0, |
110 | SoftFail = 1, |
111 | Success = 3 |
112 | }; |
113 | |
114 | MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) |
115 | : Ctx(Ctx), STI(STI) {} |
116 | |
117 | virtual ~MCDisassembler(); |
118 | |
119 | /// Returns the disassembly of a single instruction. |
120 | /// |
121 | /// \param Instr - An MCInst to populate with the contents of the |
122 | /// instruction. |
123 | /// \param Size - A value to populate with the size of the instruction, or |
124 | /// the number of bytes consumed while attempting to decode |
125 | /// an invalid instruction. |
126 | /// \param Address - The address, in the memory space of region, of the first |
127 | /// byte of the instruction. |
128 | /// \param Bytes - A reference to the actual bytes of the instruction. |
129 | /// \param CStream - The stream to print comments and annotations on. |
130 | /// \return - MCDisassembler::Success if the instruction is valid, |
131 | /// MCDisassembler::SoftFail if the instruction was |
132 | /// disassemblable but invalid, |
133 | /// MCDisassembler::Fail if the instruction was invalid. |
134 | virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, |
135 | ArrayRef<uint8_t> Bytes, uint64_t Address, |
136 | raw_ostream &CStream) const = 0; |
137 | |
138 | /// Used to perform separate target specific disassembly for a particular |
139 | /// symbol. May parse any prelude that precedes instructions after the |
140 | /// start of a symbol, or the entire symbol. |
141 | /// This is used for example by WebAssembly to decode preludes. |
142 | /// |
143 | /// Base implementation returns false. So all targets by default decline to |
144 | /// treat symbols separately. |
145 | /// |
146 | /// \param Symbol - The symbol. |
147 | /// \param Size - The number of bytes consumed. |
148 | /// \param Address - The address, in the memory space of region, of the first |
149 | /// byte of the symbol. |
150 | /// \param Bytes - A reference to the actual bytes at the symbol location. |
151 | /// \return - True if this symbol triggered some target specific |
152 | /// disassembly for this symbol. Size must be set with the |
153 | /// number of bytes consumed. |
154 | /// - Error if this symbol triggered some target specific |
155 | /// disassembly for this symbol, but an error was found with |
156 | /// it. Size must be set with the number of bytes consumed. |
157 | /// - False if the target doesn't want to handle the symbol |
158 | /// separately. The value of Size is ignored in this case, |
159 | /// and Err must not be set. |
160 | virtual Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, |
161 | ArrayRef<uint8_t> Bytes, |
162 | uint64_t Address) const; |
163 | // TODO: |
164 | // Implement similar hooks that can be used at other points during |
165 | // disassembly. Something along the following lines: |
166 | // - onBeforeInstructionDecode() |
167 | // - onAfterInstructionDecode() |
168 | // - onSymbolEnd() |
169 | // It should help move much of the target specific code from llvm-objdump to |
170 | // respective target disassemblers. |
171 | |
172 | /// Suggest a distance to skip in a buffer of data to find the next |
173 | /// place to look for the start of an instruction. For example, if |
174 | /// all instructions have a fixed alignment, this might advance to |
175 | /// the next multiple of that alignment. |
176 | /// |
177 | /// If not overridden, the default is 1. |
178 | /// |
179 | /// \param Address - The address, in the memory space of region, of the |
180 | /// starting point (typically the first byte of something |
181 | /// that did not decode as a valid instruction at all). |
182 | /// \param Bytes - A reference to the actual bytes at Address. May be |
183 | /// needed in order to determine the width of an |
184 | /// unrecognized instruction (e.g. in Thumb this is a simple |
185 | /// consistent criterion that doesn't require knowing the |
186 | /// specific instruction). The caller can pass as much data |
187 | /// as they have available, and the function is required to |
188 | /// make a reasonable default choice if not enough data is |
189 | /// available to make a better one. |
190 | /// \return - A number of bytes to skip. Must always be greater than |
191 | /// zero. May be greater than the size of Bytes. |
192 | virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes, |
193 | uint64_t Address) const; |
194 | |
195 | private: |
196 | MCContext &Ctx; |
197 | |
198 | protected: |
199 | // Subtarget information, for instruction decoding predicates if required. |
200 | const MCSubtargetInfo &STI; |
201 | std::unique_ptr<MCSymbolizer> Symbolizer; |
202 | |
203 | public: |
204 | // Helpers around MCSymbolizer |
205 | bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, |
206 | bool IsBranch, uint64_t Offset, uint64_t OpSize, |
207 | uint64_t InstSize) const; |
208 | |
209 | void (int64_t Value, uint64_t Address) const; |
210 | |
211 | /// Set \p Symzer as the current symbolizer. |
212 | /// This takes ownership of \p Symzer, and deletes the previously set one. |
213 | void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); |
214 | |
215 | MCContext& getContext() const { return Ctx; } |
216 | |
217 | const MCSubtargetInfo& getSubtargetInfo() const { return STI; } |
218 | |
219 | /// ELF-specific, set the ABI version from the object header. |
220 | virtual void setABIVersion(unsigned Version) {} |
221 | |
222 | // Marked mutable because we cache it inside the disassembler, rather than |
223 | // having to pass it around as an argument through all the autogenerated code. |
224 | mutable raw_ostream * = nullptr; |
225 | }; |
226 | |
227 | } // end namespace llvm |
228 | |
229 | #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H |
230 | |