1 | //===-- DisassemblerLLVMC.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "DisassemblerLLVMC.h" |
10 | |
11 | #include "llvm-c/Disassembler.h" |
12 | #include "llvm/ADT/SmallString.h" |
13 | #include "llvm/ADT/StringExtras.h" |
14 | #include "llvm/MC/MCAsmInfo.h" |
15 | #include "llvm/MC/MCContext.h" |
16 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
17 | #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" |
18 | #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" |
19 | #include "llvm/MC/MCInst.h" |
20 | #include "llvm/MC/MCInstPrinter.h" |
21 | #include "llvm/MC/MCInstrAnalysis.h" |
22 | #include "llvm/MC/MCInstrInfo.h" |
23 | #include "llvm/MC/MCRegisterInfo.h" |
24 | #include "llvm/MC/MCSubtargetInfo.h" |
25 | #include "llvm/MC/MCTargetOptions.h" |
26 | #include "llvm/MC/TargetRegistry.h" |
27 | #include "llvm/Support/ErrorHandling.h" |
28 | #include "llvm/Support/ScopedPrinter.h" |
29 | #include "llvm/Support/TargetSelect.h" |
30 | #include "llvm/TargetParser/AArch64TargetParser.h" |
31 | |
32 | #include "lldb/Core/Address.h" |
33 | #include "lldb/Core/Module.h" |
34 | #include "lldb/Symbol/Function.h" |
35 | #include "lldb/Symbol/SymbolContext.h" |
36 | #include "lldb/Target/ExecutionContext.h" |
37 | #include "lldb/Target/Process.h" |
38 | #include "lldb/Target/RegisterContext.h" |
39 | #include "lldb/Target/SectionLoadList.h" |
40 | #include "lldb/Target/StackFrame.h" |
41 | #include "lldb/Target/Target.h" |
42 | #include "lldb/Utility/DataExtractor.h" |
43 | #include "lldb/Utility/LLDBLog.h" |
44 | #include "lldb/Utility/Log.h" |
45 | #include "lldb/Utility/RegularExpression.h" |
46 | #include "lldb/Utility/Stream.h" |
47 | #include <optional> |
48 | |
49 | using namespace lldb; |
50 | using namespace lldb_private; |
51 | |
52 | LLDB_PLUGIN_DEFINE(DisassemblerLLVMC) |
53 | |
54 | class DisassemblerLLVMC::MCDisasmInstance { |
55 | public: |
56 | static std::unique_ptr<MCDisasmInstance> |
57 | Create(const char *triple, const char *cpu, const char *features_str, |
58 | unsigned flavor, DisassemblerLLVMC &owner); |
59 | |
60 | ~MCDisasmInstance() = default; |
61 | |
62 | uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, |
63 | lldb::addr_t pc, llvm::MCInst &mc_inst) const; |
64 | void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc, |
65 | std::string &inst_string, std::string &); |
66 | void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); |
67 | void SetUseColor(bool use_color); |
68 | bool GetUseColor() const; |
69 | bool CanBranch(llvm::MCInst &mc_inst) const; |
70 | bool HasDelaySlot(llvm::MCInst &mc_inst) const; |
71 | bool IsCall(llvm::MCInst &mc_inst) const; |
72 | bool IsLoad(llvm::MCInst &mc_inst) const; |
73 | bool IsAuthenticated(llvm::MCInst &mc_inst) const; |
74 | |
75 | private: |
76 | MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, |
77 | std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, |
78 | std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, |
79 | std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, |
80 | std::unique_ptr<llvm::MCContext> &&context_up, |
81 | std::unique_ptr<llvm::MCDisassembler> &&disasm_up, |
82 | std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up, |
83 | std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up); |
84 | |
85 | std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; |
86 | std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; |
87 | std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; |
88 | std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; |
89 | std::unique_ptr<llvm::MCContext> m_context_up; |
90 | std::unique_ptr<llvm::MCDisassembler> m_disasm_up; |
91 | std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; |
92 | std::unique_ptr<llvm::MCInstrAnalysis> m_instr_analysis_up; |
93 | }; |
94 | |
95 | namespace x86 { |
96 | |
/// The three values that decide an x86 instruction's control flow kind.
/// InstructionLengthDecode decodes an instruction into this struct.
struct InstructionOpcodeAndModrm {
  /// Primary opcode of the instruction.
  /// For a one-byte opcode it is the first byte after the prefixes; for
  /// two- and three-byte opcodes it is the second byte.
  uint8_t primary_opcode;

  /// Length of the opcode in bytes. Valid opcode lengths are 1, 2, or 3.
  uint8_t opcode_len;

  /// ModR/M byte of the instruction.
  /// Bits[7:6] hold MOD, bits[5:3] specify a register, and the R/M
  /// bits[2:0] hold a register or select an addressing mode depending on
  /// MOD.
  uint8_t modrm;
};
117 | |
118 | /// Determine the InstructionControlFlowKind based on opcode and modrm bytes. |
119 | /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and |
120 | /// instruction set. |
121 | /// |
122 | /// \param[in] opcode_and_modrm |
123 | /// Contains primary_opcode byte, its length, and ModR/M byte. |
124 | /// Refer to the struct InstructionOpcodeAndModrm for details. |
125 | /// |
126 | /// \return |
127 | /// The control flow kind of the instruction or |
128 | /// eInstructionControlFlowKindOther if the instruction doesn't affect |
129 | /// the control flow of the program. |
130 | lldb::InstructionControlFlowKind |
131 | MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { |
132 | uint8_t opcode = opcode_and_modrm.primary_opcode; |
133 | uint8_t opcode_len = opcode_and_modrm.opcode_len; |
134 | uint8_t modrm = opcode_and_modrm.modrm; |
135 | |
136 | if (opcode_len > 2) |
137 | return lldb::eInstructionControlFlowKindOther; |
138 | |
139 | if (opcode >= 0x70 && opcode <= 0x7F) { |
140 | if (opcode_len == 1) |
141 | return lldb::eInstructionControlFlowKindCondJump; |
142 | else |
143 | return lldb::eInstructionControlFlowKindOther; |
144 | } |
145 | |
146 | if (opcode >= 0x80 && opcode <= 0x8F) { |
147 | if (opcode_len == 2) |
148 | return lldb::eInstructionControlFlowKindCondJump; |
149 | else |
150 | return lldb::eInstructionControlFlowKindOther; |
151 | } |
152 | |
153 | switch (opcode) { |
154 | case 0x9A: |
155 | if (opcode_len == 1) |
156 | return lldb::eInstructionControlFlowKindFarCall; |
157 | break; |
158 | case 0xFF: |
159 | if (opcode_len == 1) { |
160 | uint8_t modrm_reg = (modrm >> 3) & 7; |
161 | if (modrm_reg == 2) |
162 | return lldb::eInstructionControlFlowKindCall; |
163 | else if (modrm_reg == 3) |
164 | return lldb::eInstructionControlFlowKindFarCall; |
165 | else if (modrm_reg == 4) |
166 | return lldb::eInstructionControlFlowKindJump; |
167 | else if (modrm_reg == 5) |
168 | return lldb::eInstructionControlFlowKindFarJump; |
169 | } |
170 | break; |
171 | case 0xE8: |
172 | if (opcode_len == 1) |
173 | return lldb::eInstructionControlFlowKindCall; |
174 | break; |
175 | case 0xCD: |
176 | case 0xCC: |
177 | case 0xCE: |
178 | case 0xF1: |
179 | if (opcode_len == 1) |
180 | return lldb::eInstructionControlFlowKindFarCall; |
181 | break; |
182 | case 0xCF: |
183 | if (opcode_len == 1) |
184 | return lldb::eInstructionControlFlowKindFarReturn; |
185 | break; |
186 | case 0xE9: |
187 | case 0xEB: |
188 | if (opcode_len == 1) |
189 | return lldb::eInstructionControlFlowKindJump; |
190 | break; |
191 | case 0xEA: |
192 | if (opcode_len == 1) |
193 | return lldb::eInstructionControlFlowKindFarJump; |
194 | break; |
195 | case 0xE3: |
196 | case 0xE0: |
197 | case 0xE1: |
198 | case 0xE2: |
199 | if (opcode_len == 1) |
200 | return lldb::eInstructionControlFlowKindCondJump; |
201 | break; |
202 | case 0xC3: |
203 | case 0xC2: |
204 | if (opcode_len == 1) |
205 | return lldb::eInstructionControlFlowKindReturn; |
206 | break; |
207 | case 0xCB: |
208 | case 0xCA: |
209 | if (opcode_len == 1) |
210 | return lldb::eInstructionControlFlowKindFarReturn; |
211 | break; |
212 | case 0x05: |
213 | case 0x34: |
214 | if (opcode_len == 2) |
215 | return lldb::eInstructionControlFlowKindFarCall; |
216 | break; |
217 | case 0x35: |
218 | case 0x07: |
219 | if (opcode_len == 2) |
220 | return lldb::eInstructionControlFlowKindFarReturn; |
221 | break; |
222 | case 0x01: |
223 | if (opcode_len == 2) { |
224 | switch (modrm) { |
225 | case 0xc1: |
226 | return lldb::eInstructionControlFlowKindFarCall; |
227 | case 0xc2: |
228 | case 0xc3: |
229 | return lldb::eInstructionControlFlowKindFarReturn; |
230 | default: |
231 | break; |
232 | } |
233 | } |
234 | break; |
235 | default: |
236 | break; |
237 | } |
238 | |
239 | return lldb::eInstructionControlFlowKindOther; |
240 | } |
241 | |
242 | /// Decode an instruction into opcode, modrm and opcode_len. |
243 | /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. |
244 | /// Opcodes in x86 are generally the first byte of instruction, though two-byte |
245 | /// instructions and prefixes exist. ModR/M is the byte following the opcode |
246 | /// and adds additional information for how the instruction is executed. |
247 | /// |
248 | /// \param[in] inst_bytes |
249 | /// Raw bytes of the instruction |
250 | /// |
251 | /// |
252 | /// \param[in] bytes_len |
253 | /// The length of the inst_bytes array. |
254 | /// |
255 | /// \param[in] is_exec_mode_64b |
256 | /// If true, the execution mode is 64 bit. |
257 | /// |
258 | /// \return |
259 | /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding |
260 | /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition |
261 | /// for more details. |
262 | /// Otherwise if the given instruction is invalid, returns std::nullopt. |
263 | std::optional<InstructionOpcodeAndModrm> |
264 | InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, |
265 | bool is_exec_mode_64b) { |
266 | int op_idx = 0; |
267 | bool prefix_done = false; |
268 | InstructionOpcodeAndModrm ret = {.primary_opcode: 0, .opcode_len: 0, .modrm: 0}; |
269 | |
270 | // In most cases, the primary_opcode is the first byte of the instruction |
271 | // but some instructions have a prefix to be skipped for these calculations. |
272 | // The following mapping is inspired from libipt's instruction decoding logic |
273 | // in `src/pt_ild.c` |
274 | while (!prefix_done) { |
275 | if (op_idx >= bytes_len) |
276 | return std::nullopt; |
277 | |
278 | ret.primary_opcode = inst_bytes[op_idx]; |
279 | switch (ret.primary_opcode) { |
280 | // prefix_ignore |
281 | case 0x26: |
282 | case 0x2e: |
283 | case 0x36: |
284 | case 0x3e: |
285 | case 0x64: |
286 | case 0x65: |
287 | // prefix_osz, prefix_asz |
288 | case 0x66: |
289 | case 0x67: |
290 | // prefix_lock, prefix_f2, prefix_f3 |
291 | case 0xf0: |
292 | case 0xf2: |
293 | case 0xf3: |
294 | op_idx++; |
295 | break; |
296 | |
297 | // prefix_rex |
298 | case 0x40: |
299 | case 0x41: |
300 | case 0x42: |
301 | case 0x43: |
302 | case 0x44: |
303 | case 0x45: |
304 | case 0x46: |
305 | case 0x47: |
306 | case 0x48: |
307 | case 0x49: |
308 | case 0x4a: |
309 | case 0x4b: |
310 | case 0x4c: |
311 | case 0x4d: |
312 | case 0x4e: |
313 | case 0x4f: |
314 | if (is_exec_mode_64b) |
315 | op_idx++; |
316 | else |
317 | prefix_done = true; |
318 | break; |
319 | |
320 | // prefix_vex_c4, c5 |
321 | case 0xc5: |
322 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
323 | prefix_done = true; |
324 | break; |
325 | } |
326 | |
327 | ret.opcode_len = 2; |
328 | ret.primary_opcode = inst_bytes[op_idx + 2]; |
329 | ret.modrm = inst_bytes[op_idx + 3]; |
330 | return ret; |
331 | |
332 | case 0xc4: |
333 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
334 | prefix_done = true; |
335 | break; |
336 | } |
337 | ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; |
338 | ret.primary_opcode = inst_bytes[op_idx + 3]; |
339 | ret.modrm = inst_bytes[op_idx + 4]; |
340 | return ret; |
341 | |
342 | // prefix_evex |
343 | case 0x62: |
344 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
345 | prefix_done = true; |
346 | break; |
347 | } |
348 | ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; |
349 | ret.primary_opcode = inst_bytes[op_idx + 4]; |
350 | ret.modrm = inst_bytes[op_idx + 5]; |
351 | return ret; |
352 | |
353 | default: |
354 | prefix_done = true; |
355 | break; |
356 | } |
357 | } // prefix done |
358 | |
359 | ret.primary_opcode = inst_bytes[op_idx]; |
360 | ret.modrm = inst_bytes[op_idx + 1]; |
361 | ret.opcode_len = 1; |
362 | |
363 | // If the first opcode is 0F, it's two- or three- byte opcodes. |
364 | if (ret.primary_opcode == 0x0F) { |
365 | ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte |
366 | |
367 | if (ret.primary_opcode == 0x38) { |
368 | ret.opcode_len = 3; |
369 | ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte |
370 | ret.modrm = inst_bytes[op_idx + 1]; |
371 | } else if (ret.primary_opcode == 0x3A) { |
372 | ret.opcode_len = 3; |
373 | ret.primary_opcode = inst_bytes[++op_idx]; |
374 | ret.modrm = inst_bytes[op_idx + 1]; |
375 | } else if ((ret.primary_opcode & 0xf8) == 0x38) { |
376 | ret.opcode_len = 0; |
377 | ret.primary_opcode = inst_bytes[++op_idx]; |
378 | ret.modrm = inst_bytes[op_idx + 1]; |
379 | } else if (ret.primary_opcode == 0x0F) { |
380 | ret.opcode_len = 3; |
381 | // opcode is 0x0F, no needs to update |
382 | ret.modrm = inst_bytes[op_idx + 1]; |
383 | } else { |
384 | ret.opcode_len = 2; |
385 | ret.modrm = inst_bytes[op_idx + 1]; |
386 | } |
387 | } |
388 | |
389 | return ret; |
390 | } |
391 | |
392 | lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, |
393 | Opcode m_opcode) { |
394 | std::optional<InstructionOpcodeAndModrm> ret; |
395 | |
396 | if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { |
397 | // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes |
398 | return lldb::eInstructionControlFlowKindUnknown; |
399 | } |
400 | |
401 | // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. |
402 | // These are the three values deciding instruction control flow kind. |
403 | ret = InstructionLengthDecode(inst_bytes: (const uint8_t *)m_opcode.GetOpcodeBytes(), |
404 | bytes_len: m_opcode.GetByteSize(), is_exec_mode_64b); |
405 | if (!ret) |
406 | return lldb::eInstructionControlFlowKindUnknown; |
407 | else |
408 | return MapOpcodeIntoControlFlowKind(opcode_and_modrm: *ret); |
409 | } |
410 | |
411 | } // namespace x86 |
412 | |
413 | class InstructionLLVMC : public lldb_private::Instruction { |
414 | public: |
415 | InstructionLLVMC(DisassemblerLLVMC &disasm, |
416 | const lldb_private::Address &address, |
417 | AddressClass addr_class) |
418 | : Instruction(address, addr_class), |
419 | m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( |
420 | r: disasm.shared_from_this())) {} |
421 | |
422 | ~InstructionLLVMC() override = default; |
423 | |
424 | bool DoesBranch() override { |
425 | VisitInstruction(); |
426 | return m_does_branch; |
427 | } |
428 | |
429 | bool HasDelaySlot() override { |
430 | VisitInstruction(); |
431 | return m_has_delay_slot; |
432 | } |
433 | |
434 | bool IsLoad() override { |
435 | VisitInstruction(); |
436 | return m_is_load; |
437 | } |
438 | |
439 | bool IsAuthenticated() override { |
440 | VisitInstruction(); |
441 | return m_is_authenticated; |
442 | } |
443 | |
444 | DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) { |
445 | DisassemblerScope disasm(*this); |
446 | return GetDisasmToUse(is_alternate_isa, disasm); |
447 | } |
448 | |
449 | size_t (const lldb_private::Disassembler &disassembler, |
450 | const lldb_private::DataExtractor &data, |
451 | lldb::offset_t data_offset) override { |
452 | // All we have to do is read the opcode which can be easy for some |
453 | // architectures |
454 | bool got_op = false; |
455 | DisassemblerScope disasm(*this); |
456 | if (disasm) { |
457 | const ArchSpec &arch = disasm->GetArchitecture(); |
458 | const lldb::ByteOrder byte_order = data.GetByteOrder(); |
459 | |
460 | const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); |
461 | const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); |
462 | if (min_op_byte_size == max_op_byte_size) { |
463 | // Fixed size instructions, just read that amount of data. |
464 | if (!data.ValidOffsetForDataOfSize(offset: data_offset, length: min_op_byte_size)) |
465 | return false; |
466 | |
467 | switch (min_op_byte_size) { |
468 | case 1: |
469 | m_opcode.SetOpcode8(inst: data.GetU8(offset_ptr: &data_offset), order: byte_order); |
470 | got_op = true; |
471 | break; |
472 | |
473 | case 2: |
474 | m_opcode.SetOpcode16(inst: data.GetU16(offset_ptr: &data_offset), order: byte_order); |
475 | got_op = true; |
476 | break; |
477 | |
478 | case 4: |
479 | m_opcode.SetOpcode32(inst: data.GetU32(offset_ptr: &data_offset), order: byte_order); |
480 | got_op = true; |
481 | break; |
482 | |
483 | case 8: |
484 | m_opcode.SetOpcode64(inst: data.GetU64(offset_ptr: &data_offset), order: byte_order); |
485 | got_op = true; |
486 | break; |
487 | |
488 | default: |
489 | m_opcode.SetOpcodeBytes(bytes: data.PeekData(offset: data_offset, length: min_op_byte_size), |
490 | length: min_op_byte_size); |
491 | got_op = true; |
492 | break; |
493 | } |
494 | } |
495 | if (!got_op) { |
496 | bool is_alternate_isa = false; |
497 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = |
498 | GetDisasmToUse(is_alternate_isa, disasm); |
499 | |
500 | const llvm::Triple::ArchType machine = arch.GetMachine(); |
501 | if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { |
502 | if (machine == llvm::Triple::thumb || is_alternate_isa) { |
503 | uint32_t thumb_opcode = data.GetU16(offset_ptr: &data_offset); |
504 | if ((thumb_opcode & 0xe000) != 0xe000 || |
505 | ((thumb_opcode & 0x1800u) == 0)) { |
506 | m_opcode.SetOpcode16(inst: thumb_opcode, order: byte_order); |
507 | m_is_valid = true; |
508 | } else { |
509 | thumb_opcode <<= 16; |
510 | thumb_opcode |= data.GetU16(offset_ptr: &data_offset); |
511 | m_opcode.SetOpcode16_2(inst: thumb_opcode, order: byte_order); |
512 | m_is_valid = true; |
513 | } |
514 | } else { |
515 | m_opcode.SetOpcode32(inst: data.GetU32(offset_ptr: &data_offset), order: byte_order); |
516 | m_is_valid = true; |
517 | } |
518 | } else { |
519 | // The opcode isn't evenly sized, so we need to actually use the llvm |
520 | // disassembler to parse it and get the size. |
521 | uint8_t *opcode_data = |
522 | const_cast<uint8_t *>(data.PeekData(offset: data_offset, length: 1)); |
523 | const size_t opcode_data_len = data.BytesLeft(offset: data_offset); |
524 | const addr_t pc = m_address.GetFileAddress(); |
525 | llvm::MCInst inst; |
526 | |
527 | const size_t inst_size = |
528 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
529 | if (inst_size == 0) |
530 | m_opcode.Clear(); |
531 | else { |
532 | m_opcode.SetOpcodeBytes(bytes: opcode_data, length: inst_size); |
533 | m_is_valid = true; |
534 | } |
535 | } |
536 | } |
537 | return m_opcode.GetByteSize(); |
538 | } |
539 | return 0; |
540 | } |
541 | |
542 | void (std::string &description) { |
543 | if (m_comment.empty()) |
544 | m_comment.swap(s&: description); |
545 | else { |
546 | m_comment.append(s: ", " ); |
547 | m_comment.append(str: description); |
548 | } |
549 | } |
550 | |
551 | lldb::InstructionControlFlowKind |
552 | GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override { |
553 | DisassemblerScope disasm(*this, exe_ctx); |
554 | if (disasm){ |
555 | if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86) |
556 | return x86::GetControlFlowKind(/*is_64b=*/is_exec_mode_64b: false, m_opcode); |
557 | else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64) |
558 | return x86::GetControlFlowKind(/*is_64b=*/is_exec_mode_64b: true, m_opcode); |
559 | } |
560 | |
561 | return eInstructionControlFlowKindUnknown; |
562 | } |
563 | |
564 | void CalculateMnemonicOperandsAndComment( |
565 | const lldb_private::ExecutionContext *exe_ctx) override { |
566 | DataExtractor data; |
567 | const AddressClass address_class = GetAddressClass(); |
568 | |
569 | if (m_opcode.GetData(data)) { |
570 | std::string out_string; |
571 | std::string markup_out_string; |
572 | std::string ; |
573 | std::string ; |
574 | |
575 | DisassemblerScope disasm(*this, exe_ctx); |
576 | if (disasm) { |
577 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; |
578 | |
579 | if (address_class == AddressClass::eCodeAlternateISA) |
580 | mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); |
581 | else |
582 | mc_disasm_ptr = disasm->m_disasm_up.get(); |
583 | |
584 | lldb::addr_t pc = m_address.GetFileAddress(); |
585 | m_using_file_addr = true; |
586 | |
587 | bool use_hex_immediates = true; |
588 | Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; |
589 | |
590 | if (exe_ctx) { |
591 | Target *target = exe_ctx->GetTargetPtr(); |
592 | if (target) { |
593 | use_hex_immediates = target->GetUseHexImmediates(); |
594 | hex_style = target->GetHexImmediateStyle(); |
595 | |
596 | const lldb::addr_t load_addr = m_address.GetLoadAddress(target); |
597 | if (load_addr != LLDB_INVALID_ADDRESS) { |
598 | pc = load_addr; |
599 | m_using_file_addr = false; |
600 | } |
601 | } |
602 | } |
603 | |
604 | const uint8_t *opcode_data = data.GetDataStart(); |
605 | const size_t opcode_data_len = data.GetByteSize(); |
606 | llvm::MCInst inst; |
607 | size_t inst_size = |
608 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
609 | |
610 | if (inst_size > 0) { |
611 | mc_disasm_ptr->SetStyle(use_hex_immed: use_hex_immediates, hex_style); |
612 | |
613 | const bool saved_use_color = mc_disasm_ptr->GetUseColor(); |
614 | mc_disasm_ptr->SetUseColor(false); |
615 | mc_disasm_ptr->PrintMCInst(mc_inst&: inst, pc, inst_string&: out_string, comments_string&: comment_string); |
616 | mc_disasm_ptr->SetUseColor(true); |
617 | mc_disasm_ptr->PrintMCInst(mc_inst&: inst, pc, inst_string&: markup_out_string, |
618 | comments_string&: markup_comment_string); |
619 | mc_disasm_ptr->SetUseColor(saved_use_color); |
620 | |
621 | if (!comment_string.empty()) { |
622 | AppendComment(description&: comment_string); |
623 | } |
624 | } |
625 | |
626 | if (inst_size == 0) { |
627 | m_comment.assign(s: "unknown opcode" ); |
628 | inst_size = m_opcode.GetByteSize(); |
629 | StreamString mnemonic_strm; |
630 | lldb::offset_t offset = 0; |
631 | lldb::ByteOrder byte_order = data.GetByteOrder(); |
632 | switch (inst_size) { |
633 | case 1: { |
634 | const uint8_t uval8 = data.GetU8(offset_ptr: &offset); |
635 | m_opcode.SetOpcode8(inst: uval8, order: byte_order); |
636 | m_opcode_name.assign(s: ".byte" ); |
637 | mnemonic_strm.Printf(format: "0x%2.2x" , uval8); |
638 | } break; |
639 | case 2: { |
640 | const uint16_t uval16 = data.GetU16(offset_ptr: &offset); |
641 | m_opcode.SetOpcode16(inst: uval16, order: byte_order); |
642 | m_opcode_name.assign(s: ".short" ); |
643 | mnemonic_strm.Printf(format: "0x%4.4x" , uval16); |
644 | } break; |
645 | case 4: { |
646 | const uint32_t uval32 = data.GetU32(offset_ptr: &offset); |
647 | m_opcode.SetOpcode32(inst: uval32, order: byte_order); |
648 | m_opcode_name.assign(s: ".long" ); |
649 | mnemonic_strm.Printf(format: "0x%8.8x" , uval32); |
650 | } break; |
651 | case 8: { |
652 | const uint64_t uval64 = data.GetU64(offset_ptr: &offset); |
653 | m_opcode.SetOpcode64(inst: uval64, order: byte_order); |
654 | m_opcode_name.assign(s: ".quad" ); |
655 | mnemonic_strm.Printf(format: "0x%16.16" PRIx64, uval64); |
656 | } break; |
657 | default: |
658 | if (inst_size == 0) |
659 | return; |
660 | else { |
661 | const uint8_t *bytes = data.PeekData(offset, length: inst_size); |
662 | if (bytes == nullptr) |
663 | return; |
664 | m_opcode_name.assign(s: ".byte" ); |
665 | m_opcode.SetOpcodeBytes(bytes, length: inst_size); |
666 | mnemonic_strm.Printf(format: "0x%2.2x" , bytes[0]); |
667 | for (uint32_t i = 1; i < inst_size; ++i) |
668 | mnemonic_strm.Printf(format: " 0x%2.2x" , bytes[i]); |
669 | } |
670 | break; |
671 | } |
672 | m_mnemonics = std::string(mnemonic_strm.GetString()); |
673 | return; |
674 | } |
675 | |
676 | static RegularExpression s_regex( |
677 | llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?" )); |
678 | |
679 | llvm::SmallVector<llvm::StringRef, 4> matches; |
680 | if (s_regex.Execute(string: out_string, matches: &matches)) { |
681 | m_opcode_name = matches[1].str(); |
682 | m_mnemonics = matches[2].str(); |
683 | } |
684 | matches.clear(); |
685 | if (s_regex.Execute(string: markup_out_string, matches: &matches)) { |
686 | m_markup_opcode_name = matches[1].str(); |
687 | m_markup_mnemonics = matches[2].str(); |
688 | } |
689 | } |
690 | } |
691 | } |
692 | |
693 | bool IsValid() const { return m_is_valid; } |
694 | |
695 | bool UsingFileAddress() const { return m_using_file_addr; } |
696 | size_t GetByteSize() const { return m_opcode.GetByteSize(); } |
697 | |
698 | /// Grants exclusive access to the disassembler and initializes it with the |
699 | /// given InstructionLLVMC and an optional ExecutionContext. |
700 | class DisassemblerScope { |
701 | std::shared_ptr<DisassemblerLLVMC> m_disasm; |
702 | |
703 | public: |
704 | explicit DisassemblerScope( |
705 | InstructionLLVMC &i, |
706 | const lldb_private::ExecutionContext *exe_ctx = nullptr) |
707 | : m_disasm(i.m_disasm_wp.lock()) { |
708 | m_disasm->m_mutex.lock(); |
709 | m_disasm->m_inst = &i; |
710 | m_disasm->m_exe_ctx = exe_ctx; |
711 | } |
712 | ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } |
713 | |
714 | /// Evaluates to true if this scope contains a valid disassembler. |
715 | operator bool() const { return static_cast<bool>(m_disasm); } |
716 | |
717 | std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } |
718 | }; |
719 | |
720 | static llvm::StringRef::const_iterator |
721 | ConsumeWhitespace(llvm::StringRef::const_iterator osi, |
722 | llvm::StringRef::const_iterator ose) { |
723 | while (osi != ose) { |
724 | switch (*osi) { |
725 | default: |
726 | return osi; |
727 | case ' ': |
728 | case '\t': |
729 | break; |
730 | } |
731 | ++osi; |
732 | } |
733 | |
734 | return osi; |
735 | } |
736 | |
737 | static std::pair<bool, llvm::StringRef::const_iterator> |
738 | ConsumeChar(llvm::StringRef::const_iterator osi, const char c, |
739 | llvm::StringRef::const_iterator ose) { |
740 | bool found = false; |
741 | |
742 | osi = ConsumeWhitespace(osi, ose); |
743 | if (osi != ose && *osi == c) { |
744 | found = true; |
745 | ++osi; |
746 | } |
747 | |
748 | return std::make_pair(x&: found, y&: osi); |
749 | } |
750 | |
751 | static std::pair<Operand, llvm::StringRef::const_iterator> |
752 | ParseRegisterName(llvm::StringRef::const_iterator osi, |
753 | llvm::StringRef::const_iterator ose) { |
754 | Operand ret; |
755 | ret.m_type = Operand::Type::Register; |
756 | std::string str; |
757 | |
758 | osi = ConsumeWhitespace(osi, ose); |
759 | |
760 | while (osi != ose) { |
761 | if (*osi >= '0' && *osi <= '9') { |
762 | if (str.empty()) { |
763 | return std::make_pair(x: Operand(), y&: osi); |
764 | } else { |
765 | str.push_back(c: *osi); |
766 | } |
767 | } else if (*osi >= 'a' && *osi <= 'z') { |
768 | str.push_back(c: *osi); |
769 | } else { |
770 | switch (*osi) { |
771 | default: |
772 | if (str.empty()) { |
773 | return std::make_pair(x: Operand(), y&: osi); |
774 | } else { |
775 | ret.m_register = ConstString(str); |
776 | return std::make_pair(x&: ret, y&: osi); |
777 | } |
778 | case '%': |
779 | if (!str.empty()) { |
780 | return std::make_pair(x: Operand(), y&: osi); |
781 | } |
782 | break; |
783 | } |
784 | } |
785 | ++osi; |
786 | } |
787 | |
788 | ret.m_register = ConstString(str); |
789 | return std::make_pair(x&: ret, y&: osi); |
790 | } |
791 | |
792 | static std::pair<Operand, llvm::StringRef::const_iterator> |
793 | ParseImmediate(llvm::StringRef::const_iterator osi, |
794 | llvm::StringRef::const_iterator ose) { |
795 | Operand ret; |
796 | ret.m_type = Operand::Type::Immediate; |
797 | std::string str; |
798 | bool is_hex = false; |
799 | |
800 | osi = ConsumeWhitespace(osi, ose); |
801 | |
802 | while (osi != ose) { |
803 | if (*osi >= '0' && *osi <= '9') { |
804 | str.push_back(c: *osi); |
805 | } else if (*osi >= 'a' && *osi <= 'f') { |
806 | if (is_hex) { |
807 | str.push_back(c: *osi); |
808 | } else { |
809 | return std::make_pair(x: Operand(), y&: osi); |
810 | } |
811 | } else { |
812 | switch (*osi) { |
813 | default: |
814 | if (str.empty()) { |
815 | return std::make_pair(x: Operand(), y&: osi); |
816 | } else { |
817 | ret.m_immediate = strtoull(nptr: str.c_str(), endptr: nullptr, base: 0); |
818 | return std::make_pair(x&: ret, y&: osi); |
819 | } |
820 | case 'x': |
821 | if (str == "0" ) { |
822 | is_hex = true; |
823 | str.push_back(c: *osi); |
824 | } else { |
825 | return std::make_pair(x: Operand(), y&: osi); |
826 | } |
827 | break; |
828 | case '#': |
829 | case '$': |
830 | if (!str.empty()) { |
831 | return std::make_pair(x: Operand(), y&: osi); |
832 | } |
833 | break; |
834 | case '-': |
835 | if (str.empty()) { |
836 | ret.m_negative = true; |
837 | } else { |
838 | return std::make_pair(x: Operand(), y&: osi); |
839 | } |
840 | } |
841 | } |
842 | ++osi; |
843 | } |
844 | |
845 | ret.m_immediate = strtoull(nptr: str.c_str(), endptr: nullptr, base: 0); |
846 | return std::make_pair(x&: ret, y&: osi); |
847 | } |
848 | |
849 | // -0x5(%rax,%rax,2) |
850 | static std::pair<Operand, llvm::StringRef::const_iterator> |
851 | ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, |
852 | llvm::StringRef::const_iterator ose) { |
853 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
854 | ParseImmediate(osi, ose); |
855 | if (offset_and_iterator.first.IsValid()) { |
856 | osi = offset_and_iterator.second; |
857 | } |
858 | |
859 | bool found = false; |
860 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '(', ose); |
861 | if (!found) { |
862 | return std::make_pair(x: Operand(), y&: osi); |
863 | } |
864 | |
865 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
866 | ParseRegisterName(osi, ose); |
867 | if (base_and_iterator.first.IsValid()) { |
868 | osi = base_and_iterator.second; |
869 | } else { |
870 | return std::make_pair(x: Operand(), y&: osi); |
871 | } |
872 | |
873 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
874 | if (!found) { |
875 | return std::make_pair(x: Operand(), y&: osi); |
876 | } |
877 | |
878 | std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = |
879 | ParseRegisterName(osi, ose); |
880 | if (index_and_iterator.first.IsValid()) { |
881 | osi = index_and_iterator.second; |
882 | } else { |
883 | return std::make_pair(x: Operand(), y&: osi); |
884 | } |
885 | |
886 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
887 | if (!found) { |
888 | return std::make_pair(x: Operand(), y&: osi); |
889 | } |
890 | |
891 | std::pair<Operand, llvm::StringRef::const_iterator> |
892 | multiplier_and_iterator = ParseImmediate(osi, ose); |
893 | if (index_and_iterator.first.IsValid()) { |
894 | osi = index_and_iterator.second; |
895 | } else { |
896 | return std::make_pair(x: Operand(), y&: osi); |
897 | } |
898 | |
899 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ')', ose); |
900 | if (!found) { |
901 | return std::make_pair(x: Operand(), y&: osi); |
902 | } |
903 | |
904 | Operand product; |
905 | product.m_type = Operand::Type::Product; |
906 | product.m_children.push_back(x: index_and_iterator.first); |
907 | product.m_children.push_back(x: multiplier_and_iterator.first); |
908 | |
909 | Operand index; |
910 | index.m_type = Operand::Type::Sum; |
911 | index.m_children.push_back(x: base_and_iterator.first); |
912 | index.m_children.push_back(x: product); |
913 | |
914 | if (offset_and_iterator.first.IsValid()) { |
915 | Operand offset; |
916 | offset.m_type = Operand::Type::Sum; |
917 | offset.m_children.push_back(x: offset_and_iterator.first); |
918 | offset.m_children.push_back(x: index); |
919 | |
920 | Operand deref; |
921 | deref.m_type = Operand::Type::Dereference; |
922 | deref.m_children.push_back(x: offset); |
923 | return std::make_pair(x&: deref, y&: osi); |
924 | } else { |
925 | Operand deref; |
926 | deref.m_type = Operand::Type::Dereference; |
927 | deref.m_children.push_back(x: index); |
928 | return std::make_pair(x&: deref, y&: osi); |
929 | } |
930 | } |
931 | |
932 | // -0x10(%rbp) |
933 | static std::pair<Operand, llvm::StringRef::const_iterator> |
934 | ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, |
935 | llvm::StringRef::const_iterator ose) { |
936 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
937 | ParseImmediate(osi, ose); |
938 | if (offset_and_iterator.first.IsValid()) { |
939 | osi = offset_and_iterator.second; |
940 | } |
941 | |
942 | bool found = false; |
943 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '(', ose); |
944 | if (!found) { |
945 | return std::make_pair(x: Operand(), y&: osi); |
946 | } |
947 | |
948 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
949 | ParseRegisterName(osi, ose); |
950 | if (base_and_iterator.first.IsValid()) { |
951 | osi = base_and_iterator.second; |
952 | } else { |
953 | return std::make_pair(x: Operand(), y&: osi); |
954 | } |
955 | |
956 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ')', ose); |
957 | if (!found) { |
958 | return std::make_pair(x: Operand(), y&: osi); |
959 | } |
960 | |
961 | if (offset_and_iterator.first.IsValid()) { |
962 | Operand offset; |
963 | offset.m_type = Operand::Type::Sum; |
964 | offset.m_children.push_back(x: offset_and_iterator.first); |
965 | offset.m_children.push_back(x: base_and_iterator.first); |
966 | |
967 | Operand deref; |
968 | deref.m_type = Operand::Type::Dereference; |
969 | deref.m_children.push_back(x: offset); |
970 | return std::make_pair(x&: deref, y&: osi); |
971 | } else { |
972 | Operand deref; |
973 | deref.m_type = Operand::Type::Dereference; |
974 | deref.m_children.push_back(x: base_and_iterator.first); |
975 | return std::make_pair(x&: deref, y&: osi); |
976 | } |
977 | } |
978 | |
979 | // [sp, #8]! |
980 | static std::pair<Operand, llvm::StringRef::const_iterator> |
981 | ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, |
982 | llvm::StringRef::const_iterator ose) { |
983 | bool found = false; |
984 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '[', ose); |
985 | if (!found) { |
986 | return std::make_pair(x: Operand(), y&: osi); |
987 | } |
988 | |
989 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
990 | ParseRegisterName(osi, ose); |
991 | if (base_and_iterator.first.IsValid()) { |
992 | osi = base_and_iterator.second; |
993 | } else { |
994 | return std::make_pair(x: Operand(), y&: osi); |
995 | } |
996 | |
997 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
998 | if (!found) { |
999 | return std::make_pair(x: Operand(), y&: osi); |
1000 | } |
1001 | |
1002 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
1003 | ParseImmediate(osi, ose); |
1004 | if (offset_and_iterator.first.IsValid()) { |
1005 | osi = offset_and_iterator.second; |
1006 | } |
1007 | |
1008 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ']', ose); |
1009 | if (!found) { |
1010 | return std::make_pair(x: Operand(), y&: osi); |
1011 | } |
1012 | |
1013 | Operand offset; |
1014 | offset.m_type = Operand::Type::Sum; |
1015 | offset.m_children.push_back(x: offset_and_iterator.first); |
1016 | offset.m_children.push_back(x: base_and_iterator.first); |
1017 | |
1018 | Operand deref; |
1019 | deref.m_type = Operand::Type::Dereference; |
1020 | deref.m_children.push_back(x: offset); |
1021 | return std::make_pair(x&: deref, y&: osi); |
1022 | } |
1023 | |
1024 | // [sp] |
1025 | static std::pair<Operand, llvm::StringRef::const_iterator> |
1026 | ParseARMDerefAccess(llvm::StringRef::const_iterator osi, |
1027 | llvm::StringRef::const_iterator ose) { |
1028 | bool found = false; |
1029 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '[', ose); |
1030 | if (!found) { |
1031 | return std::make_pair(x: Operand(), y&: osi); |
1032 | } |
1033 | |
1034 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
1035 | ParseRegisterName(osi, ose); |
1036 | if (base_and_iterator.first.IsValid()) { |
1037 | osi = base_and_iterator.second; |
1038 | } else { |
1039 | return std::make_pair(x: Operand(), y&: osi); |
1040 | } |
1041 | |
1042 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ']', ose); |
1043 | if (!found) { |
1044 | return std::make_pair(x: Operand(), y&: osi); |
1045 | } |
1046 | |
1047 | Operand deref; |
1048 | deref.m_type = Operand::Type::Dereference; |
1049 | deref.m_children.push_back(x: base_and_iterator.first); |
1050 | return std::make_pair(x&: deref, y&: osi); |
1051 | } |
1052 | |
1053 | static void DumpOperand(const Operand &op, Stream &s) { |
1054 | switch (op.m_type) { |
1055 | case Operand::Type::Dereference: |
1056 | s.PutCString(cstr: "*" ); |
1057 | DumpOperand(op: op.m_children[0], s); |
1058 | break; |
1059 | case Operand::Type::Immediate: |
1060 | if (op.m_negative) { |
1061 | s.PutCString(cstr: "-" ); |
1062 | } |
1063 | s.PutCString(cstr: llvm::to_string(Value: op.m_immediate)); |
1064 | break; |
1065 | case Operand::Type::Invalid: |
1066 | s.PutCString(cstr: "Invalid" ); |
1067 | break; |
1068 | case Operand::Type::Product: |
1069 | s.PutCString(cstr: "(" ); |
1070 | DumpOperand(op: op.m_children[0], s); |
1071 | s.PutCString(cstr: "*" ); |
1072 | DumpOperand(op: op.m_children[1], s); |
1073 | s.PutCString(cstr: ")" ); |
1074 | break; |
1075 | case Operand::Type::Register: |
1076 | s.PutCString(cstr: op.m_register.GetStringRef()); |
1077 | break; |
1078 | case Operand::Type::Sum: |
1079 | s.PutCString(cstr: "(" ); |
1080 | DumpOperand(op: op.m_children[0], s); |
1081 | s.PutCString(cstr: "+" ); |
1082 | DumpOperand(op: op.m_children[1], s); |
1083 | s.PutCString(cstr: ")" ); |
1084 | break; |
1085 | } |
1086 | } |
1087 | |
1088 | bool ParseOperands( |
1089 | llvm::SmallVectorImpl<Instruction::Operand> &operands) override { |
1090 | const char *operands_string = GetOperands(exe_ctx: nullptr); |
1091 | |
1092 | if (!operands_string) { |
1093 | return false; |
1094 | } |
1095 | |
1096 | llvm::StringRef operands_ref(operands_string); |
1097 | |
1098 | llvm::StringRef::const_iterator osi = operands_ref.begin(); |
1099 | llvm::StringRef::const_iterator ose = operands_ref.end(); |
1100 | |
1101 | while (osi != ose) { |
1102 | Operand operand; |
1103 | llvm::StringRef::const_iterator iter; |
1104 | |
1105 | if ((std::tie(args&: operand, args&: iter) = ParseIntelIndexedAccess(osi, ose), |
1106 | operand.IsValid()) || |
1107 | (std::tie(args&: operand, args&: iter) = ParseIntelDerefAccess(osi, ose), |
1108 | operand.IsValid()) || |
1109 | (std::tie(args&: operand, args&: iter) = ParseARMOffsetAccess(osi, ose), |
1110 | operand.IsValid()) || |
1111 | (std::tie(args&: operand, args&: iter) = ParseARMDerefAccess(osi, ose), |
1112 | operand.IsValid()) || |
1113 | (std::tie(args&: operand, args&: iter) = ParseRegisterName(osi, ose), |
1114 | operand.IsValid()) || |
1115 | (std::tie(args&: operand, args&: iter) = ParseImmediate(osi, ose), |
1116 | operand.IsValid())) { |
1117 | osi = iter; |
1118 | operands.push_back(Elt: operand); |
1119 | } else { |
1120 | return false; |
1121 | } |
1122 | |
1123 | std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = |
1124 | ConsumeChar(osi, c: ',', ose); |
1125 | if (found_and_iter.first) { |
1126 | osi = found_and_iter.second; |
1127 | } |
1128 | |
1129 | osi = ConsumeWhitespace(osi, ose); |
1130 | } |
1131 | |
1132 | DisassemblerSP disasm_sp = m_disasm_wp.lock(); |
1133 | |
1134 | if (disasm_sp && operands.size() > 1) { |
1135 | // TODO tie this into the MC Disassembler's notion of clobbers. |
1136 | switch (disasm_sp->GetArchitecture().GetMachine()) { |
1137 | default: |
1138 | break; |
1139 | case llvm::Triple::x86: |
1140 | case llvm::Triple::x86_64: |
1141 | operands[operands.size() - 1].m_clobbered = true; |
1142 | break; |
1143 | case llvm::Triple::arm: |
1144 | operands[0].m_clobbered = true; |
1145 | break; |
1146 | } |
1147 | } |
1148 | |
1149 | if (Log *log = GetLog(mask: LLDBLog::Process)) { |
1150 | StreamString ss; |
1151 | |
1152 | ss.Printf(format: "[%s] expands to %zu operands:\n" , operands_string, |
1153 | operands.size()); |
1154 | for (const Operand &operand : operands) { |
1155 | ss.PutCString(cstr: " " ); |
1156 | DumpOperand(op: operand, s&: ss); |
1157 | ss.PutCString(cstr: "\n" ); |
1158 | } |
1159 | |
1160 | log->PutString(str: ss.GetString()); |
1161 | } |
1162 | |
1163 | return true; |
1164 | } |
1165 | |
1166 | bool IsCall() override { |
1167 | VisitInstruction(); |
1168 | return m_is_call; |
1169 | } |
1170 | |
1171 | protected: |
1172 | std::weak_ptr<DisassemblerLLVMC> m_disasm_wp; |
1173 | |
1174 | bool m_is_valid = false; |
1175 | bool m_using_file_addr = false; |
1176 | bool m_has_visited_instruction = false; |
1177 | |
1178 | // Be conservative. If we didn't understand the instruction, say it: |
1179 | // - Might branch |
1180 | // - Does not have a delay slot |
1181 | // - Is not a call |
1182 | // - Is not a load |
1183 | // - Is not an authenticated instruction |
1184 | bool m_does_branch = true; |
1185 | bool m_has_delay_slot = false; |
1186 | bool m_is_call = false; |
1187 | bool m_is_load = false; |
1188 | bool m_is_authenticated = false; |
1189 | |
1190 | void VisitInstruction() { |
1191 | if (m_has_visited_instruction) |
1192 | return; |
1193 | |
1194 | DisassemblerScope disasm(*this); |
1195 | if (!disasm) |
1196 | return; |
1197 | |
1198 | DataExtractor data; |
1199 | if (!m_opcode.GetData(data)) |
1200 | return; |
1201 | |
1202 | bool is_alternate_isa; |
1203 | lldb::addr_t pc = m_address.GetFileAddress(); |
1204 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = |
1205 | GetDisasmToUse(is_alternate_isa, disasm); |
1206 | const uint8_t *opcode_data = data.GetDataStart(); |
1207 | const size_t opcode_data_len = data.GetByteSize(); |
1208 | llvm::MCInst inst; |
1209 | const size_t inst_size = |
1210 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
1211 | if (inst_size == 0) |
1212 | return; |
1213 | |
1214 | m_has_visited_instruction = true; |
1215 | m_does_branch = mc_disasm_ptr->CanBranch(mc_inst&: inst); |
1216 | m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(mc_inst&: inst); |
1217 | m_is_call = mc_disasm_ptr->IsCall(mc_inst&: inst); |
1218 | m_is_load = mc_disasm_ptr->IsLoad(mc_inst&: inst); |
1219 | m_is_authenticated = mc_disasm_ptr->IsAuthenticated(mc_inst&: inst); |
1220 | } |
1221 | |
1222 | private: |
1223 | DisassemblerLLVMC::MCDisasmInstance * |
1224 | GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { |
1225 | is_alternate_isa = false; |
1226 | if (disasm) { |
1227 | if (disasm->m_alternate_disasm_up) { |
1228 | const AddressClass address_class = GetAddressClass(); |
1229 | |
1230 | if (address_class == AddressClass::eCodeAlternateISA) { |
1231 | is_alternate_isa = true; |
1232 | return disasm->m_alternate_disasm_up.get(); |
1233 | } |
1234 | } |
1235 | return disasm->m_disasm_up.get(); |
1236 | } |
1237 | return nullptr; |
1238 | } |
1239 | }; |
1240 | |
1241 | std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> |
1242 | DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, |
1243 | const char *features_str, |
1244 | unsigned flavor, |
1245 | DisassemblerLLVMC &owner) { |
1246 | using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; |
1247 | |
1248 | std::string Status; |
1249 | const llvm::Target *curr_target = |
1250 | llvm::TargetRegistry::lookupTarget(TripleStr: triple, Error&: Status); |
1251 | if (!curr_target) |
1252 | return Instance(); |
1253 | |
1254 | std::unique_ptr<llvm::MCInstrInfo> instr_info_up( |
1255 | curr_target->createMCInstrInfo()); |
1256 | if (!instr_info_up) |
1257 | return Instance(); |
1258 | |
1259 | std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( |
1260 | curr_target->createMCRegInfo(TT: triple)); |
1261 | if (!reg_info_up) |
1262 | return Instance(); |
1263 | |
1264 | std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( |
1265 | curr_target->createMCSubtargetInfo(TheTriple: triple, CPU: cpu, Features: features_str)); |
1266 | if (!subtarget_info_up) |
1267 | return Instance(); |
1268 | |
1269 | llvm::MCTargetOptions MCOptions; |
1270 | std::unique_ptr<llvm::MCAsmInfo> asm_info_up( |
1271 | curr_target->createMCAsmInfo(MRI: *reg_info_up, TheTriple: triple, Options: MCOptions)); |
1272 | if (!asm_info_up) |
1273 | return Instance(); |
1274 | |
1275 | std::unique_ptr<llvm::MCContext> context_up( |
1276 | new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(), |
1277 | reg_info_up.get(), subtarget_info_up.get())); |
1278 | if (!context_up) |
1279 | return Instance(); |
1280 | |
1281 | std::unique_ptr<llvm::MCDisassembler> disasm_up( |
1282 | curr_target->createMCDisassembler(STI: *subtarget_info_up, Ctx&: *context_up)); |
1283 | if (!disasm_up) |
1284 | return Instance(); |
1285 | |
1286 | std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( |
1287 | curr_target->createMCRelocationInfo(TT: triple, Ctx&: *context_up)); |
1288 | if (!rel_info_up) |
1289 | return Instance(); |
1290 | |
1291 | std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( |
1292 | curr_target->createMCSymbolizer( |
1293 | TT: triple, GetOpInfo: nullptr, SymbolLookUp: DisassemblerLLVMC::SymbolLookupCallback, DisInfo: &owner, |
1294 | Ctx: context_up.get(), RelInfo: std::move(rel_info_up))); |
1295 | disasm_up->setSymbolizer(std::move(symbolizer_up)); |
1296 | |
1297 | unsigned asm_printer_variant = |
1298 | flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; |
1299 | |
1300 | std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( |
1301 | curr_target->createMCInstPrinter(T: llvm::Triple{triple}, |
1302 | SyntaxVariant: asm_printer_variant, MAI: *asm_info_up, |
1303 | MII: *instr_info_up, MRI: *reg_info_up)); |
1304 | if (!instr_printer_up) |
1305 | return Instance(); |
1306 | |
1307 | instr_printer_up->setPrintBranchImmAsAddress(true); |
1308 | |
1309 | // Not all targets may have registered createMCInstrAnalysis(). |
1310 | std::unique_ptr<llvm::MCInstrAnalysis> instr_analysis_up( |
1311 | curr_target->createMCInstrAnalysis(Info: instr_info_up.get())); |
1312 | |
1313 | return Instance(new MCDisasmInstance( |
1314 | std::move(instr_info_up), std::move(reg_info_up), |
1315 | std::move(subtarget_info_up), std::move(asm_info_up), |
1316 | std::move(context_up), std::move(disasm_up), std::move(instr_printer_up), |
1317 | std::move(instr_analysis_up))); |
1318 | } |
1319 | |
1320 | DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( |
1321 | std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, |
1322 | std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, |
1323 | std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, |
1324 | std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, |
1325 | std::unique_ptr<llvm::MCContext> &&context_up, |
1326 | std::unique_ptr<llvm::MCDisassembler> &&disasm_up, |
1327 | std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up, |
1328 | std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up) |
1329 | : m_instr_info_up(std::move(instr_info_up)), |
1330 | m_reg_info_up(std::move(reg_info_up)), |
1331 | m_subtarget_info_up(std::move(subtarget_info_up)), |
1332 | m_asm_info_up(std::move(asm_info_up)), |
1333 | m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), |
1334 | m_instr_printer_up(std::move(instr_printer_up)), |
1335 | m_instr_analysis_up(std::move(instr_analysis_up)) { |
1336 | assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up && |
1337 | m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); |
1338 | } |
1339 | |
1340 | uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( |
1341 | const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, |
1342 | llvm::MCInst &mc_inst) const { |
1343 | llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); |
1344 | llvm::MCDisassembler::DecodeStatus status; |
1345 | |
1346 | uint64_t new_inst_size; |
1347 | status = m_disasm_up->getInstruction(Instr&: mc_inst, Size&: new_inst_size, Bytes: data, Address: pc, |
1348 | CStream&: llvm::nulls()); |
1349 | if (status == llvm::MCDisassembler::Success) |
1350 | return new_inst_size; |
1351 | else |
1352 | return 0; |
1353 | } |
1354 | |
1355 | void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( |
1356 | llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string, |
1357 | std::string &) { |
1358 | llvm::raw_string_ostream inst_stream(inst_string); |
1359 | llvm::raw_string_ostream (comments_string); |
1360 | |
1361 | inst_stream.enable_colors(enable: m_instr_printer_up->getUseColor()); |
1362 | m_instr_printer_up->setCommentStream(comments_stream); |
1363 | m_instr_printer_up->printInst(MI: &mc_inst, Address: pc, Annot: llvm::StringRef(), |
1364 | STI: *m_subtarget_info_up, OS&: inst_stream); |
1365 | m_instr_printer_up->setCommentStream(llvm::nulls()); |
1366 | |
1367 | static std::string g_newlines("\r\n" ); |
1368 | |
1369 | for (size_t newline_pos = 0; |
1370 | (newline_pos = comments_string.find_first_of(str: g_newlines, pos: newline_pos)) != |
1371 | comments_string.npos; |
1372 | /**/) { |
1373 | comments_string.replace(i1: comments_string.begin() + newline_pos, |
1374 | i2: comments_string.begin() + newline_pos + 1, n: 1, c: ' '); |
1375 | } |
1376 | } |
1377 | |
1378 | void DisassemblerLLVMC::MCDisasmInstance::SetStyle( |
1379 | bool use_hex_immed, HexImmediateStyle hex_style) { |
1380 | m_instr_printer_up->setPrintImmHex(use_hex_immed); |
1381 | switch (hex_style) { |
1382 | case eHexStyleC: |
1383 | m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); |
1384 | break; |
1385 | case eHexStyleAsm: |
1386 | m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); |
1387 | break; |
1388 | } |
1389 | } |
1390 | |
1391 | void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color) { |
1392 | m_instr_printer_up->setUseColor(use_color); |
1393 | } |
1394 | |
1395 | bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const { |
1396 | return m_instr_printer_up->getUseColor(); |
1397 | } |
1398 | |
1399 | bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( |
1400 | llvm::MCInst &mc_inst) const { |
1401 | if (m_instr_analysis_up) |
1402 | return m_instr_analysis_up->mayAffectControlFlow(Inst: mc_inst, MCRI: *m_reg_info_up); |
1403 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()) |
1404 | .mayAffectControlFlow(MI: mc_inst, RI: *m_reg_info_up); |
1405 | } |
1406 | |
1407 | bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( |
1408 | llvm::MCInst &mc_inst) const { |
1409 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).hasDelaySlot(); |
1410 | } |
1411 | |
1412 | bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { |
1413 | if (m_instr_analysis_up) |
1414 | return m_instr_analysis_up->isCall(Inst: mc_inst); |
1415 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).isCall(); |
1416 | } |
1417 | |
1418 | bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const { |
1419 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).mayLoad(); |
1420 | } |
1421 | |
1422 | bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated( |
1423 | llvm::MCInst &mc_inst) const { |
1424 | const auto &InstrDesc = m_instr_info_up->get(Opcode: mc_inst.getOpcode()); |
1425 | |
1426 | // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4 |
1427 | // == 'a' + 'c') as authenticated instructions for reporting purposes, in |
1428 | // addition to the standard authenticated instructions specified in ARMv8.3. |
1429 | bool IsBrkC47x = false; |
1430 | if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) { |
1431 | const llvm::MCOperand &Op0 = mc_inst.getOperand(i: 0); |
1432 | if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474) |
1433 | IsBrkC47x = true; |
1434 | } |
1435 | |
1436 | return InstrDesc.isAuthenticated() || IsBrkC47x; |
1437 | } |
1438 | |
1439 | DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, |
1440 | const char *flavor_string, |
1441 | const char *cpu_string, |
1442 | const char *features_string) |
1443 | : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), |
1444 | m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS), |
1445 | m_adrp_insn() { |
1446 | if (!FlavorValidForArchSpec(arch, flavor: m_flavor.c_str())) { |
1447 | m_flavor.assign(s: "default" ); |
1448 | } |
1449 | |
1450 | const bool cpu_or_features_overriden = cpu_string || features_string; |
1451 | unsigned flavor = ~0U; |
1452 | llvm::Triple triple = arch.GetTriple(); |
1453 | |
1454 | // So far the only supported flavor is "intel" on x86. The base class will |
1455 | // set this correctly coming in. |
1456 | if (triple.getArch() == llvm::Triple::x86 || |
1457 | triple.getArch() == llvm::Triple::x86_64) { |
1458 | if (m_flavor == "intel" ) { |
1459 | flavor = 1; |
1460 | } else if (m_flavor == "att" ) { |
1461 | flavor = 0; |
1462 | } |
1463 | } |
1464 | |
1465 | ArchSpec thumb_arch(arch); |
1466 | if (triple.getArch() == llvm::Triple::arm) { |
1467 | std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); |
1468 | // Replace "arm" with "thumb" so we get all thumb variants correct |
1469 | if (thumb_arch_name.size() > 3) { |
1470 | thumb_arch_name.erase(pos: 0, n: 3); |
1471 | thumb_arch_name.insert(pos: 0, s: "thumb" ); |
1472 | } else { |
1473 | thumb_arch_name = "thumbv9.3a" ; |
1474 | } |
1475 | thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); |
1476 | } |
1477 | |
1478 | // If no sub architecture specified then use the most recent arm architecture |
1479 | // so the disassembler will return all instructions. Without it we will see a |
1480 | // lot of unknown opcodes if the code uses instructions which are not |
1481 | // available in the oldest arm version (which is used when no sub architecture |
1482 | // is specified). |
1483 | if (triple.getArch() == llvm::Triple::arm && |
1484 | triple.getSubArch() == llvm::Triple::NoSubArch) |
1485 | triple.setArchName("armv9.3a" ); |
1486 | |
1487 | std::string features_str = |
1488 | features_string ? std::string(features_string) : "" ; |
1489 | const char *triple_str = triple.getTriple().c_str(); |
1490 | |
1491 | // ARM Cortex M0-M7 devices only execute thumb instructions |
1492 | if (arch.IsAlwaysThumbInstructions()) { |
1493 | triple_str = thumb_arch.GetTriple().getTriple().c_str(); |
1494 | if (!features_string) |
1495 | features_str += "+fp-armv8," ; |
1496 | } |
1497 | |
1498 | const char *cpu = cpu_string; |
1499 | |
1500 | if (!cpu_or_features_overriden) { |
1501 | switch (arch.GetCore()) { |
1502 | case ArchSpec::eCore_mips32: |
1503 | case ArchSpec::eCore_mips32el: |
1504 | cpu = "mips32" ; |
1505 | break; |
1506 | case ArchSpec::eCore_mips32r2: |
1507 | case ArchSpec::eCore_mips32r2el: |
1508 | cpu = "mips32r2" ; |
1509 | break; |
1510 | case ArchSpec::eCore_mips32r3: |
1511 | case ArchSpec::eCore_mips32r3el: |
1512 | cpu = "mips32r3" ; |
1513 | break; |
1514 | case ArchSpec::eCore_mips32r5: |
1515 | case ArchSpec::eCore_mips32r5el: |
1516 | cpu = "mips32r5" ; |
1517 | break; |
1518 | case ArchSpec::eCore_mips32r6: |
1519 | case ArchSpec::eCore_mips32r6el: |
1520 | cpu = "mips32r6" ; |
1521 | break; |
1522 | case ArchSpec::eCore_mips64: |
1523 | case ArchSpec::eCore_mips64el: |
1524 | cpu = "mips64" ; |
1525 | break; |
1526 | case ArchSpec::eCore_mips64r2: |
1527 | case ArchSpec::eCore_mips64r2el: |
1528 | cpu = "mips64r2" ; |
1529 | break; |
1530 | case ArchSpec::eCore_mips64r3: |
1531 | case ArchSpec::eCore_mips64r3el: |
1532 | cpu = "mips64r3" ; |
1533 | break; |
1534 | case ArchSpec::eCore_mips64r5: |
1535 | case ArchSpec::eCore_mips64r5el: |
1536 | cpu = "mips64r5" ; |
1537 | break; |
1538 | case ArchSpec::eCore_mips64r6: |
1539 | case ArchSpec::eCore_mips64r6el: |
1540 | cpu = "mips64r6" ; |
1541 | break; |
1542 | default: |
1543 | cpu = "" ; |
1544 | break; |
1545 | } |
1546 | } |
1547 | |
1548 | if (arch.IsMIPS() && !cpu_or_features_overriden) { |
1549 | uint32_t arch_flags = arch.GetFlags(); |
1550 | if (arch_flags & ArchSpec::eMIPSAse_msa) |
1551 | features_str += "+msa," ; |
1552 | if (arch_flags & ArchSpec::eMIPSAse_dsp) |
1553 | features_str += "+dsp," ; |
1554 | if (arch_flags & ArchSpec::eMIPSAse_dspr2) |
1555 | features_str += "+dspr2," ; |
1556 | } |
1557 | |
1558 | // If any AArch64 variant, enable latest ISA with all extensions unless the |
1559 | // CPU or features were overridden. |
1560 | if (triple.isAArch64() && !cpu_or_features_overriden) { |
1561 | features_str += "+all," ; |
1562 | if (triple.getVendor() == llvm::Triple::Apple) |
1563 | cpu = "apple-latest" ; |
1564 | } |
1565 | |
1566 | if (triple.isRISCV() && !cpu_or_features_overriden) { |
1567 | uint32_t arch_flags = arch.GetFlags(); |
1568 | if (arch_flags & ArchSpec::eRISCV_rvc) |
1569 | features_str += "+c," ; |
1570 | if (arch_flags & ArchSpec::eRISCV_rve) |
1571 | features_str += "+e," ; |
1572 | if ((arch_flags & ArchSpec::eRISCV_float_abi_single) == |
1573 | ArchSpec::eRISCV_float_abi_single) |
1574 | features_str += "+f," ; |
1575 | if ((arch_flags & ArchSpec::eRISCV_float_abi_double) == |
1576 | ArchSpec::eRISCV_float_abi_double) |
1577 | features_str += "+f,+d," ; |
1578 | if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) == |
1579 | ArchSpec::eRISCV_float_abi_quad) |
1580 | features_str += "+f,+d,+q," ; |
1581 | // FIXME: how do we detect features such as `+a`, `+m`? |
1582 | // Turn them on by default now, since everyone seems to use them |
1583 | features_str += "+a,+m," ; |
1584 | } |
1585 | |
1586 | // We use m_disasm_up.get() to tell whether we are valid or not, so if this |
1587 | // isn't good for some reason, we won't be valid and FindPlugin will fail and |
1588 | // we won't get used. |
1589 | m_disasm_up = MCDisasmInstance::Create(triple: triple_str, cpu, features_str: features_str.c_str(), |
1590 | flavor, owner&: *this); |
1591 | |
1592 | llvm::Triple::ArchType llvm_arch = triple.getArch(); |
1593 | |
1594 | // For arm CPUs that can execute arm or thumb instructions, also create a |
1595 | // thumb instruction disassembler. |
1596 | if (llvm_arch == llvm::Triple::arm) { |
1597 | std::string thumb_triple(thumb_arch.GetTriple().getTriple()); |
1598 | m_alternate_disasm_up = |
1599 | MCDisasmInstance::Create(triple: thumb_triple.c_str(), cpu: "" , features_str: features_str.c_str(), |
1600 | flavor, owner&: *this); |
1601 | if (!m_alternate_disasm_up) |
1602 | m_disasm_up.reset(); |
1603 | |
1604 | } else if (arch.IsMIPS()) { |
1605 | /* Create alternate disassembler for MIPS16 and microMIPS */ |
1606 | uint32_t arch_flags = arch.GetFlags(); |
1607 | if (arch_flags & ArchSpec::eMIPSAse_mips16) |
1608 | features_str += "+mips16," ; |
1609 | else if (arch_flags & ArchSpec::eMIPSAse_micromips) |
1610 | features_str += "+micromips," ; |
1611 | |
1612 | m_alternate_disasm_up = MCDisasmInstance::Create( |
1613 | triple: triple_str, cpu, features_str: features_str.c_str(), flavor, owner&: *this); |
1614 | if (!m_alternate_disasm_up) |
1615 | m_disasm_up.reset(); |
1616 | } |
1617 | } |
1618 | |
1619 | DisassemblerLLVMC::~DisassemblerLLVMC() = default; |
1620 | |
1621 | lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, |
1622 | const char *flavor, |
1623 | const char *cpu, |
1624 | const char *features) { |
1625 | if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { |
1626 | auto disasm_sp = |
1627 | std::make_shared<DisassemblerLLVMC>(args: arch, args&: flavor, args&: cpu, args&: features); |
1628 | if (disasm_sp && disasm_sp->IsValid()) |
1629 | return disasm_sp; |
1630 | } |
1631 | return lldb::DisassemblerSP(); |
1632 | } |
1633 | |
1634 | size_t DisassemblerLLVMC::(const Address &base_addr, |
1635 | const DataExtractor &data, |
1636 | lldb::offset_t data_offset, |
1637 | size_t num_instructions, |
1638 | bool append, bool data_from_file) { |
1639 | if (!append) |
1640 | m_instruction_list.Clear(); |
1641 | |
1642 | if (!IsValid()) |
1643 | return 0; |
1644 | |
1645 | m_data_from_file = data_from_file; |
1646 | uint32_t data_cursor = data_offset; |
1647 | const size_t data_byte_size = data.GetByteSize(); |
1648 | uint32_t instructions_parsed = 0; |
1649 | Address inst_addr(base_addr); |
1650 | |
1651 | while (data_cursor < data_byte_size && |
1652 | instructions_parsed < num_instructions) { |
1653 | |
1654 | AddressClass address_class = AddressClass::eCode; |
1655 | |
1656 | if (m_alternate_disasm_up) |
1657 | address_class = inst_addr.GetAddressClass(); |
1658 | |
1659 | InstructionSP inst_sp( |
1660 | new InstructionLLVMC(*this, inst_addr, address_class)); |
1661 | |
1662 | if (!inst_sp) |
1663 | break; |
1664 | |
1665 | uint32_t inst_size = inst_sp->Decode(disassembler: *this, data, data_offset: data_cursor); |
1666 | |
1667 | if (inst_size == 0) |
1668 | break; |
1669 | |
1670 | m_instruction_list.Append(inst_sp); |
1671 | data_cursor += inst_size; |
1672 | inst_addr.Slide(offset: inst_size); |
1673 | instructions_parsed++; |
1674 | } |
1675 | |
1676 | return data_cursor - data_offset; |
1677 | } |
1678 | |
1679 | void DisassemblerLLVMC::Initialize() { |
1680 | PluginManager::RegisterPlugin(name: GetPluginNameStatic(), |
1681 | description: "Disassembler that uses LLVM MC to disassemble " |
1682 | "i386, x86_64, ARM, and ARM64." , |
1683 | create_callback: CreateInstance); |
1684 | |
1685 | llvm::InitializeAllTargetInfos(); |
1686 | llvm::InitializeAllTargetMCs(); |
1687 | llvm::InitializeAllAsmParsers(); |
1688 | llvm::InitializeAllDisassemblers(); |
1689 | } |
1690 | |
1691 | void DisassemblerLLVMC::Terminate() { |
1692 | PluginManager::UnregisterPlugin(create_callback: CreateInstance); |
1693 | } |
1694 | |
1695 | int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, |
1696 | uint64_t offset, uint64_t size, |
1697 | int tag_type, void *tag_bug) { |
1698 | return static_cast<DisassemblerLLVMC *>(disassembler) |
1699 | ->OpInfo(PC: pc, Offset: offset, Size: size, TagType: tag_type, TagBug: tag_bug); |
1700 | } |
1701 | |
1702 | const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, |
1703 | uint64_t value, |
1704 | uint64_t *type, uint64_t pc, |
1705 | const char **name) { |
1706 | return static_cast<DisassemblerLLVMC *>(disassembler) |
1707 | ->SymbolLookup(ReferenceValue: value, ReferenceType: type, ReferencePC: pc, ReferenceName: name); |
1708 | } |
1709 | |
1710 | bool DisassemblerLLVMC::FlavorValidForArchSpec( |
1711 | const lldb_private::ArchSpec &arch, const char *flavor) { |
1712 | llvm::Triple triple = arch.GetTriple(); |
1713 | if (flavor == nullptr || strcmp(s1: flavor, s2: "default" ) == 0) |
1714 | return true; |
1715 | |
1716 | if (triple.getArch() == llvm::Triple::x86 || |
1717 | triple.getArch() == llvm::Triple::x86_64) { |
1718 | return strcmp(s1: flavor, s2: "intel" ) == 0 || strcmp(s1: flavor, s2: "att" ) == 0; |
1719 | } else |
1720 | return false; |
1721 | } |
1722 | |
1723 | bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } |
1724 | |
1725 | int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, |
1726 | int tag_type, void *tag_bug) { |
1727 | switch (tag_type) { |
1728 | default: |
1729 | break; |
1730 | case 1: |
1731 | memset(s: tag_bug, c: 0, n: sizeof(::LLVMOpInfo1)); |
1732 | break; |
1733 | } |
1734 | return 0; |
1735 | } |
1736 | |
1737 | const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, |
1738 | uint64_t pc, const char **name) { |
1739 | if (*type_ptr) { |
1740 | if (m_exe_ctx && m_inst) { |
1741 | // std::string remove_this_prior_to_checkin; |
1742 | Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; |
1743 | Address value_so_addr; |
1744 | Address pc_so_addr; |
1745 | if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 || |
1746 | target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be || |
1747 | target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) { |
1748 | if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { |
1749 | m_adrp_address = pc; |
1750 | m_adrp_insn = value; |
1751 | *name = nullptr; |
1752 | *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; |
1753 | return nullptr; |
1754 | } |
1755 | // If this instruction is an ADD and |
1756 | // the previous instruction was an ADRP and |
1757 | // the ADRP's register and this ADD's register are the same, |
1758 | // then this is a pc-relative address calculation. |
1759 | if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && |
1760 | m_adrp_insn && m_adrp_address == pc - 4 && |
1761 | (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) { |
1762 | uint32_t addxri_inst; |
1763 | uint64_t adrp_imm, addxri_imm; |
1764 | // Get immlo and immhi bits, OR them together to get the ADRP imm |
1765 | // value. |
1766 | adrp_imm = |
1767 | ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3); |
1768 | // if high bit of immhi after right-shifting set, sign extend |
1769 | if (adrp_imm & (1ULL << 20)) |
1770 | adrp_imm |= ~((1ULL << 21) - 1); |
1771 | |
1772 | addxri_inst = value; |
1773 | addxri_imm = (addxri_inst >> 10) & 0xfff; |
1774 | // check if 'sh' bit is set, shift imm value up if so |
1775 | // (this would make no sense, ADRP already gave us this part) |
1776 | if ((addxri_inst >> (12 + 5 + 5)) & 1) |
1777 | addxri_imm <<= 12; |
1778 | value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) + |
1779 | addxri_imm; |
1780 | } |
1781 | m_adrp_address = LLDB_INVALID_ADDRESS; |
1782 | m_adrp_insn.reset(); |
1783 | } |
1784 | |
1785 | if (m_inst->UsingFileAddress()) { |
1786 | ModuleSP module_sp(m_inst->GetAddress().GetModule()); |
1787 | if (module_sp) { |
1788 | module_sp->ResolveFileAddress(vm_addr: value, so_addr&: value_so_addr); |
1789 | module_sp->ResolveFileAddress(vm_addr: pc, so_addr&: pc_so_addr); |
1790 | } |
1791 | } else if (target && target->HasLoadedSections()) { |
1792 | target->ResolveLoadAddress(load_addr: value, so_addr&: value_so_addr); |
1793 | target->ResolveLoadAddress(load_addr: pc, so_addr&: pc_so_addr); |
1794 | } |
1795 | |
1796 | SymbolContext sym_ctx; |
1797 | const SymbolContextItem resolve_scope = |
1798 | eSymbolContextFunction | eSymbolContextSymbol; |
1799 | if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { |
1800 | pc_so_addr.GetModule()->ResolveSymbolContextForAddress( |
1801 | so_addr: pc_so_addr, resolve_scope, sc&: sym_ctx); |
1802 | } |
1803 | |
1804 | if (value_so_addr.IsValid() && value_so_addr.GetSection()) { |
1805 | StreamString ss; |
1806 | |
1807 | bool format_omitting_current_func_name = false; |
1808 | if (sym_ctx.symbol || sym_ctx.function) { |
1809 | AddressRange range; |
1810 | for (uint32_t idx = 0; |
1811 | sym_ctx.GetAddressRange(scope: resolve_scope, range_idx: idx, use_inline_block_range: false, range); |
1812 | ++idx) { |
1813 | if (range.ContainsLoadAddress(so_addr: value_so_addr, target)) { |
1814 | format_omitting_current_func_name = true; |
1815 | break; |
1816 | } |
1817 | } |
1818 | } |
1819 | |
1820 | // If the "value" address (the target address we're symbolicating) is |
1821 | // inside the same SymbolContext as the current instruction pc |
1822 | // (pc_so_addr), don't print the full function name - just print it |
1823 | // with DumpStyleNoFunctionName style, e.g. "<+36>". |
1824 | if (format_omitting_current_func_name) { |
1825 | value_so_addr.Dump(s: &ss, exe_scope: target, style: Address::DumpStyleNoFunctionName, |
1826 | fallback_style: Address::DumpStyleSectionNameOffset); |
1827 | } else { |
1828 | value_so_addr.Dump( |
1829 | s: &ss, exe_scope: target, |
1830 | style: Address::DumpStyleResolvedDescriptionNoFunctionArguments, |
1831 | fallback_style: Address::DumpStyleSectionNameOffset); |
1832 | } |
1833 | |
1834 | if (!ss.GetString().empty()) { |
1835 | // If Address::Dump returned a multi-line description, most commonly |
1836 | // seen when we have multiple levels of inlined functions at an |
1837 | // address, only show the first line. |
1838 | std::string str = std::string(ss.GetString()); |
1839 | size_t first_eol_char = str.find_first_of(s: "\r\n" ); |
1840 | if (first_eol_char != std::string::npos) { |
1841 | str.erase(pos: first_eol_char); |
1842 | } |
1843 | m_inst->AppendComment(description&: str); |
1844 | } |
1845 | } |
1846 | } |
1847 | } |
1848 | |
1849 | // TODO: llvm-objdump sets the type_ptr to the |
1850 | // LLVMDisassembler_ReferenceType_Out_* values |
1851 | // based on where value_so_addr is pointing, with |
1852 | // Mach-O specific augmentations in MachODump.cpp. e.g. |
1853 | // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand |
1854 | // handles. |
1855 | *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; |
1856 | *name = nullptr; |
1857 | return nullptr; |
1858 | } |
1859 | |