1 | //===-- DisassemblerLLVMC.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "DisassemblerLLVMC.h" |
10 | |
11 | #include "llvm-c/Disassembler.h" |
12 | #include "llvm/ADT/SmallString.h" |
13 | #include "llvm/ADT/StringExtras.h" |
14 | #include "llvm/MC/MCAsmInfo.h" |
15 | #include "llvm/MC/MCContext.h" |
16 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
17 | #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" |
18 | #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" |
19 | #include "llvm/MC/MCInst.h" |
20 | #include "llvm/MC/MCInstPrinter.h" |
21 | #include "llvm/MC/MCInstrAnalysis.h" |
22 | #include "llvm/MC/MCInstrInfo.h" |
23 | #include "llvm/MC/MCRegisterInfo.h" |
24 | #include "llvm/MC/MCSubtargetInfo.h" |
25 | #include "llvm/MC/MCTargetOptions.h" |
26 | #include "llvm/MC/TargetRegistry.h" |
27 | #include "llvm/Support/ErrorHandling.h" |
28 | #include "llvm/Support/ScopedPrinter.h" |
29 | #include "llvm/Support/TargetSelect.h" |
30 | #include "llvm/TargetParser/AArch64TargetParser.h" |
31 | |
32 | #include "lldb/Core/Address.h" |
33 | #include "lldb/Core/Module.h" |
34 | #include "lldb/Symbol/SymbolContext.h" |
35 | #include "lldb/Target/ExecutionContext.h" |
36 | #include "lldb/Target/Process.h" |
37 | #include "lldb/Target/RegisterContext.h" |
38 | #include "lldb/Target/SectionLoadList.h" |
39 | #include "lldb/Target/StackFrame.h" |
40 | #include "lldb/Target/Target.h" |
41 | #include "lldb/Utility/DataExtractor.h" |
42 | #include "lldb/Utility/LLDBLog.h" |
43 | #include "lldb/Utility/Log.h" |
44 | #include "lldb/Utility/RegularExpression.h" |
45 | #include "lldb/Utility/Stream.h" |
46 | #include <optional> |
47 | |
48 | using namespace lldb; |
49 | using namespace lldb_private; |
50 | |
// NOTE(review): LLDB_PLUGIN_DEFINE is a macro defined outside this file;
// presumably it emits the plugin entry points for DisassemblerLLVMC — confirm
// against the macro definition.
LLDB_PLUGIN_DEFINE(DisassemblerLLVMC)
52 | |
53 | class DisassemblerLLVMC::MCDisasmInstance { |
54 | public: |
55 | static std::unique_ptr<MCDisasmInstance> |
56 | Create(const char *triple, const char *cpu, const char *features_str, |
57 | unsigned flavor, DisassemblerLLVMC &owner); |
58 | |
59 | ~MCDisasmInstance() = default; |
60 | |
61 | uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, |
62 | lldb::addr_t pc, llvm::MCInst &mc_inst) const; |
63 | void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc, |
64 | std::string &inst_string, std::string &); |
65 | void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); |
66 | void SetUseColor(bool use_color); |
67 | bool GetUseColor() const; |
68 | bool CanBranch(llvm::MCInst &mc_inst) const; |
69 | bool HasDelaySlot(llvm::MCInst &mc_inst) const; |
70 | bool IsCall(llvm::MCInst &mc_inst) const; |
71 | bool IsLoad(llvm::MCInst &mc_inst) const; |
72 | bool IsAuthenticated(llvm::MCInst &mc_inst) const; |
73 | |
74 | private: |
75 | MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, |
76 | std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, |
77 | std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, |
78 | std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, |
79 | std::unique_ptr<llvm::MCContext> &&context_up, |
80 | std::unique_ptr<llvm::MCDisassembler> &&disasm_up, |
81 | std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up, |
82 | std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up); |
83 | |
84 | std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; |
85 | std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; |
86 | std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; |
87 | std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; |
88 | std::unique_ptr<llvm::MCContext> m_context_up; |
89 | std::unique_ptr<llvm::MCDisassembler> m_disasm_up; |
90 | std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; |
91 | std::unique_ptr<llvm::MCInstrAnalysis> m_instr_analysis_up; |
92 | }; |
93 | |
94 | namespace x86 { |
95 | |
/// The three values that decide an instruction's control flow kind.
/// InstructionLengthDecode decodes an instruction and returns this struct.
///
/// primary_opcode
///    Primary opcode of the instruction.
///    For a one-byte opcode, it's the first byte after the prefixes.
///    For a two-byte opcode, it's the byte following the 0x0F escape.
///    For a three-byte opcode (0x0F 0x38 / 0x0F 0x3A), it's the byte following
///    the two escape bytes.
///
/// opcode_len
///    The length of the opcode in bytes; 1, 2, or 3 for legacy encodings.
///    NOTE(review): InstructionLengthDecode also stores 0 for the reserved
///    0x0F 0x39 / 0x3B-0x3F forms, and stores the raw map-select bits for
///    0xC4 (VEX) and 0x62 (EVEX) prefixed instructions.
///
/// modrm
///    ModR/M byte of the instruction.
///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
///    may contain a register or specify an addressing mode, depending on MOD.
struct InstructionOpcodeAndModrm {
  uint8_t primary_opcode;
  uint8_t opcode_len;
  uint8_t modrm;
};
116 | |
117 | /// Determine the InstructionControlFlowKind based on opcode and modrm bytes. |
118 | /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and |
119 | /// instruction set. |
120 | /// |
121 | /// \param[in] opcode_and_modrm |
122 | /// Contains primary_opcode byte, its length, and ModR/M byte. |
123 | /// Refer to the struct InstructionOpcodeAndModrm for details. |
124 | /// |
125 | /// \return |
126 | /// The control flow kind of the instruction or |
127 | /// eInstructionControlFlowKindOther if the instruction doesn't affect |
128 | /// the control flow of the program. |
129 | lldb::InstructionControlFlowKind |
130 | MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { |
131 | uint8_t opcode = opcode_and_modrm.primary_opcode; |
132 | uint8_t opcode_len = opcode_and_modrm.opcode_len; |
133 | uint8_t modrm = opcode_and_modrm.modrm; |
134 | |
135 | if (opcode_len > 2) |
136 | return lldb::eInstructionControlFlowKindOther; |
137 | |
138 | if (opcode >= 0x70 && opcode <= 0x7F) { |
139 | if (opcode_len == 1) |
140 | return lldb::eInstructionControlFlowKindCondJump; |
141 | else |
142 | return lldb::eInstructionControlFlowKindOther; |
143 | } |
144 | |
145 | if (opcode >= 0x80 && opcode <= 0x8F) { |
146 | if (opcode_len == 2) |
147 | return lldb::eInstructionControlFlowKindCondJump; |
148 | else |
149 | return lldb::eInstructionControlFlowKindOther; |
150 | } |
151 | |
152 | switch (opcode) { |
153 | case 0x9A: |
154 | if (opcode_len == 1) |
155 | return lldb::eInstructionControlFlowKindFarCall; |
156 | break; |
157 | case 0xFF: |
158 | if (opcode_len == 1) { |
159 | uint8_t modrm_reg = (modrm >> 3) & 7; |
160 | if (modrm_reg == 2) |
161 | return lldb::eInstructionControlFlowKindCall; |
162 | else if (modrm_reg == 3) |
163 | return lldb::eInstructionControlFlowKindFarCall; |
164 | else if (modrm_reg == 4) |
165 | return lldb::eInstructionControlFlowKindJump; |
166 | else if (modrm_reg == 5) |
167 | return lldb::eInstructionControlFlowKindFarJump; |
168 | } |
169 | break; |
170 | case 0xE8: |
171 | if (opcode_len == 1) |
172 | return lldb::eInstructionControlFlowKindCall; |
173 | break; |
174 | case 0xCD: |
175 | case 0xCC: |
176 | case 0xCE: |
177 | case 0xF1: |
178 | if (opcode_len == 1) |
179 | return lldb::eInstructionControlFlowKindFarCall; |
180 | break; |
181 | case 0xCF: |
182 | if (opcode_len == 1) |
183 | return lldb::eInstructionControlFlowKindFarReturn; |
184 | break; |
185 | case 0xE9: |
186 | case 0xEB: |
187 | if (opcode_len == 1) |
188 | return lldb::eInstructionControlFlowKindJump; |
189 | break; |
190 | case 0xEA: |
191 | if (opcode_len == 1) |
192 | return lldb::eInstructionControlFlowKindFarJump; |
193 | break; |
194 | case 0xE3: |
195 | case 0xE0: |
196 | case 0xE1: |
197 | case 0xE2: |
198 | if (opcode_len == 1) |
199 | return lldb::eInstructionControlFlowKindCondJump; |
200 | break; |
201 | case 0xC3: |
202 | case 0xC2: |
203 | if (opcode_len == 1) |
204 | return lldb::eInstructionControlFlowKindReturn; |
205 | break; |
206 | case 0xCB: |
207 | case 0xCA: |
208 | if (opcode_len == 1) |
209 | return lldb::eInstructionControlFlowKindFarReturn; |
210 | break; |
211 | case 0x05: |
212 | case 0x34: |
213 | if (opcode_len == 2) |
214 | return lldb::eInstructionControlFlowKindFarCall; |
215 | break; |
216 | case 0x35: |
217 | case 0x07: |
218 | if (opcode_len == 2) |
219 | return lldb::eInstructionControlFlowKindFarReturn; |
220 | break; |
221 | case 0x01: |
222 | if (opcode_len == 2) { |
223 | switch (modrm) { |
224 | case 0xc1: |
225 | return lldb::eInstructionControlFlowKindFarCall; |
226 | case 0xc2: |
227 | case 0xc3: |
228 | return lldb::eInstructionControlFlowKindFarReturn; |
229 | default: |
230 | break; |
231 | } |
232 | } |
233 | break; |
234 | default: |
235 | break; |
236 | } |
237 | |
238 | return lldb::eInstructionControlFlowKindOther; |
239 | } |
240 | |
241 | /// Decode an instruction into opcode, modrm and opcode_len. |
242 | /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. |
243 | /// Opcodes in x86 are generally the first byte of instruction, though two-byte |
244 | /// instructions and prefixes exist. ModR/M is the byte following the opcode |
245 | /// and adds additional information for how the instruction is executed. |
246 | /// |
247 | /// \param[in] inst_bytes |
248 | /// Raw bytes of the instruction |
249 | /// |
250 | /// |
251 | /// \param[in] bytes_len |
252 | /// The length of the inst_bytes array. |
253 | /// |
254 | /// \param[in] is_exec_mode_64b |
255 | /// If true, the execution mode is 64 bit. |
256 | /// |
257 | /// \return |
258 | /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding |
259 | /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition |
260 | /// for more details. |
261 | /// Otherwise if the given instruction is invalid, returns std::nullopt. |
262 | std::optional<InstructionOpcodeAndModrm> |
263 | InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, |
264 | bool is_exec_mode_64b) { |
265 | int op_idx = 0; |
266 | bool prefix_done = false; |
267 | InstructionOpcodeAndModrm ret = {.primary_opcode: 0, .opcode_len: 0, .modrm: 0}; |
268 | |
269 | // In most cases, the primary_opcode is the first byte of the instruction |
270 | // but some instructions have a prefix to be skipped for these calculations. |
271 | // The following mapping is inspired from libipt's instruction decoding logic |
272 | // in `src/pt_ild.c` |
273 | while (!prefix_done) { |
274 | if (op_idx >= bytes_len) |
275 | return std::nullopt; |
276 | |
277 | ret.primary_opcode = inst_bytes[op_idx]; |
278 | switch (ret.primary_opcode) { |
279 | // prefix_ignore |
280 | case 0x26: |
281 | case 0x2e: |
282 | case 0x36: |
283 | case 0x3e: |
284 | case 0x64: |
285 | case 0x65: |
286 | // prefix_osz, prefix_asz |
287 | case 0x66: |
288 | case 0x67: |
289 | // prefix_lock, prefix_f2, prefix_f3 |
290 | case 0xf0: |
291 | case 0xf2: |
292 | case 0xf3: |
293 | op_idx++; |
294 | break; |
295 | |
296 | // prefix_rex |
297 | case 0x40: |
298 | case 0x41: |
299 | case 0x42: |
300 | case 0x43: |
301 | case 0x44: |
302 | case 0x45: |
303 | case 0x46: |
304 | case 0x47: |
305 | case 0x48: |
306 | case 0x49: |
307 | case 0x4a: |
308 | case 0x4b: |
309 | case 0x4c: |
310 | case 0x4d: |
311 | case 0x4e: |
312 | case 0x4f: |
313 | if (is_exec_mode_64b) |
314 | op_idx++; |
315 | else |
316 | prefix_done = true; |
317 | break; |
318 | |
319 | // prefix_vex_c4, c5 |
320 | case 0xc5: |
321 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
322 | prefix_done = true; |
323 | break; |
324 | } |
325 | |
326 | ret.opcode_len = 2; |
327 | ret.primary_opcode = inst_bytes[op_idx + 2]; |
328 | ret.modrm = inst_bytes[op_idx + 3]; |
329 | return ret; |
330 | |
331 | case 0xc4: |
332 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
333 | prefix_done = true; |
334 | break; |
335 | } |
336 | ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; |
337 | ret.primary_opcode = inst_bytes[op_idx + 3]; |
338 | ret.modrm = inst_bytes[op_idx + 4]; |
339 | return ret; |
340 | |
341 | // prefix_evex |
342 | case 0x62: |
343 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
344 | prefix_done = true; |
345 | break; |
346 | } |
347 | ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; |
348 | ret.primary_opcode = inst_bytes[op_idx + 4]; |
349 | ret.modrm = inst_bytes[op_idx + 5]; |
350 | return ret; |
351 | |
352 | default: |
353 | prefix_done = true; |
354 | break; |
355 | } |
356 | } // prefix done |
357 | |
358 | ret.primary_opcode = inst_bytes[op_idx]; |
359 | ret.modrm = inst_bytes[op_idx + 1]; |
360 | ret.opcode_len = 1; |
361 | |
362 | // If the first opcode is 0F, it's two- or three- byte opcodes. |
363 | if (ret.primary_opcode == 0x0F) { |
364 | ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte |
365 | |
366 | if (ret.primary_opcode == 0x38) { |
367 | ret.opcode_len = 3; |
368 | ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte |
369 | ret.modrm = inst_bytes[op_idx + 1]; |
370 | } else if (ret.primary_opcode == 0x3A) { |
371 | ret.opcode_len = 3; |
372 | ret.primary_opcode = inst_bytes[++op_idx]; |
373 | ret.modrm = inst_bytes[op_idx + 1]; |
374 | } else if ((ret.primary_opcode & 0xf8) == 0x38) { |
375 | ret.opcode_len = 0; |
376 | ret.primary_opcode = inst_bytes[++op_idx]; |
377 | ret.modrm = inst_bytes[op_idx + 1]; |
378 | } else if (ret.primary_opcode == 0x0F) { |
379 | ret.opcode_len = 3; |
380 | // opcode is 0x0F, no needs to update |
381 | ret.modrm = inst_bytes[op_idx + 1]; |
382 | } else { |
383 | ret.opcode_len = 2; |
384 | ret.modrm = inst_bytes[op_idx + 1]; |
385 | } |
386 | } |
387 | |
388 | return ret; |
389 | } |
390 | |
391 | lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, |
392 | Opcode m_opcode) { |
393 | std::optional<InstructionOpcodeAndModrm> ret; |
394 | |
395 | if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { |
396 | // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes |
397 | return lldb::eInstructionControlFlowKindUnknown; |
398 | } |
399 | |
400 | // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. |
401 | // These are the three values deciding instruction control flow kind. |
402 | ret = InstructionLengthDecode(inst_bytes: (const uint8_t *)m_opcode.GetOpcodeBytes(), |
403 | bytes_len: m_opcode.GetByteSize(), is_exec_mode_64b); |
404 | if (!ret) |
405 | return lldb::eInstructionControlFlowKindUnknown; |
406 | else |
407 | return MapOpcodeIntoControlFlowKind(opcode_and_modrm: *ret); |
408 | } |
409 | |
410 | } // namespace x86 |
411 | |
412 | class InstructionLLVMC : public lldb_private::Instruction { |
413 | public: |
414 | InstructionLLVMC(DisassemblerLLVMC &disasm, |
415 | const lldb_private::Address &address, |
416 | AddressClass addr_class) |
417 | : Instruction(address, addr_class), |
418 | m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( |
419 | r: disasm.shared_from_this())) {} |
420 | |
421 | ~InstructionLLVMC() override = default; |
422 | |
/// Calls VisitInstruction() and then returns the m_does_branch flag.
/// NOTE(review): VisitInstruction() is defined outside this view; presumably
/// it is what populates the flag — confirm.
bool DoesBranch() override {
  VisitInstruction();
  return m_does_branch;
}
427 | |
/// Calls VisitInstruction() and then returns the m_has_delay_slot flag.
/// NOTE(review): VisitInstruction() is defined outside this view; presumably
/// it is what populates the flag — confirm.
bool HasDelaySlot() override {
  VisitInstruction();
  return m_has_delay_slot;
}
432 | |
/// Calls VisitInstruction() and then returns the m_is_load flag.
/// NOTE(review): VisitInstruction() is defined outside this view; presumably
/// it is what populates the flag — confirm.
bool IsLoad() override {
  VisitInstruction();
  return m_is_load;
}
437 | |
/// Calls VisitInstruction() and then returns the m_is_authenticated flag.
/// NOTE(review): VisitInstruction() is defined outside this view; presumably
/// it is what populates the flag — confirm.
bool IsAuthenticated() override {
  VisitInstruction();
  return m_is_authenticated;
}
442 | |
/// Convenience overload: opens a DisassemblerScope for this instruction and
/// forwards to the two-argument GetDisasmToUse().
///
/// \param[out] is_alternate_isa
///    Passed through to the two-argument overload.
///    NOTE(review): that overload is defined outside this view; presumably it
///    reports whether the alternate-ISA disassembler was selected — confirm.
DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {
  // The scope is destroyed when this function returns.
  DisassemblerScope disasm(*this);
  return GetDisasmToUse(is_alternate_isa, disasm);
}
447 | |
448 | size_t (const lldb_private::Disassembler &disassembler, |
449 | const lldb_private::DataExtractor &data, |
450 | lldb::offset_t data_offset) override { |
451 | // All we have to do is read the opcode which can be easy for some |
452 | // architectures |
453 | bool got_op = false; |
454 | DisassemblerScope disasm(*this); |
455 | if (disasm) { |
456 | const ArchSpec &arch = disasm->GetArchitecture(); |
457 | const lldb::ByteOrder byte_order = data.GetByteOrder(); |
458 | |
459 | const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); |
460 | const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); |
461 | if (min_op_byte_size == max_op_byte_size) { |
462 | // Fixed size instructions, just read that amount of data. |
463 | if (!data.ValidOffsetForDataOfSize(offset: data_offset, length: min_op_byte_size)) |
464 | return false; |
465 | |
466 | switch (min_op_byte_size) { |
467 | case 1: |
468 | m_opcode.SetOpcode8(inst: data.GetU8(offset_ptr: &data_offset), order: byte_order); |
469 | got_op = true; |
470 | break; |
471 | |
472 | case 2: |
473 | m_opcode.SetOpcode16(inst: data.GetU16(offset_ptr: &data_offset), order: byte_order); |
474 | got_op = true; |
475 | break; |
476 | |
477 | case 4: |
478 | m_opcode.SetOpcode32(inst: data.GetU32(offset_ptr: &data_offset), order: byte_order); |
479 | got_op = true; |
480 | break; |
481 | |
482 | case 8: |
483 | m_opcode.SetOpcode64(inst: data.GetU64(offset_ptr: &data_offset), order: byte_order); |
484 | got_op = true; |
485 | break; |
486 | |
487 | default: |
488 | m_opcode.SetOpcodeBytes(bytes: data.PeekData(offset: data_offset, length: min_op_byte_size), |
489 | length: min_op_byte_size); |
490 | got_op = true; |
491 | break; |
492 | } |
493 | } |
494 | if (!got_op) { |
495 | bool is_alternate_isa = false; |
496 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = |
497 | GetDisasmToUse(is_alternate_isa, disasm); |
498 | |
499 | const llvm::Triple::ArchType machine = arch.GetMachine(); |
500 | if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { |
501 | if (machine == llvm::Triple::thumb || is_alternate_isa) { |
502 | uint32_t thumb_opcode = data.GetU16(offset_ptr: &data_offset); |
503 | if ((thumb_opcode & 0xe000) != 0xe000 || |
504 | ((thumb_opcode & 0x1800u) == 0)) { |
505 | m_opcode.SetOpcode16(inst: thumb_opcode, order: byte_order); |
506 | m_is_valid = true; |
507 | } else { |
508 | thumb_opcode <<= 16; |
509 | thumb_opcode |= data.GetU16(offset_ptr: &data_offset); |
510 | m_opcode.SetOpcode16_2(inst: thumb_opcode, order: byte_order); |
511 | m_is_valid = true; |
512 | } |
513 | } else { |
514 | m_opcode.SetOpcode32(inst: data.GetU32(offset_ptr: &data_offset), order: byte_order); |
515 | m_is_valid = true; |
516 | } |
517 | } else { |
518 | // The opcode isn't evenly sized, so we need to actually use the llvm |
519 | // disassembler to parse it and get the size. |
520 | uint8_t *opcode_data = |
521 | const_cast<uint8_t *>(data.PeekData(offset: data_offset, length: 1)); |
522 | const size_t opcode_data_len = data.BytesLeft(offset: data_offset); |
523 | const addr_t pc = m_address.GetFileAddress(); |
524 | llvm::MCInst inst; |
525 | |
526 | const size_t inst_size = |
527 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
528 | if (inst_size == 0) |
529 | m_opcode.Clear(); |
530 | else { |
531 | m_opcode.SetOpcodeBytes(bytes: opcode_data, length: inst_size); |
532 | m_is_valid = true; |
533 | } |
534 | } |
535 | } |
536 | return m_opcode.GetByteSize(); |
537 | } |
538 | return 0; |
539 | } |
540 | |
541 | void (std::string &description) { |
542 | if (m_comment.empty()) |
543 | m_comment.swap(s&: description); |
544 | else { |
545 | m_comment.append(s: ", " ); |
546 | m_comment.append(str: description); |
547 | } |
548 | } |
549 | |
550 | lldb::InstructionControlFlowKind |
551 | GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override { |
552 | DisassemblerScope disasm(*this, exe_ctx); |
553 | if (disasm){ |
554 | if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86) |
555 | return x86::GetControlFlowKind(/*is_64b=*/is_exec_mode_64b: false, m_opcode); |
556 | else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64) |
557 | return x86::GetControlFlowKind(/*is_64b=*/is_exec_mode_64b: true, m_opcode); |
558 | } |
559 | |
560 | return eInstructionControlFlowKindUnknown; |
561 | } |
562 | |
563 | void CalculateMnemonicOperandsAndComment( |
564 | const lldb_private::ExecutionContext *exe_ctx) override { |
565 | DataExtractor data; |
566 | const AddressClass address_class = GetAddressClass(); |
567 | |
568 | if (m_opcode.GetData(data)) { |
569 | std::string out_string; |
570 | std::string markup_out_string; |
571 | std::string ; |
572 | std::string ; |
573 | |
574 | DisassemblerScope disasm(*this, exe_ctx); |
575 | if (disasm) { |
576 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; |
577 | |
578 | if (address_class == AddressClass::eCodeAlternateISA) |
579 | mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); |
580 | else |
581 | mc_disasm_ptr = disasm->m_disasm_up.get(); |
582 | |
583 | lldb::addr_t pc = m_address.GetFileAddress(); |
584 | m_using_file_addr = true; |
585 | |
586 | const bool data_from_file = disasm->m_data_from_file; |
587 | bool use_hex_immediates = true; |
588 | Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; |
589 | |
590 | if (exe_ctx) { |
591 | Target *target = exe_ctx->GetTargetPtr(); |
592 | if (target) { |
593 | use_hex_immediates = target->GetUseHexImmediates(); |
594 | hex_style = target->GetHexImmediateStyle(); |
595 | |
596 | if (!data_from_file) { |
597 | const lldb::addr_t load_addr = m_address.GetLoadAddress(target); |
598 | if (load_addr != LLDB_INVALID_ADDRESS) { |
599 | pc = load_addr; |
600 | m_using_file_addr = false; |
601 | } |
602 | } |
603 | } |
604 | } |
605 | |
606 | const uint8_t *opcode_data = data.GetDataStart(); |
607 | const size_t opcode_data_len = data.GetByteSize(); |
608 | llvm::MCInst inst; |
609 | size_t inst_size = |
610 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
611 | |
612 | if (inst_size > 0) { |
613 | mc_disasm_ptr->SetStyle(use_hex_immed: use_hex_immediates, hex_style); |
614 | |
615 | const bool saved_use_color = mc_disasm_ptr->GetUseColor(); |
616 | mc_disasm_ptr->SetUseColor(false); |
617 | mc_disasm_ptr->PrintMCInst(mc_inst&: inst, pc, inst_string&: out_string, comments_string&: comment_string); |
618 | mc_disasm_ptr->SetUseColor(true); |
619 | mc_disasm_ptr->PrintMCInst(mc_inst&: inst, pc, inst_string&: markup_out_string, |
620 | comments_string&: markup_comment_string); |
621 | mc_disasm_ptr->SetUseColor(saved_use_color); |
622 | |
623 | if (!comment_string.empty()) { |
624 | AppendComment(description&: comment_string); |
625 | } |
626 | } |
627 | |
628 | if (inst_size == 0) { |
629 | m_comment.assign(s: "unknown opcode" ); |
630 | inst_size = m_opcode.GetByteSize(); |
631 | StreamString mnemonic_strm; |
632 | lldb::offset_t offset = 0; |
633 | lldb::ByteOrder byte_order = data.GetByteOrder(); |
634 | switch (inst_size) { |
635 | case 1: { |
636 | const uint8_t uval8 = data.GetU8(offset_ptr: &offset); |
637 | m_opcode.SetOpcode8(inst: uval8, order: byte_order); |
638 | m_opcode_name.assign(s: ".byte" ); |
639 | mnemonic_strm.Printf(format: "0x%2.2x" , uval8); |
640 | } break; |
641 | case 2: { |
642 | const uint16_t uval16 = data.GetU16(offset_ptr: &offset); |
643 | m_opcode.SetOpcode16(inst: uval16, order: byte_order); |
644 | m_opcode_name.assign(s: ".short" ); |
645 | mnemonic_strm.Printf(format: "0x%4.4x" , uval16); |
646 | } break; |
647 | case 4: { |
648 | const uint32_t uval32 = data.GetU32(offset_ptr: &offset); |
649 | m_opcode.SetOpcode32(inst: uval32, order: byte_order); |
650 | m_opcode_name.assign(s: ".long" ); |
651 | mnemonic_strm.Printf(format: "0x%8.8x" , uval32); |
652 | } break; |
653 | case 8: { |
654 | const uint64_t uval64 = data.GetU64(offset_ptr: &offset); |
655 | m_opcode.SetOpcode64(inst: uval64, order: byte_order); |
656 | m_opcode_name.assign(s: ".quad" ); |
657 | mnemonic_strm.Printf(format: "0x%16.16" PRIx64, uval64); |
658 | } break; |
659 | default: |
660 | if (inst_size == 0) |
661 | return; |
662 | else { |
663 | const uint8_t *bytes = data.PeekData(offset, length: inst_size); |
664 | if (bytes == nullptr) |
665 | return; |
666 | m_opcode_name.assign(s: ".byte" ); |
667 | m_opcode.SetOpcodeBytes(bytes, length: inst_size); |
668 | mnemonic_strm.Printf(format: "0x%2.2x" , bytes[0]); |
669 | for (uint32_t i = 1; i < inst_size; ++i) |
670 | mnemonic_strm.Printf(format: " 0x%2.2x" , bytes[i]); |
671 | } |
672 | break; |
673 | } |
674 | m_mnemonics = std::string(mnemonic_strm.GetString()); |
675 | return; |
676 | } |
677 | |
678 | static RegularExpression s_regex( |
679 | llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?" )); |
680 | |
681 | llvm::SmallVector<llvm::StringRef, 4> matches; |
682 | if (s_regex.Execute(string: out_string, matches: &matches)) { |
683 | m_opcode_name = matches[1].str(); |
684 | m_mnemonics = matches[2].str(); |
685 | } |
686 | matches.clear(); |
687 | if (s_regex.Execute(string: markup_out_string, matches: &matches)) { |
688 | m_markup_opcode_name = matches[1].str(); |
689 | m_markup_mnemonics = matches[2].str(); |
690 | } |
691 | } |
692 | } |
693 | } |
694 | |
/// Returns the m_is_valid flag.
bool IsValid() const { return m_is_valid; }

/// Returns the m_using_file_addr flag.
bool UsingFileAddress() const { return m_using_file_addr; }
/// Returns the size in bytes of the opcode currently stored in m_opcode.
size_t GetByteSize() const { return m_opcode.GetByteSize(); }
699 | |
700 | /// Grants exclusive access to the disassembler and initializes it with the |
701 | /// given InstructionLLVMC and an optional ExecutionContext. |
702 | class DisassemblerScope { |
703 | std::shared_ptr<DisassemblerLLVMC> m_disasm; |
704 | |
705 | public: |
706 | explicit DisassemblerScope( |
707 | InstructionLLVMC &i, |
708 | const lldb_private::ExecutionContext *exe_ctx = nullptr) |
709 | : m_disasm(i.m_disasm_wp.lock()) { |
710 | m_disasm->m_mutex.lock(); |
711 | m_disasm->m_inst = &i; |
712 | m_disasm->m_exe_ctx = exe_ctx; |
713 | } |
714 | ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } |
715 | |
716 | /// Evaluates to true if this scope contains a valid disassembler. |
717 | operator bool() const { return static_cast<bool>(m_disasm); } |
718 | |
719 | std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } |
720 | }; |
721 | |
722 | static llvm::StringRef::const_iterator |
723 | ConsumeWhitespace(llvm::StringRef::const_iterator osi, |
724 | llvm::StringRef::const_iterator ose) { |
725 | while (osi != ose) { |
726 | switch (*osi) { |
727 | default: |
728 | return osi; |
729 | case ' ': |
730 | case '\t': |
731 | break; |
732 | } |
733 | ++osi; |
734 | } |
735 | |
736 | return osi; |
737 | } |
738 | |
739 | static std::pair<bool, llvm::StringRef::const_iterator> |
740 | ConsumeChar(llvm::StringRef::const_iterator osi, const char c, |
741 | llvm::StringRef::const_iterator ose) { |
742 | bool found = false; |
743 | |
744 | osi = ConsumeWhitespace(osi, ose); |
745 | if (osi != ose && *osi == c) { |
746 | found = true; |
747 | ++osi; |
748 | } |
749 | |
750 | return std::make_pair(x&: found, y&: osi); |
751 | } |
752 | |
753 | static std::pair<Operand, llvm::StringRef::const_iterator> |
754 | ParseRegisterName(llvm::StringRef::const_iterator osi, |
755 | llvm::StringRef::const_iterator ose) { |
756 | Operand ret; |
757 | ret.m_type = Operand::Type::Register; |
758 | std::string str; |
759 | |
760 | osi = ConsumeWhitespace(osi, ose); |
761 | |
762 | while (osi != ose) { |
763 | if (*osi >= '0' && *osi <= '9') { |
764 | if (str.empty()) { |
765 | return std::make_pair(x: Operand(), y&: osi); |
766 | } else { |
767 | str.push_back(c: *osi); |
768 | } |
769 | } else if (*osi >= 'a' && *osi <= 'z') { |
770 | str.push_back(c: *osi); |
771 | } else { |
772 | switch (*osi) { |
773 | default: |
774 | if (str.empty()) { |
775 | return std::make_pair(x: Operand(), y&: osi); |
776 | } else { |
777 | ret.m_register = ConstString(str); |
778 | return std::make_pair(x&: ret, y&: osi); |
779 | } |
780 | case '%': |
781 | if (!str.empty()) { |
782 | return std::make_pair(x: Operand(), y&: osi); |
783 | } |
784 | break; |
785 | } |
786 | } |
787 | ++osi; |
788 | } |
789 | |
790 | ret.m_register = ConstString(str); |
791 | return std::make_pair(x&: ret, y&: osi); |
792 | } |
793 | |
794 | static std::pair<Operand, llvm::StringRef::const_iterator> |
795 | ParseImmediate(llvm::StringRef::const_iterator osi, |
796 | llvm::StringRef::const_iterator ose) { |
797 | Operand ret; |
798 | ret.m_type = Operand::Type::Immediate; |
799 | std::string str; |
800 | bool is_hex = false; |
801 | |
802 | osi = ConsumeWhitespace(osi, ose); |
803 | |
804 | while (osi != ose) { |
805 | if (*osi >= '0' && *osi <= '9') { |
806 | str.push_back(c: *osi); |
807 | } else if (*osi >= 'a' && *osi <= 'f') { |
808 | if (is_hex) { |
809 | str.push_back(c: *osi); |
810 | } else { |
811 | return std::make_pair(x: Operand(), y&: osi); |
812 | } |
813 | } else { |
814 | switch (*osi) { |
815 | default: |
816 | if (str.empty()) { |
817 | return std::make_pair(x: Operand(), y&: osi); |
818 | } else { |
819 | ret.m_immediate = strtoull(nptr: str.c_str(), endptr: nullptr, base: 0); |
820 | return std::make_pair(x&: ret, y&: osi); |
821 | } |
822 | case 'x': |
823 | if (!str.compare(s: "0" )) { |
824 | is_hex = true; |
825 | str.push_back(c: *osi); |
826 | } else { |
827 | return std::make_pair(x: Operand(), y&: osi); |
828 | } |
829 | break; |
830 | case '#': |
831 | case '$': |
832 | if (!str.empty()) { |
833 | return std::make_pair(x: Operand(), y&: osi); |
834 | } |
835 | break; |
836 | case '-': |
837 | if (str.empty()) { |
838 | ret.m_negative = true; |
839 | } else { |
840 | return std::make_pair(x: Operand(), y&: osi); |
841 | } |
842 | } |
843 | } |
844 | ++osi; |
845 | } |
846 | |
847 | ret.m_immediate = strtoull(nptr: str.c_str(), endptr: nullptr, base: 0); |
848 | return std::make_pair(x&: ret, y&: osi); |
849 | } |
850 | |
851 | // -0x5(%rax,%rax,2) |
852 | static std::pair<Operand, llvm::StringRef::const_iterator> |
853 | ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, |
854 | llvm::StringRef::const_iterator ose) { |
855 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
856 | ParseImmediate(osi, ose); |
857 | if (offset_and_iterator.first.IsValid()) { |
858 | osi = offset_and_iterator.second; |
859 | } |
860 | |
861 | bool found = false; |
862 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '(', ose); |
863 | if (!found) { |
864 | return std::make_pair(x: Operand(), y&: osi); |
865 | } |
866 | |
867 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
868 | ParseRegisterName(osi, ose); |
869 | if (base_and_iterator.first.IsValid()) { |
870 | osi = base_and_iterator.second; |
871 | } else { |
872 | return std::make_pair(x: Operand(), y&: osi); |
873 | } |
874 | |
875 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
876 | if (!found) { |
877 | return std::make_pair(x: Operand(), y&: osi); |
878 | } |
879 | |
880 | std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = |
881 | ParseRegisterName(osi, ose); |
882 | if (index_and_iterator.first.IsValid()) { |
883 | osi = index_and_iterator.second; |
884 | } else { |
885 | return std::make_pair(x: Operand(), y&: osi); |
886 | } |
887 | |
888 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
889 | if (!found) { |
890 | return std::make_pair(x: Operand(), y&: osi); |
891 | } |
892 | |
893 | std::pair<Operand, llvm::StringRef::const_iterator> |
894 | multiplier_and_iterator = ParseImmediate(osi, ose); |
895 | if (index_and_iterator.first.IsValid()) { |
896 | osi = index_and_iterator.second; |
897 | } else { |
898 | return std::make_pair(x: Operand(), y&: osi); |
899 | } |
900 | |
901 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ')', ose); |
902 | if (!found) { |
903 | return std::make_pair(x: Operand(), y&: osi); |
904 | } |
905 | |
906 | Operand product; |
907 | product.m_type = Operand::Type::Product; |
908 | product.m_children.push_back(x: index_and_iterator.first); |
909 | product.m_children.push_back(x: multiplier_and_iterator.first); |
910 | |
911 | Operand index; |
912 | index.m_type = Operand::Type::Sum; |
913 | index.m_children.push_back(x: base_and_iterator.first); |
914 | index.m_children.push_back(x: product); |
915 | |
916 | if (offset_and_iterator.first.IsValid()) { |
917 | Operand offset; |
918 | offset.m_type = Operand::Type::Sum; |
919 | offset.m_children.push_back(x: offset_and_iterator.first); |
920 | offset.m_children.push_back(x: index); |
921 | |
922 | Operand deref; |
923 | deref.m_type = Operand::Type::Dereference; |
924 | deref.m_children.push_back(x: offset); |
925 | return std::make_pair(x&: deref, y&: osi); |
926 | } else { |
927 | Operand deref; |
928 | deref.m_type = Operand::Type::Dereference; |
929 | deref.m_children.push_back(x: index); |
930 | return std::make_pair(x&: deref, y&: osi); |
931 | } |
932 | } |
933 | |
934 | // -0x10(%rbp) |
935 | static std::pair<Operand, llvm::StringRef::const_iterator> |
936 | ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, |
937 | llvm::StringRef::const_iterator ose) { |
938 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
939 | ParseImmediate(osi, ose); |
940 | if (offset_and_iterator.first.IsValid()) { |
941 | osi = offset_and_iterator.second; |
942 | } |
943 | |
944 | bool found = false; |
945 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '(', ose); |
946 | if (!found) { |
947 | return std::make_pair(x: Operand(), y&: osi); |
948 | } |
949 | |
950 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
951 | ParseRegisterName(osi, ose); |
952 | if (base_and_iterator.first.IsValid()) { |
953 | osi = base_and_iterator.second; |
954 | } else { |
955 | return std::make_pair(x: Operand(), y&: osi); |
956 | } |
957 | |
958 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ')', ose); |
959 | if (!found) { |
960 | return std::make_pair(x: Operand(), y&: osi); |
961 | } |
962 | |
963 | if (offset_and_iterator.first.IsValid()) { |
964 | Operand offset; |
965 | offset.m_type = Operand::Type::Sum; |
966 | offset.m_children.push_back(x: offset_and_iterator.first); |
967 | offset.m_children.push_back(x: base_and_iterator.first); |
968 | |
969 | Operand deref; |
970 | deref.m_type = Operand::Type::Dereference; |
971 | deref.m_children.push_back(x: offset); |
972 | return std::make_pair(x&: deref, y&: osi); |
973 | } else { |
974 | Operand deref; |
975 | deref.m_type = Operand::Type::Dereference; |
976 | deref.m_children.push_back(x: base_and_iterator.first); |
977 | return std::make_pair(x&: deref, y&: osi); |
978 | } |
979 | } |
980 | |
981 | // [sp, #8]! |
982 | static std::pair<Operand, llvm::StringRef::const_iterator> |
983 | ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, |
984 | llvm::StringRef::const_iterator ose) { |
985 | bool found = false; |
986 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '[', ose); |
987 | if (!found) { |
988 | return std::make_pair(x: Operand(), y&: osi); |
989 | } |
990 | |
991 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
992 | ParseRegisterName(osi, ose); |
993 | if (base_and_iterator.first.IsValid()) { |
994 | osi = base_and_iterator.second; |
995 | } else { |
996 | return std::make_pair(x: Operand(), y&: osi); |
997 | } |
998 | |
999 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
1000 | if (!found) { |
1001 | return std::make_pair(x: Operand(), y&: osi); |
1002 | } |
1003 | |
1004 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
1005 | ParseImmediate(osi, ose); |
1006 | if (offset_and_iterator.first.IsValid()) { |
1007 | osi = offset_and_iterator.second; |
1008 | } |
1009 | |
1010 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ']', ose); |
1011 | if (!found) { |
1012 | return std::make_pair(x: Operand(), y&: osi); |
1013 | } |
1014 | |
1015 | Operand offset; |
1016 | offset.m_type = Operand::Type::Sum; |
1017 | offset.m_children.push_back(x: offset_and_iterator.first); |
1018 | offset.m_children.push_back(x: base_and_iterator.first); |
1019 | |
1020 | Operand deref; |
1021 | deref.m_type = Operand::Type::Dereference; |
1022 | deref.m_children.push_back(x: offset); |
1023 | return std::make_pair(x&: deref, y&: osi); |
1024 | } |
1025 | |
1026 | // [sp] |
1027 | static std::pair<Operand, llvm::StringRef::const_iterator> |
1028 | ParseARMDerefAccess(llvm::StringRef::const_iterator osi, |
1029 | llvm::StringRef::const_iterator ose) { |
1030 | bool found = false; |
1031 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '[', ose); |
1032 | if (!found) { |
1033 | return std::make_pair(x: Operand(), y&: osi); |
1034 | } |
1035 | |
1036 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
1037 | ParseRegisterName(osi, ose); |
1038 | if (base_and_iterator.first.IsValid()) { |
1039 | osi = base_and_iterator.second; |
1040 | } else { |
1041 | return std::make_pair(x: Operand(), y&: osi); |
1042 | } |
1043 | |
1044 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ']', ose); |
1045 | if (!found) { |
1046 | return std::make_pair(x: Operand(), y&: osi); |
1047 | } |
1048 | |
1049 | Operand deref; |
1050 | deref.m_type = Operand::Type::Dereference; |
1051 | deref.m_children.push_back(x: base_and_iterator.first); |
1052 | return std::make_pair(x&: deref, y&: osi); |
1053 | } |
1054 | |
1055 | static void DumpOperand(const Operand &op, Stream &s) { |
1056 | switch (op.m_type) { |
1057 | case Operand::Type::Dereference: |
1058 | s.PutCString(cstr: "*" ); |
1059 | DumpOperand(op: op.m_children[0], s); |
1060 | break; |
1061 | case Operand::Type::Immediate: |
1062 | if (op.m_negative) { |
1063 | s.PutCString(cstr: "-" ); |
1064 | } |
1065 | s.PutCString(cstr: llvm::to_string(Value: op.m_immediate)); |
1066 | break; |
1067 | case Operand::Type::Invalid: |
1068 | s.PutCString(cstr: "Invalid" ); |
1069 | break; |
1070 | case Operand::Type::Product: |
1071 | s.PutCString(cstr: "(" ); |
1072 | DumpOperand(op: op.m_children[0], s); |
1073 | s.PutCString(cstr: "*" ); |
1074 | DumpOperand(op: op.m_children[1], s); |
1075 | s.PutCString(cstr: ")" ); |
1076 | break; |
1077 | case Operand::Type::Register: |
1078 | s.PutCString(cstr: op.m_register.GetStringRef()); |
1079 | break; |
1080 | case Operand::Type::Sum: |
1081 | s.PutCString(cstr: "(" ); |
1082 | DumpOperand(op: op.m_children[0], s); |
1083 | s.PutCString(cstr: "+" ); |
1084 | DumpOperand(op: op.m_children[1], s); |
1085 | s.PutCString(cstr: ")" ); |
1086 | break; |
1087 | } |
1088 | } |
1089 | |
1090 | bool ParseOperands( |
1091 | llvm::SmallVectorImpl<Instruction::Operand> &operands) override { |
1092 | const char *operands_string = GetOperands(exe_ctx: nullptr); |
1093 | |
1094 | if (!operands_string) { |
1095 | return false; |
1096 | } |
1097 | |
1098 | llvm::StringRef operands_ref(operands_string); |
1099 | |
1100 | llvm::StringRef::const_iterator osi = operands_ref.begin(); |
1101 | llvm::StringRef::const_iterator ose = operands_ref.end(); |
1102 | |
1103 | while (osi != ose) { |
1104 | Operand operand; |
1105 | llvm::StringRef::const_iterator iter; |
1106 | |
1107 | if ((std::tie(args&: operand, args&: iter) = ParseIntelIndexedAccess(osi, ose), |
1108 | operand.IsValid()) || |
1109 | (std::tie(args&: operand, args&: iter) = ParseIntelDerefAccess(osi, ose), |
1110 | operand.IsValid()) || |
1111 | (std::tie(args&: operand, args&: iter) = ParseARMOffsetAccess(osi, ose), |
1112 | operand.IsValid()) || |
1113 | (std::tie(args&: operand, args&: iter) = ParseARMDerefAccess(osi, ose), |
1114 | operand.IsValid()) || |
1115 | (std::tie(args&: operand, args&: iter) = ParseRegisterName(osi, ose), |
1116 | operand.IsValid()) || |
1117 | (std::tie(args&: operand, args&: iter) = ParseImmediate(osi, ose), |
1118 | operand.IsValid())) { |
1119 | osi = iter; |
1120 | operands.push_back(Elt: operand); |
1121 | } else { |
1122 | return false; |
1123 | } |
1124 | |
1125 | std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = |
1126 | ConsumeChar(osi, c: ',', ose); |
1127 | if (found_and_iter.first) { |
1128 | osi = found_and_iter.second; |
1129 | } |
1130 | |
1131 | osi = ConsumeWhitespace(osi, ose); |
1132 | } |
1133 | |
1134 | DisassemblerSP disasm_sp = m_disasm_wp.lock(); |
1135 | |
1136 | if (disasm_sp && operands.size() > 1) { |
1137 | // TODO tie this into the MC Disassembler's notion of clobbers. |
1138 | switch (disasm_sp->GetArchitecture().GetMachine()) { |
1139 | default: |
1140 | break; |
1141 | case llvm::Triple::x86: |
1142 | case llvm::Triple::x86_64: |
1143 | operands[operands.size() - 1].m_clobbered = true; |
1144 | break; |
1145 | case llvm::Triple::arm: |
1146 | operands[0].m_clobbered = true; |
1147 | break; |
1148 | } |
1149 | } |
1150 | |
1151 | if (Log *log = GetLog(mask: LLDBLog::Process)) { |
1152 | StreamString ss; |
1153 | |
1154 | ss.Printf(format: "[%s] expands to %zu operands:\n" , operands_string, |
1155 | operands.size()); |
1156 | for (const Operand &operand : operands) { |
1157 | ss.PutCString(cstr: " " ); |
1158 | DumpOperand(op: operand, s&: ss); |
1159 | ss.PutCString(cstr: "\n" ); |
1160 | } |
1161 | |
1162 | log->PutString(str: ss.GetString()); |
1163 | } |
1164 | |
1165 | return true; |
1166 | } |
1167 | |
1168 | bool IsCall() override { |
1169 | VisitInstruction(); |
1170 | return m_is_call; |
1171 | } |
1172 | |
1173 | protected: |
// Weak reference to the disassembler; ParseOperands locks it to query the
// target architecture.
std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;

// NOTE(review): m_is_valid and m_using_file_addr are not referenced in the
// part of the class visible here; presumably they are maintained by the
// decode path -- confirm against the rest of the class.
bool m_is_valid = false;
bool m_using_file_addr = false;
// Set by VisitInstruction() once the opcode has been decoded successfully, so
// that later calls can return immediately.
bool m_has_visited_instruction = false;

// Be conservative. If we didn't understand the instruction, say it:
//   - Might branch
//   - Does not have a delay slot
//   - Is not a call
//   - Is not a load
//   - Is not an authenticated instruction
// VisitInstruction() overwrites all five once decoding succeeds.
bool m_does_branch = true;
bool m_has_delay_slot = false;
bool m_is_call = false;
bool m_is_load = false;
bool m_is_authenticated = false;
1191 | |
1192 | void VisitInstruction() { |
1193 | if (m_has_visited_instruction) |
1194 | return; |
1195 | |
1196 | DisassemblerScope disasm(*this); |
1197 | if (!disasm) |
1198 | return; |
1199 | |
1200 | DataExtractor data; |
1201 | if (!m_opcode.GetData(data)) |
1202 | return; |
1203 | |
1204 | bool is_alternate_isa; |
1205 | lldb::addr_t pc = m_address.GetFileAddress(); |
1206 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = |
1207 | GetDisasmToUse(is_alternate_isa, disasm); |
1208 | const uint8_t *opcode_data = data.GetDataStart(); |
1209 | const size_t opcode_data_len = data.GetByteSize(); |
1210 | llvm::MCInst inst; |
1211 | const size_t inst_size = |
1212 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
1213 | if (inst_size == 0) |
1214 | return; |
1215 | |
1216 | m_has_visited_instruction = true; |
1217 | m_does_branch = mc_disasm_ptr->CanBranch(mc_inst&: inst); |
1218 | m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(mc_inst&: inst); |
1219 | m_is_call = mc_disasm_ptr->IsCall(mc_inst&: inst); |
1220 | m_is_load = mc_disasm_ptr->IsLoad(mc_inst&: inst); |
1221 | m_is_authenticated = mc_disasm_ptr->IsAuthenticated(mc_inst&: inst); |
1222 | } |
1223 | |
1224 | private: |
1225 | DisassemblerLLVMC::MCDisasmInstance * |
1226 | GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { |
1227 | is_alternate_isa = false; |
1228 | if (disasm) { |
1229 | if (disasm->m_alternate_disasm_up) { |
1230 | const AddressClass address_class = GetAddressClass(); |
1231 | |
1232 | if (address_class == AddressClass::eCodeAlternateISA) { |
1233 | is_alternate_isa = true; |
1234 | return disasm->m_alternate_disasm_up.get(); |
1235 | } |
1236 | } |
1237 | return disasm->m_disasm_up.get(); |
1238 | } |
1239 | return nullptr; |
1240 | } |
1241 | }; |
1242 | |
1243 | std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> |
1244 | DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, |
1245 | const char *features_str, |
1246 | unsigned flavor, |
1247 | DisassemblerLLVMC &owner) { |
1248 | using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; |
1249 | |
1250 | std::string Status; |
1251 | const llvm::Target *curr_target = |
1252 | llvm::TargetRegistry::lookupTarget(Triple: triple, Error&: Status); |
1253 | if (!curr_target) |
1254 | return Instance(); |
1255 | |
1256 | std::unique_ptr<llvm::MCInstrInfo> instr_info_up( |
1257 | curr_target->createMCInstrInfo()); |
1258 | if (!instr_info_up) |
1259 | return Instance(); |
1260 | |
1261 | std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( |
1262 | curr_target->createMCRegInfo(TT: triple)); |
1263 | if (!reg_info_up) |
1264 | return Instance(); |
1265 | |
1266 | std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( |
1267 | curr_target->createMCSubtargetInfo(TheTriple: triple, CPU: cpu, Features: features_str)); |
1268 | if (!subtarget_info_up) |
1269 | return Instance(); |
1270 | |
1271 | llvm::MCTargetOptions MCOptions; |
1272 | std::unique_ptr<llvm::MCAsmInfo> asm_info_up( |
1273 | curr_target->createMCAsmInfo(MRI: *reg_info_up, TheTriple: triple, Options: MCOptions)); |
1274 | if (!asm_info_up) |
1275 | return Instance(); |
1276 | |
1277 | std::unique_ptr<llvm::MCContext> context_up( |
1278 | new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(), |
1279 | reg_info_up.get(), subtarget_info_up.get())); |
1280 | if (!context_up) |
1281 | return Instance(); |
1282 | |
1283 | std::unique_ptr<llvm::MCDisassembler> disasm_up( |
1284 | curr_target->createMCDisassembler(STI: *subtarget_info_up, Ctx&: *context_up)); |
1285 | if (!disasm_up) |
1286 | return Instance(); |
1287 | |
1288 | std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( |
1289 | curr_target->createMCRelocationInfo(TT: triple, Ctx&: *context_up)); |
1290 | if (!rel_info_up) |
1291 | return Instance(); |
1292 | |
1293 | std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( |
1294 | curr_target->createMCSymbolizer( |
1295 | TT: triple, GetOpInfo: nullptr, SymbolLookUp: DisassemblerLLVMC::SymbolLookupCallback, DisInfo: &owner, |
1296 | Ctx: context_up.get(), RelInfo: std::move(rel_info_up))); |
1297 | disasm_up->setSymbolizer(std::move(symbolizer_up)); |
1298 | |
1299 | unsigned asm_printer_variant = |
1300 | flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; |
1301 | |
1302 | std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( |
1303 | curr_target->createMCInstPrinter(T: llvm::Triple{triple}, |
1304 | SyntaxVariant: asm_printer_variant, MAI: *asm_info_up, |
1305 | MII: *instr_info_up, MRI: *reg_info_up)); |
1306 | if (!instr_printer_up) |
1307 | return Instance(); |
1308 | |
1309 | instr_printer_up->setPrintBranchImmAsAddress(true); |
1310 | |
1311 | // Not all targets may have registered createMCInstrAnalysis(). |
1312 | std::unique_ptr<llvm::MCInstrAnalysis> instr_analysis_up( |
1313 | curr_target->createMCInstrAnalysis(Info: instr_info_up.get())); |
1314 | |
1315 | return Instance(new MCDisasmInstance( |
1316 | std::move(instr_info_up), std::move(reg_info_up), |
1317 | std::move(subtarget_info_up), std::move(asm_info_up), |
1318 | std::move(context_up), std::move(disasm_up), std::move(instr_printer_up), |
1319 | std::move(instr_analysis_up))); |
1320 | } |
1321 | |
1322 | DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( |
1323 | std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, |
1324 | std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, |
1325 | std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, |
1326 | std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, |
1327 | std::unique_ptr<llvm::MCContext> &&context_up, |
1328 | std::unique_ptr<llvm::MCDisassembler> &&disasm_up, |
1329 | std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up, |
1330 | std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up) |
1331 | : m_instr_info_up(std::move(instr_info_up)), |
1332 | m_reg_info_up(std::move(reg_info_up)), |
1333 | m_subtarget_info_up(std::move(subtarget_info_up)), |
1334 | m_asm_info_up(std::move(asm_info_up)), |
1335 | m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), |
1336 | m_instr_printer_up(std::move(instr_printer_up)), |
1337 | m_instr_analysis_up(std::move(instr_analysis_up)) { |
1338 | assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up && |
1339 | m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); |
1340 | } |
1341 | |
1342 | uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( |
1343 | const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, |
1344 | llvm::MCInst &mc_inst) const { |
1345 | llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); |
1346 | llvm::MCDisassembler::DecodeStatus status; |
1347 | |
1348 | uint64_t new_inst_size; |
1349 | status = m_disasm_up->getInstruction(Instr&: mc_inst, Size&: new_inst_size, Bytes: data, Address: pc, |
1350 | CStream&: llvm::nulls()); |
1351 | if (status == llvm::MCDisassembler::Success) |
1352 | return new_inst_size; |
1353 | else |
1354 | return 0; |
1355 | } |
1356 | |
1357 | void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( |
1358 | llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string, |
1359 | std::string &) { |
1360 | llvm::raw_string_ostream inst_stream(inst_string); |
1361 | llvm::raw_string_ostream (comments_string); |
1362 | |
1363 | inst_stream.enable_colors(enable: m_instr_printer_up->getUseColor()); |
1364 | m_instr_printer_up->setCommentStream(comments_stream); |
1365 | m_instr_printer_up->printInst(MI: &mc_inst, Address: pc, Annot: llvm::StringRef(), |
1366 | STI: *m_subtarget_info_up, OS&: inst_stream); |
1367 | m_instr_printer_up->setCommentStream(llvm::nulls()); |
1368 | |
1369 | comments_stream.flush(); |
1370 | |
1371 | static std::string g_newlines("\r\n" ); |
1372 | |
1373 | for (size_t newline_pos = 0; |
1374 | (newline_pos = comments_string.find_first_of(str: g_newlines, pos: newline_pos)) != |
1375 | comments_string.npos; |
1376 | /**/) { |
1377 | comments_string.replace(i1: comments_string.begin() + newline_pos, |
1378 | i2: comments_string.begin() + newline_pos + 1, n: 1, c: ' '); |
1379 | } |
1380 | } |
1381 | |
1382 | void DisassemblerLLVMC::MCDisasmInstance::SetStyle( |
1383 | bool use_hex_immed, HexImmediateStyle hex_style) { |
1384 | m_instr_printer_up->setPrintImmHex(use_hex_immed); |
1385 | switch (hex_style) { |
1386 | case eHexStyleC: |
1387 | m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); |
1388 | break; |
1389 | case eHexStyleAsm: |
1390 | m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); |
1391 | break; |
1392 | } |
1393 | } |
1394 | |
1395 | void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color) { |
1396 | m_instr_printer_up->setUseColor(use_color); |
1397 | } |
1398 | |
1399 | bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const { |
1400 | return m_instr_printer_up->getUseColor(); |
1401 | } |
1402 | |
1403 | bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( |
1404 | llvm::MCInst &mc_inst) const { |
1405 | if (m_instr_analysis_up) |
1406 | return m_instr_analysis_up->mayAffectControlFlow(Inst: mc_inst, MCRI: *m_reg_info_up); |
1407 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()) |
1408 | .mayAffectControlFlow(MI: mc_inst, RI: *m_reg_info_up); |
1409 | } |
1410 | |
1411 | bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( |
1412 | llvm::MCInst &mc_inst) const { |
1413 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).hasDelaySlot(); |
1414 | } |
1415 | |
1416 | bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { |
1417 | if (m_instr_analysis_up) |
1418 | return m_instr_analysis_up->isCall(Inst: mc_inst); |
1419 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).isCall(); |
1420 | } |
1421 | |
1422 | bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const { |
1423 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).mayLoad(); |
1424 | } |
1425 | |
1426 | bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated( |
1427 | llvm::MCInst &mc_inst) const { |
1428 | const auto &InstrDesc = m_instr_info_up->get(Opcode: mc_inst.getOpcode()); |
1429 | |
1430 | // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4 |
1431 | // == 'a' + 'c') as authenticated instructions for reporting purposes, in |
1432 | // addition to the standard authenticated instructions specified in ARMv8.3. |
1433 | bool IsBrkC47x = false; |
1434 | if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) { |
1435 | const llvm::MCOperand &Op0 = mc_inst.getOperand(i: 0); |
1436 | if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474) |
1437 | IsBrkC47x = true; |
1438 | } |
1439 | |
1440 | return InstrDesc.isAuthenticated() || IsBrkC47x; |
1441 | } |
1442 | |
1443 | DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, |
1444 | const char *flavor_string) |
1445 | : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), |
1446 | m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS), |
1447 | m_adrp_insn() { |
1448 | if (!FlavorValidForArchSpec(arch, flavor: m_flavor.c_str())) { |
1449 | m_flavor.assign(s: "default" ); |
1450 | } |
1451 | |
1452 | unsigned flavor = ~0U; |
1453 | llvm::Triple triple = arch.GetTriple(); |
1454 | |
1455 | // So far the only supported flavor is "intel" on x86. The base class will |
1456 | // set this correctly coming in. |
1457 | if (triple.getArch() == llvm::Triple::x86 || |
1458 | triple.getArch() == llvm::Triple::x86_64) { |
1459 | if (m_flavor == "intel" ) { |
1460 | flavor = 1; |
1461 | } else if (m_flavor == "att" ) { |
1462 | flavor = 0; |
1463 | } |
1464 | } |
1465 | |
1466 | ArchSpec thumb_arch(arch); |
1467 | if (triple.getArch() == llvm::Triple::arm) { |
1468 | std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); |
1469 | // Replace "arm" with "thumb" so we get all thumb variants correct |
1470 | if (thumb_arch_name.size() > 3) { |
1471 | thumb_arch_name.erase(pos: 0, n: 3); |
1472 | thumb_arch_name.insert(pos: 0, s: "thumb" ); |
1473 | } else { |
1474 | thumb_arch_name = "thumbv9.3a" ; |
1475 | } |
1476 | thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); |
1477 | } |
1478 | |
1479 | // If no sub architecture specified then use the most recent arm architecture |
1480 | // so the disassembler will return all instructions. Without it we will see a |
1481 | // lot of unknown opcodes if the code uses instructions which are not |
1482 | // available in the oldest arm version (which is used when no sub architecture |
1483 | // is specified). |
1484 | if (triple.getArch() == llvm::Triple::arm && |
1485 | triple.getSubArch() == llvm::Triple::NoSubArch) |
1486 | triple.setArchName("armv9.3a" ); |
1487 | |
1488 | std::string features_str; |
1489 | const char *triple_str = triple.getTriple().c_str(); |
1490 | |
1491 | // ARM Cortex M0-M7 devices only execute thumb instructions |
1492 | if (arch.IsAlwaysThumbInstructions()) { |
1493 | triple_str = thumb_arch.GetTriple().getTriple().c_str(); |
1494 | features_str += "+fp-armv8," ; |
1495 | } |
1496 | |
1497 | const char *cpu = "" ; |
1498 | |
1499 | switch (arch.GetCore()) { |
1500 | case ArchSpec::eCore_mips32: |
1501 | case ArchSpec::eCore_mips32el: |
1502 | cpu = "mips32" ; |
1503 | break; |
1504 | case ArchSpec::eCore_mips32r2: |
1505 | case ArchSpec::eCore_mips32r2el: |
1506 | cpu = "mips32r2" ; |
1507 | break; |
1508 | case ArchSpec::eCore_mips32r3: |
1509 | case ArchSpec::eCore_mips32r3el: |
1510 | cpu = "mips32r3" ; |
1511 | break; |
1512 | case ArchSpec::eCore_mips32r5: |
1513 | case ArchSpec::eCore_mips32r5el: |
1514 | cpu = "mips32r5" ; |
1515 | break; |
1516 | case ArchSpec::eCore_mips32r6: |
1517 | case ArchSpec::eCore_mips32r6el: |
1518 | cpu = "mips32r6" ; |
1519 | break; |
1520 | case ArchSpec::eCore_mips64: |
1521 | case ArchSpec::eCore_mips64el: |
1522 | cpu = "mips64" ; |
1523 | break; |
1524 | case ArchSpec::eCore_mips64r2: |
1525 | case ArchSpec::eCore_mips64r2el: |
1526 | cpu = "mips64r2" ; |
1527 | break; |
1528 | case ArchSpec::eCore_mips64r3: |
1529 | case ArchSpec::eCore_mips64r3el: |
1530 | cpu = "mips64r3" ; |
1531 | break; |
1532 | case ArchSpec::eCore_mips64r5: |
1533 | case ArchSpec::eCore_mips64r5el: |
1534 | cpu = "mips64r5" ; |
1535 | break; |
1536 | case ArchSpec::eCore_mips64r6: |
1537 | case ArchSpec::eCore_mips64r6el: |
1538 | cpu = "mips64r6" ; |
1539 | break; |
1540 | default: |
1541 | cpu = "" ; |
1542 | break; |
1543 | } |
1544 | |
1545 | if (arch.IsMIPS()) { |
1546 | uint32_t arch_flags = arch.GetFlags(); |
1547 | if (arch_flags & ArchSpec::eMIPSAse_msa) |
1548 | features_str += "+msa," ; |
1549 | if (arch_flags & ArchSpec::eMIPSAse_dsp) |
1550 | features_str += "+dsp," ; |
1551 | if (arch_flags & ArchSpec::eMIPSAse_dspr2) |
1552 | features_str += "+dspr2," ; |
1553 | } |
1554 | |
1555 | // If any AArch64 variant, enable latest ISA with all extensions. |
1556 | if (triple.isAArch64()) { |
1557 | features_str += "+all," ; |
1558 | |
1559 | if (triple.getVendor() == llvm::Triple::Apple) |
1560 | cpu = "apple-latest" ; |
1561 | } |
1562 | |
1563 | if (triple.isRISCV()) { |
1564 | uint32_t arch_flags = arch.GetFlags(); |
1565 | if (arch_flags & ArchSpec::eRISCV_rvc) |
1566 | features_str += "+c," ; |
1567 | if (arch_flags & ArchSpec::eRISCV_rve) |
1568 | features_str += "+e," ; |
1569 | if ((arch_flags & ArchSpec::eRISCV_float_abi_single) == |
1570 | ArchSpec::eRISCV_float_abi_single) |
1571 | features_str += "+f," ; |
1572 | if ((arch_flags & ArchSpec::eRISCV_float_abi_double) == |
1573 | ArchSpec::eRISCV_float_abi_double) |
1574 | features_str += "+f,+d," ; |
1575 | if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) == |
1576 | ArchSpec::eRISCV_float_abi_quad) |
1577 | features_str += "+f,+d,+q," ; |
1578 | // FIXME: how do we detect features such as `+a`, `+m`? |
1579 | // Turn them on by default now, since everyone seems to use them |
1580 | features_str += "+a,+m," ; |
1581 | } |
1582 | |
1583 | // We use m_disasm_up.get() to tell whether we are valid or not, so if this |
1584 | // isn't good for some reason, we won't be valid and FindPlugin will fail and |
1585 | // we won't get used. |
1586 | m_disasm_up = MCDisasmInstance::Create(triple: triple_str, cpu, features_str: features_str.c_str(), |
1587 | flavor, owner&: *this); |
1588 | |
1589 | llvm::Triple::ArchType llvm_arch = triple.getArch(); |
1590 | |
1591 | // For arm CPUs that can execute arm or thumb instructions, also create a |
1592 | // thumb instruction disassembler. |
1593 | if (llvm_arch == llvm::Triple::arm) { |
1594 | std::string thumb_triple(thumb_arch.GetTriple().getTriple()); |
1595 | m_alternate_disasm_up = |
1596 | MCDisasmInstance::Create(triple: thumb_triple.c_str(), cpu: "" , features_str: features_str.c_str(), |
1597 | flavor, owner&: *this); |
1598 | if (!m_alternate_disasm_up) |
1599 | m_disasm_up.reset(); |
1600 | |
1601 | } else if (arch.IsMIPS()) { |
1602 | /* Create alternate disassembler for MIPS16 and microMIPS */ |
1603 | uint32_t arch_flags = arch.GetFlags(); |
1604 | if (arch_flags & ArchSpec::eMIPSAse_mips16) |
1605 | features_str += "+mips16," ; |
1606 | else if (arch_flags & ArchSpec::eMIPSAse_micromips) |
1607 | features_str += "+micromips," ; |
1608 | |
1609 | m_alternate_disasm_up = MCDisasmInstance::Create( |
1610 | triple: triple_str, cpu, features_str: features_str.c_str(), flavor, owner&: *this); |
1611 | if (!m_alternate_disasm_up) |
1612 | m_disasm_up.reset(); |
1613 | } |
1614 | } |
1615 | |
1616 | DisassemblerLLVMC::~DisassemblerLLVMC() = default; |
1617 | |
1618 | lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, |
1619 | const char *flavor) { |
1620 | if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { |
1621 | auto disasm_sp = std::make_shared<DisassemblerLLVMC>(args: arch, args&: flavor); |
1622 | if (disasm_sp && disasm_sp->IsValid()) |
1623 | return disasm_sp; |
1624 | } |
1625 | return lldb::DisassemblerSP(); |
1626 | } |
1627 | |
1628 | size_t DisassemblerLLVMC::(const Address &base_addr, |
1629 | const DataExtractor &data, |
1630 | lldb::offset_t data_offset, |
1631 | size_t num_instructions, |
1632 | bool append, bool data_from_file) { |
1633 | if (!append) |
1634 | m_instruction_list.Clear(); |
1635 | |
1636 | if (!IsValid()) |
1637 | return 0; |
1638 | |
1639 | m_data_from_file = data_from_file; |
1640 | uint32_t data_cursor = data_offset; |
1641 | const size_t data_byte_size = data.GetByteSize(); |
1642 | uint32_t instructions_parsed = 0; |
1643 | Address inst_addr(base_addr); |
1644 | |
1645 | while (data_cursor < data_byte_size && |
1646 | instructions_parsed < num_instructions) { |
1647 | |
1648 | AddressClass address_class = AddressClass::eCode; |
1649 | |
1650 | if (m_alternate_disasm_up) |
1651 | address_class = inst_addr.GetAddressClass(); |
1652 | |
1653 | InstructionSP inst_sp( |
1654 | new InstructionLLVMC(*this, inst_addr, address_class)); |
1655 | |
1656 | if (!inst_sp) |
1657 | break; |
1658 | |
1659 | uint32_t inst_size = inst_sp->Decode(disassembler: *this, data, data_offset: data_cursor); |
1660 | |
1661 | if (inst_size == 0) |
1662 | break; |
1663 | |
1664 | m_instruction_list.Append(inst_sp); |
1665 | data_cursor += inst_size; |
1666 | inst_addr.Slide(offset: inst_size); |
1667 | instructions_parsed++; |
1668 | } |
1669 | |
1670 | return data_cursor - data_offset; |
1671 | } |
1672 | |
1673 | void DisassemblerLLVMC::Initialize() { |
1674 | PluginManager::RegisterPlugin(name: GetPluginNameStatic(), |
1675 | description: "Disassembler that uses LLVM MC to disassemble " |
1676 | "i386, x86_64, ARM, and ARM64." , |
1677 | create_callback: CreateInstance); |
1678 | |
1679 | llvm::InitializeAllTargetInfos(); |
1680 | llvm::InitializeAllTargetMCs(); |
1681 | llvm::InitializeAllAsmParsers(); |
1682 | llvm::InitializeAllDisassemblers(); |
1683 | } |
1684 | |
1685 | void DisassemblerLLVMC::Terminate() { |
1686 | PluginManager::UnregisterPlugin(create_callback: CreateInstance); |
1687 | } |
1688 | |
1689 | int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, |
1690 | uint64_t offset, uint64_t size, |
1691 | int tag_type, void *tag_bug) { |
1692 | return static_cast<DisassemblerLLVMC *>(disassembler) |
1693 | ->OpInfo(PC: pc, Offset: offset, Size: size, TagType: tag_type, TagBug: tag_bug); |
1694 | } |
1695 | |
1696 | const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, |
1697 | uint64_t value, |
1698 | uint64_t *type, uint64_t pc, |
1699 | const char **name) { |
1700 | return static_cast<DisassemblerLLVMC *>(disassembler) |
1701 | ->SymbolLookup(ReferenceValue: value, ReferenceType: type, ReferencePC: pc, ReferenceName: name); |
1702 | } |
1703 | |
1704 | bool DisassemblerLLVMC::FlavorValidForArchSpec( |
1705 | const lldb_private::ArchSpec &arch, const char *flavor) { |
1706 | llvm::Triple triple = arch.GetTriple(); |
1707 | if (flavor == nullptr || strcmp(s1: flavor, s2: "default" ) == 0) |
1708 | return true; |
1709 | |
1710 | if (triple.getArch() == llvm::Triple::x86 || |
1711 | triple.getArch() == llvm::Triple::x86_64) { |
1712 | return strcmp(s1: flavor, s2: "intel" ) == 0 || strcmp(s1: flavor, s2: "att" ) == 0; |
1713 | } else |
1714 | return false; |
1715 | } |
1716 | |
1717 | bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } |
1718 | |
1719 | int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, |
1720 | int tag_type, void *tag_bug) { |
1721 | switch (tag_type) { |
1722 | default: |
1723 | break; |
1724 | case 1: |
1725 | memset(s: tag_bug, c: 0, n: sizeof(::LLVMOpInfo1)); |
1726 | break; |
1727 | } |
1728 | return 0; |
1729 | } |
1730 | |
1731 | const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, |
1732 | uint64_t pc, const char **name) { |
1733 | if (*type_ptr) { |
1734 | if (m_exe_ctx && m_inst) { |
1735 | // std::string remove_this_prior_to_checkin; |
1736 | Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; |
1737 | Address value_so_addr; |
1738 | Address pc_so_addr; |
1739 | if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 || |
1740 | target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be || |
1741 | target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) { |
1742 | if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { |
1743 | m_adrp_address = pc; |
1744 | m_adrp_insn = value; |
1745 | *name = nullptr; |
1746 | *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; |
1747 | return nullptr; |
1748 | } |
1749 | // If this instruction is an ADD and |
1750 | // the previous instruction was an ADRP and |
1751 | // the ADRP's register and this ADD's register are the same, |
1752 | // then this is a pc-relative address calculation. |
1753 | if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && |
1754 | m_adrp_insn && m_adrp_address == pc - 4 && |
1755 | (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) { |
1756 | uint32_t addxri_inst; |
1757 | uint64_t adrp_imm, addxri_imm; |
1758 | // Get immlo and immhi bits, OR them together to get the ADRP imm |
1759 | // value. |
1760 | adrp_imm = |
1761 | ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3); |
1762 | // if high bit of immhi after right-shifting set, sign extend |
1763 | if (adrp_imm & (1ULL << 20)) |
1764 | adrp_imm |= ~((1ULL << 21) - 1); |
1765 | |
1766 | addxri_inst = value; |
1767 | addxri_imm = (addxri_inst >> 10) & 0xfff; |
1768 | // check if 'sh' bit is set, shift imm value up if so |
1769 | // (this would make no sense, ADRP already gave us this part) |
1770 | if ((addxri_inst >> (12 + 5 + 5)) & 1) |
1771 | addxri_imm <<= 12; |
1772 | value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) + |
1773 | addxri_imm; |
1774 | } |
1775 | m_adrp_address = LLDB_INVALID_ADDRESS; |
1776 | m_adrp_insn.reset(); |
1777 | } |
1778 | |
1779 | if (m_inst->UsingFileAddress()) { |
1780 | ModuleSP module_sp(m_inst->GetAddress().GetModule()); |
1781 | if (module_sp) { |
1782 | module_sp->ResolveFileAddress(vm_addr: value, so_addr&: value_so_addr); |
1783 | module_sp->ResolveFileAddress(vm_addr: pc, so_addr&: pc_so_addr); |
1784 | } |
1785 | } else if (target && !target->GetSectionLoadList().IsEmpty()) { |
1786 | target->GetSectionLoadList().ResolveLoadAddress(load_addr: value, so_addr&: value_so_addr); |
1787 | target->GetSectionLoadList().ResolveLoadAddress(load_addr: pc, so_addr&: pc_so_addr); |
1788 | } |
1789 | |
1790 | SymbolContext sym_ctx; |
1791 | const SymbolContextItem resolve_scope = |
1792 | eSymbolContextFunction | eSymbolContextSymbol; |
1793 | if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { |
1794 | pc_so_addr.GetModule()->ResolveSymbolContextForAddress( |
1795 | so_addr: pc_so_addr, resolve_scope, sc&: sym_ctx); |
1796 | } |
1797 | |
1798 | if (value_so_addr.IsValid() && value_so_addr.GetSection()) { |
1799 | StreamString ss; |
1800 | |
1801 | bool format_omitting_current_func_name = false; |
1802 | if (sym_ctx.symbol || sym_ctx.function) { |
1803 | AddressRange range; |
1804 | if (sym_ctx.GetAddressRange(scope: resolve_scope, range_idx: 0, use_inline_block_range: false, range) && |
1805 | range.GetBaseAddress().IsValid() && |
1806 | range.ContainsLoadAddress(so_addr: value_so_addr, target)) { |
1807 | format_omitting_current_func_name = true; |
1808 | } |
1809 | } |
1810 | |
1811 | // If the "value" address (the target address we're symbolicating) is |
1812 | // inside the same SymbolContext as the current instruction pc |
1813 | // (pc_so_addr), don't print the full function name - just print it |
1814 | // with DumpStyleNoFunctionName style, e.g. "<+36>". |
1815 | if (format_omitting_current_func_name) { |
1816 | value_so_addr.Dump(s: &ss, exe_scope: target, style: Address::DumpStyleNoFunctionName, |
1817 | fallback_style: Address::DumpStyleSectionNameOffset); |
1818 | } else { |
1819 | value_so_addr.Dump( |
1820 | s: &ss, exe_scope: target, |
1821 | style: Address::DumpStyleResolvedDescriptionNoFunctionArguments, |
1822 | fallback_style: Address::DumpStyleSectionNameOffset); |
1823 | } |
1824 | |
1825 | if (!ss.GetString().empty()) { |
1826 | // If Address::Dump returned a multi-line description, most commonly |
1827 | // seen when we have multiple levels of inlined functions at an |
1828 | // address, only show the first line. |
1829 | std::string str = std::string(ss.GetString()); |
1830 | size_t first_eol_char = str.find_first_of(s: "\r\n" ); |
1831 | if (first_eol_char != std::string::npos) { |
1832 | str.erase(pos: first_eol_char); |
1833 | } |
1834 | m_inst->AppendComment(description&: str); |
1835 | } |
1836 | } |
1837 | } |
1838 | } |
1839 | |
1840 | // TODO: llvm-objdump sets the type_ptr to the |
1841 | // LLVMDisassembler_ReferenceType_Out_* values |
1842 | // based on where value_so_addr is pointing, with |
1843 | // Mach-O specific augmentations in MachODump.cpp. e.g. |
1844 | // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand |
1845 | // handles. |
1846 | *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; |
1847 | *name = nullptr; |
1848 | return nullptr; |
1849 | } |
1850 | |