| 1 | //===-- DisassemblerLLVMC.cpp ---------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "DisassemblerLLVMC.h" |
| 10 | |
| 11 | #include "llvm-c/Disassembler.h" |
| 12 | #include "llvm/ADT/SmallString.h" |
| 13 | #include "llvm/ADT/StringExtras.h" |
| 14 | #include "llvm/MC/MCAsmInfo.h" |
| 15 | #include "llvm/MC/MCContext.h" |
| 16 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
| 17 | #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" |
| 18 | #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" |
| 19 | #include "llvm/MC/MCInst.h" |
| 20 | #include "llvm/MC/MCInstPrinter.h" |
| 21 | #include "llvm/MC/MCInstrAnalysis.h" |
| 22 | #include "llvm/MC/MCInstrInfo.h" |
| 23 | #include "llvm/MC/MCRegisterInfo.h" |
| 24 | #include "llvm/MC/MCSubtargetInfo.h" |
| 25 | #include "llvm/MC/MCTargetOptions.h" |
| 26 | #include "llvm/MC/TargetRegistry.h" |
| 27 | #include "llvm/Support/ErrorHandling.h" |
| 28 | #include "llvm/Support/ScopedPrinter.h" |
| 29 | #include "llvm/Support/TargetSelect.h" |
| 30 | #include "llvm/TargetParser/AArch64TargetParser.h" |
| 31 | |
| 32 | #include "lldb/Core/Address.h" |
| 33 | #include "lldb/Core/Module.h" |
| 34 | #include "lldb/Symbol/Function.h" |
| 35 | #include "lldb/Symbol/SymbolContext.h" |
| 36 | #include "lldb/Target/ExecutionContext.h" |
| 37 | #include "lldb/Target/Process.h" |
| 38 | #include "lldb/Target/RegisterContext.h" |
| 39 | #include "lldb/Target/SectionLoadList.h" |
| 40 | #include "lldb/Target/StackFrame.h" |
| 41 | #include "lldb/Target/Target.h" |
| 42 | #include "lldb/Utility/DataExtractor.h" |
| 43 | #include "lldb/Utility/LLDBLog.h" |
| 44 | #include "lldb/Utility/Log.h" |
| 45 | #include "lldb/Utility/RegularExpression.h" |
| 46 | #include "lldb/Utility/Stream.h" |
| 47 | #include <optional> |
| 48 | |
| 49 | using namespace lldb; |
| 50 | using namespace lldb_private; |
| 51 | |
| 52 | LLDB_PLUGIN_DEFINE(DisassemblerLLVMC) |
| 53 | |
| 54 | class DisassemblerLLVMC::MCDisasmInstance { |
| 55 | public: |
| 56 | static std::unique_ptr<MCDisasmInstance> |
| 57 | Create(const char *triple, const char *cpu, const char *features_str, |
| 58 | unsigned flavor, DisassemblerLLVMC &owner); |
| 59 | |
| 60 | ~MCDisasmInstance() = default; |
| 61 | |
| 62 | uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, |
| 63 | lldb::addr_t pc, llvm::MCInst &mc_inst) const; |
| 64 | void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc, |
| 65 | std::string &inst_string, std::string &); |
| 66 | void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); |
| 67 | void SetUseColor(bool use_color); |
| 68 | bool GetUseColor() const; |
| 69 | bool CanBranch(llvm::MCInst &mc_inst) const; |
| 70 | bool HasDelaySlot(llvm::MCInst &mc_inst) const; |
| 71 | bool IsCall(llvm::MCInst &mc_inst) const; |
| 72 | bool IsLoad(llvm::MCInst &mc_inst) const; |
| 73 | bool IsAuthenticated(llvm::MCInst &mc_inst) const; |
| 74 | |
| 75 | private: |
| 76 | MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, |
| 77 | std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, |
| 78 | std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, |
| 79 | std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, |
| 80 | std::unique_ptr<llvm::MCContext> &&context_up, |
| 81 | std::unique_ptr<llvm::MCDisassembler> &&disasm_up, |
| 82 | std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up, |
| 83 | std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up); |
| 84 | |
| 85 | std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; |
| 86 | std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; |
| 87 | std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; |
| 88 | std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; |
| 89 | std::unique_ptr<llvm::MCContext> m_context_up; |
| 90 | std::unique_ptr<llvm::MCDisassembler> m_disasm_up; |
| 91 | std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; |
| 92 | std::unique_ptr<llvm::MCInstrAnalysis> m_instr_analysis_up; |
| 93 | }; |
| 94 | |
| 95 | namespace x86 { |
| 96 | |
/// The three values that decide an instruction's control flow kind.
/// InstructionLengthDecode decodes an instruction and fills in this struct.
///
/// primary_opcode
///    Primary opcode of the instruction.
///    For a one-byte opcode it is the first byte after the prefixes.
///    For two- and three-byte opcodes it is the second byte.
///
/// opcode_len
///    Length of the opcode in bytes. Valid opcode lengths are 1, 2, or 3.
///
/// modrm
///    ModR/M byte of the instruction.
///    Bits[7:6] hold MOD, bits[5:3] specify a register, and the R/M
///    bits[2:0] hold a register or select an addressing mode, depending on
///    MOD.
struct InstructionOpcodeAndModrm {
  uint8_t primary_opcode;
  uint8_t opcode_len;
  uint8_t modrm;
};
| 117 | |
| 118 | /// Determine the InstructionControlFlowKind based on opcode and modrm bytes. |
| 119 | /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and |
| 120 | /// instruction set. |
| 121 | /// |
| 122 | /// \param[in] opcode_and_modrm |
| 123 | /// Contains primary_opcode byte, its length, and ModR/M byte. |
| 124 | /// Refer to the struct InstructionOpcodeAndModrm for details. |
| 125 | /// |
| 126 | /// \return |
| 127 | /// The control flow kind of the instruction or |
| 128 | /// eInstructionControlFlowKindOther if the instruction doesn't affect |
| 129 | /// the control flow of the program. |
| 130 | lldb::InstructionControlFlowKind |
| 131 | MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { |
| 132 | uint8_t opcode = opcode_and_modrm.primary_opcode; |
| 133 | uint8_t opcode_len = opcode_and_modrm.opcode_len; |
| 134 | uint8_t modrm = opcode_and_modrm.modrm; |
| 135 | |
| 136 | if (opcode_len > 2) |
| 137 | return lldb::eInstructionControlFlowKindOther; |
| 138 | |
| 139 | if (opcode >= 0x70 && opcode <= 0x7F) { |
| 140 | if (opcode_len == 1) |
| 141 | return lldb::eInstructionControlFlowKindCondJump; |
| 142 | else |
| 143 | return lldb::eInstructionControlFlowKindOther; |
| 144 | } |
| 145 | |
| 146 | if (opcode >= 0x80 && opcode <= 0x8F) { |
| 147 | if (opcode_len == 2) |
| 148 | return lldb::eInstructionControlFlowKindCondJump; |
| 149 | else |
| 150 | return lldb::eInstructionControlFlowKindOther; |
| 151 | } |
| 152 | |
| 153 | switch (opcode) { |
| 154 | case 0x9A: |
| 155 | if (opcode_len == 1) |
| 156 | return lldb::eInstructionControlFlowKindFarCall; |
| 157 | break; |
| 158 | case 0xFF: |
| 159 | if (opcode_len == 1) { |
| 160 | uint8_t modrm_reg = (modrm >> 3) & 7; |
| 161 | if (modrm_reg == 2) |
| 162 | return lldb::eInstructionControlFlowKindCall; |
| 163 | else if (modrm_reg == 3) |
| 164 | return lldb::eInstructionControlFlowKindFarCall; |
| 165 | else if (modrm_reg == 4) |
| 166 | return lldb::eInstructionControlFlowKindJump; |
| 167 | else if (modrm_reg == 5) |
| 168 | return lldb::eInstructionControlFlowKindFarJump; |
| 169 | } |
| 170 | break; |
| 171 | case 0xE8: |
| 172 | if (opcode_len == 1) |
| 173 | return lldb::eInstructionControlFlowKindCall; |
| 174 | break; |
| 175 | case 0xCD: |
| 176 | case 0xCC: |
| 177 | case 0xCE: |
| 178 | case 0xF1: |
| 179 | if (opcode_len == 1) |
| 180 | return lldb::eInstructionControlFlowKindFarCall; |
| 181 | break; |
| 182 | case 0xCF: |
| 183 | if (opcode_len == 1) |
| 184 | return lldb::eInstructionControlFlowKindFarReturn; |
| 185 | break; |
| 186 | case 0xE9: |
| 187 | case 0xEB: |
| 188 | if (opcode_len == 1) |
| 189 | return lldb::eInstructionControlFlowKindJump; |
| 190 | break; |
| 191 | case 0xEA: |
| 192 | if (opcode_len == 1) |
| 193 | return lldb::eInstructionControlFlowKindFarJump; |
| 194 | break; |
| 195 | case 0xE3: |
| 196 | case 0xE0: |
| 197 | case 0xE1: |
| 198 | case 0xE2: |
| 199 | if (opcode_len == 1) |
| 200 | return lldb::eInstructionControlFlowKindCondJump; |
| 201 | break; |
| 202 | case 0xC3: |
| 203 | case 0xC2: |
| 204 | if (opcode_len == 1) |
| 205 | return lldb::eInstructionControlFlowKindReturn; |
| 206 | break; |
| 207 | case 0xCB: |
| 208 | case 0xCA: |
| 209 | if (opcode_len == 1) |
| 210 | return lldb::eInstructionControlFlowKindFarReturn; |
| 211 | break; |
| 212 | case 0x05: |
| 213 | case 0x34: |
| 214 | if (opcode_len == 2) |
| 215 | return lldb::eInstructionControlFlowKindFarCall; |
| 216 | break; |
| 217 | case 0x35: |
| 218 | case 0x07: |
| 219 | if (opcode_len == 2) |
| 220 | return lldb::eInstructionControlFlowKindFarReturn; |
| 221 | break; |
| 222 | case 0x01: |
| 223 | if (opcode_len == 2) { |
| 224 | switch (modrm) { |
| 225 | case 0xc1: |
| 226 | return lldb::eInstructionControlFlowKindFarCall; |
| 227 | case 0xc2: |
| 228 | case 0xc3: |
| 229 | return lldb::eInstructionControlFlowKindFarReturn; |
| 230 | default: |
| 231 | break; |
| 232 | } |
| 233 | } |
| 234 | break; |
| 235 | default: |
| 236 | break; |
| 237 | } |
| 238 | |
| 239 | return lldb::eInstructionControlFlowKindOther; |
| 240 | } |
| 241 | |
| 242 | /// Decode an instruction into opcode, modrm and opcode_len. |
| 243 | /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. |
| 244 | /// Opcodes in x86 are generally the first byte of instruction, though two-byte |
| 245 | /// instructions and prefixes exist. ModR/M is the byte following the opcode |
| 246 | /// and adds additional information for how the instruction is executed. |
| 247 | /// |
| 248 | /// \param[in] inst_bytes |
| 249 | /// Raw bytes of the instruction |
| 250 | /// |
| 251 | /// |
| 252 | /// \param[in] bytes_len |
| 253 | /// The length of the inst_bytes array. |
| 254 | /// |
| 255 | /// \param[in] is_exec_mode_64b |
| 256 | /// If true, the execution mode is 64 bit. |
| 257 | /// |
| 258 | /// \return |
| 259 | /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding |
| 260 | /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition |
| 261 | /// for more details. |
| 262 | /// Otherwise if the given instruction is invalid, returns std::nullopt. |
| 263 | std::optional<InstructionOpcodeAndModrm> |
| 264 | InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, |
| 265 | bool is_exec_mode_64b) { |
| 266 | int op_idx = 0; |
| 267 | bool prefix_done = false; |
| 268 | InstructionOpcodeAndModrm ret = {.primary_opcode: 0, .opcode_len: 0, .modrm: 0}; |
| 269 | |
| 270 | // In most cases, the primary_opcode is the first byte of the instruction |
| 271 | // but some instructions have a prefix to be skipped for these calculations. |
| 272 | // The following mapping is inspired from libipt's instruction decoding logic |
| 273 | // in `src/pt_ild.c` |
| 274 | while (!prefix_done) { |
| 275 | if (op_idx >= bytes_len) |
| 276 | return std::nullopt; |
| 277 | |
| 278 | ret.primary_opcode = inst_bytes[op_idx]; |
| 279 | switch (ret.primary_opcode) { |
| 280 | // prefix_ignore |
| 281 | case 0x26: |
| 282 | case 0x2e: |
| 283 | case 0x36: |
| 284 | case 0x3e: |
| 285 | case 0x64: |
| 286 | case 0x65: |
| 287 | // prefix_osz, prefix_asz |
| 288 | case 0x66: |
| 289 | case 0x67: |
| 290 | // prefix_lock, prefix_f2, prefix_f3 |
| 291 | case 0xf0: |
| 292 | case 0xf2: |
| 293 | case 0xf3: |
| 294 | op_idx++; |
| 295 | break; |
| 296 | |
| 297 | // prefix_rex |
| 298 | case 0x40: |
| 299 | case 0x41: |
| 300 | case 0x42: |
| 301 | case 0x43: |
| 302 | case 0x44: |
| 303 | case 0x45: |
| 304 | case 0x46: |
| 305 | case 0x47: |
| 306 | case 0x48: |
| 307 | case 0x49: |
| 308 | case 0x4a: |
| 309 | case 0x4b: |
| 310 | case 0x4c: |
| 311 | case 0x4d: |
| 312 | case 0x4e: |
| 313 | case 0x4f: |
| 314 | if (is_exec_mode_64b) |
| 315 | op_idx++; |
| 316 | else |
| 317 | prefix_done = true; |
| 318 | break; |
| 319 | |
| 320 | // prefix_vex_c4, c5 |
| 321 | case 0xc5: |
| 322 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
| 323 | prefix_done = true; |
| 324 | break; |
| 325 | } |
| 326 | |
| 327 | ret.opcode_len = 2; |
| 328 | ret.primary_opcode = inst_bytes[op_idx + 2]; |
| 329 | ret.modrm = inst_bytes[op_idx + 3]; |
| 330 | return ret; |
| 331 | |
| 332 | case 0xc4: |
| 333 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
| 334 | prefix_done = true; |
| 335 | break; |
| 336 | } |
| 337 | ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; |
| 338 | ret.primary_opcode = inst_bytes[op_idx + 3]; |
| 339 | ret.modrm = inst_bytes[op_idx + 4]; |
| 340 | return ret; |
| 341 | |
| 342 | // prefix_evex |
| 343 | case 0x62: |
| 344 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
| 345 | prefix_done = true; |
| 346 | break; |
| 347 | } |
| 348 | ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; |
| 349 | ret.primary_opcode = inst_bytes[op_idx + 4]; |
| 350 | ret.modrm = inst_bytes[op_idx + 5]; |
| 351 | return ret; |
| 352 | |
| 353 | default: |
| 354 | prefix_done = true; |
| 355 | break; |
| 356 | } |
| 357 | } // prefix done |
| 358 | |
| 359 | ret.primary_opcode = inst_bytes[op_idx]; |
| 360 | ret.modrm = inst_bytes[op_idx + 1]; |
| 361 | ret.opcode_len = 1; |
| 362 | |
| 363 | // If the first opcode is 0F, it's two- or three- byte opcodes. |
| 364 | if (ret.primary_opcode == 0x0F) { |
| 365 | ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte |
| 366 | |
| 367 | if (ret.primary_opcode == 0x38) { |
| 368 | ret.opcode_len = 3; |
| 369 | ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte |
| 370 | ret.modrm = inst_bytes[op_idx + 1]; |
| 371 | } else if (ret.primary_opcode == 0x3A) { |
| 372 | ret.opcode_len = 3; |
| 373 | ret.primary_opcode = inst_bytes[++op_idx]; |
| 374 | ret.modrm = inst_bytes[op_idx + 1]; |
| 375 | } else if ((ret.primary_opcode & 0xf8) == 0x38) { |
| 376 | ret.opcode_len = 0; |
| 377 | ret.primary_opcode = inst_bytes[++op_idx]; |
| 378 | ret.modrm = inst_bytes[op_idx + 1]; |
| 379 | } else if (ret.primary_opcode == 0x0F) { |
| 380 | ret.opcode_len = 3; |
| 381 | // opcode is 0x0F, no needs to update |
| 382 | ret.modrm = inst_bytes[op_idx + 1]; |
| 383 | } else { |
| 384 | ret.opcode_len = 2; |
| 385 | ret.modrm = inst_bytes[op_idx + 1]; |
| 386 | } |
| 387 | } |
| 388 | |
| 389 | return ret; |
| 390 | } |
| 391 | |
| 392 | lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, |
| 393 | Opcode m_opcode) { |
| 394 | std::optional<InstructionOpcodeAndModrm> ret; |
| 395 | |
| 396 | if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { |
| 397 | // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes |
| 398 | return lldb::eInstructionControlFlowKindUnknown; |
| 399 | } |
| 400 | |
| 401 | // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. |
| 402 | // These are the three values deciding instruction control flow kind. |
| 403 | ret = InstructionLengthDecode(inst_bytes: (const uint8_t *)m_opcode.GetOpcodeBytes(), |
| 404 | bytes_len: m_opcode.GetByteSize(), is_exec_mode_64b); |
| 405 | if (!ret) |
| 406 | return lldb::eInstructionControlFlowKindUnknown; |
| 407 | else |
| 408 | return MapOpcodeIntoControlFlowKind(opcode_and_modrm: *ret); |
| 409 | } |
| 410 | |
| 411 | } // namespace x86 |
| 412 | |
| 413 | class InstructionLLVMC : public lldb_private::Instruction { |
| 414 | public: |
| 415 | InstructionLLVMC(DisassemblerLLVMC &disasm, |
| 416 | const lldb_private::Address &address, |
| 417 | AddressClass addr_class) |
| 418 | : Instruction(address, addr_class), |
| 419 | m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( |
| 420 | r: disasm.shared_from_this())) {} |
| 421 | |
| 422 | ~InstructionLLVMC() override = default; |
| 423 | |
| 424 | bool DoesBranch() override { |
| 425 | VisitInstruction(); |
| 426 | return m_does_branch; |
| 427 | } |
| 428 | |
| 429 | bool HasDelaySlot() override { |
| 430 | VisitInstruction(); |
| 431 | return m_has_delay_slot; |
| 432 | } |
| 433 | |
| 434 | bool IsLoad() override { |
| 435 | VisitInstruction(); |
| 436 | return m_is_load; |
| 437 | } |
| 438 | |
| 439 | bool IsAuthenticated() override { |
| 440 | VisitInstruction(); |
| 441 | return m_is_authenticated; |
| 442 | } |
| 443 | |
| 444 | DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) { |
| 445 | DisassemblerScope disasm(*this); |
| 446 | return GetDisasmToUse(is_alternate_isa, disasm); |
| 447 | } |
| 448 | |
| 449 | size_t (const lldb_private::Disassembler &disassembler, |
| 450 | const lldb_private::DataExtractor &data, |
| 451 | lldb::offset_t data_offset) override { |
| 452 | // All we have to do is read the opcode which can be easy for some |
| 453 | // architectures |
| 454 | bool got_op = false; |
| 455 | DisassemblerScope disasm(*this); |
| 456 | if (disasm) { |
| 457 | const ArchSpec &arch = disasm->GetArchitecture(); |
| 458 | const lldb::ByteOrder byte_order = data.GetByteOrder(); |
| 459 | |
| 460 | const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); |
| 461 | const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); |
| 462 | if (min_op_byte_size == max_op_byte_size) { |
| 463 | // Fixed size instructions, just read that amount of data. |
| 464 | if (!data.ValidOffsetForDataOfSize(offset: data_offset, length: min_op_byte_size)) |
| 465 | return false; |
| 466 | |
| 467 | switch (min_op_byte_size) { |
| 468 | case 1: |
| 469 | m_opcode.SetOpcode8(inst: data.GetU8(offset_ptr: &data_offset), order: byte_order); |
| 470 | got_op = true; |
| 471 | break; |
| 472 | |
| 473 | case 2: |
| 474 | m_opcode.SetOpcode16(inst: data.GetU16(offset_ptr: &data_offset), order: byte_order); |
| 475 | got_op = true; |
| 476 | break; |
| 477 | |
| 478 | case 4: |
| 479 | m_opcode.SetOpcode32(inst: data.GetU32(offset_ptr: &data_offset), order: byte_order); |
| 480 | got_op = true; |
| 481 | break; |
| 482 | |
| 483 | case 8: |
| 484 | m_opcode.SetOpcode64(inst: data.GetU64(offset_ptr: &data_offset), order: byte_order); |
| 485 | got_op = true; |
| 486 | break; |
| 487 | |
| 488 | default: |
| 489 | m_opcode.SetOpcodeBytes(bytes: data.PeekData(offset: data_offset, length: min_op_byte_size), |
| 490 | length: min_op_byte_size); |
| 491 | got_op = true; |
| 492 | break; |
| 493 | } |
| 494 | } |
| 495 | if (!got_op) { |
| 496 | bool is_alternate_isa = false; |
| 497 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = |
| 498 | GetDisasmToUse(is_alternate_isa, disasm); |
| 499 | |
| 500 | const llvm::Triple::ArchType machine = arch.GetMachine(); |
| 501 | if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { |
| 502 | if (machine == llvm::Triple::thumb || is_alternate_isa) { |
| 503 | uint32_t thumb_opcode = data.GetU16(offset_ptr: &data_offset); |
| 504 | if ((thumb_opcode & 0xe000) != 0xe000 || |
| 505 | ((thumb_opcode & 0x1800u) == 0)) { |
| 506 | m_opcode.SetOpcode16(inst: thumb_opcode, order: byte_order); |
| 507 | m_is_valid = true; |
| 508 | } else { |
| 509 | thumb_opcode <<= 16; |
| 510 | thumb_opcode |= data.GetU16(offset_ptr: &data_offset); |
| 511 | m_opcode.SetOpcode16_2(inst: thumb_opcode, order: byte_order); |
| 512 | m_is_valid = true; |
| 513 | } |
| 514 | } else { |
| 515 | m_opcode.SetOpcode32(inst: data.GetU32(offset_ptr: &data_offset), order: byte_order); |
| 516 | m_is_valid = true; |
| 517 | } |
| 518 | } else { |
| 519 | // The opcode isn't evenly sized, so we need to actually use the llvm |
| 520 | // disassembler to parse it and get the size. |
| 521 | uint8_t *opcode_data = |
| 522 | const_cast<uint8_t *>(data.PeekData(offset: data_offset, length: 1)); |
| 523 | const size_t opcode_data_len = data.BytesLeft(offset: data_offset); |
| 524 | const addr_t pc = m_address.GetFileAddress(); |
| 525 | llvm::MCInst inst; |
| 526 | |
| 527 | const size_t inst_size = |
| 528 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
| 529 | if (inst_size == 0) |
| 530 | m_opcode.Clear(); |
| 531 | else { |
| 532 | m_opcode.SetOpcodeBytes(bytes: opcode_data, length: inst_size); |
| 533 | m_is_valid = true; |
| 534 | } |
| 535 | } |
| 536 | } |
| 537 | return m_opcode.GetByteSize(); |
| 538 | } |
| 539 | return 0; |
| 540 | } |
| 541 | |
| 542 | void (std::string &description) { |
| 543 | if (m_comment.empty()) |
| 544 | m_comment.swap(s&: description); |
| 545 | else { |
| 546 | m_comment.append(s: ", " ); |
| 547 | m_comment.append(str: description); |
| 548 | } |
| 549 | } |
| 550 | |
| 551 | lldb::InstructionControlFlowKind |
| 552 | GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override { |
| 553 | DisassemblerScope disasm(*this, exe_ctx); |
| 554 | if (disasm){ |
| 555 | if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86) |
| 556 | return x86::GetControlFlowKind(/*is_64b=*/is_exec_mode_64b: false, m_opcode); |
| 557 | else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64) |
| 558 | return x86::GetControlFlowKind(/*is_64b=*/is_exec_mode_64b: true, m_opcode); |
| 559 | } |
| 560 | |
| 561 | return eInstructionControlFlowKindUnknown; |
| 562 | } |
| 563 | |
| 564 | void CalculateMnemonicOperandsAndComment( |
| 565 | const lldb_private::ExecutionContext *exe_ctx) override { |
| 566 | DataExtractor data; |
| 567 | const AddressClass address_class = GetAddressClass(); |
| 568 | |
| 569 | if (m_opcode.GetData(data)) { |
| 570 | std::string out_string; |
| 571 | std::string markup_out_string; |
| 572 | std::string ; |
| 573 | std::string ; |
| 574 | |
| 575 | DisassemblerScope disasm(*this, exe_ctx); |
| 576 | if (disasm) { |
| 577 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; |
| 578 | |
| 579 | if (address_class == AddressClass::eCodeAlternateISA) |
| 580 | mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); |
| 581 | else |
| 582 | mc_disasm_ptr = disasm->m_disasm_up.get(); |
| 583 | |
| 584 | lldb::addr_t pc = m_address.GetFileAddress(); |
| 585 | m_using_file_addr = true; |
| 586 | |
| 587 | bool use_hex_immediates = true; |
| 588 | Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; |
| 589 | |
| 590 | if (exe_ctx) { |
| 591 | Target *target = exe_ctx->GetTargetPtr(); |
| 592 | if (target) { |
| 593 | use_hex_immediates = target->GetUseHexImmediates(); |
| 594 | hex_style = target->GetHexImmediateStyle(); |
| 595 | |
| 596 | const lldb::addr_t load_addr = m_address.GetLoadAddress(target); |
| 597 | if (load_addr != LLDB_INVALID_ADDRESS) { |
| 598 | pc = load_addr; |
| 599 | m_using_file_addr = false; |
| 600 | } |
| 601 | } |
| 602 | } |
| 603 | |
| 604 | const uint8_t *opcode_data = data.GetDataStart(); |
| 605 | const size_t opcode_data_len = data.GetByteSize(); |
| 606 | llvm::MCInst inst; |
| 607 | size_t inst_size = |
| 608 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
| 609 | |
| 610 | if (inst_size > 0) { |
| 611 | mc_disasm_ptr->SetStyle(use_hex_immed: use_hex_immediates, hex_style); |
| 612 | |
| 613 | const bool saved_use_color = mc_disasm_ptr->GetUseColor(); |
| 614 | mc_disasm_ptr->SetUseColor(false); |
| 615 | mc_disasm_ptr->PrintMCInst(mc_inst&: inst, pc, inst_string&: out_string, comments_string&: comment_string); |
| 616 | mc_disasm_ptr->SetUseColor(true); |
| 617 | mc_disasm_ptr->PrintMCInst(mc_inst&: inst, pc, inst_string&: markup_out_string, |
| 618 | comments_string&: markup_comment_string); |
| 619 | mc_disasm_ptr->SetUseColor(saved_use_color); |
| 620 | |
| 621 | if (!comment_string.empty()) { |
| 622 | AppendComment(description&: comment_string); |
| 623 | } |
| 624 | } |
| 625 | |
| 626 | if (inst_size == 0) { |
| 627 | m_comment.assign(s: "unknown opcode" ); |
| 628 | inst_size = m_opcode.GetByteSize(); |
| 629 | StreamString mnemonic_strm; |
| 630 | lldb::offset_t offset = 0; |
| 631 | lldb::ByteOrder byte_order = data.GetByteOrder(); |
| 632 | switch (inst_size) { |
| 633 | case 1: { |
| 634 | const uint8_t uval8 = data.GetU8(offset_ptr: &offset); |
| 635 | m_opcode.SetOpcode8(inst: uval8, order: byte_order); |
| 636 | m_opcode_name.assign(s: ".byte" ); |
| 637 | mnemonic_strm.Printf(format: "0x%2.2x" , uval8); |
| 638 | } break; |
| 639 | case 2: { |
| 640 | const uint16_t uval16 = data.GetU16(offset_ptr: &offset); |
| 641 | m_opcode.SetOpcode16(inst: uval16, order: byte_order); |
| 642 | m_opcode_name.assign(s: ".short" ); |
| 643 | mnemonic_strm.Printf(format: "0x%4.4x" , uval16); |
| 644 | } break; |
| 645 | case 4: { |
| 646 | const uint32_t uval32 = data.GetU32(offset_ptr: &offset); |
| 647 | m_opcode.SetOpcode32(inst: uval32, order: byte_order); |
| 648 | m_opcode_name.assign(s: ".long" ); |
| 649 | mnemonic_strm.Printf(format: "0x%8.8x" , uval32); |
| 650 | } break; |
| 651 | case 8: { |
| 652 | const uint64_t uval64 = data.GetU64(offset_ptr: &offset); |
| 653 | m_opcode.SetOpcode64(inst: uval64, order: byte_order); |
| 654 | m_opcode_name.assign(s: ".quad" ); |
| 655 | mnemonic_strm.Printf(format: "0x%16.16" PRIx64, uval64); |
| 656 | } break; |
| 657 | default: |
| 658 | if (inst_size == 0) |
| 659 | return; |
| 660 | else { |
| 661 | const uint8_t *bytes = data.PeekData(offset, length: inst_size); |
| 662 | if (bytes == nullptr) |
| 663 | return; |
| 664 | m_opcode_name.assign(s: ".byte" ); |
| 665 | m_opcode.SetOpcodeBytes(bytes, length: inst_size); |
| 666 | mnemonic_strm.Printf(format: "0x%2.2x" , bytes[0]); |
| 667 | for (uint32_t i = 1; i < inst_size; ++i) |
| 668 | mnemonic_strm.Printf(format: " 0x%2.2x" , bytes[i]); |
| 669 | } |
| 670 | break; |
| 671 | } |
| 672 | m_mnemonics = std::string(mnemonic_strm.GetString()); |
| 673 | return; |
| 674 | } |
| 675 | |
| 676 | static RegularExpression s_regex( |
| 677 | llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?" )); |
| 678 | |
| 679 | llvm::SmallVector<llvm::StringRef, 4> matches; |
| 680 | if (s_regex.Execute(string: out_string, matches: &matches)) { |
| 681 | m_opcode_name = matches[1].str(); |
| 682 | m_mnemonics = matches[2].str(); |
| 683 | } |
| 684 | matches.clear(); |
| 685 | if (s_regex.Execute(string: markup_out_string, matches: &matches)) { |
| 686 | m_markup_opcode_name = matches[1].str(); |
| 687 | m_markup_mnemonics = matches[2].str(); |
| 688 | } |
| 689 | } |
| 690 | } |
| 691 | } |
| 692 | |
| 693 | bool IsValid() const { return m_is_valid; } |
| 694 | |
| 695 | bool UsingFileAddress() const { return m_using_file_addr; } |
| 696 | size_t GetByteSize() const { return m_opcode.GetByteSize(); } |
| 697 | |
| 698 | /// Grants exclusive access to the disassembler and initializes it with the |
| 699 | /// given InstructionLLVMC and an optional ExecutionContext. |
| 700 | class DisassemblerScope { |
| 701 | std::shared_ptr<DisassemblerLLVMC> m_disasm; |
| 702 | |
| 703 | public: |
| 704 | explicit DisassemblerScope( |
| 705 | InstructionLLVMC &i, |
| 706 | const lldb_private::ExecutionContext *exe_ctx = nullptr) |
| 707 | : m_disasm(i.m_disasm_wp.lock()) { |
| 708 | m_disasm->m_mutex.lock(); |
| 709 | m_disasm->m_inst = &i; |
| 710 | m_disasm->m_exe_ctx = exe_ctx; |
| 711 | } |
| 712 | ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } |
| 713 | |
| 714 | /// Evaluates to true if this scope contains a valid disassembler. |
| 715 | operator bool() const { return static_cast<bool>(m_disasm); } |
| 716 | |
| 717 | std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } |
| 718 | }; |
| 719 | |
| 720 | static llvm::StringRef::const_iterator |
| 721 | ConsumeWhitespace(llvm::StringRef::const_iterator osi, |
| 722 | llvm::StringRef::const_iterator ose) { |
| 723 | while (osi != ose) { |
| 724 | switch (*osi) { |
| 725 | default: |
| 726 | return osi; |
| 727 | case ' ': |
| 728 | case '\t': |
| 729 | break; |
| 730 | } |
| 731 | ++osi; |
| 732 | } |
| 733 | |
| 734 | return osi; |
| 735 | } |
| 736 | |
| 737 | static std::pair<bool, llvm::StringRef::const_iterator> |
| 738 | ConsumeChar(llvm::StringRef::const_iterator osi, const char c, |
| 739 | llvm::StringRef::const_iterator ose) { |
| 740 | bool found = false; |
| 741 | |
| 742 | osi = ConsumeWhitespace(osi, ose); |
| 743 | if (osi != ose && *osi == c) { |
| 744 | found = true; |
| 745 | ++osi; |
| 746 | } |
| 747 | |
| 748 | return std::make_pair(x&: found, y&: osi); |
| 749 | } |
| 750 | |
| 751 | static std::pair<Operand, llvm::StringRef::const_iterator> |
| 752 | ParseRegisterName(llvm::StringRef::const_iterator osi, |
| 753 | llvm::StringRef::const_iterator ose) { |
| 754 | Operand ret; |
| 755 | ret.m_type = Operand::Type::Register; |
| 756 | std::string str; |
| 757 | |
| 758 | osi = ConsumeWhitespace(osi, ose); |
| 759 | |
| 760 | while (osi != ose) { |
| 761 | if (*osi >= '0' && *osi <= '9') { |
| 762 | if (str.empty()) { |
| 763 | return std::make_pair(x: Operand(), y&: osi); |
| 764 | } else { |
| 765 | str.push_back(c: *osi); |
| 766 | } |
| 767 | } else if (*osi >= 'a' && *osi <= 'z') { |
| 768 | str.push_back(c: *osi); |
| 769 | } else { |
| 770 | switch (*osi) { |
| 771 | default: |
| 772 | if (str.empty()) { |
| 773 | return std::make_pair(x: Operand(), y&: osi); |
| 774 | } else { |
| 775 | ret.m_register = ConstString(str); |
| 776 | return std::make_pair(x&: ret, y&: osi); |
| 777 | } |
| 778 | case '%': |
| 779 | if (!str.empty()) { |
| 780 | return std::make_pair(x: Operand(), y&: osi); |
| 781 | } |
| 782 | break; |
| 783 | } |
| 784 | } |
| 785 | ++osi; |
| 786 | } |
| 787 | |
| 788 | ret.m_register = ConstString(str); |
| 789 | return std::make_pair(x&: ret, y&: osi); |
| 790 | } |
| 791 | |
| 792 | static std::pair<Operand, llvm::StringRef::const_iterator> |
| 793 | ParseImmediate(llvm::StringRef::const_iterator osi, |
| 794 | llvm::StringRef::const_iterator ose) { |
| 795 | Operand ret; |
| 796 | ret.m_type = Operand::Type::Immediate; |
| 797 | std::string str; |
| 798 | bool is_hex = false; |
| 799 | |
| 800 | osi = ConsumeWhitespace(osi, ose); |
| 801 | |
| 802 | while (osi != ose) { |
| 803 | if (*osi >= '0' && *osi <= '9') { |
| 804 | str.push_back(c: *osi); |
| 805 | } else if (*osi >= 'a' && *osi <= 'f') { |
| 806 | if (is_hex) { |
| 807 | str.push_back(c: *osi); |
| 808 | } else { |
| 809 | return std::make_pair(x: Operand(), y&: osi); |
| 810 | } |
| 811 | } else { |
| 812 | switch (*osi) { |
| 813 | default: |
| 814 | if (str.empty()) { |
| 815 | return std::make_pair(x: Operand(), y&: osi); |
| 816 | } else { |
| 817 | ret.m_immediate = strtoull(nptr: str.c_str(), endptr: nullptr, base: 0); |
| 818 | return std::make_pair(x&: ret, y&: osi); |
| 819 | } |
| 820 | case 'x': |
| 821 | if (str == "0" ) { |
| 822 | is_hex = true; |
| 823 | str.push_back(c: *osi); |
| 824 | } else { |
| 825 | return std::make_pair(x: Operand(), y&: osi); |
| 826 | } |
| 827 | break; |
| 828 | case '#': |
| 829 | case '$': |
| 830 | if (!str.empty()) { |
| 831 | return std::make_pair(x: Operand(), y&: osi); |
| 832 | } |
| 833 | break; |
| 834 | case '-': |
| 835 | if (str.empty()) { |
| 836 | ret.m_negative = true; |
| 837 | } else { |
| 838 | return std::make_pair(x: Operand(), y&: osi); |
| 839 | } |
| 840 | } |
| 841 | } |
| 842 | ++osi; |
| 843 | } |
| 844 | |
| 845 | ret.m_immediate = strtoull(nptr: str.c_str(), endptr: nullptr, base: 0); |
| 846 | return std::make_pair(x&: ret, y&: osi); |
| 847 | } |
| 848 | |
| 849 | // -0x5(%rax,%rax,2) |
| 850 | static std::pair<Operand, llvm::StringRef::const_iterator> |
| 851 | ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, |
| 852 | llvm::StringRef::const_iterator ose) { |
| 853 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
| 854 | ParseImmediate(osi, ose); |
| 855 | if (offset_and_iterator.first.IsValid()) { |
| 856 | osi = offset_and_iterator.second; |
| 857 | } |
| 858 | |
| 859 | bool found = false; |
| 860 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '(', ose); |
| 861 | if (!found) { |
| 862 | return std::make_pair(x: Operand(), y&: osi); |
| 863 | } |
| 864 | |
| 865 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
| 866 | ParseRegisterName(osi, ose); |
| 867 | if (base_and_iterator.first.IsValid()) { |
| 868 | osi = base_and_iterator.second; |
| 869 | } else { |
| 870 | return std::make_pair(x: Operand(), y&: osi); |
| 871 | } |
| 872 | |
| 873 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
| 874 | if (!found) { |
| 875 | return std::make_pair(x: Operand(), y&: osi); |
| 876 | } |
| 877 | |
| 878 | std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = |
| 879 | ParseRegisterName(osi, ose); |
| 880 | if (index_and_iterator.first.IsValid()) { |
| 881 | osi = index_and_iterator.second; |
| 882 | } else { |
| 883 | return std::make_pair(x: Operand(), y&: osi); |
| 884 | } |
| 885 | |
| 886 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
| 887 | if (!found) { |
| 888 | return std::make_pair(x: Operand(), y&: osi); |
| 889 | } |
| 890 | |
| 891 | std::pair<Operand, llvm::StringRef::const_iterator> |
| 892 | multiplier_and_iterator = ParseImmediate(osi, ose); |
| 893 | if (index_and_iterator.first.IsValid()) { |
| 894 | osi = index_and_iterator.second; |
| 895 | } else { |
| 896 | return std::make_pair(x: Operand(), y&: osi); |
| 897 | } |
| 898 | |
| 899 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ')', ose); |
| 900 | if (!found) { |
| 901 | return std::make_pair(x: Operand(), y&: osi); |
| 902 | } |
| 903 | |
| 904 | Operand product; |
| 905 | product.m_type = Operand::Type::Product; |
| 906 | product.m_children.push_back(x: index_and_iterator.first); |
| 907 | product.m_children.push_back(x: multiplier_and_iterator.first); |
| 908 | |
| 909 | Operand index; |
| 910 | index.m_type = Operand::Type::Sum; |
| 911 | index.m_children.push_back(x: base_and_iterator.first); |
| 912 | index.m_children.push_back(x: product); |
| 913 | |
| 914 | if (offset_and_iterator.first.IsValid()) { |
| 915 | Operand offset; |
| 916 | offset.m_type = Operand::Type::Sum; |
| 917 | offset.m_children.push_back(x: offset_and_iterator.first); |
| 918 | offset.m_children.push_back(x: index); |
| 919 | |
| 920 | Operand deref; |
| 921 | deref.m_type = Operand::Type::Dereference; |
| 922 | deref.m_children.push_back(x: offset); |
| 923 | return std::make_pair(x&: deref, y&: osi); |
| 924 | } else { |
| 925 | Operand deref; |
| 926 | deref.m_type = Operand::Type::Dereference; |
| 927 | deref.m_children.push_back(x: index); |
| 928 | return std::make_pair(x&: deref, y&: osi); |
| 929 | } |
| 930 | } |
| 931 | |
| 932 | // -0x10(%rbp) |
| 933 | static std::pair<Operand, llvm::StringRef::const_iterator> |
| 934 | ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, |
| 935 | llvm::StringRef::const_iterator ose) { |
| 936 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
| 937 | ParseImmediate(osi, ose); |
| 938 | if (offset_and_iterator.first.IsValid()) { |
| 939 | osi = offset_and_iterator.second; |
| 940 | } |
| 941 | |
| 942 | bool found = false; |
| 943 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '(', ose); |
| 944 | if (!found) { |
| 945 | return std::make_pair(x: Operand(), y&: osi); |
| 946 | } |
| 947 | |
| 948 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
| 949 | ParseRegisterName(osi, ose); |
| 950 | if (base_and_iterator.first.IsValid()) { |
| 951 | osi = base_and_iterator.second; |
| 952 | } else { |
| 953 | return std::make_pair(x: Operand(), y&: osi); |
| 954 | } |
| 955 | |
| 956 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ')', ose); |
| 957 | if (!found) { |
| 958 | return std::make_pair(x: Operand(), y&: osi); |
| 959 | } |
| 960 | |
| 961 | if (offset_and_iterator.first.IsValid()) { |
| 962 | Operand offset; |
| 963 | offset.m_type = Operand::Type::Sum; |
| 964 | offset.m_children.push_back(x: offset_and_iterator.first); |
| 965 | offset.m_children.push_back(x: base_and_iterator.first); |
| 966 | |
| 967 | Operand deref; |
| 968 | deref.m_type = Operand::Type::Dereference; |
| 969 | deref.m_children.push_back(x: offset); |
| 970 | return std::make_pair(x&: deref, y&: osi); |
| 971 | } else { |
| 972 | Operand deref; |
| 973 | deref.m_type = Operand::Type::Dereference; |
| 974 | deref.m_children.push_back(x: base_and_iterator.first); |
| 975 | return std::make_pair(x&: deref, y&: osi); |
| 976 | } |
| 977 | } |
| 978 | |
| 979 | // [sp, #8]! |
| 980 | static std::pair<Operand, llvm::StringRef::const_iterator> |
| 981 | ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, |
| 982 | llvm::StringRef::const_iterator ose) { |
| 983 | bool found = false; |
| 984 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '[', ose); |
| 985 | if (!found) { |
| 986 | return std::make_pair(x: Operand(), y&: osi); |
| 987 | } |
| 988 | |
| 989 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
| 990 | ParseRegisterName(osi, ose); |
| 991 | if (base_and_iterator.first.IsValid()) { |
| 992 | osi = base_and_iterator.second; |
| 993 | } else { |
| 994 | return std::make_pair(x: Operand(), y&: osi); |
| 995 | } |
| 996 | |
| 997 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose); |
| 998 | if (!found) { |
| 999 | return std::make_pair(x: Operand(), y&: osi); |
| 1000 | } |
| 1001 | |
| 1002 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
| 1003 | ParseImmediate(osi, ose); |
| 1004 | if (offset_and_iterator.first.IsValid()) { |
| 1005 | osi = offset_and_iterator.second; |
| 1006 | } |
| 1007 | |
| 1008 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ']', ose); |
| 1009 | if (!found) { |
| 1010 | return std::make_pair(x: Operand(), y&: osi); |
| 1011 | } |
| 1012 | |
| 1013 | Operand offset; |
| 1014 | offset.m_type = Operand::Type::Sum; |
| 1015 | offset.m_children.push_back(x: offset_and_iterator.first); |
| 1016 | offset.m_children.push_back(x: base_and_iterator.first); |
| 1017 | |
| 1018 | Operand deref; |
| 1019 | deref.m_type = Operand::Type::Dereference; |
| 1020 | deref.m_children.push_back(x: offset); |
| 1021 | return std::make_pair(x&: deref, y&: osi); |
| 1022 | } |
| 1023 | |
| 1024 | // [sp] |
| 1025 | static std::pair<Operand, llvm::StringRef::const_iterator> |
| 1026 | ParseARMDerefAccess(llvm::StringRef::const_iterator osi, |
| 1027 | llvm::StringRef::const_iterator ose) { |
| 1028 | bool found = false; |
| 1029 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '[', ose); |
| 1030 | if (!found) { |
| 1031 | return std::make_pair(x: Operand(), y&: osi); |
| 1032 | } |
| 1033 | |
| 1034 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
| 1035 | ParseRegisterName(osi, ose); |
| 1036 | if (base_and_iterator.first.IsValid()) { |
| 1037 | osi = base_and_iterator.second; |
| 1038 | } else { |
| 1039 | return std::make_pair(x: Operand(), y&: osi); |
| 1040 | } |
| 1041 | |
| 1042 | std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ']', ose); |
| 1043 | if (!found) { |
| 1044 | return std::make_pair(x: Operand(), y&: osi); |
| 1045 | } |
| 1046 | |
| 1047 | Operand deref; |
| 1048 | deref.m_type = Operand::Type::Dereference; |
| 1049 | deref.m_children.push_back(x: base_and_iterator.first); |
| 1050 | return std::make_pair(x&: deref, y&: osi); |
| 1051 | } |
| 1052 | |
| 1053 | static void DumpOperand(const Operand &op, Stream &s) { |
| 1054 | switch (op.m_type) { |
| 1055 | case Operand::Type::Dereference: |
| 1056 | s.PutCString(cstr: "*" ); |
| 1057 | DumpOperand(op: op.m_children[0], s); |
| 1058 | break; |
| 1059 | case Operand::Type::Immediate: |
| 1060 | if (op.m_negative) { |
| 1061 | s.PutCString(cstr: "-" ); |
| 1062 | } |
| 1063 | s.PutCString(cstr: llvm::to_string(Value: op.m_immediate)); |
| 1064 | break; |
| 1065 | case Operand::Type::Invalid: |
| 1066 | s.PutCString(cstr: "Invalid" ); |
| 1067 | break; |
| 1068 | case Operand::Type::Product: |
| 1069 | s.PutCString(cstr: "(" ); |
| 1070 | DumpOperand(op: op.m_children[0], s); |
| 1071 | s.PutCString(cstr: "*" ); |
| 1072 | DumpOperand(op: op.m_children[1], s); |
| 1073 | s.PutCString(cstr: ")" ); |
| 1074 | break; |
| 1075 | case Operand::Type::Register: |
| 1076 | s.PutCString(cstr: op.m_register.GetStringRef()); |
| 1077 | break; |
| 1078 | case Operand::Type::Sum: |
| 1079 | s.PutCString(cstr: "(" ); |
| 1080 | DumpOperand(op: op.m_children[0], s); |
| 1081 | s.PutCString(cstr: "+" ); |
| 1082 | DumpOperand(op: op.m_children[1], s); |
| 1083 | s.PutCString(cstr: ")" ); |
| 1084 | break; |
| 1085 | } |
| 1086 | } |
| 1087 | |
| 1088 | bool ParseOperands( |
| 1089 | llvm::SmallVectorImpl<Instruction::Operand> &operands) override { |
| 1090 | const char *operands_string = GetOperands(exe_ctx: nullptr); |
| 1091 | |
| 1092 | if (!operands_string) { |
| 1093 | return false; |
| 1094 | } |
| 1095 | |
| 1096 | llvm::StringRef operands_ref(operands_string); |
| 1097 | |
| 1098 | llvm::StringRef::const_iterator osi = operands_ref.begin(); |
| 1099 | llvm::StringRef::const_iterator ose = operands_ref.end(); |
| 1100 | |
| 1101 | while (osi != ose) { |
| 1102 | Operand operand; |
| 1103 | llvm::StringRef::const_iterator iter; |
| 1104 | |
| 1105 | if ((std::tie(args&: operand, args&: iter) = ParseIntelIndexedAccess(osi, ose), |
| 1106 | operand.IsValid()) || |
| 1107 | (std::tie(args&: operand, args&: iter) = ParseIntelDerefAccess(osi, ose), |
| 1108 | operand.IsValid()) || |
| 1109 | (std::tie(args&: operand, args&: iter) = ParseARMOffsetAccess(osi, ose), |
| 1110 | operand.IsValid()) || |
| 1111 | (std::tie(args&: operand, args&: iter) = ParseARMDerefAccess(osi, ose), |
| 1112 | operand.IsValid()) || |
| 1113 | (std::tie(args&: operand, args&: iter) = ParseRegisterName(osi, ose), |
| 1114 | operand.IsValid()) || |
| 1115 | (std::tie(args&: operand, args&: iter) = ParseImmediate(osi, ose), |
| 1116 | operand.IsValid())) { |
| 1117 | osi = iter; |
| 1118 | operands.push_back(Elt: operand); |
| 1119 | } else { |
| 1120 | return false; |
| 1121 | } |
| 1122 | |
| 1123 | std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = |
| 1124 | ConsumeChar(osi, c: ',', ose); |
| 1125 | if (found_and_iter.first) { |
| 1126 | osi = found_and_iter.second; |
| 1127 | } |
| 1128 | |
| 1129 | osi = ConsumeWhitespace(osi, ose); |
| 1130 | } |
| 1131 | |
| 1132 | DisassemblerSP disasm_sp = m_disasm_wp.lock(); |
| 1133 | |
| 1134 | if (disasm_sp && operands.size() > 1) { |
| 1135 | // TODO tie this into the MC Disassembler's notion of clobbers. |
| 1136 | switch (disasm_sp->GetArchitecture().GetMachine()) { |
| 1137 | default: |
| 1138 | break; |
| 1139 | case llvm::Triple::x86: |
| 1140 | case llvm::Triple::x86_64: |
| 1141 | operands[operands.size() - 1].m_clobbered = true; |
| 1142 | break; |
| 1143 | case llvm::Triple::arm: |
| 1144 | operands[0].m_clobbered = true; |
| 1145 | break; |
| 1146 | } |
| 1147 | } |
| 1148 | |
| 1149 | if (Log *log = GetLog(mask: LLDBLog::Process)) { |
| 1150 | StreamString ss; |
| 1151 | |
| 1152 | ss.Printf(format: "[%s] expands to %zu operands:\n" , operands_string, |
| 1153 | operands.size()); |
| 1154 | for (const Operand &operand : operands) { |
| 1155 | ss.PutCString(cstr: " " ); |
| 1156 | DumpOperand(op: operand, s&: ss); |
| 1157 | ss.PutCString(cstr: "\n" ); |
| 1158 | } |
| 1159 | |
| 1160 | log->PutString(str: ss.GetString()); |
| 1161 | } |
| 1162 | |
| 1163 | return true; |
| 1164 | } |
| 1165 | |
// Reports whether this instruction is a call. VisitInstruction() runs first
// so that m_is_call reflects the decoded instruction; if decoding does not
// succeed, the member's current value is returned unchanged.
bool IsCall() override {
  VisitInstruction();
  return m_is_call;
}
| 1170 | |
protected:
// Weak reference to the DisassemblerLLVMC this instruction works with;
// ParseOperands() locks it to query the architecture.
std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;

bool m_is_valid = false;
bool m_using_file_addr = false;
// Set by VisitInstruction() once the opcode has been decoded successfully, so
// the cached properties below are computed at most once.
bool m_has_visited_instruction = false;

// Be conservative. If we didn't understand the instruction, say it:
// - Might branch
// - Does not have a delay slot
// - Is not a call
// - Is not a load
// - Is not an authenticated instruction
bool m_does_branch = true;
bool m_has_delay_slot = false;
bool m_is_call = false;
bool m_is_load = false;
bool m_is_authenticated = false;
| 1189 | |
| 1190 | void VisitInstruction() { |
| 1191 | if (m_has_visited_instruction) |
| 1192 | return; |
| 1193 | |
| 1194 | DisassemblerScope disasm(*this); |
| 1195 | if (!disasm) |
| 1196 | return; |
| 1197 | |
| 1198 | DataExtractor data; |
| 1199 | if (!m_opcode.GetData(data)) |
| 1200 | return; |
| 1201 | |
| 1202 | bool is_alternate_isa; |
| 1203 | lldb::addr_t pc = m_address.GetFileAddress(); |
| 1204 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = |
| 1205 | GetDisasmToUse(is_alternate_isa, disasm); |
| 1206 | const uint8_t *opcode_data = data.GetDataStart(); |
| 1207 | const size_t opcode_data_len = data.GetByteSize(); |
| 1208 | llvm::MCInst inst; |
| 1209 | const size_t inst_size = |
| 1210 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst); |
| 1211 | if (inst_size == 0) |
| 1212 | return; |
| 1213 | |
| 1214 | m_has_visited_instruction = true; |
| 1215 | m_does_branch = mc_disasm_ptr->CanBranch(mc_inst&: inst); |
| 1216 | m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(mc_inst&: inst); |
| 1217 | m_is_call = mc_disasm_ptr->IsCall(mc_inst&: inst); |
| 1218 | m_is_load = mc_disasm_ptr->IsLoad(mc_inst&: inst); |
| 1219 | m_is_authenticated = mc_disasm_ptr->IsAuthenticated(mc_inst&: inst); |
| 1220 | } |
| 1221 | |
| 1222 | private: |
| 1223 | DisassemblerLLVMC::MCDisasmInstance * |
| 1224 | GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { |
| 1225 | is_alternate_isa = false; |
| 1226 | if (disasm) { |
| 1227 | if (disasm->m_alternate_disasm_up) { |
| 1228 | const AddressClass address_class = GetAddressClass(); |
| 1229 | |
| 1230 | if (address_class == AddressClass::eCodeAlternateISA) { |
| 1231 | is_alternate_isa = true; |
| 1232 | return disasm->m_alternate_disasm_up.get(); |
| 1233 | } |
| 1234 | } |
| 1235 | return disasm->m_disasm_up.get(); |
| 1236 | } |
| 1237 | return nullptr; |
| 1238 | } |
| 1239 | }; |
| 1240 | |
| 1241 | std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> |
| 1242 | DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, |
| 1243 | const char *features_str, |
| 1244 | unsigned flavor, |
| 1245 | DisassemblerLLVMC &owner) { |
| 1246 | using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; |
| 1247 | |
| 1248 | std::string Status; |
| 1249 | const llvm::Target *curr_target = |
| 1250 | llvm::TargetRegistry::lookupTarget(TripleStr: triple, Error&: Status); |
| 1251 | if (!curr_target) |
| 1252 | return Instance(); |
| 1253 | |
| 1254 | std::unique_ptr<llvm::MCInstrInfo> instr_info_up( |
| 1255 | curr_target->createMCInstrInfo()); |
| 1256 | if (!instr_info_up) |
| 1257 | return Instance(); |
| 1258 | |
| 1259 | std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( |
| 1260 | curr_target->createMCRegInfo(TT: triple)); |
| 1261 | if (!reg_info_up) |
| 1262 | return Instance(); |
| 1263 | |
| 1264 | std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( |
| 1265 | curr_target->createMCSubtargetInfo(TheTriple: triple, CPU: cpu, Features: features_str)); |
| 1266 | if (!subtarget_info_up) |
| 1267 | return Instance(); |
| 1268 | |
| 1269 | llvm::MCTargetOptions MCOptions; |
| 1270 | std::unique_ptr<llvm::MCAsmInfo> asm_info_up( |
| 1271 | curr_target->createMCAsmInfo(MRI: *reg_info_up, TheTriple: triple, Options: MCOptions)); |
| 1272 | if (!asm_info_up) |
| 1273 | return Instance(); |
| 1274 | |
| 1275 | std::unique_ptr<llvm::MCContext> context_up( |
| 1276 | new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(), |
| 1277 | reg_info_up.get(), subtarget_info_up.get())); |
| 1278 | if (!context_up) |
| 1279 | return Instance(); |
| 1280 | |
| 1281 | std::unique_ptr<llvm::MCDisassembler> disasm_up( |
| 1282 | curr_target->createMCDisassembler(STI: *subtarget_info_up, Ctx&: *context_up)); |
| 1283 | if (!disasm_up) |
| 1284 | return Instance(); |
| 1285 | |
| 1286 | std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( |
| 1287 | curr_target->createMCRelocationInfo(TT: triple, Ctx&: *context_up)); |
| 1288 | if (!rel_info_up) |
| 1289 | return Instance(); |
| 1290 | |
| 1291 | std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( |
| 1292 | curr_target->createMCSymbolizer( |
| 1293 | TT: triple, GetOpInfo: nullptr, SymbolLookUp: DisassemblerLLVMC::SymbolLookupCallback, DisInfo: &owner, |
| 1294 | Ctx: context_up.get(), RelInfo: std::move(rel_info_up))); |
| 1295 | disasm_up->setSymbolizer(std::move(symbolizer_up)); |
| 1296 | |
| 1297 | unsigned asm_printer_variant = |
| 1298 | flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; |
| 1299 | |
| 1300 | std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( |
| 1301 | curr_target->createMCInstPrinter(T: llvm::Triple{triple}, |
| 1302 | SyntaxVariant: asm_printer_variant, MAI: *asm_info_up, |
| 1303 | MII: *instr_info_up, MRI: *reg_info_up)); |
| 1304 | if (!instr_printer_up) |
| 1305 | return Instance(); |
| 1306 | |
| 1307 | instr_printer_up->setPrintBranchImmAsAddress(true); |
| 1308 | |
| 1309 | // Not all targets may have registered createMCInstrAnalysis(). |
| 1310 | std::unique_ptr<llvm::MCInstrAnalysis> instr_analysis_up( |
| 1311 | curr_target->createMCInstrAnalysis(Info: instr_info_up.get())); |
| 1312 | |
| 1313 | return Instance(new MCDisasmInstance( |
| 1314 | std::move(instr_info_up), std::move(reg_info_up), |
| 1315 | std::move(subtarget_info_up), std::move(asm_info_up), |
| 1316 | std::move(context_up), std::move(disasm_up), std::move(instr_printer_up), |
| 1317 | std::move(instr_analysis_up))); |
| 1318 | } |
| 1319 | |
| 1320 | DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( |
| 1321 | std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, |
| 1322 | std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, |
| 1323 | std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, |
| 1324 | std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, |
| 1325 | std::unique_ptr<llvm::MCContext> &&context_up, |
| 1326 | std::unique_ptr<llvm::MCDisassembler> &&disasm_up, |
| 1327 | std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up, |
| 1328 | std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up) |
| 1329 | : m_instr_info_up(std::move(instr_info_up)), |
| 1330 | m_reg_info_up(std::move(reg_info_up)), |
| 1331 | m_subtarget_info_up(std::move(subtarget_info_up)), |
| 1332 | m_asm_info_up(std::move(asm_info_up)), |
| 1333 | m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), |
| 1334 | m_instr_printer_up(std::move(instr_printer_up)), |
| 1335 | m_instr_analysis_up(std::move(instr_analysis_up)) { |
| 1336 | assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up && |
| 1337 | m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); |
| 1338 | } |
| 1339 | |
| 1340 | uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( |
| 1341 | const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, |
| 1342 | llvm::MCInst &mc_inst) const { |
| 1343 | llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); |
| 1344 | llvm::MCDisassembler::DecodeStatus status; |
| 1345 | |
| 1346 | uint64_t new_inst_size; |
| 1347 | status = m_disasm_up->getInstruction(Instr&: mc_inst, Size&: new_inst_size, Bytes: data, Address: pc, |
| 1348 | CStream&: llvm::nulls()); |
| 1349 | if (status == llvm::MCDisassembler::Success) |
| 1350 | return new_inst_size; |
| 1351 | else |
| 1352 | return 0; |
| 1353 | } |
| 1354 | |
| 1355 | void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( |
| 1356 | llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string, |
| 1357 | std::string &) { |
| 1358 | llvm::raw_string_ostream inst_stream(inst_string); |
| 1359 | llvm::raw_string_ostream (comments_string); |
| 1360 | |
| 1361 | inst_stream.enable_colors(enable: m_instr_printer_up->getUseColor()); |
| 1362 | m_instr_printer_up->setCommentStream(comments_stream); |
| 1363 | m_instr_printer_up->printInst(MI: &mc_inst, Address: pc, Annot: llvm::StringRef(), |
| 1364 | STI: *m_subtarget_info_up, OS&: inst_stream); |
| 1365 | m_instr_printer_up->setCommentStream(llvm::nulls()); |
| 1366 | |
| 1367 | static std::string g_newlines("\r\n" ); |
| 1368 | |
| 1369 | for (size_t newline_pos = 0; |
| 1370 | (newline_pos = comments_string.find_first_of(str: g_newlines, pos: newline_pos)) != |
| 1371 | comments_string.npos; |
| 1372 | /**/) { |
| 1373 | comments_string.replace(i1: comments_string.begin() + newline_pos, |
| 1374 | i2: comments_string.begin() + newline_pos + 1, n: 1, c: ' '); |
| 1375 | } |
| 1376 | } |
| 1377 | |
| 1378 | void DisassemblerLLVMC::MCDisasmInstance::SetStyle( |
| 1379 | bool use_hex_immed, HexImmediateStyle hex_style) { |
| 1380 | m_instr_printer_up->setPrintImmHex(use_hex_immed); |
| 1381 | switch (hex_style) { |
| 1382 | case eHexStyleC: |
| 1383 | m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); |
| 1384 | break; |
| 1385 | case eHexStyleAsm: |
| 1386 | m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); |
| 1387 | break; |
| 1388 | } |
| 1389 | } |
| 1390 | |
// Forwards the color setting to the instruction printer.
void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color) {
  m_instr_printer_up->setUseColor(use_color);
}
| 1394 | |
// Returns the instruction printer's current color setting.
bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const {
  return m_instr_printer_up->getUseColor();
}
| 1398 | |
| 1399 | bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( |
| 1400 | llvm::MCInst &mc_inst) const { |
| 1401 | if (m_instr_analysis_up) |
| 1402 | return m_instr_analysis_up->mayAffectControlFlow(Inst: mc_inst, MCRI: *m_reg_info_up); |
| 1403 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()) |
| 1404 | .mayAffectControlFlow(MI: mc_inst, RI: *m_reg_info_up); |
| 1405 | } |
| 1406 | |
| 1407 | bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( |
| 1408 | llvm::MCInst &mc_inst) const { |
| 1409 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).hasDelaySlot(); |
| 1410 | } |
| 1411 | |
| 1412 | bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { |
| 1413 | if (m_instr_analysis_up) |
| 1414 | return m_instr_analysis_up->isCall(Inst: mc_inst); |
| 1415 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).isCall(); |
| 1416 | } |
| 1417 | |
| 1418 | bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const { |
| 1419 | return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).mayLoad(); |
| 1420 | } |
| 1421 | |
| 1422 | bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated( |
| 1423 | llvm::MCInst &mc_inst) const { |
| 1424 | const auto &InstrDesc = m_instr_info_up->get(Opcode: mc_inst.getOpcode()); |
| 1425 | |
| 1426 | // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4 |
| 1427 | // == 'a' + 'c') as authenticated instructions for reporting purposes, in |
| 1428 | // addition to the standard authenticated instructions specified in ARMv8.3. |
| 1429 | bool IsBrkC47x = false; |
| 1430 | if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) { |
| 1431 | const llvm::MCOperand &Op0 = mc_inst.getOperand(i: 0); |
| 1432 | if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474) |
| 1433 | IsBrkC47x = true; |
| 1434 | } |
| 1435 | |
| 1436 | return InstrDesc.isAuthenticated() || IsBrkC47x; |
| 1437 | } |
| 1438 | |
| 1439 | DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, |
| 1440 | const char *flavor_string, |
| 1441 | const char *cpu_string, |
| 1442 | const char *features_string) |
| 1443 | : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), |
| 1444 | m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS), |
| 1445 | m_adrp_insn() { |
| 1446 | if (!FlavorValidForArchSpec(arch, flavor: m_flavor.c_str())) { |
| 1447 | m_flavor.assign(s: "default" ); |
| 1448 | } |
| 1449 | |
| 1450 | const bool cpu_or_features_overriden = cpu_string || features_string; |
| 1451 | unsigned flavor = ~0U; |
| 1452 | llvm::Triple triple = arch.GetTriple(); |
| 1453 | |
| 1454 | // So far the only supported flavor is "intel" on x86. The base class will |
| 1455 | // set this correctly coming in. |
| 1456 | if (triple.getArch() == llvm::Triple::x86 || |
| 1457 | triple.getArch() == llvm::Triple::x86_64) { |
| 1458 | if (m_flavor == "intel" ) { |
| 1459 | flavor = 1; |
| 1460 | } else if (m_flavor == "att" ) { |
| 1461 | flavor = 0; |
| 1462 | } |
| 1463 | } |
| 1464 | |
| 1465 | ArchSpec thumb_arch(arch); |
| 1466 | if (triple.getArch() == llvm::Triple::arm) { |
| 1467 | std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); |
| 1468 | // Replace "arm" with "thumb" so we get all thumb variants correct |
| 1469 | if (thumb_arch_name.size() > 3) { |
| 1470 | thumb_arch_name.erase(pos: 0, n: 3); |
| 1471 | thumb_arch_name.insert(pos: 0, s: "thumb" ); |
| 1472 | } else { |
| 1473 | thumb_arch_name = "thumbv9.3a" ; |
| 1474 | } |
| 1475 | thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); |
| 1476 | } |
| 1477 | |
| 1478 | // If no sub architecture specified then use the most recent arm architecture |
| 1479 | // so the disassembler will return all instructions. Without it we will see a |
| 1480 | // lot of unknown opcodes if the code uses instructions which are not |
| 1481 | // available in the oldest arm version (which is used when no sub architecture |
| 1482 | // is specified). |
| 1483 | if (triple.getArch() == llvm::Triple::arm && |
| 1484 | triple.getSubArch() == llvm::Triple::NoSubArch) |
| 1485 | triple.setArchName("armv9.3a" ); |
| 1486 | |
| 1487 | std::string features_str = |
| 1488 | features_string ? std::string(features_string) : "" ; |
| 1489 | const char *triple_str = triple.getTriple().c_str(); |
| 1490 | |
| 1491 | // ARM Cortex M0-M7 devices only execute thumb instructions |
| 1492 | if (arch.IsAlwaysThumbInstructions()) { |
| 1493 | triple_str = thumb_arch.GetTriple().getTriple().c_str(); |
| 1494 | if (!features_string) |
| 1495 | features_str += "+fp-armv8," ; |
| 1496 | } |
| 1497 | |
| 1498 | const char *cpu = cpu_string; |
| 1499 | |
| 1500 | if (!cpu_or_features_overriden) { |
| 1501 | switch (arch.GetCore()) { |
| 1502 | case ArchSpec::eCore_mips32: |
| 1503 | case ArchSpec::eCore_mips32el: |
| 1504 | cpu = "mips32" ; |
| 1505 | break; |
| 1506 | case ArchSpec::eCore_mips32r2: |
| 1507 | case ArchSpec::eCore_mips32r2el: |
| 1508 | cpu = "mips32r2" ; |
| 1509 | break; |
| 1510 | case ArchSpec::eCore_mips32r3: |
| 1511 | case ArchSpec::eCore_mips32r3el: |
| 1512 | cpu = "mips32r3" ; |
| 1513 | break; |
| 1514 | case ArchSpec::eCore_mips32r5: |
| 1515 | case ArchSpec::eCore_mips32r5el: |
| 1516 | cpu = "mips32r5" ; |
| 1517 | break; |
| 1518 | case ArchSpec::eCore_mips32r6: |
| 1519 | case ArchSpec::eCore_mips32r6el: |
| 1520 | cpu = "mips32r6" ; |
| 1521 | break; |
| 1522 | case ArchSpec::eCore_mips64: |
| 1523 | case ArchSpec::eCore_mips64el: |
| 1524 | cpu = "mips64" ; |
| 1525 | break; |
| 1526 | case ArchSpec::eCore_mips64r2: |
| 1527 | case ArchSpec::eCore_mips64r2el: |
| 1528 | cpu = "mips64r2" ; |
| 1529 | break; |
| 1530 | case ArchSpec::eCore_mips64r3: |
| 1531 | case ArchSpec::eCore_mips64r3el: |
| 1532 | cpu = "mips64r3" ; |
| 1533 | break; |
| 1534 | case ArchSpec::eCore_mips64r5: |
| 1535 | case ArchSpec::eCore_mips64r5el: |
| 1536 | cpu = "mips64r5" ; |
| 1537 | break; |
| 1538 | case ArchSpec::eCore_mips64r6: |
| 1539 | case ArchSpec::eCore_mips64r6el: |
| 1540 | cpu = "mips64r6" ; |
| 1541 | break; |
| 1542 | default: |
| 1543 | cpu = "" ; |
| 1544 | break; |
| 1545 | } |
| 1546 | } |
| 1547 | |
| 1548 | if (arch.IsMIPS() && !cpu_or_features_overriden) { |
| 1549 | uint32_t arch_flags = arch.GetFlags(); |
| 1550 | if (arch_flags & ArchSpec::eMIPSAse_msa) |
| 1551 | features_str += "+msa," ; |
| 1552 | if (arch_flags & ArchSpec::eMIPSAse_dsp) |
| 1553 | features_str += "+dsp," ; |
| 1554 | if (arch_flags & ArchSpec::eMIPSAse_dspr2) |
| 1555 | features_str += "+dspr2," ; |
| 1556 | } |
| 1557 | |
| 1558 | // If any AArch64 variant, enable latest ISA with all extensions unless the |
| 1559 | // CPU or features were overridden. |
| 1560 | if (triple.isAArch64() && !cpu_or_features_overriden) { |
| 1561 | features_str += "+all," ; |
| 1562 | if (triple.getVendor() == llvm::Triple::Apple) |
| 1563 | cpu = "apple-latest" ; |
| 1564 | } |
| 1565 | |
| 1566 | if (triple.isRISCV() && !cpu_or_features_overriden) { |
| 1567 | uint32_t arch_flags = arch.GetFlags(); |
| 1568 | if (arch_flags & ArchSpec::eRISCV_rvc) |
| 1569 | features_str += "+c," ; |
| 1570 | if (arch_flags & ArchSpec::eRISCV_rve) |
| 1571 | features_str += "+e," ; |
| 1572 | if ((arch_flags & ArchSpec::eRISCV_float_abi_single) == |
| 1573 | ArchSpec::eRISCV_float_abi_single) |
| 1574 | features_str += "+f," ; |
| 1575 | if ((arch_flags & ArchSpec::eRISCV_float_abi_double) == |
| 1576 | ArchSpec::eRISCV_float_abi_double) |
| 1577 | features_str += "+f,+d," ; |
| 1578 | if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) == |
| 1579 | ArchSpec::eRISCV_float_abi_quad) |
| 1580 | features_str += "+f,+d,+q," ; |
| 1581 | // FIXME: how do we detect features such as `+a`, `+m`? |
| 1582 | // Turn them on by default now, since everyone seems to use them |
| 1583 | features_str += "+a,+m," ; |
| 1584 | } |
| 1585 | |
| 1586 | // We use m_disasm_up.get() to tell whether we are valid or not, so if this |
| 1587 | // isn't good for some reason, we won't be valid and FindPlugin will fail and |
| 1588 | // we won't get used. |
| 1589 | m_disasm_up = MCDisasmInstance::Create(triple: triple_str, cpu, features_str: features_str.c_str(), |
| 1590 | flavor, owner&: *this); |
| 1591 | |
| 1592 | llvm::Triple::ArchType llvm_arch = triple.getArch(); |
| 1593 | |
| 1594 | // For arm CPUs that can execute arm or thumb instructions, also create a |
| 1595 | // thumb instruction disassembler. |
| 1596 | if (llvm_arch == llvm::Triple::arm) { |
| 1597 | std::string thumb_triple(thumb_arch.GetTriple().getTriple()); |
| 1598 | m_alternate_disasm_up = |
| 1599 | MCDisasmInstance::Create(triple: thumb_triple.c_str(), cpu: "" , features_str: features_str.c_str(), |
| 1600 | flavor, owner&: *this); |
| 1601 | if (!m_alternate_disasm_up) |
| 1602 | m_disasm_up.reset(); |
| 1603 | |
| 1604 | } else if (arch.IsMIPS()) { |
| 1605 | /* Create alternate disassembler for MIPS16 and microMIPS */ |
| 1606 | uint32_t arch_flags = arch.GetFlags(); |
| 1607 | if (arch_flags & ArchSpec::eMIPSAse_mips16) |
| 1608 | features_str += "+mips16," ; |
| 1609 | else if (arch_flags & ArchSpec::eMIPSAse_micromips) |
| 1610 | features_str += "+micromips," ; |
| 1611 | |
| 1612 | m_alternate_disasm_up = MCDisasmInstance::Create( |
| 1613 | triple: triple_str, cpu, features_str: features_str.c_str(), flavor, owner&: *this); |
| 1614 | if (!m_alternate_disasm_up) |
| 1615 | m_disasm_up.reset(); |
| 1616 | } |
| 1617 | } |
| 1618 | |
// Defaulted destructor, defined out of line in this file.
DisassemblerLLVMC::~DisassemblerLLVMC() = default;
| 1620 | |
| 1621 | lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, |
| 1622 | const char *flavor, |
| 1623 | const char *cpu, |
| 1624 | const char *features) { |
| 1625 | if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { |
| 1626 | auto disasm_sp = |
| 1627 | std::make_shared<DisassemblerLLVMC>(args: arch, args&: flavor, args&: cpu, args&: features); |
| 1628 | if (disasm_sp && disasm_sp->IsValid()) |
| 1629 | return disasm_sp; |
| 1630 | } |
| 1631 | return lldb::DisassemblerSP(); |
| 1632 | } |
| 1633 | |
| 1634 | size_t DisassemblerLLVMC::(const Address &base_addr, |
| 1635 | const DataExtractor &data, |
| 1636 | lldb::offset_t data_offset, |
| 1637 | size_t num_instructions, |
| 1638 | bool append, bool data_from_file) { |
| 1639 | if (!append) |
| 1640 | m_instruction_list.Clear(); |
| 1641 | |
| 1642 | if (!IsValid()) |
| 1643 | return 0; |
| 1644 | |
| 1645 | m_data_from_file = data_from_file; |
| 1646 | uint32_t data_cursor = data_offset; |
| 1647 | const size_t data_byte_size = data.GetByteSize(); |
| 1648 | uint32_t instructions_parsed = 0; |
| 1649 | Address inst_addr(base_addr); |
| 1650 | |
| 1651 | while (data_cursor < data_byte_size && |
| 1652 | instructions_parsed < num_instructions) { |
| 1653 | |
| 1654 | AddressClass address_class = AddressClass::eCode; |
| 1655 | |
| 1656 | if (m_alternate_disasm_up) |
| 1657 | address_class = inst_addr.GetAddressClass(); |
| 1658 | |
| 1659 | InstructionSP inst_sp( |
| 1660 | new InstructionLLVMC(*this, inst_addr, address_class)); |
| 1661 | |
| 1662 | if (!inst_sp) |
| 1663 | break; |
| 1664 | |
| 1665 | uint32_t inst_size = inst_sp->Decode(disassembler: *this, data, data_offset: data_cursor); |
| 1666 | |
| 1667 | if (inst_size == 0) |
| 1668 | break; |
| 1669 | |
| 1670 | m_instruction_list.Append(inst_sp); |
| 1671 | data_cursor += inst_size; |
| 1672 | inst_addr.Slide(offset: inst_size); |
| 1673 | instructions_parsed++; |
| 1674 | } |
| 1675 | |
| 1676 | return data_cursor - data_offset; |
| 1677 | } |
| 1678 | |
| 1679 | void DisassemblerLLVMC::Initialize() { |
| 1680 | PluginManager::RegisterPlugin(name: GetPluginNameStatic(), |
| 1681 | description: "Disassembler that uses LLVM MC to disassemble " |
| 1682 | "i386, x86_64, ARM, and ARM64." , |
| 1683 | create_callback: CreateInstance); |
| 1684 | |
| 1685 | llvm::InitializeAllTargetInfos(); |
| 1686 | llvm::InitializeAllTargetMCs(); |
| 1687 | llvm::InitializeAllAsmParsers(); |
| 1688 | llvm::InitializeAllDisassemblers(); |
| 1689 | } |
| 1690 | |
| 1691 | void DisassemblerLLVMC::Terminate() { |
| 1692 | PluginManager::UnregisterPlugin(create_callback: CreateInstance); |
| 1693 | } |
| 1694 | |
| 1695 | int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, |
| 1696 | uint64_t offset, uint64_t size, |
| 1697 | int tag_type, void *tag_bug) { |
| 1698 | return static_cast<DisassemblerLLVMC *>(disassembler) |
| 1699 | ->OpInfo(PC: pc, Offset: offset, Size: size, TagType: tag_type, TagBug: tag_bug); |
| 1700 | } |
| 1701 | |
| 1702 | const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, |
| 1703 | uint64_t value, |
| 1704 | uint64_t *type, uint64_t pc, |
| 1705 | const char **name) { |
| 1706 | return static_cast<DisassemblerLLVMC *>(disassembler) |
| 1707 | ->SymbolLookup(ReferenceValue: value, ReferenceType: type, ReferencePC: pc, ReferenceName: name); |
| 1708 | } |
| 1709 | |
| 1710 | bool DisassemblerLLVMC::FlavorValidForArchSpec( |
| 1711 | const lldb_private::ArchSpec &arch, const char *flavor) { |
| 1712 | llvm::Triple triple = arch.GetTriple(); |
| 1713 | if (flavor == nullptr || strcmp(s1: flavor, s2: "default" ) == 0) |
| 1714 | return true; |
| 1715 | |
| 1716 | if (triple.getArch() == llvm::Triple::x86 || |
| 1717 | triple.getArch() == llvm::Triple::x86_64) { |
| 1718 | return strcmp(s1: flavor, s2: "intel" ) == 0 || strcmp(s1: flavor, s2: "att" ) == 0; |
| 1719 | } else |
| 1720 | return false; |
| 1721 | } |
| 1722 | |
| 1723 | bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } |
| 1724 | |
| 1725 | int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, |
| 1726 | int tag_type, void *tag_bug) { |
| 1727 | switch (tag_type) { |
| 1728 | default: |
| 1729 | break; |
| 1730 | case 1: |
| 1731 | memset(s: tag_bug, c: 0, n: sizeof(::LLVMOpInfo1)); |
| 1732 | break; |
| 1733 | } |
| 1734 | return 0; |
| 1735 | } |
| 1736 | |
| 1737 | const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, |
| 1738 | uint64_t pc, const char **name) { |
| 1739 | if (*type_ptr) { |
| 1740 | if (m_exe_ctx && m_inst) { |
| 1741 | // std::string remove_this_prior_to_checkin; |
| 1742 | Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; |
| 1743 | Address value_so_addr; |
| 1744 | Address pc_so_addr; |
| 1745 | if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 || |
| 1746 | target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be || |
| 1747 | target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) { |
| 1748 | if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { |
| 1749 | m_adrp_address = pc; |
| 1750 | m_adrp_insn = value; |
| 1751 | *name = nullptr; |
| 1752 | *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; |
| 1753 | return nullptr; |
| 1754 | } |
| 1755 | // If this instruction is an ADD and |
| 1756 | // the previous instruction was an ADRP and |
| 1757 | // the ADRP's register and this ADD's register are the same, |
| 1758 | // then this is a pc-relative address calculation. |
| 1759 | if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && |
| 1760 | m_adrp_insn && m_adrp_address == pc - 4 && |
| 1761 | (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) { |
| 1762 | uint32_t addxri_inst; |
| 1763 | uint64_t adrp_imm, addxri_imm; |
| 1764 | // Get immlo and immhi bits, OR them together to get the ADRP imm |
| 1765 | // value. |
| 1766 | adrp_imm = |
| 1767 | ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3); |
| 1768 | // if high bit of immhi after right-shifting set, sign extend |
| 1769 | if (adrp_imm & (1ULL << 20)) |
| 1770 | adrp_imm |= ~((1ULL << 21) - 1); |
| 1771 | |
| 1772 | addxri_inst = value; |
| 1773 | addxri_imm = (addxri_inst >> 10) & 0xfff; |
| 1774 | // check if 'sh' bit is set, shift imm value up if so |
| 1775 | // (this would make no sense, ADRP already gave us this part) |
| 1776 | if ((addxri_inst >> (12 + 5 + 5)) & 1) |
| 1777 | addxri_imm <<= 12; |
| 1778 | value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) + |
| 1779 | addxri_imm; |
| 1780 | } |
| 1781 | m_adrp_address = LLDB_INVALID_ADDRESS; |
| 1782 | m_adrp_insn.reset(); |
| 1783 | } |
| 1784 | |
| 1785 | if (m_inst->UsingFileAddress()) { |
| 1786 | ModuleSP module_sp(m_inst->GetAddress().GetModule()); |
| 1787 | if (module_sp) { |
| 1788 | module_sp->ResolveFileAddress(vm_addr: value, so_addr&: value_so_addr); |
| 1789 | module_sp->ResolveFileAddress(vm_addr: pc, so_addr&: pc_so_addr); |
| 1790 | } |
| 1791 | } else if (target && target->HasLoadedSections()) { |
| 1792 | target->ResolveLoadAddress(load_addr: value, so_addr&: value_so_addr); |
| 1793 | target->ResolveLoadAddress(load_addr: pc, so_addr&: pc_so_addr); |
| 1794 | } |
| 1795 | |
| 1796 | SymbolContext sym_ctx; |
| 1797 | const SymbolContextItem resolve_scope = |
| 1798 | eSymbolContextFunction | eSymbolContextSymbol; |
| 1799 | if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { |
| 1800 | pc_so_addr.GetModule()->ResolveSymbolContextForAddress( |
| 1801 | so_addr: pc_so_addr, resolve_scope, sc&: sym_ctx); |
| 1802 | } |
| 1803 | |
| 1804 | if (value_so_addr.IsValid() && value_so_addr.GetSection()) { |
| 1805 | StreamString ss; |
| 1806 | |
| 1807 | bool format_omitting_current_func_name = false; |
| 1808 | if (sym_ctx.symbol || sym_ctx.function) { |
| 1809 | AddressRange range; |
| 1810 | for (uint32_t idx = 0; |
| 1811 | sym_ctx.GetAddressRange(scope: resolve_scope, range_idx: idx, use_inline_block_range: false, range); |
| 1812 | ++idx) { |
| 1813 | if (range.ContainsLoadAddress(so_addr: value_so_addr, target)) { |
| 1814 | format_omitting_current_func_name = true; |
| 1815 | break; |
| 1816 | } |
| 1817 | } |
| 1818 | } |
| 1819 | |
| 1820 | // If the "value" address (the target address we're symbolicating) is |
| 1821 | // inside the same SymbolContext as the current instruction pc |
| 1822 | // (pc_so_addr), don't print the full function name - just print it |
| 1823 | // with DumpStyleNoFunctionName style, e.g. "<+36>". |
| 1824 | if (format_omitting_current_func_name) { |
| 1825 | value_so_addr.Dump(s: &ss, exe_scope: target, style: Address::DumpStyleNoFunctionName, |
| 1826 | fallback_style: Address::DumpStyleSectionNameOffset); |
| 1827 | } else { |
| 1828 | value_so_addr.Dump( |
| 1829 | s: &ss, exe_scope: target, |
| 1830 | style: Address::DumpStyleResolvedDescriptionNoFunctionArguments, |
| 1831 | fallback_style: Address::DumpStyleSectionNameOffset); |
| 1832 | } |
| 1833 | |
| 1834 | if (!ss.GetString().empty()) { |
| 1835 | // If Address::Dump returned a multi-line description, most commonly |
| 1836 | // seen when we have multiple levels of inlined functions at an |
| 1837 | // address, only show the first line. |
| 1838 | std::string str = std::string(ss.GetString()); |
| 1839 | size_t first_eol_char = str.find_first_of(s: "\r\n" ); |
| 1840 | if (first_eol_char != std::string::npos) { |
| 1841 | str.erase(pos: first_eol_char); |
| 1842 | } |
| 1843 | m_inst->AppendComment(description&: str); |
| 1844 | } |
| 1845 | } |
| 1846 | } |
| 1847 | } |
| 1848 | |
| 1849 | // TODO: llvm-objdump sets the type_ptr to the |
| 1850 | // LLVMDisassembler_ReferenceType_Out_* values |
| 1851 | // based on where value_so_addr is pointing, with |
| 1852 | // Mach-O specific augmentations in MachODump.cpp. e.g. |
| 1853 | // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand |
| 1854 | // handles. |
| 1855 | *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; |
| 1856 | *name = nullptr; |
| 1857 | return nullptr; |
| 1858 | } |
| 1859 | |