1//===-- DisassemblerLLVMC.cpp ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "DisassemblerLLVMC.h"
10
11#include "llvm-c/Disassembler.h"
12#include "llvm/ADT/SmallString.h"
13#include "llvm/ADT/StringExtras.h"
14#include "llvm/MC/MCAsmInfo.h"
15#include "llvm/MC/MCContext.h"
16#include "llvm/MC/MCDisassembler/MCDisassembler.h"
17#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
18#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCInstPrinter.h"
21#include "llvm/MC/MCInstrAnalysis.h"
22#include "llvm/MC/MCInstrInfo.h"
23#include "llvm/MC/MCRegisterInfo.h"
24#include "llvm/MC/MCSubtargetInfo.h"
25#include "llvm/MC/MCTargetOptions.h"
26#include "llvm/MC/TargetRegistry.h"
27#include "llvm/Support/ErrorHandling.h"
28#include "llvm/Support/ScopedPrinter.h"
29#include "llvm/Support/TargetSelect.h"
30#include "llvm/TargetParser/AArch64TargetParser.h"
31
32#include "lldb/Core/Address.h"
33#include "lldb/Core/Module.h"
34#include "lldb/Symbol/Function.h"
35#include "lldb/Symbol/SymbolContext.h"
36#include "lldb/Target/ExecutionContext.h"
37#include "lldb/Target/Process.h"
38#include "lldb/Target/RegisterContext.h"
39#include "lldb/Target/SectionLoadList.h"
40#include "lldb/Target/StackFrame.h"
41#include "lldb/Target/Target.h"
42#include "lldb/Utility/DataExtractor.h"
43#include "lldb/Utility/LLDBLog.h"
44#include "lldb/Utility/Log.h"
45#include "lldb/Utility/RegularExpression.h"
46#include "lldb/Utility/Stream.h"
47#include <optional>
48
49using namespace lldb;
50using namespace lldb_private;
51
52LLDB_PLUGIN_DEFINE(DisassemblerLLVMC)
53
54class DisassemblerLLVMC::MCDisasmInstance {
55public:
56 static std::unique_ptr<MCDisasmInstance>
57 Create(const char *triple, const char *cpu, const char *features_str,
58 unsigned flavor, DisassemblerLLVMC &owner);
59
60 ~MCDisasmInstance() = default;
61
62 uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
63 lldb::addr_t pc, llvm::MCInst &mc_inst) const;
64 void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc,
65 std::string &inst_string, std::string &comments_string);
66 void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
67 void SetUseColor(bool use_color);
68 bool GetUseColor() const;
69 bool CanBranch(llvm::MCInst &mc_inst) const;
70 bool HasDelaySlot(llvm::MCInst &mc_inst) const;
71 bool IsCall(llvm::MCInst &mc_inst) const;
72 bool IsLoad(llvm::MCInst &mc_inst) const;
73 bool IsAuthenticated(llvm::MCInst &mc_inst) const;
74
75private:
76 MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
77 std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
78 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
79 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
80 std::unique_ptr<llvm::MCContext> &&context_up,
81 std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
82 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up,
83 std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up);
84
85 std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;
86 std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;
87 std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;
88 std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;
89 std::unique_ptr<llvm::MCContext> m_context_up;
90 std::unique_ptr<llvm::MCDisassembler> m_disasm_up;
91 std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
92 std::unique_ptr<llvm::MCInstrAnalysis> m_instr_analysis_up;
93};
94
95namespace x86 {
96
/// The three values that decide an instruction's control flow kind.
/// InstructionLengthDecode() decodes an instruction into this struct.
struct InstructionOpcodeAndModrm {
  /// Primary opcode of the instruction.
  /// For a one-byte opcode instruction it is the first byte after the prefix.
  /// For two- and three-byte opcodes it is the second byte.
  uint8_t primary_opcode;

  /// Length of the opcode in bytes. Valid opcode lengths are 1, 2, or 3.
  uint8_t opcode_len;

  /// ModR/M byte of the instruction.
  /// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
  /// may contain a register or specify an addressing mode, depending on MOD.
  uint8_t modrm;
};
117
118/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
119/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
120/// instruction set.
121///
122/// \param[in] opcode_and_modrm
123/// Contains primary_opcode byte, its length, and ModR/M byte.
124/// Refer to the struct InstructionOpcodeAndModrm for details.
125///
126/// \return
127/// The control flow kind of the instruction or
128/// eInstructionControlFlowKindOther if the instruction doesn't affect
129/// the control flow of the program.
130lldb::InstructionControlFlowKind
131MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
132 uint8_t opcode = opcode_and_modrm.primary_opcode;
133 uint8_t opcode_len = opcode_and_modrm.opcode_len;
134 uint8_t modrm = opcode_and_modrm.modrm;
135
136 if (opcode_len > 2)
137 return lldb::eInstructionControlFlowKindOther;
138
139 if (opcode >= 0x70 && opcode <= 0x7F) {
140 if (opcode_len == 1)
141 return lldb::eInstructionControlFlowKindCondJump;
142 else
143 return lldb::eInstructionControlFlowKindOther;
144 }
145
146 if (opcode >= 0x80 && opcode <= 0x8F) {
147 if (opcode_len == 2)
148 return lldb::eInstructionControlFlowKindCondJump;
149 else
150 return lldb::eInstructionControlFlowKindOther;
151 }
152
153 switch (opcode) {
154 case 0x9A:
155 if (opcode_len == 1)
156 return lldb::eInstructionControlFlowKindFarCall;
157 break;
158 case 0xFF:
159 if (opcode_len == 1) {
160 uint8_t modrm_reg = (modrm >> 3) & 7;
161 if (modrm_reg == 2)
162 return lldb::eInstructionControlFlowKindCall;
163 else if (modrm_reg == 3)
164 return lldb::eInstructionControlFlowKindFarCall;
165 else if (modrm_reg == 4)
166 return lldb::eInstructionControlFlowKindJump;
167 else if (modrm_reg == 5)
168 return lldb::eInstructionControlFlowKindFarJump;
169 }
170 break;
171 case 0xE8:
172 if (opcode_len == 1)
173 return lldb::eInstructionControlFlowKindCall;
174 break;
175 case 0xCD:
176 case 0xCC:
177 case 0xCE:
178 case 0xF1:
179 if (opcode_len == 1)
180 return lldb::eInstructionControlFlowKindFarCall;
181 break;
182 case 0xCF:
183 if (opcode_len == 1)
184 return lldb::eInstructionControlFlowKindFarReturn;
185 break;
186 case 0xE9:
187 case 0xEB:
188 if (opcode_len == 1)
189 return lldb::eInstructionControlFlowKindJump;
190 break;
191 case 0xEA:
192 if (opcode_len == 1)
193 return lldb::eInstructionControlFlowKindFarJump;
194 break;
195 case 0xE3:
196 case 0xE0:
197 case 0xE1:
198 case 0xE2:
199 if (opcode_len == 1)
200 return lldb::eInstructionControlFlowKindCondJump;
201 break;
202 case 0xC3:
203 case 0xC2:
204 if (opcode_len == 1)
205 return lldb::eInstructionControlFlowKindReturn;
206 break;
207 case 0xCB:
208 case 0xCA:
209 if (opcode_len == 1)
210 return lldb::eInstructionControlFlowKindFarReturn;
211 break;
212 case 0x05:
213 case 0x34:
214 if (opcode_len == 2)
215 return lldb::eInstructionControlFlowKindFarCall;
216 break;
217 case 0x35:
218 case 0x07:
219 if (opcode_len == 2)
220 return lldb::eInstructionControlFlowKindFarReturn;
221 break;
222 case 0x01:
223 if (opcode_len == 2) {
224 switch (modrm) {
225 case 0xc1:
226 return lldb::eInstructionControlFlowKindFarCall;
227 case 0xc2:
228 case 0xc3:
229 return lldb::eInstructionControlFlowKindFarReturn;
230 default:
231 break;
232 }
233 }
234 break;
235 default:
236 break;
237 }
238
239 return lldb::eInstructionControlFlowKindOther;
240}
241
242/// Decode an instruction into opcode, modrm and opcode_len.
243/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
244/// Opcodes in x86 are generally the first byte of instruction, though two-byte
245/// instructions and prefixes exist. ModR/M is the byte following the opcode
246/// and adds additional information for how the instruction is executed.
247///
248/// \param[in] inst_bytes
249/// Raw bytes of the instruction
250///
251///
252/// \param[in] bytes_len
253/// The length of the inst_bytes array.
254///
255/// \param[in] is_exec_mode_64b
256/// If true, the execution mode is 64 bit.
257///
258/// \return
259/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
260/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition
261/// for more details.
262/// Otherwise if the given instruction is invalid, returns std::nullopt.
263std::optional<InstructionOpcodeAndModrm>
264InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
265 bool is_exec_mode_64b) {
266 int op_idx = 0;
267 bool prefix_done = false;
268 InstructionOpcodeAndModrm ret = {.primary_opcode: 0, .opcode_len: 0, .modrm: 0};
269
270 // In most cases, the primary_opcode is the first byte of the instruction
271 // but some instructions have a prefix to be skipped for these calculations.
272 // The following mapping is inspired from libipt's instruction decoding logic
273 // in `src/pt_ild.c`
274 while (!prefix_done) {
275 if (op_idx >= bytes_len)
276 return std::nullopt;
277
278 ret.primary_opcode = inst_bytes[op_idx];
279 switch (ret.primary_opcode) {
280 // prefix_ignore
281 case 0x26:
282 case 0x2e:
283 case 0x36:
284 case 0x3e:
285 case 0x64:
286 case 0x65:
287 // prefix_osz, prefix_asz
288 case 0x66:
289 case 0x67:
290 // prefix_lock, prefix_f2, prefix_f3
291 case 0xf0:
292 case 0xf2:
293 case 0xf3:
294 op_idx++;
295 break;
296
297 // prefix_rex
298 case 0x40:
299 case 0x41:
300 case 0x42:
301 case 0x43:
302 case 0x44:
303 case 0x45:
304 case 0x46:
305 case 0x47:
306 case 0x48:
307 case 0x49:
308 case 0x4a:
309 case 0x4b:
310 case 0x4c:
311 case 0x4d:
312 case 0x4e:
313 case 0x4f:
314 if (is_exec_mode_64b)
315 op_idx++;
316 else
317 prefix_done = true;
318 break;
319
320 // prefix_vex_c4, c5
321 case 0xc5:
322 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
323 prefix_done = true;
324 break;
325 }
326
327 ret.opcode_len = 2;
328 ret.primary_opcode = inst_bytes[op_idx + 2];
329 ret.modrm = inst_bytes[op_idx + 3];
330 return ret;
331
332 case 0xc4:
333 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
334 prefix_done = true;
335 break;
336 }
337 ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
338 ret.primary_opcode = inst_bytes[op_idx + 3];
339 ret.modrm = inst_bytes[op_idx + 4];
340 return ret;
341
342 // prefix_evex
343 case 0x62:
344 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
345 prefix_done = true;
346 break;
347 }
348 ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
349 ret.primary_opcode = inst_bytes[op_idx + 4];
350 ret.modrm = inst_bytes[op_idx + 5];
351 return ret;
352
353 default:
354 prefix_done = true;
355 break;
356 }
357 } // prefix done
358
359 ret.primary_opcode = inst_bytes[op_idx];
360 ret.modrm = inst_bytes[op_idx + 1];
361 ret.opcode_len = 1;
362
363 // If the first opcode is 0F, it's two- or three- byte opcodes.
364 if (ret.primary_opcode == 0x0F) {
365 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
366
367 if (ret.primary_opcode == 0x38) {
368 ret.opcode_len = 3;
369 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
370 ret.modrm = inst_bytes[op_idx + 1];
371 } else if (ret.primary_opcode == 0x3A) {
372 ret.opcode_len = 3;
373 ret.primary_opcode = inst_bytes[++op_idx];
374 ret.modrm = inst_bytes[op_idx + 1];
375 } else if ((ret.primary_opcode & 0xf8) == 0x38) {
376 ret.opcode_len = 0;
377 ret.primary_opcode = inst_bytes[++op_idx];
378 ret.modrm = inst_bytes[op_idx + 1];
379 } else if (ret.primary_opcode == 0x0F) {
380 ret.opcode_len = 3;
381 // opcode is 0x0F, no needs to update
382 ret.modrm = inst_bytes[op_idx + 1];
383 } else {
384 ret.opcode_len = 2;
385 ret.modrm = inst_bytes[op_idx + 1];
386 }
387 }
388
389 return ret;
390}
391
392lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
393 Opcode m_opcode) {
394 std::optional<InstructionOpcodeAndModrm> ret;
395
396 if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
397 // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
398 return lldb::eInstructionControlFlowKindUnknown;
399 }
400
401 // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
402 // These are the three values deciding instruction control flow kind.
403 ret = InstructionLengthDecode(inst_bytes: (const uint8_t *)m_opcode.GetOpcodeBytes(),
404 bytes_len: m_opcode.GetByteSize(), is_exec_mode_64b);
405 if (!ret)
406 return lldb::eInstructionControlFlowKindUnknown;
407 else
408 return MapOpcodeIntoControlFlowKind(opcode_and_modrm: *ret);
409}
410
411} // namespace x86
412
413class InstructionLLVMC : public lldb_private::Instruction {
414public:
415 InstructionLLVMC(DisassemblerLLVMC &disasm,
416 const lldb_private::Address &address,
417 AddressClass addr_class)
418 : Instruction(address, addr_class),
419 m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(
420 r: disasm.shared_from_this())) {}
421
422 ~InstructionLLVMC() override = default;
423
424 bool DoesBranch() override {
425 VisitInstruction();
426 return m_does_branch;
427 }
428
429 bool HasDelaySlot() override {
430 VisitInstruction();
431 return m_has_delay_slot;
432 }
433
434 bool IsLoad() override {
435 VisitInstruction();
436 return m_is_load;
437 }
438
439 bool IsAuthenticated() override {
440 VisitInstruction();
441 return m_is_authenticated;
442 }
443
444 DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {
445 DisassemblerScope disasm(*this);
446 return GetDisasmToUse(is_alternate_isa, disasm);
447 }
448
449 size_t Decode(const lldb_private::Disassembler &disassembler,
450 const lldb_private::DataExtractor &data,
451 lldb::offset_t data_offset) override {
452 // All we have to do is read the opcode which can be easy for some
453 // architectures
454 bool got_op = false;
455 DisassemblerScope disasm(*this);
456 if (disasm) {
457 const ArchSpec &arch = disasm->GetArchitecture();
458 const lldb::ByteOrder byte_order = data.GetByteOrder();
459
460 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
461 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
462 if (min_op_byte_size == max_op_byte_size) {
463 // Fixed size instructions, just read that amount of data.
464 if (!data.ValidOffsetForDataOfSize(offset: data_offset, length: min_op_byte_size))
465 return false;
466
467 switch (min_op_byte_size) {
468 case 1:
469 m_opcode.SetOpcode8(inst: data.GetU8(offset_ptr: &data_offset), order: byte_order);
470 got_op = true;
471 break;
472
473 case 2:
474 m_opcode.SetOpcode16(inst: data.GetU16(offset_ptr: &data_offset), order: byte_order);
475 got_op = true;
476 break;
477
478 case 4:
479 m_opcode.SetOpcode32(inst: data.GetU32(offset_ptr: &data_offset), order: byte_order);
480 got_op = true;
481 break;
482
483 case 8:
484 m_opcode.SetOpcode64(inst: data.GetU64(offset_ptr: &data_offset), order: byte_order);
485 got_op = true;
486 break;
487
488 default:
489 m_opcode.SetOpcodeBytes(bytes: data.PeekData(offset: data_offset, length: min_op_byte_size),
490 length: min_op_byte_size);
491 got_op = true;
492 break;
493 }
494 }
495 if (!got_op) {
496 bool is_alternate_isa = false;
497 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
498 GetDisasmToUse(is_alternate_isa, disasm);
499
500 const llvm::Triple::ArchType machine = arch.GetMachine();
501 if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) {
502 if (machine == llvm::Triple::thumb || is_alternate_isa) {
503 uint32_t thumb_opcode = data.GetU16(offset_ptr: &data_offset);
504 if ((thumb_opcode & 0xe000) != 0xe000 ||
505 ((thumb_opcode & 0x1800u) == 0)) {
506 m_opcode.SetOpcode16(inst: thumb_opcode, order: byte_order);
507 m_is_valid = true;
508 } else {
509 thumb_opcode <<= 16;
510 thumb_opcode |= data.GetU16(offset_ptr: &data_offset);
511 m_opcode.SetOpcode16_2(inst: thumb_opcode, order: byte_order);
512 m_is_valid = true;
513 }
514 } else {
515 m_opcode.SetOpcode32(inst: data.GetU32(offset_ptr: &data_offset), order: byte_order);
516 m_is_valid = true;
517 }
518 } else {
519 // The opcode isn't evenly sized, so we need to actually use the llvm
520 // disassembler to parse it and get the size.
521 uint8_t *opcode_data =
522 const_cast<uint8_t *>(data.PeekData(offset: data_offset, length: 1));
523 const size_t opcode_data_len = data.BytesLeft(offset: data_offset);
524 const addr_t pc = m_address.GetFileAddress();
525 llvm::MCInst inst;
526
527 const size_t inst_size =
528 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst);
529 if (inst_size == 0)
530 m_opcode.Clear();
531 else {
532 m_opcode.SetOpcodeBytes(bytes: opcode_data, length: inst_size);
533 m_is_valid = true;
534 }
535 }
536 }
537 return m_opcode.GetByteSize();
538 }
539 return 0;
540 }
541
542 void AppendComment(std::string &description) {
543 if (m_comment.empty())
544 m_comment.swap(s&: description);
545 else {
546 m_comment.append(s: ", ");
547 m_comment.append(str: description);
548 }
549 }
550
551 lldb::InstructionControlFlowKind
552 GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override {
553 DisassemblerScope disasm(*this, exe_ctx);
554 if (disasm){
555 if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86)
556 return x86::GetControlFlowKind(/*is_64b=*/is_exec_mode_64b: false, m_opcode);
557 else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64)
558 return x86::GetControlFlowKind(/*is_64b=*/is_exec_mode_64b: true, m_opcode);
559 }
560
561 return eInstructionControlFlowKindUnknown;
562 }
563
564 void CalculateMnemonicOperandsAndComment(
565 const lldb_private::ExecutionContext *exe_ctx) override {
566 DataExtractor data;
567 const AddressClass address_class = GetAddressClass();
568
569 if (m_opcode.GetData(data)) {
570 std::string out_string;
571 std::string markup_out_string;
572 std::string comment_string;
573 std::string markup_comment_string;
574
575 DisassemblerScope disasm(*this, exe_ctx);
576 if (disasm) {
577 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr;
578
579 if (address_class == AddressClass::eCodeAlternateISA)
580 mc_disasm_ptr = disasm->m_alternate_disasm_up.get();
581 else
582 mc_disasm_ptr = disasm->m_disasm_up.get();
583
584 lldb::addr_t pc = m_address.GetFileAddress();
585 m_using_file_addr = true;
586
587 bool use_hex_immediates = true;
588 Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
589
590 if (exe_ctx) {
591 Target *target = exe_ctx->GetTargetPtr();
592 if (target) {
593 use_hex_immediates = target->GetUseHexImmediates();
594 hex_style = target->GetHexImmediateStyle();
595
596 const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
597 if (load_addr != LLDB_INVALID_ADDRESS) {
598 pc = load_addr;
599 m_using_file_addr = false;
600 }
601 }
602 }
603
604 const uint8_t *opcode_data = data.GetDataStart();
605 const size_t opcode_data_len = data.GetByteSize();
606 llvm::MCInst inst;
607 size_t inst_size =
608 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst);
609
610 if (inst_size > 0) {
611 mc_disasm_ptr->SetStyle(use_hex_immed: use_hex_immediates, hex_style);
612
613 const bool saved_use_color = mc_disasm_ptr->GetUseColor();
614 mc_disasm_ptr->SetUseColor(false);
615 mc_disasm_ptr->PrintMCInst(mc_inst&: inst, pc, inst_string&: out_string, comments_string&: comment_string);
616 mc_disasm_ptr->SetUseColor(true);
617 mc_disasm_ptr->PrintMCInst(mc_inst&: inst, pc, inst_string&: markup_out_string,
618 comments_string&: markup_comment_string);
619 mc_disasm_ptr->SetUseColor(saved_use_color);
620
621 if (!comment_string.empty()) {
622 AppendComment(description&: comment_string);
623 }
624 }
625
626 if (inst_size == 0) {
627 m_comment.assign(s: "unknown opcode");
628 inst_size = m_opcode.GetByteSize();
629 StreamString mnemonic_strm;
630 lldb::offset_t offset = 0;
631 lldb::ByteOrder byte_order = data.GetByteOrder();
632 switch (inst_size) {
633 case 1: {
634 const uint8_t uval8 = data.GetU8(offset_ptr: &offset);
635 m_opcode.SetOpcode8(inst: uval8, order: byte_order);
636 m_opcode_name.assign(s: ".byte");
637 mnemonic_strm.Printf(format: "0x%2.2x", uval8);
638 } break;
639 case 2: {
640 const uint16_t uval16 = data.GetU16(offset_ptr: &offset);
641 m_opcode.SetOpcode16(inst: uval16, order: byte_order);
642 m_opcode_name.assign(s: ".short");
643 mnemonic_strm.Printf(format: "0x%4.4x", uval16);
644 } break;
645 case 4: {
646 const uint32_t uval32 = data.GetU32(offset_ptr: &offset);
647 m_opcode.SetOpcode32(inst: uval32, order: byte_order);
648 m_opcode_name.assign(s: ".long");
649 mnemonic_strm.Printf(format: "0x%8.8x", uval32);
650 } break;
651 case 8: {
652 const uint64_t uval64 = data.GetU64(offset_ptr: &offset);
653 m_opcode.SetOpcode64(inst: uval64, order: byte_order);
654 m_opcode_name.assign(s: ".quad");
655 mnemonic_strm.Printf(format: "0x%16.16" PRIx64, uval64);
656 } break;
657 default:
658 if (inst_size == 0)
659 return;
660 else {
661 const uint8_t *bytes = data.PeekData(offset, length: inst_size);
662 if (bytes == nullptr)
663 return;
664 m_opcode_name.assign(s: ".byte");
665 m_opcode.SetOpcodeBytes(bytes, length: inst_size);
666 mnemonic_strm.Printf(format: "0x%2.2x", bytes[0]);
667 for (uint32_t i = 1; i < inst_size; ++i)
668 mnemonic_strm.Printf(format: " 0x%2.2x", bytes[i]);
669 }
670 break;
671 }
672 m_mnemonics = std::string(mnemonic_strm.GetString());
673 return;
674 }
675
676 static RegularExpression s_regex(
677 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
678
679 llvm::SmallVector<llvm::StringRef, 4> matches;
680 if (s_regex.Execute(string: out_string, matches: &matches)) {
681 m_opcode_name = matches[1].str();
682 m_mnemonics = matches[2].str();
683 }
684 matches.clear();
685 if (s_regex.Execute(string: markup_out_string, matches: &matches)) {
686 m_markup_opcode_name = matches[1].str();
687 m_markup_mnemonics = matches[2].str();
688 }
689 }
690 }
691 }
692
693 bool IsValid() const { return m_is_valid; }
694
695 bool UsingFileAddress() const { return m_using_file_addr; }
696 size_t GetByteSize() const { return m_opcode.GetByteSize(); }
697
698 /// Grants exclusive access to the disassembler and initializes it with the
699 /// given InstructionLLVMC and an optional ExecutionContext.
700 class DisassemblerScope {
701 std::shared_ptr<DisassemblerLLVMC> m_disasm;
702
703 public:
704 explicit DisassemblerScope(
705 InstructionLLVMC &i,
706 const lldb_private::ExecutionContext *exe_ctx = nullptr)
707 : m_disasm(i.m_disasm_wp.lock()) {
708 m_disasm->m_mutex.lock();
709 m_disasm->m_inst = &i;
710 m_disasm->m_exe_ctx = exe_ctx;
711 }
712 ~DisassemblerScope() { m_disasm->m_mutex.unlock(); }
713
714 /// Evaluates to true if this scope contains a valid disassembler.
715 operator bool() const { return static_cast<bool>(m_disasm); }
716
717 std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; }
718 };
719
720 static llvm::StringRef::const_iterator
721 ConsumeWhitespace(llvm::StringRef::const_iterator osi,
722 llvm::StringRef::const_iterator ose) {
723 while (osi != ose) {
724 switch (*osi) {
725 default:
726 return osi;
727 case ' ':
728 case '\t':
729 break;
730 }
731 ++osi;
732 }
733
734 return osi;
735 }
736
737 static std::pair<bool, llvm::StringRef::const_iterator>
738 ConsumeChar(llvm::StringRef::const_iterator osi, const char c,
739 llvm::StringRef::const_iterator ose) {
740 bool found = false;
741
742 osi = ConsumeWhitespace(osi, ose);
743 if (osi != ose && *osi == c) {
744 found = true;
745 ++osi;
746 }
747
748 return std::make_pair(x&: found, y&: osi);
749 }
750
751 static std::pair<Operand, llvm::StringRef::const_iterator>
752 ParseRegisterName(llvm::StringRef::const_iterator osi,
753 llvm::StringRef::const_iterator ose) {
754 Operand ret;
755 ret.m_type = Operand::Type::Register;
756 std::string str;
757
758 osi = ConsumeWhitespace(osi, ose);
759
760 while (osi != ose) {
761 if (*osi >= '0' && *osi <= '9') {
762 if (str.empty()) {
763 return std::make_pair(x: Operand(), y&: osi);
764 } else {
765 str.push_back(c: *osi);
766 }
767 } else if (*osi >= 'a' && *osi <= 'z') {
768 str.push_back(c: *osi);
769 } else {
770 switch (*osi) {
771 default:
772 if (str.empty()) {
773 return std::make_pair(x: Operand(), y&: osi);
774 } else {
775 ret.m_register = ConstString(str);
776 return std::make_pair(x&: ret, y&: osi);
777 }
778 case '%':
779 if (!str.empty()) {
780 return std::make_pair(x: Operand(), y&: osi);
781 }
782 break;
783 }
784 }
785 ++osi;
786 }
787
788 ret.m_register = ConstString(str);
789 return std::make_pair(x&: ret, y&: osi);
790 }
791
792 static std::pair<Operand, llvm::StringRef::const_iterator>
793 ParseImmediate(llvm::StringRef::const_iterator osi,
794 llvm::StringRef::const_iterator ose) {
795 Operand ret;
796 ret.m_type = Operand::Type::Immediate;
797 std::string str;
798 bool is_hex = false;
799
800 osi = ConsumeWhitespace(osi, ose);
801
802 while (osi != ose) {
803 if (*osi >= '0' && *osi <= '9') {
804 str.push_back(c: *osi);
805 } else if (*osi >= 'a' && *osi <= 'f') {
806 if (is_hex) {
807 str.push_back(c: *osi);
808 } else {
809 return std::make_pair(x: Operand(), y&: osi);
810 }
811 } else {
812 switch (*osi) {
813 default:
814 if (str.empty()) {
815 return std::make_pair(x: Operand(), y&: osi);
816 } else {
817 ret.m_immediate = strtoull(nptr: str.c_str(), endptr: nullptr, base: 0);
818 return std::make_pair(x&: ret, y&: osi);
819 }
820 case 'x':
821 if (str == "0") {
822 is_hex = true;
823 str.push_back(c: *osi);
824 } else {
825 return std::make_pair(x: Operand(), y&: osi);
826 }
827 break;
828 case '#':
829 case '$':
830 if (!str.empty()) {
831 return std::make_pair(x: Operand(), y&: osi);
832 }
833 break;
834 case '-':
835 if (str.empty()) {
836 ret.m_negative = true;
837 } else {
838 return std::make_pair(x: Operand(), y&: osi);
839 }
840 }
841 }
842 ++osi;
843 }
844
845 ret.m_immediate = strtoull(nptr: str.c_str(), endptr: nullptr, base: 0);
846 return std::make_pair(x&: ret, y&: osi);
847 }
848
849 // -0x5(%rax,%rax,2)
850 static std::pair<Operand, llvm::StringRef::const_iterator>
851 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,
852 llvm::StringRef::const_iterator ose) {
853 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
854 ParseImmediate(osi, ose);
855 if (offset_and_iterator.first.IsValid()) {
856 osi = offset_and_iterator.second;
857 }
858
859 bool found = false;
860 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '(', ose);
861 if (!found) {
862 return std::make_pair(x: Operand(), y&: osi);
863 }
864
865 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
866 ParseRegisterName(osi, ose);
867 if (base_and_iterator.first.IsValid()) {
868 osi = base_and_iterator.second;
869 } else {
870 return std::make_pair(x: Operand(), y&: osi);
871 }
872
873 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose);
874 if (!found) {
875 return std::make_pair(x: Operand(), y&: osi);
876 }
877
878 std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =
879 ParseRegisterName(osi, ose);
880 if (index_and_iterator.first.IsValid()) {
881 osi = index_and_iterator.second;
882 } else {
883 return std::make_pair(x: Operand(), y&: osi);
884 }
885
886 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose);
887 if (!found) {
888 return std::make_pair(x: Operand(), y&: osi);
889 }
890
891 std::pair<Operand, llvm::StringRef::const_iterator>
892 multiplier_and_iterator = ParseImmediate(osi, ose);
893 if (index_and_iterator.first.IsValid()) {
894 osi = index_and_iterator.second;
895 } else {
896 return std::make_pair(x: Operand(), y&: osi);
897 }
898
899 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ')', ose);
900 if (!found) {
901 return std::make_pair(x: Operand(), y&: osi);
902 }
903
904 Operand product;
905 product.m_type = Operand::Type::Product;
906 product.m_children.push_back(x: index_and_iterator.first);
907 product.m_children.push_back(x: multiplier_and_iterator.first);
908
909 Operand index;
910 index.m_type = Operand::Type::Sum;
911 index.m_children.push_back(x: base_and_iterator.first);
912 index.m_children.push_back(x: product);
913
914 if (offset_and_iterator.first.IsValid()) {
915 Operand offset;
916 offset.m_type = Operand::Type::Sum;
917 offset.m_children.push_back(x: offset_and_iterator.first);
918 offset.m_children.push_back(x: index);
919
920 Operand deref;
921 deref.m_type = Operand::Type::Dereference;
922 deref.m_children.push_back(x: offset);
923 return std::make_pair(x&: deref, y&: osi);
924 } else {
925 Operand deref;
926 deref.m_type = Operand::Type::Dereference;
927 deref.m_children.push_back(x: index);
928 return std::make_pair(x&: deref, y&: osi);
929 }
930 }
931
932 // -0x10(%rbp)
933 static std::pair<Operand, llvm::StringRef::const_iterator>
934 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,
935 llvm::StringRef::const_iterator ose) {
936 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
937 ParseImmediate(osi, ose);
938 if (offset_and_iterator.first.IsValid()) {
939 osi = offset_and_iterator.second;
940 }
941
942 bool found = false;
943 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '(', ose);
944 if (!found) {
945 return std::make_pair(x: Operand(), y&: osi);
946 }
947
948 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
949 ParseRegisterName(osi, ose);
950 if (base_and_iterator.first.IsValid()) {
951 osi = base_and_iterator.second;
952 } else {
953 return std::make_pair(x: Operand(), y&: osi);
954 }
955
956 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ')', ose);
957 if (!found) {
958 return std::make_pair(x: Operand(), y&: osi);
959 }
960
961 if (offset_and_iterator.first.IsValid()) {
962 Operand offset;
963 offset.m_type = Operand::Type::Sum;
964 offset.m_children.push_back(x: offset_and_iterator.first);
965 offset.m_children.push_back(x: base_and_iterator.first);
966
967 Operand deref;
968 deref.m_type = Operand::Type::Dereference;
969 deref.m_children.push_back(x: offset);
970 return std::make_pair(x&: deref, y&: osi);
971 } else {
972 Operand deref;
973 deref.m_type = Operand::Type::Dereference;
974 deref.m_children.push_back(x: base_and_iterator.first);
975 return std::make_pair(x&: deref, y&: osi);
976 }
977 }
978
979 // [sp, #8]!
980 static std::pair<Operand, llvm::StringRef::const_iterator>
981 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,
982 llvm::StringRef::const_iterator ose) {
983 bool found = false;
984 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '[', ose);
985 if (!found) {
986 return std::make_pair(x: Operand(), y&: osi);
987 }
988
989 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
990 ParseRegisterName(osi, ose);
991 if (base_and_iterator.first.IsValid()) {
992 osi = base_and_iterator.second;
993 } else {
994 return std::make_pair(x: Operand(), y&: osi);
995 }
996
997 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ',', ose);
998 if (!found) {
999 return std::make_pair(x: Operand(), y&: osi);
1000 }
1001
1002 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
1003 ParseImmediate(osi, ose);
1004 if (offset_and_iterator.first.IsValid()) {
1005 osi = offset_and_iterator.second;
1006 }
1007
1008 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ']', ose);
1009 if (!found) {
1010 return std::make_pair(x: Operand(), y&: osi);
1011 }
1012
1013 Operand offset;
1014 offset.m_type = Operand::Type::Sum;
1015 offset.m_children.push_back(x: offset_and_iterator.first);
1016 offset.m_children.push_back(x: base_and_iterator.first);
1017
1018 Operand deref;
1019 deref.m_type = Operand::Type::Dereference;
1020 deref.m_children.push_back(x: offset);
1021 return std::make_pair(x&: deref, y&: osi);
1022 }
1023
1024 // [sp]
1025 static std::pair<Operand, llvm::StringRef::const_iterator>
1026 ParseARMDerefAccess(llvm::StringRef::const_iterator osi,
1027 llvm::StringRef::const_iterator ose) {
1028 bool found = false;
1029 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: '[', ose);
1030 if (!found) {
1031 return std::make_pair(x: Operand(), y&: osi);
1032 }
1033
1034 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
1035 ParseRegisterName(osi, ose);
1036 if (base_and_iterator.first.IsValid()) {
1037 osi = base_and_iterator.second;
1038 } else {
1039 return std::make_pair(x: Operand(), y&: osi);
1040 }
1041
1042 std::tie(args&: found, args&: osi) = ConsumeChar(osi, c: ']', ose);
1043 if (!found) {
1044 return std::make_pair(x: Operand(), y&: osi);
1045 }
1046
1047 Operand deref;
1048 deref.m_type = Operand::Type::Dereference;
1049 deref.m_children.push_back(x: base_and_iterator.first);
1050 return std::make_pair(x&: deref, y&: osi);
1051 }
1052
1053 static void DumpOperand(const Operand &op, Stream &s) {
1054 switch (op.m_type) {
1055 case Operand::Type::Dereference:
1056 s.PutCString(cstr: "*");
1057 DumpOperand(op: op.m_children[0], s);
1058 break;
1059 case Operand::Type::Immediate:
1060 if (op.m_negative) {
1061 s.PutCString(cstr: "-");
1062 }
1063 s.PutCString(cstr: llvm::to_string(Value: op.m_immediate));
1064 break;
1065 case Operand::Type::Invalid:
1066 s.PutCString(cstr: "Invalid");
1067 break;
1068 case Operand::Type::Product:
1069 s.PutCString(cstr: "(");
1070 DumpOperand(op: op.m_children[0], s);
1071 s.PutCString(cstr: "*");
1072 DumpOperand(op: op.m_children[1], s);
1073 s.PutCString(cstr: ")");
1074 break;
1075 case Operand::Type::Register:
1076 s.PutCString(cstr: op.m_register.GetStringRef());
1077 break;
1078 case Operand::Type::Sum:
1079 s.PutCString(cstr: "(");
1080 DumpOperand(op: op.m_children[0], s);
1081 s.PutCString(cstr: "+");
1082 DumpOperand(op: op.m_children[1], s);
1083 s.PutCString(cstr: ")");
1084 break;
1085 }
1086 }
1087
1088 bool ParseOperands(
1089 llvm::SmallVectorImpl<Instruction::Operand> &operands) override {
1090 const char *operands_string = GetOperands(exe_ctx: nullptr);
1091
1092 if (!operands_string) {
1093 return false;
1094 }
1095
1096 llvm::StringRef operands_ref(operands_string);
1097
1098 llvm::StringRef::const_iterator osi = operands_ref.begin();
1099 llvm::StringRef::const_iterator ose = operands_ref.end();
1100
1101 while (osi != ose) {
1102 Operand operand;
1103 llvm::StringRef::const_iterator iter;
1104
1105 if ((std::tie(args&: operand, args&: iter) = ParseIntelIndexedAccess(osi, ose),
1106 operand.IsValid()) ||
1107 (std::tie(args&: operand, args&: iter) = ParseIntelDerefAccess(osi, ose),
1108 operand.IsValid()) ||
1109 (std::tie(args&: operand, args&: iter) = ParseARMOffsetAccess(osi, ose),
1110 operand.IsValid()) ||
1111 (std::tie(args&: operand, args&: iter) = ParseARMDerefAccess(osi, ose),
1112 operand.IsValid()) ||
1113 (std::tie(args&: operand, args&: iter) = ParseRegisterName(osi, ose),
1114 operand.IsValid()) ||
1115 (std::tie(args&: operand, args&: iter) = ParseImmediate(osi, ose),
1116 operand.IsValid())) {
1117 osi = iter;
1118 operands.push_back(Elt: operand);
1119 } else {
1120 return false;
1121 }
1122
1123 std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =
1124 ConsumeChar(osi, c: ',', ose);
1125 if (found_and_iter.first) {
1126 osi = found_and_iter.second;
1127 }
1128
1129 osi = ConsumeWhitespace(osi, ose);
1130 }
1131
1132 DisassemblerSP disasm_sp = m_disasm_wp.lock();
1133
1134 if (disasm_sp && operands.size() > 1) {
1135 // TODO tie this into the MC Disassembler's notion of clobbers.
1136 switch (disasm_sp->GetArchitecture().GetMachine()) {
1137 default:
1138 break;
1139 case llvm::Triple::x86:
1140 case llvm::Triple::x86_64:
1141 operands[operands.size() - 1].m_clobbered = true;
1142 break;
1143 case llvm::Triple::arm:
1144 operands[0].m_clobbered = true;
1145 break;
1146 }
1147 }
1148
1149 if (Log *log = GetLog(mask: LLDBLog::Process)) {
1150 StreamString ss;
1151
1152 ss.Printf(format: "[%s] expands to %zu operands:\n", operands_string,
1153 operands.size());
1154 for (const Operand &operand : operands) {
1155 ss.PutCString(cstr: " ");
1156 DumpOperand(op: operand, s&: ss);
1157 ss.PutCString(cstr: "\n");
1158 }
1159
1160 log->PutString(str: ss.GetString());
1161 }
1162
1163 return true;
1164 }
1165
1166 bool IsCall() override {
1167 VisitInstruction();
1168 return m_is_call;
1169 }
1170
1171protected:
1172 std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;
1173
1174 bool m_is_valid = false;
1175 bool m_using_file_addr = false;
1176 bool m_has_visited_instruction = false;
1177
1178 // Be conservative. If we didn't understand the instruction, say it:
1179 // - Might branch
1180 // - Does not have a delay slot
1181 // - Is not a call
1182 // - Is not a load
1183 // - Is not an authenticated instruction
1184 bool m_does_branch = true;
1185 bool m_has_delay_slot = false;
1186 bool m_is_call = false;
1187 bool m_is_load = false;
1188 bool m_is_authenticated = false;
1189
1190 void VisitInstruction() {
1191 if (m_has_visited_instruction)
1192 return;
1193
1194 DisassemblerScope disasm(*this);
1195 if (!disasm)
1196 return;
1197
1198 DataExtractor data;
1199 if (!m_opcode.GetData(data))
1200 return;
1201
1202 bool is_alternate_isa;
1203 lldb::addr_t pc = m_address.GetFileAddress();
1204 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
1205 GetDisasmToUse(is_alternate_isa, disasm);
1206 const uint8_t *opcode_data = data.GetDataStart();
1207 const size_t opcode_data_len = data.GetByteSize();
1208 llvm::MCInst inst;
1209 const size_t inst_size =
1210 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, mc_inst&: inst);
1211 if (inst_size == 0)
1212 return;
1213
1214 m_has_visited_instruction = true;
1215 m_does_branch = mc_disasm_ptr->CanBranch(mc_inst&: inst);
1216 m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(mc_inst&: inst);
1217 m_is_call = mc_disasm_ptr->IsCall(mc_inst&: inst);
1218 m_is_load = mc_disasm_ptr->IsLoad(mc_inst&: inst);
1219 m_is_authenticated = mc_disasm_ptr->IsAuthenticated(mc_inst&: inst);
1220 }
1221
1222private:
1223 DisassemblerLLVMC::MCDisasmInstance *
1224 GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) {
1225 is_alternate_isa = false;
1226 if (disasm) {
1227 if (disasm->m_alternate_disasm_up) {
1228 const AddressClass address_class = GetAddressClass();
1229
1230 if (address_class == AddressClass::eCodeAlternateISA) {
1231 is_alternate_isa = true;
1232 return disasm->m_alternate_disasm_up.get();
1233 }
1234 }
1235 return disasm->m_disasm_up.get();
1236 }
1237 return nullptr;
1238 }
1239};
1240
1241std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>
1242DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
1243 const char *features_str,
1244 unsigned flavor,
1245 DisassemblerLLVMC &owner) {
1246 using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;
1247
1248 std::string Status;
1249 const llvm::Target *curr_target =
1250 llvm::TargetRegistry::lookupTarget(TripleStr: triple, Error&: Status);
1251 if (!curr_target)
1252 return Instance();
1253
1254 std::unique_ptr<llvm::MCInstrInfo> instr_info_up(
1255 curr_target->createMCInstrInfo());
1256 if (!instr_info_up)
1257 return Instance();
1258
1259 std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(
1260 curr_target->createMCRegInfo(TT: triple));
1261 if (!reg_info_up)
1262 return Instance();
1263
1264 std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(
1265 curr_target->createMCSubtargetInfo(TheTriple: triple, CPU: cpu, Features: features_str));
1266 if (!subtarget_info_up)
1267 return Instance();
1268
1269 llvm::MCTargetOptions MCOptions;
1270 std::unique_ptr<llvm::MCAsmInfo> asm_info_up(
1271 curr_target->createMCAsmInfo(MRI: *reg_info_up, TheTriple: triple, Options: MCOptions));
1272 if (!asm_info_up)
1273 return Instance();
1274
1275 std::unique_ptr<llvm::MCContext> context_up(
1276 new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(),
1277 reg_info_up.get(), subtarget_info_up.get()));
1278 if (!context_up)
1279 return Instance();
1280
1281 std::unique_ptr<llvm::MCDisassembler> disasm_up(
1282 curr_target->createMCDisassembler(STI: *subtarget_info_up, Ctx&: *context_up));
1283 if (!disasm_up)
1284 return Instance();
1285
1286 std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(
1287 curr_target->createMCRelocationInfo(TT: triple, Ctx&: *context_up));
1288 if (!rel_info_up)
1289 return Instance();
1290
1291 std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(
1292 curr_target->createMCSymbolizer(
1293 TT: triple, GetOpInfo: nullptr, SymbolLookUp: DisassemblerLLVMC::SymbolLookupCallback, DisInfo: &owner,
1294 Ctx: context_up.get(), RelInfo: std::move(rel_info_up)));
1295 disasm_up->setSymbolizer(std::move(symbolizer_up));
1296
1297 unsigned asm_printer_variant =
1298 flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;
1299
1300 std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(
1301 curr_target->createMCInstPrinter(T: llvm::Triple{triple},
1302 SyntaxVariant: asm_printer_variant, MAI: *asm_info_up,
1303 MII: *instr_info_up, MRI: *reg_info_up));
1304 if (!instr_printer_up)
1305 return Instance();
1306
1307 instr_printer_up->setPrintBranchImmAsAddress(true);
1308
1309 // Not all targets may have registered createMCInstrAnalysis().
1310 std::unique_ptr<llvm::MCInstrAnalysis> instr_analysis_up(
1311 curr_target->createMCInstrAnalysis(Info: instr_info_up.get()));
1312
1313 return Instance(new MCDisasmInstance(
1314 std::move(instr_info_up), std::move(reg_info_up),
1315 std::move(subtarget_info_up), std::move(asm_info_up),
1316 std::move(context_up), std::move(disasm_up), std::move(instr_printer_up),
1317 std::move(instr_analysis_up)));
1318}
1319
1320DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
1321 std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
1322 std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
1323 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
1324 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
1325 std::unique_ptr<llvm::MCContext> &&context_up,
1326 std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
1327 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up,
1328 std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up)
1329 : m_instr_info_up(std::move(instr_info_up)),
1330 m_reg_info_up(std::move(reg_info_up)),
1331 m_subtarget_info_up(std::move(subtarget_info_up)),
1332 m_asm_info_up(std::move(asm_info_up)),
1333 m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)),
1334 m_instr_printer_up(std::move(instr_printer_up)),
1335 m_instr_analysis_up(std::move(instr_analysis_up)) {
1336 assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&
1337 m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
1338}
1339
1340uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
1341 const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
1342 llvm::MCInst &mc_inst) const {
1343 llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
1344 llvm::MCDisassembler::DecodeStatus status;
1345
1346 uint64_t new_inst_size;
1347 status = m_disasm_up->getInstruction(Instr&: mc_inst, Size&: new_inst_size, Bytes: data, Address: pc,
1348 CStream&: llvm::nulls());
1349 if (status == llvm::MCDisassembler::Success)
1350 return new_inst_size;
1351 else
1352 return 0;
1353}
1354
1355void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
1356 llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string,
1357 std::string &comments_string) {
1358 llvm::raw_string_ostream inst_stream(inst_string);
1359 llvm::raw_string_ostream comments_stream(comments_string);
1360
1361 inst_stream.enable_colors(enable: m_instr_printer_up->getUseColor());
1362 m_instr_printer_up->setCommentStream(comments_stream);
1363 m_instr_printer_up->printInst(MI: &mc_inst, Address: pc, Annot: llvm::StringRef(),
1364 STI: *m_subtarget_info_up, OS&: inst_stream);
1365 m_instr_printer_up->setCommentStream(llvm::nulls());
1366
1367 static std::string g_newlines("\r\n");
1368
1369 for (size_t newline_pos = 0;
1370 (newline_pos = comments_string.find_first_of(str: g_newlines, pos: newline_pos)) !=
1371 comments_string.npos;
1372 /**/) {
1373 comments_string.replace(i1: comments_string.begin() + newline_pos,
1374 i2: comments_string.begin() + newline_pos + 1, n: 1, c: ' ');
1375 }
1376}
1377
1378void DisassemblerLLVMC::MCDisasmInstance::SetStyle(
1379 bool use_hex_immed, HexImmediateStyle hex_style) {
1380 m_instr_printer_up->setPrintImmHex(use_hex_immed);
1381 switch (hex_style) {
1382 case eHexStyleC:
1383 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);
1384 break;
1385 case eHexStyleAsm:
1386 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);
1387 break;
1388 }
1389}
1390
1391void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color) {
1392 m_instr_printer_up->setUseColor(use_color);
1393}
1394
1395bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const {
1396 return m_instr_printer_up->getUseColor();
1397}
1398
1399bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(
1400 llvm::MCInst &mc_inst) const {
1401 if (m_instr_analysis_up)
1402 return m_instr_analysis_up->mayAffectControlFlow(Inst: mc_inst, MCRI: *m_reg_info_up);
1403 return m_instr_info_up->get(Opcode: mc_inst.getOpcode())
1404 .mayAffectControlFlow(MI: mc_inst, RI: *m_reg_info_up);
1405}
1406
1407bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(
1408 llvm::MCInst &mc_inst) const {
1409 return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).hasDelaySlot();
1410}
1411
1412bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
1413 if (m_instr_analysis_up)
1414 return m_instr_analysis_up->isCall(Inst: mc_inst);
1415 return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).isCall();
1416}
1417
1418bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const {
1419 return m_instr_info_up->get(Opcode: mc_inst.getOpcode()).mayLoad();
1420}
1421
1422bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated(
1423 llvm::MCInst &mc_inst) const {
1424 const auto &InstrDesc = m_instr_info_up->get(Opcode: mc_inst.getOpcode());
1425
1426 // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4
1427 // == 'a' + 'c') as authenticated instructions for reporting purposes, in
1428 // addition to the standard authenticated instructions specified in ARMv8.3.
1429 bool IsBrkC47x = false;
1430 if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) {
1431 const llvm::MCOperand &Op0 = mc_inst.getOperand(i: 0);
1432 if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474)
1433 IsBrkC47x = true;
1434 }
1435
1436 return InstrDesc.isAuthenticated() || IsBrkC47x;
1437}
1438
1439DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
1440 const char *flavor_string,
1441 const char *cpu_string,
1442 const char *features_string)
1443 : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
1444 m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
1445 m_adrp_insn() {
1446 if (!FlavorValidForArchSpec(arch, flavor: m_flavor.c_str())) {
1447 m_flavor.assign(s: "default");
1448 }
1449
1450 const bool cpu_or_features_overriden = cpu_string || features_string;
1451 unsigned flavor = ~0U;
1452 llvm::Triple triple = arch.GetTriple();
1453
1454 // So far the only supported flavor is "intel" on x86. The base class will
1455 // set this correctly coming in.
1456 if (triple.getArch() == llvm::Triple::x86 ||
1457 triple.getArch() == llvm::Triple::x86_64) {
1458 if (m_flavor == "intel") {
1459 flavor = 1;
1460 } else if (m_flavor == "att") {
1461 flavor = 0;
1462 }
1463 }
1464
1465 ArchSpec thumb_arch(arch);
1466 if (triple.getArch() == llvm::Triple::arm) {
1467 std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());
1468 // Replace "arm" with "thumb" so we get all thumb variants correct
1469 if (thumb_arch_name.size() > 3) {
1470 thumb_arch_name.erase(pos: 0, n: 3);
1471 thumb_arch_name.insert(pos: 0, s: "thumb");
1472 } else {
1473 thumb_arch_name = "thumbv9.3a";
1474 }
1475 thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));
1476 }
1477
1478 // If no sub architecture specified then use the most recent arm architecture
1479 // so the disassembler will return all instructions. Without it we will see a
1480 // lot of unknown opcodes if the code uses instructions which are not
1481 // available in the oldest arm version (which is used when no sub architecture
1482 // is specified).
1483 if (triple.getArch() == llvm::Triple::arm &&
1484 triple.getSubArch() == llvm::Triple::NoSubArch)
1485 triple.setArchName("armv9.3a");
1486
1487 std::string features_str =
1488 features_string ? std::string(features_string) : "";
1489 const char *triple_str = triple.getTriple().c_str();
1490
1491 // ARM Cortex M0-M7 devices only execute thumb instructions
1492 if (arch.IsAlwaysThumbInstructions()) {
1493 triple_str = thumb_arch.GetTriple().getTriple().c_str();
1494 if (!features_string)
1495 features_str += "+fp-armv8,";
1496 }
1497
1498 const char *cpu = cpu_string;
1499
1500 if (!cpu_or_features_overriden) {
1501 switch (arch.GetCore()) {
1502 case ArchSpec::eCore_mips32:
1503 case ArchSpec::eCore_mips32el:
1504 cpu = "mips32";
1505 break;
1506 case ArchSpec::eCore_mips32r2:
1507 case ArchSpec::eCore_mips32r2el:
1508 cpu = "mips32r2";
1509 break;
1510 case ArchSpec::eCore_mips32r3:
1511 case ArchSpec::eCore_mips32r3el:
1512 cpu = "mips32r3";
1513 break;
1514 case ArchSpec::eCore_mips32r5:
1515 case ArchSpec::eCore_mips32r5el:
1516 cpu = "mips32r5";
1517 break;
1518 case ArchSpec::eCore_mips32r6:
1519 case ArchSpec::eCore_mips32r6el:
1520 cpu = "mips32r6";
1521 break;
1522 case ArchSpec::eCore_mips64:
1523 case ArchSpec::eCore_mips64el:
1524 cpu = "mips64";
1525 break;
1526 case ArchSpec::eCore_mips64r2:
1527 case ArchSpec::eCore_mips64r2el:
1528 cpu = "mips64r2";
1529 break;
1530 case ArchSpec::eCore_mips64r3:
1531 case ArchSpec::eCore_mips64r3el:
1532 cpu = "mips64r3";
1533 break;
1534 case ArchSpec::eCore_mips64r5:
1535 case ArchSpec::eCore_mips64r5el:
1536 cpu = "mips64r5";
1537 break;
1538 case ArchSpec::eCore_mips64r6:
1539 case ArchSpec::eCore_mips64r6el:
1540 cpu = "mips64r6";
1541 break;
1542 default:
1543 cpu = "";
1544 break;
1545 }
1546 }
1547
1548 if (arch.IsMIPS() && !cpu_or_features_overriden) {
1549 uint32_t arch_flags = arch.GetFlags();
1550 if (arch_flags & ArchSpec::eMIPSAse_msa)
1551 features_str += "+msa,";
1552 if (arch_flags & ArchSpec::eMIPSAse_dsp)
1553 features_str += "+dsp,";
1554 if (arch_flags & ArchSpec::eMIPSAse_dspr2)
1555 features_str += "+dspr2,";
1556 }
1557
1558 // If any AArch64 variant, enable latest ISA with all extensions unless the
1559 // CPU or features were overridden.
1560 if (triple.isAArch64() && !cpu_or_features_overriden) {
1561 features_str += "+all,";
1562 if (triple.getVendor() == llvm::Triple::Apple)
1563 cpu = "apple-latest";
1564 }
1565
1566 if (triple.isRISCV() && !cpu_or_features_overriden) {
1567 uint32_t arch_flags = arch.GetFlags();
1568 if (arch_flags & ArchSpec::eRISCV_rvc)
1569 features_str += "+c,";
1570 if (arch_flags & ArchSpec::eRISCV_rve)
1571 features_str += "+e,";
1572 if ((arch_flags & ArchSpec::eRISCV_float_abi_single) ==
1573 ArchSpec::eRISCV_float_abi_single)
1574 features_str += "+f,";
1575 if ((arch_flags & ArchSpec::eRISCV_float_abi_double) ==
1576 ArchSpec::eRISCV_float_abi_double)
1577 features_str += "+f,+d,";
1578 if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) ==
1579 ArchSpec::eRISCV_float_abi_quad)
1580 features_str += "+f,+d,+q,";
1581 // FIXME: how do we detect features such as `+a`, `+m`?
1582 // Turn them on by default now, since everyone seems to use them
1583 features_str += "+a,+m,";
1584 }
1585
1586 // We use m_disasm_up.get() to tell whether we are valid or not, so if this
1587 // isn't good for some reason, we won't be valid and FindPlugin will fail and
1588 // we won't get used.
1589 m_disasm_up = MCDisasmInstance::Create(triple: triple_str, cpu, features_str: features_str.c_str(),
1590 flavor, owner&: *this);
1591
1592 llvm::Triple::ArchType llvm_arch = triple.getArch();
1593
1594 // For arm CPUs that can execute arm or thumb instructions, also create a
1595 // thumb instruction disassembler.
1596 if (llvm_arch == llvm::Triple::arm) {
1597 std::string thumb_triple(thumb_arch.GetTriple().getTriple());
1598 m_alternate_disasm_up =
1599 MCDisasmInstance::Create(triple: thumb_triple.c_str(), cpu: "", features_str: features_str.c_str(),
1600 flavor, owner&: *this);
1601 if (!m_alternate_disasm_up)
1602 m_disasm_up.reset();
1603
1604 } else if (arch.IsMIPS()) {
1605 /* Create alternate disassembler for MIPS16 and microMIPS */
1606 uint32_t arch_flags = arch.GetFlags();
1607 if (arch_flags & ArchSpec::eMIPSAse_mips16)
1608 features_str += "+mips16,";
1609 else if (arch_flags & ArchSpec::eMIPSAse_micromips)
1610 features_str += "+micromips,";
1611
1612 m_alternate_disasm_up = MCDisasmInstance::Create(
1613 triple: triple_str, cpu, features_str: features_str.c_str(), flavor, owner&: *this);
1614 if (!m_alternate_disasm_up)
1615 m_disasm_up.reset();
1616 }
1617}
1618
// Destructor, defaulted here in the implementation file rather than in the
// class declaration.
1619DisassemblerLLVMC::~DisassemblerLLVMC() = default;
1620
1621lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch,
1622 const char *flavor,
1623 const char *cpu,
1624 const char *features) {
1625 if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) {
1626 auto disasm_sp =
1627 std::make_shared<DisassemblerLLVMC>(args: arch, args&: flavor, args&: cpu, args&: features);
1628 if (disasm_sp && disasm_sp->IsValid())
1629 return disasm_sp;
1630 }
1631 return lldb::DisassemblerSP();
1632}
1633
1634size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr,
1635 const DataExtractor &data,
1636 lldb::offset_t data_offset,
1637 size_t num_instructions,
1638 bool append, bool data_from_file) {
1639 if (!append)
1640 m_instruction_list.Clear();
1641
1642 if (!IsValid())
1643 return 0;
1644
1645 m_data_from_file = data_from_file;
1646 uint32_t data_cursor = data_offset;
1647 const size_t data_byte_size = data.GetByteSize();
1648 uint32_t instructions_parsed = 0;
1649 Address inst_addr(base_addr);
1650
1651 while (data_cursor < data_byte_size &&
1652 instructions_parsed < num_instructions) {
1653
1654 AddressClass address_class = AddressClass::eCode;
1655
1656 if (m_alternate_disasm_up)
1657 address_class = inst_addr.GetAddressClass();
1658
1659 InstructionSP inst_sp(
1660 new InstructionLLVMC(*this, inst_addr, address_class));
1661
1662 if (!inst_sp)
1663 break;
1664
1665 uint32_t inst_size = inst_sp->Decode(disassembler: *this, data, data_offset: data_cursor);
1666
1667 if (inst_size == 0)
1668 break;
1669
1670 m_instruction_list.Append(inst_sp);
1671 data_cursor += inst_size;
1672 inst_addr.Slide(offset: inst_size);
1673 instructions_parsed++;
1674 }
1675
1676 return data_cursor - data_offset;
1677}
1678
1679void DisassemblerLLVMC::Initialize() {
1680 PluginManager::RegisterPlugin(name: GetPluginNameStatic(),
1681 description: "Disassembler that uses LLVM MC to disassemble "
1682 "i386, x86_64, ARM, and ARM64.",
1683 create_callback: CreateInstance);
1684
1685 llvm::InitializeAllTargetInfos();
1686 llvm::InitializeAllTargetMCs();
1687 llvm::InitializeAllAsmParsers();
1688 llvm::InitializeAllDisassemblers();
1689}
1690
// Unregisters the plugin from the PluginManager, identifying it by the same
// CreateInstance callback that Initialize() registered.
1691void DisassemblerLLVMC::Terminate() {
1692 PluginManager::UnregisterPlugin(create_callback: CreateInstance);
1693}
1694
1695int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
1696 uint64_t offset, uint64_t size,
1697 int tag_type, void *tag_bug) {
1698 return static_cast<DisassemblerLLVMC *>(disassembler)
1699 ->OpInfo(PC: pc, Offset: offset, Size: size, TagType: tag_type, TagBug: tag_bug);
1700}
1701
1702const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
1703 uint64_t value,
1704 uint64_t *type, uint64_t pc,
1705 const char **name) {
1706 return static_cast<DisassemblerLLVMC *>(disassembler)
1707 ->SymbolLookup(ReferenceValue: value, ReferenceType: type, ReferencePC: pc, ReferenceName: name);
1708}
1709
1710bool DisassemblerLLVMC::FlavorValidForArchSpec(
1711 const lldb_private::ArchSpec &arch, const char *flavor) {
1712 llvm::Triple triple = arch.GetTriple();
1713 if (flavor == nullptr || strcmp(s1: flavor, s2: "default") == 0)
1714 return true;
1715
1716 if (triple.getArch() == llvm::Triple::x86 ||
1717 triple.getArch() == llvm::Triple::x86_64) {
1718 return strcmp(s1: flavor, s2: "intel") == 0 || strcmp(s1: flavor, s2: "att") == 0;
1719 } else
1720 return false;
1721}
1722
1723bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); }
1724
1725int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,
1726 int tag_type, void *tag_bug) {
1727 switch (tag_type) {
1728 default:
1729 break;
1730 case 1:
1731 memset(s: tag_bug, c: 0, n: sizeof(::LLVMOpInfo1));
1732 break;
1733 }
1734 return 0;
1735}
1736
1737const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
1738 uint64_t pc, const char **name) {
1739 if (*type_ptr) {
1740 if (m_exe_ctx && m_inst) {
1741 // std::string remove_this_prior_to_checkin;
1742 Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
1743 Address value_so_addr;
1744 Address pc_so_addr;
1745 if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
1746 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
1747 target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
1748 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
1749 m_adrp_address = pc;
1750 m_adrp_insn = value;
1751 *name = nullptr;
1752 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1753 return nullptr;
1754 }
1755 // If this instruction is an ADD and
1756 // the previous instruction was an ADRP and
1757 // the ADRP's register and this ADD's register are the same,
1758 // then this is a pc-relative address calculation.
1759 if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
1760 m_adrp_insn && m_adrp_address == pc - 4 &&
1761 (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) {
1762 uint32_t addxri_inst;
1763 uint64_t adrp_imm, addxri_imm;
1764 // Get immlo and immhi bits, OR them together to get the ADRP imm
1765 // value.
1766 adrp_imm =
1767 ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3);
1768 // if high bit of immhi after right-shifting set, sign extend
1769 if (adrp_imm & (1ULL << 20))
1770 adrp_imm |= ~((1ULL << 21) - 1);
1771
1772 addxri_inst = value;
1773 addxri_imm = (addxri_inst >> 10) & 0xfff;
1774 // check if 'sh' bit is set, shift imm value up if so
1775 // (this would make no sense, ADRP already gave us this part)
1776 if ((addxri_inst >> (12 + 5 + 5)) & 1)
1777 addxri_imm <<= 12;
1778 value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
1779 addxri_imm;
1780 }
1781 m_adrp_address = LLDB_INVALID_ADDRESS;
1782 m_adrp_insn.reset();
1783 }
1784
1785 if (m_inst->UsingFileAddress()) {
1786 ModuleSP module_sp(m_inst->GetAddress().GetModule());
1787 if (module_sp) {
1788 module_sp->ResolveFileAddress(vm_addr: value, so_addr&: value_so_addr);
1789 module_sp->ResolveFileAddress(vm_addr: pc, so_addr&: pc_so_addr);
1790 }
1791 } else if (target && target->HasLoadedSections()) {
1792 target->ResolveLoadAddress(load_addr: value, so_addr&: value_so_addr);
1793 target->ResolveLoadAddress(load_addr: pc, so_addr&: pc_so_addr);
1794 }
1795
1796 SymbolContext sym_ctx;
1797 const SymbolContextItem resolve_scope =
1798 eSymbolContextFunction | eSymbolContextSymbol;
1799 if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {
1800 pc_so_addr.GetModule()->ResolveSymbolContextForAddress(
1801 so_addr: pc_so_addr, resolve_scope, sc&: sym_ctx);
1802 }
1803
1804 if (value_so_addr.IsValid() && value_so_addr.GetSection()) {
1805 StreamString ss;
1806
1807 bool format_omitting_current_func_name = false;
1808 if (sym_ctx.symbol || sym_ctx.function) {
1809 AddressRange range;
1810 for (uint32_t idx = 0;
1811 sym_ctx.GetAddressRange(scope: resolve_scope, range_idx: idx, use_inline_block_range: false, range);
1812 ++idx) {
1813 if (range.ContainsLoadAddress(so_addr: value_so_addr, target)) {
1814 format_omitting_current_func_name = true;
1815 break;
1816 }
1817 }
1818 }
1819
1820 // If the "value" address (the target address we're symbolicating) is
1821 // inside the same SymbolContext as the current instruction pc
1822 // (pc_so_addr), don't print the full function name - just print it
1823 // with DumpStyleNoFunctionName style, e.g. "<+36>".
1824 if (format_omitting_current_func_name) {
1825 value_so_addr.Dump(s: &ss, exe_scope: target, style: Address::DumpStyleNoFunctionName,
1826 fallback_style: Address::DumpStyleSectionNameOffset);
1827 } else {
1828 value_so_addr.Dump(
1829 s: &ss, exe_scope: target,
1830 style: Address::DumpStyleResolvedDescriptionNoFunctionArguments,
1831 fallback_style: Address::DumpStyleSectionNameOffset);
1832 }
1833
1834 if (!ss.GetString().empty()) {
1835 // If Address::Dump returned a multi-line description, most commonly
1836 // seen when we have multiple levels of inlined functions at an
1837 // address, only show the first line.
1838 std::string str = std::string(ss.GetString());
1839 size_t first_eol_char = str.find_first_of(s: "\r\n");
1840 if (first_eol_char != std::string::npos) {
1841 str.erase(pos: first_eol_char);
1842 }
1843 m_inst->AppendComment(description&: str);
1844 }
1845 }
1846 }
1847 }
1848
1849 // TODO: llvm-objdump sets the type_ptr to the
1850 // LLVMDisassembler_ReferenceType_Out_* values
1851 // based on where value_so_addr is pointing, with
1852 // Mach-O specific augmentations in MachODump.cpp. e.g.
1853 // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
1854 // handles.
1855 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1856 *name = nullptr;
1857 return nullptr;
1858}
1859

source code of lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp