| 1 | //===-- DisassembleRequestHandler.cpp -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "DAP.h" |
| 10 | #include "EventHelper.h" |
| 11 | #include "JSONUtils.h" |
| 12 | #include "LLDBUtils.h" |
| 13 | #include "Protocol/ProtocolRequests.h" |
| 14 | #include "Protocol/ProtocolTypes.h" |
| 15 | #include "ProtocolUtils.h" |
| 16 | #include "RequestHandler.h" |
| 17 | #include "lldb/API/SBAddress.h" |
| 18 | #include "lldb/API/SBInstruction.h" |
| 19 | #include "lldb/API/SBLineEntry.h" |
| 20 | #include "lldb/API/SBTarget.h" |
| 21 | #include "lldb/lldb-types.h" |
| 22 | #include "llvm/ADT/StringExtras.h" |
| 23 | #include "llvm/Support/Error.h" |
| 24 | #include <cstdint> |
| 25 | #include <optional> |
| 26 | |
| 27 | using namespace lldb_dap::protocol; |
| 28 | |
| 29 | namespace lldb_dap { |
| 30 | |
| 31 | static protocol::DisassembledInstruction GetInvalidInstruction() { |
| 32 | DisassembledInstruction invalid_inst; |
| 33 | invalid_inst.address = LLDB_INVALID_ADDRESS; |
| 34 | invalid_inst.presentationHint = |
| 35 | DisassembledInstruction::eDisassembledInstructionPresentationHintInvalid; |
| 36 | return invalid_inst; |
| 37 | } |
| 38 | |
| 39 | static lldb::SBAddress GetDisassembleStartAddress(lldb::SBTarget target, |
| 40 | lldb::SBAddress addr, |
| 41 | int64_t instruction_offset) { |
| 42 | if (instruction_offset == 0) |
| 43 | return addr; |
| 44 | |
| 45 | if (target.GetMinimumOpcodeByteSize() == target.GetMaximumOpcodeByteSize()) { |
| 46 | // We have fixed opcode size, so we can calculate the address directly, |
| 47 | // negative or positive. |
| 48 | lldb::addr_t load_addr = addr.GetLoadAddress(target); |
| 49 | load_addr += instruction_offset * target.GetMinimumOpcodeByteSize(); |
| 50 | return lldb::SBAddress(load_addr, target); |
| 51 | } |
| 52 | |
| 53 | if (instruction_offset > 0) { |
| 54 | lldb::SBInstructionList forward_insts = |
| 55 | target.ReadInstructions(base_addr: addr, count: instruction_offset + 1); |
| 56 | return forward_insts.GetInstructionAtIndex(idx: forward_insts.GetSize() - 1) |
| 57 | .GetAddress(); |
| 58 | } |
| 59 | |
| 60 | // We have a negative instruction offset, so we need to disassemble backwards. |
| 61 | // The opcode size is not fixed, so we have no idea where to start from. |
| 62 | // Let's try from the start of the current symbol if available. |
| 63 | auto symbol = addr.GetSymbol(); |
| 64 | if (!symbol.IsValid()) |
| 65 | return addr; |
| 66 | |
| 67 | // Add valid instructions before the current instruction using the symbol. |
| 68 | lldb::SBInstructionList symbol_insts = |
| 69 | target.ReadInstructions(start_addr: symbol.GetStartAddress(), end_addr: addr, flavor_string: nullptr); |
| 70 | if (!symbol_insts.IsValid() || symbol_insts.GetSize() == 0) |
| 71 | return addr; |
| 72 | |
| 73 | const auto backwards_instructions_count = |
| 74 | static_cast<size_t>(std::abs(i: instruction_offset)); |
| 75 | if (symbol_insts.GetSize() < backwards_instructions_count) { |
| 76 | // We don't have enough instructions to disassemble backwards, so just |
| 77 | // return the start address of the symbol. |
| 78 | return symbol_insts.GetInstructionAtIndex(idx: 0).GetAddress(); |
| 79 | } |
| 80 | |
| 81 | return symbol_insts |
| 82 | .GetInstructionAtIndex(idx: symbol_insts.GetSize() - |
| 83 | backwards_instructions_count) |
| 84 | .GetAddress(); |
| 85 | } |
| 86 | |
| 87 | static DisassembledInstruction ConvertSBInstructionToDisassembledInstruction( |
| 88 | lldb::SBTarget &target, lldb::SBInstruction &inst, bool resolve_symbols) { |
| 89 | if (!inst.IsValid()) |
| 90 | return GetInvalidInstruction(); |
| 91 | |
| 92 | auto addr = inst.GetAddress(); |
| 93 | const auto inst_addr = addr.GetLoadAddress(target); |
| 94 | |
| 95 | // FIXME: This is a workaround - this address might come from |
| 96 | // disassembly that started in a different section, and thus |
| 97 | // comparisons between this object and other address objects with the |
| 98 | // same load address will return false. |
| 99 | addr = lldb::SBAddress(inst_addr, target); |
| 100 | |
| 101 | const char *m = inst.GetMnemonic(target); |
| 102 | const char *o = inst.GetOperands(target); |
| 103 | const char *c = inst.GetComment(target); |
| 104 | auto d = inst.GetData(target); |
| 105 | |
| 106 | std::string bytes; |
| 107 | llvm::raw_string_ostream sb(bytes); |
| 108 | for (unsigned i = 0; i < inst.GetByteSize(); i++) { |
| 109 | lldb::SBError error; |
| 110 | uint8_t b = d.GetUnsignedInt8(error, offset: i); |
| 111 | if (error.Success()) |
| 112 | sb << llvm::format(Fmt: "%2.2x " , Vals: b); |
| 113 | } |
| 114 | |
| 115 | DisassembledInstruction disassembled_inst; |
| 116 | disassembled_inst.address = inst_addr; |
| 117 | disassembled_inst.instructionBytes = |
| 118 | bytes.size() > 0 ? bytes.substr(pos: 0, n: bytes.size() - 1) : "" ; |
| 119 | |
| 120 | std::string instruction; |
| 121 | llvm::raw_string_ostream si(instruction); |
| 122 | |
| 123 | lldb::SBSymbol symbol = addr.GetSymbol(); |
| 124 | // Only add the symbol on the first line of the function. |
| 125 | if (symbol.IsValid() && symbol.GetStartAddress() == addr) { |
| 126 | // If we have a valid symbol, append it as a label prefix for the first |
| 127 | // instruction. This is so you can see the start of a function/callsite |
| 128 | // in the assembly, at the moment VS Code (1.80) does not visualize the |
| 129 | // symbol associated with the assembly instruction. |
| 130 | si << (symbol.GetMangledName() != nullptr ? symbol.GetMangledName() |
| 131 | : symbol.GetName()) |
| 132 | << ": " ; |
| 133 | |
| 134 | if (resolve_symbols) |
| 135 | disassembled_inst.symbol = symbol.GetDisplayName(); |
| 136 | } |
| 137 | |
| 138 | si << llvm::formatv(Fmt: "{0,7} {1,12}" , Vals&: m, Vals&: o); |
| 139 | if (c && c[0]) { |
| 140 | si << " ; " << c; |
| 141 | } |
| 142 | |
| 143 | disassembled_inst.instruction = std::move(instruction); |
| 144 | |
| 145 | protocol::Source source = CreateSource(address: addr, target); |
| 146 | lldb::SBLineEntry line_entry = GetLineEntryForAddress(target, address: addr); |
| 147 | |
| 148 | // If the line number is 0 then the entry represents a compiler generated |
| 149 | // location. |
| 150 | if (!IsAssemblySource(source) && line_entry.GetStartAddress() == addr && |
| 151 | line_entry.IsValid() && line_entry.GetFileSpec().IsValid() && |
| 152 | line_entry.GetLine() != 0) { |
| 153 | |
| 154 | disassembled_inst.location = std::move(source); |
| 155 | const auto line = line_entry.GetLine(); |
| 156 | if (line != 0 && line != LLDB_INVALID_LINE_NUMBER) |
| 157 | disassembled_inst.line = line; |
| 158 | |
| 159 | const auto column = line_entry.GetColumn(); |
| 160 | if (column != 0 && column != LLDB_INVALID_COLUMN_NUMBER) |
| 161 | disassembled_inst.column = column; |
| 162 | |
| 163 | lldb::SBAddress end_addr = line_entry.GetEndAddress(); |
| 164 | auto end_line_entry = GetLineEntryForAddress(target, address: end_addr); |
| 165 | if (end_line_entry.IsValid() && |
| 166 | end_line_entry.GetFileSpec() == line_entry.GetFileSpec()) { |
| 167 | const auto end_line = end_line_entry.GetLine(); |
| 168 | if (end_line != 0 && end_line != LLDB_INVALID_LINE_NUMBER && |
| 169 | end_line != line) { |
| 170 | disassembled_inst.endLine = end_line; |
| 171 | |
| 172 | const auto end_column = end_line_entry.GetColumn(); |
| 173 | if (end_column != 0 && end_column != LLDB_INVALID_COLUMN_NUMBER && |
| 174 | end_column != column) |
| 175 | disassembled_inst.endColumn = end_column - 1; |
| 176 | } |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | return disassembled_inst; |
| 181 | } |
| 182 | |
| 183 | /// Disassembles code stored at the provided location. |
| 184 | /// Clients should only call this request if the corresponding capability |
| 185 | /// `supportsDisassembleRequest` is true. |
| 186 | llvm::Expected<DisassembleResponseBody> |
| 187 | DisassembleRequestHandler::Run(const DisassembleArguments &args) const { |
| 188 | std::optional<lldb::addr_t> addr_opt = |
| 189 | DecodeMemoryReference(memoryReference: args.memoryReference); |
| 190 | if (!addr_opt.has_value()) |
| 191 | return llvm::make_error<DAPError>(Args: "Malformed memory reference: " + |
| 192 | args.memoryReference); |
| 193 | |
| 194 | lldb::addr_t addr_ptr = *addr_opt; |
| 195 | addr_ptr += args.offset.value_or(u: 0); |
| 196 | lldb::SBAddress addr(addr_ptr, dap.target); |
| 197 | if (!addr.IsValid()) |
| 198 | return llvm::make_error<DAPError>( |
| 199 | Args: "Memory reference not found in the current binary." ); |
| 200 | |
| 201 | // Offset (in instructions) to be applied after the byte offset (if any) |
| 202 | // before disassembling. Can be negative. |
| 203 | int64_t instruction_offset = args.instructionOffset.value_or(u: 0); |
| 204 | |
| 205 | // Calculate a sufficient address to start disassembling from. |
| 206 | lldb::SBAddress disassemble_start_addr = |
| 207 | GetDisassembleStartAddress(target: dap.target, addr, instruction_offset); |
| 208 | if (!disassemble_start_addr.IsValid()) |
| 209 | return llvm::make_error<DAPError>( |
| 210 | Args: "Unexpected error while disassembling instructions." ); |
| 211 | |
| 212 | lldb::SBInstructionList insts = dap.target.ReadInstructions( |
| 213 | base_addr: disassemble_start_addr, count: args.instructionCount); |
| 214 | if (!insts.IsValid()) |
| 215 | return llvm::make_error<DAPError>( |
| 216 | Args: "Unexpected error while disassembling instructions." ); |
| 217 | |
| 218 | // Conver the found instructions to the DAP format. |
| 219 | const bool resolve_symbols = args.resolveSymbols.value_or(u: false); |
| 220 | std::vector<DisassembledInstruction> instructions; |
| 221 | size_t original_address_index = args.instructionCount; |
| 222 | for (size_t i = 0; i < insts.GetSize(); ++i) { |
| 223 | lldb::SBInstruction inst = insts.GetInstructionAtIndex(idx: i); |
| 224 | if (inst.GetAddress() == addr) |
| 225 | original_address_index = i; |
| 226 | |
| 227 | instructions.push_back(x: ConvertSBInstructionToDisassembledInstruction( |
| 228 | target&: dap.target, inst, resolve_symbols)); |
| 229 | } |
| 230 | |
| 231 | // Check if we miss instructions at the beginning. |
| 232 | if (instruction_offset < 0) { |
| 233 | const auto backwards_instructions_count = |
| 234 | static_cast<size_t>(std::abs(i: instruction_offset)); |
| 235 | if (original_address_index < backwards_instructions_count) { |
| 236 | // We don't have enough instructions before the main address as was |
| 237 | // requested. Let's pad the start of the instructions with invalid |
| 238 | // instructions. |
| 239 | std::vector<DisassembledInstruction> invalid_instructions( |
| 240 | backwards_instructions_count - original_address_index, |
| 241 | GetInvalidInstruction()); |
| 242 | instructions.insert(position: instructions.begin(), first: invalid_instructions.begin(), |
| 243 | last: invalid_instructions.end()); |
| 244 | |
| 245 | // Trim excess instructions if needed. |
| 246 | if (instructions.size() > args.instructionCount) |
| 247 | instructions.resize(new_size: args.instructionCount); |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | // Pad the instructions with invalid instructions if needed. |
| 252 | while (instructions.size() < args.instructionCount) { |
| 253 | instructions.push_back(x: GetInvalidInstruction()); |
| 254 | } |
| 255 | |
| 256 | return DisassembleResponseBody{.instructions: std::move(instructions)}; |
| 257 | } |
| 258 | |
| 259 | } // namespace lldb_dap |
| 260 | |