| 1 | //===-- DisassembleRequestHandler.cpp -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "DAP.h" |
| 10 | #include "EventHelper.h" |
| 11 | #include "JSONUtils.h" |
| 12 | #include "LLDBUtils.h" |
| 13 | #include "Protocol/ProtocolRequests.h" |
| 14 | #include "Protocol/ProtocolTypes.h" |
| 15 | #include "ProtocolUtils.h" |
| 16 | #include "RequestHandler.h" |
| 17 | #include "lldb/API/SBAddress.h" |
| 18 | #include "lldb/API/SBInstruction.h" |
| 19 | #include "lldb/API/SBLineEntry.h" |
| 20 | #include "lldb/API/SBTarget.h" |
| 21 | #include "lldb/lldb-types.h" |
| 22 | #include "llvm/ADT/StringExtras.h" |
| 23 | #include "llvm/Support/Error.h" |
| 24 | #include <cstdint> |
| 25 | #include <optional> |
| 26 | |
| 27 | using namespace lldb_dap::protocol; |
| 28 | |
| 29 | namespace lldb_dap { |
| 30 | |
| 31 | static protocol::DisassembledInstruction GetInvalidInstruction() { |
| 32 | DisassembledInstruction invalid_inst; |
| 33 | invalid_inst.address = LLDB_INVALID_ADDRESS; |
| 34 | invalid_inst.presentationHint = |
| 35 | DisassembledInstruction::eDisassembledInstructionPresentationHintInvalid; |
| 36 | return invalid_inst; |
| 37 | } |
| 38 | |
| 39 | static lldb::SBAddress GetDisassembleStartAddress(lldb::SBTarget target, |
| 40 | lldb::SBAddress addr, |
| 41 | int64_t instruction_offset) { |
| 42 | if (instruction_offset == 0) |
| 43 | return addr; |
| 44 | |
| 45 | if (target.GetMinimumOpcodeByteSize() == target.GetMaximumOpcodeByteSize()) { |
| 46 | // We have fixed opcode size, so we can calculate the address directly, |
| 47 | // negative or positive. |
| 48 | lldb::addr_t load_addr = addr.GetLoadAddress(target); |
| 49 | load_addr += instruction_offset * target.GetMinimumOpcodeByteSize(); |
| 50 | return lldb::SBAddress(load_addr, target); |
| 51 | } |
| 52 | |
| 53 | if (instruction_offset > 0) { |
| 54 | lldb::SBInstructionList forward_insts = |
| 55 | target.ReadInstructions(base_addr: addr, count: instruction_offset + 1); |
| 56 | return forward_insts.GetInstructionAtIndex(idx: forward_insts.GetSize() - 1) |
| 57 | .GetAddress(); |
| 58 | } |
| 59 | |
| 60 | // We have a negative instruction offset, so we need to disassemble backwards. |
| 61 | // The opcode size is not fixed, so we have no idea where to start from. |
| 62 | // Let's try from the start of the current symbol if available. |
| 63 | auto symbol = addr.GetSymbol(); |
| 64 | if (!symbol.IsValid()) |
| 65 | return addr; |
| 66 | |
| 67 | // Add valid instructions before the current instruction using the symbol. |
| 68 | lldb::SBInstructionList symbol_insts = |
| 69 | target.ReadInstructions(start_addr: symbol.GetStartAddress(), end_addr: addr, flavor_string: nullptr); |
| 70 | if (!symbol_insts.IsValid() || symbol_insts.GetSize() == 0) |
| 71 | return addr; |
| 72 | |
| 73 | const auto backwards_instructions_count = |
| 74 | static_cast<size_t>(std::abs(i: instruction_offset)); |
| 75 | if (symbol_insts.GetSize() < backwards_instructions_count) { |
| 76 | // We don't have enough instructions to disassemble backwards, so just |
| 77 | // return the start address of the symbol. |
| 78 | return symbol_insts.GetInstructionAtIndex(idx: 0).GetAddress(); |
| 79 | } |
| 80 | |
| 81 | return symbol_insts |
| 82 | .GetInstructionAtIndex(idx: symbol_insts.GetSize() - |
| 83 | backwards_instructions_count) |
| 84 | .GetAddress(); |
| 85 | } |
| 86 | |
| 87 | static DisassembledInstruction ConvertSBInstructionToDisassembledInstruction( |
| 88 | DAP &dap, lldb::SBInstruction &inst, bool resolve_symbols) { |
| 89 | lldb::SBTarget target = dap.target; |
| 90 | if (!inst.IsValid()) |
| 91 | return GetInvalidInstruction(); |
| 92 | |
| 93 | auto addr = inst.GetAddress(); |
| 94 | const auto inst_addr = addr.GetLoadAddress(target); |
| 95 | |
| 96 | // FIXME: This is a workaround - this address might come from |
| 97 | // disassembly that started in a different section, and thus |
| 98 | // comparisons between this object and other address objects with the |
| 99 | // same load address will return false. |
| 100 | addr = lldb::SBAddress(inst_addr, target); |
| 101 | |
| 102 | const char *m = inst.GetMnemonic(target); |
| 103 | const char *o = inst.GetOperands(target); |
| 104 | std::string c = inst.GetComment(target); |
| 105 | auto d = inst.GetData(target); |
| 106 | |
| 107 | std::string bytes; |
| 108 | llvm::raw_string_ostream sb(bytes); |
| 109 | for (unsigned i = 0; i < inst.GetByteSize(); i++) { |
| 110 | lldb::SBError error; |
| 111 | uint8_t b = d.GetUnsignedInt8(error, offset: i); |
| 112 | if (error.Success()) |
| 113 | sb << llvm::format(Fmt: "%2.2x " , Vals: b); |
| 114 | } |
| 115 | |
| 116 | DisassembledInstruction disassembled_inst; |
| 117 | disassembled_inst.address = inst_addr; |
| 118 | |
| 119 | if (!bytes.empty()) // remove last whitespace |
| 120 | bytes.pop_back(); |
| 121 | disassembled_inst.instructionBytes = std::move(bytes); |
| 122 | |
| 123 | llvm::raw_string_ostream si(disassembled_inst.instruction); |
| 124 | si << llvm::formatv(Fmt: "{0,-7} {1,-25}" , Vals&: m, Vals&: o); |
| 125 | |
| 126 | // Only add the symbol on the first line of the function. |
| 127 | // in the comment section |
| 128 | if (lldb::SBSymbol symbol = addr.GetSymbol(); |
| 129 | symbol.GetStartAddress() == addr) { |
| 130 | const llvm::StringRef sym_display_name = symbol.GetDisplayName(); |
| 131 | c.append(s: " " ); |
| 132 | c.append(svt: sym_display_name); |
| 133 | |
| 134 | if (resolve_symbols) |
| 135 | disassembled_inst.symbol = sym_display_name; |
| 136 | } |
| 137 | |
| 138 | if (!c.empty()) { |
| 139 | si << " ; " << c; |
| 140 | } |
| 141 | |
| 142 | std::optional<protocol::Source> source = dap.ResolveSource(address: addr); |
| 143 | lldb::SBLineEntry line_entry = GetLineEntryForAddress(target, address: addr); |
| 144 | |
| 145 | // If the line number is 0 then the entry represents a compiler generated |
| 146 | // location. |
| 147 | if (source && !IsAssemblySource(source: *source) && |
| 148 | line_entry.GetStartAddress() == addr && line_entry.IsValid() && |
| 149 | line_entry.GetFileSpec().IsValid() && line_entry.GetLine() != 0) { |
| 150 | |
| 151 | disassembled_inst.location = std::move(source); |
| 152 | const auto line = line_entry.GetLine(); |
| 153 | if (line != 0 && line != LLDB_INVALID_LINE_NUMBER) |
| 154 | disassembled_inst.line = line; |
| 155 | |
| 156 | const auto column = line_entry.GetColumn(); |
| 157 | if (column != 0 && column != LLDB_INVALID_COLUMN_NUMBER) |
| 158 | disassembled_inst.column = column; |
| 159 | |
| 160 | lldb::SBAddress end_addr = line_entry.GetEndAddress(); |
| 161 | auto end_line_entry = GetLineEntryForAddress(target, address: end_addr); |
| 162 | if (end_line_entry.IsValid() && |
| 163 | end_line_entry.GetFileSpec() == line_entry.GetFileSpec()) { |
| 164 | const auto end_line = end_line_entry.GetLine(); |
| 165 | if (end_line != 0 && end_line != LLDB_INVALID_LINE_NUMBER && |
| 166 | end_line != line) { |
| 167 | disassembled_inst.endLine = end_line; |
| 168 | |
| 169 | const auto end_column = end_line_entry.GetColumn(); |
| 170 | if (end_column != 0 && end_column != LLDB_INVALID_COLUMN_NUMBER && |
| 171 | end_column != column) |
| 172 | disassembled_inst.endColumn = end_column - 1; |
| 173 | } |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | return disassembled_inst; |
| 178 | } |
| 179 | |
| 180 | /// Disassembles code stored at the provided location. |
| 181 | /// Clients should only call this request if the corresponding capability |
| 182 | /// `supportsDisassembleRequest` is true. |
| 183 | llvm::Expected<DisassembleResponseBody> |
| 184 | DisassembleRequestHandler::Run(const DisassembleArguments &args) const { |
| 185 | const lldb::addr_t addr_ptr = args.memoryReference + args.offset; |
| 186 | lldb::SBAddress addr(addr_ptr, dap.target); |
| 187 | if (!addr.IsValid()) |
| 188 | return llvm::make_error<DAPError>( |
| 189 | Args: "Memory reference not found in the current binary." ); |
| 190 | |
| 191 | // Offset (in instructions) to be applied after the byte offset (if any) |
| 192 | // before disassembling. Can be negative. |
| 193 | const int64_t instruction_offset = args.instructionOffset; |
| 194 | |
| 195 | // Calculate a sufficient address to start disassembling from. |
| 196 | lldb::SBAddress disassemble_start_addr = |
| 197 | GetDisassembleStartAddress(target: dap.target, addr, instruction_offset); |
| 198 | if (!disassemble_start_addr.IsValid()) |
| 199 | return llvm::make_error<DAPError>( |
| 200 | Args: "Unexpected error while disassembling instructions." ); |
| 201 | |
| 202 | lldb::SBInstructionList insts = dap.target.ReadInstructions( |
| 203 | base_addr: disassemble_start_addr, count: args.instructionCount); |
| 204 | if (!insts.IsValid()) |
| 205 | return llvm::make_error<DAPError>( |
| 206 | Args: "Unexpected error while disassembling instructions." ); |
| 207 | |
| 208 | // Convert the found instructions to the DAP format. |
| 209 | const bool resolve_symbols = args.resolveSymbols; |
| 210 | std::vector<DisassembledInstruction> instructions; |
| 211 | size_t original_address_index = args.instructionCount; |
| 212 | for (size_t i = 0; i < insts.GetSize(); ++i) { |
| 213 | lldb::SBInstruction inst = insts.GetInstructionAtIndex(idx: i); |
| 214 | if (inst.GetAddress() == addr) |
| 215 | original_address_index = i; |
| 216 | |
| 217 | instructions.push_back(x: ConvertSBInstructionToDisassembledInstruction( |
| 218 | dap, inst, resolve_symbols)); |
| 219 | } |
| 220 | |
| 221 | // Check if we miss instructions at the beginning. |
| 222 | if (instruction_offset < 0) { |
| 223 | const auto backwards_instructions_count = |
| 224 | static_cast<size_t>(std::abs(i: instruction_offset)); |
| 225 | if (original_address_index < backwards_instructions_count) { |
| 226 | // We don't have enough instructions before the main address as was |
| 227 | // requested. Let's pad the start of the instructions with invalid |
| 228 | // instructions. |
| 229 | std::vector<DisassembledInstruction> invalid_instructions( |
| 230 | backwards_instructions_count - original_address_index, |
| 231 | GetInvalidInstruction()); |
| 232 | instructions.insert(position: instructions.begin(), first: invalid_instructions.begin(), |
| 233 | last: invalid_instructions.end()); |
| 234 | |
| 235 | // Trim excess instructions if needed. |
| 236 | if (instructions.size() > args.instructionCount) |
| 237 | instructions.resize(new_size: args.instructionCount); |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | // Pad the instructions with invalid instructions if needed. |
| 242 | while (instructions.size() < args.instructionCount) { |
| 243 | instructions.push_back(x: GetInvalidInstruction()); |
| 244 | } |
| 245 | |
| 246 | return DisassembleResponseBody{.instructions: std::move(instructions)}; |
| 247 | } |
| 248 | |
| 249 | } // namespace lldb_dap |
| 250 | |