//===-- ObjectFileWasm.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
| 8 | |
| 9 | #include "ObjectFileWasm.h" |
| 10 | #include "lldb/Core/Module.h" |
| 11 | #include "lldb/Core/ModuleSpec.h" |
| 12 | #include "lldb/Core/PluginManager.h" |
| 13 | #include "lldb/Core/Section.h" |
| 14 | #include "lldb/Target/Process.h" |
| 15 | #include "lldb/Target/SectionLoadList.h" |
| 16 | #include "lldb/Target/Target.h" |
| 17 | #include "lldb/Utility/DataBufferHeap.h" |
| 18 | #include "lldb/Utility/LLDBLog.h" |
| 19 | #include "lldb/Utility/Log.h" |
| 20 | #include "llvm/ADT/ArrayRef.h" |
| 21 | #include "llvm/ADT/SmallVector.h" |
| 22 | #include "llvm/ADT/StringRef.h" |
| 23 | #include "llvm/BinaryFormat/Magic.h" |
| 24 | #include "llvm/BinaryFormat/Wasm.h" |
| 25 | #include "llvm/Support/Endian.h" |
| 26 | #include "llvm/Support/Format.h" |
| 27 | #include <optional> |
| 28 | |
| 29 | using namespace lldb; |
| 30 | using namespace lldb_private; |
| 31 | using namespace lldb_private::wasm; |
| 32 | |
| 33 | LLDB_PLUGIN_DEFINE(ObjectFileWasm) |
| 34 | |
| 35 | static const uint32_t = |
| 36 | sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); |
| 37 | |
| 38 | /// Checks whether the data buffer starts with a valid Wasm module header. |
| 39 | static bool (const DataBufferSP &data_sp) { |
| 40 | if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) |
| 41 | return false; |
| 42 | |
| 43 | if (llvm::identify_magic(magic: toStringRef(Input: data_sp->GetData())) != |
| 44 | llvm::file_magic::wasm_object) |
| 45 | return false; |
| 46 | |
| 47 | const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); |
| 48 | |
| 49 | uint32_t version = llvm::support::endian::read32le(P: Ptr); |
| 50 | return version == llvm::wasm::WasmVersion; |
| 51 | } |
| 52 | |
| 53 | static std::optional<ConstString> |
| 54 | (llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { |
| 55 | // A Wasm string is encoded as a vector of UTF-8 codes. |
| 56 | // Vectors are encoded with their u32 length followed by the element |
| 57 | // sequence. |
| 58 | uint64_t len = data.getULEB128(C&: c); |
| 59 | if (!c) { |
| 60 | consumeError(Err: c.takeError()); |
| 61 | return std::nullopt; |
| 62 | } |
| 63 | |
| 64 | if (len >= (uint64_t(1) << 32)) { |
| 65 | return std::nullopt; |
| 66 | } |
| 67 | |
| 68 | llvm::SmallVector<uint8_t, 32> str_storage; |
| 69 | data.getU8(C&: c, Dst&: str_storage, Count: len); |
| 70 | if (!c) { |
| 71 | consumeError(Err: c.takeError()); |
| 72 | return std::nullopt; |
| 73 | } |
| 74 | |
| 75 | llvm::StringRef str = toStringRef(Input: llvm::ArrayRef(str_storage)); |
| 76 | return ConstString(str); |
| 77 | } |
| 78 | |
| 79 | char ObjectFileWasm::ID; |
| 80 | |
| 81 | void ObjectFileWasm::Initialize() { |
| 82 | PluginManager::RegisterPlugin(name: GetPluginNameStatic(), |
| 83 | description: GetPluginDescriptionStatic(), create_callback: CreateInstance, |
| 84 | create_memory_callback: CreateMemoryInstance, get_module_specifications: GetModuleSpecifications); |
| 85 | } |
| 86 | |
| 87 | void ObjectFileWasm::Terminate() { |
| 88 | PluginManager::UnregisterPlugin(create_callback: CreateInstance); |
| 89 | } |
| 90 | |
| 91 | ObjectFile * |
| 92 | ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp, |
| 93 | offset_t data_offset, const FileSpec *file, |
| 94 | offset_t file_offset, offset_t length) { |
| 95 | Log *log = GetLog(mask: LLDBLog::Object); |
| 96 | |
| 97 | if (!data_sp) { |
| 98 | data_sp = MapFileData(file: *file, Size: length, Offset: file_offset); |
| 99 | if (!data_sp) { |
| 100 | LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s" , |
| 101 | file->GetPath().c_str()); |
| 102 | return nullptr; |
| 103 | } |
| 104 | data_offset = 0; |
| 105 | } |
| 106 | |
| 107 | assert(data_sp); |
| 108 | if (!ValidateModuleHeader(data_sp)) { |
| 109 | LLDB_LOGF(log, |
| 110 | "Failed to create ObjectFileWasm instance: invalid Wasm header" ); |
| 111 | return nullptr; |
| 112 | } |
| 113 | |
| 114 | // Update the data to contain the entire file if it doesn't contain it |
| 115 | // already. |
| 116 | if (data_sp->GetByteSize() < length) { |
| 117 | data_sp = MapFileData(file: *file, Size: length, Offset: file_offset); |
| 118 | if (!data_sp) { |
| 119 | LLDB_LOGF(log, |
| 120 | "Failed to create ObjectFileWasm instance: cannot read file %s" , |
| 121 | file->GetPath().c_str()); |
| 122 | return nullptr; |
| 123 | } |
| 124 | data_offset = 0; |
| 125 | } |
| 126 | |
| 127 | std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( |
| 128 | module_sp, data_sp, data_offset, file, file_offset, length)); |
| 129 | ArchSpec spec = objfile_up->GetArchitecture(); |
| 130 | if (spec && objfile_up->SetModulesArchitecture(spec)) { |
| 131 | LLDB_LOGF(log, |
| 132 | "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s" , |
| 133 | static_cast<void *>(objfile_up.get()), |
| 134 | static_cast<void *>(objfile_up->GetModule().get()), |
| 135 | objfile_up->GetModule()->GetSpecificationDescription().c_str(), |
| 136 | file ? file->GetPath().c_str() : "<NULL>" ); |
| 137 | return objfile_up.release(); |
| 138 | } |
| 139 | |
| 140 | LLDB_LOGF(log, "Failed to create ObjectFileWasm instance" ); |
| 141 | return nullptr; |
| 142 | } |
| 143 | |
| 144 | ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, |
| 145 | WritableDataBufferSP data_sp, |
| 146 | const ProcessSP &process_sp, |
| 147 | addr_t ) { |
| 148 | if (!ValidateModuleHeader(data_sp)) |
| 149 | return nullptr; |
| 150 | |
| 151 | std::unique_ptr<ObjectFileWasm> objfile_up( |
| 152 | new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); |
| 153 | ArchSpec spec = objfile_up->GetArchitecture(); |
| 154 | if (spec && objfile_up->SetModulesArchitecture(spec)) |
| 155 | return objfile_up.release(); |
| 156 | return nullptr; |
| 157 | } |
| 158 | |
| 159 | bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { |
| 160 | // Buffer sufficient to read a section header and find the pointer to the next |
| 161 | // section. |
| 162 | const uint32_t kBufferSize = 1024; |
| 163 | DataExtractor = ReadImageData(offset: *offset_ptr, size: kBufferSize); |
| 164 | |
| 165 | llvm::DataExtractor data = section_header_data.GetAsLLVM(); |
| 166 | llvm::DataExtractor::Cursor c(0); |
| 167 | |
| 168 | // Each section consists of: |
| 169 | // - a one-byte section id, |
| 170 | // - the u32 size of the contents, in bytes, |
| 171 | // - the actual contents. |
| 172 | uint8_t section_id = data.getU8(C&: c); |
| 173 | uint64_t payload_len = data.getULEB128(C&: c); |
| 174 | if (!c) |
| 175 | return !llvm::errorToBool(Err: c.takeError()); |
| 176 | |
| 177 | if (payload_len >= (uint64_t(1) << 32)) |
| 178 | return false; |
| 179 | |
| 180 | if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { |
| 181 | // Custom sections have the id 0. Their contents consist of a name |
| 182 | // identifying the custom section, followed by an uninterpreted sequence |
| 183 | // of bytes. |
| 184 | lldb::offset_t prev_offset = c.tell(); |
| 185 | std::optional<ConstString> sect_name = GetWasmString(data, c); |
| 186 | if (!sect_name) |
| 187 | return false; |
| 188 | |
| 189 | if (payload_len < c.tell() - prev_offset) |
| 190 | return false; |
| 191 | |
| 192 | uint32_t section_length = payload_len - (c.tell() - prev_offset); |
| 193 | m_sect_infos.push_back(x: section_info{.offset: *offset_ptr + c.tell(), .size: section_length, |
| 194 | .id: section_id, .name: *sect_name}); |
| 195 | *offset_ptr += (c.tell() + section_length); |
| 196 | } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) { |
| 197 | m_sect_infos.push_back(x: section_info{.offset: *offset_ptr + c.tell(), |
| 198 | .size: static_cast<uint32_t>(payload_len), |
| 199 | .id: section_id, .name: ConstString()}); |
| 200 | *offset_ptr += (c.tell() + payload_len); |
| 201 | } else { |
| 202 | // Invalid section id. |
| 203 | return false; |
| 204 | } |
| 205 | return true; |
| 206 | } |
| 207 | |
| 208 | bool ObjectFileWasm::DecodeSections() { |
| 209 | lldb::offset_t offset = kWasmHeaderSize; |
| 210 | if (IsInMemory()) { |
| 211 | offset += m_memory_addr; |
| 212 | } |
| 213 | |
| 214 | while (DecodeNextSection(offset_ptr: &offset)) |
| 215 | ; |
| 216 | return true; |
| 217 | } |
| 218 | |
| 219 | size_t ObjectFileWasm::GetModuleSpecifications( |
| 220 | const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, |
| 221 | offset_t file_offset, offset_t length, ModuleSpecList &specs) { |
| 222 | if (!ValidateModuleHeader(data_sp)) { |
| 223 | return 0; |
| 224 | } |
| 225 | |
| 226 | ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm" )); |
| 227 | specs.Append(spec); |
| 228 | return 1; |
| 229 | } |
| 230 | |
| 231 | ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp, |
| 232 | offset_t data_offset, const FileSpec *file, |
| 233 | offset_t offset, offset_t length) |
| 234 | : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), |
| 235 | m_arch("wasm32-unknown-unknown-wasm" ) { |
| 236 | m_data.SetAddressByteSize(4); |
| 237 | } |
| 238 | |
| 239 | ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, |
| 240 | lldb::WritableDataBufferSP , |
| 241 | const lldb::ProcessSP &process_sp, |
| 242 | lldb::addr_t ) |
| 243 | : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), |
| 244 | m_arch("wasm32-unknown-unknown-wasm" ) {} |
| 245 | |
| 246 | bool ObjectFileWasm::() { |
| 247 | // We already parsed the header during initialization. |
| 248 | return true; |
| 249 | } |
| 250 | |
| 251 | void ObjectFileWasm::ParseSymtab(Symtab &symtab) {} |
| 252 | |
| 253 | static SectionType GetSectionTypeFromName(llvm::StringRef Name) { |
| 254 | if (Name.consume_front(Prefix: ".debug_" ) || Name.consume_front(Prefix: ".zdebug_" )) { |
| 255 | return ObjectFile::GetDWARFSectionTypeFromName(name: Name); |
| 256 | } |
| 257 | return eSectionTypeOther; |
| 258 | } |
| 259 | |
| 260 | void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { |
| 261 | if (m_sections_up) |
| 262 | return; |
| 263 | |
| 264 | m_sections_up = std::make_unique<SectionList>(); |
| 265 | |
| 266 | if (m_sect_infos.empty()) { |
| 267 | DecodeSections(); |
| 268 | } |
| 269 | |
| 270 | for (const section_info §_info : m_sect_infos) { |
| 271 | SectionType section_type = eSectionTypeOther; |
| 272 | ConstString section_name; |
| 273 | offset_t file_offset = sect_info.offset & 0xffffffff; |
| 274 | addr_t vm_addr = file_offset; |
| 275 | size_t vm_size = sect_info.size; |
| 276 | |
| 277 | if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { |
| 278 | section_type = eSectionTypeCode; |
| 279 | section_name = ConstString("code" ); |
| 280 | |
| 281 | // A code address in DWARF for WebAssembly is the offset of an |
| 282 | // instruction relative within the Code section of the WebAssembly file. |
| 283 | // For this reason Section::GetFileAddress() must return zero for the |
| 284 | // Code section. |
| 285 | vm_addr = 0; |
| 286 | } else { |
| 287 | section_type = GetSectionTypeFromName(Name: sect_info.name.GetStringRef()); |
| 288 | if (section_type == eSectionTypeOther) |
| 289 | continue; |
| 290 | section_name = sect_info.name; |
| 291 | if (!IsInMemory()) { |
| 292 | vm_size = 0; |
| 293 | vm_addr = 0; |
| 294 | } |
| 295 | } |
| 296 | |
| 297 | SectionSP section_sp( |
| 298 | new Section(GetModule(), // Module to which this section belongs. |
| 299 | this, // ObjectFile to which this section belongs and |
| 300 | // should read section data from. |
| 301 | section_type, // Section ID. |
| 302 | section_name, // Section name. |
| 303 | section_type, // Section type. |
| 304 | vm_addr, // VM address. |
| 305 | vm_size, // VM size in bytes of this section. |
| 306 | file_offset, // Offset of this section in the file. |
| 307 | sect_info.size, // Size of the section as found in the file. |
| 308 | 0, // Alignment of the section |
| 309 | 0, // Flags for this section. |
| 310 | 1)); // Number of host bytes per target byte |
| 311 | m_sections_up->AddSection(section_sp); |
| 312 | unified_section_list.AddSection(section_sp); |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, |
| 317 | bool value_is_offset) { |
| 318 | /// In WebAssembly, linear memory is disjointed from code space. The VM can |
| 319 | /// load multiple instances of a module, which logically share the same code. |
| 320 | /// We represent a wasm32 code address with 64-bits, like: |
| 321 | /// 63 32 31 0 |
| 322 | /// +---------------+---------------+ |
| 323 | /// + module_id | offset | |
| 324 | /// +---------------+---------------+ |
| 325 | /// where the lower 32 bits represent a module offset (relative to the module |
| 326 | /// start not to the beginning of the code section) and the higher 32 bits |
| 327 | /// uniquely identify the module in the WebAssembly VM. |
| 328 | /// In other words, we assume that each WebAssembly module is loaded by the |
| 329 | /// engine at a 64-bit address that starts at the boundary of 4GB pages, like |
| 330 | /// 0x0000000400000000 for module_id == 4. |
| 331 | /// These 64-bit addresses will be used to request code ranges for a specific |
| 332 | /// module from the WebAssembly engine. |
| 333 | |
| 334 | assert(m_memory_addr == LLDB_INVALID_ADDRESS || |
| 335 | m_memory_addr == load_address); |
| 336 | |
| 337 | ModuleSP module_sp = GetModule(); |
| 338 | if (!module_sp) |
| 339 | return false; |
| 340 | |
| 341 | DecodeSections(); |
| 342 | |
| 343 | size_t num_loaded_sections = 0; |
| 344 | SectionList *section_list = GetSectionList(); |
| 345 | if (!section_list) |
| 346 | return false; |
| 347 | |
| 348 | const size_t num_sections = section_list->GetSize(); |
| 349 | for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { |
| 350 | SectionSP section_sp(section_list->GetSectionAtIndex(idx: sect_idx)); |
| 351 | if (target.SetSectionLoadAddress( |
| 352 | section: section_sp, load_addr: load_address | section_sp->GetFileOffset())) { |
| 353 | ++num_loaded_sections; |
| 354 | } |
| 355 | } |
| 356 | |
| 357 | return num_loaded_sections > 0; |
| 358 | } |
| 359 | |
| 360 | DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) { |
| 361 | DataExtractor data; |
| 362 | if (m_file) { |
| 363 | if (offset < GetByteSize()) { |
| 364 | size = std::min(a: static_cast<uint64_t>(size), b: GetByteSize() - offset); |
| 365 | auto buffer_sp = MapFileData(file: m_file, Size: size, Offset: offset); |
| 366 | return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); |
| 367 | } |
| 368 | } else { |
| 369 | ProcessSP process_sp(m_process_wp.lock()); |
| 370 | if (process_sp) { |
| 371 | auto data_up = std::make_unique<DataBufferHeap>(args&: size, args: 0); |
| 372 | Status readmem_error; |
| 373 | size_t bytes_read = process_sp->ReadMemory( |
| 374 | vm_addr: offset, buf: data_up->GetBytes(), size: data_up->GetByteSize(), error&: readmem_error); |
| 375 | if (bytes_read > 0) { |
| 376 | DataBufferSP buffer_sp(data_up.release()); |
| 377 | data.SetData(data_sp: buffer_sp, offset: 0, length: buffer_sp->GetByteSize()); |
| 378 | } |
| 379 | } |
| 380 | } |
| 381 | |
| 382 | data.SetByteOrder(GetByteOrder()); |
| 383 | return data; |
| 384 | } |
| 385 | |
| 386 | std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { |
| 387 | static ConstString g_sect_name_external_debug_info("external_debug_info" ); |
| 388 | |
| 389 | for (const section_info §_info : m_sect_infos) { |
| 390 | if (g_sect_name_external_debug_info == sect_info.name) { |
| 391 | const uint32_t kBufferSize = 1024; |
| 392 | DataExtractor = |
| 393 | ReadImageData(offset: sect_info.offset, size: kBufferSize); |
| 394 | llvm::DataExtractor data = section_header_data.GetAsLLVM(); |
| 395 | llvm::DataExtractor::Cursor c(0); |
| 396 | std::optional<ConstString> symbols_url = GetWasmString(data, c); |
| 397 | if (symbols_url) |
| 398 | return FileSpec(symbols_url->GetStringRef()); |
| 399 | } |
| 400 | } |
| 401 | return std::nullopt; |
| 402 | } |
| 403 | |
| 404 | void ObjectFileWasm::Dump(Stream *s) { |
| 405 | ModuleSP module_sp(GetModule()); |
| 406 | if (!module_sp) |
| 407 | return; |
| 408 | |
| 409 | std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); |
| 410 | |
| 411 | llvm::raw_ostream &ostream = s->AsRawOstream(); |
| 412 | ostream << static_cast<void *>(this) << ": " ; |
| 413 | s->Indent(); |
| 414 | ostream << "ObjectFileWasm, file = '" ; |
| 415 | m_file.Dump(s&: ostream); |
| 416 | ostream << "', arch = " ; |
| 417 | ostream << GetArchitecture().GetArchitectureName() << "\n" ; |
| 418 | |
| 419 | SectionList *sections = GetSectionList(); |
| 420 | if (sections) { |
| 421 | sections->Dump(s&: s->AsRawOstream(), indent: s->GetIndentLevel(), target: nullptr, show_header: true, |
| 422 | UINT32_MAX); |
| 423 | } |
| 424 | ostream << "\n" ; |
| 425 | DumpSectionHeaders(ostream); |
| 426 | ostream << "\n" ; |
| 427 | } |
| 428 | |
| 429 | void ObjectFileWasm::(llvm::raw_ostream &ostream, |
| 430 | const section_info_t &sh) { |
| 431 | ostream << llvm::left_justify(Str: sh.name.GetStringRef(), Width: 16) << " " |
| 432 | << llvm::format_hex(N: sh.offset, Width: 10) << " " |
| 433 | << llvm::format_hex(N: sh.size, Width: 10) << " " << llvm::format_hex(N: sh.id, Width: 6) |
| 434 | << "\n" ; |
| 435 | } |
| 436 | |
| 437 | void ObjectFileWasm::(llvm::raw_ostream &ostream) { |
| 438 | ostream << "Section Headers\n" ; |
| 439 | ostream << "IDX name addr size id\n" ; |
| 440 | ostream << "==== ---------------- ---------- ---------- ------\n" ; |
| 441 | |
| 442 | uint32_t idx = 0; |
| 443 | for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); |
| 444 | ++pos, ++idx) { |
| 445 | ostream << "[" << llvm::format_decimal(N: idx, Width: 2) << "] " ; |
| 446 | ObjectFileWasm::DumpSectionHeader(ostream, sh: *pos); |
| 447 | } |
| 448 | } |
| 449 | |