1//===-- ObjectFileWasm.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ObjectFileWasm.h"
10#include "lldb/Core/Module.h"
11#include "lldb/Core/ModuleSpec.h"
12#include "lldb/Core/PluginManager.h"
13#include "lldb/Core/Section.h"
14#include "lldb/Target/Process.h"
15#include "lldb/Target/SectionLoadList.h"
16#include "lldb/Target/Target.h"
17#include "lldb/Utility/DataBufferHeap.h"
18#include "lldb/Utility/LLDBLog.h"
19#include "lldb/Utility/Log.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/BinaryFormat/Magic.h"
24#include "llvm/BinaryFormat/Wasm.h"
25#include "llvm/Support/Endian.h"
26#include "llvm/Support/Format.h"
27#include <optional>
28
29using namespace lldb;
30using namespace lldb_private;
31using namespace lldb_private::wasm;
32
33LLDB_PLUGIN_DEFINE(ObjectFileWasm)
34
35static const uint32_t kWasmHeaderSize =
36 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
37
38/// Checks whether the data buffer starts with a valid Wasm module header.
39static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
40 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
41 return false;
42
43 if (llvm::identify_magic(magic: toStringRef(Input: data_sp->GetData())) !=
44 llvm::file_magic::wasm_object)
45 return false;
46
47 const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
48
49 uint32_t version = llvm::support::endian::read32le(P: Ptr);
50 return version == llvm::wasm::WasmVersion;
51}
52
53static std::optional<ConstString>
54GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
55 // A Wasm string is encoded as a vector of UTF-8 codes.
56 // Vectors are encoded with their u32 length followed by the element
57 // sequence.
58 uint64_t len = data.getULEB128(C&: c);
59 if (!c) {
60 consumeError(Err: c.takeError());
61 return std::nullopt;
62 }
63
64 if (len >= (uint64_t(1) << 32)) {
65 return std::nullopt;
66 }
67
68 llvm::SmallVector<uint8_t, 32> str_storage;
69 data.getU8(C&: c, Dst&: str_storage, Count: len);
70 if (!c) {
71 consumeError(Err: c.takeError());
72 return std::nullopt;
73 }
74
75 llvm::StringRef str = toStringRef(Input: llvm::ArrayRef(str_storage));
76 return ConstString(str);
77}
78
79char ObjectFileWasm::ID;
80
81void ObjectFileWasm::Initialize() {
82 PluginManager::RegisterPlugin(name: GetPluginNameStatic(),
83 description: GetPluginDescriptionStatic(), create_callback: CreateInstance,
84 create_memory_callback: CreateMemoryInstance, get_module_specifications: GetModuleSpecifications);
85}
86
87void ObjectFileWasm::Terminate() {
88 PluginManager::UnregisterPlugin(create_callback: CreateInstance);
89}
90
91ObjectFile *
92ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp,
93 offset_t data_offset, const FileSpec *file,
94 offset_t file_offset, offset_t length) {
95 Log *log = GetLog(mask: LLDBLog::Object);
96
97 if (!data_sp) {
98 data_sp = MapFileData(file: *file, Size: length, Offset: file_offset);
99 if (!data_sp) {
100 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
101 file->GetPath().c_str());
102 return nullptr;
103 }
104 data_offset = 0;
105 }
106
107 assert(data_sp);
108 if (!ValidateModuleHeader(data_sp)) {
109 LLDB_LOGF(log,
110 "Failed to create ObjectFileWasm instance: invalid Wasm header");
111 return nullptr;
112 }
113
114 // Update the data to contain the entire file if it doesn't contain it
115 // already.
116 if (data_sp->GetByteSize() < length) {
117 data_sp = MapFileData(file: *file, Size: length, Offset: file_offset);
118 if (!data_sp) {
119 LLDB_LOGF(log,
120 "Failed to create ObjectFileWasm instance: cannot read file %s",
121 file->GetPath().c_str());
122 return nullptr;
123 }
124 data_offset = 0;
125 }
126
127 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
128 module_sp, data_sp, data_offset, file, file_offset, length));
129 ArchSpec spec = objfile_up->GetArchitecture();
130 if (spec && objfile_up->SetModulesArchitecture(spec)) {
131 LLDB_LOGF(log,
132 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
133 static_cast<void *>(objfile_up.get()),
134 static_cast<void *>(objfile_up->GetModule().get()),
135 objfile_up->GetModule()->GetSpecificationDescription().c_str(),
136 file ? file->GetPath().c_str() : "<NULL>");
137 return objfile_up.release();
138 }
139
140 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
141 return nullptr;
142}
143
144ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
145 WritableDataBufferSP data_sp,
146 const ProcessSP &process_sp,
147 addr_t header_addr) {
148 if (!ValidateModuleHeader(data_sp))
149 return nullptr;
150
151 std::unique_ptr<ObjectFileWasm> objfile_up(
152 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
153 ArchSpec spec = objfile_up->GetArchitecture();
154 if (spec && objfile_up->SetModulesArchitecture(spec))
155 return objfile_up.release();
156 return nullptr;
157}
158
159bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
160 // Buffer sufficient to read a section header and find the pointer to the next
161 // section.
162 const uint32_t kBufferSize = 1024;
163 DataExtractor section_header_data = ReadImageData(offset: *offset_ptr, size: kBufferSize);
164
165 llvm::DataExtractor data = section_header_data.GetAsLLVM();
166 llvm::DataExtractor::Cursor c(0);
167
168 // Each section consists of:
169 // - a one-byte section id,
170 // - the u32 size of the contents, in bytes,
171 // - the actual contents.
172 uint8_t section_id = data.getU8(C&: c);
173 uint64_t payload_len = data.getULEB128(C&: c);
174 if (!c)
175 return !llvm::errorToBool(Err: c.takeError());
176
177 if (payload_len >= (uint64_t(1) << 32))
178 return false;
179
180 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
181 // Custom sections have the id 0. Their contents consist of a name
182 // identifying the custom section, followed by an uninterpreted sequence
183 // of bytes.
184 lldb::offset_t prev_offset = c.tell();
185 std::optional<ConstString> sect_name = GetWasmString(data, c);
186 if (!sect_name)
187 return false;
188
189 if (payload_len < c.tell() - prev_offset)
190 return false;
191
192 uint32_t section_length = payload_len - (c.tell() - prev_offset);
193 m_sect_infos.push_back(x: section_info{.offset: *offset_ptr + c.tell(), .size: section_length,
194 .id: section_id, .name: *sect_name});
195 *offset_ptr += (c.tell() + section_length);
196 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197 m_sect_infos.push_back(x: section_info{.offset: *offset_ptr + c.tell(),
198 .size: static_cast<uint32_t>(payload_len),
199 .id: section_id, .name: ConstString()});
200 *offset_ptr += (c.tell() + payload_len);
201 } else {
202 // Invalid section id.
203 return false;
204 }
205 return true;
206}
207
208bool ObjectFileWasm::DecodeSections() {
209 lldb::offset_t offset = kWasmHeaderSize;
210 if (IsInMemory()) {
211 offset += m_memory_addr;
212 }
213
214 while (DecodeNextSection(offset_ptr: &offset))
215 ;
216 return true;
217}
218
219size_t ObjectFileWasm::GetModuleSpecifications(
220 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
221 offset_t file_offset, offset_t length, ModuleSpecList &specs) {
222 if (!ValidateModuleHeader(data_sp)) {
223 return 0;
224 }
225
226 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
227 specs.Append(spec);
228 return 1;
229}
230
231ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp,
232 offset_t data_offset, const FileSpec *file,
233 offset_t offset, offset_t length)
234 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
235 m_arch("wasm32-unknown-unknown-wasm") {
236 m_data.SetAddressByteSize(4);
237}
238
239ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
240 lldb::WritableDataBufferSP header_data_sp,
241 const lldb::ProcessSP &process_sp,
242 lldb::addr_t header_addr)
243 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
244 m_arch("wasm32-unknown-unknown-wasm") {}
245
246bool ObjectFileWasm::ParseHeader() {
247 // We already parsed the header during initialization.
248 return true;
249}
250
251void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
252
253static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
254 if (Name.consume_front(Prefix: ".debug_") || Name.consume_front(Prefix: ".zdebug_")) {
255 return llvm::StringSwitch<SectionType>(Name)
256 .Case(S: "abbrev", Value: eSectionTypeDWARFDebugAbbrev)
257 .Case(S: "abbrev.dwo", Value: eSectionTypeDWARFDebugAbbrevDwo)
258 .Case(S: "addr", Value: eSectionTypeDWARFDebugAddr)
259 .Case(S: "aranges", Value: eSectionTypeDWARFDebugAranges)
260 .Case(S: "cu_index", Value: eSectionTypeDWARFDebugCuIndex)
261 .Case(S: "frame", Value: eSectionTypeDWARFDebugFrame)
262 .Case(S: "info", Value: eSectionTypeDWARFDebugInfo)
263 .Case(S: "info.dwo", Value: eSectionTypeDWARFDebugInfoDwo)
264 .Cases(S0: "line", S1: "line.dwo", Value: eSectionTypeDWARFDebugLine)
265 .Cases(S0: "line_str", S1: "line_str.dwo", Value: eSectionTypeDWARFDebugLineStr)
266 .Case(S: "loc", Value: eSectionTypeDWARFDebugLoc)
267 .Case(S: "loc.dwo", Value: eSectionTypeDWARFDebugLocDwo)
268 .Case(S: "loclists", Value: eSectionTypeDWARFDebugLocLists)
269 .Case(S: "loclists.dwo", Value: eSectionTypeDWARFDebugLocListsDwo)
270 .Case(S: "macinfo", Value: eSectionTypeDWARFDebugMacInfo)
271 .Cases(S0: "macro", S1: "macro.dwo", Value: eSectionTypeDWARFDebugMacro)
272 .Case(S: "names", Value: eSectionTypeDWARFDebugNames)
273 .Case(S: "pubnames", Value: eSectionTypeDWARFDebugPubNames)
274 .Case(S: "pubtypes", Value: eSectionTypeDWARFDebugPubTypes)
275 .Case(S: "ranges", Value: eSectionTypeDWARFDebugRanges)
276 .Case(S: "rnglists", Value: eSectionTypeDWARFDebugRngLists)
277 .Case(S: "rnglists.dwo", Value: eSectionTypeDWARFDebugRngListsDwo)
278 .Case(S: "str", Value: eSectionTypeDWARFDebugStr)
279 .Case(S: "str.dwo", Value: eSectionTypeDWARFDebugStrDwo)
280 .Case(S: "str_offsets", Value: eSectionTypeDWARFDebugStrOffsets)
281 .Case(S: "str_offsets.dwo", Value: eSectionTypeDWARFDebugStrOffsetsDwo)
282 .Case(S: "tu_index", Value: eSectionTypeDWARFDebugTuIndex)
283 .Case(S: "types", Value: eSectionTypeDWARFDebugTypes)
284 .Case(S: "types.dwo", Value: eSectionTypeDWARFDebugTypesDwo)
285 .Default(Value: eSectionTypeOther);
286 }
287 return eSectionTypeOther;
288}
289
290void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
291 if (m_sections_up)
292 return;
293
294 m_sections_up = std::make_unique<SectionList>();
295
296 if (m_sect_infos.empty()) {
297 DecodeSections();
298 }
299
300 for (const section_info &sect_info : m_sect_infos) {
301 SectionType section_type = eSectionTypeOther;
302 ConstString section_name;
303 offset_t file_offset = sect_info.offset & 0xffffffff;
304 addr_t vm_addr = file_offset;
305 size_t vm_size = sect_info.size;
306
307 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
308 section_type = eSectionTypeCode;
309 section_name = ConstString("code");
310
311 // A code address in DWARF for WebAssembly is the offset of an
312 // instruction relative within the Code section of the WebAssembly file.
313 // For this reason Section::GetFileAddress() must return zero for the
314 // Code section.
315 vm_addr = 0;
316 } else {
317 section_type = GetSectionTypeFromName(Name: sect_info.name.GetStringRef());
318 if (section_type == eSectionTypeOther)
319 continue;
320 section_name = sect_info.name;
321 if (!IsInMemory()) {
322 vm_size = 0;
323 vm_addr = 0;
324 }
325 }
326
327 SectionSP section_sp(
328 new Section(GetModule(), // Module to which this section belongs.
329 this, // ObjectFile to which this section belongs and
330 // should read section data from.
331 section_type, // Section ID.
332 section_name, // Section name.
333 section_type, // Section type.
334 vm_addr, // VM address.
335 vm_size, // VM size in bytes of this section.
336 file_offset, // Offset of this section in the file.
337 sect_info.size, // Size of the section as found in the file.
338 0, // Alignment of the section
339 0, // Flags for this section.
340 1)); // Number of host bytes per target byte
341 m_sections_up->AddSection(section_sp);
342 unified_section_list.AddSection(section_sp);
343 }
344}
345
346bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
347 bool value_is_offset) {
348 /// In WebAssembly, linear memory is disjointed from code space. The VM can
349 /// load multiple instances of a module, which logically share the same code.
350 /// We represent a wasm32 code address with 64-bits, like:
351 /// 63 32 31 0
352 /// +---------------+---------------+
353 /// + module_id | offset |
354 /// +---------------+---------------+
355 /// where the lower 32 bits represent a module offset (relative to the module
356 /// start not to the beginning of the code section) and the higher 32 bits
357 /// uniquely identify the module in the WebAssembly VM.
358 /// In other words, we assume that each WebAssembly module is loaded by the
359 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
360 /// 0x0000000400000000 for module_id == 4.
361 /// These 64-bit addresses will be used to request code ranges for a specific
362 /// module from the WebAssembly engine.
363
364 assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
365 m_memory_addr == load_address);
366
367 ModuleSP module_sp = GetModule();
368 if (!module_sp)
369 return false;
370
371 DecodeSections();
372
373 size_t num_loaded_sections = 0;
374 SectionList *section_list = GetSectionList();
375 if (!section_list)
376 return false;
377
378 const size_t num_sections = section_list->GetSize();
379 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
380 SectionSP section_sp(section_list->GetSectionAtIndex(idx: sect_idx));
381 if (target.SetSectionLoadAddress(
382 section: section_sp, load_addr: load_address | section_sp->GetFileOffset())) {
383 ++num_loaded_sections;
384 }
385 }
386
387 return num_loaded_sections > 0;
388}
389
390DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
391 DataExtractor data;
392 if (m_file) {
393 if (offset < GetByteSize()) {
394 size = std::min(a: static_cast<uint64_t>(size), b: GetByteSize() - offset);
395 auto buffer_sp = MapFileData(file: m_file, Size: size, Offset: offset);
396 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
397 }
398 } else {
399 ProcessSP process_sp(m_process_wp.lock());
400 if (process_sp) {
401 auto data_up = std::make_unique<DataBufferHeap>(args&: size, args: 0);
402 Status readmem_error;
403 size_t bytes_read = process_sp->ReadMemory(
404 vm_addr: offset, buf: data_up->GetBytes(), size: data_up->GetByteSize(), error&: readmem_error);
405 if (bytes_read > 0) {
406 DataBufferSP buffer_sp(data_up.release());
407 data.SetData(data_sp: buffer_sp, offset: 0, length: buffer_sp->GetByteSize());
408 }
409 }
410 }
411
412 data.SetByteOrder(GetByteOrder());
413 return data;
414}
415
416std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
417 static ConstString g_sect_name_external_debug_info("external_debug_info");
418
419 for (const section_info &sect_info : m_sect_infos) {
420 if (g_sect_name_external_debug_info == sect_info.name) {
421 const uint32_t kBufferSize = 1024;
422 DataExtractor section_header_data =
423 ReadImageData(offset: sect_info.offset, size: kBufferSize);
424 llvm::DataExtractor data = section_header_data.GetAsLLVM();
425 llvm::DataExtractor::Cursor c(0);
426 std::optional<ConstString> symbols_url = GetWasmString(data, c);
427 if (symbols_url)
428 return FileSpec(symbols_url->GetStringRef());
429 }
430 }
431 return std::nullopt;
432}
433
434void ObjectFileWasm::Dump(Stream *s) {
435 ModuleSP module_sp(GetModule());
436 if (!module_sp)
437 return;
438
439 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
440
441 llvm::raw_ostream &ostream = s->AsRawOstream();
442 ostream << static_cast<void *>(this) << ": ";
443 s->Indent();
444 ostream << "ObjectFileWasm, file = '";
445 m_file.Dump(s&: ostream);
446 ostream << "', arch = ";
447 ostream << GetArchitecture().GetArchitectureName() << "\n";
448
449 SectionList *sections = GetSectionList();
450 if (sections) {
451 sections->Dump(s&: s->AsRawOstream(), indent: s->GetIndentLevel(), target: nullptr, show_header: true,
452 UINT32_MAX);
453 }
454 ostream << "\n";
455 DumpSectionHeaders(ostream);
456 ostream << "\n";
457}
458
459void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
460 const section_info_t &sh) {
461 ostream << llvm::left_justify(Str: sh.name.GetStringRef(), Width: 16) << " "
462 << llvm::format_hex(N: sh.offset, Width: 10) << " "
463 << llvm::format_hex(N: sh.size, Width: 10) << " " << llvm::format_hex(N: sh.id, Width: 6)
464 << "\n";
465}
466
467void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
468 ostream << "Section Headers\n";
469 ostream << "IDX name addr size id\n";
470 ostream << "==== ---------------- ---------- ---------- ------\n";
471
472 uint32_t idx = 0;
473 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
474 ++pos, ++idx) {
475 ostream << "[" << llvm::format_decimal(N: idx, Width: 2) << "] ";
476 ObjectFileWasm::DumpSectionHeader(ostream, sh: *pos);
477 }
478}
479

// Source code of lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp