1 | //===-- ObjectFileWasm.cpp ------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "ObjectFileWasm.h" |
10 | #include "lldb/Core/Module.h" |
11 | #include "lldb/Core/ModuleSpec.h" |
12 | #include "lldb/Core/PluginManager.h" |
13 | #include "lldb/Core/Section.h" |
14 | #include "lldb/Target/Process.h" |
15 | #include "lldb/Target/SectionLoadList.h" |
16 | #include "lldb/Target/Target.h" |
17 | #include "lldb/Utility/DataBufferHeap.h" |
18 | #include "lldb/Utility/LLDBLog.h" |
19 | #include "lldb/Utility/Log.h" |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/BinaryFormat/Magic.h" |
24 | #include "llvm/BinaryFormat/Wasm.h" |
25 | #include "llvm/Support/Endian.h" |
26 | #include "llvm/Support/Format.h" |
27 | #include <optional> |
28 | |
29 | using namespace lldb; |
30 | using namespace lldb_private; |
31 | using namespace lldb_private::wasm; |
32 | |
33 | LLDB_PLUGIN_DEFINE(ObjectFileWasm) |
34 | |
35 | static const uint32_t = |
36 | sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); |
37 | |
38 | /// Checks whether the data buffer starts with a valid Wasm module header. |
39 | static bool (const DataBufferSP &data_sp) { |
40 | if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) |
41 | return false; |
42 | |
43 | if (llvm::identify_magic(magic: toStringRef(Input: data_sp->GetData())) != |
44 | llvm::file_magic::wasm_object) |
45 | return false; |
46 | |
47 | const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); |
48 | |
49 | uint32_t version = llvm::support::endian::read32le(P: Ptr); |
50 | return version == llvm::wasm::WasmVersion; |
51 | } |
52 | |
53 | static std::optional<ConstString> |
54 | (llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { |
55 | // A Wasm string is encoded as a vector of UTF-8 codes. |
56 | // Vectors are encoded with their u32 length followed by the element |
57 | // sequence. |
58 | uint64_t len = data.getULEB128(C&: c); |
59 | if (!c) { |
60 | consumeError(Err: c.takeError()); |
61 | return std::nullopt; |
62 | } |
63 | |
64 | if (len >= (uint64_t(1) << 32)) { |
65 | return std::nullopt; |
66 | } |
67 | |
68 | llvm::SmallVector<uint8_t, 32> str_storage; |
69 | data.getU8(C&: c, Dst&: str_storage, Count: len); |
70 | if (!c) { |
71 | consumeError(Err: c.takeError()); |
72 | return std::nullopt; |
73 | } |
74 | |
75 | llvm::StringRef str = toStringRef(Input: llvm::ArrayRef(str_storage)); |
76 | return ConstString(str); |
77 | } |
78 | |
79 | char ObjectFileWasm::ID; |
80 | |
81 | void ObjectFileWasm::Initialize() { |
82 | PluginManager::RegisterPlugin(name: GetPluginNameStatic(), |
83 | description: GetPluginDescriptionStatic(), create_callback: CreateInstance, |
84 | create_memory_callback: CreateMemoryInstance, get_module_specifications: GetModuleSpecifications); |
85 | } |
86 | |
87 | void ObjectFileWasm::Terminate() { |
88 | PluginManager::UnregisterPlugin(create_callback: CreateInstance); |
89 | } |
90 | |
91 | ObjectFile * |
92 | ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp, |
93 | offset_t data_offset, const FileSpec *file, |
94 | offset_t file_offset, offset_t length) { |
95 | Log *log = GetLog(mask: LLDBLog::Object); |
96 | |
97 | if (!data_sp) { |
98 | data_sp = MapFileData(file: *file, Size: length, Offset: file_offset); |
99 | if (!data_sp) { |
100 | LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s" , |
101 | file->GetPath().c_str()); |
102 | return nullptr; |
103 | } |
104 | data_offset = 0; |
105 | } |
106 | |
107 | assert(data_sp); |
108 | if (!ValidateModuleHeader(data_sp)) { |
109 | LLDB_LOGF(log, |
110 | "Failed to create ObjectFileWasm instance: invalid Wasm header" ); |
111 | return nullptr; |
112 | } |
113 | |
114 | // Update the data to contain the entire file if it doesn't contain it |
115 | // already. |
116 | if (data_sp->GetByteSize() < length) { |
117 | data_sp = MapFileData(file: *file, Size: length, Offset: file_offset); |
118 | if (!data_sp) { |
119 | LLDB_LOGF(log, |
120 | "Failed to create ObjectFileWasm instance: cannot read file %s" , |
121 | file->GetPath().c_str()); |
122 | return nullptr; |
123 | } |
124 | data_offset = 0; |
125 | } |
126 | |
127 | std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( |
128 | module_sp, data_sp, data_offset, file, file_offset, length)); |
129 | ArchSpec spec = objfile_up->GetArchitecture(); |
130 | if (spec && objfile_up->SetModulesArchitecture(spec)) { |
131 | LLDB_LOGF(log, |
132 | "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s" , |
133 | static_cast<void *>(objfile_up.get()), |
134 | static_cast<void *>(objfile_up->GetModule().get()), |
135 | objfile_up->GetModule()->GetSpecificationDescription().c_str(), |
136 | file ? file->GetPath().c_str() : "<NULL>" ); |
137 | return objfile_up.release(); |
138 | } |
139 | |
140 | LLDB_LOGF(log, "Failed to create ObjectFileWasm instance" ); |
141 | return nullptr; |
142 | } |
143 | |
144 | ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, |
145 | WritableDataBufferSP data_sp, |
146 | const ProcessSP &process_sp, |
147 | addr_t ) { |
148 | if (!ValidateModuleHeader(data_sp)) |
149 | return nullptr; |
150 | |
151 | std::unique_ptr<ObjectFileWasm> objfile_up( |
152 | new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); |
153 | ArchSpec spec = objfile_up->GetArchitecture(); |
154 | if (spec && objfile_up->SetModulesArchitecture(spec)) |
155 | return objfile_up.release(); |
156 | return nullptr; |
157 | } |
158 | |
159 | bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { |
160 | // Buffer sufficient to read a section header and find the pointer to the next |
161 | // section. |
162 | const uint32_t kBufferSize = 1024; |
163 | DataExtractor = ReadImageData(offset: *offset_ptr, size: kBufferSize); |
164 | |
165 | llvm::DataExtractor data = section_header_data.GetAsLLVM(); |
166 | llvm::DataExtractor::Cursor c(0); |
167 | |
168 | // Each section consists of: |
169 | // - a one-byte section id, |
170 | // - the u32 size of the contents, in bytes, |
171 | // - the actual contents. |
172 | uint8_t section_id = data.getU8(C&: c); |
173 | uint64_t payload_len = data.getULEB128(C&: c); |
174 | if (!c) |
175 | return !llvm::errorToBool(Err: c.takeError()); |
176 | |
177 | if (payload_len >= (uint64_t(1) << 32)) |
178 | return false; |
179 | |
180 | if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { |
181 | // Custom sections have the id 0. Their contents consist of a name |
182 | // identifying the custom section, followed by an uninterpreted sequence |
183 | // of bytes. |
184 | lldb::offset_t prev_offset = c.tell(); |
185 | std::optional<ConstString> sect_name = GetWasmString(data, c); |
186 | if (!sect_name) |
187 | return false; |
188 | |
189 | if (payload_len < c.tell() - prev_offset) |
190 | return false; |
191 | |
192 | uint32_t section_length = payload_len - (c.tell() - prev_offset); |
193 | m_sect_infos.push_back(x: section_info{.offset: *offset_ptr + c.tell(), .size: section_length, |
194 | .id: section_id, .name: *sect_name}); |
195 | *offset_ptr += (c.tell() + section_length); |
196 | } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) { |
197 | m_sect_infos.push_back(x: section_info{.offset: *offset_ptr + c.tell(), |
198 | .size: static_cast<uint32_t>(payload_len), |
199 | .id: section_id, .name: ConstString()}); |
200 | *offset_ptr += (c.tell() + payload_len); |
201 | } else { |
202 | // Invalid section id. |
203 | return false; |
204 | } |
205 | return true; |
206 | } |
207 | |
208 | bool ObjectFileWasm::DecodeSections() { |
209 | lldb::offset_t offset = kWasmHeaderSize; |
210 | if (IsInMemory()) { |
211 | offset += m_memory_addr; |
212 | } |
213 | |
214 | while (DecodeNextSection(offset_ptr: &offset)) |
215 | ; |
216 | return true; |
217 | } |
218 | |
219 | size_t ObjectFileWasm::GetModuleSpecifications( |
220 | const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, |
221 | offset_t file_offset, offset_t length, ModuleSpecList &specs) { |
222 | if (!ValidateModuleHeader(data_sp)) { |
223 | return 0; |
224 | } |
225 | |
226 | ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm" )); |
227 | specs.Append(spec); |
228 | return 1; |
229 | } |
230 | |
231 | ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp, |
232 | offset_t data_offset, const FileSpec *file, |
233 | offset_t offset, offset_t length) |
234 | : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), |
235 | m_arch("wasm32-unknown-unknown-wasm" ) { |
236 | m_data.SetAddressByteSize(4); |
237 | } |
238 | |
239 | ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, |
240 | lldb::WritableDataBufferSP , |
241 | const lldb::ProcessSP &process_sp, |
242 | lldb::addr_t ) |
243 | : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), |
244 | m_arch("wasm32-unknown-unknown-wasm" ) {} |
245 | |
246 | bool ObjectFileWasm::() { |
247 | // We already parsed the header during initialization. |
248 | return true; |
249 | } |
250 | |
251 | void ObjectFileWasm::ParseSymtab(Symtab &symtab) {} |
252 | |
253 | static SectionType GetSectionTypeFromName(llvm::StringRef Name) { |
254 | if (Name.consume_front(Prefix: ".debug_" ) || Name.consume_front(Prefix: ".zdebug_" )) { |
255 | return llvm::StringSwitch<SectionType>(Name) |
256 | .Case(S: "abbrev" , Value: eSectionTypeDWARFDebugAbbrev) |
257 | .Case(S: "abbrev.dwo" , Value: eSectionTypeDWARFDebugAbbrevDwo) |
258 | .Case(S: "addr" , Value: eSectionTypeDWARFDebugAddr) |
259 | .Case(S: "aranges" , Value: eSectionTypeDWARFDebugAranges) |
260 | .Case(S: "cu_index" , Value: eSectionTypeDWARFDebugCuIndex) |
261 | .Case(S: "frame" , Value: eSectionTypeDWARFDebugFrame) |
262 | .Case(S: "info" , Value: eSectionTypeDWARFDebugInfo) |
263 | .Case(S: "info.dwo" , Value: eSectionTypeDWARFDebugInfoDwo) |
264 | .Cases(S0: "line" , S1: "line.dwo" , Value: eSectionTypeDWARFDebugLine) |
265 | .Cases(S0: "line_str" , S1: "line_str.dwo" , Value: eSectionTypeDWARFDebugLineStr) |
266 | .Case(S: "loc" , Value: eSectionTypeDWARFDebugLoc) |
267 | .Case(S: "loc.dwo" , Value: eSectionTypeDWARFDebugLocDwo) |
268 | .Case(S: "loclists" , Value: eSectionTypeDWARFDebugLocLists) |
269 | .Case(S: "loclists.dwo" , Value: eSectionTypeDWARFDebugLocListsDwo) |
270 | .Case(S: "macinfo" , Value: eSectionTypeDWARFDebugMacInfo) |
271 | .Cases(S0: "macro" , S1: "macro.dwo" , Value: eSectionTypeDWARFDebugMacro) |
272 | .Case(S: "names" , Value: eSectionTypeDWARFDebugNames) |
273 | .Case(S: "pubnames" , Value: eSectionTypeDWARFDebugPubNames) |
274 | .Case(S: "pubtypes" , Value: eSectionTypeDWARFDebugPubTypes) |
275 | .Case(S: "ranges" , Value: eSectionTypeDWARFDebugRanges) |
276 | .Case(S: "rnglists" , Value: eSectionTypeDWARFDebugRngLists) |
277 | .Case(S: "rnglists.dwo" , Value: eSectionTypeDWARFDebugRngListsDwo) |
278 | .Case(S: "str" , Value: eSectionTypeDWARFDebugStr) |
279 | .Case(S: "str.dwo" , Value: eSectionTypeDWARFDebugStrDwo) |
280 | .Case(S: "str_offsets" , Value: eSectionTypeDWARFDebugStrOffsets) |
281 | .Case(S: "str_offsets.dwo" , Value: eSectionTypeDWARFDebugStrOffsetsDwo) |
282 | .Case(S: "tu_index" , Value: eSectionTypeDWARFDebugTuIndex) |
283 | .Case(S: "types" , Value: eSectionTypeDWARFDebugTypes) |
284 | .Case(S: "types.dwo" , Value: eSectionTypeDWARFDebugTypesDwo) |
285 | .Default(Value: eSectionTypeOther); |
286 | } |
287 | return eSectionTypeOther; |
288 | } |
289 | |
290 | void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { |
291 | if (m_sections_up) |
292 | return; |
293 | |
294 | m_sections_up = std::make_unique<SectionList>(); |
295 | |
296 | if (m_sect_infos.empty()) { |
297 | DecodeSections(); |
298 | } |
299 | |
300 | for (const section_info §_info : m_sect_infos) { |
301 | SectionType section_type = eSectionTypeOther; |
302 | ConstString section_name; |
303 | offset_t file_offset = sect_info.offset & 0xffffffff; |
304 | addr_t vm_addr = file_offset; |
305 | size_t vm_size = sect_info.size; |
306 | |
307 | if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { |
308 | section_type = eSectionTypeCode; |
309 | section_name = ConstString("code" ); |
310 | |
311 | // A code address in DWARF for WebAssembly is the offset of an |
312 | // instruction relative within the Code section of the WebAssembly file. |
313 | // For this reason Section::GetFileAddress() must return zero for the |
314 | // Code section. |
315 | vm_addr = 0; |
316 | } else { |
317 | section_type = GetSectionTypeFromName(Name: sect_info.name.GetStringRef()); |
318 | if (section_type == eSectionTypeOther) |
319 | continue; |
320 | section_name = sect_info.name; |
321 | if (!IsInMemory()) { |
322 | vm_size = 0; |
323 | vm_addr = 0; |
324 | } |
325 | } |
326 | |
327 | SectionSP section_sp( |
328 | new Section(GetModule(), // Module to which this section belongs. |
329 | this, // ObjectFile to which this section belongs and |
330 | // should read section data from. |
331 | section_type, // Section ID. |
332 | section_name, // Section name. |
333 | section_type, // Section type. |
334 | vm_addr, // VM address. |
335 | vm_size, // VM size in bytes of this section. |
336 | file_offset, // Offset of this section in the file. |
337 | sect_info.size, // Size of the section as found in the file. |
338 | 0, // Alignment of the section |
339 | 0, // Flags for this section. |
340 | 1)); // Number of host bytes per target byte |
341 | m_sections_up->AddSection(section_sp); |
342 | unified_section_list.AddSection(section_sp); |
343 | } |
344 | } |
345 | |
346 | bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, |
347 | bool value_is_offset) { |
348 | /// In WebAssembly, linear memory is disjointed from code space. The VM can |
349 | /// load multiple instances of a module, which logically share the same code. |
350 | /// We represent a wasm32 code address with 64-bits, like: |
351 | /// 63 32 31 0 |
352 | /// +---------------+---------------+ |
353 | /// + module_id | offset | |
354 | /// +---------------+---------------+ |
355 | /// where the lower 32 bits represent a module offset (relative to the module |
356 | /// start not to the beginning of the code section) and the higher 32 bits |
357 | /// uniquely identify the module in the WebAssembly VM. |
358 | /// In other words, we assume that each WebAssembly module is loaded by the |
359 | /// engine at a 64-bit address that starts at the boundary of 4GB pages, like |
360 | /// 0x0000000400000000 for module_id == 4. |
361 | /// These 64-bit addresses will be used to request code ranges for a specific |
362 | /// module from the WebAssembly engine. |
363 | |
364 | assert(m_memory_addr == LLDB_INVALID_ADDRESS || |
365 | m_memory_addr == load_address); |
366 | |
367 | ModuleSP module_sp = GetModule(); |
368 | if (!module_sp) |
369 | return false; |
370 | |
371 | DecodeSections(); |
372 | |
373 | size_t num_loaded_sections = 0; |
374 | SectionList *section_list = GetSectionList(); |
375 | if (!section_list) |
376 | return false; |
377 | |
378 | const size_t num_sections = section_list->GetSize(); |
379 | for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { |
380 | SectionSP section_sp(section_list->GetSectionAtIndex(idx: sect_idx)); |
381 | if (target.SetSectionLoadAddress( |
382 | section: section_sp, load_addr: load_address | section_sp->GetFileOffset())) { |
383 | ++num_loaded_sections; |
384 | } |
385 | } |
386 | |
387 | return num_loaded_sections > 0; |
388 | } |
389 | |
390 | DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) { |
391 | DataExtractor data; |
392 | if (m_file) { |
393 | if (offset < GetByteSize()) { |
394 | size = std::min(a: static_cast<uint64_t>(size), b: GetByteSize() - offset); |
395 | auto buffer_sp = MapFileData(file: m_file, Size: size, Offset: offset); |
396 | return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); |
397 | } |
398 | } else { |
399 | ProcessSP process_sp(m_process_wp.lock()); |
400 | if (process_sp) { |
401 | auto data_up = std::make_unique<DataBufferHeap>(args&: size, args: 0); |
402 | Status readmem_error; |
403 | size_t bytes_read = process_sp->ReadMemory( |
404 | vm_addr: offset, buf: data_up->GetBytes(), size: data_up->GetByteSize(), error&: readmem_error); |
405 | if (bytes_read > 0) { |
406 | DataBufferSP buffer_sp(data_up.release()); |
407 | data.SetData(data_sp: buffer_sp, offset: 0, length: buffer_sp->GetByteSize()); |
408 | } |
409 | } |
410 | } |
411 | |
412 | data.SetByteOrder(GetByteOrder()); |
413 | return data; |
414 | } |
415 | |
416 | std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { |
417 | static ConstString g_sect_name_external_debug_info("external_debug_info" ); |
418 | |
419 | for (const section_info §_info : m_sect_infos) { |
420 | if (g_sect_name_external_debug_info == sect_info.name) { |
421 | const uint32_t kBufferSize = 1024; |
422 | DataExtractor = |
423 | ReadImageData(offset: sect_info.offset, size: kBufferSize); |
424 | llvm::DataExtractor data = section_header_data.GetAsLLVM(); |
425 | llvm::DataExtractor::Cursor c(0); |
426 | std::optional<ConstString> symbols_url = GetWasmString(data, c); |
427 | if (symbols_url) |
428 | return FileSpec(symbols_url->GetStringRef()); |
429 | } |
430 | } |
431 | return std::nullopt; |
432 | } |
433 | |
434 | void ObjectFileWasm::Dump(Stream *s) { |
435 | ModuleSP module_sp(GetModule()); |
436 | if (!module_sp) |
437 | return; |
438 | |
439 | std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); |
440 | |
441 | llvm::raw_ostream &ostream = s->AsRawOstream(); |
442 | ostream << static_cast<void *>(this) << ": " ; |
443 | s->Indent(); |
444 | ostream << "ObjectFileWasm, file = '" ; |
445 | m_file.Dump(s&: ostream); |
446 | ostream << "', arch = " ; |
447 | ostream << GetArchitecture().GetArchitectureName() << "\n" ; |
448 | |
449 | SectionList *sections = GetSectionList(); |
450 | if (sections) { |
451 | sections->Dump(s&: s->AsRawOstream(), indent: s->GetIndentLevel(), target: nullptr, show_header: true, |
452 | UINT32_MAX); |
453 | } |
454 | ostream << "\n" ; |
455 | DumpSectionHeaders(ostream); |
456 | ostream << "\n" ; |
457 | } |
458 | |
459 | void ObjectFileWasm::(llvm::raw_ostream &ostream, |
460 | const section_info_t &sh) { |
461 | ostream << llvm::left_justify(Str: sh.name.GetStringRef(), Width: 16) << " " |
462 | << llvm::format_hex(N: sh.offset, Width: 10) << " " |
463 | << llvm::format_hex(N: sh.size, Width: 10) << " " << llvm::format_hex(N: sh.id, Width: 6) |
464 | << "\n" ; |
465 | } |
466 | |
467 | void ObjectFileWasm::(llvm::raw_ostream &ostream) { |
468 | ostream << "Section Headers\n" ; |
469 | ostream << "IDX name addr size id\n" ; |
470 | ostream << "==== ---------------- ---------- ---------- ------\n" ; |
471 | |
472 | uint32_t idx = 0; |
473 | for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); |
474 | ++pos, ++idx) { |
475 | ostream << "[" << llvm::format_decimal(N: idx, Width: 2) << "] " ; |
476 | ObjectFileWasm::DumpSectionHeader(ostream, sh: *pos); |
477 | } |
478 | } |
479 | |