1 | //===-- ObjectFileWasm.cpp ------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "ObjectFileWasm.h" |
10 | #include "lldb/Core/Module.h" |
11 | #include "lldb/Core/ModuleSpec.h" |
12 | #include "lldb/Core/PluginManager.h" |
13 | #include "lldb/Core/Section.h" |
14 | #include "lldb/Target/Process.h" |
15 | #include "lldb/Target/SectionLoadList.h" |
16 | #include "lldb/Target/Target.h" |
17 | #include "lldb/Utility/DataBufferHeap.h" |
18 | #include "lldb/Utility/LLDBLog.h" |
19 | #include "lldb/Utility/Log.h" |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/BinaryFormat/Magic.h" |
24 | #include "llvm/BinaryFormat/Wasm.h" |
25 | #include "llvm/Support/Endian.h" |
26 | #include "llvm/Support/Format.h" |
27 | #include <optional> |
28 | |
29 | using namespace lldb; |
30 | using namespace lldb_private; |
31 | using namespace lldb_private::wasm; |
32 | |
33 | LLDB_PLUGIN_DEFINE(ObjectFileWasm) |
34 | |
35 | static const uint32_t = |
36 | sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); |
37 | |
38 | /// Checks whether the data buffer starts with a valid Wasm module header. |
39 | static bool (const DataBufferSP &data_sp) { |
40 | if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) |
41 | return false; |
42 | |
43 | if (llvm::identify_magic(magic: toStringRef(Input: data_sp->GetData())) != |
44 | llvm::file_magic::wasm_object) |
45 | return false; |
46 | |
47 | const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); |
48 | |
49 | uint32_t version = llvm::support::endian::read32le(P: Ptr); |
50 | return version == llvm::wasm::WasmVersion; |
51 | } |
52 | |
53 | static std::optional<ConstString> |
54 | (llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { |
55 | // A Wasm string is encoded as a vector of UTF-8 codes. |
56 | // Vectors are encoded with their u32 length followed by the element |
57 | // sequence. |
58 | uint64_t len = data.getULEB128(C&: c); |
59 | if (!c) { |
60 | consumeError(Err: c.takeError()); |
61 | return std::nullopt; |
62 | } |
63 | |
64 | if (len >= (uint64_t(1) << 32)) { |
65 | return std::nullopt; |
66 | } |
67 | |
68 | llvm::SmallVector<uint8_t, 32> str_storage; |
69 | data.getU8(C&: c, Dst&: str_storage, Count: len); |
70 | if (!c) { |
71 | consumeError(Err: c.takeError()); |
72 | return std::nullopt; |
73 | } |
74 | |
75 | llvm::StringRef str = toStringRef(Input: llvm::ArrayRef(str_storage)); |
76 | return ConstString(str); |
77 | } |
78 | |
79 | char ObjectFileWasm::ID; |
80 | |
81 | void ObjectFileWasm::Initialize() { |
82 | PluginManager::RegisterPlugin(name: GetPluginNameStatic(), |
83 | description: GetPluginDescriptionStatic(), create_callback: CreateInstance, |
84 | create_memory_callback: CreateMemoryInstance, get_module_specifications: GetModuleSpecifications); |
85 | } |
86 | |
87 | void ObjectFileWasm::Terminate() { |
88 | PluginManager::UnregisterPlugin(create_callback: CreateInstance); |
89 | } |
90 | |
91 | ObjectFile * |
92 | ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp, |
93 | offset_t data_offset, const FileSpec *file, |
94 | offset_t file_offset, offset_t length) { |
95 | Log *log = GetLog(mask: LLDBLog::Object); |
96 | |
97 | if (!data_sp) { |
98 | data_sp = MapFileData(file: *file, Size: length, Offset: file_offset); |
99 | if (!data_sp) { |
100 | LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s" , |
101 | file->GetPath().c_str()); |
102 | return nullptr; |
103 | } |
104 | data_offset = 0; |
105 | } |
106 | |
107 | assert(data_sp); |
108 | if (!ValidateModuleHeader(data_sp)) { |
109 | LLDB_LOGF(log, |
110 | "Failed to create ObjectFileWasm instance: invalid Wasm header" ); |
111 | return nullptr; |
112 | } |
113 | |
114 | // Update the data to contain the entire file if it doesn't contain it |
115 | // already. |
116 | if (data_sp->GetByteSize() < length) { |
117 | data_sp = MapFileData(file: *file, Size: length, Offset: file_offset); |
118 | if (!data_sp) { |
119 | LLDB_LOGF(log, |
120 | "Failed to create ObjectFileWasm instance: cannot read file %s" , |
121 | file->GetPath().c_str()); |
122 | return nullptr; |
123 | } |
124 | data_offset = 0; |
125 | } |
126 | |
127 | std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( |
128 | module_sp, data_sp, data_offset, file, file_offset, length)); |
129 | ArchSpec spec = objfile_up->GetArchitecture(); |
130 | if (spec && objfile_up->SetModulesArchitecture(spec)) { |
131 | LLDB_LOGF(log, |
132 | "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s" , |
133 | static_cast<void *>(objfile_up.get()), |
134 | static_cast<void *>(objfile_up->GetModule().get()), |
135 | objfile_up->GetModule()->GetSpecificationDescription().c_str(), |
136 | file ? file->GetPath().c_str() : "<NULL>" ); |
137 | return objfile_up.release(); |
138 | } |
139 | |
140 | LLDB_LOGF(log, "Failed to create ObjectFileWasm instance" ); |
141 | return nullptr; |
142 | } |
143 | |
144 | ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, |
145 | WritableDataBufferSP data_sp, |
146 | const ProcessSP &process_sp, |
147 | addr_t ) { |
148 | if (!ValidateModuleHeader(data_sp)) |
149 | return nullptr; |
150 | |
151 | std::unique_ptr<ObjectFileWasm> objfile_up( |
152 | new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); |
153 | ArchSpec spec = objfile_up->GetArchitecture(); |
154 | if (spec && objfile_up->SetModulesArchitecture(spec)) |
155 | return objfile_up.release(); |
156 | return nullptr; |
157 | } |
158 | |
159 | bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { |
160 | // Buffer sufficient to read a section header and find the pointer to the next |
161 | // section. |
162 | const uint32_t kBufferSize = 1024; |
163 | DataExtractor = ReadImageData(offset: *offset_ptr, size: kBufferSize); |
164 | |
165 | llvm::DataExtractor data = section_header_data.GetAsLLVM(); |
166 | llvm::DataExtractor::Cursor c(0); |
167 | |
168 | // Each section consists of: |
169 | // - a one-byte section id, |
170 | // - the u32 size of the contents, in bytes, |
171 | // - the actual contents. |
172 | uint8_t section_id = data.getU8(C&: c); |
173 | uint64_t payload_len = data.getULEB128(C&: c); |
174 | if (!c) |
175 | return !llvm::errorToBool(Err: c.takeError()); |
176 | |
177 | if (payload_len >= (uint64_t(1) << 32)) |
178 | return false; |
179 | |
180 | if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { |
181 | // Custom sections have the id 0. Their contents consist of a name |
182 | // identifying the custom section, followed by an uninterpreted sequence |
183 | // of bytes. |
184 | lldb::offset_t prev_offset = c.tell(); |
185 | std::optional<ConstString> sect_name = GetWasmString(data, c); |
186 | if (!sect_name) |
187 | return false; |
188 | |
189 | if (payload_len < c.tell() - prev_offset) |
190 | return false; |
191 | |
192 | uint32_t section_length = payload_len - (c.tell() - prev_offset); |
193 | m_sect_infos.push_back(x: section_info{.offset: *offset_ptr + c.tell(), .size: section_length, |
194 | .id: section_id, .name: *sect_name}); |
195 | *offset_ptr += (c.tell() + section_length); |
196 | } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) { |
197 | m_sect_infos.push_back(x: section_info{.offset: *offset_ptr + c.tell(), |
198 | .size: static_cast<uint32_t>(payload_len), |
199 | .id: section_id, .name: ConstString()}); |
200 | *offset_ptr += (c.tell() + payload_len); |
201 | } else { |
202 | // Invalid section id. |
203 | return false; |
204 | } |
205 | return true; |
206 | } |
207 | |
208 | bool ObjectFileWasm::DecodeSections() { |
209 | lldb::offset_t offset = kWasmHeaderSize; |
210 | if (IsInMemory()) { |
211 | offset += m_memory_addr; |
212 | } |
213 | |
214 | while (DecodeNextSection(offset_ptr: &offset)) |
215 | ; |
216 | return true; |
217 | } |
218 | |
219 | size_t ObjectFileWasm::GetModuleSpecifications( |
220 | const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, |
221 | offset_t file_offset, offset_t length, ModuleSpecList &specs) { |
222 | if (!ValidateModuleHeader(data_sp)) { |
223 | return 0; |
224 | } |
225 | |
226 | ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm" )); |
227 | specs.Append(spec); |
228 | return 1; |
229 | } |
230 | |
231 | ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp, |
232 | offset_t data_offset, const FileSpec *file, |
233 | offset_t offset, offset_t length) |
234 | : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), |
235 | m_arch("wasm32-unknown-unknown-wasm" ) { |
236 | m_data.SetAddressByteSize(4); |
237 | } |
238 | |
239 | ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, |
240 | lldb::WritableDataBufferSP , |
241 | const lldb::ProcessSP &process_sp, |
242 | lldb::addr_t ) |
243 | : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), |
244 | m_arch("wasm32-unknown-unknown-wasm" ) {} |
245 | |
246 | bool ObjectFileWasm::() { |
247 | // We already parsed the header during initialization. |
248 | return true; |
249 | } |
250 | |
251 | void ObjectFileWasm::ParseSymtab(Symtab &symtab) {} |
252 | |
253 | static SectionType GetSectionTypeFromName(llvm::StringRef Name) { |
254 | if (Name.consume_front(Prefix: ".debug_" ) || Name.consume_front(Prefix: ".zdebug_" )) { |
255 | return ObjectFile::GetDWARFSectionTypeFromName(name: Name); |
256 | } |
257 | return eSectionTypeOther; |
258 | } |
259 | |
260 | void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { |
261 | if (m_sections_up) |
262 | return; |
263 | |
264 | m_sections_up = std::make_unique<SectionList>(); |
265 | |
266 | if (m_sect_infos.empty()) { |
267 | DecodeSections(); |
268 | } |
269 | |
270 | for (const section_info §_info : m_sect_infos) { |
271 | SectionType section_type = eSectionTypeOther; |
272 | ConstString section_name; |
273 | offset_t file_offset = sect_info.offset & 0xffffffff; |
274 | addr_t vm_addr = file_offset; |
275 | size_t vm_size = sect_info.size; |
276 | |
277 | if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { |
278 | section_type = eSectionTypeCode; |
279 | section_name = ConstString("code" ); |
280 | |
281 | // A code address in DWARF for WebAssembly is the offset of an |
282 | // instruction relative within the Code section of the WebAssembly file. |
283 | // For this reason Section::GetFileAddress() must return zero for the |
284 | // Code section. |
285 | vm_addr = 0; |
286 | } else { |
287 | section_type = GetSectionTypeFromName(Name: sect_info.name.GetStringRef()); |
288 | if (section_type == eSectionTypeOther) |
289 | continue; |
290 | section_name = sect_info.name; |
291 | if (!IsInMemory()) { |
292 | vm_size = 0; |
293 | vm_addr = 0; |
294 | } |
295 | } |
296 | |
297 | SectionSP section_sp( |
298 | new Section(GetModule(), // Module to which this section belongs. |
299 | this, // ObjectFile to which this section belongs and |
300 | // should read section data from. |
301 | section_type, // Section ID. |
302 | section_name, // Section name. |
303 | section_type, // Section type. |
304 | vm_addr, // VM address. |
305 | vm_size, // VM size in bytes of this section. |
306 | file_offset, // Offset of this section in the file. |
307 | sect_info.size, // Size of the section as found in the file. |
308 | 0, // Alignment of the section |
309 | 0, // Flags for this section. |
310 | 1)); // Number of host bytes per target byte |
311 | m_sections_up->AddSection(section_sp); |
312 | unified_section_list.AddSection(section_sp); |
313 | } |
314 | } |
315 | |
316 | bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, |
317 | bool value_is_offset) { |
318 | /// In WebAssembly, linear memory is disjointed from code space. The VM can |
319 | /// load multiple instances of a module, which logically share the same code. |
320 | /// We represent a wasm32 code address with 64-bits, like: |
321 | /// 63 32 31 0 |
322 | /// +---------------+---------------+ |
323 | /// + module_id | offset | |
324 | /// +---------------+---------------+ |
325 | /// where the lower 32 bits represent a module offset (relative to the module |
326 | /// start not to the beginning of the code section) and the higher 32 bits |
327 | /// uniquely identify the module in the WebAssembly VM. |
328 | /// In other words, we assume that each WebAssembly module is loaded by the |
329 | /// engine at a 64-bit address that starts at the boundary of 4GB pages, like |
330 | /// 0x0000000400000000 for module_id == 4. |
331 | /// These 64-bit addresses will be used to request code ranges for a specific |
332 | /// module from the WebAssembly engine. |
333 | |
334 | assert(m_memory_addr == LLDB_INVALID_ADDRESS || |
335 | m_memory_addr == load_address); |
336 | |
337 | ModuleSP module_sp = GetModule(); |
338 | if (!module_sp) |
339 | return false; |
340 | |
341 | DecodeSections(); |
342 | |
343 | size_t num_loaded_sections = 0; |
344 | SectionList *section_list = GetSectionList(); |
345 | if (!section_list) |
346 | return false; |
347 | |
348 | const size_t num_sections = section_list->GetSize(); |
349 | for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { |
350 | SectionSP section_sp(section_list->GetSectionAtIndex(idx: sect_idx)); |
351 | if (target.SetSectionLoadAddress( |
352 | section: section_sp, load_addr: load_address | section_sp->GetFileOffset())) { |
353 | ++num_loaded_sections; |
354 | } |
355 | } |
356 | |
357 | return num_loaded_sections > 0; |
358 | } |
359 | |
360 | DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) { |
361 | DataExtractor data; |
362 | if (m_file) { |
363 | if (offset < GetByteSize()) { |
364 | size = std::min(a: static_cast<uint64_t>(size), b: GetByteSize() - offset); |
365 | auto buffer_sp = MapFileData(file: m_file, Size: size, Offset: offset); |
366 | return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); |
367 | } |
368 | } else { |
369 | ProcessSP process_sp(m_process_wp.lock()); |
370 | if (process_sp) { |
371 | auto data_up = std::make_unique<DataBufferHeap>(args&: size, args: 0); |
372 | Status readmem_error; |
373 | size_t bytes_read = process_sp->ReadMemory( |
374 | vm_addr: offset, buf: data_up->GetBytes(), size: data_up->GetByteSize(), error&: readmem_error); |
375 | if (bytes_read > 0) { |
376 | DataBufferSP buffer_sp(data_up.release()); |
377 | data.SetData(data_sp: buffer_sp, offset: 0, length: buffer_sp->GetByteSize()); |
378 | } |
379 | } |
380 | } |
381 | |
382 | data.SetByteOrder(GetByteOrder()); |
383 | return data; |
384 | } |
385 | |
386 | std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { |
387 | static ConstString g_sect_name_external_debug_info("external_debug_info" ); |
388 | |
389 | for (const section_info §_info : m_sect_infos) { |
390 | if (g_sect_name_external_debug_info == sect_info.name) { |
391 | const uint32_t kBufferSize = 1024; |
392 | DataExtractor = |
393 | ReadImageData(offset: sect_info.offset, size: kBufferSize); |
394 | llvm::DataExtractor data = section_header_data.GetAsLLVM(); |
395 | llvm::DataExtractor::Cursor c(0); |
396 | std::optional<ConstString> symbols_url = GetWasmString(data, c); |
397 | if (symbols_url) |
398 | return FileSpec(symbols_url->GetStringRef()); |
399 | } |
400 | } |
401 | return std::nullopt; |
402 | } |
403 | |
404 | void ObjectFileWasm::Dump(Stream *s) { |
405 | ModuleSP module_sp(GetModule()); |
406 | if (!module_sp) |
407 | return; |
408 | |
409 | std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); |
410 | |
411 | llvm::raw_ostream &ostream = s->AsRawOstream(); |
412 | ostream << static_cast<void *>(this) << ": " ; |
413 | s->Indent(); |
414 | ostream << "ObjectFileWasm, file = '" ; |
415 | m_file.Dump(s&: ostream); |
416 | ostream << "', arch = " ; |
417 | ostream << GetArchitecture().GetArchitectureName() << "\n" ; |
418 | |
419 | SectionList *sections = GetSectionList(); |
420 | if (sections) { |
421 | sections->Dump(s&: s->AsRawOstream(), indent: s->GetIndentLevel(), target: nullptr, show_header: true, |
422 | UINT32_MAX); |
423 | } |
424 | ostream << "\n" ; |
425 | DumpSectionHeaders(ostream); |
426 | ostream << "\n" ; |
427 | } |
428 | |
429 | void ObjectFileWasm::(llvm::raw_ostream &ostream, |
430 | const section_info_t &sh) { |
431 | ostream << llvm::left_justify(Str: sh.name.GetStringRef(), Width: 16) << " " |
432 | << llvm::format_hex(N: sh.offset, Width: 10) << " " |
433 | << llvm::format_hex(N: sh.size, Width: 10) << " " << llvm::format_hex(N: sh.id, Width: 6) |
434 | << "\n" ; |
435 | } |
436 | |
437 | void ObjectFileWasm::(llvm::raw_ostream &ostream) { |
438 | ostream << "Section Headers\n" ; |
439 | ostream << "IDX name addr size id\n" ; |
440 | ostream << "==== ---------------- ---------- ---------- ------\n" ; |
441 | |
442 | uint32_t idx = 0; |
443 | for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); |
444 | ++pos, ++idx) { |
445 | ostream << "[" << llvm::format_decimal(N: idx, Width: 2) << "] " ; |
446 | ObjectFileWasm::DumpSectionHeader(ostream, sh: *pos); |
447 | } |
448 | } |
449 | |