//===- InputFiles.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 | |
9 | #include "InputFiles.h" |
10 | #include "Config.h" |
11 | #include "InputChunks.h" |
12 | #include "InputElement.h" |
13 | #include "OutputSegment.h" |
14 | #include "SymbolTable.h" |
15 | #include "lld/Common/CommonLinkerContext.h" |
16 | #include "lld/Common/Reproduce.h" |
17 | #include "llvm/BinaryFormat/Wasm.h" |
18 | #include "llvm/Object/Binary.h" |
19 | #include "llvm/Object/Wasm.h" |
20 | #include "llvm/ProfileData/InstrProf.h" |
21 | #include "llvm/Support/Path.h" |
22 | #include "llvm/Support/TarWriter.h" |
23 | #include "llvm/Support/raw_ostream.h" |
24 | #include <optional> |
25 | |
26 | #define DEBUG_TYPE "lld" |
27 | |
28 | using namespace llvm; |
29 | using namespace llvm::object; |
30 | using namespace llvm::wasm; |
31 | using namespace llvm::sys; |
32 | |
33 | namespace lld { |
34 | |
35 | // Returns a string in the format of "foo.o" or "foo.a(bar.o)". |
36 | std::string toString(const wasm::InputFile *file) { |
37 | if (!file) |
38 | return "<internal>"; |
39 | |
40 | if (file->archiveName.empty()) |
41 | return std::string(file->getName()); |
42 | |
43 | return (file->archiveName + "("+ file->getName() + ")").str(); |
44 | } |
45 | |
46 | namespace wasm { |
47 | |
48 | std::string replaceThinLTOSuffix(StringRef path) { |
49 | auto [suffix, repl] = ctx.arg.thinLTOObjectSuffixReplace; |
50 | if (path.consume_back(Suffix: suffix)) |
51 | return (path + repl).str(); |
52 | return std::string(path); |
53 | } |
54 | |
55 | void InputFile::checkArch(Triple::ArchType arch) const { |
56 | bool is64 = arch == Triple::wasm64; |
57 | if (is64 && !ctx.arg.is64) { |
58 | fatal(msg: toString(file: this) + |
59 | ": must specify -mwasm64 to process wasm64 object files"); |
60 | } else if (ctx.arg.is64.value_or(u: false) != is64) { |
61 | fatal(msg: toString(file: this) + |
62 | ": wasm32 object file can't be linked in wasm64 mode"); |
63 | } |
64 | } |
65 | |
66 | std::unique_ptr<llvm::TarWriter> tar; |
67 | |
68 | std::optional<MemoryBufferRef> readFile(StringRef path) { |
69 | log(msg: "Loading: "+ path); |
70 | |
71 | auto mbOrErr = MemoryBuffer::getFile(Filename: path); |
72 | if (auto ec = mbOrErr.getError()) { |
73 | error(msg: "cannot open "+ path + ": "+ ec.message()); |
74 | return std::nullopt; |
75 | } |
76 | std::unique_ptr<MemoryBuffer> &mb = *mbOrErr; |
77 | MemoryBufferRef mbref = mb->getMemBufferRef(); |
78 | make<std::unique_ptr<MemoryBuffer>>(args: std::move(mb)); // take MB ownership |
79 | |
80 | if (tar) |
81 | tar->append(Path: relativeToRoot(path), Data: mbref.getBuffer()); |
82 | return mbref; |
83 | } |
84 | |
85 | InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName, |
86 | uint64_t offsetInArchive, bool lazy) { |
87 | file_magic magic = identify_magic(magic: mb.getBuffer()); |
88 | if (magic == file_magic::wasm_object) { |
89 | std::unique_ptr<Binary> bin = |
90 | CHECK(createBinary(mb), mb.getBufferIdentifier()); |
91 | auto *obj = cast<WasmObjectFile>(Val: bin.get()); |
92 | if (obj->hasUnmodeledTypes()) |
93 | fatal(msg: toString(s: mb.getBufferIdentifier()) + |
94 | "file has unmodeled reference or GC types"); |
95 | if (obj->isSharedObject()) |
96 | return make<SharedFile>(args&: mb); |
97 | return make<ObjFile>(args&: mb, args&: archiveName, args&: lazy); |
98 | } |
99 | |
100 | assert(magic == file_magic::bitcode); |
101 | return make<BitcodeFile>(args&: mb, args&: archiveName, args&: offsetInArchive, args&: lazy); |
102 | } |
103 | |
104 | // Relocations contain either symbol or type indices. This function takes a |
105 | // relocation and returns relocated index (i.e. translates from the input |
106 | // symbol/type space to the output symbol/type space). |
107 | uint32_t ObjFile::calcNewIndex(const WasmRelocation &reloc) const { |
108 | if (reloc.Type == R_WASM_TYPE_INDEX_LEB) { |
109 | assert(typeIsUsed[reloc.Index]); |
110 | return typeMap[reloc.Index]; |
111 | } |
112 | const Symbol *sym = symbols[reloc.Index]; |
113 | if (auto *ss = dyn_cast<SectionSymbol>(Val: sym)) |
114 | sym = ss->getOutputSectionSymbol(); |
115 | return sym->getOutputSymbolIndex(); |
116 | } |
117 | |
118 | // Relocations can contain addend for combined sections. This function takes a |
119 | // relocation and returns updated addend by offset in the output section. |
120 | int64_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const { |
121 | switch (reloc.Type) { |
122 | case R_WASM_MEMORY_ADDR_LEB: |
123 | case R_WASM_MEMORY_ADDR_LEB64: |
124 | case R_WASM_MEMORY_ADDR_SLEB64: |
125 | case R_WASM_MEMORY_ADDR_SLEB: |
126 | case R_WASM_MEMORY_ADDR_REL_SLEB: |
127 | case R_WASM_MEMORY_ADDR_REL_SLEB64: |
128 | case R_WASM_MEMORY_ADDR_I32: |
129 | case R_WASM_MEMORY_ADDR_I64: |
130 | case R_WASM_MEMORY_ADDR_TLS_SLEB: |
131 | case R_WASM_MEMORY_ADDR_TLS_SLEB64: |
132 | case R_WASM_FUNCTION_OFFSET_I32: |
133 | case R_WASM_FUNCTION_OFFSET_I64: |
134 | case R_WASM_MEMORY_ADDR_LOCREL_I32: |
135 | return reloc.Addend; |
136 | case R_WASM_SECTION_OFFSET_I32: |
137 | return getSectionSymbol(index: reloc.Index)->section->getOffset(offset: reloc.Addend); |
138 | default: |
139 | llvm_unreachable("unexpected relocation type"); |
140 | } |
141 | } |
142 | |
143 | // Translate from the relocation's index into the final linked output value. |
144 | uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc, uint64_t tombstone, |
145 | const InputChunk *chunk) const { |
146 | const Symbol* sym = nullptr; |
147 | if (reloc.Type != R_WASM_TYPE_INDEX_LEB) { |
148 | sym = symbols[reloc.Index]; |
149 | |
150 | // We can end up with relocations against non-live symbols. For example |
151 | // in debug sections. We return a tombstone value in debug symbol sections |
152 | // so this will not produce a valid range conflicting with ranges of actual |
153 | // code. In other sections we return reloc.Addend. |
154 | |
155 | if (!isa<SectionSymbol>(Val: sym) && !sym->isLive()) |
156 | return tombstone ? tombstone : reloc.Addend; |
157 | } |
158 | |
159 | switch (reloc.Type) { |
160 | case R_WASM_TABLE_INDEX_I32: |
161 | case R_WASM_TABLE_INDEX_I64: |
162 | case R_WASM_TABLE_INDEX_SLEB: |
163 | case R_WASM_TABLE_INDEX_SLEB64: |
164 | case R_WASM_TABLE_INDEX_REL_SLEB: |
165 | case R_WASM_TABLE_INDEX_REL_SLEB64: { |
166 | if (!getFunctionSymbol(index: reloc.Index)->hasTableIndex()) |
167 | return 0; |
168 | uint32_t index = getFunctionSymbol(index: reloc.Index)->getTableIndex(); |
169 | if (reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB || |
170 | reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB64) |
171 | index -= ctx.arg.tableBase; |
172 | return index; |
173 | } |
174 | case R_WASM_MEMORY_ADDR_LEB: |
175 | case R_WASM_MEMORY_ADDR_LEB64: |
176 | case R_WASM_MEMORY_ADDR_SLEB: |
177 | case R_WASM_MEMORY_ADDR_SLEB64: |
178 | case R_WASM_MEMORY_ADDR_REL_SLEB: |
179 | case R_WASM_MEMORY_ADDR_REL_SLEB64: |
180 | case R_WASM_MEMORY_ADDR_I32: |
181 | case R_WASM_MEMORY_ADDR_I64: |
182 | case R_WASM_MEMORY_ADDR_TLS_SLEB: |
183 | case R_WASM_MEMORY_ADDR_TLS_SLEB64: |
184 | case R_WASM_MEMORY_ADDR_LOCREL_I32: { |
185 | if (isa<UndefinedData>(Val: sym) || sym->isShared() || sym->isUndefWeak()) |
186 | return 0; |
187 | auto D = cast<DefinedData>(Val: sym); |
188 | uint64_t value = D->getVA() + reloc.Addend; |
189 | if (reloc.Type == R_WASM_MEMORY_ADDR_LOCREL_I32) { |
190 | const auto *segment = cast<InputSegment>(Val: chunk); |
191 | uint64_t p = segment->outputSeg->startVA + segment->outputSegmentOffset + |
192 | reloc.Offset - segment->getInputSectionOffset(); |
193 | value -= p; |
194 | } |
195 | return value; |
196 | } |
197 | case R_WASM_TYPE_INDEX_LEB: |
198 | return typeMap[reloc.Index]; |
199 | case R_WASM_FUNCTION_INDEX_LEB: |
200 | case R_WASM_FUNCTION_INDEX_I32: |
201 | return getFunctionSymbol(index: reloc.Index)->getFunctionIndex(); |
202 | case R_WASM_GLOBAL_INDEX_LEB: |
203 | case R_WASM_GLOBAL_INDEX_I32: |
204 | if (auto gs = dyn_cast<GlobalSymbol>(Val: sym)) |
205 | return gs->getGlobalIndex(); |
206 | return sym->getGOTIndex(); |
207 | case R_WASM_TAG_INDEX_LEB: |
208 | return getTagSymbol(index: reloc.Index)->getTagIndex(); |
209 | case R_WASM_FUNCTION_OFFSET_I32: |
210 | case R_WASM_FUNCTION_OFFSET_I64: { |
211 | if (isa<UndefinedFunction>(Val: sym)) { |
212 | return tombstone ? tombstone : reloc.Addend; |
213 | } |
214 | auto *f = cast<DefinedFunction>(Val: sym); |
215 | return f->function->getOffset(offset: f->function->getFunctionCodeOffset() + |
216 | reloc.Addend); |
217 | } |
218 | case R_WASM_SECTION_OFFSET_I32: |
219 | return getSectionSymbol(index: reloc.Index)->section->getOffset(offset: reloc.Addend); |
220 | case R_WASM_TABLE_NUMBER_LEB: |
221 | return getTableSymbol(index: reloc.Index)->getTableNumber(); |
222 | default: |
223 | llvm_unreachable("unknown relocation type"); |
224 | } |
225 | } |
226 | |
227 | template <class T> |
228 | static void setRelocs(const std::vector<T *> &chunks, |
229 | const WasmSection *section) { |
230 | if (!section) |
231 | return; |
232 | |
233 | ArrayRef<WasmRelocation> relocs = section->Relocations; |
234 | assert(llvm::is_sorted( |
235 | relocs, [](const WasmRelocation &r1, const WasmRelocation &r2) { |
236 | return r1.Offset < r2.Offset; |
237 | })); |
238 | assert(llvm::is_sorted(chunks, [](InputChunk *c1, InputChunk *c2) { |
239 | return c1->getInputSectionOffset() < c2->getInputSectionOffset(); |
240 | })); |
241 | |
242 | auto relocsNext = relocs.begin(); |
243 | auto relocsEnd = relocs.end(); |
244 | auto relocLess = [](const WasmRelocation &r, uint32_t val) { |
245 | return r.Offset < val; |
246 | }; |
247 | for (InputChunk *c : chunks) { |
248 | auto relocsStart = std::lower_bound(relocsNext, relocsEnd, |
249 | c->getInputSectionOffset(), relocLess); |
250 | relocsNext = std::lower_bound( |
251 | relocsStart, relocsEnd, c->getInputSectionOffset() + c->getInputSize(), |
252 | relocLess); |
253 | c->setRelocations(ArrayRef<WasmRelocation>(relocsStart, relocsNext)); |
254 | } |
255 | } |
256 | |
257 | // An object file can have two approaches to tables. With the |
258 | // reference-types feature or call-indirect-overlong feature enabled |
259 | // (explicitly, or implied by the reference-types feature), input files that |
260 | // define or use tables declare the tables using symbols, and record each use |
261 | // with a relocation. This way when the linker combines inputs, it can collate |
262 | // the tables used by the inputs, assigning them distinct table numbers, and |
263 | // renumber all the uses as appropriate. At the same time, the linker has |
264 | // special logic to build the indirect function table if it is needed. |
265 | // |
266 | // However, MVP object files (those that target WebAssembly 1.0, the "minimum |
267 | // viable product" version of WebAssembly) neither write table symbols nor |
268 | // record relocations. These files can have at most one table, the indirect |
269 | // function table used by call_indirect and which is the address space for |
270 | // function pointers. If this table is present, it is always an import. If we |
271 | // have a file with a table import but no table symbols, it is an MVP object |
272 | // file. synthesizeMVPIndirectFunctionTableSymbolIfNeeded serves as a shim when |
273 | // loading these input files, defining the missing symbol to allow the indirect |
274 | // function table to be built. |
275 | // |
276 | // As indirect function table table usage in MVP objects cannot be relocated, |
277 | // the linker must ensure that this table gets assigned index zero. |
278 | void ObjFile::addLegacyIndirectFunctionTableIfNeeded( |
279 | uint32_t tableSymbolCount) { |
280 | uint32_t tableCount = wasmObj->getNumImportedTables() + tables.size(); |
281 | |
282 | // If there are symbols for all tables, then all is good. |
283 | if (tableCount == tableSymbolCount) |
284 | return; |
285 | |
286 | // It's possible for an input to define tables and also use the indirect |
287 | // function table, but forget to compile with -mattr=+call-indirect-overlong |
288 | // or -mattr=+reference-types. For these newer files, we require symbols for |
289 | // all tables, and relocations for all of their uses. |
290 | if (tableSymbolCount != 0) { |
291 | error(msg: toString(file: this) + |
292 | ": expected one symbol table entry for each of the "+ |
293 | Twine(tableCount) + " table(s) present, but got "+ |
294 | Twine(tableSymbolCount) + " symbol(s) instead."); |
295 | return; |
296 | } |
297 | |
298 | // An MVP object file can have up to one table import, for the indirect |
299 | // function table, but will have no table definitions. |
300 | if (tables.size()) { |
301 | error(msg: toString(file: this) + |
302 | ": unexpected table definition(s) without corresponding " |
303 | "symbol-table entries."); |
304 | return; |
305 | } |
306 | |
307 | // An MVP object file can have only one table import. |
308 | if (tableCount != 1) { |
309 | error(msg: toString(file: this) + |
310 | ": multiple table imports, but no corresponding symbol-table " |
311 | "entries."); |
312 | return; |
313 | } |
314 | |
315 | const WasmImport *tableImport = nullptr; |
316 | for (const auto &import : wasmObj->imports()) { |
317 | if (import.Kind == WASM_EXTERNAL_TABLE) { |
318 | assert(!tableImport); |
319 | tableImport = &import; |
320 | } |
321 | } |
322 | assert(tableImport); |
323 | |
324 | // We can only synthesize a symtab entry for the indirect function table; if |
325 | // it has an unexpected name or type, assume that it's not actually the |
326 | // indirect function table. |
327 | if (tableImport->Field != functionTableName || |
328 | tableImport->Table.ElemType != ValType::FUNCREF) { |
329 | error(msg: toString(file: this) + ": table import "+ Twine(tableImport->Field) + |
330 | " is missing a symbol table entry."); |
331 | return; |
332 | } |
333 | |
334 | WasmSymbolInfo info; |
335 | info.Name = tableImport->Field; |
336 | info.Kind = WASM_SYMBOL_TYPE_TABLE; |
337 | info.ImportModule = tableImport->Module; |
338 | info.ImportName = tableImport->Field; |
339 | info.Flags = WASM_SYMBOL_UNDEFINED | WASM_SYMBOL_NO_STRIP; |
340 | info.ElementIndex = 0; |
341 | LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: "<< info.Name |
342 | << "\n"); |
343 | const WasmGlobalType *globalType = nullptr; |
344 | const WasmSignature *signature = nullptr; |
345 | auto *wasmSym = |
346 | make<WasmSymbol>(args&: info, args&: globalType, args: &tableImport->Table, args&: signature); |
347 | Symbol *sym = createUndefined(sym: *wasmSym, isCalledDirectly: false); |
348 | // We're only sure it's a TableSymbol if the createUndefined succeeded. |
349 | if (errorCount()) |
350 | return; |
351 | symbols.push_back(x: sym); |
352 | // Because there are no TABLE_NUMBER relocs, we can't compute accurate |
353 | // liveness info; instead, just mark the symbol as always live. |
354 | sym->markLive(); |
355 | |
356 | // We assume that this compilation unit has unrelocatable references to |
357 | // this table. |
358 | ctx.legacyFunctionTable = true; |
359 | } |
360 | |
361 | static bool shouldMerge(const WasmSection &sec) { |
362 | if (ctx.arg.optimize == 0) |
363 | return false; |
364 | // Sadly we don't have section attributes yet for custom sections, so we |
365 | // currently go by the name alone. |
366 | // TODO(sbc): Add ability for wasm sections to carry flags so we don't |
367 | // need to use names here. |
368 | // For now, keep in sync with uses of wasm::WASM_SEG_FLAG_STRINGS in |
369 | // MCObjectFileInfo::initWasmMCObjectFileInfo which creates these custom |
370 | // sections. |
371 | return sec.Name == ".debug_str"|| sec.Name == ".debug_str.dwo"|| |
372 | sec.Name == ".debug_line_str"; |
373 | } |
374 | |
375 | static bool shouldMerge(const WasmSegment &seg) { |
376 | // As of now we only support merging strings, and only with single byte |
377 | // alignment (2^0). |
378 | if (!(seg.Data.LinkingFlags & WASM_SEG_FLAG_STRINGS) || |
379 | (seg.Data.Alignment != 0)) |
380 | return false; |
381 | |
382 | // On a regular link we don't merge sections if -O0 (default is -O1). This |
383 | // sometimes makes the linker significantly faster, although the output will |
384 | // be bigger. |
385 | if (ctx.arg.optimize == 0) |
386 | return false; |
387 | |
388 | // A mergeable section with size 0 is useless because they don't have |
389 | // any data to merge. A mergeable string section with size 0 can be |
390 | // argued as invalid because it doesn't end with a null character. |
391 | // We'll avoid a mess by handling them as if they were non-mergeable. |
392 | if (seg.Data.Content.size() == 0) |
393 | return false; |
394 | |
395 | return true; |
396 | } |
397 | |
398 | void ObjFile::parseLazy() { |
399 | LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: "<< toString(this) << " " |
400 | << wasmObj.get() << "\n"); |
401 | for (const SymbolRef &sym : wasmObj->symbols()) { |
402 | const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(Symb: sym.getRawDataRefImpl()); |
403 | if (wasmSym.isUndefined() || wasmSym.isBindingLocal()) |
404 | continue; |
405 | symtab->addLazy(name: wasmSym.Info.Name, f: this); |
406 | // addLazy() may trigger this->extract() if an existing symbol is an |
407 | // undefined symbol. If that happens, this function has served its purpose, |
408 | // and we can exit from the loop early. |
409 | if (!lazy) |
410 | break; |
411 | } |
412 | } |
413 | |
414 | ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy) |
415 | : WasmFileBase(ObjectKind, m) { |
416 | this->lazy = lazy; |
417 | this->archiveName = std::string(archiveName); |
418 | |
419 | // Currently we only do this check for regular object file, and not for shared |
420 | // object files. This is because architecture detection for shared objects is |
421 | // currently based on a heuristic, which is fallable: |
422 | // https://github.com/llvm/llvm-project/issues/98778 |
423 | checkArch(arch: wasmObj->getArch()); |
424 | |
425 | // Unless we are processing this as a lazy object file (e.g. part of an |
426 | // archive file or within `--start-lib`/`--end-lib`, it's eagerly linked, so |
427 | // mark it live. |
428 | if (!lazy) |
429 | markLive(); |
430 | } |
431 | |
432 | void SharedFile::parse() { |
433 | assert(wasmObj->isSharedObject()); |
434 | |
435 | for (const SymbolRef &sym : wasmObj->symbols()) { |
436 | const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(Symb: sym.getRawDataRefImpl()); |
437 | if (wasmSym.isDefined()) { |
438 | StringRef name = wasmSym.Info.Name; |
439 | // Certain shared library exports are known to be DSO-local so we |
440 | // don't want to add them to the symbol table. |
441 | // TODO(sbc): Instead of hardcoding these here perhaps we could add |
442 | // this as extra metadata in the `dylink` section. |
443 | if (name == "__wasm_apply_data_relocs"|| name == "__wasm_call_ctors"|| |
444 | name.starts_with(Prefix: "__start_") || name.starts_with(Prefix: "__stop_")) |
445 | continue; |
446 | uint32_t flags = wasmSym.Info.Flags; |
447 | Symbol *s; |
448 | LLVM_DEBUG(dbgs() << "shared symbol: "<< name << "\n"); |
449 | switch (wasmSym.Info.Kind) { |
450 | case WASM_SYMBOL_TYPE_FUNCTION: |
451 | s = symtab->addSharedFunction(name, flags, file: this, sig: wasmSym.Signature); |
452 | break; |
453 | case WASM_SYMBOL_TYPE_DATA: |
454 | s = symtab->addSharedData(name, flags, file: this); |
455 | break; |
456 | default: |
457 | continue; |
458 | } |
459 | symbols.push_back(x: s); |
460 | } |
461 | } |
462 | } |
463 | |
464 | // Returns the alignment for a custom section. This is used to concatenate |
465 | // custom sections with the same name into a single custom section. |
466 | static uint32_t getCustomSectionAlignment(const WasmSection &sec) { |
467 | // TODO: Add a section attribute for alignment in the linking spec. |
468 | if (sec.Name == getInstrProfSectionName(IPSK: IPSK_covfun, OF: Triple::Wasm) || |
469 | sec.Name == getInstrProfSectionName(IPSK: IPSK_covmap, OF: Triple::Wasm)) { |
470 | // llvm-cov assumes that coverage metadata sections are 8-byte aligned. |
471 | return 8; |
472 | } |
473 | return 1; |
474 | } |
475 | |
476 | WasmFileBase::WasmFileBase(Kind k, MemoryBufferRef m) : InputFile(k, m) { |
477 | // Parse a memory buffer as a wasm file. |
478 | LLVM_DEBUG(dbgs() << "Reading object: "<< toString(this) << "\n"); |
479 | std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this)); |
480 | |
481 | auto *obj = dyn_cast<WasmObjectFile>(Val: bin.get()); |
482 | if (!obj) |
483 | fatal(msg: toString(file: this) + ": not a wasm file"); |
484 | |
485 | bin.release(); |
486 | wasmObj.reset(p: obj); |
487 | } |
488 | |
489 | void ObjFile::parse(bool ignoreComdats) { |
490 | // Parse a memory buffer as a wasm file. |
491 | LLVM_DEBUG(dbgs() << "ObjFile::parse: "<< toString(this) << "\n"); |
492 | |
493 | if (!wasmObj->isRelocatableObject()) |
494 | fatal(msg: toString(file: this) + ": not a relocatable wasm file"); |
495 | |
496 | // Build up a map of function indices to table indices for use when |
497 | // verifying the existing table index relocations |
498 | uint32_t totalFunctions = |
499 | wasmObj->getNumImportedFunctions() + wasmObj->functions().size(); |
500 | tableEntriesRel.resize(new_size: totalFunctions); |
501 | tableEntries.resize(new_size: totalFunctions); |
502 | for (const WasmElemSegment &seg : wasmObj->elements()) { |
503 | int64_t offset; |
504 | if (seg.Offset.Extended) |
505 | fatal(msg: toString(file: this) + ": extended init exprs not supported"); |
506 | else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I32_CONST) |
507 | offset = seg.Offset.Inst.Value.Int32; |
508 | else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I64_CONST) |
509 | offset = seg.Offset.Inst.Value.Int64; |
510 | else |
511 | fatal(msg: toString(file: this) + ": invalid table elements"); |
512 | for (size_t index = 0; index < seg.Functions.size(); index++) { |
513 | auto functionIndex = seg.Functions[index]; |
514 | tableEntriesRel[functionIndex] = index; |
515 | tableEntries[functionIndex] = offset + index; |
516 | } |
517 | } |
518 | |
519 | ArrayRef<StringRef> comdats = wasmObj->linkingData().Comdats; |
520 | for (StringRef comdat : comdats) { |
521 | bool isNew = ignoreComdats || symtab->addComdat(name: comdat); |
522 | keptComdats.push_back(x: isNew); |
523 | } |
524 | |
525 | uint32_t sectionIndex = 0; |
526 | |
527 | // Bool for each symbol, true if called directly. This allows us to implement |
528 | // a weaker form of signature checking where undefined functions that are not |
529 | // called directly (i.e. only address taken) don't have to match the defined |
530 | // function's signature. We cannot do this for directly called functions |
531 | // because those signatures are checked at validation times. |
532 | // See https://github.com/llvm/llvm-project/issues/39758 |
533 | std::vector<bool> isCalledDirectly(wasmObj->getNumberOfSymbols(), false); |
534 | for (const SectionRef &sec : wasmObj->sections()) { |
535 | const WasmSection §ion = wasmObj->getWasmSection(Section: sec); |
536 | // Wasm objects can have at most one code and one data section. |
537 | if (section.Type == WASM_SEC_CODE) { |
538 | assert(!codeSection); |
539 | codeSection = §ion; |
540 | } else if (section.Type == WASM_SEC_DATA) { |
541 | assert(!dataSection); |
542 | dataSection = §ion; |
543 | } else if (section.Type == WASM_SEC_CUSTOM) { |
544 | InputChunk *customSec; |
545 | uint32_t alignment = getCustomSectionAlignment(sec: section); |
546 | if (shouldMerge(sec: section)) |
547 | customSec = make<MergeInputChunk>(args: section, args: this, args&: alignment); |
548 | else |
549 | customSec = make<InputSection>(args: section, args: this, args&: alignment); |
550 | customSec->discarded = isExcludedByComdat(chunk: customSec); |
551 | customSections.emplace_back(args&: customSec); |
552 | customSections.back()->setRelocations(section.Relocations); |
553 | customSectionsByIndex[sectionIndex] = customSections.back(); |
554 | } |
555 | sectionIndex++; |
556 | // Scans relocations to determine if a function symbol is called directly. |
557 | for (const WasmRelocation &reloc : section.Relocations) |
558 | if (reloc.Type == R_WASM_FUNCTION_INDEX_LEB) |
559 | isCalledDirectly[reloc.Index] = true; |
560 | } |
561 | |
562 | typeMap.resize(new_size: getWasmObj()->types().size()); |
563 | typeIsUsed.resize(new_size: getWasmObj()->types().size(), x: false); |
564 | |
565 | |
566 | // Populate `Segments`. |
567 | for (const WasmSegment &s : wasmObj->dataSegments()) { |
568 | InputChunk *seg; |
569 | if (shouldMerge(seg: s)) |
570 | seg = make<MergeInputChunk>(args: s, args: this); |
571 | else |
572 | seg = make<InputSegment>(args: s, args: this); |
573 | seg->discarded = isExcludedByComdat(chunk: seg); |
574 | // Older object files did not include WASM_SEG_FLAG_TLS and instead |
575 | // relied on the naming convention. To maintain compat with such objects |
576 | // we still imply the TLS flag based on the name of the segment. |
577 | if (!seg->isTLS() && |
578 | (seg->name.starts_with(Prefix: ".tdata") || seg->name.starts_with(Prefix: ".tbss"))) |
579 | seg->flags |= WASM_SEG_FLAG_TLS; |
580 | segments.emplace_back(args&: seg); |
581 | } |
582 | setRelocs(chunks: segments, section: dataSection); |
583 | |
584 | // Populate `Functions`. |
585 | ArrayRef<WasmFunction> funcs = wasmObj->functions(); |
586 | ArrayRef<WasmSignature> types = wasmObj->types(); |
587 | functions.reserve(n: funcs.size()); |
588 | |
589 | for (auto &f : funcs) { |
590 | auto *func = make<InputFunction>(args: types[f.SigIndex], args: &f, args: this); |
591 | func->discarded = isExcludedByComdat(chunk: func); |
592 | functions.emplace_back(args&: func); |
593 | } |
594 | setRelocs(chunks: functions, section: codeSection); |
595 | |
596 | // Populate `Tables`. |
597 | for (const WasmTable &t : wasmObj->tables()) |
598 | tables.emplace_back(args: make<InputTable>(args: t, args: this)); |
599 | |
600 | // Populate `Globals`. |
601 | for (const WasmGlobal &g : wasmObj->globals()) |
602 | globals.emplace_back(args: make<InputGlobal>(args: g, args: this)); |
603 | |
604 | // Populate `Tags`. |
605 | for (const WasmTag &t : wasmObj->tags()) |
606 | tags.emplace_back(args: make<InputTag>(args: types[t.SigIndex], args: t, args: this)); |
607 | |
608 | // Populate `Symbols` based on the symbols in the object. |
609 | symbols.reserve(n: wasmObj->getNumberOfSymbols()); |
610 | uint32_t tableSymbolCount = 0; |
611 | for (const SymbolRef &sym : wasmObj->symbols()) { |
612 | const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(Symb: sym.getRawDataRefImpl()); |
613 | if (wasmSym.isTypeTable()) |
614 | tableSymbolCount++; |
615 | if (wasmSym.isDefined()) { |
616 | // createDefined may fail if the symbol is comdat excluded in which case |
617 | // we fall back to creating an undefined symbol |
618 | if (Symbol *d = createDefined(sym: wasmSym)) { |
619 | symbols.push_back(x: d); |
620 | continue; |
621 | } |
622 | } |
623 | size_t idx = symbols.size(); |
624 | symbols.push_back(x: createUndefined(sym: wasmSym, isCalledDirectly: isCalledDirectly[idx])); |
625 | } |
626 | |
627 | addLegacyIndirectFunctionTableIfNeeded(tableSymbolCount); |
628 | } |
629 | |
630 | bool ObjFile::isExcludedByComdat(const InputChunk *chunk) const { |
631 | uint32_t c = chunk->getComdat(); |
632 | if (c == UINT32_MAX) |
633 | return false; |
634 | return !keptComdats[c]; |
635 | } |
636 | |
637 | FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t index) const { |
638 | return cast<FunctionSymbol>(Val: symbols[index]); |
639 | } |
640 | |
641 | GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t index) const { |
642 | return cast<GlobalSymbol>(Val: symbols[index]); |
643 | } |
644 | |
645 | TagSymbol *ObjFile::getTagSymbol(uint32_t index) const { |
646 | return cast<TagSymbol>(Val: symbols[index]); |
647 | } |
648 | |
649 | TableSymbol *ObjFile::getTableSymbol(uint32_t index) const { |
650 | return cast<TableSymbol>(Val: symbols[index]); |
651 | } |
652 | |
653 | SectionSymbol *ObjFile::getSectionSymbol(uint32_t index) const { |
654 | return cast<SectionSymbol>(Val: symbols[index]); |
655 | } |
656 | |
657 | DataSymbol *ObjFile::getDataSymbol(uint32_t index) const { |
658 | return cast<DataSymbol>(Val: symbols[index]); |
659 | } |
660 | |
661 | Symbol *ObjFile::createDefined(const WasmSymbol &sym) { |
662 | StringRef name = sym.Info.Name; |
663 | uint32_t flags = sym.Info.Flags; |
664 | |
665 | switch (sym.Info.Kind) { |
666 | case WASM_SYMBOL_TYPE_FUNCTION: { |
667 | InputFunction *func = |
668 | functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()]; |
669 | if (sym.isBindingLocal()) |
670 | return make<DefinedFunction>(args&: name, args&: flags, args: this, args&: func); |
671 | if (func->discarded) |
672 | return nullptr; |
673 | return symtab->addDefinedFunction(name, flags, file: this, function: func); |
674 | } |
675 | case WASM_SYMBOL_TYPE_DATA: { |
676 | InputChunk *seg = segments[sym.Info.DataRef.Segment]; |
677 | auto offset = sym.Info.DataRef.Offset; |
678 | auto size = sym.Info.DataRef.Size; |
679 | // Support older (e.g. llvm 13) object files that pre-date the per-symbol |
680 | // TLS flag, and symbols were assumed to be TLS by being defined in a TLS |
681 | // segment. |
682 | if (!(flags & WASM_SYMBOL_TLS) && seg->isTLS()) |
683 | flags |= WASM_SYMBOL_TLS; |
684 | if (sym.isBindingLocal()) |
685 | return make<DefinedData>(args&: name, args&: flags, args: this, args&: seg, args&: offset, args&: size); |
686 | if (seg->discarded) |
687 | return nullptr; |
688 | return symtab->addDefinedData(name, flags, file: this, segment: seg, address: offset, size); |
689 | } |
690 | case WASM_SYMBOL_TYPE_GLOBAL: { |
691 | InputGlobal *global = |
692 | globals[sym.Info.ElementIndex - wasmObj->getNumImportedGlobals()]; |
693 | if (sym.isBindingLocal()) |
694 | return make<DefinedGlobal>(args&: name, args&: flags, args: this, args&: global); |
695 | return symtab->addDefinedGlobal(name, flags, file: this, g: global); |
696 | } |
697 | case WASM_SYMBOL_TYPE_SECTION: { |
698 | InputChunk *section = customSectionsByIndex[sym.Info.ElementIndex]; |
699 | assert(sym.isBindingLocal()); |
700 | // Need to return null if discarded here? data and func only do that when |
701 | // binding is not local. |
702 | if (section->discarded) |
703 | return nullptr; |
704 | return make<SectionSymbol>(args&: flags, args&: section, args: this); |
705 | } |
706 | case WASM_SYMBOL_TYPE_TAG: { |
707 | InputTag *tag = tags[sym.Info.ElementIndex - wasmObj->getNumImportedTags()]; |
708 | if (sym.isBindingLocal()) |
709 | return make<DefinedTag>(args&: name, args&: flags, args: this, args&: tag); |
710 | return symtab->addDefinedTag(name, flags, file: this, t: tag); |
711 | } |
712 | case WASM_SYMBOL_TYPE_TABLE: { |
713 | InputTable *table = |
714 | tables[sym.Info.ElementIndex - wasmObj->getNumImportedTables()]; |
715 | if (sym.isBindingLocal()) |
716 | return make<DefinedTable>(args&: name, args&: flags, args: this, args&: table); |
717 | return symtab->addDefinedTable(name, flags, file: this, t: table); |
718 | } |
719 | } |
720 | llvm_unreachable("unknown symbol kind"); |
721 | } |
722 | |
723 | Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) { |
724 | StringRef name = sym.Info.Name; |
725 | uint32_t flags = sym.Info.Flags | WASM_SYMBOL_UNDEFINED; |
726 | |
727 | switch (sym.Info.Kind) { |
728 | case WASM_SYMBOL_TYPE_FUNCTION: |
729 | if (sym.isBindingLocal()) |
730 | return make<UndefinedFunction>(args&: name, args: sym.Info.ImportName, |
731 | args: sym.Info.ImportModule, args&: flags, args: this, |
732 | args: sym.Signature, args&: isCalledDirectly); |
733 | return symtab->addUndefinedFunction(name, importName: sym.Info.ImportName, |
734 | importModule: sym.Info.ImportModule, flags, file: this, |
735 | signature: sym.Signature, isCalledDirectly); |
736 | case WASM_SYMBOL_TYPE_DATA: |
737 | if (sym.isBindingLocal()) |
738 | return make<UndefinedData>(args&: name, args&: flags, args: this); |
739 | return symtab->addUndefinedData(name, flags, file: this); |
740 | case WASM_SYMBOL_TYPE_GLOBAL: |
741 | if (sym.isBindingLocal()) |
742 | return make<UndefinedGlobal>(args&: name, args: sym.Info.ImportName, |
743 | args: sym.Info.ImportModule, args&: flags, args: this, |
744 | args: sym.GlobalType); |
745 | return symtab->addUndefinedGlobal(name, importName: sym.Info.ImportName, |
746 | importModule: sym.Info.ImportModule, flags, file: this, |
747 | type: sym.GlobalType); |
748 | case WASM_SYMBOL_TYPE_TABLE: |
749 | if (sym.isBindingLocal()) |
750 | return make<UndefinedTable>(args&: name, args: sym.Info.ImportName, |
751 | args: sym.Info.ImportModule, args&: flags, args: this, |
752 | args: sym.TableType); |
753 | return symtab->addUndefinedTable(name, importName: sym.Info.ImportName, |
754 | importModule: sym.Info.ImportModule, flags, file: this, |
755 | type: sym.TableType); |
756 | case WASM_SYMBOL_TYPE_TAG: |
757 | if (sym.isBindingLocal()) |
758 | return make<UndefinedTag>(args&: name, args: sym.Info.ImportName, |
759 | args: sym.Info.ImportModule, args&: flags, args: this, |
760 | args: sym.Signature); |
761 | return symtab->addUndefinedTag(name, importName: sym.Info.ImportName, |
762 | importModule: sym.Info.ImportModule, flags, file: this, |
763 | sig: sym.Signature); |
764 | case WASM_SYMBOL_TYPE_SECTION: |
765 | llvm_unreachable("section symbols cannot be undefined"); |
766 | } |
767 | llvm_unreachable("unknown symbol kind"); |
768 | } |
769 | |
770 | static StringRef strip(StringRef s) { return s.trim(Char: ' '); } |
771 | |
772 | void StubFile::parse() { |
773 | bool first = true; |
774 | |
775 | SmallVector<StringRef> lines; |
776 | mb.getBuffer().split(A&: lines, Separator: '\n'); |
777 | for (StringRef line : lines) { |
778 | line = line.trim(); |
779 | |
780 | // File must begin with #STUB |
781 | if (first) { |
782 | assert(line == "#STUB"); |
783 | first = false; |
784 | } |
785 | |
786 | // Lines starting with # are considered comments |
787 | if (line.starts_with(Prefix: "#") || !line.size()) |
788 | continue; |
789 | |
790 | StringRef sym; |
791 | StringRef rest; |
792 | std::tie(args&: sym, args&: rest) = line.split(Separator: ':'); |
793 | sym = strip(s: sym); |
794 | rest = strip(s: rest); |
795 | |
796 | symbolDependencies[sym] = {}; |
797 | |
798 | while (rest.size()) { |
799 | StringRef dep; |
800 | std::tie(args&: dep, args&: rest) = rest.split(Separator: ','); |
801 | dep = strip(s: dep); |
802 | symbolDependencies[sym].push_back(x: dep); |
803 | } |
804 | } |
805 | } |
806 | |
807 | static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { |
808 | switch (gvVisibility) { |
809 | case GlobalValue::DefaultVisibility: |
810 | return WASM_SYMBOL_VISIBILITY_DEFAULT; |
811 | case GlobalValue::HiddenVisibility: |
812 | case GlobalValue::ProtectedVisibility: |
813 | return WASM_SYMBOL_VISIBILITY_HIDDEN; |
814 | } |
815 | llvm_unreachable("unknown visibility"); |
816 | } |
817 | |
818 | static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats, |
819 | const lto::InputFile::Symbol &objSym, |
820 | BitcodeFile &f) { |
821 | StringRef name = saver().save(S: objSym.getName()); |
822 | |
823 | uint32_t flags = objSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0; |
824 | flags |= mapVisibility(gvVisibility: objSym.getVisibility()); |
825 | |
826 | int c = objSym.getComdatIndex(); |
827 | bool excludedByComdat = c != -1 && !keptComdats[c]; |
828 | |
829 | if (objSym.isUndefined() || excludedByComdat) { |
830 | flags |= WASM_SYMBOL_UNDEFINED; |
831 | if (objSym.isExecutable()) |
832 | return symtab->addUndefinedFunction(name, importName: std::nullopt, importModule: std::nullopt, |
833 | flags, file: &f, signature: nullptr, isCalledDirectly: true); |
834 | return symtab->addUndefinedData(name, flags, file: &f); |
835 | } |
836 | |
837 | if (objSym.isExecutable()) |
838 | return symtab->addDefinedFunction(name, flags, file: &f, function: nullptr); |
839 | return symtab->addDefinedData(name, flags, file: &f, segment: nullptr, address: 0, size: 0); |
840 | } |
841 | |
842 | BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName, |
843 | uint64_t offsetInArchive, bool lazy) |
844 | : InputFile(BitcodeKind, m) { |
845 | this->lazy = lazy; |
846 | this->archiveName = std::string(archiveName); |
847 | |
848 | std::string path = mb.getBufferIdentifier().str(); |
849 | if (ctx.arg.thinLTOIndexOnly) |
850 | path = replaceThinLTOSuffix(path: mb.getBufferIdentifier()); |
851 | |
852 | // ThinLTO assumes that all MemoryBufferRefs given to it have a unique |
853 | // name. If two archives define two members with the same name, this |
854 | // causes a collision which result in only one of the objects being taken |
855 | // into consideration at LTO time (which very likely causes undefined |
856 | // symbols later in the link stage). So we append file offset to make |
857 | // filename unique. |
858 | StringRef name = archiveName.empty() |
859 | ? saver().save(S: path) |
860 | : saver().save(S: archiveName + "("+ path::filename(path) + |
861 | " at "+ utostr(X: offsetInArchive) + ")"); |
862 | MemoryBufferRef mbref(mb.getBuffer(), name); |
863 | |
864 | obj = check(e: lto::InputFile::create(Object: mbref)); |
865 | |
866 | // If this isn't part of an archive, it's eagerly linked, so mark it live. |
867 | if (archiveName.empty()) |
868 | markLive(); |
869 | } |
870 | |
871 | bool BitcodeFile::doneLTO = false; |
872 | |
873 | void BitcodeFile::parseLazy() { |
874 | for (auto [i, irSym] : llvm::enumerate(First: obj->symbols())) { |
875 | if (irSym.isUndefined()) |
876 | continue; |
877 | StringRef name = saver().save(S: irSym.getName()); |
878 | symtab->addLazy(name, f: this); |
879 | // addLazy() may trigger this->extract() if an existing symbol is an |
880 | // undefined symbol. If that happens, this function has served its purpose, |
881 | // and we can exit from the loop early. |
882 | if (!lazy) |
883 | break; |
884 | } |
885 | } |
886 | |
887 | void BitcodeFile::parse(StringRef symName) { |
888 | if (doneLTO) { |
889 | error(msg: toString(file: this) + ": attempt to add bitcode file after LTO ("+ symName + ")"); |
890 | return; |
891 | } |
892 | |
893 | Triple t(obj->getTargetTriple()); |
894 | if (!t.isWasm()) { |
895 | error(msg: toString(file: this) + ": machine type must be wasm32 or wasm64"); |
896 | return; |
897 | } |
898 | checkArch(arch: t.getArch()); |
899 | std::vector<bool> keptComdats; |
900 | // TODO Support nodeduplicate |
901 | // https://github.com/llvm/llvm-project/issues/49875 |
902 | for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) |
903 | keptComdats.push_back(x: symtab->addComdat(name: s.first)); |
904 | |
905 | for (const lto::InputFile::Symbol &objSym : obj->symbols()) |
906 | symbols.push_back(x: createBitcodeSymbol(keptComdats, objSym, f&: *this)); |
907 | } |
908 | |
909 | } // namespace wasm |
910 | } // namespace lld |
911 |
Definitions
- toString
- replaceThinLTOSuffix
- checkArch
- tar
- readFile
- createObjectFile
- calcNewIndex
- calcNewAddend
- calcNewValue
- setRelocs
- addLegacyIndirectFunctionTableIfNeeded
- shouldMerge
- shouldMerge
- parseLazy
- ObjFile
- parse
- getCustomSectionAlignment
- WasmFileBase
- parse
- isExcludedByComdat
- getFunctionSymbol
- getGlobalSymbol
- getTagSymbol
- getTableSymbol
- getSectionSymbol
- getDataSymbol
- createDefined
- createUndefined
- strip
- parse
- mapVisibility
- createBitcodeSymbol
- BitcodeFile
- doneLTO
- parseLazy
Learn to use CMake with our Intro Training
Find out more