1 | //===- SymbolTable.cpp ----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "SymbolTable.h" |
10 | #include "Config.h" |
11 | #include "InputChunks.h" |
12 | #include "InputElement.h" |
13 | #include "WriterUtils.h" |
14 | #include "lld/Common/CommonLinkerContext.h" |
15 | #include <optional> |
16 | |
17 | #define DEBUG_TYPE "lld" |
18 | |
19 | using namespace llvm; |
20 | using namespace llvm::wasm; |
21 | using namespace llvm::object; |
22 | |
23 | namespace lld::wasm { |
24 | SymbolTable *symtab; |
25 | |
26 | void SymbolTable::addFile(InputFile *file, StringRef symName) { |
27 | log(msg: "Processing: " + toString(file)); |
28 | |
29 | // Lazy object file |
30 | if (file->lazy) { |
31 | if (auto *f = dyn_cast<BitcodeFile>(Val: file)) { |
32 | f->parseLazy(); |
33 | } else { |
34 | cast<ObjFile>(Val: file)->parseLazy(); |
35 | } |
36 | return; |
37 | } |
38 | |
39 | // .so file |
40 | if (auto *f = dyn_cast<SharedFile>(Val: file)) { |
41 | ctx.sharedFiles.push_back(Elt: f); |
42 | return; |
43 | } |
44 | |
45 | // stub file |
46 | if (auto *f = dyn_cast<StubFile>(Val: file)) { |
47 | f->parse(); |
48 | ctx.stubFiles.push_back(Elt: f); |
49 | return; |
50 | } |
51 | |
52 | if (config->trace) |
53 | message(msg: toString(file)); |
54 | |
55 | // LLVM bitcode file |
56 | if (auto *f = dyn_cast<BitcodeFile>(Val: file)) { |
57 | // This order, first adding to `bitcodeFiles` and then parsing is necessary. |
58 | // See https://github.com/llvm/llvm-project/pull/73095 |
59 | ctx.bitcodeFiles.push_back(Elt: f); |
60 | f->parse(symName); |
61 | return; |
62 | } |
63 | |
64 | // Regular object file |
65 | auto *f = cast<ObjFile>(Val: file); |
66 | f->parse(ignoreComdats: false); |
67 | ctx.objectFiles.push_back(Elt: f); |
68 | } |
69 | |
70 | // This function is where all the optimizations of link-time |
71 | // optimization happens. When LTO is in use, some input files are |
72 | // not in native object file format but in the LLVM bitcode format. |
73 | // This function compiles bitcode files into a few big native files |
74 | // using LLVM functions and replaces bitcode symbols with the results. |
75 | // Because all bitcode files that the program consists of are passed |
76 | // to the compiler at once, it can do whole-program optimization. |
77 | void SymbolTable::compileBitcodeFiles() { |
78 | // Prevent further LTO objects being included |
79 | BitcodeFile::doneLTO = true; |
80 | |
81 | if (ctx.bitcodeFiles.empty()) |
82 | return; |
83 | |
84 | // Compile bitcode files and replace bitcode symbols. |
85 | lto.reset(p: new BitcodeCompiler); |
86 | for (BitcodeFile *f : ctx.bitcodeFiles) |
87 | lto->add(f&: *f); |
88 | |
89 | for (StringRef filename : lto->compile()) { |
90 | auto *obj = make<ObjFile>(args: MemoryBufferRef(filename, "lto.tmp" ), args: "" ); |
91 | obj->parse(ignoreComdats: true); |
92 | ctx.objectFiles.push_back(Elt: obj); |
93 | } |
94 | } |
95 | |
96 | Symbol *SymbolTable::find(StringRef name) { |
97 | auto it = symMap.find(Val: CachedHashStringRef(name)); |
98 | if (it == symMap.end() || it->second == -1) |
99 | return nullptr; |
100 | return symVector[it->second]; |
101 | } |
102 | |
103 | void SymbolTable::replace(StringRef name, Symbol* sym) { |
104 | auto it = symMap.find(Val: CachedHashStringRef(name)); |
105 | symVector[it->second] = sym; |
106 | } |
107 | |
108 | std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) { |
109 | bool trace = false; |
110 | auto p = symMap.insert(KV: {CachedHashStringRef(name), (int)symVector.size()}); |
111 | int &symIndex = p.first->second; |
112 | bool isNew = p.second; |
113 | if (symIndex == -1) { |
114 | symIndex = symVector.size(); |
115 | trace = true; |
116 | isNew = true; |
117 | } |
118 | |
119 | if (!isNew) |
120 | return {symVector[symIndex], false}; |
121 | |
122 | Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); |
123 | sym->isUsedInRegularObj = false; |
124 | sym->canInline = true; |
125 | sym->traced = trace; |
126 | sym->forceExport = false; |
127 | sym->referenced = !config->gcSections; |
128 | symVector.emplace_back(args&: sym); |
129 | return {sym, true}; |
130 | } |
131 | |
132 | std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, |
133 | const InputFile *file) { |
134 | Symbol *s; |
135 | bool wasInserted; |
136 | std::tie(args&: s, args&: wasInserted) = insertName(name); |
137 | |
138 | if (!file || file->kind() == InputFile::ObjectKind) |
139 | s->isUsedInRegularObj = true; |
140 | |
141 | return {s, wasInserted}; |
142 | } |
143 | |
144 | static void reportTypeError(const Symbol *existing, const InputFile *file, |
145 | llvm::wasm::WasmSymbolType type) { |
146 | error(msg: "symbol type mismatch: " + toString(sym: *existing) + "\n>>> defined as " + |
147 | toString(type: existing->getWasmType()) + " in " + |
148 | toString(file: existing->getFile()) + "\n>>> defined as " + toString(type) + |
149 | " in " + toString(file)); |
150 | } |
151 | |
152 | // Check the type of new symbol matches that of the symbol is replacing. |
153 | // Returns true if the function types match, false is there is a signature |
154 | // mismatch. |
155 | static bool signatureMatches(FunctionSymbol *existing, |
156 | const WasmSignature *newSig) { |
157 | const WasmSignature *oldSig = existing->signature; |
158 | |
159 | // If either function is missing a signature (this happens for bitcode |
160 | // symbols) then assume they match. Any mismatch will be reported later |
161 | // when the LTO objects are added. |
162 | if (!newSig || !oldSig) |
163 | return true; |
164 | |
165 | return *newSig == *oldSig; |
166 | } |
167 | |
168 | static void checkGlobalType(const Symbol *existing, const InputFile *file, |
169 | const WasmGlobalType *newType) { |
170 | if (!isa<GlobalSymbol>(Val: existing)) { |
171 | reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_GLOBAL); |
172 | return; |
173 | } |
174 | |
175 | const WasmGlobalType *oldType = cast<GlobalSymbol>(Val: existing)->getGlobalType(); |
176 | if (*newType != *oldType) { |
177 | error(msg: "Global type mismatch: " + existing->getName() + "\n>>> defined as " + |
178 | toString(type: *oldType) + " in " + toString(file: existing->getFile()) + |
179 | "\n>>> defined as " + toString(type: *newType) + " in " + toString(file)); |
180 | } |
181 | } |
182 | |
183 | static void checkTagType(const Symbol *existing, const InputFile *file, |
184 | const WasmSignature *newSig) { |
185 | const auto *existingTag = dyn_cast<TagSymbol>(Val: existing); |
186 | if (!isa<TagSymbol>(Val: existing)) { |
187 | reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_TAG); |
188 | return; |
189 | } |
190 | |
191 | const WasmSignature *oldSig = existingTag->signature; |
192 | if (*newSig != *oldSig) |
193 | warn(msg: "Tag signature mismatch: " + existing->getName() + |
194 | "\n>>> defined as " + toString(sig: *oldSig) + " in " + |
195 | toString(file: existing->getFile()) + "\n>>> defined as " + |
196 | toString(sig: *newSig) + " in " + toString(file)); |
197 | } |
198 | |
199 | static void checkTableType(const Symbol *existing, const InputFile *file, |
200 | const WasmTableType *newType) { |
201 | if (!isa<TableSymbol>(Val: existing)) { |
202 | reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_TABLE); |
203 | return; |
204 | } |
205 | |
206 | const WasmTableType *oldType = cast<TableSymbol>(Val: existing)->getTableType(); |
207 | if (newType->ElemType != oldType->ElemType) { |
208 | error(msg: "Table type mismatch: " + existing->getName() + "\n>>> defined as " + |
209 | toString(type: *oldType) + " in " + toString(file: existing->getFile()) + |
210 | "\n>>> defined as " + toString(type: *newType) + " in " + toString(file)); |
211 | } |
212 | // FIXME: No assertions currently on the limits. |
213 | } |
214 | |
215 | static void checkDataType(const Symbol *existing, const InputFile *file) { |
216 | if (!isa<DataSymbol>(Val: existing)) |
217 | reportTypeError(existing, file, type: WASM_SYMBOL_TYPE_DATA); |
218 | } |
219 | |
220 | DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name, |
221 | uint32_t flags, |
222 | InputFunction *function) { |
223 | LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n" ); |
224 | assert(!find(name)); |
225 | ctx.syntheticFunctions.emplace_back(Args&: function); |
226 | return replaceSymbol<DefinedFunction>(s: insertName(name).first, arg&: name, |
227 | arg&: flags, arg: nullptr, arg&: function); |
228 | } |
229 | |
230 | // Adds an optional, linker generated, data symbol. The symbol will only be |
231 | // added if there is an undefine reference to it, or if it is explicitly |
232 | // exported via the --export flag. Otherwise we don't add the symbol and return |
233 | // nullptr. |
234 | DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name, |
235 | uint64_t value) { |
236 | Symbol *s = find(name); |
237 | if (!s && (config->exportAll || config->exportedSymbols.count(Key: name) != 0)) |
238 | s = insertName(name).first; |
239 | else if (!s || s->isDefined()) |
240 | return nullptr; |
241 | LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n" ); |
242 | auto *rtn = replaceSymbol<DefinedData>( |
243 | s, arg&: name, arg: WASM_SYMBOL_VISIBILITY_HIDDEN | WASM_SYMBOL_ABSOLUTE); |
244 | rtn->setVA(value); |
245 | rtn->referenced = true; |
246 | return rtn; |
247 | } |
248 | |
249 | DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name, |
250 | uint32_t flags) { |
251 | LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n" ); |
252 | assert(!find(name)); |
253 | return replaceSymbol<DefinedData>(s: insertName(name).first, arg&: name, |
254 | arg: flags | WASM_SYMBOL_ABSOLUTE); |
255 | } |
256 | |
257 | DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags, |
258 | InputGlobal *global) { |
259 | LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global |
260 | << "\n" ); |
261 | assert(!find(name)); |
262 | ctx.syntheticGlobals.emplace_back(Args&: global); |
263 | return replaceSymbol<DefinedGlobal>(s: insertName(name).first, arg&: name, arg&: flags, |
264 | arg: nullptr, arg&: global); |
265 | } |
266 | |
267 | DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name, |
268 | InputGlobal *global) { |
269 | Symbol *s = find(name); |
270 | if (!s || s->isDefined()) |
271 | return nullptr; |
272 | LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global |
273 | << "\n" ); |
274 | ctx.syntheticGlobals.emplace_back(Args&: global); |
275 | return replaceSymbol<DefinedGlobal>(s, arg&: name, arg: WASM_SYMBOL_VISIBILITY_HIDDEN, |
276 | arg: nullptr, arg&: global); |
277 | } |
278 | |
279 | DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags, |
280 | InputTable *table) { |
281 | LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table |
282 | << "\n" ); |
283 | Symbol *s = find(name); |
284 | assert(!s || s->isUndefined()); |
285 | if (!s) |
286 | s = insertName(name).first; |
287 | ctx.syntheticTables.emplace_back(Args&: table); |
288 | return replaceSymbol<DefinedTable>(s, arg&: name, arg&: flags, arg: nullptr, arg&: table); |
289 | } |
290 | |
291 | static bool shouldReplace(const Symbol *existing, InputFile *newFile, |
292 | uint32_t newFlags) { |
293 | // If existing symbol is undefined, replace it. |
294 | if (!existing->isDefined()) { |
295 | LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " |
296 | << existing->getName() << "\n" ); |
297 | return true; |
298 | } |
299 | |
300 | // Now we have two defined symbols. If the new one is weak, we can ignore it. |
301 | if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { |
302 | LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n" ); |
303 | return false; |
304 | } |
305 | |
306 | // If the existing symbol is weak, we should replace it. |
307 | if (existing->isWeak()) { |
308 | LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n" ); |
309 | return true; |
310 | } |
311 | |
312 | // Neither symbol is week. They conflict. |
313 | error(msg: "duplicate symbol: " + toString(sym: *existing) + "\n>>> defined in " + |
314 | toString(file: existing->getFile()) + "\n>>> defined in " + |
315 | toString(file: newFile)); |
316 | return true; |
317 | } |
318 | |
319 | Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags, |
320 | InputFile *file, |
321 | InputFunction *function) { |
322 | LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " [" |
323 | << (function ? toString(function->signature) : "none" ) |
324 | << "]\n" ); |
325 | Symbol *s; |
326 | bool wasInserted; |
327 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
328 | |
329 | auto replaceSym = [&](Symbol *sym) { |
330 | // If the new defined function doesn't have signature (i.e. bitcode |
331 | // functions) but the old symbol does, then preserve the old signature |
332 | const WasmSignature *oldSig = s->getSignature(); |
333 | auto* newSym = replaceSymbol<DefinedFunction>(s: sym, arg&: name, arg&: flags, arg&: file, arg&: function); |
334 | if (!newSym->signature) |
335 | newSym->signature = oldSig; |
336 | }; |
337 | |
338 | if (wasInserted || s->isLazy()) { |
339 | replaceSym(s); |
340 | return s; |
341 | } |
342 | |
343 | auto existingFunction = dyn_cast<FunctionSymbol>(Val: s); |
344 | if (!existingFunction) { |
345 | reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_FUNCTION); |
346 | return s; |
347 | } |
348 | |
349 | bool checkSig = true; |
350 | if (auto ud = dyn_cast<UndefinedFunction>(Val: existingFunction)) |
351 | checkSig = ud->isCalledDirectly; |
352 | |
353 | if (checkSig && function && !signatureMatches(existing: existingFunction, newSig: &function->signature)) { |
354 | Symbol* variant; |
355 | if (getFunctionVariant(sym: s, sig: &function->signature, file, out: &variant)) |
356 | // New variant, always replace |
357 | replaceSym(variant); |
358 | else if (shouldReplace(existing: s, newFile: file, newFlags: flags)) |
359 | // Variant already exists, replace it after checking shouldReplace |
360 | replaceSym(variant); |
361 | |
362 | // This variant we found take the place in the symbol table as the primary |
363 | // variant. |
364 | replace(name, sym: variant); |
365 | return variant; |
366 | } |
367 | |
368 | // Existing function with matching signature. |
369 | if (shouldReplace(existing: s, newFile: file, newFlags: flags)) |
370 | replaceSym(s); |
371 | |
372 | return s; |
373 | } |
374 | |
375 | Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags, |
376 | InputFile *file, InputChunk *segment, |
377 | uint64_t address, uint64_t size) { |
378 | LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address |
379 | << "\n" ); |
380 | Symbol *s; |
381 | bool wasInserted; |
382 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
383 | |
384 | auto replaceSym = [&]() { |
385 | replaceSymbol<DefinedData>(s, arg&: name, arg&: flags, arg&: file, arg&: segment, arg&: address, arg&: size); |
386 | }; |
387 | |
388 | if (wasInserted || s->isLazy()) { |
389 | replaceSym(); |
390 | return s; |
391 | } |
392 | |
393 | checkDataType(existing: s, file); |
394 | |
395 | if (shouldReplace(existing: s, newFile: file, newFlags: flags)) |
396 | replaceSym(); |
397 | return s; |
398 | } |
399 | |
400 | Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags, |
401 | InputFile *file, InputGlobal *global) { |
402 | LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n" ); |
403 | |
404 | Symbol *s; |
405 | bool wasInserted; |
406 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
407 | |
408 | auto replaceSym = [&]() { |
409 | replaceSymbol<DefinedGlobal>(s, arg&: name, arg&: flags, arg&: file, arg&: global); |
410 | }; |
411 | |
412 | if (wasInserted || s->isLazy()) { |
413 | replaceSym(); |
414 | return s; |
415 | } |
416 | |
417 | checkGlobalType(existing: s, file, newType: &global->getType()); |
418 | |
419 | if (shouldReplace(existing: s, newFile: file, newFlags: flags)) |
420 | replaceSym(); |
421 | return s; |
422 | } |
423 | |
424 | Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags, |
425 | InputFile *file, InputTag *tag) { |
426 | LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n" ); |
427 | |
428 | Symbol *s; |
429 | bool wasInserted; |
430 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
431 | |
432 | auto replaceSym = [&]() { |
433 | replaceSymbol<DefinedTag>(s, arg&: name, arg&: flags, arg&: file, arg&: tag); |
434 | }; |
435 | |
436 | if (wasInserted || s->isLazy()) { |
437 | replaceSym(); |
438 | return s; |
439 | } |
440 | |
441 | checkTagType(existing: s, file, newSig: &tag->signature); |
442 | |
443 | if (shouldReplace(existing: s, newFile: file, newFlags: flags)) |
444 | replaceSym(); |
445 | return s; |
446 | } |
447 | |
448 | Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags, |
449 | InputFile *file, InputTable *table) { |
450 | LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n" ); |
451 | |
452 | Symbol *s; |
453 | bool wasInserted; |
454 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
455 | |
456 | auto replaceSym = [&]() { |
457 | replaceSymbol<DefinedTable>(s, arg&: name, arg&: flags, arg&: file, arg&: table); |
458 | }; |
459 | |
460 | if (wasInserted || s->isLazy()) { |
461 | replaceSym(); |
462 | return s; |
463 | } |
464 | |
465 | checkTableType(existing: s, file, newType: &table->getType()); |
466 | |
467 | if (shouldReplace(existing: s, newFile: file, newFlags: flags)) |
468 | replaceSym(); |
469 | return s; |
470 | } |
471 | |
472 | // This function get called when an undefined symbol is added, and there is |
473 | // already an existing one in the symbols table. In this case we check that |
474 | // custom 'import-module' and 'import-field' symbol attributes agree. |
475 | // With LTO these attributes are not available when the bitcode is read and only |
476 | // become available when the LTO object is read. In this case we silently |
477 | // replace the empty attributes with the valid ones. |
478 | template <typename T> |
479 | static void setImportAttributes(T *existing, |
480 | std::optional<StringRef> importName, |
481 | std::optional<StringRef> importModule, |
482 | uint32_t flags, InputFile *file) { |
483 | if (importName) { |
484 | if (!existing->importName) |
485 | existing->importName = importName; |
486 | if (existing->importName != importName) |
487 | error("import name mismatch for symbol: " + toString(*existing) + |
488 | "\n>>> defined as " + *existing->importName + " in " + |
489 | toString(existing->getFile()) + "\n>>> defined as " + *importName + |
490 | " in " + toString(file)); |
491 | } |
492 | |
493 | if (importModule) { |
494 | if (!existing->importModule) |
495 | existing->importModule = importModule; |
496 | if (existing->importModule != importModule) |
497 | error("import module mismatch for symbol: " + toString(*existing) + |
498 | "\n>>> defined as " + *existing->importModule + " in " + |
499 | toString(existing->getFile()) + "\n>>> defined as " + |
500 | *importModule + " in " + toString(file)); |
501 | } |
502 | |
503 | // Update symbol binding, if the existing symbol is weak |
504 | uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK; |
505 | if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) { |
506 | existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding; |
507 | } |
508 | } |
509 | |
510 | Symbol *SymbolTable::addUndefinedFunction(StringRef name, |
511 | std::optional<StringRef> importName, |
512 | std::optional<StringRef> importModule, |
513 | uint32_t flags, InputFile *file, |
514 | const WasmSignature *sig, |
515 | bool isCalledDirectly) { |
516 | LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " [" |
517 | << (sig ? toString(*sig) : "none" ) |
518 | << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x" |
519 | << utohexstr(flags) << "\n" ); |
520 | assert(flags & WASM_SYMBOL_UNDEFINED); |
521 | |
522 | Symbol *s; |
523 | bool wasInserted; |
524 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
525 | if (s->traced) |
526 | printTraceSymbolUndefined(name, file); |
527 | |
528 | auto replaceSym = [&]() { |
529 | replaceSymbol<UndefinedFunction>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags, |
530 | arg&: file, arg&: sig, arg&: isCalledDirectly); |
531 | }; |
532 | |
533 | if (wasInserted) { |
534 | replaceSym(); |
535 | } else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) { |
536 | if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { |
537 | lazy->setWeak(); |
538 | lazy->signature = sig; |
539 | } else { |
540 | lazy->extract(); |
541 | if (!config->whyExtract.empty()) |
542 | ctx.whyExtractRecords.emplace_back(Args: toString(file), Args: s->getFile(), Args&: *s); |
543 | } |
544 | } else { |
545 | auto existingFunction = dyn_cast<FunctionSymbol>(Val: s); |
546 | if (!existingFunction) { |
547 | reportTypeError(existing: s, file, type: WASM_SYMBOL_TYPE_FUNCTION); |
548 | return s; |
549 | } |
550 | if (!existingFunction->signature && sig) |
551 | existingFunction->signature = sig; |
552 | auto *existingUndefined = dyn_cast<UndefinedFunction>(Val: existingFunction); |
553 | if (isCalledDirectly && !signatureMatches(existing: existingFunction, newSig: sig)) { |
554 | // If the existing undefined functions is not called directly then let |
555 | // this one take precedence. Otherwise the existing function is either |
556 | // directly called or defined, in which case we need a function variant. |
557 | if (existingUndefined && !existingUndefined->isCalledDirectly) |
558 | replaceSym(); |
559 | else if (getFunctionVariant(sym: s, sig, file, out: &s)) |
560 | replaceSym(); |
561 | } |
562 | if (existingUndefined) { |
563 | setImportAttributes(existing: existingUndefined, importName, importModule, flags, |
564 | file); |
565 | if (isCalledDirectly) |
566 | existingUndefined->isCalledDirectly = true; |
567 | if (s->isWeak()) |
568 | s->flags = flags; |
569 | } |
570 | } |
571 | |
572 | return s; |
573 | } |
574 | |
575 | Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags, |
576 | InputFile *file) { |
577 | LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n" ); |
578 | assert(flags & WASM_SYMBOL_UNDEFINED); |
579 | |
580 | Symbol *s; |
581 | bool wasInserted; |
582 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
583 | if (s->traced) |
584 | printTraceSymbolUndefined(name, file); |
585 | |
586 | if (wasInserted) { |
587 | replaceSymbol<UndefinedData>(s, arg&: name, arg&: flags, arg&: file); |
588 | } else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) { |
589 | if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) |
590 | lazy->setWeak(); |
591 | else |
592 | lazy->extract(); |
593 | } else if (s->isDefined()) { |
594 | checkDataType(existing: s, file); |
595 | } else if (s->isWeak()) { |
596 | s->flags = flags; |
597 | } |
598 | return s; |
599 | } |
600 | |
601 | Symbol *SymbolTable::addUndefinedGlobal(StringRef name, |
602 | std::optional<StringRef> importName, |
603 | std::optional<StringRef> importModule, |
604 | uint32_t flags, InputFile *file, |
605 | const WasmGlobalType *type) { |
606 | LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n" ); |
607 | assert(flags & WASM_SYMBOL_UNDEFINED); |
608 | |
609 | Symbol *s; |
610 | bool wasInserted; |
611 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
612 | if (s->traced) |
613 | printTraceSymbolUndefined(name, file); |
614 | |
615 | if (wasInserted) |
616 | replaceSymbol<UndefinedGlobal>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags, |
617 | arg&: file, arg&: type); |
618 | else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) |
619 | lazy->extract(); |
620 | else if (s->isDefined()) |
621 | checkGlobalType(existing: s, file, newType: type); |
622 | else if (s->isWeak()) |
623 | s->flags = flags; |
624 | return s; |
625 | } |
626 | |
627 | Symbol *SymbolTable::addUndefinedTable(StringRef name, |
628 | std::optional<StringRef> importName, |
629 | std::optional<StringRef> importModule, |
630 | uint32_t flags, InputFile *file, |
631 | const WasmTableType *type) { |
632 | LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n" ); |
633 | assert(flags & WASM_SYMBOL_UNDEFINED); |
634 | |
635 | Symbol *s; |
636 | bool wasInserted; |
637 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
638 | if (s->traced) |
639 | printTraceSymbolUndefined(name, file); |
640 | |
641 | if (wasInserted) |
642 | replaceSymbol<UndefinedTable>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags, |
643 | arg&: file, arg&: type); |
644 | else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) |
645 | lazy->extract(); |
646 | else if (s->isDefined()) |
647 | checkTableType(existing: s, file, newType: type); |
648 | else if (s->isWeak()) |
649 | s->flags = flags; |
650 | return s; |
651 | } |
652 | |
653 | Symbol *SymbolTable::addUndefinedTag(StringRef name, |
654 | std::optional<StringRef> importName, |
655 | std::optional<StringRef> importModule, |
656 | uint32_t flags, InputFile *file, |
657 | const WasmSignature *sig) { |
658 | LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n" ); |
659 | assert(flags & WASM_SYMBOL_UNDEFINED); |
660 | |
661 | Symbol *s; |
662 | bool wasInserted; |
663 | std::tie(args&: s, args&: wasInserted) = insert(name, file); |
664 | if (s->traced) |
665 | printTraceSymbolUndefined(name, file); |
666 | |
667 | if (wasInserted) |
668 | replaceSymbol<UndefinedTag>(s, arg&: name, arg&: importName, arg&: importModule, arg&: flags, arg&: file, |
669 | arg&: sig); |
670 | else if (auto *lazy = dyn_cast<LazySymbol>(Val: s)) |
671 | lazy->extract(); |
672 | else if (s->isDefined()) |
673 | checkTagType(existing: s, file, newSig: sig); |
674 | else if (s->isWeak()) |
675 | s->flags = flags; |
676 | return s; |
677 | } |
678 | |
679 | TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) { |
680 | WasmLimits limits{.Flags: 0, .Minimum: 0, .Maximum: 0}; // Set by the writer. |
681 | WasmTableType *type = make<WasmTableType>(); |
682 | type->ElemType = ValType::FUNCREF; |
683 | type->Limits = limits; |
684 | StringRef module(defaultModule); |
685 | uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN; |
686 | flags |= WASM_SYMBOL_UNDEFINED; |
687 | Symbol *sym = addUndefinedTable(name, importName: name, importModule: module, flags, file: nullptr, type); |
688 | sym->markLive(); |
689 | sym->forceExport = config->exportTable; |
690 | return cast<TableSymbol>(Val: sym); |
691 | } |
692 | |
693 | TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) { |
694 | const uint32_t invalidIndex = -1; |
695 | WasmLimits limits{.Flags: 0, .Minimum: 0, .Maximum: 0}; // Set by the writer. |
696 | WasmTableType type{.ElemType: ValType::FUNCREF, .Limits: limits}; |
697 | WasmTable desc{.Index: invalidIndex, .Type: type, .SymbolName: name}; |
698 | InputTable *table = make<InputTable>(args&: desc, args: nullptr); |
699 | uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN; |
700 | TableSymbol *sym = addSyntheticTable(name, flags, table); |
701 | sym->markLive(); |
702 | sym->forceExport = config->exportTable; |
703 | return sym; |
704 | } |
705 | |
706 | // Whether or not we need an indirect function table is usually a function of |
707 | // whether an input declares a need for it. However sometimes it's possible for |
708 | // no input to need the indirect function table, but then a late |
709 | // addInternalGOTEntry causes a function to be allocated an address. In that |
710 | // case address we synthesize a definition at the last minute. |
711 | TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) { |
712 | Symbol *existing = find(name: functionTableName); |
713 | if (existing) { |
714 | if (!isa<TableSymbol>(Val: existing)) { |
715 | error(msg: Twine("reserved symbol must be of type table: `" ) + |
716 | functionTableName + "`" ); |
717 | return nullptr; |
718 | } |
719 | if (existing->isDefined()) { |
720 | error(msg: Twine("reserved symbol must not be defined in input files: `" ) + |
721 | functionTableName + "`" ); |
722 | return nullptr; |
723 | } |
724 | } |
725 | |
726 | if (config->importTable) { |
727 | if (existing) |
728 | return cast<TableSymbol>(Val: existing); |
729 | if (required) |
730 | return createUndefinedIndirectFunctionTable(name: functionTableName); |
731 | } else if ((existing && existing->isLive()) || config->exportTable || |
732 | required) { |
733 | // A defined table is required. Either because the user request an exported |
734 | // table or because the table symbol is already live. The existing table is |
735 | // guaranteed to be undefined due to the check above. |
736 | return createDefinedIndirectFunctionTable(name: functionTableName); |
737 | } |
738 | |
739 | // An indirect function table will only be present in the symbol table if |
740 | // needed by a reloc; if we get here, we don't need one. |
741 | return nullptr; |
742 | } |
743 | |
744 | void SymbolTable::addLazy(StringRef name, InputFile *file) { |
745 | LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n" ); |
746 | |
747 | Symbol *s; |
748 | bool wasInserted; |
749 | std::tie(args&: s, args&: wasInserted) = insertName(name); |
750 | |
751 | if (wasInserted) { |
752 | replaceSymbol<LazySymbol>(s, arg&: name, arg: 0, arg&: file); |
753 | return; |
754 | } |
755 | |
756 | if (!s->isUndefined()) |
757 | return; |
758 | |
759 | // The existing symbol is undefined, load a new one from the archive, |
760 | // unless the existing symbol is weak in which case replace the undefined |
761 | // symbols with a LazySymbol. |
762 | if (s->isWeak()) { |
763 | const WasmSignature *oldSig = nullptr; |
764 | // In the case of an UndefinedFunction we need to preserve the expected |
765 | // signature. |
766 | if (auto *f = dyn_cast<UndefinedFunction>(Val: s)) |
767 | oldSig = f->signature; |
768 | LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n" ); |
769 | auto newSym = |
770 | replaceSymbol<LazySymbol>(s, arg&: name, arg: WASM_SYMBOL_BINDING_WEAK, arg&: file); |
771 | newSym->signature = oldSig; |
772 | return; |
773 | } |
774 | |
775 | LLVM_DEBUG(dbgs() << "replacing existing undefined\n" ); |
776 | const InputFile *oldFile = s->getFile(); |
777 | LazySymbol(name, 0, file).extract(); |
778 | if (!config->whyExtract.empty()) |
779 | ctx.whyExtractRecords.emplace_back(Args: toString(file: oldFile), Args: s->getFile(), Args&: *s); |
780 | } |
781 | |
782 | bool SymbolTable::addComdat(StringRef name) { |
783 | return comdatGroups.insert(V: CachedHashStringRef(name)).second; |
784 | } |
785 | |
786 | // The new signature doesn't match. Create a variant to the symbol with the |
787 | // signature encoded in the name and return that instead. These symbols are |
788 | // then unified later in handleSymbolVariants. |
789 | bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig, |
790 | const InputFile *file, Symbol **out) { |
791 | LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> " |
792 | << " " << toString(*sig) << "\n" ); |
793 | Symbol *variant = nullptr; |
794 | |
795 | // Linear search through symbol variants. Should never be more than two |
796 | // or three entries here. |
797 | auto &variants = symVariants[CachedHashStringRef(sym->getName())]; |
798 | if (variants.empty()) |
799 | variants.push_back(x: sym); |
800 | |
801 | for (Symbol* v : variants) { |
802 | if (*v->getSignature() == *sig) { |
803 | variant = v; |
804 | break; |
805 | } |
806 | } |
807 | |
808 | bool wasAdded = !variant; |
809 | if (wasAdded) { |
810 | // Create a new variant; |
811 | LLVM_DEBUG(dbgs() << "added new variant\n" ); |
812 | variant = reinterpret_cast<Symbol *>(make<SymbolUnion>()); |
813 | variant->isUsedInRegularObj = |
814 | !file || file->kind() == InputFile::ObjectKind; |
815 | variant->canInline = true; |
816 | variant->traced = false; |
817 | variant->forceExport = false; |
818 | variants.push_back(x: variant); |
819 | } else { |
820 | LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n" ); |
821 | assert(*variant->getSignature() == *sig); |
822 | } |
823 | |
824 | *out = variant; |
825 | return wasAdded; |
826 | } |
827 | |
828 | // Set a flag for --trace-symbol so that we can print out a log message |
829 | // if a new symbol with the same name is inserted into the symbol table. |
830 | void SymbolTable::trace(StringRef name) { |
831 | symMap.insert(KV: {CachedHashStringRef(name), -1}); |
832 | } |
833 | |
834 | void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { |
835 | // Swap symbols as instructed by -wrap. |
836 | int &origIdx = symMap[CachedHashStringRef(sym->getName())]; |
837 | int &realIdx= symMap[CachedHashStringRef(real->getName())]; |
838 | int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())]; |
839 | LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n" ); |
840 | |
841 | // Anyone looking up __real symbols should get the original |
842 | realIdx = origIdx; |
843 | // Anyone looking up the original should get the __wrap symbol |
844 | origIdx = wrapIdx; |
845 | } |
846 | |
// Encoded body of a function that consists of a single `unreachable`
// instruction. Installed as the body of the synthetic functions created by
// replaceWithUnreachable.
static const uint8_t unreachableFn[] = {
    0x03, // ULEB length
    0x00, // ULEB num locals
    0x00, // opcode unreachable
    0x0b, // opcode end
};
851 | |
852 | // Replace the given symbol body with an unreachable function. |
853 | // This is used by handleWeakUndefines in order to generate a callable |
854 | // equivalent of an undefined function and also handleSymbolVariants for |
855 | // undefined functions that don't match the signature of the definition. |
856 | InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym, |
857 | const WasmSignature &sig, |
858 | StringRef debugName) { |
859 | auto *func = make<SyntheticFunction>(args: sig, args: sym->getName(), args&: debugName); |
860 | func->setBody(unreachableFn); |
861 | ctx.syntheticFunctions.emplace_back(Args&: func); |
862 | // Mark new symbols as local. For relocatable output we don't want them |
863 | // to be exported outside the object file. |
864 | replaceSymbol<DefinedFunction>(s: sym, arg&: debugName, arg: WASM_SYMBOL_BINDING_LOCAL, |
865 | arg: nullptr, arg&: func); |
866 | // Ensure the stub function doesn't get a table entry. Its address |
867 | // should always compare equal to the null pointer. |
868 | sym->isStub = true; |
869 | return func; |
870 | } |
871 | |
872 | void SymbolTable::replaceWithUndefined(Symbol *sym) { |
873 | // Add a synthetic dummy for weak undefined functions. These dummies will |
874 | // be GC'd if not used as the target of any "call" instructions. |
875 | StringRef debugName = saver().save(S: "undefined_weak:" + toString(sym: *sym)); |
876 | replaceWithUnreachable(sym, sig: *sym->getSignature(), debugName); |
877 | // Hide our dummy to prevent export. |
878 | sym->setHidden(true); |
879 | } |
880 | |
881 | // For weak undefined functions, there may be "call" instructions that reference |
882 | // the symbol. In this case, we need to synthesise a dummy/stub function that |
883 | // will abort at runtime, so that relocations can still provided an operand to |
884 | // the call instruction that passes Wasm validation. |
885 | void SymbolTable::handleWeakUndefines() { |
886 | for (Symbol *sym : symbols()) { |
887 | if (sym->isUndefWeak() && sym->isUsedInRegularObj) { |
888 | if (sym->getSignature()) { |
889 | replaceWithUndefined(sym); |
890 | } else { |
891 | // It is possible for undefined functions not to have a signature (eg. |
892 | // if added via "--undefined"), but weak undefined ones do have a |
893 | // signature. Lazy symbols may not be functions and therefore Sig can |
894 | // still be null in some circumstance. |
895 | assert(!isa<FunctionSymbol>(sym)); |
896 | } |
897 | } |
898 | } |
899 | } |
900 | |
901 | DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) { |
902 | if (stubFunctions.count(Val: sig)) |
903 | return stubFunctions[sig]; |
904 | LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n" ); |
905 | auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>()); |
906 | sym->isUsedInRegularObj = true; |
907 | sym->canInline = true; |
908 | sym->traced = false; |
909 | sym->forceExport = false; |
910 | sym->signature = &sig; |
911 | replaceSymbol<DefinedFunction>( |
912 | s: sym, arg: "undefined_stub" , arg: WASM_SYMBOL_VISIBILITY_HIDDEN, arg: nullptr, arg: nullptr); |
913 | replaceWithUnreachable(sym, sig, debugName: "undefined_stub" ); |
914 | stubFunctions[sig] = sym; |
915 | return sym; |
916 | } |
917 | |
918 | static void reportFunctionSignatureMismatch(StringRef symName, |
919 | FunctionSymbol *a, |
920 | FunctionSymbol *b, bool isError) { |
921 | std::string msg = ("function signature mismatch: " + symName + |
922 | "\n>>> defined as " + toString(sig: *a->signature) + " in " + |
923 | toString(file: a->getFile()) + "\n>>> defined as " + |
924 | toString(sig: *b->signature) + " in " + toString(file: b->getFile())) |
925 | .str(); |
926 | if (isError) |
927 | error(msg); |
928 | else |
929 | warn(msg); |
930 | } |
931 | |
932 | // Remove any variant symbols that were created due to function signature |
933 | // mismatches. |
934 | void SymbolTable::handleSymbolVariants() { |
935 | for (auto pair : symVariants) { |
936 | // Push the initial symbol onto the list of variants. |
937 | StringRef symName = pair.first.val(); |
938 | std::vector<Symbol *> &variants = pair.second; |
939 | |
940 | #ifndef NDEBUG |
941 | LLVM_DEBUG(dbgs() << "symbol with (" << variants.size() |
942 | << ") variants: " << symName << "\n" ); |
943 | for (auto *s: variants) { |
944 | auto *f = cast<FunctionSymbol>(Val: s); |
945 | LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " " |
946 | << toString(*f->signature) << "\n" ); |
947 | } |
948 | #endif |
949 | |
950 | // Find the one definition. |
951 | DefinedFunction *defined = nullptr; |
952 | for (auto *symbol : variants) { |
953 | if (auto f = dyn_cast<DefinedFunction>(Val: symbol)) { |
954 | defined = f; |
955 | break; |
956 | } |
957 | } |
958 | |
959 | // If there are no definitions, and the undefined symbols disagree on |
960 | // the signature, there is not we can do since we don't know which one |
961 | // to use as the signature on the import. |
962 | if (!defined) { |
963 | reportFunctionSignatureMismatch(symName, |
964 | a: cast<FunctionSymbol>(Val: variants[0]), |
965 | b: cast<FunctionSymbol>(Val: variants[1]), isError: true); |
966 | return; |
967 | } |
968 | |
969 | for (auto *symbol : variants) { |
970 | if (symbol != defined) { |
971 | auto *f = cast<FunctionSymbol>(Val: symbol); |
972 | reportFunctionSignatureMismatch(symName, a: f, b: defined, isError: false); |
973 | StringRef debugName = |
974 | saver().save(S: "signature_mismatch:" + toString(sym: *f)); |
975 | replaceWithUnreachable(sym: f, sig: *f->signature, debugName); |
976 | } |
977 | } |
978 | } |
979 | } |
980 | |
} // namespace lld::wasm
982 | |