1//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_WASM_SYMBOLS_H
10#define LLD_WASM_SYMBOLS_H
11
#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"
#include <optional>
#include <type_traits>
17
18namespace lld {
19namespace wasm {
20
// Shared string constants. They are only declared here (extern); the
// definitions are not part of this header.

// The default module name to use for symbol imports.
extern const char *defaultModule;

// The name under which to import or export the wasm table.
extern const char *functionTableName;

// The name under which to import or export the wasm memory.
extern const char *memoryName;
31
// Make the wasm symbol-type enum usable unqualified in this namespace; it is
// the return type of Symbol::getWasmType().
using llvm::wasm::WasmSymbolType;

// Forward declarations. Where this header uses these types it does so only
// through pointers, so the full definitions are not needed here.
class InputFile;
class InputChunk;
class InputSegment;
class InputFunction;
class InputGlobal;
class InputTag;
class InputSection;
class InputTable;
class OutputSection;

// Sentinel used as the initial value of the index fields declared in this
// header (output symbol index, GOT index, function/table/global/tag index,
// table number), i.e. "no index assigned".
#define INVALID_INDEX UINT32_MAX
45
46// The base class for real symbol classes.
47class Symbol {
48public:
49 enum Kind : uint8_t {
50 DefinedFunctionKind,
51 DefinedDataKind,
52 DefinedGlobalKind,
53 DefinedTagKind,
54 DefinedTableKind,
55 SectionKind,
56 OutputSectionKind,
57 UndefinedFunctionKind,
58 UndefinedDataKind,
59 UndefinedGlobalKind,
60 UndefinedTableKind,
61 UndefinedTagKind,
62 LazyKind,
63 };
64
65 Kind kind() const { return symbolKind; }
66
67 bool isDefined() const { return !isLazy() && !isUndefined(); }
68
69 bool isUndefined() const {
70 return symbolKind == UndefinedFunctionKind ||
71 symbolKind == UndefinedDataKind ||
72 symbolKind == UndefinedGlobalKind ||
73 symbolKind == UndefinedTableKind || symbolKind == UndefinedTagKind;
74 }
75
76 bool isLazy() const { return symbolKind == LazyKind; }
77
78 bool isLocal() const;
79 bool isWeak() const;
80 bool isHidden() const;
81 bool isTLS() const;
82
83 // Returns true if this symbol exists in a discarded (due to COMDAT) section
84 bool isDiscarded() const;
85
86 // True if this is an undefined weak symbol. This only works once
87 // all input files have been added.
88 bool isUndefWeak() const {
89 // See comment on lazy symbols for details.
90 return isWeak() && (isUndefined() || isLazy());
91 }
92
93 // Returns the symbol name.
94 StringRef getName() const { return name; }
95
96 // Returns the file from which this symbol was created.
97 InputFile *getFile() const { return file; }
98
99 InputChunk *getChunk() const;
100
101 // Indicates that the section or import for this symbol will be included in
102 // the final image.
103 bool isLive() const;
104
105 // Marks the symbol's InputChunk as Live, so that it will be included in the
106 // final image.
107 void markLive();
108
109 void setHidden(bool isHidden);
110
111 // Get/set the index in the output symbol table. This is only used for
112 // relocatable output.
113 uint32_t getOutputSymbolIndex() const;
114 void setOutputSymbolIndex(uint32_t index);
115
116 WasmSymbolType getWasmType() const;
117 bool isImported() const;
118 bool isExported() const;
119 bool isExportedExplicit() const;
120
121 // Indicates that the symbol is used in an __attribute__((used)) directive
122 // or similar.
123 bool isNoStrip() const;
124
125 const WasmSignature* getSignature() const;
126
127 uint32_t getGOTIndex() const {
128 assert(gotIndex != INVALID_INDEX);
129 return gotIndex;
130 }
131
132 void setGOTIndex(uint32_t index);
133 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
134
135protected:
136 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
137 : name(name), file(f), symbolKind(k), referenced(!config->gcSections),
138 requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
139 forceImport(false), canInline(false), traced(false), isStub(false),
140 flags(flags) {}
141
142 StringRef name;
143 InputFile *file;
144 uint32_t outputSymbolIndex = INVALID_INDEX;
145 uint32_t gotIndex = INVALID_INDEX;
146 Kind symbolKind;
147
148public:
149 bool referenced : 1;
150
151 // True for data symbols that needs a dummy GOT entry. Used for static
152 // linking of GOT accesses.
153 bool requiresGOT : 1;
154
155 // True if the symbol was used for linking and thus need to be added to the
156 // output file's symbol table. This is true for all symbols except for
157 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
158 // are unreferenced except by other bitcode objects.
159 bool isUsedInRegularObj : 1;
160
161 // True if this symbol is explicitly marked for export (i.e. via the
162 // -e/--export command line flag)
163 bool forceExport : 1;
164
165 bool forceImport : 1;
166
167 // False if LTO shouldn't inline whatever this symbol points to. If a symbol
168 // is overwritten after LTO, LTO shouldn't inline the symbol because it
169 // doesn't know the final contents of the symbol.
170 bool canInline : 1;
171
172 // True if this symbol is specified by --trace-symbol option.
173 bool traced : 1;
174
175 // True if this symbol is a linker-synthesized stub function (traps when
176 // called) and should otherwise be treated as missing/undefined. See
177 // SymbolTable::replaceWithUndefined.
178 // These stubs never appear in the table and any table index relocations
179 // against them will produce address 0 (The table index representing
180 // the null function pointer).
181 bool isStub : 1;
182
183 uint32_t flags;
184
185 std::optional<StringRef> importName;
186 std::optional<StringRef> importModule;
187};
188
189class FunctionSymbol : public Symbol {
190public:
191 static bool classof(const Symbol *s) {
192 return s->kind() == DefinedFunctionKind ||
193 s->kind() == UndefinedFunctionKind;
194 }
195
196 // Get/set the table index
197 void setTableIndex(uint32_t index);
198 uint32_t getTableIndex() const;
199 bool hasTableIndex() const;
200
201 // Get/set the function index
202 uint32_t getFunctionIndex() const;
203 void setFunctionIndex(uint32_t index);
204 bool hasFunctionIndex() const;
205
206 const WasmSignature *signature;
207
208protected:
209 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
210 const WasmSignature *sig)
211 : Symbol(name, k, flags, f), signature(sig) {}
212
213 uint32_t tableIndex = INVALID_INDEX;
214 uint32_t functionIndex = INVALID_INDEX;
215};
216
217class DefinedFunction : public FunctionSymbol {
218public:
219 DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
220 InputFunction *function);
221
222 static bool classof(const Symbol *s) {
223 return s->kind() == DefinedFunctionKind;
224 }
225
226 // Get the function index to be used when exporting. This only applies to
227 // defined functions and can be differ from the regular function index for
228 // weakly defined functions (that are imported and used via one index but
229 // defined and exported via another).
230 uint32_t getExportedFunctionIndex() const;
231
232 InputFunction *function;
233};
234
235class UndefinedFunction : public FunctionSymbol {
236public:
237 UndefinedFunction(StringRef name, std::optional<StringRef> importName,
238 std::optional<StringRef> importModule, uint32_t flags,
239 InputFile *file = nullptr,
240 const WasmSignature *type = nullptr,
241 bool isCalledDirectly = true)
242 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
243 isCalledDirectly(isCalledDirectly) {
244 this->importName = importName;
245 this->importModule = importModule;
246 }
247
248 static bool classof(const Symbol *s) {
249 return s->kind() == UndefinedFunctionKind;
250 }
251
252 DefinedFunction *stubFunction = nullptr;
253 bool isCalledDirectly;
254};
255
256// Section symbols for output sections are different from those for input
257// section. These are generated by the linker and point the OutputSection
258// rather than an InputSection.
259class OutputSectionSymbol : public Symbol {
260public:
261 OutputSectionSymbol(const OutputSection *s)
262 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
263 nullptr),
264 section(s) {}
265
266 static bool classof(const Symbol *s) {
267 return s->kind() == OutputSectionKind;
268 }
269
270 const OutputSection *section;
271};
272
273class SectionSymbol : public Symbol {
274public:
275 SectionSymbol(uint32_t flags, const InputChunk *s, InputFile *f = nullptr)
276 : Symbol("", SectionKind, flags, f), section(s) {}
277
278 static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
279
280 const OutputSectionSymbol *getOutputSectionSymbol() const;
281
282 const InputChunk *section;
283};
284
285class DataSymbol : public Symbol {
286public:
287 static bool classof(const Symbol *s) {
288 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind;
289 }
290
291protected:
292 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
293 : Symbol(name, k, flags, f) {}
294};
295
296class DefinedData : public DataSymbol {
297public:
298 // Constructor for regular data symbols originating from input files.
299 DefinedData(StringRef name, uint32_t flags, InputFile *f, InputChunk *segment,
300 uint64_t value, uint64_t size)
301 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
302 value(value), size(size) {}
303
304 // Constructor for linker synthetic data symbols.
305 DefinedData(StringRef name, uint32_t flags)
306 : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
307
308 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
309
310 // Returns the output virtual address of a defined data symbol.
311 uint64_t getVA() const;
312 void setVA(uint64_t va);
313
314 // Returns the offset of a defined data symbol within its OutputSegment.
315 uint64_t getOutputSegmentOffset() const;
316 uint64_t getOutputSegmentIndex() const;
317 uint64_t getSize() const { return size; }
318
319 InputChunk *segment = nullptr;
320 uint64_t value = 0;
321
322protected:
323 uint64_t size = 0;
324};
325
326class UndefinedData : public DataSymbol {
327public:
328 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
329 : DataSymbol(name, UndefinedDataKind, flags, file) {}
330 static bool classof(const Symbol *s) {
331 return s->kind() == UndefinedDataKind;
332 }
333};
334
335class GlobalSymbol : public Symbol {
336public:
337 static bool classof(const Symbol *s) {
338 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
339 }
340
341 const WasmGlobalType *getGlobalType() const { return globalType; }
342
343 // Get/set the global index
344 uint32_t getGlobalIndex() const;
345 void setGlobalIndex(uint32_t index);
346 bool hasGlobalIndex() const;
347
348protected:
349 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
350 const WasmGlobalType *globalType)
351 : Symbol(name, k, flags, f), globalType(globalType) {}
352
353 const WasmGlobalType *globalType;
354 uint32_t globalIndex = INVALID_INDEX;
355};
356
357class DefinedGlobal : public GlobalSymbol {
358public:
359 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
360 InputGlobal *global);
361
362 static bool classof(const Symbol *s) {
363 return s->kind() == DefinedGlobalKind;
364 }
365
366 InputGlobal *global;
367};
368
369class UndefinedGlobal : public GlobalSymbol {
370public:
371 UndefinedGlobal(StringRef name, std::optional<StringRef> importName,
372 std::optional<StringRef> importModule, uint32_t flags,
373 InputFile *file = nullptr,
374 const WasmGlobalType *type = nullptr)
375 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type) {
376 this->importName = importName;
377 this->importModule = importModule;
378 }
379
380 static bool classof(const Symbol *s) {
381 return s->kind() == UndefinedGlobalKind;
382 }
383};
384
385class TableSymbol : public Symbol {
386public:
387 static bool classof(const Symbol *s) {
388 return s->kind() == DefinedTableKind || s->kind() == UndefinedTableKind;
389 }
390
391 const WasmTableType *getTableType() const { return tableType; }
392 void setLimits(const WasmLimits &limits);
393
394 // Get/set the table number
395 uint32_t getTableNumber() const;
396 void setTableNumber(uint32_t number);
397 bool hasTableNumber() const;
398
399protected:
400 TableSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
401 const WasmTableType *type)
402 : Symbol(name, k, flags, f), tableType(type) {}
403
404 const WasmTableType *tableType;
405 uint32_t tableNumber = INVALID_INDEX;
406};
407
408class DefinedTable : public TableSymbol {
409public:
410 DefinedTable(StringRef name, uint32_t flags, InputFile *file,
411 InputTable *table);
412
413 static bool classof(const Symbol *s) { return s->kind() == DefinedTableKind; }
414
415 InputTable *table;
416};
417
418class UndefinedTable : public TableSymbol {
419public:
420 UndefinedTable(StringRef name, std::optional<StringRef> importName,
421 std::optional<StringRef> importModule, uint32_t flags,
422 InputFile *file, const WasmTableType *type)
423 : TableSymbol(name, UndefinedTableKind, flags, file, type) {
424 this->importName = importName;
425 this->importModule = importModule;
426 }
427
428 static bool classof(const Symbol *s) {
429 return s->kind() == UndefinedTableKind;
430 }
431};
432
433// A tag is a general format to distinguish typed entities. Each tag has an
434// attribute and a type. Currently the attribute can only specify that the tag
435// is for an exception tag.
436//
437// In exception handling, tags are used to distinguish different kinds of
438// exceptions. For example, they can be used to distinguish different language's
439// exceptions, e.g., all C++ exceptions have the same tag and Java exceptions
440// would have a distinct tag. Wasm can filter the exceptions it catches based on
441// their tag.
442//
443// A single TagSymbol object represents a single tag. The C++ exception symbol
444// is a weak symbol generated in every object file in which exceptions are used,
445// and is named '__cpp_exception' for linking.
446class TagSymbol : public Symbol {
447public:
448 static bool classof(const Symbol *s) {
449 return s->kind() == DefinedTagKind || s->kind() == UndefinedTagKind;
450 }
451
452 // Get/set the tag index
453 uint32_t getTagIndex() const;
454 void setTagIndex(uint32_t index);
455 bool hasTagIndex() const;
456
457 const WasmSignature *signature;
458
459protected:
460 TagSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
461 const WasmSignature *sig)
462 : Symbol(name, k, flags, f), signature(sig) {}
463
464 uint32_t tagIndex = INVALID_INDEX;
465};
466
467class DefinedTag : public TagSymbol {
468public:
469 DefinedTag(StringRef name, uint32_t flags, InputFile *file, InputTag *tag);
470
471 static bool classof(const Symbol *s) { return s->kind() == DefinedTagKind; }
472
473 InputTag *tag;
474};
475
476class UndefinedTag : public TagSymbol {
477public:
478 UndefinedTag(StringRef name, std::optional<StringRef> importName,
479 std::optional<StringRef> importModule, uint32_t flags,
480 InputFile *file = nullptr, const WasmSignature *sig = nullptr)
481 : TagSymbol(name, UndefinedTagKind, flags, file, sig) {
482 this->importName = importName;
483 this->importModule = importModule;
484 }
485
486 static bool classof(const Symbol *s) { return s->kind() == UndefinedTagKind; }
487};
488
489// LazySymbol symbols represent symbols in object files between --start-lib and
490// --end-lib options. LLD also handles traditional archives as if all the files
491// in the archive are surrounded by --start-lib and --end-lib.
492//
493// A special complication is the handling of weak undefined symbols. They should
494// not load a file, but we have to remember we have seen both the weak undefined
495// and the lazy. We represent that with a lazy symbol with a weak binding. This
496// means that code looking for undefined symbols normally also has to take lazy
497// symbols into consideration.
498class LazySymbol : public Symbol {
499public:
500 LazySymbol(StringRef name, uint32_t flags, InputFile *file)
501 : Symbol(name, LazyKind, flags, file) {}
502
503 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
504 void extract();
505 void setWeak();
506
507 // Lazy symbols can have a signature because they can replace an
508 // UndefinedFunction in which case we need to be able to preserve the
509 // signature.
510 // TODO(sbc): This repetition of the signature field is inelegant. Revisit
511 // the use of class hierarchy to represent symbol taxonomy.
512 const WasmSignature *signature = nullptr;
513};
514
// Linker-generated symbols. Every member is a static pointer to one such
// symbol; the members are only declared here.
struct WasmSym {
  // __global_base
  // Symbol marking the start of the global section.
  static DefinedData *globalBase;

  // __stack_pointer/__stack_low/__stack_high
  // Global that holds current value of stack pointer and data symbols marking
  // the start and end of the stack region.  stackPointer is initialized to
  // stackHigh and grows downwards towards stackLow
  static GlobalSymbol *stackPointer;
  static DefinedData *stackLow;
  static DefinedData *stackHigh;

  // __tls_base
  // Global that holds the address of the base of the current thread's
  // TLS block.
  static GlobalSymbol *tlsBase;

  // __tls_size
  // Symbol whose value is the size of the TLS block.
  static GlobalSymbol *tlsSize;

  // __tls_align
  // Symbol whose value is the alignment of the TLS block.
  static GlobalSymbol *tlsAlign;

  // __data_end
  // Symbol marking the end of the data and bss.
  static DefinedData *dataEnd;

  // __heap_base/__heap_end
  // Symbols marking the beginning and end of the "heap". It starts at the end
  // of the data, bss and explicit stack, and extends to the end of the linear
  // memory allocated by wasm-ld. This region of memory is not used by the
  // linked code, so it may be used as a backing store for `sbrk` or `malloc`
  // implementations.
  static DefinedData *heapBase;
  static DefinedData *heapEnd;

  // __wasm_init_memory_flag
  // Symbol whose contents are nonzero iff memory has already been initialized.
  static DefinedData *initMemoryFlag;

  // __wasm_init_memory
  // Function that initializes passive data segments during instantiation.
  static DefinedFunction *initMemory;

  // __wasm_call_ctors
  // Function that directly calls all ctors in priority order.
  static DefinedFunction *callCtors;

  // __wasm_call_dtors
  // Function that calls the libc/etc. cleanup function.
  static DefinedFunction *callDtors;

  // __wasm_apply_data_relocs
  // Function that applies relocations to data segment post-instantiation.
  static DefinedFunction *applyDataRelocs;

  // __wasm_apply_global_relocs
  // Function that applies relocations to wasm globals post-instantiation.
  // Unlike __wasm_apply_data_relocs this needs to run on every thread.
  static DefinedFunction *applyGlobalRelocs;

  // __wasm_apply_tls_relocs
  // Like applyDataRelocs but for TLS section.  These must be delayed until
  // __wasm_init_tls.
  static DefinedFunction *applyTLSRelocs;

  // __wasm_apply_global_tls_relocs
  // Like applyGlobalRelocs but for globals that hold TLS addresses.  These
  // must be delayed until __wasm_init_tls.
  static DefinedFunction *applyGlobalTLSRelocs;

  // __wasm_init_tls
  // Function that allocates thread-local storage and initializes it.
  static DefinedFunction *initTLS;

  // Pointer to the function that is to be used in the start section.
  // (normally an alias of initMemory, or applyGlobalRelocs).
  static DefinedFunction *startFunction;

  // __dso_handle
  // Symbol used in calls to __cxa_atexit to determine current DLL
  static DefinedData *dsoHandle;

  // __table_base
  // Used in PIC code for offset of indirect function table
  static UndefinedGlobal *tableBase;
  static DefinedData *definedTableBase;
  // 32-bit copy in wasm64 to work around init expr limitations.
  // These can potentially be removed again once we have
  // https://github.com/WebAssembly/extended-const
  static UndefinedGlobal *tableBase32;
  static DefinedData *definedTableBase32;

  // __memory_base
  // Used in PIC code for offset of global data
  static UndefinedGlobal *memoryBase;
  static DefinedData *definedMemoryBase;

  // __indirect_function_table
  // Used as an address space for function pointers, with each function that is
  // used as a function pointer being allocated a slot.
  static TableSymbol *indirectFunctionTable;
};
622
623// A buffer class that is large enough to hold any Symbol-derived
624// object. We allocate memory using this class and instantiate a symbol
625// using the placement new.
626union SymbolUnion {
627 alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
628 alignas(DefinedData) char b[sizeof(DefinedData)];
629 alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
630 alignas(DefinedTag) char d[sizeof(DefinedTag)];
631 alignas(DefinedTable) char e[sizeof(DefinedTable)];
632 alignas(LazySymbol) char f[sizeof(LazySymbol)];
633 alignas(UndefinedFunction) char g[sizeof(UndefinedFunction)];
634 alignas(UndefinedData) char h[sizeof(UndefinedData)];
635 alignas(UndefinedGlobal) char i[sizeof(UndefinedGlobal)];
636 alignas(UndefinedTable) char j[sizeof(UndefinedTable)];
637 alignas(SectionSymbol) char k[sizeof(SectionSymbol)];
638};
639
640// It is important to keep the size of SymbolUnion small for performance and
641// memory usage reasons. 96 bytes is a soft limit based on the size of
642// UndefinedFunction on a 64-bit system.
643static_assert(sizeof(SymbolUnion) <= 120, "SymbolUnion too large");
644
// Tracing helpers, declared here only. replaceSymbol() calls printTraceSymbol
// on a freshly constructed symbol whose `traced` bit is set.
void printTraceSymbol(Symbol *sym);
void printTraceSymbolUndefined(StringRef name, const InputFile* file);
647
648template <typename T, typename... ArgT>
649T *replaceSymbol(Symbol *s, ArgT &&... arg) {
650 static_assert(std::is_trivially_destructible<T>(),
651 "Symbol types must be trivially destructible");
652 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
653 static_assert(alignof(T) <= alignof(SymbolUnion),
654 "SymbolUnion not aligned enough");
655 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
656 "Not a Symbol");
657
658 Symbol symCopy = *s;
659
660 T *s2 = new (s) T(std::forward<ArgT>(arg)...);
661 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
662 s2->forceExport = symCopy.forceExport;
663 s2->forceImport = symCopy.forceImport;
664 s2->canInline = symCopy.canInline;
665 s2->traced = symCopy.traced;
666 s2->referenced = symCopy.referenced;
667
668 // Print out a log message if --trace-symbol was specified.
669 // This is for debugging.
670 if (s2->traced)
671 printTraceSymbol(s2);
672
673 return s2;
674}
675
676} // namespace wasm
677
// Returns a symbol name for an error message.
std::string toString(const wasm::Symbol &sym);
// String form of a symbol kind.
// NOTE(review): exact wording of the returned strings is defined elsewhere.
std::string toString(wasm::Symbol::Kind kind);
// NOTE(review): presumably returns `name` demangled when demangling applies
// and unchanged otherwise -- confirm against the definition.
std::string maybeDemangleSymbol(StringRef name);
682
683} // namespace lld
684
685#endif
686

source code of lld/wasm/Symbols.h