1 | //===- Symbols.h ------------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_COFF_SYMBOLS_H |
10 | #define LLD_COFF_SYMBOLS_H |
11 | |
12 | #include "Chunks.h" |
13 | #include "Config.h" |
14 | #include "lld/Common/LLVM.h" |
15 | #include "lld/Common/Memory.h" |
16 | #include "llvm/ADT/ArrayRef.h" |
17 | #include "llvm/Object/Archive.h" |
18 | #include "llvm/Object/COFF.h" |
19 | #include <atomic> |
20 | #include <memory> |
21 | #include <vector> |
22 | |
23 | namespace lld { |
24 | |
25 | namespace coff { |
26 | |
27 | using llvm::object::Archive; |
28 | using llvm::object::COFFSymbolRef; |
29 | using llvm::object::coff_import_header; |
30 | using llvm::object::coff_symbol_generic; |
31 | |
32 | class ArchiveFile; |
33 | class COFFLinkerContext; |
34 | class InputFile; |
35 | class ObjFile; |
36 | class SymbolTable; |
37 | |
38 | // The base class for real symbol classes. |
39 | class Symbol { |
40 | public: |
41 | enum Kind { |
42 | // The order of these is significant. We start with the regular defined |
43 | // symbols as those are the most prevalent and the zero tag is the cheapest |
44 | // to set. Among the defined kinds, the lower the kind is preferred over |
45 | // the higher kind when testing whether one symbol should take precedence |
46 | // over another. |
47 | DefinedRegularKind = 0, |
48 | DefinedCommonKind, |
49 | DefinedLocalImportKind, |
50 | DefinedImportThunkKind, |
51 | DefinedImportDataKind, |
52 | DefinedAbsoluteKind, |
53 | DefinedSyntheticKind, |
54 | |
55 | UndefinedKind, |
56 | LazyArchiveKind, |
57 | LazyObjectKind, |
58 | LazyDLLSymbolKind, |
59 | |
60 | LastDefinedCOFFKind = DefinedCommonKind, |
61 | LastDefinedKind = DefinedSyntheticKind, |
62 | }; |
63 | |
64 | Kind kind() const { return static_cast<Kind>(symbolKind); } |
65 | |
66 | // Returns the symbol name. |
67 | StringRef getName() { |
68 | // COFF symbol names are read lazily for a performance reason. |
69 | // Non-external symbol names are never used by the linker except for logging |
70 | // or debugging. Their internal references are resolved not by name but by |
71 | // symbol index. And because they are not external, no one can refer them by |
72 | // name. Object files contain lots of non-external symbols, and creating |
73 | // StringRefs for them (which involves lots of strlen() on the string table) |
74 | // is a waste of time. |
75 | if (nameData == nullptr) |
76 | computeName(); |
77 | return StringRef(nameData, nameSize); |
78 | } |
79 | |
80 | void replaceKeepingName(Symbol *other, size_t size); |
81 | |
82 | // Returns the file from which this symbol was created. |
83 | InputFile *getFile(); |
84 | |
85 | // Indicates that this symbol will be included in the final image. Only valid |
86 | // after calling markLive. |
87 | bool isLive() const; |
88 | |
89 | bool isLazy() const { |
90 | return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind || |
91 | symbolKind == LazyDLLSymbolKind; |
92 | } |
93 | |
94 | private: |
95 | void computeName(); |
96 | |
97 | protected: |
98 | friend SymbolTable; |
99 | explicit Symbol(Kind k, StringRef n = "" ) |
100 | : symbolKind(k), isExternal(true), isCOMDAT(false), |
101 | writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false), |
102 | isRuntimePseudoReloc(false), deferUndefined(false), canInline(true), |
103 | isWeak(false), nameSize(n.size()), |
104 | nameData(n.empty() ? nullptr : n.data()) { |
105 | assert((!n.empty() || k <= LastDefinedCOFFKind) && |
106 | "If the name is empty, the Symbol must be a DefinedCOFF." ); |
107 | } |
108 | |
109 | const unsigned symbolKind : 8; |
110 | unsigned isExternal : 1; |
111 | |
112 | public: |
113 | // This bit is used by the \c DefinedRegular subclass. |
114 | unsigned isCOMDAT : 1; |
115 | |
116 | // This bit is used by Writer::createSymbolAndStringTable() to prevent |
117 | // symbols from being written to the symbol table more than once. |
118 | unsigned writtenToSymtab : 1; |
119 | |
120 | // True if this symbol was referenced by a regular (non-bitcode) object. |
121 | unsigned isUsedInRegularObj : 1; |
122 | |
123 | // True if we've seen both a lazy and an undefined symbol with this symbol |
124 | // name, which means that we have enqueued an archive member load and should |
125 | // not load any more archive members to resolve the same symbol. |
126 | unsigned pendingArchiveLoad : 1; |
127 | |
128 | /// True if we've already added this symbol to the list of GC roots. |
129 | unsigned isGCRoot : 1; |
130 | |
131 | unsigned isRuntimePseudoReloc : 1; |
132 | |
133 | // True if we want to allow this symbol to be undefined in the early |
134 | // undefined check pass in SymbolTable::reportUnresolvable(), as it |
135 | // might be fixed up later. |
136 | unsigned deferUndefined : 1; |
137 | |
138 | // False if LTO shouldn't inline whatever this symbol points to. If a symbol |
139 | // is overwritten after LTO, LTO shouldn't inline the symbol because it |
140 | // doesn't know the final contents of the symbol. |
141 | unsigned canInline : 1; |
142 | |
143 | // True if the symbol is weak. This is only tracked for bitcode/LTO symbols. |
144 | // This information isn't written to the output; rather, it's used for |
145 | // managing weak symbol overrides. |
146 | unsigned isWeak : 1; |
147 | |
148 | protected: |
149 | // Symbol name length. Assume symbol lengths fit in a 32-bit integer. |
150 | uint32_t nameSize; |
151 | |
152 | const char *nameData; |
153 | }; |
154 | |
155 | // The base class for any defined symbols, including absolute symbols, |
156 | // etc. |
157 | class Defined : public Symbol { |
158 | public: |
159 | Defined(Kind k, StringRef n) : Symbol(k, n) {} |
160 | |
161 | static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; } |
162 | |
163 | // Returns the RVA (relative virtual address) of this symbol. The |
164 | // writer sets and uses RVAs. |
165 | uint64_t getRVA(); |
166 | |
167 | // Returns the chunk containing this symbol. Absolute symbols and __ImageBase |
168 | // do not have chunks, so this may return null. |
169 | Chunk *getChunk(); |
170 | }; |
171 | |
172 | // Symbols defined via a COFF object file or bitcode file. For COFF files, this |
173 | // stores a coff_symbol_generic*, and names of internal symbols are lazily |
174 | // loaded through that. For bitcode files, Sym is nullptr and the name is stored |
175 | // as a decomposed StringRef. |
176 | class DefinedCOFF : public Defined { |
177 | friend Symbol; |
178 | |
179 | public: |
180 | DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s) |
181 | : Defined(k, n), file(f), sym(s) {} |
182 | |
183 | static bool classof(const Symbol *s) { |
184 | return s->kind() <= LastDefinedCOFFKind; |
185 | } |
186 | |
187 | InputFile *getFile() { return file; } |
188 | |
189 | COFFSymbolRef getCOFFSymbol(); |
190 | |
191 | InputFile *file; |
192 | |
193 | protected: |
194 | const coff_symbol_generic *sym; |
195 | }; |
196 | |
197 | // Regular defined symbols read from object file symbol tables. |
198 | class DefinedRegular : public DefinedCOFF { |
199 | public: |
200 | DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT, |
201 | bool isExternal = false, |
202 | const coff_symbol_generic *s = nullptr, |
203 | SectionChunk *c = nullptr, bool isWeak = false) |
204 | : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) { |
205 | this->isExternal = isExternal; |
206 | this->isCOMDAT = isCOMDAT; |
207 | this->isWeak = isWeak; |
208 | } |
209 | |
210 | static bool classof(const Symbol *s) { |
211 | return s->kind() == DefinedRegularKind; |
212 | } |
213 | |
214 | uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; } |
215 | SectionChunk *getChunk() const { return *data; } |
216 | uint32_t getValue() const { return sym->Value; } |
217 | |
218 | SectionChunk **data; |
219 | }; |
220 | |
221 | class DefinedCommon : public DefinedCOFF { |
222 | public: |
223 | DefinedCommon(InputFile *f, StringRef n, uint64_t size, |
224 | const coff_symbol_generic *s = nullptr, |
225 | CommonChunk *c = nullptr) |
226 | : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) { |
227 | this->isExternal = true; |
228 | } |
229 | |
230 | static bool classof(const Symbol *s) { |
231 | return s->kind() == DefinedCommonKind; |
232 | } |
233 | |
234 | uint64_t getRVA() { return data->getRVA(); } |
235 | CommonChunk *getChunk() { return data; } |
236 | |
237 | private: |
238 | friend SymbolTable; |
239 | uint64_t getSize() const { return size; } |
240 | CommonChunk *data; |
241 | uint64_t size; |
242 | }; |
243 | |
244 | // Absolute symbols. |
245 | class DefinedAbsolute : public Defined { |
246 | public: |
247 | DefinedAbsolute(const COFFLinkerContext &c, StringRef n, COFFSymbolRef s) |
248 | : Defined(DefinedAbsoluteKind, n), va(s.getValue()), ctx(c) { |
249 | isExternal = s.isExternal(); |
250 | } |
251 | |
252 | DefinedAbsolute(const COFFLinkerContext &c, StringRef n, uint64_t v) |
253 | : Defined(DefinedAbsoluteKind, n), va(v), ctx(c) {} |
254 | |
255 | static bool classof(const Symbol *s) { |
256 | return s->kind() == DefinedAbsoluteKind; |
257 | } |
258 | |
259 | uint64_t getRVA(); |
260 | void setVA(uint64_t v) { va = v; } |
261 | uint64_t getVA() const { return va; } |
262 | |
263 | private: |
264 | uint64_t va; |
265 | const COFFLinkerContext &ctx; |
266 | }; |
267 | |
268 | // This symbol is used for linker-synthesized symbols like __ImageBase and |
269 | // __safe_se_handler_table. |
270 | class DefinedSynthetic : public Defined { |
271 | public: |
272 | explicit DefinedSynthetic(StringRef name, Chunk *c, uint32_t offset = 0) |
273 | : Defined(DefinedSyntheticKind, name), c(c), offset(offset) {} |
274 | |
275 | static bool classof(const Symbol *s) { |
276 | return s->kind() == DefinedSyntheticKind; |
277 | } |
278 | |
279 | // A null chunk indicates that this is __ImageBase. Otherwise, this is some |
280 | // other synthesized chunk, like SEHTableChunk. |
281 | uint32_t getRVA() { return c ? c->getRVA() + offset : 0; } |
282 | Chunk *getChunk() { return c; } |
283 | |
284 | private: |
285 | Chunk *c; |
286 | uint32_t offset; |
287 | }; |
288 | |
289 | // This class represents a symbol defined in an archive file. It is |
290 | // created from an archive file header, and it knows how to load an |
291 | // object file from an archive to replace itself with a defined |
292 | // symbol. If the resolver finds both Undefined and LazyArchive for |
293 | // the same name, it will ask the LazyArchive to load a file. |
294 | class LazyArchive : public Symbol { |
295 | public: |
296 | LazyArchive(ArchiveFile *f, const Archive::Symbol s) |
297 | : Symbol(LazyArchiveKind, s.getName()), file(f), sym(s) {} |
298 | |
299 | static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } |
300 | |
301 | MemoryBufferRef getMemberBuffer(); |
302 | |
303 | ArchiveFile *file; |
304 | const Archive::Symbol sym; |
305 | }; |
306 | |
307 | class LazyObject : public Symbol { |
308 | public: |
309 | LazyObject(InputFile *f, StringRef n) : Symbol(LazyObjectKind, n), file(f) {} |
310 | static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } |
311 | InputFile *file; |
312 | }; |
313 | |
314 | // MinGW only. |
315 | class LazyDLLSymbol : public Symbol { |
316 | public: |
317 | LazyDLLSymbol(DLLFile *f, DLLFile::Symbol *s, StringRef n) |
318 | : Symbol(LazyDLLSymbolKind, n), file(f), sym(s) {} |
319 | static bool classof(const Symbol *s) { |
320 | return s->kind() == LazyDLLSymbolKind; |
321 | } |
322 | |
323 | DLLFile *file; |
324 | DLLFile::Symbol *sym; |
325 | }; |
326 | |
327 | // Undefined symbols. |
328 | class Undefined : public Symbol { |
329 | public: |
330 | explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {} |
331 | |
332 | static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } |
333 | |
334 | // An undefined symbol can have a fallback symbol which gives an |
335 | // undefined symbol a second chance if it would remain undefined. |
336 | // If it remains undefined, it'll be replaced with whatever the |
337 | // Alias pointer points to. |
338 | Symbol *weakAlias = nullptr; |
339 | |
340 | // If this symbol is external weak, try to resolve it to a defined |
341 | // symbol by searching the chain of fallback symbols. Returns the symbol if |
342 | // successful, otherwise returns null. |
343 | Defined *getWeakAlias(); |
344 | }; |
345 | |
346 | // Windows-specific classes. |
347 | |
348 | // This class represents a symbol imported from a DLL. This has two |
349 | // names for internal use and external use. The former is used for |
350 | // name resolution, and the latter is used for the import descriptor |
351 | // table in an output. The former has "__imp_" prefix. |
352 | class DefinedImportData : public Defined { |
353 | public: |
354 | DefinedImportData(StringRef n, ImportFile *f) |
355 | : Defined(DefinedImportDataKind, n), file(f) { |
356 | } |
357 | |
358 | static bool classof(const Symbol *s) { |
359 | return s->kind() == DefinedImportDataKind; |
360 | } |
361 | |
362 | uint64_t getRVA() { return file->location->getRVA(); } |
363 | Chunk *getChunk() { return file->location; } |
364 | void setLocation(Chunk *addressTable) { file->location = addressTable; } |
365 | |
366 | StringRef getDLLName() { return file->dllName; } |
367 | StringRef getExternalName() { return file->externalName; } |
368 | uint16_t getOrdinal() { return file->hdr->OrdinalHint; } |
369 | |
370 | ImportFile *file; |
371 | |
372 | // This is a pointer to the synthetic symbol associated with the load thunk |
373 | // for this symbol that will be called if the DLL is delay-loaded. This is |
374 | // needed for Control Flow Guard because if this DefinedImportData symbol is a |
375 | // valid call target, the corresponding load thunk must also be marked as a |
376 | // valid call target. |
377 | DefinedSynthetic *loadThunkSym = nullptr; |
378 | }; |
379 | |
380 | // This class represents a symbol for a jump table entry which jumps |
381 | // to a function in a DLL. Linker are supposed to create such symbols |
382 | // without "__imp_" prefix for all function symbols exported from |
383 | // DLLs, so that you can call DLL functions as regular functions with |
384 | // a regular name. A function pointer is given as a DefinedImportData. |
385 | class DefinedImportThunk : public Defined { |
386 | public: |
387 | DefinedImportThunk(COFFLinkerContext &ctx, StringRef name, |
388 | DefinedImportData *s, uint16_t machine); |
389 | |
390 | static bool classof(const Symbol *s) { |
391 | return s->kind() == DefinedImportThunkKind; |
392 | } |
393 | |
394 | uint64_t getRVA() { return data->getRVA(); } |
395 | Chunk *getChunk() { return data; } |
396 | |
397 | DefinedImportData *wrappedSym; |
398 | |
399 | private: |
400 | Chunk *data; |
401 | }; |
402 | |
403 | // If you have a symbol "foo" in your object file, a symbol name |
404 | // "__imp_foo" becomes automatically available as a pointer to "foo". |
405 | // This class is for such automatically-created symbols. |
406 | // Yes, this is an odd feature. We didn't intend to implement that. |
407 | // This is here just for compatibility with MSVC. |
408 | class DefinedLocalImport : public Defined { |
409 | public: |
410 | DefinedLocalImport(COFFLinkerContext &ctx, StringRef n, Defined *s) |
411 | : Defined(DefinedLocalImportKind, n), |
412 | data(make<LocalImportChunk>(args&: ctx, args&: s)) {} |
413 | |
414 | static bool classof(const Symbol *s) { |
415 | return s->kind() == DefinedLocalImportKind; |
416 | } |
417 | |
418 | uint64_t getRVA() { return data->getRVA(); } |
419 | Chunk *getChunk() { return data; } |
420 | |
421 | private: |
422 | LocalImportChunk *data; |
423 | }; |
424 | |
425 | inline uint64_t Defined::getRVA() { |
426 | switch (kind()) { |
427 | case DefinedAbsoluteKind: |
428 | return cast<DefinedAbsolute>(Val: this)->getRVA(); |
429 | case DefinedSyntheticKind: |
430 | return cast<DefinedSynthetic>(Val: this)->getRVA(); |
431 | case DefinedImportDataKind: |
432 | return cast<DefinedImportData>(Val: this)->getRVA(); |
433 | case DefinedImportThunkKind: |
434 | return cast<DefinedImportThunk>(Val: this)->getRVA(); |
435 | case DefinedLocalImportKind: |
436 | return cast<DefinedLocalImport>(Val: this)->getRVA(); |
437 | case DefinedCommonKind: |
438 | return cast<DefinedCommon>(Val: this)->getRVA(); |
439 | case DefinedRegularKind: |
440 | return cast<DefinedRegular>(Val: this)->getRVA(); |
441 | case LazyArchiveKind: |
442 | case LazyObjectKind: |
443 | case LazyDLLSymbolKind: |
444 | case UndefinedKind: |
445 | llvm_unreachable("Cannot get the address for an undefined symbol." ); |
446 | } |
447 | llvm_unreachable("unknown symbol kind" ); |
448 | } |
449 | |
450 | inline Chunk *Defined::getChunk() { |
451 | switch (kind()) { |
452 | case DefinedRegularKind: |
453 | return cast<DefinedRegular>(Val: this)->getChunk(); |
454 | case DefinedAbsoluteKind: |
455 | return nullptr; |
456 | case DefinedSyntheticKind: |
457 | return cast<DefinedSynthetic>(Val: this)->getChunk(); |
458 | case DefinedImportDataKind: |
459 | return cast<DefinedImportData>(Val: this)->getChunk(); |
460 | case DefinedImportThunkKind: |
461 | return cast<DefinedImportThunk>(Val: this)->getChunk(); |
462 | case DefinedLocalImportKind: |
463 | return cast<DefinedLocalImport>(Val: this)->getChunk(); |
464 | case DefinedCommonKind: |
465 | return cast<DefinedCommon>(Val: this)->getChunk(); |
466 | case LazyArchiveKind: |
467 | case LazyObjectKind: |
468 | case LazyDLLSymbolKind: |
469 | case UndefinedKind: |
470 | llvm_unreachable("Cannot get the chunk of an undefined symbol." ); |
471 | } |
472 | llvm_unreachable("unknown symbol kind" ); |
473 | } |
474 | |
// A buffer class that is large enough to hold any Symbol-derived
// object. We allocate memory using this class and instantiate a symbol
// using the placement new.
//
// Each member is a raw char array with the size and alignment of one concrete
// symbol class, so the union as a whole is as large and as strictly aligned
// as the most demanding of them. replaceSymbol() static_asserts that the type
// it constructs fits these bounds.
union SymbolUnion {
  alignas(DefinedRegular) char a[sizeof(DefinedRegular)];
  alignas(DefinedCommon) char b[sizeof(DefinedCommon)];
  alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)];
  alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)];
  alignas(LazyArchive) char e[sizeof(LazyArchive)];
  alignas(Undefined) char f[sizeof(Undefined)];
  alignas(DefinedImportData) char g[sizeof(DefinedImportData)];
  alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)];
  alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)];
  alignas(LazyObject) char j[sizeof(LazyObject)];
  alignas(LazyDLLSymbol) char k[sizeof(LazyDLLSymbol)];
};
491 | |
492 | template <typename T, typename... ArgT> |
493 | void replaceSymbol(Symbol *s, ArgT &&... arg) { |
494 | static_assert(std::is_trivially_destructible<T>(), |
495 | "Symbol types must be trivially destructible" ); |
496 | static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small" ); |
497 | static_assert(alignof(T) <= alignof(SymbolUnion), |
498 | "SymbolUnion not aligned enough" ); |
499 | assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && |
500 | "Not a Symbol" ); |
501 | bool canInline = s->canInline; |
502 | new (s) T(std::forward<ArgT>(arg)...); |
503 | s->canInline = canInline; |
504 | } |
505 | } // namespace coff |
506 | |
507 | std::string toString(const coff::COFFLinkerContext &ctx, coff::Symbol &b); |
508 | std::string toCOFFString(const coff::COFFLinkerContext &ctx, |
509 | const llvm::object::Archive::Symbol &b); |
510 | |
511 | } // namespace lld |
512 | |
513 | #endif |
514 | |