1//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_SYMBOLS_H
10#define LLD_MACHO_SYMBOLS_H
11
12#include "Config.h"
13#include "InputFiles.h"
14#include "Target.h"
15#include "lld/Common/ErrorHandler.h"
16#include "lld/Common/Strings.h"
17#include "llvm/Object/Archive.h"
18#include "llvm/Support/MathExtras.h"
19
20namespace lld {
21namespace macho {
22
23class MachHeaderSection;
24
25struct StringRefZ {
26 StringRefZ(const char *s) : data(s), size(-1) {}
27 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
28
29 const char *data;
30 const uint32_t size;
31};
32
33class Symbol {
34public:
35 enum Kind {
36 DefinedKind,
37 UndefinedKind,
38 CommonKind,
39 DylibKind,
40 LazyArchiveKind,
41 LazyObjectKind,
42 };
43
44 virtual ~Symbol() {}
45
46 Kind kind() const { return symbolKind; }
47
48 StringRef getName() const {
49 if (nameSize == (uint32_t)-1)
50 nameSize = strlen(nameData);
51 return {nameData, nameSize};
52 }
53
54 bool isLive() const { return used; }
55 bool isLazy() const {
56 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
57 }
58
59 virtual uint64_t getVA() const { return 0; }
60
61 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
62
63 // Only undefined or dylib symbols can be weak references. A weak reference
64 // need not be satisfied at runtime, e.g. due to the symbol not being
65 // available on a given target platform.
66 virtual bool isWeakRef() const { return false; }
67
68 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
69
70 // Whether this symbol is in the GOT or TLVPointer sections.
71 bool isInGot() const { return gotIndex != UINT32_MAX; }
72
73 // Whether this symbol is in the StubsSection.
74 bool isInStubs() const { return stubsIndex != UINT32_MAX; }
75
76 uint64_t getStubVA() const;
77 uint64_t getGotVA() const;
78 uint64_t getTlvVA() const;
79 uint64_t resolveBranchVA() const {
80 assert(isa<Defined>(this) || isa<DylibSymbol>(this));
81 return isInStubs() ? getStubVA() : getVA();
82 }
83 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
84 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
85
86 // The index of this symbol in the GOT or the TLVPointer section, depending
87 // on whether it is a thread-local. A given symbol cannot be referenced by
88 // both these sections at once.
89 uint32_t gotIndex = UINT32_MAX;
90 uint32_t lazyBindOffset = UINT32_MAX;
91 uint32_t stubsHelperIndex = UINT32_MAX;
92 uint32_t stubsIndex = UINT32_MAX;
93 uint32_t symtabIndex = UINT32_MAX;
94
95 InputFile *getFile() const { return file; }
96
97protected:
98 Symbol(Kind k, StringRefZ name, InputFile *file)
99 : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
100 isUsedInRegularObj(!file || isa<ObjFile>(file)),
101 used(!config->deadStrip) {}
102
103 Kind symbolKind;
104 const char *nameData;
105 InputFile *file;
106 mutable uint32_t nameSize;
107
108public:
109 // True if this symbol was referenced by a regular (non-bitcode) object.
110 bool isUsedInRegularObj : 1;
111
112 // True if this symbol is used from a live section.
113 bool used : 1;
114};
115
116class Defined : public Symbol {
117public:
118 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
119 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
120 bool includeInSymtab, bool isThumb, bool isReferencedDynamically,
121 bool noDeadStrip, bool canOverrideWeakDef = false,
122 bool isWeakDefCanBeHidden = false, bool interposable = false);
123
124 bool isWeakDef() const override { return weakDef; }
125 bool isExternalWeakDef() const {
126 return isWeakDef() && isExternal() && !privateExtern;
127 }
128 bool isTlv() const override;
129
130 bool isExternal() const { return external; }
131 bool isAbsolute() const { return isec == nullptr; }
132
133 uint64_t getVA() const override;
134
135 std::string getSourceLocation();
136
137 // Ensure this symbol's pointers to InputSections point to their canonical
138 // copies.
139 void canonicalize();
140
141 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
142
143 // Place the bitfields first so that they can get placed in the tail padding
144 // of the parent class, on platforms which support it.
145 bool overridesWeakDef : 1;
146 // Whether this symbol should appear in the output binary's export trie.
147 bool privateExtern : 1;
148 // Whether this symbol should appear in the output symbol table.
149 bool includeInSymtab : 1;
150 // Whether this symbol was folded into a different symbol during ICF.
151 bool wasIdenticalCodeFolded : 1;
152 // Only relevant when compiling for Thumb-supporting arm32 archs.
153 bool thumb : 1;
154 // Symbols marked referencedDynamically won't be removed from the output's
155 // symbol table by tools like strip. In theory, this could be set on arbitrary
156 // symbols in input object files. In practice, it's used solely for the
157 // synthetic __mh_execute_header symbol.
158 // This is information for the static linker, and it's also written to the
159 // output file's symbol table for tools running later (such as `strip`).
160 bool referencedDynamically : 1;
161 // Set on symbols that should not be removed by dead code stripping.
162 // Set for example on `__attribute__((used))` globals, or on some Objective-C
163 // metadata. This is information only for the static linker and not written
164 // to the output.
165 bool noDeadStrip : 1;
166 // Whether references to this symbol can be interposed at runtime to point to
167 // a different symbol definition (with the same name). For example, if both
168 // dylib A and B define an interposable symbol _foo, and we load A before B at
169 // runtime, then all references to _foo within dylib B will point to the
170 // definition in dylib A.
171 //
172 // Only extern symbols may be interposable.
173 bool interposable : 1;
174
175 bool weakDefCanBeHidden : 1;
176
177private:
178 const bool weakDef : 1;
179 const bool external : 1;
180
181public:
182 InputSection *isec;
183 // Contains the offset from the containing subsection. Note that this is
184 // different from nlist::n_value, which is the absolute address of the symbol.
185 uint64_t value;
186 // size is only calculated for regular (non-bitcode) symbols.
187 uint64_t size;
188 // This can be a subsection of either __compact_unwind or __eh_frame.
189 ConcatInputSection *unwindEntry = nullptr;
190};
191
// RefState serves two purposes. On a symbol it records whether, and how, a
// dylib symbol is referenced. On a DylibFile it records the kind of referenced
// symbols the file contains. When a file has both weak and strong references,
// the file counts as strongly referenced.
//
// The enumerators are ordered by strength, so they can be compared with the
// relational operators.
enum class RefState : uint8_t {
  Unreferenced = 0, // not referenced at all
  Weak = 1,         // only weakly referenced
  Strong = 2,       // at least one strong reference
};
198
199class Undefined : public Symbol {
200public:
201 Undefined(StringRefZ name, InputFile *file, RefState refState)
202 : Symbol(UndefinedKind, name, file), refState(refState) {
203 assert(refState != RefState::Unreferenced);
204 }
205
206 bool isWeakRef() const override { return refState == RefState::Weak; }
207
208 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
209
210 RefState refState : 2;
211};
212
213// On Unix, it is traditionally allowed to write variable definitions without
214// initialization expressions (such as "int foo;") to header files. These are
215// called tentative definitions.
216//
217// Using tentative definitions is usually considered a bad practice; you should
218// write only declarations (such as "extern int foo;") to header files.
219// Nevertheless, the linker and the compiler have to do something to support
220// bad code by allowing duplicate definitions for this particular case.
221//
222// The compiler creates common symbols when it sees tentative definitions.
223// (You can suppress this behavior and let the compiler create a regular
224// defined symbol by passing -fno-common. -fno-common is the default in clang
225// as of LLVM 11.0.) When linking the final binary, if there are remaining
226// common symbols after name resolution is complete, the linker converts them
227// to regular defined symbols in a __common section.
228class CommonSymbol : public Symbol {
229public:
230 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
231 bool isPrivateExtern)
232 : Symbol(CommonKind, name, file), size(size),
233 align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
234 privateExtern(isPrivateExtern) {
235 // TODO: cap maximum alignment
236 }
237
238 static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
239
240 const uint64_t size;
241 const uint32_t align;
242 const bool privateExtern;
243};
244
245class DylibSymbol : public Symbol {
246public:
247 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
248 RefState refState, bool isTlv)
249 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
250 tlv(isTlv) {
251 if (file && refState > RefState::Unreferenced)
252 file->numReferencedSymbols++;
253 }
254
255 uint64_t getVA() const override;
256 bool isWeakDef() const override { return weakDef; }
257
258 // Symbols from weak libraries/frameworks are also weakly-referenced.
259 bool isWeakRef() const override {
260 return refState == RefState::Weak ||
261 (file && getFile()->umbrella->forceWeakImport);
262 }
263 bool isReferenced() const { return refState != RefState::Unreferenced; }
264 bool isTlv() const override { return tlv; }
265 bool isDynamicLookup() const { return file == nullptr; }
266 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
267
268 DylibFile *getFile() const {
269 assert(!isDynamicLookup());
270 return cast<DylibFile>(file);
271 }
272
273 static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
274
275 RefState getRefState() const { return refState; }
276
277 void reference(RefState newState) {
278 assert(newState > RefState::Unreferenced);
279 if (refState == RefState::Unreferenced && file)
280 getFile()->numReferencedSymbols++;
281 refState = std::max(refState, newState);
282 }
283
284 void unreference() {
285 // dynamic_lookup symbols have no file.
286 if (refState > RefState::Unreferenced && file) {
287 assert(getFile()->numReferencedSymbols > 0);
288 getFile()->numReferencedSymbols--;
289 }
290 }
291
292private:
293 RefState refState : 2;
294 const bool weakDef : 1;
295 const bool tlv : 1;
296};
297
298class LazyArchive : public Symbol {
299public:
300 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
301 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {}
302
303 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
304 void fetchArchiveMember();
305
306 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
307
308private:
309 const llvm::object::Archive::Symbol sym;
310};
311
312// A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and
313// --end-lib.
314class LazyObject : public Symbol {
315public:
316 LazyObject(InputFile &file, StringRef name)
317 : Symbol(LazyObjectKind, name, &file) {
318 isUsedInRegularObj = false;
319 }
320
321 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
322};
323
324union SymbolUnion {
325 alignas(Defined) char a[sizeof(Defined)];
326 alignas(Undefined) char b[sizeof(Undefined)];
327 alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
328 alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
329 alignas(LazyArchive) char e[sizeof(LazyArchive)];
330 alignas(LazyObject) char f[sizeof(LazyObject)];
331};
332
333template <typename T, typename... ArgT>
334T *replaceSymbol(Symbol *s, ArgT &&...arg) {
335 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
336 static_assert(alignof(T) <= alignof(SymbolUnion),
337 "SymbolUnion not aligned enough");
338 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
339 "Not a Symbol");
340
341 bool isUsedInRegularObj = s->isUsedInRegularObj;
342 bool used = s->used;
343 T *sym = new (s) T(std::forward<ArgT>(arg)...);
344 sym->isUsedInRegularObj |= isUsedInRegularObj;
345 sym->used |= used;
346 return sym;
347}
348
349} // namespace macho
350
351std::string toString(const macho::Symbol &);
352std::string toMachOString(const llvm::object::Archive::Symbol &);
353
354} // namespace lld
355
356#endif
357

// Source code of lld/MachO/Symbols.h