1 | //===- SymbolTable.cpp ----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// The symbol table is a bag of all known symbols. We put all symbols from
// all input files into the symbol table. The symbol table is basically
// a hash table with the logic to resolve symbol name conflicts using
// the symbol types.
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "SymbolTable.h" |
17 | #include "Config.h" |
18 | #include "InputFiles.h" |
19 | #include "Symbols.h" |
20 | #include "lld/Common/ErrorHandler.h" |
21 | #include "lld/Common/Memory.h" |
22 | #include "lld/Common/Strings.h" |
23 | #include "llvm/ADT/STLExtras.h" |
24 | #include "llvm/Demangle/Demangle.h" |
25 | |
26 | using namespace llvm; |
27 | using namespace llvm::object; |
28 | using namespace llvm::ELF; |
29 | using namespace lld; |
30 | using namespace lld::elf; |
31 | |
// Definition of the `symtab` object declared in namespace `elf`; the
// SymbolTable member functions below operate on an instance such as this one.
SymbolTable elf::symtab;
33 | |
34 | void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { |
35 | // Redirect __real_foo to the original foo and foo to the original __wrap_foo. |
36 | int &idx1 = symMap[CachedHashStringRef(sym->getName())]; |
37 | int &idx2 = symMap[CachedHashStringRef(real->getName())]; |
38 | int &idx3 = symMap[CachedHashStringRef(wrap->getName())]; |
39 | |
40 | idx2 = idx1; |
41 | idx1 = idx3; |
42 | |
43 | // Propagate symbol usage information to the redirected symbols. |
44 | if (sym->isUsedInRegularObj) |
45 | wrap->isUsedInRegularObj = true; |
46 | if (real->isUsedInRegularObj) |
47 | sym->isUsedInRegularObj = true; |
48 | else if (!sym->isDefined()) |
49 | // Now that all references to sym have been redirected to wrap, if there are |
50 | // no references to real (which has been redirected to sym), we only need to |
51 | // keep sym if it was defined, otherwise it's unused and can be dropped. |
52 | sym->isUsedInRegularObj = false; |
53 | |
54 | // Now renaming is complete, and no one refers to real. We drop real from |
55 | // .symtab and .dynsym. If real is undefined, it is important that we don't |
56 | // leave it in .dynsym, because otherwise it might lead to an undefined symbol |
57 | // error in a subsequent link. If real is defined, we could emit real as an |
58 | // alias for sym, but that could degrade the user experience of some tools |
59 | // that can print out only one symbol for each location: sym is a preferred |
60 | // name than real, but they might print out real instead. |
61 | memcpy(dest: real, src: sym, n: sizeof(SymbolUnion)); |
62 | real->isUsedInRegularObj = false; |
63 | } |
64 | |
65 | // Find an existing symbol or create a new one. |
66 | Symbol *SymbolTable::insert(StringRef name) { |
67 | // <name>@@<version> means the symbol is the default version. In that |
68 | // case <name>@@<version> will be used to resolve references to <name>. |
69 | // |
70 | // Since this is a hot path, the following string search code is |
71 | // optimized for speed. StringRef::find(char) is much faster than |
72 | // StringRef::find(StringRef). |
73 | StringRef stem = name; |
74 | size_t pos = name.find(C: '@'); |
75 | if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@') |
76 | stem = name.take_front(N: pos); |
77 | |
78 | auto p = symMap.insert(KV: {CachedHashStringRef(stem), (int)symVector.size()}); |
79 | if (!p.second) { |
80 | Symbol *sym = symVector[p.first->second]; |
81 | if (stem.size() != name.size()) { |
82 | sym->setName(name); |
83 | sym->hasVersionSuffix = true; |
84 | } |
85 | return sym; |
86 | } |
87 | |
88 | Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); |
89 | symVector.push_back(Elt: sym); |
90 | |
91 | // *sym was not initialized by a constructor. Initialize all Symbol fields. |
92 | memset(s: sym, c: 0, n: sizeof(Symbol)); |
93 | sym->setName(name); |
94 | sym->partition = 1; |
95 | sym->versionId = VER_NDX_GLOBAL; |
96 | if (pos != StringRef::npos) |
97 | sym->hasVersionSuffix = true; |
98 | return sym; |
99 | } |
100 | |
101 | // This variant of addSymbol is used by BinaryFile::parse to check duplicate |
102 | // symbol errors. |
103 | Symbol *SymbolTable::addAndCheckDuplicate(const Defined &newSym) { |
104 | Symbol *sym = insert(name: newSym.getName()); |
105 | if (sym->isDefined()) |
106 | sym->checkDuplicate(other: newSym); |
107 | sym->resolve(other: newSym); |
108 | sym->isUsedInRegularObj = true; |
109 | return sym; |
110 | } |
111 | |
112 | Symbol *SymbolTable::find(StringRef name) { |
113 | auto it = symMap.find(Val: CachedHashStringRef(name)); |
114 | if (it == symMap.end()) |
115 | return nullptr; |
116 | return symVector[it->second]; |
117 | } |
118 | |
119 | // A version script/dynamic list is only meaningful for a Defined symbol. |
120 | // A CommonSymbol will be converted to a Defined in replaceCommonSymbols(). |
121 | // A lazy symbol may be made Defined if an LTO libcall extracts it. |
122 | static bool canBeVersioned(const Symbol &sym) { |
123 | return sym.isDefined() || sym.isCommon() || sym.isLazy(); |
124 | } |
125 | |
126 | // Initialize demangledSyms with a map from demangled symbols to symbol |
127 | // objects. Used to handle "extern C++" directive in version scripts. |
128 | // |
129 | // The map will contain all demangled symbols. That can be very large, |
130 | // and in LLD we generally want to avoid do anything for each symbol. |
131 | // Then, why are we doing this? Here's why. |
132 | // |
133 | // Users can use "extern C++ {}" directive to match against demangled |
134 | // C++ symbols. For example, you can write a pattern such as |
135 | // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this |
136 | // other than trying to match a pattern against all demangled symbols. |
137 | // So, if "extern C++" feature is used, we need to demangle all known |
138 | // symbols. |
139 | StringMap<SmallVector<Symbol *, 0>> &SymbolTable::getDemangledSyms() { |
140 | if (!demangledSyms) { |
141 | demangledSyms.emplace(); |
142 | std::string demangled; |
143 | for (Symbol *sym : symVector) |
144 | if (canBeVersioned(sym: *sym)) { |
145 | StringRef name = sym->getName(); |
146 | size_t pos = name.find(C: '@'); |
147 | std::string substr; |
148 | if (pos == std::string::npos) |
149 | demangled = demangle(MangledName: name); |
150 | else if (pos + 1 == name.size() || name[pos + 1] == '@') { |
151 | substr = name.substr(Start: 0, N: pos); |
152 | demangled = demangle(MangledName: substr); |
153 | } else { |
154 | substr = name.substr(Start: 0, N: pos); |
155 | demangled = (demangle(MangledName: substr) + name.substr(Start: pos)).str(); |
156 | } |
157 | (*demangledSyms)[demangled].push_back(Elt: sym); |
158 | } |
159 | } |
160 | return *demangledSyms; |
161 | } |
162 | |
163 | SmallVector<Symbol *, 0> SymbolTable::findByVersion(SymbolVersion ver) { |
164 | if (ver.isExternCpp) |
165 | return getDemangledSyms().lookup(Key: ver.name); |
166 | if (Symbol *sym = find(name: ver.name)) |
167 | if (canBeVersioned(sym: *sym)) |
168 | return {sym}; |
169 | return {}; |
170 | } |
171 | |
172 | SmallVector<Symbol *, 0> SymbolTable::findAllByVersion(SymbolVersion ver, |
173 | bool includeNonDefault) { |
174 | SmallVector<Symbol *, 0> res; |
175 | SingleStringMatcher m(ver.name); |
176 | auto check = [&](const Symbol &sym) -> bool { |
177 | if (!includeNonDefault) |
178 | return !sym.hasVersionSuffix; |
179 | StringRef name = sym.getName(); |
180 | size_t pos = name.find(C: '@'); |
181 | return !(pos + 1 < name.size() && name[pos + 1] == '@'); |
182 | }; |
183 | |
184 | if (ver.isExternCpp) { |
185 | for (auto &p : getDemangledSyms()) |
186 | if (m.match(s: p.first())) |
187 | for (Symbol *sym : p.second) |
188 | if (check(*sym)) |
189 | res.push_back(Elt: sym); |
190 | return res; |
191 | } |
192 | |
193 | for (Symbol *sym : symVector) |
194 | if (canBeVersioned(sym: *sym) && check(*sym) && m.match(s: sym->getName())) |
195 | res.push_back(Elt: sym); |
196 | return res; |
197 | } |
198 | |
199 | void SymbolTable::handleDynamicList() { |
200 | SmallVector<Symbol *, 0> syms; |
201 | for (SymbolVersion &ver : config->dynamicList) { |
202 | if (ver.hasWildcard) |
203 | syms = findAllByVersion(ver, /*includeNonDefault=*/true); |
204 | else |
205 | syms = findByVersion(ver); |
206 | |
207 | for (Symbol *sym : syms) |
208 | sym->inDynamicList = true; |
209 | } |
210 | } |
211 | |
212 | // Set symbol versions to symbols. This function handles patterns containing no |
213 | // wildcard characters. Return false if no symbol definition matches ver. |
214 | bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, |
215 | StringRef versionName, |
216 | bool includeNonDefault) { |
217 | // Get a list of symbols which we need to assign the version to. |
218 | SmallVector<Symbol *, 0> syms = findByVersion(ver); |
219 | |
220 | auto getName = [](uint16_t ver) -> std::string { |
221 | if (ver == VER_NDX_LOCAL) |
222 | return "VER_NDX_LOCAL" ; |
223 | if (ver == VER_NDX_GLOBAL) |
224 | return "VER_NDX_GLOBAL" ; |
225 | return ("version '" + config->versionDefinitions[ver].name + "'" ).str(); |
226 | }; |
227 | |
228 | // Assign the version. |
229 | for (Symbol *sym : syms) { |
230 | // For a non-local versionId, skip symbols containing version info because |
231 | // symbol versions specified by symbol names take precedence over version |
232 | // scripts. See parseSymbolVersion(). |
233 | if (!includeNonDefault && versionId != VER_NDX_LOCAL && |
234 | sym->getName().contains(C: '@')) |
235 | continue; |
236 | |
237 | // If the version has not been assigned, assign versionId to the symbol. |
238 | if (!sym->versionScriptAssigned) { |
239 | sym->versionScriptAssigned = true; |
240 | sym->versionId = versionId; |
241 | } |
242 | if (sym->versionId == versionId) |
243 | continue; |
244 | |
245 | warn(msg: "attempt to reassign symbol '" + ver.name + "' of " + |
246 | getName(sym->versionId) + " to " + getName(versionId)); |
247 | } |
248 | return !syms.empty(); |
249 | } |
250 | |
251 | void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId, |
252 | bool includeNonDefault) { |
253 | // Exact matching takes precedence over fuzzy matching, |
254 | // so we set a version to a symbol only if no version has been assigned |
255 | // to the symbol. This behavior is compatible with GNU. |
256 | for (Symbol *sym : findAllByVersion(ver, includeNonDefault)) |
257 | if (!sym->versionScriptAssigned) { |
258 | sym->versionScriptAssigned = true; |
259 | sym->versionId = versionId; |
260 | } |
261 | } |
262 | |
263 | // This function processes version scripts by updating the versionId |
264 | // member of symbols. |
265 | // If there's only one anonymous version definition in a version |
266 | // script file, the script does not actually define any symbol version, |
267 | // but just specifies symbols visibilities. |
268 | void SymbolTable::scanVersionScript() { |
269 | SmallString<128> buf; |
270 | // First, we assign versions to exact matching symbols, |
271 | // i.e. version definitions not containing any glob meta-characters. |
272 | for (VersionDefinition &v : config->versionDefinitions) { |
273 | auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) { |
274 | bool found = |
275 | assignExactVersion(ver: pat, versionId: id, versionName: ver, /*includeNonDefault=*/false); |
276 | buf.clear(); |
277 | found |= assignExactVersion(ver: {.name: (pat.name + "@" + v.name).toStringRef(Out&: buf), |
278 | .isExternCpp: pat.isExternCpp, /*hasWildCard=*/.hasWildcard: false}, |
279 | versionId: id, versionName: ver, /*includeNonDefault=*/true); |
280 | if (!found && !config->undefinedVersion) |
281 | errorOrWarn(msg: "version script assignment of '" + ver + "' to symbol '" + |
282 | pat.name + "' failed: symbol not defined" ); |
283 | }; |
284 | for (SymbolVersion &pat : v.nonLocalPatterns) |
285 | if (!pat.hasWildcard) |
286 | assignExact(pat, v.id, v.name); |
287 | for (SymbolVersion pat : v.localPatterns) |
288 | if (!pat.hasWildcard) |
289 | assignExact(pat, VER_NDX_LOCAL, "local" ); |
290 | } |
291 | |
292 | // Next, assign versions to wildcards that are not "*". Note that because the |
293 | // last match takes precedence over previous matches, we iterate over the |
294 | // definitions in the reverse order. |
295 | auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) { |
296 | assignWildcardVersion(ver: pat, versionId: id, /*includeNonDefault=*/false); |
297 | buf.clear(); |
298 | assignWildcardVersion(ver: {.name: (pat.name + "@" + ver).toStringRef(Out&: buf), |
299 | .isExternCpp: pat.isExternCpp, /*hasWildCard=*/.hasWildcard: true}, |
300 | versionId: id, |
301 | /*includeNonDefault=*/true); |
302 | }; |
303 | for (VersionDefinition &v : llvm::reverse(C&: config->versionDefinitions)) { |
304 | for (SymbolVersion &pat : v.nonLocalPatterns) |
305 | if (pat.hasWildcard && pat.name != "*" ) |
306 | assignWildcard(pat, v.id, v.name); |
307 | for (SymbolVersion &pat : v.localPatterns) |
308 | if (pat.hasWildcard && pat.name != "*" ) |
309 | assignWildcard(pat, VER_NDX_LOCAL, v.name); |
310 | } |
311 | |
312 | // Then, assign versions to "*". In GNU linkers they have lower priority than |
313 | // other wildcards. |
314 | for (VersionDefinition &v : llvm::reverse(C&: config->versionDefinitions)) { |
315 | for (SymbolVersion &pat : v.nonLocalPatterns) |
316 | if (pat.hasWildcard && pat.name == "*" ) |
317 | assignWildcard(pat, v.id, v.name); |
318 | for (SymbolVersion &pat : v.localPatterns) |
319 | if (pat.hasWildcard && pat.name == "*" ) |
320 | assignWildcard(pat, VER_NDX_LOCAL, v.name); |
321 | } |
322 | |
323 | // Symbol themselves might know their versions because symbols |
324 | // can contain versions in the form of <name>@<version>. |
325 | // Let them parse and update their names to exclude version suffix. |
326 | for (Symbol *sym : symVector) |
327 | if (sym->hasVersionSuffix) |
328 | sym->parseSymbolVersion(); |
329 | |
330 | // isPreemptible is false at this point. To correctly compute the binding of a |
331 | // Defined (which is used by includeInDynsym()), we need to know if it is |
332 | // VER_NDX_LOCAL or not. Compute symbol versions before handling |
333 | // --dynamic-list. |
334 | handleDynamicList(); |
335 | } |
336 | |
337 | Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) { |
338 | return addSymbol(newSym: Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0}); |
339 | } |
340 | |