1 | //===- SymbolTable.cpp ----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Symbol table is a bag of all known symbols. We put all symbols of |
10 | // all input files to the symbol table. The symbol table is basically |
11 | // a hash table with the logic to resolve symbol name conflicts using |
12 | // the symbol types. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "SymbolTable.h" |
17 | #include "Config.h" |
18 | #include "InputFiles.h" |
19 | #include "Symbols.h" |
20 | #include "lld/Common/Memory.h" |
21 | #include "lld/Common/Strings.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/Demangle/Demangle.h" |
24 | |
25 | using namespace llvm; |
26 | using namespace llvm::object; |
27 | using namespace llvm::ELF; |
28 | using namespace lld; |
29 | using namespace lld::elf; |
30 | |
31 | void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { |
32 | // Redirect __real_foo to the original foo and foo to the original __wrap_foo. |
33 | int &idx1 = symMap[CachedHashStringRef(sym->getName())]; |
34 | int &idx2 = symMap[CachedHashStringRef(real->getName())]; |
35 | int &idx3 = symMap[CachedHashStringRef(wrap->getName())]; |
36 | |
37 | idx2 = idx1; |
38 | idx1 = idx3; |
39 | |
40 | // Propagate symbol usage information to the redirected symbols. |
41 | if (sym->isUsedInRegularObj) |
42 | wrap->isUsedInRegularObj = true; |
43 | if (real->isUsedInRegularObj) |
44 | sym->isUsedInRegularObj = true; |
45 | else if (!sym->isDefined()) |
46 | // Now that all references to sym have been redirected to wrap, if there are |
47 | // no references to real (which has been redirected to sym), we only need to |
48 | // keep sym if it was defined, otherwise it's unused and can be dropped. |
49 | sym->isUsedInRegularObj = false; |
50 | |
51 | // Now renaming is complete, and no one refers to real. We drop real from |
52 | // .symtab and .dynsym. If real is undefined, it is important that we don't |
53 | // leave it in .dynsym, because otherwise it might lead to an undefined symbol |
54 | // error in a subsequent link. If real is defined, we could emit real as an |
55 | // alias for sym, but that could degrade the user experience of some tools |
56 | // that can print out only one symbol for each location: sym is a preferred |
57 | // name than real, but they might print out real instead. |
58 | memcpy(dest: static_cast<void *>(real), src: sym, n: sizeof(SymbolUnion)); |
59 | real->isUsedInRegularObj = false; |
60 | } |
61 | |
62 | // Find an existing symbol or create a new one. |
63 | Symbol *SymbolTable::insert(StringRef name) { |
64 | // <name>@@<version> means the symbol is the default version. In that |
65 | // case <name>@@<version> will be used to resolve references to <name>. |
66 | // |
67 | // Since this is a hot path, the following string search code is |
68 | // optimized for speed. StringRef::find(char) is much faster than |
69 | // StringRef::find(StringRef). |
70 | StringRef stem = name; |
71 | size_t pos = name.find(C: '@'); |
72 | if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@') |
73 | stem = name.take_front(N: pos); |
74 | |
75 | auto p = symMap.insert(KV: {CachedHashStringRef(stem), (int)symVector.size()}); |
76 | if (!p.second) { |
77 | Symbol *sym = symVector[p.first->second]; |
78 | if (stem.size() != name.size()) { |
79 | sym->setName(name); |
80 | sym->hasVersionSuffix = true; |
81 | } |
82 | return sym; |
83 | } |
84 | |
85 | Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); |
86 | symVector.push_back(Elt: sym); |
87 | |
88 | // *sym was not initialized by a constructor. Initialize all Symbol fields. |
89 | memset(s: static_cast<void *>(sym), c: 0, n: sizeof(Symbol)); |
90 | sym->setName(name); |
91 | sym->partition = 1; |
92 | sym->versionId = VER_NDX_GLOBAL; |
93 | if (pos != StringRef::npos) |
94 | sym->hasVersionSuffix = true; |
95 | return sym; |
96 | } |
97 | |
98 | // This variant of addSymbol is used by BinaryFile::parse to check duplicate |
99 | // symbol errors. |
100 | Symbol *SymbolTable::addAndCheckDuplicate(Ctx &ctx, const Defined &newSym) { |
101 | Symbol *sym = insert(name: newSym.getName()); |
102 | if (sym->isDefined()) |
103 | sym->checkDuplicate(ctx, other: newSym); |
104 | sym->resolve(ctx, other: newSym); |
105 | sym->isUsedInRegularObj = true; |
106 | return sym; |
107 | } |
108 | |
109 | Symbol *SymbolTable::find(StringRef name) { |
110 | auto it = symMap.find(Val: CachedHashStringRef(name)); |
111 | if (it == symMap.end()) |
112 | return nullptr; |
113 | return symVector[it->second]; |
114 | } |
115 | |
116 | // A version script/dynamic list is only meaningful for a Defined symbol. |
117 | // A CommonSymbol will be converted to a Defined in replaceCommonSymbols(). |
118 | // A lazy symbol may be made Defined if an LTO libcall extracts it. |
119 | static bool canBeVersioned(const Symbol &sym) { |
120 | return sym.isDefined() || sym.isCommon() || sym.isLazy(); |
121 | } |
122 | |
123 | // Initialize demangledSyms with a map from demangled symbols to symbol |
124 | // objects. Used to handle "extern C++" directive in version scripts. |
125 | // |
126 | // The map will contain all demangled symbols. That can be very large, |
127 | // and in LLD we generally want to avoid do anything for each symbol. |
128 | // Then, why are we doing this? Here's why. |
129 | // |
130 | // Users can use "extern C++ {}" directive to match against demangled |
131 | // C++ symbols. For example, you can write a pattern such as |
132 | // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this |
133 | // other than trying to match a pattern against all demangled symbols. |
134 | // So, if "extern C++" feature is used, we need to demangle all known |
135 | // symbols. |
136 | StringMap<SmallVector<Symbol *, 0>> &SymbolTable::getDemangledSyms() { |
137 | if (!demangledSyms) { |
138 | demangledSyms.emplace(); |
139 | std::string demangled; |
140 | for (Symbol *sym : symVector) |
141 | if (canBeVersioned(sym: *sym)) { |
142 | StringRef name = sym->getName(); |
143 | size_t pos = name.find(C: '@'); |
144 | std::string substr; |
145 | if (pos == std::string::npos) |
146 | demangled = demangle(MangledName: name); |
147 | else if (pos + 1 == name.size() || name[pos + 1] == '@') { |
148 | substr = name.substr(Start: 0, N: pos); |
149 | demangled = demangle(MangledName: substr); |
150 | } else { |
151 | substr = name.substr(Start: 0, N: pos); |
152 | demangled = (demangle(MangledName: substr) + name.substr(Start: pos)).str(); |
153 | } |
154 | (*demangledSyms)[demangled].push_back(Elt: sym); |
155 | } |
156 | } |
157 | return *demangledSyms; |
158 | } |
159 | |
160 | SmallVector<Symbol *, 0> SymbolTable::findByVersion(SymbolVersion ver) { |
161 | if (ver.isExternCpp) |
162 | return getDemangledSyms().lookup(Key: ver.name); |
163 | if (Symbol *sym = find(name: ver.name)) |
164 | if (canBeVersioned(sym: *sym)) |
165 | return {sym}; |
166 | return {}; |
167 | } |
168 | |
169 | SmallVector<Symbol *, 0> SymbolTable::findAllByVersion(SymbolVersion ver, |
170 | bool includeNonDefault) { |
171 | SmallVector<Symbol *, 0> res; |
172 | SingleStringMatcher m(ver.name); |
173 | auto check = [&](const Symbol &sym) -> bool { |
174 | if (!includeNonDefault) |
175 | return !sym.hasVersionSuffix; |
176 | StringRef name = sym.getName(); |
177 | size_t pos = name.find(C: '@'); |
178 | return !(pos + 1 < name.size() && name[pos + 1] == '@'); |
179 | }; |
180 | |
181 | if (ver.isExternCpp) { |
182 | for (auto &p : getDemangledSyms()) |
183 | if (m.match(s: p.first())) |
184 | for (Symbol *sym : p.second) |
185 | if (check(*sym)) |
186 | res.push_back(Elt: sym); |
187 | return res; |
188 | } |
189 | |
190 | for (Symbol *sym : symVector) |
191 | if (canBeVersioned(sym: *sym) && check(*sym) && m.match(s: sym->getName())) |
192 | res.push_back(Elt: sym); |
193 | return res; |
194 | } |
195 | |
196 | void SymbolTable::handleDynamicList() { |
197 | SmallVector<Symbol *, 0> syms; |
198 | for (SymbolVersion &ver : ctx.arg.dynamicList) { |
199 | if (ver.hasWildcard) |
200 | syms = findAllByVersion(ver, /*includeNonDefault=*/true); |
201 | else |
202 | syms = findByVersion(ver); |
203 | |
204 | for (Symbol *sym : syms) |
205 | sym->isExported = sym->inDynamicList = true; |
206 | } |
207 | } |
208 | |
209 | // Set symbol versions to symbols. This function handles patterns containing no |
210 | // wildcard characters. Return false if no symbol definition matches ver. |
211 | bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, |
212 | StringRef versionName, |
213 | bool includeNonDefault) { |
214 | // Get a list of symbols which we need to assign the version to. |
215 | SmallVector<Symbol *, 0> syms = findByVersion(ver); |
216 | |
217 | auto getName = [&ctx = ctx](uint16_t ver) -> std::string { |
218 | if (ver == VER_NDX_LOCAL) |
219 | return "VER_NDX_LOCAL" ; |
220 | if (ver == VER_NDX_GLOBAL) |
221 | return "VER_NDX_GLOBAL" ; |
222 | return ("version '" + ctx.arg.versionDefinitions[ver].name + "'" ).str(); |
223 | }; |
224 | |
225 | // Assign the version. |
226 | for (Symbol *sym : syms) { |
227 | // For a non-local versionId, skip symbols containing version info because |
228 | // symbol versions specified by symbol names take precedence over version |
229 | // scripts. See parseSymbolVersion(ctx). |
230 | if (!includeNonDefault && versionId != VER_NDX_LOCAL && |
231 | sym->getName().contains(C: '@')) |
232 | continue; |
233 | |
234 | // If the version has not been assigned, assign versionId to the symbol. |
235 | if (!sym->versionScriptAssigned) { |
236 | sym->versionScriptAssigned = true; |
237 | sym->versionId = versionId; |
238 | } |
239 | if (sym->versionId == versionId) |
240 | continue; |
241 | |
242 | Warn(ctx) << "attempt to reassign symbol '" << ver.name << "' of " |
243 | << getName(sym->versionId) << " to " << getName(versionId); |
244 | } |
245 | return !syms.empty(); |
246 | } |
247 | |
248 | void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId, |
249 | bool includeNonDefault) { |
250 | // Exact matching takes precedence over fuzzy matching, |
251 | // so we set a version to a symbol only if no version has been assigned |
252 | // to the symbol. This behavior is compatible with GNU. |
253 | for (Symbol *sym : findAllByVersion(ver, includeNonDefault)) |
254 | if (!sym->versionScriptAssigned) { |
255 | sym->versionScriptAssigned = true; |
256 | sym->versionId = versionId; |
257 | } |
258 | } |
259 | |
260 | // This function processes version scripts by updating the versionId |
261 | // member of symbols. |
262 | // If there's only one anonymous version definition in a version |
263 | // script file, the script does not actually define any symbol version, |
264 | // but just specifies symbols visibilities. |
265 | void SymbolTable::scanVersionScript() { |
266 | SmallString<128> buf; |
267 | // First, we assign versions to exact matching symbols, |
268 | // i.e. version definitions not containing any glob meta-characters. |
269 | for (VersionDefinition &v : ctx.arg.versionDefinitions) { |
270 | auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) { |
271 | bool found = |
272 | assignExactVersion(ver: pat, versionId: id, versionName: ver, /*includeNonDefault=*/false); |
273 | buf.clear(); |
274 | found |= assignExactVersion(ver: {.name: (pat.name + "@" + v.name).toStringRef(Out&: buf), |
275 | .isExternCpp: pat.isExternCpp, /*hasWildCard=*/.hasWildcard: false}, |
276 | versionId: id, versionName: ver, /*includeNonDefault=*/true); |
277 | if (!found && !ctx.arg.undefinedVersion) |
278 | Err(ctx) << "version script assignment of '" << ver << "' to symbol '" |
279 | << pat.name << "' failed: symbol not defined" ; |
280 | }; |
281 | for (SymbolVersion &pat : v.nonLocalPatterns) |
282 | if (!pat.hasWildcard) |
283 | assignExact(pat, v.id, v.name); |
284 | for (SymbolVersion pat : v.localPatterns) |
285 | if (!pat.hasWildcard) |
286 | assignExact(pat, VER_NDX_LOCAL, "local" ); |
287 | } |
288 | |
289 | // Next, assign versions to wildcards that are not "*". Note that because the |
290 | // last match takes precedence over previous matches, we iterate over the |
291 | // definitions in the reverse order. |
292 | auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) { |
293 | assignWildcardVersion(ver: pat, versionId: id, /*includeNonDefault=*/false); |
294 | buf.clear(); |
295 | assignWildcardVersion(ver: {.name: (pat.name + "@" + ver).toStringRef(Out&: buf), |
296 | .isExternCpp: pat.isExternCpp, /*hasWildCard=*/.hasWildcard: true}, |
297 | versionId: id, |
298 | /*includeNonDefault=*/true); |
299 | }; |
300 | for (VersionDefinition &v : llvm::reverse(C&: ctx.arg.versionDefinitions)) { |
301 | for (SymbolVersion &pat : v.nonLocalPatterns) |
302 | if (pat.hasWildcard && pat.name != "*" ) |
303 | assignWildcard(pat, v.id, v.name); |
304 | for (SymbolVersion &pat : v.localPatterns) |
305 | if (pat.hasWildcard && pat.name != "*" ) |
306 | assignWildcard(pat, VER_NDX_LOCAL, v.name); |
307 | } |
308 | |
309 | // Then, assign versions to "*". In GNU linkers they have lower priority than |
310 | // other wildcards. |
311 | bool globalAsteriskFound = false; |
312 | bool localAsteriskFound = false; |
313 | bool asteriskReported = false; |
314 | auto assignAsterisk = [&](SymbolVersion &pat, VersionDefinition *ver, |
315 | bool isLocal) { |
316 | // Avoid issuing a warning if both '--retain-symbol-file' and a version |
317 | // script with `global: *` are used. |
318 | // |
319 | // '--retain-symbol-file' adds a "*" pattern to |
320 | // 'versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns', see |
321 | // 'readConfigs()' in 'Driver.cpp'. Note that it is not '.localPatterns', |
322 | // and may seem counterintuitive, but still works as expected. Here we can |
323 | // exploit that and skip analyzing the pattern added for this option. |
324 | if (!asteriskReported && (isLocal || ver->id > VER_NDX_LOCAL)) { |
325 | if ((isLocal && globalAsteriskFound) || |
326 | (!isLocal && localAsteriskFound)) { |
327 | Warn(ctx) |
328 | << "wildcard pattern '*' is used for both 'local' and 'global' " |
329 | "scopes in version script" ; |
330 | asteriskReported = true; |
331 | } else if (!isLocal && globalAsteriskFound) { |
332 | Warn(ctx) << "wildcard pattern '*' is used for multiple version " |
333 | "definitions in " |
334 | "version script" ; |
335 | asteriskReported = true; |
336 | } else { |
337 | localAsteriskFound = isLocal; |
338 | globalAsteriskFound = !isLocal; |
339 | } |
340 | } |
341 | assignWildcard(pat, isLocal ? (uint16_t)VER_NDX_LOCAL : ver->id, ver->name); |
342 | }; |
343 | for (VersionDefinition &v : llvm::reverse(C&: ctx.arg.versionDefinitions)) { |
344 | for (SymbolVersion &pat : v.nonLocalPatterns) |
345 | if (pat.hasWildcard && pat.name == "*" ) |
346 | assignAsterisk(pat, &v, false); |
347 | for (SymbolVersion &pat : v.localPatterns) |
348 | if (pat.hasWildcard && pat.name == "*" ) |
349 | assignAsterisk(pat, &v, true); |
350 | } |
351 | |
352 | // Handle --dynamic-list. If a specified symbol is also matched by local: in a |
353 | // version script, the version script takes precedence. |
354 | handleDynamicList(); |
355 | } |
356 | |
357 | Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) { |
358 | return addSymbol(newSym: Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0}); |
359 | } |
360 | |