1 | //===-- Mangled.cpp -------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "lldb/Core/Mangled.h" |
10 | |
11 | #include "lldb/Core/DataFileCache.h" |
12 | #include "lldb/Core/RichManglingContext.h" |
13 | #include "lldb/Target/Language.h" |
14 | #include "lldb/Utility/ConstString.h" |
15 | #include "lldb/Utility/DataEncoder.h" |
16 | #include "lldb/Utility/LLDBLog.h" |
17 | #include "lldb/Utility/Log.h" |
18 | #include "lldb/Utility/RegularExpression.h" |
19 | #include "lldb/Utility/Stream.h" |
20 | #include "lldb/lldb-enumerations.h" |
21 | |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/Demangle/Demangle.h" |
24 | #include "llvm/Support/Compiler.h" |
25 | |
26 | #include <mutex> |
27 | #include <string> |
28 | #include <string_view> |
29 | #include <utility> |
30 | |
31 | #include <cstdlib> |
32 | #include <cstring> |
33 | using namespace lldb_private; |
34 | |
35 | static inline bool cstring_is_mangled(llvm::StringRef s) { |
36 | return Mangled::GetManglingScheme(name: s) != Mangled::eManglingSchemeNone; |
37 | } |
38 | |
39 | #pragma mark Mangled |
40 | |
41 | Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { |
42 | if (name.empty()) |
43 | return Mangled::eManglingSchemeNone; |
44 | |
45 | if (name.starts_with(Prefix: "?" )) |
46 | return Mangled::eManglingSchemeMSVC; |
47 | |
48 | if (name.starts_with(Prefix: "_R" )) |
49 | return Mangled::eManglingSchemeRustV0; |
50 | |
51 | if (name.starts_with(Prefix: "_D" )) |
52 | return Mangled::eManglingSchemeD; |
53 | |
54 | if (name.starts_with(Prefix: "_Z" )) |
55 | return Mangled::eManglingSchemeItanium; |
56 | |
57 | // ___Z is a clang extension of block invocations |
58 | if (name.starts_with(Prefix: "___Z" )) |
59 | return Mangled::eManglingSchemeItanium; |
60 | |
61 | // Swift's older style of mangling used "_T" as a mangling prefix. This can |
62 | // lead to false positives with other symbols that just so happen to start |
63 | // with "_T". To minimize the chance of that happening, we only return true |
64 | // for select old-style swift mangled names. The known cases are ObjC classes |
65 | // and protocols. Classes are either prefixed with "_TtC" or "_TtGC". |
66 | // Protocols are prefixed with "_TtP". |
67 | if (name.starts_with(Prefix: "_TtC" ) || name.starts_with(Prefix: "_TtGC" ) || |
68 | name.starts_with(Prefix: "_TtP" )) |
69 | return Mangled::eManglingSchemeSwift; |
70 | |
71 | // Swift 4.2 used "$S" and "_$S". |
72 | // Swift 5 and onward uses "$s" and "_$s". |
73 | // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames. |
74 | if (name.starts_with(Prefix: "$S" ) || name.starts_with(Prefix: "_$S" ) || |
75 | name.starts_with(Prefix: "$s" ) || name.starts_with(Prefix: "_$s" ) || |
76 | name.starts_with(Prefix: "@__swiftmacro_" )) |
77 | return Mangled::eManglingSchemeSwift; |
78 | |
79 | return Mangled::eManglingSchemeNone; |
80 | } |
81 | |
82 | Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { |
83 | if (s) |
84 | SetValue(s); |
85 | } |
86 | |
87 | Mangled::Mangled(llvm::StringRef name) { |
88 | if (!name.empty()) |
89 | SetValue(ConstString(name)); |
90 | } |
91 | |
92 | // Convert to bool operator. This allows code to check any Mangled objects |
93 | // to see if they contain anything valid using code such as: |
94 | // |
95 | // Mangled mangled(...); |
96 | // if (mangled) |
97 | // { ... |
98 | Mangled::operator bool() const { return m_mangled || m_demangled; } |
99 | |
100 | // Clear the mangled and demangled values. |
101 | void Mangled::Clear() { |
102 | m_mangled.Clear(); |
103 | m_demangled.Clear(); |
104 | } |
105 | |
106 | // Compare the string values. |
107 | int Mangled::Compare(const Mangled &a, const Mangled &b) { |
108 | return ConstString::Compare(lhs: a.GetName(preference: ePreferMangled), |
109 | rhs: b.GetName(preference: ePreferMangled)); |
110 | } |
111 | |
112 | void Mangled::SetValue(ConstString name) { |
113 | if (name) { |
114 | if (cstring_is_mangled(s: name.GetStringRef())) { |
115 | m_demangled.Clear(); |
116 | m_mangled = name; |
117 | } else { |
118 | m_demangled = name; |
119 | m_mangled.Clear(); |
120 | } |
121 | } else { |
122 | m_demangled.Clear(); |
123 | m_mangled.Clear(); |
124 | } |
125 | } |
126 | |
127 | // Local helpers for different demangling implementations. |
128 | static char *GetMSVCDemangledStr(std::string_view M) { |
129 | char *demangled_cstr = llvm::microsoftDemangle( |
130 | mangled_name: M, n_read: nullptr, status: nullptr, |
131 | Flags: llvm::MSDemangleFlags( |
132 | llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | |
133 | llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); |
134 | |
135 | if (Log *log = GetLog(mask: LLDBLog::Demangle)) { |
136 | if (demangled_cstr && demangled_cstr[0]) |
137 | LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"" , M.data(), demangled_cstr); |
138 | else |
139 | LLDB_LOGF(log, "demangled msvc: %s -> error" , M.data()); |
140 | } |
141 | |
142 | return demangled_cstr; |
143 | } |
144 | |
145 | static char *GetItaniumDemangledStr(const char *M) { |
146 | char *demangled_cstr = nullptr; |
147 | |
148 | llvm::ItaniumPartialDemangler ipd; |
149 | bool err = ipd.partialDemangle(MangledName: M); |
150 | if (!err) { |
151 | // Default buffer and size (will realloc in case it's too small). |
152 | size_t demangled_size = 80; |
153 | demangled_cstr = static_cast<char *>(std::malloc(size: demangled_size)); |
154 | demangled_cstr = ipd.finishDemangle(Buf: demangled_cstr, N: &demangled_size); |
155 | |
156 | assert(demangled_cstr && |
157 | "finishDemangle must always succeed if partialDemangle did" ); |
158 | assert(demangled_cstr[demangled_size - 1] == '\0' && |
159 | "Expected demangled_size to return length including trailing null" ); |
160 | } |
161 | |
162 | if (Log *log = GetLog(mask: LLDBLog::Demangle)) { |
163 | if (demangled_cstr) |
164 | LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"" , M, demangled_cstr); |
165 | else |
166 | LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle" , M); |
167 | } |
168 | |
169 | return demangled_cstr; |
170 | } |
171 | |
172 | static char *GetRustV0DemangledStr(std::string_view M) { |
173 | char *demangled_cstr = llvm::rustDemangle(MangledName: M); |
174 | |
175 | if (Log *log = GetLog(mask: LLDBLog::Demangle)) { |
176 | if (demangled_cstr && demangled_cstr[0]) |
177 | LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"" , M, demangled_cstr); |
178 | else |
179 | LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle" , M); |
180 | } |
181 | |
182 | return demangled_cstr; |
183 | } |
184 | |
185 | static char *GetDLangDemangledStr(std::string_view M) { |
186 | char *demangled_cstr = llvm::dlangDemangle(MangledName: M); |
187 | |
188 | if (Log *log = GetLog(mask: LLDBLog::Demangle)) { |
189 | if (demangled_cstr && demangled_cstr[0]) |
190 | LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"" , M, demangled_cstr); |
191 | else |
192 | LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle" , M); |
193 | } |
194 | |
195 | return demangled_cstr; |
196 | } |
197 | |
198 | // Explicit demangling for scheduled requests during batch processing. This |
199 | // makes use of ItaniumPartialDemangler's rich demangle info |
200 | bool Mangled::GetRichManglingInfo(RichManglingContext &context, |
201 | SkipMangledNameFn *skip_mangled_name) { |
202 | // Others are not meant to arrive here. ObjC names or C's main() for example |
203 | // have their names stored in m_demangled, while m_mangled is empty. |
204 | assert(m_mangled); |
205 | |
206 | // Check whether or not we are interested in this name at all. |
207 | ManglingScheme scheme = GetManglingScheme(name: m_mangled.GetStringRef()); |
208 | if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) |
209 | return false; |
210 | |
211 | switch (scheme) { |
212 | case eManglingSchemeNone: |
213 | // The current mangled_name_filter would allow llvm_unreachable here. |
214 | return false; |
215 | |
216 | case eManglingSchemeItanium: |
217 | // We want the rich mangling info here, so we don't care whether or not |
218 | // there is a demangled string in the pool already. |
219 | return context.FromItaniumName(mangled: m_mangled); |
220 | |
221 | case eManglingSchemeMSVC: { |
222 | // We have no rich mangling for MSVC-mangled names yet, so first try to |
223 | // demangle it if necessary. |
224 | if (!m_demangled && !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) { |
225 | if (char *d = GetMSVCDemangledStr(M: m_mangled)) { |
226 | // Without the rich mangling info we have to demangle the full name. |
227 | // Copy it to string pool and connect the counterparts to accelerate |
228 | // later access in GetDemangledName(). |
229 | m_demangled.SetStringWithMangledCounterpart(demangled: llvm::StringRef(d), |
230 | mangled: m_mangled); |
231 | ::free(ptr: d); |
232 | } else { |
233 | m_demangled.SetCString("" ); |
234 | } |
235 | } |
236 | |
237 | if (m_demangled.IsEmpty()) { |
238 | // Cannot demangle it, so don't try parsing. |
239 | return false; |
240 | } else { |
241 | // Demangled successfully, we can try and parse it with |
242 | // CPlusPlusLanguage::MethodName. |
243 | return context.FromCxxMethodName(demangled: m_demangled); |
244 | } |
245 | } |
246 | |
247 | case eManglingSchemeRustV0: |
248 | case eManglingSchemeD: |
249 | case eManglingSchemeSwift: |
250 | // Rich demangling scheme is not supported |
251 | return false; |
252 | } |
253 | llvm_unreachable("Fully covered switch above!" ); |
254 | } |
255 | |
256 | // Generate the demangled name on demand using this accessor. Code in this |
257 | // class will need to use this accessor if it wishes to decode the demangled |
258 | // name. The result is cached and will be kept until a new string value is |
259 | // supplied to this object, or until the end of the object's lifetime. |
260 | ConstString Mangled::GetDemangledName() const { |
261 | // Check to make sure we have a valid mangled name and that we haven't |
262 | // already decoded our mangled name. |
263 | if (m_mangled && m_demangled.IsNull()) { |
264 | // Don't bother running anything that isn't mangled |
265 | const char *mangled_name = m_mangled.GetCString(); |
266 | ManglingScheme mangling_scheme = |
267 | GetManglingScheme(name: m_mangled.GetStringRef()); |
268 | if (mangling_scheme != eManglingSchemeNone && |
269 | !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) { |
270 | // We didn't already mangle this name, demangle it and if all goes well |
271 | // add it to our map. |
272 | char *demangled_name = nullptr; |
273 | switch (mangling_scheme) { |
274 | case eManglingSchemeMSVC: |
275 | demangled_name = GetMSVCDemangledStr(M: mangled_name); |
276 | break; |
277 | case eManglingSchemeItanium: { |
278 | demangled_name = GetItaniumDemangledStr(M: mangled_name); |
279 | break; |
280 | } |
281 | case eManglingSchemeRustV0: |
282 | demangled_name = GetRustV0DemangledStr(M: m_mangled); |
283 | break; |
284 | case eManglingSchemeD: |
285 | demangled_name = GetDLangDemangledStr(M: m_mangled); |
286 | break; |
287 | case eManglingSchemeSwift: |
288 | // Demangling a swift name requires the swift compiler. This is |
289 | // explicitly unsupported on llvm.org. |
290 | break; |
291 | case eManglingSchemeNone: |
292 | llvm_unreachable("eManglingSchemeNone was handled already" ); |
293 | } |
294 | if (demangled_name) { |
295 | m_demangled.SetStringWithMangledCounterpart( |
296 | demangled: llvm::StringRef(demangled_name), mangled: m_mangled); |
297 | free(ptr: demangled_name); |
298 | } |
299 | } |
300 | if (m_demangled.IsNull()) { |
301 | // Set the demangled string to the empty string to indicate we tried to |
302 | // parse it once and failed. |
303 | m_demangled.SetCString("" ); |
304 | } |
305 | } |
306 | |
307 | return m_demangled; |
308 | } |
309 | |
310 | ConstString Mangled::GetDisplayDemangledName() const { |
311 | return GetDemangledName(); |
312 | } |
313 | |
314 | bool Mangled::NameMatches(const RegularExpression ®ex) const { |
315 | if (m_mangled && regex.Execute(string: m_mangled.GetStringRef())) |
316 | return true; |
317 | |
318 | ConstString demangled = GetDemangledName(); |
319 | return demangled && regex.Execute(string: demangled.GetStringRef()); |
320 | } |
321 | |
322 | // Get the demangled name if there is one, else return the mangled name. |
323 | ConstString Mangled::GetName(Mangled::NamePreference preference) const { |
324 | if (preference == ePreferMangled && m_mangled) |
325 | return m_mangled; |
326 | |
327 | // Call the accessor to make sure we get a demangled name in case it hasn't |
328 | // been demangled yet... |
329 | ConstString demangled = GetDemangledName(); |
330 | |
331 | if (preference == ePreferDemangledWithoutArguments) { |
332 | if (Language *lang = Language::FindPlugin(language: GuessLanguage())) { |
333 | return lang->GetDemangledFunctionNameWithoutArguments(mangled: *this); |
334 | } |
335 | } |
336 | if (preference == ePreferDemangled) { |
337 | if (demangled) |
338 | return demangled; |
339 | return m_mangled; |
340 | } |
341 | return demangled; |
342 | } |
343 | |
344 | // Dump a Mangled object to stream "s". We don't force our demangled name to be |
345 | // computed currently (we don't use the accessor). |
346 | void Mangled::Dump(Stream *s) const { |
347 | if (m_mangled) { |
348 | *s << ", mangled = " << m_mangled; |
349 | } |
350 | if (m_demangled) { |
351 | const char *demangled = m_demangled.AsCString(); |
352 | s->Printf(format: ", demangled = %s" , demangled[0] ? demangled : "<error>" ); |
353 | } |
354 | } |
355 | |
356 | // Dumps a debug version of this string with extra object and state information |
357 | // to stream "s". |
358 | void Mangled::DumpDebug(Stream *s) const { |
359 | s->Printf(format: "%*p: Mangled mangled = " , static_cast<int>(sizeof(void *) * 2), |
360 | static_cast<const void *>(this)); |
361 | m_mangled.DumpDebug(s); |
362 | s->Printf(format: ", demangled = " ); |
363 | m_demangled.DumpDebug(s); |
364 | } |
365 | |
366 | // Return the size in byte that this object takes in memory. The size includes |
367 | // the size of the objects it owns, and not the strings that it references |
368 | // because they are shared strings. |
369 | size_t Mangled::MemorySize() const { |
370 | return m_mangled.MemorySize() + m_demangled.MemorySize(); |
371 | } |
372 | |
373 | // We "guess" the language because we can't determine a symbol's language from |
374 | // it's name. For example, a Pascal symbol can be mangled using the C++ |
375 | // Itanium scheme, and defined in a compilation unit within the same module as |
376 | // other C++ units. In addition, different targets could have different ways |
377 | // of mangling names from a given language, likewise the compilation units |
378 | // within those targets. |
379 | lldb::LanguageType Mangled::GuessLanguage() const { |
380 | lldb::LanguageType result = lldb::eLanguageTypeUnknown; |
381 | // Ask each language plugin to check if the mangled name belongs to it. |
382 | Language::ForEach(callback: [this, &result](Language *l) { |
383 | if (l->SymbolNameFitsToLanguage(name: *this)) { |
384 | result = l->GetLanguageType(); |
385 | return false; |
386 | } |
387 | return true; |
388 | }); |
389 | return result; |
390 | } |
391 | |
392 | // Dump OBJ to the supplied stream S. |
393 | Stream &operator<<(Stream &s, const Mangled &obj) { |
394 | if (obj.GetMangledName()) |
395 | s << "mangled = '" << obj.GetMangledName() << "'" ; |
396 | |
397 | ConstString demangled = obj.GetDemangledName(); |
398 | if (demangled) |
399 | s << ", demangled = '" << demangled << '\''; |
400 | else |
401 | s << ", demangled = <error>" ; |
402 | return s; |
403 | } |
404 | |
// Tag written in front of every encoded Mangled object. It records which of
// the two names follow, so that an object is encoded with as little data as
// it needs.
enum MangledEncoding {
  /// The object has neither a mangled nor a demangled name. The tag alone,
  /// a single zero byte, encodes it.
  Empty = 0u,
  /// The object has a demangled name but no mangled name, so only the
  /// demangled name is encoded.
  DemangledOnly = 1u,
  /// The demangled name is the mangled/demangled counterpart of the mangled
  /// name and can therefore be recomputed from it, so only the mangled name
  /// is encoded.
  MangledOnly = 2u,
  /// The object holds two names that are not related to each other, so both
  /// must be saved. This happens for names that are not true mangled names
  /// but still need to be found in the symbol table by name and type, such
  /// as Objective C symbols like "OBJC_CLASS_$_NSValue": the mangled name is
  /// set to "OBJC_CLASS_$_NSValue" and the demangled name is manually set to
  /// "NSValue". Demangling "OBJC_CLASS_$_NSValue" would fail, yet both names
  /// have to be preserved.
  MangledAndDemangled = 3u
};
430 | |
431 | bool Mangled::(const DataExtractor &data, lldb::offset_t *offset_ptr, |
432 | const StringTableReader &strtab) { |
433 | m_mangled.Clear(); |
434 | m_demangled.Clear(); |
435 | MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); |
436 | switch (encoding) { |
437 | case Empty: |
438 | return true; |
439 | |
440 | case DemangledOnly: |
441 | m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr))); |
442 | return true; |
443 | |
444 | case MangledOnly: |
445 | m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr))); |
446 | return true; |
447 | |
448 | case MangledAndDemangled: |
449 | m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr))); |
450 | m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr))); |
451 | return true; |
452 | } |
453 | return false; |
454 | } |
455 | /// The encoding format for the Mangled object is as follows: |
456 | /// |
457 | /// uint8_t encoding; |
458 | /// char str1[]; (only if DemangledOnly, MangledOnly) |
459 | /// char str2[]; (only if MangledAndDemangled) |
460 | /// |
461 | /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 |
462 | /// are only saved if we need them based on the encoding. |
463 | /// |
464 | /// Some mangled names have a mangled name that can be demangled by the built |
465 | /// in demanglers. These kinds of mangled objects know when the mangled and |
466 | /// demangled names are the counterparts for each other. This is done because |
467 | /// demangling is very expensive and avoiding demangling the same name twice |
468 | /// saves us a lot of compute time. For these kinds of names we only need to |
469 | /// save the mangled name and have the encoding set to "MangledOnly". |
470 | /// |
471 | /// If a mangled obejct has only a demangled name, then we save only that string |
472 | /// and have the encoding set to "DemangledOnly". |
473 | /// |
474 | /// Some mangled objects have both mangled and demangled names, but the |
475 | /// demangled name can not be computed from the mangled name. This is often used |
476 | /// for runtime named, like Objective C runtime V2 and V3 names. Both these |
477 | /// names must be saved and the encoding is set to "MangledAndDemangled". |
478 | /// |
479 | /// For a Mangled object with no names, we only need to set the encoding to |
480 | /// "Empty" and not store any string values. |
481 | void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { |
482 | MangledEncoding encoding = Empty; |
483 | if (m_mangled) { |
484 | encoding = MangledOnly; |
485 | if (m_demangled) { |
486 | // We have both mangled and demangled names. If the demangled name is the |
487 | // counterpart of the mangled name, then we only need to save the mangled |
488 | // named. If they are different, we need to save both. |
489 | ConstString s; |
490 | if (!(m_mangled.GetMangledCounterpart(counterpart&: s) && s == m_demangled)) |
491 | encoding = MangledAndDemangled; |
492 | } |
493 | } else if (m_demangled) { |
494 | encoding = DemangledOnly; |
495 | } |
496 | file.AppendU8(value: encoding); |
497 | switch (encoding) { |
498 | case Empty: |
499 | break; |
500 | case DemangledOnly: |
501 | file.AppendU32(value: strtab.Add(s: m_demangled)); |
502 | break; |
503 | case MangledOnly: |
504 | file.AppendU32(value: strtab.Add(s: m_mangled)); |
505 | break; |
506 | case MangledAndDemangled: |
507 | file.AppendU32(value: strtab.Add(s: m_mangled)); |
508 | file.AppendU32(value: strtab.Add(s: m_demangled)); |
509 | break; |
510 | } |
511 | } |
512 | |