1 | //===-- Mangled.cpp -------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "lldb/Core/Mangled.h" |
10 | |
11 | #include "lldb/Core/DataFileCache.h" |
12 | #include "lldb/Core/RichManglingContext.h" |
13 | #include "lldb/Target/Language.h" |
14 | #include "lldb/Utility/ConstString.h" |
15 | #include "lldb/Utility/DataEncoder.h" |
16 | #include "lldb/Utility/LLDBLog.h" |
17 | #include "lldb/Utility/Log.h" |
18 | #include "lldb/Utility/RegularExpression.h" |
19 | #include "lldb/Utility/Stream.h" |
20 | #include "lldb/lldb-enumerations.h" |
21 | |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/Demangle/Demangle.h" |
24 | #include "llvm/Support/Compiler.h" |
25 | |
26 | #include <mutex> |
27 | #include <string> |
28 | #include <string_view> |
29 | #include <utility> |
30 | |
31 | #include <cstdlib> |
32 | #include <cstring> |
33 | using namespace lldb_private; |
34 | |
35 | static inline bool cstring_is_mangled(llvm::StringRef s) { |
36 | return Mangled::GetManglingScheme(name: s) != Mangled::eManglingSchemeNone; |
37 | } |
38 | |
39 | #pragma mark Mangled |
40 | |
41 | Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { |
42 | if (name.empty()) |
43 | return Mangled::eManglingSchemeNone; |
44 | |
45 | if (name.starts_with(Prefix: "?" )) |
46 | return Mangled::eManglingSchemeMSVC; |
47 | |
48 | if (name.starts_with(Prefix: "_R" )) |
49 | return Mangled::eManglingSchemeRustV0; |
50 | |
51 | if (name.starts_with(Prefix: "_D" )) |
52 | return Mangled::eManglingSchemeD; |
53 | |
54 | if (name.starts_with(Prefix: "_Z" )) |
55 | return Mangled::eManglingSchemeItanium; |
56 | |
57 | // ___Z is a clang extension of block invocations |
58 | if (name.starts_with(Prefix: "___Z" )) |
59 | return Mangled::eManglingSchemeItanium; |
60 | |
61 | // Swift's older style of mangling used "_T" as a mangling prefix. This can |
62 | // lead to false positives with other symbols that just so happen to start |
63 | // with "_T". To minimize the chance of that happening, we only return true |
64 | // for select old-style swift mangled names. The known cases are ObjC classes |
65 | // and protocols. Classes are either prefixed with "_TtC" or "_TtGC". |
66 | // Protocols are prefixed with "_TtP". |
67 | if (name.starts_with(Prefix: "_TtC" ) || name.starts_with(Prefix: "_TtGC" ) || |
68 | name.starts_with(Prefix: "_TtP" )) |
69 | return Mangled::eManglingSchemeSwift; |
70 | |
71 | // Swift 4.2 used "$S" and "_$S". |
72 | // Swift 5 and onward uses "$s" and "_$s". |
73 | // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames. |
74 | if (name.starts_with(Prefix: "$S" ) || name.starts_with(Prefix: "_$S" ) || |
75 | name.starts_with(Prefix: "$s" ) || name.starts_with(Prefix: "_$s" ) || |
76 | name.starts_with(Prefix: "@__swiftmacro_" )) |
77 | return Mangled::eManglingSchemeSwift; |
78 | |
79 | return Mangled::eManglingSchemeNone; |
80 | } |
81 | |
82 | Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { |
83 | if (s) |
84 | SetValue(s); |
85 | } |
86 | |
87 | Mangled::Mangled(llvm::StringRef name) { |
88 | if (!name.empty()) |
89 | SetValue(ConstString(name)); |
90 | } |
91 | |
92 | // Convert to bool operator. This allows code to check any Mangled objects |
93 | // to see if they contain anything valid using code such as: |
94 | // |
95 | // Mangled mangled(...); |
96 | // if (mangled) |
97 | // { ... |
98 | Mangled::operator bool() const { return m_mangled || m_demangled; } |
99 | |
100 | // Clear the mangled and demangled values. |
101 | void Mangled::Clear() { |
102 | m_mangled.Clear(); |
103 | m_demangled.Clear(); |
104 | } |
105 | |
106 | // Compare the string values. |
107 | int Mangled::Compare(const Mangled &a, const Mangled &b) { |
108 | return ConstString::Compare(lhs: a.GetName(preference: ePreferMangled), |
109 | rhs: b.GetName(preference: ePreferMangled)); |
110 | } |
111 | |
112 | void Mangled::SetValue(ConstString name) { |
113 | if (name) { |
114 | if (cstring_is_mangled(s: name.GetStringRef())) { |
115 | m_demangled.Clear(); |
116 | m_mangled = name; |
117 | } else { |
118 | m_demangled = name; |
119 | m_mangled.Clear(); |
120 | } |
121 | } else { |
122 | m_demangled.Clear(); |
123 | m_mangled.Clear(); |
124 | } |
125 | } |
126 | |
127 | // Local helpers for different demangling implementations. |
128 | static char *GetMSVCDemangledStr(llvm::StringRef M) { |
129 | char *demangled_cstr = llvm::microsoftDemangle( |
130 | mangled_name: M, n_read: nullptr, status: nullptr, |
131 | Flags: llvm::MSDemangleFlags( |
132 | llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | |
133 | llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); |
134 | |
135 | if (Log *log = GetLog(mask: LLDBLog::Demangle)) { |
136 | if (demangled_cstr && demangled_cstr[0]) |
137 | LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"" , M.data(), demangled_cstr); |
138 | else |
139 | LLDB_LOGF(log, "demangled msvc: %s -> error" , M.data()); |
140 | } |
141 | |
142 | return demangled_cstr; |
143 | } |
144 | |
145 | static char *GetItaniumDemangledStr(const char *M) { |
146 | char *demangled_cstr = nullptr; |
147 | |
148 | llvm::ItaniumPartialDemangler ipd; |
149 | bool err = ipd.partialDemangle(MangledName: M); |
150 | if (!err) { |
151 | // Default buffer and size (will realloc in case it's too small). |
152 | size_t demangled_size = 80; |
153 | demangled_cstr = static_cast<char *>(std::malloc(size: demangled_size)); |
154 | demangled_cstr = ipd.finishDemangle(Buf: demangled_cstr, N: &demangled_size); |
155 | |
156 | assert(demangled_cstr && |
157 | "finishDemangle must always succeed if partialDemangle did" ); |
158 | assert(demangled_cstr[demangled_size - 1] == '\0' && |
159 | "Expected demangled_size to return length including trailing null" ); |
160 | } |
161 | |
162 | if (Log *log = GetLog(mask: LLDBLog::Demangle)) { |
163 | if (demangled_cstr) |
164 | LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"" , M, demangled_cstr); |
165 | else |
166 | LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle" , M); |
167 | } |
168 | |
169 | return demangled_cstr; |
170 | } |
171 | |
172 | static char *GetRustV0DemangledStr(llvm::StringRef M) { |
173 | char *demangled_cstr = llvm::rustDemangle(MangledName: M); |
174 | |
175 | if (Log *log = GetLog(mask: LLDBLog::Demangle)) { |
176 | if (demangled_cstr && demangled_cstr[0]) |
177 | LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"" , M, demangled_cstr); |
178 | else |
179 | LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle" , |
180 | static_cast<std::string_view>(M)); |
181 | } |
182 | |
183 | return demangled_cstr; |
184 | } |
185 | |
186 | static char *GetDLangDemangledStr(llvm::StringRef M) { |
187 | char *demangled_cstr = llvm::dlangDemangle(MangledName: M); |
188 | |
189 | if (Log *log = GetLog(mask: LLDBLog::Demangle)) { |
190 | if (demangled_cstr && demangled_cstr[0]) |
191 | LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"" , M, demangled_cstr); |
192 | else |
193 | LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle" , |
194 | static_cast<std::string_view>(M)); |
195 | } |
196 | |
197 | return demangled_cstr; |
198 | } |
199 | |
200 | // Explicit demangling for scheduled requests during batch processing. This |
201 | // makes use of ItaniumPartialDemangler's rich demangle info |
202 | bool Mangled::GetRichManglingInfo(RichManglingContext &context, |
203 | SkipMangledNameFn *skip_mangled_name) { |
204 | // Others are not meant to arrive here. ObjC names or C's main() for example |
205 | // have their names stored in m_demangled, while m_mangled is empty. |
206 | assert(m_mangled); |
207 | |
208 | // Check whether or not we are interested in this name at all. |
209 | ManglingScheme scheme = GetManglingScheme(name: m_mangled.GetStringRef()); |
210 | if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) |
211 | return false; |
212 | |
213 | switch (scheme) { |
214 | case eManglingSchemeNone: |
215 | // The current mangled_name_filter would allow llvm_unreachable here. |
216 | return false; |
217 | |
218 | case eManglingSchemeItanium: |
219 | // We want the rich mangling info here, so we don't care whether or not |
220 | // there is a demangled string in the pool already. |
221 | return context.FromItaniumName(mangled: m_mangled); |
222 | |
223 | case eManglingSchemeMSVC: { |
224 | // We have no rich mangling for MSVC-mangled names yet, so first try to |
225 | // demangle it if necessary. |
226 | if (!m_demangled && !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) { |
227 | if (char *d = GetMSVCDemangledStr(M: m_mangled)) { |
228 | // Without the rich mangling info we have to demangle the full name. |
229 | // Copy it to string pool and connect the counterparts to accelerate |
230 | // later access in GetDemangledName(). |
231 | m_demangled.SetStringWithMangledCounterpart(demangled: llvm::StringRef(d), |
232 | mangled: m_mangled); |
233 | ::free(ptr: d); |
234 | } else { |
235 | m_demangled.SetCString("" ); |
236 | } |
237 | } |
238 | |
239 | if (m_demangled.IsEmpty()) { |
240 | // Cannot demangle it, so don't try parsing. |
241 | return false; |
242 | } else { |
243 | // Demangled successfully, we can try and parse it with |
244 | // CPlusPlusLanguage::MethodName. |
245 | return context.FromCxxMethodName(demangled: m_demangled); |
246 | } |
247 | } |
248 | |
249 | case eManglingSchemeRustV0: |
250 | case eManglingSchemeD: |
251 | case eManglingSchemeSwift: |
252 | // Rich demangling scheme is not supported |
253 | return false; |
254 | } |
255 | llvm_unreachable("Fully covered switch above!" ); |
256 | } |
257 | |
258 | // Generate the demangled name on demand using this accessor. Code in this |
259 | // class will need to use this accessor if it wishes to decode the demangled |
260 | // name. The result is cached and will be kept until a new string value is |
261 | // supplied to this object, or until the end of the object's lifetime. |
262 | ConstString Mangled::GetDemangledName() const { |
263 | // Check to make sure we have a valid mangled name and that we haven't |
264 | // already decoded our mangled name. |
265 | if (m_mangled && m_demangled.IsNull()) { |
266 | // Don't bother running anything that isn't mangled |
267 | const char *mangled_name = m_mangled.GetCString(); |
268 | ManglingScheme mangling_scheme = |
269 | GetManglingScheme(name: m_mangled.GetStringRef()); |
270 | if (mangling_scheme != eManglingSchemeNone && |
271 | !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) { |
272 | // We didn't already mangle this name, demangle it and if all goes well |
273 | // add it to our map. |
274 | char *demangled_name = nullptr; |
275 | switch (mangling_scheme) { |
276 | case eManglingSchemeMSVC: |
277 | demangled_name = GetMSVCDemangledStr(M: mangled_name); |
278 | break; |
279 | case eManglingSchemeItanium: { |
280 | demangled_name = GetItaniumDemangledStr(M: mangled_name); |
281 | break; |
282 | } |
283 | case eManglingSchemeRustV0: |
284 | demangled_name = GetRustV0DemangledStr(M: m_mangled); |
285 | break; |
286 | case eManglingSchemeD: |
287 | demangled_name = GetDLangDemangledStr(M: m_mangled); |
288 | break; |
289 | case eManglingSchemeSwift: |
290 | // Demangling a swift name requires the swift compiler. This is |
291 | // explicitly unsupported on llvm.org. |
292 | break; |
293 | case eManglingSchemeNone: |
294 | llvm_unreachable("eManglingSchemeNone was handled already" ); |
295 | } |
296 | if (demangled_name) { |
297 | m_demangled.SetStringWithMangledCounterpart( |
298 | demangled: llvm::StringRef(demangled_name), mangled: m_mangled); |
299 | free(ptr: demangled_name); |
300 | } |
301 | } |
302 | if (m_demangled.IsNull()) { |
303 | // Set the demangled string to the empty string to indicate we tried to |
304 | // parse it once and failed. |
305 | m_demangled.SetCString("" ); |
306 | } |
307 | } |
308 | |
309 | return m_demangled; |
310 | } |
311 | |
312 | ConstString Mangled::GetDisplayDemangledName() const { |
313 | return GetDemangledName(); |
314 | } |
315 | |
316 | bool Mangled::NameMatches(const RegularExpression ®ex) const { |
317 | if (m_mangled && regex.Execute(string: m_mangled.GetStringRef())) |
318 | return true; |
319 | |
320 | ConstString demangled = GetDemangledName(); |
321 | return demangled && regex.Execute(string: demangled.GetStringRef()); |
322 | } |
323 | |
324 | // Get the demangled name if there is one, else return the mangled name. |
325 | ConstString Mangled::GetName(Mangled::NamePreference preference) const { |
326 | if (preference == ePreferMangled && m_mangled) |
327 | return m_mangled; |
328 | |
329 | // Call the accessor to make sure we get a demangled name in case it hasn't |
330 | // been demangled yet... |
331 | ConstString demangled = GetDemangledName(); |
332 | |
333 | if (preference == ePreferDemangledWithoutArguments) { |
334 | if (Language *lang = Language::FindPlugin(language: GuessLanguage())) { |
335 | return lang->GetDemangledFunctionNameWithoutArguments(mangled: *this); |
336 | } |
337 | } |
338 | if (preference == ePreferDemangled) { |
339 | if (demangled) |
340 | return demangled; |
341 | return m_mangled; |
342 | } |
343 | return demangled; |
344 | } |
345 | |
346 | // Dump a Mangled object to stream "s". We don't force our demangled name to be |
347 | // computed currently (we don't use the accessor). |
348 | void Mangled::Dump(Stream *s) const { |
349 | if (m_mangled) { |
350 | *s << ", mangled = " << m_mangled; |
351 | } |
352 | if (m_demangled) { |
353 | const char *demangled = m_demangled.AsCString(); |
354 | s->Printf(format: ", demangled = %s" , demangled[0] ? demangled : "<error>" ); |
355 | } |
356 | } |
357 | |
358 | // Dumps a debug version of this string with extra object and state information |
359 | // to stream "s". |
360 | void Mangled::DumpDebug(Stream *s) const { |
361 | s->Printf(format: "%*p: Mangled mangled = " , static_cast<int>(sizeof(void *) * 2), |
362 | static_cast<const void *>(this)); |
363 | m_mangled.DumpDebug(s); |
364 | s->Printf(format: ", demangled = " ); |
365 | m_demangled.DumpDebug(s); |
366 | } |
367 | |
368 | // Return the size in byte that this object takes in memory. The size includes |
369 | // the size of the objects it owns, and not the strings that it references |
370 | // because they are shared strings. |
371 | size_t Mangled::MemorySize() const { |
372 | return m_mangled.MemorySize() + m_demangled.MemorySize(); |
373 | } |
374 | |
375 | // We "guess" the language because we can't determine a symbol's language from |
376 | // it's name. For example, a Pascal symbol can be mangled using the C++ |
377 | // Itanium scheme, and defined in a compilation unit within the same module as |
378 | // other C++ units. In addition, different targets could have different ways |
379 | // of mangling names from a given language, likewise the compilation units |
380 | // within those targets. |
381 | lldb::LanguageType Mangled::GuessLanguage() const { |
382 | lldb::LanguageType result = lldb::eLanguageTypeUnknown; |
383 | // Ask each language plugin to check if the mangled name belongs to it. |
384 | Language::ForEach(callback: [this, &result](Language *l) { |
385 | if (l->SymbolNameFitsToLanguage(name: *this)) { |
386 | result = l->GetLanguageType(); |
387 | return false; |
388 | } |
389 | return true; |
390 | }); |
391 | return result; |
392 | } |
393 | |
394 | // Dump OBJ to the supplied stream S. |
395 | Stream &operator<<(Stream &s, const Mangled &obj) { |
396 | if (obj.GetMangledName()) |
397 | s << "mangled = '" << obj.GetMangledName() << "'" ; |
398 | |
399 | ConstString demangled = obj.GetDemangledName(); |
400 | if (demangled) |
401 | s << ", demangled = '" << demangled << '\''; |
402 | else |
403 | s << ", demangled = <error>" ; |
404 | return s; |
405 | } |
406 | |
// Tag written as the first byte of an encoded Mangled object. It records
// which of the two names follow, so that no more is stored than required.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is present; the tag byte (zero)
  /// is the entire encoding.
  Empty = 0u,

  /// Only a demangled name is present, so only that name is stored.
  DemangledOnly = 1u,

  /// Only the mangled name is stored. Used when there is no demangled name,
  /// or when the demangled name is the mangled name's counterpart and can
  /// therefore be recomputed from it.
  MangledOnly = 2u,

  /// Both names are stored because they are unrelated. This happens for
  /// names that are not true mangled names but still need to be looked up by
  /// name and type in the symbol table, e.g. Objective C symbols such as
  /// "OBJC_CLASS_$_NSValue", where the mangled name is set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name is manually set to
  /// "NSValue". Demangling "OBJC_CLASS_$_NSValue" would fail, yet both names
  /// must be preserved.
  MangledAndDemangled = 3u
};
432 | |
433 | bool Mangled::(const DataExtractor &data, lldb::offset_t *offset_ptr, |
434 | const StringTableReader &strtab) { |
435 | m_mangled.Clear(); |
436 | m_demangled.Clear(); |
437 | MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); |
438 | switch (encoding) { |
439 | case Empty: |
440 | return true; |
441 | |
442 | case DemangledOnly: |
443 | m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr))); |
444 | return true; |
445 | |
446 | case MangledOnly: |
447 | m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr))); |
448 | return true; |
449 | |
450 | case MangledAndDemangled: |
451 | m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr))); |
452 | m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr))); |
453 | return true; |
454 | } |
455 | return false; |
456 | } |
457 | /// The encoding format for the Mangled object is as follows: |
458 | /// |
459 | /// uint8_t encoding; |
460 | /// char str1[]; (only if DemangledOnly, MangledOnly) |
461 | /// char str2[]; (only if MangledAndDemangled) |
462 | /// |
463 | /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 |
464 | /// are only saved if we need them based on the encoding. |
465 | /// |
466 | /// Some mangled names have a mangled name that can be demangled by the built |
467 | /// in demanglers. These kinds of mangled objects know when the mangled and |
468 | /// demangled names are the counterparts for each other. This is done because |
469 | /// demangling is very expensive and avoiding demangling the same name twice |
470 | /// saves us a lot of compute time. For these kinds of names we only need to |
471 | /// save the mangled name and have the encoding set to "MangledOnly". |
472 | /// |
473 | /// If a mangled obejct has only a demangled name, then we save only that string |
474 | /// and have the encoding set to "DemangledOnly". |
475 | /// |
476 | /// Some mangled objects have both mangled and demangled names, but the |
477 | /// demangled name can not be computed from the mangled name. This is often used |
478 | /// for runtime named, like Objective C runtime V2 and V3 names. Both these |
479 | /// names must be saved and the encoding is set to "MangledAndDemangled". |
480 | /// |
481 | /// For a Mangled object with no names, we only need to set the encoding to |
482 | /// "Empty" and not store any string values. |
483 | void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { |
484 | MangledEncoding encoding = Empty; |
485 | if (m_mangled) { |
486 | encoding = MangledOnly; |
487 | if (m_demangled) { |
488 | // We have both mangled and demangled names. If the demangled name is the |
489 | // counterpart of the mangled name, then we only need to save the mangled |
490 | // named. If they are different, we need to save both. |
491 | ConstString s; |
492 | if (!(m_mangled.GetMangledCounterpart(counterpart&: s) && s == m_demangled)) |
493 | encoding = MangledAndDemangled; |
494 | } |
495 | } else if (m_demangled) { |
496 | encoding = DemangledOnly; |
497 | } |
498 | file.AppendU8(value: encoding); |
499 | switch (encoding) { |
500 | case Empty: |
501 | break; |
502 | case DemangledOnly: |
503 | file.AppendU32(value: strtab.Add(s: m_demangled)); |
504 | break; |
505 | case MangledOnly: |
506 | file.AppendU32(value: strtab.Add(s: m_mangled)); |
507 | break; |
508 | case MangledAndDemangled: |
509 | file.AppendU32(value: strtab.Add(s: m_mangled)); |
510 | file.AppendU32(value: strtab.Add(s: m_demangled)); |
511 | break; |
512 | } |
513 | } |
514 | |