1//===-- Mangled.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Mangled.h"
10
11#include "lldb/Core/DataFileCache.h"
12#include "lldb/Core/DemangledNameInfo.h"
13#include "lldb/Core/RichManglingContext.h"
14#include "lldb/Target/Language.h"
15#include "lldb/Utility/ConstString.h"
16#include "lldb/Utility/DataEncoder.h"
17#include "lldb/Utility/LLDBLog.h"
18#include "lldb/Utility/Log.h"
19#include "lldb/Utility/RegularExpression.h"
20#include "lldb/Utility/Stream.h"
21#include "lldb/lldb-enumerations.h"
22
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/Demangle/Demangle.h"
26#include "llvm/Support/Compiler.h"
27
28#include <mutex>
29#include <string>
30#include <string_view>
31#include <utility>
32
33#include <cstdlib>
34#include <cstring>
35using namespace lldb_private;
36
37#pragma mark Mangled
38
39bool Mangled::IsMangledName(llvm::StringRef name) {
40 return Mangled::GetManglingScheme(name) != Mangled::eManglingSchemeNone;
41}
42
43Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
44 if (name.empty())
45 return Mangled::eManglingSchemeNone;
46
47 if (name.starts_with(Prefix: "?"))
48 return Mangled::eManglingSchemeMSVC;
49
50 if (name.starts_with(Prefix: "_R"))
51 return Mangled::eManglingSchemeRustV0;
52
53 if (name.starts_with(Prefix: "_D")) {
54 // A dlang mangled name begins with `_D`, followed by a numeric length. One
55 // known exception is the symbol `_Dmain`.
56 // See `SymbolName` and `LName` in
57 // https://dlang.org/spec/abi.html#name_mangling
58 llvm::StringRef buf = name.drop_front(N: 2);
59 if (!buf.empty() && (llvm::isDigit(C: buf.front()) || name == "_Dmain"))
60 return Mangled::eManglingSchemeD;
61 }
62
63 if (name.starts_with(Prefix: "_Z"))
64 return Mangled::eManglingSchemeItanium;
65
66 // ___Z is a clang extension of block invocations
67 if (name.starts_with(Prefix: "___Z"))
68 return Mangled::eManglingSchemeItanium;
69
70 // Swift's older style of mangling used "_T" as a mangling prefix. This can
71 // lead to false positives with other symbols that just so happen to start
72 // with "_T". To minimize the chance of that happening, we only return true
73 // for select old-style swift mangled names. The known cases are ObjC classes
74 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
75 // Protocols are prefixed with "_TtP".
76 if (name.starts_with(Prefix: "_TtC") || name.starts_with(Prefix: "_TtGC") ||
77 name.starts_with(Prefix: "_TtP"))
78 return Mangled::eManglingSchemeSwift;
79
80 // Swift 4.2 used "$S" and "_$S".
81 // Swift 5 and onward uses "$s" and "_$s".
82 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
83 // Embedded Swift introduced "$e" and "_$e" as Swift mangling prefixes.
84 if (name.starts_with(Prefix: "$S") || name.starts_with(Prefix: "_$S") ||
85 name.starts_with(Prefix: "$s") || name.starts_with(Prefix: "_$s") ||
86 name.starts_with(Prefix: "$e") || name.starts_with(Prefix: "_$e") ||
87 name.starts_with(Prefix: "@__swiftmacro_"))
88 return Mangled::eManglingSchemeSwift;
89
90 return Mangled::eManglingSchemeNone;
91}
92
93Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
94 if (s)
95 SetValue(s);
96}
97
98Mangled::Mangled(llvm::StringRef name) {
99 if (!name.empty())
100 SetValue(ConstString(name));
101}
102
103// Convert to bool operator. This allows code to check any Mangled objects
104// to see if they contain anything valid using code such as:
105//
106// Mangled mangled(...);
107// if (mangled)
108// { ...
109Mangled::operator bool() const { return m_mangled || m_demangled; }
110
111// Clear the mangled and demangled values.
112void Mangled::Clear() {
113 m_mangled.Clear();
114 m_demangled.Clear();
115 m_demangled_info.reset();
116}
117
118// Compare the string values.
119int Mangled::Compare(const Mangled &a, const Mangled &b) {
120 return ConstString::Compare(lhs: a.GetName(preference: ePreferMangled),
121 rhs: b.GetName(preference: ePreferMangled));
122}
123
124void Mangled::SetValue(ConstString name) {
125 if (name) {
126 if (IsMangledName(name: name.GetStringRef())) {
127 m_demangled.Clear();
128 m_mangled = name;
129 m_demangled_info.reset();
130 } else {
131 m_demangled = name;
132 m_mangled.Clear();
133 m_demangled_info.reset();
134 }
135 } else {
136 m_demangled.Clear();
137 m_mangled.Clear();
138 m_demangled_info.reset();
139 }
140}
141
142// Local helpers for different demangling implementations.
143static char *GetMSVCDemangledStr(llvm::StringRef M) {
144 char *demangled_cstr = llvm::microsoftDemangle(
145 mangled_name: M, n_read: nullptr, status: nullptr,
146 Flags: llvm::MSDemangleFlags(
147 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
148 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
149
150 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
151 if (demangled_cstr && demangled_cstr[0])
152 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
153 else
154 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
155 }
156
157 return demangled_cstr;
158}
159
160static std::pair<char *, DemangledNameInfo>
161GetItaniumDemangledStr(const char *M) {
162 char *demangled_cstr = nullptr;
163
164 DemangledNameInfo info;
165 llvm::ItaniumPartialDemangler ipd;
166 bool err = ipd.partialDemangle(MangledName: M);
167 if (!err) {
168 // Default buffer and size (OutputBuffer will realloc in case it's too
169 // small).
170 size_t demangled_size = 80;
171 demangled_cstr = static_cast<char *>(std::malloc(size: 80));
172
173 TrackingOutputBuffer OB(demangled_cstr, demangled_size);
174 demangled_cstr = ipd.finishDemangle(OB: &OB);
175 OB.NameInfo.SuffixRange.first = OB.NameInfo.QualifiersRange.second;
176 OB.NameInfo.SuffixRange.second = std::string_view(OB).size();
177 info = std::move(OB.NameInfo);
178
179 assert(demangled_cstr &&
180 "finishDemangle must always succeed if partialDemangle did");
181 assert(demangled_cstr[OB.getCurrentPosition() - 1] == '\0' &&
182 "Expected demangled_size to return length including trailing null");
183 }
184
185 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
186 if (demangled_cstr)
187 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
188 else
189 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
190
191 if (!info.hasBasename())
192 LLDB_LOGF(log,
193 "demangled itanium: %s -> error: failed to retrieve name info",
194 M);
195 }
196
197 return {demangled_cstr, std::move(info)};
198}
199
200static char *GetRustV0DemangledStr(llvm::StringRef M) {
201 char *demangled_cstr = llvm::rustDemangle(MangledName: M);
202
203 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
204 if (demangled_cstr && demangled_cstr[0])
205 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
206 else
207 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
208 static_cast<std::string_view>(M));
209 }
210
211 return demangled_cstr;
212}
213
214static char *GetDLangDemangledStr(llvm::StringRef M) {
215 char *demangled_cstr = llvm::dlangDemangle(MangledName: M);
216
217 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
218 if (demangled_cstr && demangled_cstr[0])
219 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
220 else
221 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
222 static_cast<std::string_view>(M));
223 }
224
225 return demangled_cstr;
226}
227
228// Explicit demangling for scheduled requests during batch processing. This
229// makes use of ItaniumPartialDemangler's rich demangle info
230bool Mangled::GetRichManglingInfo(RichManglingContext &context,
231 SkipMangledNameFn *skip_mangled_name) {
232 // Others are not meant to arrive here. ObjC names or C's main() for example
233 // have their names stored in m_demangled, while m_mangled is empty.
234 assert(m_mangled);
235
236 // Check whether or not we are interested in this name at all.
237 ManglingScheme scheme = GetManglingScheme(name: m_mangled.GetStringRef());
238 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
239 return false;
240
241 switch (scheme) {
242 case eManglingSchemeNone:
243 // The current mangled_name_filter would allow llvm_unreachable here.
244 return false;
245
246 case eManglingSchemeItanium:
247 // We want the rich mangling info here, so we don't care whether or not
248 // there is a demangled string in the pool already.
249 return context.FromItaniumName(mangled: m_mangled);
250
251 case eManglingSchemeMSVC: {
252 // We have no rich mangling for MSVC-mangled names yet, so first try to
253 // demangle it if necessary.
254 if (!m_demangled && !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) {
255 if (char *d = GetMSVCDemangledStr(M: m_mangled)) {
256 // Without the rich mangling info we have to demangle the full name.
257 // Copy it to string pool and connect the counterparts to accelerate
258 // later access in GetDemangledName().
259 m_demangled.SetStringWithMangledCounterpart(demangled: llvm::StringRef(d),
260 mangled: m_mangled);
261 ::free(ptr: d);
262 } else {
263 m_demangled.SetCString("");
264 }
265 }
266
267 if (m_demangled.IsEmpty()) {
268 // Cannot demangle it, so don't try parsing.
269 return false;
270 } else {
271 // Demangled successfully, we can try and parse it with
272 // CPlusPlusLanguage::CxxMethodName.
273 return context.FromCxxMethodName(demangled: m_demangled);
274 }
275 }
276
277 case eManglingSchemeRustV0:
278 case eManglingSchemeD:
279 case eManglingSchemeSwift:
280 // Rich demangling scheme is not supported
281 return false;
282 }
283 llvm_unreachable("Fully covered switch above!");
284}
285
286ConstString Mangled::GetDemangledName() const {
287 return GetDemangledNameImpl(/*force=*/false);
288}
289
290std::optional<DemangledNameInfo> const &Mangled::GetDemangledInfo() const {
291 if (!m_demangled_info)
292 GetDemangledNameImpl(/*force=*/true);
293
294 return m_demangled_info;
295}
296
297// Generate the demangled name on demand using this accessor. Code in this
298// class will need to use this accessor if it wishes to decode the demangled
299// name. The result is cached and will be kept until a new string value is
300// supplied to this object, or until the end of the object's lifetime.
301ConstString Mangled::GetDemangledNameImpl(bool force) const {
302 if (!m_mangled)
303 return m_demangled;
304
305 // Re-use previously demangled names.
306 if (!force && !m_demangled.IsNull())
307 return m_demangled;
308
309 if (!force && m_mangled.GetMangledCounterpart(counterpart&: m_demangled) &&
310 !m_demangled.IsNull())
311 return m_demangled;
312
313 // We didn't already mangle this name, demangle it and if all goes well
314 // add it to our map.
315 char *demangled_name = nullptr;
316 switch (GetManglingScheme(name: m_mangled.GetStringRef())) {
317 case eManglingSchemeMSVC:
318 demangled_name = GetMSVCDemangledStr(M: m_mangled);
319 break;
320 case eManglingSchemeItanium: {
321 std::pair<char *, DemangledNameInfo> demangled =
322 GetItaniumDemangledStr(M: m_mangled.GetCString());
323 demangled_name = demangled.first;
324 m_demangled_info.emplace(args: std::move(demangled.second));
325 break;
326 }
327 case eManglingSchemeRustV0:
328 demangled_name = GetRustV0DemangledStr(M: m_mangled);
329 break;
330 case eManglingSchemeD:
331 demangled_name = GetDLangDemangledStr(M: m_mangled);
332 break;
333 case eManglingSchemeSwift:
334 // Demangling a swift name requires the swift compiler. This is
335 // explicitly unsupported on llvm.org.
336 break;
337 case eManglingSchemeNone:
338 // Don't bother demangling anything that isn't mangled.
339 break;
340 }
341
342 if (demangled_name) {
343 m_demangled.SetStringWithMangledCounterpart(demangled: demangled_name, mangled: m_mangled);
344 free(ptr: demangled_name);
345 }
346
347 if (m_demangled.IsNull()) {
348 // Set the demangled string to the empty string to indicate we tried to
349 // parse it once and failed.
350 m_demangled.SetCString("");
351 }
352
353 return m_demangled;
354}
355
356ConstString Mangled::GetDisplayDemangledName() const {
357 if (Language *lang = Language::FindPlugin(language: GuessLanguage()))
358 return lang->GetDisplayDemangledName(mangled: *this);
359 return GetDemangledName();
360}
361
362bool Mangled::NameMatches(const RegularExpression &regex) const {
363 if (m_mangled && regex.Execute(string: m_mangled.GetStringRef()))
364 return true;
365
366 ConstString demangled = GetDemangledName();
367 return demangled && regex.Execute(string: demangled.GetStringRef());
368}
369
370// Get the demangled name if there is one, else return the mangled name.
371ConstString Mangled::GetName(Mangled::NamePreference preference) const {
372 if (preference == ePreferMangled && m_mangled)
373 return m_mangled;
374
375 // Call the accessor to make sure we get a demangled name in case it hasn't
376 // been demangled yet...
377 ConstString demangled = GetDemangledName();
378
379 if (preference == ePreferDemangledWithoutArguments) {
380 if (Language *lang = Language::FindPlugin(language: GuessLanguage())) {
381 return lang->GetDemangledFunctionNameWithoutArguments(mangled: *this);
382 }
383 }
384 if (preference == ePreferDemangled) {
385 if (demangled)
386 return demangled;
387 return m_mangled;
388 }
389 return demangled;
390}
391
392// Dump a Mangled object to stream "s". We don't force our demangled name to be
393// computed currently (we don't use the accessor).
394void Mangled::Dump(Stream *s) const {
395 if (m_mangled) {
396 *s << ", mangled = " << m_mangled;
397 }
398 if (m_demangled) {
399 const char *demangled = m_demangled.AsCString();
400 s->Printf(format: ", demangled = %s", demangled[0] ? demangled : "<error>");
401 }
402}
403
404// Dumps a debug version of this string with extra object and state information
405// to stream "s".
406void Mangled::DumpDebug(Stream *s) const {
407 s->Printf(format: "%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
408 static_cast<const void *>(this));
409 m_mangled.DumpDebug(s);
410 s->Printf(format: ", demangled = ");
411 m_demangled.DumpDebug(s);
412}
413
414// Return the size in byte that this object takes in memory. The size includes
415// the size of the objects it owns, and not the strings that it references
416// because they are shared strings.
417size_t Mangled::MemorySize() const {
418 return m_mangled.MemorySize() + m_demangled.MemorySize();
419}
420
421// We "guess" the language because we can't determine a symbol's language from
422// it's name. For example, a Pascal symbol can be mangled using the C++
423// Itanium scheme, and defined in a compilation unit within the same module as
424// other C++ units. In addition, different targets could have different ways
425// of mangling names from a given language, likewise the compilation units
426// within those targets.
427lldb::LanguageType Mangled::GuessLanguage() const {
428 lldb::LanguageType result = lldb::eLanguageTypeUnknown;
429 // Ask each language plugin to check if the mangled name belongs to it.
430 Language::ForEach(callback: [this, &result](Language *l) {
431 if (l->SymbolNameFitsToLanguage(name: *this)) {
432 result = l->GetLanguageType();
433 return false;
434 }
435 return true;
436 });
437 return result;
438}
439
440// Dump OBJ to the supplied stream S.
441Stream &operator<<(Stream &s, const Mangled &obj) {
442 if (obj.GetMangledName())
443 s << "mangled = '" << obj.GetMangledName() << "'";
444
445 ConstString demangled = obj.GetDemangledName();
446 if (demangled)
447 s << ", demangled = '" << demangled << '\'';
448 else
449 s << ", demangled = <error>";
450 return s;
451}
452
// Encoding tags for serialized Mangled objects. Only as much information as
// is required gets written; the tag records which names follow it.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is present. The object is encoded
  /// as this single zero tag.
  Empty = 0u,
  /// Only a demangled name is present (no mangled name), so only the demangled
  /// name is encoded.
  DemangledOnly = 1u,
  /// The demangled name is the mangled/demangled counterpart of the mangled
  /// name and can be recomputed from it, so only the mangled name is encoded.
  MangledOnly = 2u,
  /// Both names are present but unrelated, so both must be saved. This
  /// happens for names that are not true mangled names but that we still want
  /// to look up by name and type in the symbol table. Objective C symbols such
  /// as "OBJC_CLASS_$_NSValue" are an example: the mangled name is set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name is manually set to
  /// "NSValue". Demangling "OBJC_CLASS_$_NSValue" would fail, yet the two
  /// unrelated names need to be preserved.
  MangledAndDemangled = 3u
};
478
479bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
480 const StringTableReader &strtab) {
481 m_mangled.Clear();
482 m_demangled.Clear();
483 m_demangled_info.reset();
484 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
485 switch (encoding) {
486 case Empty:
487 return true;
488
489 case DemangledOnly:
490 m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
491 return true;
492
493 case MangledOnly:
494 m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
495 return true;
496
497 case MangledAndDemangled:
498 m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
499 m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
500 return true;
501 }
502 return false;
503}
504/// The encoding format for the Mangled object is as follows:
505///
506/// uint8_t encoding;
507/// char str1[]; (only if DemangledOnly, MangledOnly)
508/// char str2[]; (only if MangledAndDemangled)
509///
510/// The strings are stored as NULL terminated UTF8 strings and str1 and str2
511/// are only saved if we need them based on the encoding.
512///
513/// Some mangled names have a mangled name that can be demangled by the built
514/// in demanglers. These kinds of mangled objects know when the mangled and
515/// demangled names are the counterparts for each other. This is done because
516/// demangling is very expensive and avoiding demangling the same name twice
517/// saves us a lot of compute time. For these kinds of names we only need to
518/// save the mangled name and have the encoding set to "MangledOnly".
519///
520/// If a mangled obejct has only a demangled name, then we save only that string
521/// and have the encoding set to "DemangledOnly".
522///
523/// Some mangled objects have both mangled and demangled names, but the
524/// demangled name can not be computed from the mangled name. This is often used
525/// for runtime named, like Objective C runtime V2 and V3 names. Both these
526/// names must be saved and the encoding is set to "MangledAndDemangled".
527///
528/// For a Mangled object with no names, we only need to set the encoding to
529/// "Empty" and not store any string values.
530void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
531 MangledEncoding encoding = Empty;
532 if (m_mangled) {
533 encoding = MangledOnly;
534 if (m_demangled) {
535 // We have both mangled and demangled names. If the demangled name is the
536 // counterpart of the mangled name, then we only need to save the mangled
537 // named. If they are different, we need to save both.
538 ConstString s;
539 if (!(m_mangled.GetMangledCounterpart(counterpart&: s) && s == m_demangled))
540 encoding = MangledAndDemangled;
541 }
542 } else if (m_demangled) {
543 encoding = DemangledOnly;
544 }
545 file.AppendU8(value: encoding);
546 switch (encoding) {
547 case Empty:
548 break;
549 case DemangledOnly:
550 file.AppendU32(value: strtab.Add(s: m_demangled));
551 break;
552 case MangledOnly:
553 file.AppendU32(value: strtab.Add(s: m_mangled));
554 break;
555 case MangledAndDemangled:
556 file.AppendU32(value: strtab.Add(s: m_mangled));
557 file.AppendU32(value: strtab.Add(s: m_demangled));
558 break;
559 }
560}
561

source code of lldb/source/Core/Mangled.cpp