1//===-- Mangled.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Mangled.h"
10
11#include "lldb/Core/DataFileCache.h"
12#include "lldb/Core/RichManglingContext.h"
13#include "lldb/Target/Language.h"
14#include "lldb/Utility/ConstString.h"
15#include "lldb/Utility/DataEncoder.h"
16#include "lldb/Utility/LLDBLog.h"
17#include "lldb/Utility/Log.h"
18#include "lldb/Utility/RegularExpression.h"
19#include "lldb/Utility/Stream.h"
20#include "lldb/lldb-enumerations.h"
21
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Demangle/Demangle.h"
24#include "llvm/Support/Compiler.h"
25
26#include <mutex>
27#include <string>
28#include <string_view>
29#include <utility>
30
31#include <cstdlib>
32#include <cstring>
33using namespace lldb_private;
34
35static inline bool cstring_is_mangled(llvm::StringRef s) {
36 return Mangled::GetManglingScheme(name: s) != Mangled::eManglingSchemeNone;
37}
38
39#pragma mark Mangled
40
41Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
42 if (name.empty())
43 return Mangled::eManglingSchemeNone;
44
45 if (name.starts_with(Prefix: "?"))
46 return Mangled::eManglingSchemeMSVC;
47
48 if (name.starts_with(Prefix: "_R"))
49 return Mangled::eManglingSchemeRustV0;
50
51 if (name.starts_with(Prefix: "_D"))
52 return Mangled::eManglingSchemeD;
53
54 if (name.starts_with(Prefix: "_Z"))
55 return Mangled::eManglingSchemeItanium;
56
57 // ___Z is a clang extension of block invocations
58 if (name.starts_with(Prefix: "___Z"))
59 return Mangled::eManglingSchemeItanium;
60
61 // Swift's older style of mangling used "_T" as a mangling prefix. This can
62 // lead to false positives with other symbols that just so happen to start
63 // with "_T". To minimize the chance of that happening, we only return true
64 // for select old-style swift mangled names. The known cases are ObjC classes
65 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
66 // Protocols are prefixed with "_TtP".
67 if (name.starts_with(Prefix: "_TtC") || name.starts_with(Prefix: "_TtGC") ||
68 name.starts_with(Prefix: "_TtP"))
69 return Mangled::eManglingSchemeSwift;
70
71 // Swift 4.2 used "$S" and "_$S".
72 // Swift 5 and onward uses "$s" and "_$s".
73 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
74 if (name.starts_with(Prefix: "$S") || name.starts_with(Prefix: "_$S") ||
75 name.starts_with(Prefix: "$s") || name.starts_with(Prefix: "_$s") ||
76 name.starts_with(Prefix: "@__swiftmacro_"))
77 return Mangled::eManglingSchemeSwift;
78
79 return Mangled::eManglingSchemeNone;
80}
81
82Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
83 if (s)
84 SetValue(s);
85}
86
87Mangled::Mangled(llvm::StringRef name) {
88 if (!name.empty())
89 SetValue(ConstString(name));
90}
91
92// Convert to bool operator. This allows code to check any Mangled objects
93// to see if they contain anything valid using code such as:
94//
95// Mangled mangled(...);
96// if (mangled)
97// { ...
98Mangled::operator bool() const { return m_mangled || m_demangled; }
99
100// Clear the mangled and demangled values.
101void Mangled::Clear() {
102 m_mangled.Clear();
103 m_demangled.Clear();
104}
105
106// Compare the string values.
107int Mangled::Compare(const Mangled &a, const Mangled &b) {
108 return ConstString::Compare(lhs: a.GetName(preference: ePreferMangled),
109 rhs: b.GetName(preference: ePreferMangled));
110}
111
112void Mangled::SetValue(ConstString name) {
113 if (name) {
114 if (cstring_is_mangled(s: name.GetStringRef())) {
115 m_demangled.Clear();
116 m_mangled = name;
117 } else {
118 m_demangled = name;
119 m_mangled.Clear();
120 }
121 } else {
122 m_demangled.Clear();
123 m_mangled.Clear();
124 }
125}
126
127// Local helpers for different demangling implementations.
128static char *GetMSVCDemangledStr(llvm::StringRef M) {
129 char *demangled_cstr = llvm::microsoftDemangle(
130 mangled_name: M, n_read: nullptr, status: nullptr,
131 Flags: llvm::MSDemangleFlags(
132 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
133 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
134
135 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
136 if (demangled_cstr && demangled_cstr[0])
137 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
138 else
139 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
140 }
141
142 return demangled_cstr;
143}
144
145static char *GetItaniumDemangledStr(const char *M) {
146 char *demangled_cstr = nullptr;
147
148 llvm::ItaniumPartialDemangler ipd;
149 bool err = ipd.partialDemangle(MangledName: M);
150 if (!err) {
151 // Default buffer and size (will realloc in case it's too small).
152 size_t demangled_size = 80;
153 demangled_cstr = static_cast<char *>(std::malloc(size: demangled_size));
154 demangled_cstr = ipd.finishDemangle(Buf: demangled_cstr, N: &demangled_size);
155
156 assert(demangled_cstr &&
157 "finishDemangle must always succeed if partialDemangle did");
158 assert(demangled_cstr[demangled_size - 1] == '\0' &&
159 "Expected demangled_size to return length including trailing null");
160 }
161
162 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
163 if (demangled_cstr)
164 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
165 else
166 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
167 }
168
169 return demangled_cstr;
170}
171
172static char *GetRustV0DemangledStr(llvm::StringRef M) {
173 char *demangled_cstr = llvm::rustDemangle(MangledName: M);
174
175 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
176 if (demangled_cstr && demangled_cstr[0])
177 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
178 else
179 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
180 static_cast<std::string_view>(M));
181 }
182
183 return demangled_cstr;
184}
185
186static char *GetDLangDemangledStr(llvm::StringRef M) {
187 char *demangled_cstr = llvm::dlangDemangle(MangledName: M);
188
189 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
190 if (demangled_cstr && demangled_cstr[0])
191 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
192 else
193 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
194 static_cast<std::string_view>(M));
195 }
196
197 return demangled_cstr;
198}
199
200// Explicit demangling for scheduled requests during batch processing. This
201// makes use of ItaniumPartialDemangler's rich demangle info
202bool Mangled::GetRichManglingInfo(RichManglingContext &context,
203 SkipMangledNameFn *skip_mangled_name) {
204 // Others are not meant to arrive here. ObjC names or C's main() for example
205 // have their names stored in m_demangled, while m_mangled is empty.
206 assert(m_mangled);
207
208 // Check whether or not we are interested in this name at all.
209 ManglingScheme scheme = GetManglingScheme(name: m_mangled.GetStringRef());
210 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
211 return false;
212
213 switch (scheme) {
214 case eManglingSchemeNone:
215 // The current mangled_name_filter would allow llvm_unreachable here.
216 return false;
217
218 case eManglingSchemeItanium:
219 // We want the rich mangling info here, so we don't care whether or not
220 // there is a demangled string in the pool already.
221 return context.FromItaniumName(mangled: m_mangled);
222
223 case eManglingSchemeMSVC: {
224 // We have no rich mangling for MSVC-mangled names yet, so first try to
225 // demangle it if necessary.
226 if (!m_demangled && !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) {
227 if (char *d = GetMSVCDemangledStr(M: m_mangled)) {
228 // Without the rich mangling info we have to demangle the full name.
229 // Copy it to string pool and connect the counterparts to accelerate
230 // later access in GetDemangledName().
231 m_demangled.SetStringWithMangledCounterpart(demangled: llvm::StringRef(d),
232 mangled: m_mangled);
233 ::free(ptr: d);
234 } else {
235 m_demangled.SetCString("");
236 }
237 }
238
239 if (m_demangled.IsEmpty()) {
240 // Cannot demangle it, so don't try parsing.
241 return false;
242 } else {
243 // Demangled successfully, we can try and parse it with
244 // CPlusPlusLanguage::MethodName.
245 return context.FromCxxMethodName(demangled: m_demangled);
246 }
247 }
248
249 case eManglingSchemeRustV0:
250 case eManglingSchemeD:
251 case eManglingSchemeSwift:
252 // Rich demangling scheme is not supported
253 return false;
254 }
255 llvm_unreachable("Fully covered switch above!");
256}
257
258// Generate the demangled name on demand using this accessor. Code in this
259// class will need to use this accessor if it wishes to decode the demangled
260// name. The result is cached and will be kept until a new string value is
261// supplied to this object, or until the end of the object's lifetime.
262ConstString Mangled::GetDemangledName() const {
263 // Check to make sure we have a valid mangled name and that we haven't
264 // already decoded our mangled name.
265 if (m_mangled && m_demangled.IsNull()) {
266 // Don't bother running anything that isn't mangled
267 const char *mangled_name = m_mangled.GetCString();
268 ManglingScheme mangling_scheme =
269 GetManglingScheme(name: m_mangled.GetStringRef());
270 if (mangling_scheme != eManglingSchemeNone &&
271 !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) {
272 // We didn't already mangle this name, demangle it and if all goes well
273 // add it to our map.
274 char *demangled_name = nullptr;
275 switch (mangling_scheme) {
276 case eManglingSchemeMSVC:
277 demangled_name = GetMSVCDemangledStr(M: mangled_name);
278 break;
279 case eManglingSchemeItanium: {
280 demangled_name = GetItaniumDemangledStr(M: mangled_name);
281 break;
282 }
283 case eManglingSchemeRustV0:
284 demangled_name = GetRustV0DemangledStr(M: m_mangled);
285 break;
286 case eManglingSchemeD:
287 demangled_name = GetDLangDemangledStr(M: m_mangled);
288 break;
289 case eManglingSchemeSwift:
290 // Demangling a swift name requires the swift compiler. This is
291 // explicitly unsupported on llvm.org.
292 break;
293 case eManglingSchemeNone:
294 llvm_unreachable("eManglingSchemeNone was handled already");
295 }
296 if (demangled_name) {
297 m_demangled.SetStringWithMangledCounterpart(
298 demangled: llvm::StringRef(demangled_name), mangled: m_mangled);
299 free(ptr: demangled_name);
300 }
301 }
302 if (m_demangled.IsNull()) {
303 // Set the demangled string to the empty string to indicate we tried to
304 // parse it once and failed.
305 m_demangled.SetCString("");
306 }
307 }
308
309 return m_demangled;
310}
311
312ConstString Mangled::GetDisplayDemangledName() const {
313 return GetDemangledName();
314}
315
316bool Mangled::NameMatches(const RegularExpression &regex) const {
317 if (m_mangled && regex.Execute(string: m_mangled.GetStringRef()))
318 return true;
319
320 ConstString demangled = GetDemangledName();
321 return demangled && regex.Execute(string: demangled.GetStringRef());
322}
323
324// Get the demangled name if there is one, else return the mangled name.
325ConstString Mangled::GetName(Mangled::NamePreference preference) const {
326 if (preference == ePreferMangled && m_mangled)
327 return m_mangled;
328
329 // Call the accessor to make sure we get a demangled name in case it hasn't
330 // been demangled yet...
331 ConstString demangled = GetDemangledName();
332
333 if (preference == ePreferDemangledWithoutArguments) {
334 if (Language *lang = Language::FindPlugin(language: GuessLanguage())) {
335 return lang->GetDemangledFunctionNameWithoutArguments(mangled: *this);
336 }
337 }
338 if (preference == ePreferDemangled) {
339 if (demangled)
340 return demangled;
341 return m_mangled;
342 }
343 return demangled;
344}
345
346// Dump a Mangled object to stream "s". We don't force our demangled name to be
347// computed currently (we don't use the accessor).
348void Mangled::Dump(Stream *s) const {
349 if (m_mangled) {
350 *s << ", mangled = " << m_mangled;
351 }
352 if (m_demangled) {
353 const char *demangled = m_demangled.AsCString();
354 s->Printf(format: ", demangled = %s", demangled[0] ? demangled : "<error>");
355 }
356}
357
358// Dumps a debug version of this string with extra object and state information
359// to stream "s".
360void Mangled::DumpDebug(Stream *s) const {
361 s->Printf(format: "%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
362 static_cast<const void *>(this));
363 m_mangled.DumpDebug(s);
364 s->Printf(format: ", demangled = ");
365 m_demangled.DumpDebug(s);
366}
367
368// Return the size in byte that this object takes in memory. The size includes
369// the size of the objects it owns, and not the strings that it references
370// because they are shared strings.
371size_t Mangled::MemorySize() const {
372 return m_mangled.MemorySize() + m_demangled.MemorySize();
373}
374
375// We "guess" the language because we can't determine a symbol's language from
376// it's name. For example, a Pascal symbol can be mangled using the C++
377// Itanium scheme, and defined in a compilation unit within the same module as
378// other C++ units. In addition, different targets could have different ways
379// of mangling names from a given language, likewise the compilation units
380// within those targets.
381lldb::LanguageType Mangled::GuessLanguage() const {
382 lldb::LanguageType result = lldb::eLanguageTypeUnknown;
383 // Ask each language plugin to check if the mangled name belongs to it.
384 Language::ForEach(callback: [this, &result](Language *l) {
385 if (l->SymbolNameFitsToLanguage(name: *this)) {
386 result = l->GetLanguageType();
387 return false;
388 }
389 return true;
390 });
391 return result;
392}
393
394// Dump OBJ to the supplied stream S.
395Stream &operator<<(Stream &s, const Mangled &obj) {
396 if (obj.GetMangledName())
397 s << "mangled = '" << obj.GetMangledName() << "'";
398
399 ConstString demangled = obj.GetDemangledName();
400 if (demangled)
401 s << ", demangled = '" << demangled << '\'';
402 else
403 s << ", demangled = <error>";
404 return s;
405}
406
// Mangled objects are serialized with as little data as possible. The leading
// byte of every encoded object is one of the values below and tells the
// decoder which names follow.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is present; the object is fully
  /// described by this single zero byte.
  Empty = 0u,
  /// Only a demangled name is present, so only that name is stored.
  DemangledOnly = 1u,
  /// The demangled name can be recomputed from the mangled name (the two are
  /// mangled/demangled counterparts), so storing the mangled name suffices.
  MangledOnly = 2u,
  /// Two unrelated names are present and both must be stored. This happens
  /// when the "mangled" name is not a true mangled name but is still needed
  /// to look a symbol up by name and type in the symbol table. Objective C
  /// symbols are an example: for "OBJC_CLASS_$_NSValue" the mangled name is
  /// set to "OBJC_CLASS_$_NSValue" and the demangled name is manually set to
  /// "NSValue". Demangling "OBJC_CLASS_$_NSValue" would fail, yet both names
  /// have to be preserved.
  MangledAndDemangled = 3u
};
432
433bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
434 const StringTableReader &strtab) {
435 m_mangled.Clear();
436 m_demangled.Clear();
437 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
438 switch (encoding) {
439 case Empty:
440 return true;
441
442 case DemangledOnly:
443 m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
444 return true;
445
446 case MangledOnly:
447 m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
448 return true;
449
450 case MangledAndDemangled:
451 m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
452 m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
453 return true;
454 }
455 return false;
456}
457/// The encoding format for the Mangled object is as follows:
458///
459/// uint8_t encoding;
460/// char str1[]; (only if DemangledOnly, MangledOnly)
461/// char str2[]; (only if MangledAndDemangled)
462///
463/// The strings are stored as NULL terminated UTF8 strings and str1 and str2
464/// are only saved if we need them based on the encoding.
465///
466/// Some mangled names have a mangled name that can be demangled by the built
467/// in demanglers. These kinds of mangled objects know when the mangled and
468/// demangled names are the counterparts for each other. This is done because
469/// demangling is very expensive and avoiding demangling the same name twice
470/// saves us a lot of compute time. For these kinds of names we only need to
471/// save the mangled name and have the encoding set to "MangledOnly".
472///
473/// If a mangled obejct has only a demangled name, then we save only that string
474/// and have the encoding set to "DemangledOnly".
475///
476/// Some mangled objects have both mangled and demangled names, but the
477/// demangled name can not be computed from the mangled name. This is often used
478/// for runtime named, like Objective C runtime V2 and V3 names. Both these
479/// names must be saved and the encoding is set to "MangledAndDemangled".
480///
481/// For a Mangled object with no names, we only need to set the encoding to
482/// "Empty" and not store any string values.
483void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
484 MangledEncoding encoding = Empty;
485 if (m_mangled) {
486 encoding = MangledOnly;
487 if (m_demangled) {
488 // We have both mangled and demangled names. If the demangled name is the
489 // counterpart of the mangled name, then we only need to save the mangled
490 // named. If they are different, we need to save both.
491 ConstString s;
492 if (!(m_mangled.GetMangledCounterpart(counterpart&: s) && s == m_demangled))
493 encoding = MangledAndDemangled;
494 }
495 } else if (m_demangled) {
496 encoding = DemangledOnly;
497 }
498 file.AppendU8(value: encoding);
499 switch (encoding) {
500 case Empty:
501 break;
502 case DemangledOnly:
503 file.AppendU32(value: strtab.Add(s: m_demangled));
504 break;
505 case MangledOnly:
506 file.AppendU32(value: strtab.Add(s: m_mangled));
507 break;
508 case MangledAndDemangled:
509 file.AppendU32(value: strtab.Add(s: m_mangled));
510 file.AppendU32(value: strtab.Add(s: m_demangled));
511 break;
512 }
513}
514

// Source: lldb/source/Core/Mangled.cpp