1//===-- Mangled.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Mangled.h"
10
11#include "lldb/Core/DataFileCache.h"
12#include "lldb/Core/RichManglingContext.h"
13#include "lldb/Target/Language.h"
14#include "lldb/Utility/ConstString.h"
15#include "lldb/Utility/DataEncoder.h"
16#include "lldb/Utility/LLDBLog.h"
17#include "lldb/Utility/Log.h"
18#include "lldb/Utility/RegularExpression.h"
19#include "lldb/Utility/Stream.h"
20#include "lldb/lldb-enumerations.h"
21
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Demangle/Demangle.h"
24#include "llvm/Support/Compiler.h"
25
26#include <mutex>
27#include <string>
28#include <string_view>
29#include <utility>
30
31#include <cstdlib>
32#include <cstring>
33using namespace lldb_private;
34
35static inline bool cstring_is_mangled(llvm::StringRef s) {
36 return Mangled::GetManglingScheme(name: s) != Mangled::eManglingSchemeNone;
37}
38
39#pragma mark Mangled
40
41Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
42 if (name.empty())
43 return Mangled::eManglingSchemeNone;
44
45 if (name.starts_with(Prefix: "?"))
46 return Mangled::eManglingSchemeMSVC;
47
48 if (name.starts_with(Prefix: "_R"))
49 return Mangled::eManglingSchemeRustV0;
50
51 if (name.starts_with(Prefix: "_D"))
52 return Mangled::eManglingSchemeD;
53
54 if (name.starts_with(Prefix: "_Z"))
55 return Mangled::eManglingSchemeItanium;
56
57 // ___Z is a clang extension of block invocations
58 if (name.starts_with(Prefix: "___Z"))
59 return Mangled::eManglingSchemeItanium;
60
61 // Swift's older style of mangling used "_T" as a mangling prefix. This can
62 // lead to false positives with other symbols that just so happen to start
63 // with "_T". To minimize the chance of that happening, we only return true
64 // for select old-style swift mangled names. The known cases are ObjC classes
65 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
66 // Protocols are prefixed with "_TtP".
67 if (name.starts_with(Prefix: "_TtC") || name.starts_with(Prefix: "_TtGC") ||
68 name.starts_with(Prefix: "_TtP"))
69 return Mangled::eManglingSchemeSwift;
70
71 // Swift 4.2 used "$S" and "_$S".
72 // Swift 5 and onward uses "$s" and "_$s".
73 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
74 if (name.starts_with(Prefix: "$S") || name.starts_with(Prefix: "_$S") ||
75 name.starts_with(Prefix: "$s") || name.starts_with(Prefix: "_$s") ||
76 name.starts_with(Prefix: "@__swiftmacro_"))
77 return Mangled::eManglingSchemeSwift;
78
79 return Mangled::eManglingSchemeNone;
80}
81
82Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
83 if (s)
84 SetValue(s);
85}
86
87Mangled::Mangled(llvm::StringRef name) {
88 if (!name.empty())
89 SetValue(ConstString(name));
90}
91
92// Convert to bool operator. This allows code to check any Mangled objects
93// to see if they contain anything valid using code such as:
94//
95// Mangled mangled(...);
96// if (mangled)
97// { ...
98Mangled::operator bool() const { return m_mangled || m_demangled; }
99
100// Clear the mangled and demangled values.
101void Mangled::Clear() {
102 m_mangled.Clear();
103 m_demangled.Clear();
104}
105
106// Compare the string values.
107int Mangled::Compare(const Mangled &a, const Mangled &b) {
108 return ConstString::Compare(lhs: a.GetName(preference: ePreferMangled),
109 rhs: b.GetName(preference: ePreferMangled));
110}
111
112void Mangled::SetValue(ConstString name) {
113 if (name) {
114 if (cstring_is_mangled(s: name.GetStringRef())) {
115 m_demangled.Clear();
116 m_mangled = name;
117 } else {
118 m_demangled = name;
119 m_mangled.Clear();
120 }
121 } else {
122 m_demangled.Clear();
123 m_mangled.Clear();
124 }
125}
126
127// Local helpers for different demangling implementations.
128static char *GetMSVCDemangledStr(std::string_view M) {
129 char *demangled_cstr = llvm::microsoftDemangle(
130 mangled_name: M, n_read: nullptr, status: nullptr,
131 Flags: llvm::MSDemangleFlags(
132 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
133 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
134
135 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
136 if (demangled_cstr && demangled_cstr[0])
137 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
138 else
139 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
140 }
141
142 return demangled_cstr;
143}
144
145static char *GetItaniumDemangledStr(const char *M) {
146 char *demangled_cstr = nullptr;
147
148 llvm::ItaniumPartialDemangler ipd;
149 bool err = ipd.partialDemangle(MangledName: M);
150 if (!err) {
151 // Default buffer and size (will realloc in case it's too small).
152 size_t demangled_size = 80;
153 demangled_cstr = static_cast<char *>(std::malloc(size: demangled_size));
154 demangled_cstr = ipd.finishDemangle(Buf: demangled_cstr, N: &demangled_size);
155
156 assert(demangled_cstr &&
157 "finishDemangle must always succeed if partialDemangle did");
158 assert(demangled_cstr[demangled_size - 1] == '\0' &&
159 "Expected demangled_size to return length including trailing null");
160 }
161
162 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
163 if (demangled_cstr)
164 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
165 else
166 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
167 }
168
169 return demangled_cstr;
170}
171
172static char *GetRustV0DemangledStr(std::string_view M) {
173 char *demangled_cstr = llvm::rustDemangle(MangledName: M);
174
175 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
176 if (demangled_cstr && demangled_cstr[0])
177 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
178 else
179 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M);
180 }
181
182 return demangled_cstr;
183}
184
185static char *GetDLangDemangledStr(std::string_view M) {
186 char *demangled_cstr = llvm::dlangDemangle(MangledName: M);
187
188 if (Log *log = GetLog(mask: LLDBLog::Demangle)) {
189 if (demangled_cstr && demangled_cstr[0])
190 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
191 else
192 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
193 }
194
195 return demangled_cstr;
196}
197
198// Explicit demangling for scheduled requests during batch processing. This
199// makes use of ItaniumPartialDemangler's rich demangle info
200bool Mangled::GetRichManglingInfo(RichManglingContext &context,
201 SkipMangledNameFn *skip_mangled_name) {
202 // Others are not meant to arrive here. ObjC names or C's main() for example
203 // have their names stored in m_demangled, while m_mangled is empty.
204 assert(m_mangled);
205
206 // Check whether or not we are interested in this name at all.
207 ManglingScheme scheme = GetManglingScheme(name: m_mangled.GetStringRef());
208 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
209 return false;
210
211 switch (scheme) {
212 case eManglingSchemeNone:
213 // The current mangled_name_filter would allow llvm_unreachable here.
214 return false;
215
216 case eManglingSchemeItanium:
217 // We want the rich mangling info here, so we don't care whether or not
218 // there is a demangled string in the pool already.
219 return context.FromItaniumName(mangled: m_mangled);
220
221 case eManglingSchemeMSVC: {
222 // We have no rich mangling for MSVC-mangled names yet, so first try to
223 // demangle it if necessary.
224 if (!m_demangled && !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) {
225 if (char *d = GetMSVCDemangledStr(M: m_mangled)) {
226 // Without the rich mangling info we have to demangle the full name.
227 // Copy it to string pool and connect the counterparts to accelerate
228 // later access in GetDemangledName().
229 m_demangled.SetStringWithMangledCounterpart(demangled: llvm::StringRef(d),
230 mangled: m_mangled);
231 ::free(ptr: d);
232 } else {
233 m_demangled.SetCString("");
234 }
235 }
236
237 if (m_demangled.IsEmpty()) {
238 // Cannot demangle it, so don't try parsing.
239 return false;
240 } else {
241 // Demangled successfully, we can try and parse it with
242 // CPlusPlusLanguage::MethodName.
243 return context.FromCxxMethodName(demangled: m_demangled);
244 }
245 }
246
247 case eManglingSchemeRustV0:
248 case eManglingSchemeD:
249 case eManglingSchemeSwift:
250 // Rich demangling scheme is not supported
251 return false;
252 }
253 llvm_unreachable("Fully covered switch above!");
254}
255
256// Generate the demangled name on demand using this accessor. Code in this
257// class will need to use this accessor if it wishes to decode the demangled
258// name. The result is cached and will be kept until a new string value is
259// supplied to this object, or until the end of the object's lifetime.
260ConstString Mangled::GetDemangledName() const {
261 // Check to make sure we have a valid mangled name and that we haven't
262 // already decoded our mangled name.
263 if (m_mangled && m_demangled.IsNull()) {
264 // Don't bother running anything that isn't mangled
265 const char *mangled_name = m_mangled.GetCString();
266 ManglingScheme mangling_scheme =
267 GetManglingScheme(name: m_mangled.GetStringRef());
268 if (mangling_scheme != eManglingSchemeNone &&
269 !m_mangled.GetMangledCounterpart(counterpart&: m_demangled)) {
270 // We didn't already mangle this name, demangle it and if all goes well
271 // add it to our map.
272 char *demangled_name = nullptr;
273 switch (mangling_scheme) {
274 case eManglingSchemeMSVC:
275 demangled_name = GetMSVCDemangledStr(M: mangled_name);
276 break;
277 case eManglingSchemeItanium: {
278 demangled_name = GetItaniumDemangledStr(M: mangled_name);
279 break;
280 }
281 case eManglingSchemeRustV0:
282 demangled_name = GetRustV0DemangledStr(M: m_mangled);
283 break;
284 case eManglingSchemeD:
285 demangled_name = GetDLangDemangledStr(M: m_mangled);
286 break;
287 case eManglingSchemeSwift:
288 // Demangling a swift name requires the swift compiler. This is
289 // explicitly unsupported on llvm.org.
290 break;
291 case eManglingSchemeNone:
292 llvm_unreachable("eManglingSchemeNone was handled already");
293 }
294 if (demangled_name) {
295 m_demangled.SetStringWithMangledCounterpart(
296 demangled: llvm::StringRef(demangled_name), mangled: m_mangled);
297 free(ptr: demangled_name);
298 }
299 }
300 if (m_demangled.IsNull()) {
301 // Set the demangled string to the empty string to indicate we tried to
302 // parse it once and failed.
303 m_demangled.SetCString("");
304 }
305 }
306
307 return m_demangled;
308}
309
310ConstString Mangled::GetDisplayDemangledName() const {
311 return GetDemangledName();
312}
313
314bool Mangled::NameMatches(const RegularExpression &regex) const {
315 if (m_mangled && regex.Execute(string: m_mangled.GetStringRef()))
316 return true;
317
318 ConstString demangled = GetDemangledName();
319 return demangled && regex.Execute(string: demangled.GetStringRef());
320}
321
322// Get the demangled name if there is one, else return the mangled name.
323ConstString Mangled::GetName(Mangled::NamePreference preference) const {
324 if (preference == ePreferMangled && m_mangled)
325 return m_mangled;
326
327 // Call the accessor to make sure we get a demangled name in case it hasn't
328 // been demangled yet...
329 ConstString demangled = GetDemangledName();
330
331 if (preference == ePreferDemangledWithoutArguments) {
332 if (Language *lang = Language::FindPlugin(language: GuessLanguage())) {
333 return lang->GetDemangledFunctionNameWithoutArguments(mangled: *this);
334 }
335 }
336 if (preference == ePreferDemangled) {
337 if (demangled)
338 return demangled;
339 return m_mangled;
340 }
341 return demangled;
342}
343
344// Dump a Mangled object to stream "s". We don't force our demangled name to be
345// computed currently (we don't use the accessor).
346void Mangled::Dump(Stream *s) const {
347 if (m_mangled) {
348 *s << ", mangled = " << m_mangled;
349 }
350 if (m_demangled) {
351 const char *demangled = m_demangled.AsCString();
352 s->Printf(format: ", demangled = %s", demangled[0] ? demangled : "<error>");
353 }
354}
355
356// Dumps a debug version of this string with extra object and state information
357// to stream "s".
358void Mangled::DumpDebug(Stream *s) const {
359 s->Printf(format: "%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
360 static_cast<const void *>(this));
361 m_mangled.DumpDebug(s);
362 s->Printf(format: ", demangled = ");
363 m_demangled.DumpDebug(s);
364}
365
366// Return the size in byte that this object takes in memory. The size includes
367// the size of the objects it owns, and not the strings that it references
368// because they are shared strings.
369size_t Mangled::MemorySize() const {
370 return m_mangled.MemorySize() + m_demangled.MemorySize();
371}
372
373// We "guess" the language because we can't determine a symbol's language from
374// it's name. For example, a Pascal symbol can be mangled using the C++
375// Itanium scheme, and defined in a compilation unit within the same module as
376// other C++ units. In addition, different targets could have different ways
377// of mangling names from a given language, likewise the compilation units
378// within those targets.
379lldb::LanguageType Mangled::GuessLanguage() const {
380 lldb::LanguageType result = lldb::eLanguageTypeUnknown;
381 // Ask each language plugin to check if the mangled name belongs to it.
382 Language::ForEach(callback: [this, &result](Language *l) {
383 if (l->SymbolNameFitsToLanguage(name: *this)) {
384 result = l->GetLanguageType();
385 return false;
386 }
387 return true;
388 });
389 return result;
390}
391
392// Dump OBJ to the supplied stream S.
393Stream &operator<<(Stream &s, const Mangled &obj) {
394 if (obj.GetMangledName())
395 s << "mangled = '" << obj.GetMangledName() << "'";
396
397 ConstString demangled = obj.GetDemangledName();
398 if (demangled)
399 s << ", demangled = '" << demangled << '\'';
400 else
401 s << ", demangled = <error>";
402 return s;
403}
404
// When encoding Mangled objects we can get away with encoding as little
// information as is required. The enumeration below helps us to efficiently
// encode Mangled objects. The selected value is written as a single byte by
// Mangled::Encode() and read back by Mangled::Decode().
enum MangledEncoding {
  /// If the Mangled object has neither a mangled name nor a demangled name we
  /// can encode the object with one zero byte using the Empty enumeration.
  Empty = 0u,
  /// If the Mangled object has only a demangled name and no mangled name, we
  /// can encode only the demangled name.
  DemangledOnly = 1u,
  /// If the demangled name can be calculated from the mangled name (it is the
  /// mangled/demangled counterpart), then we only need to encode the mangled
  /// name as the demangled name can be recomputed.
  MangledOnly = 2u,
  /// If we have a Mangled object with two different names that are not related
  /// then we need to save both strings. This can happen if we have a name that
  /// isn't a true mangled name, but we want to be able to look up a symbol by
  /// name and type in the symbol table. We do this for Objective C symbols like
  /// "OBJC_CLASS_$_NSValue" where the mangled name will be set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to
  /// "NSValue". If we tried to demangle the name "OBJC_CLASS_$_NSValue" it
  /// would fail, but in these cases we want these unrelated names to be
  /// preserved.
  MangledAndDemangled = 3u
};
430
431bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
432 const StringTableReader &strtab) {
433 m_mangled.Clear();
434 m_demangled.Clear();
435 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
436 switch (encoding) {
437 case Empty:
438 return true;
439
440 case DemangledOnly:
441 m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
442 return true;
443
444 case MangledOnly:
445 m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
446 return true;
447
448 case MangledAndDemangled:
449 m_mangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
450 m_demangled.SetString(strtab.Get(offset: data.GetU32(offset_ptr)));
451 return true;
452 }
453 return false;
454}
455/// The encoding format for the Mangled object is as follows:
456///
457/// uint8_t encoding;
458/// char str1[]; (only if DemangledOnly, MangledOnly)
459/// char str2[]; (only if MangledAndDemangled)
460///
461/// The strings are stored as NULL terminated UTF8 strings and str1 and str2
462/// are only saved if we need them based on the encoding.
463///
464/// Some mangled names have a mangled name that can be demangled by the built
465/// in demanglers. These kinds of mangled objects know when the mangled and
466/// demangled names are the counterparts for each other. This is done because
467/// demangling is very expensive and avoiding demangling the same name twice
468/// saves us a lot of compute time. For these kinds of names we only need to
469/// save the mangled name and have the encoding set to "MangledOnly".
470///
471/// If a mangled obejct has only a demangled name, then we save only that string
472/// and have the encoding set to "DemangledOnly".
473///
474/// Some mangled objects have both mangled and demangled names, but the
475/// demangled name can not be computed from the mangled name. This is often used
476/// for runtime named, like Objective C runtime V2 and V3 names. Both these
477/// names must be saved and the encoding is set to "MangledAndDemangled".
478///
479/// For a Mangled object with no names, we only need to set the encoding to
480/// "Empty" and not store any string values.
481void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
482 MangledEncoding encoding = Empty;
483 if (m_mangled) {
484 encoding = MangledOnly;
485 if (m_demangled) {
486 // We have both mangled and demangled names. If the demangled name is the
487 // counterpart of the mangled name, then we only need to save the mangled
488 // named. If they are different, we need to save both.
489 ConstString s;
490 if (!(m_mangled.GetMangledCounterpart(counterpart&: s) && s == m_demangled))
491 encoding = MangledAndDemangled;
492 }
493 } else if (m_demangled) {
494 encoding = DemangledOnly;
495 }
496 file.AppendU8(value: encoding);
497 switch (encoding) {
498 case Empty:
499 break;
500 case DemangledOnly:
501 file.AppendU32(value: strtab.Add(s: m_demangled));
502 break;
503 case MangledOnly:
504 file.AppendU32(value: strtab.Add(s: m_mangled));
505 break;
506 case MangledAndDemangled:
507 file.AppendU32(value: strtab.Add(s: m_mangled));
508 file.AppendU32(value: strtab.Add(s: m_demangled));
509 break;
510 }
511}
512

source code of lldb/source/Core/Mangled.cpp