1//===-- Mangled.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Mangled.h"
10
11#include "lldb/Core/DataFileCache.h"
12#include "lldb/Core/RichManglingContext.h"
13#include "lldb/Target/Language.h"
14#include "lldb/Utility/ConstString.h"
15#include "lldb/Utility/DataEncoder.h"
16#include "lldb/Utility/LLDBLog.h"
17#include "lldb/Utility/Log.h"
18#include "lldb/Utility/RegularExpression.h"
19#include "lldb/Utility/Stream.h"
20#include "lldb/lldb-enumerations.h"
21
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Demangle/Demangle.h"
24#include "llvm/Support/Compiler.h"
25
26#include <mutex>
27#include <string>
28#include <utility>
29
30#include <cstdlib>
31#include <cstring>
32using namespace lldb_private;
33
34static inline bool cstring_is_mangled(llvm::StringRef s) {
35 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
36}
37
38#pragma mark Mangled
39
40Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
41 if (name.empty())
42 return Mangled::eManglingSchemeNone;
43
44 if (name.startswith("?"))
45 return Mangled::eManglingSchemeMSVC;
46
47 if (name.startswith("_R"))
48 return Mangled::eManglingSchemeRustV0;
49
50 if (name.startswith("_D"))
51 return Mangled::eManglingSchemeD;
52
53 if (name.startswith("_Z"))
54 return Mangled::eManglingSchemeItanium;
55
56 // ___Z is a clang extension of block invocations
57 if (name.startswith("___Z"))
58 return Mangled::eManglingSchemeItanium;
59
60 return Mangled::eManglingSchemeNone;
61}
62
63Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
64 if (s)
65 SetValue(s);
66}
67
68Mangled::Mangled(llvm::StringRef name) {
69 if (!name.empty())
70 SetValue(ConstString(name));
71}
72
73// Convert to bool operator. This allows code to check any Mangled objects
74// to see if they contain anything valid using code such as:
75//
76// Mangled mangled(...);
77// if (mangled)
78// { ...
79Mangled::operator bool() const { return m_mangled || m_demangled; }
80
81// Clear the mangled and demangled values.
82void Mangled::Clear() {
83 m_mangled.Clear();
84 m_demangled.Clear();
85}
86
87// Compare the string values.
88int Mangled::Compare(const Mangled &a, const Mangled &b) {
89 return ConstString::Compare(a.GetName(ePreferMangled),
90 b.GetName(ePreferMangled));
91}
92
93// Set the string value in this objects. If "mangled" is true, then the mangled
94// named is set with the new value in "s", else the demangled name is set.
95void Mangled::SetValue(ConstString s, bool mangled) {
96 if (s) {
97 if (mangled) {
98 m_demangled.Clear();
99 m_mangled = s;
100 } else {
101 m_demangled = s;
102 m_mangled.Clear();
103 }
104 } else {
105 m_demangled.Clear();
106 m_mangled.Clear();
107 }
108}
109
110void Mangled::SetValue(ConstString name) {
111 if (name) {
112 if (cstring_is_mangled(name.GetStringRef())) {
113 m_demangled.Clear();
114 m_mangled = name;
115 } else {
116 m_demangled = name;
117 m_mangled.Clear();
118 }
119 } else {
120 m_demangled.Clear();
121 m_mangled.Clear();
122 }
123}
124
125// Local helpers for different demangling implementations.
126static char *GetMSVCDemangledStr(const char *M) {
127 char *demangled_cstr = llvm::microsoftDemangle(
128 M, nullptr, nullptr, nullptr, nullptr,
129 llvm::MSDemangleFlags(
130 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
131 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
132
133 if (Log *log = GetLog(LLDBLog::Demangle)) {
134 if (demangled_cstr && demangled_cstr[0])
135 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M, demangled_cstr);
136 else
137 LLDB_LOGF(log, "demangled msvc: %s -> error", M);
138 }
139
140 return demangled_cstr;
141}
142
143static char *GetItaniumDemangledStr(const char *M) {
144 char *demangled_cstr = nullptr;
145
146 llvm::ItaniumPartialDemangler ipd;
147 bool err = ipd.partialDemangle(M);
148 if (!err) {
149 // Default buffer and size (will realloc in case it's too small).
150 size_t demangled_size = 80;
151 demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
152 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
153
154 assert(demangled_cstr &&
155 "finishDemangle must always succeed if partialDemangle did");
156 assert(demangled_cstr[demangled_size - 1] == '\0' &&
157 "Expected demangled_size to return length including trailing null");
158 }
159
160 if (Log *log = GetLog(LLDBLog::Demangle)) {
161 if (demangled_cstr)
162 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
163 else
164 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
165 }
166
167 return demangled_cstr;
168}
169
170static char *GetRustV0DemangledStr(const char *M) {
171 char *demangled_cstr = llvm::rustDemangle(M);
172
173 if (Log *log = GetLog(LLDBLog::Demangle)) {
174 if (demangled_cstr && demangled_cstr[0])
175 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
176 else
177 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M);
178 }
179
180 return demangled_cstr;
181}
182
183static char *GetDLangDemangledStr(const char *M) {
184 char *demangled_cstr = llvm::dlangDemangle(M);
185
186 if (Log *log = GetLog(LLDBLog::Demangle)) {
187 if (demangled_cstr && demangled_cstr[0])
188 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
189 else
190 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
191 }
192
193 return demangled_cstr;
194}
195
196// Explicit demangling for scheduled requests during batch processing. This
197// makes use of ItaniumPartialDemangler's rich demangle info
198bool Mangled::GetRichManglingInfo(RichManglingContext &context,
199 SkipMangledNameFn *skip_mangled_name) {
200 // Others are not meant to arrive here. ObjC names or C's main() for example
201 // have their names stored in m_demangled, while m_mangled is empty.
202 assert(m_mangled);
203
204 // Check whether or not we are interested in this name at all.
205 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
206 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
207 return false;
208
209 switch (scheme) {
210 case eManglingSchemeNone:
211 // The current mangled_name_filter would allow llvm_unreachable here.
212 return false;
213
214 case eManglingSchemeItanium:
215 // We want the rich mangling info here, so we don't care whether or not
216 // there is a demangled string in the pool already.
217 return context.FromItaniumName(m_mangled);
218
219 case eManglingSchemeMSVC: {
220 // We have no rich mangling for MSVC-mangled names yet, so first try to
221 // demangle it if necessary.
222 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
223 if (char *d = GetMSVCDemangledStr(m_mangled.GetCString())) {
224 // Without the rich mangling info we have to demangle the full name.
225 // Copy it to string pool and connect the counterparts to accelerate
226 // later access in GetDemangledName().
227 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
228 m_mangled);
229 ::free(d);
230 } else {
231 m_demangled.SetCString("");
232 }
233 }
234
235 if (m_demangled.IsEmpty()) {
236 // Cannot demangle it, so don't try parsing.
237 return false;
238 } else {
239 // Demangled successfully, we can try and parse it with
240 // CPlusPlusLanguage::MethodName.
241 return context.FromCxxMethodName(m_demangled);
242 }
243 }
244
245 case eManglingSchemeRustV0:
246 case eManglingSchemeD:
247 // Rich demangling scheme is not supported
248 return false;
249 }
250 llvm_unreachable("Fully covered switch above!");
251}
252
253// Generate the demangled name on demand using this accessor. Code in this
254// class will need to use this accessor if it wishes to decode the demangled
255// name. The result is cached and will be kept until a new string value is
256// supplied to this object, or until the end of the object's lifetime.
257ConstString Mangled::GetDemangledName() const {
258 // Check to make sure we have a valid mangled name and that we haven't
259 // already decoded our mangled name.
260 if (m_mangled && m_demangled.IsNull()) {
261 // Don't bother running anything that isn't mangled
262 const char *mangled_name = m_mangled.GetCString();
263 ManglingScheme mangling_scheme =
264 GetManglingScheme(m_mangled.GetStringRef());
265 if (mangling_scheme != eManglingSchemeNone &&
266 !m_mangled.GetMangledCounterpart(m_demangled)) {
267 // We didn't already mangle this name, demangle it and if all goes well
268 // add it to our map.
269 char *demangled_name = nullptr;
270 switch (mangling_scheme) {
271 case eManglingSchemeMSVC:
272 demangled_name = GetMSVCDemangledStr(mangled_name);
273 break;
274 case eManglingSchemeItanium: {
275 demangled_name = GetItaniumDemangledStr(mangled_name);
276 break;
277 }
278 case eManglingSchemeRustV0:
279 demangled_name = GetRustV0DemangledStr(mangled_name);
280 break;
281 case eManglingSchemeD:
282 demangled_name = GetDLangDemangledStr(mangled_name);
283 break;
284 case eManglingSchemeNone:
285 llvm_unreachable("eManglingSchemeNone was handled already");
286 }
287 if (demangled_name) {
288 m_demangled.SetStringWithMangledCounterpart(
289 llvm::StringRef(demangled_name), m_mangled);
290 free(demangled_name);
291 }
292 }
293 if (m_demangled.IsNull()) {
294 // Set the demangled string to the empty string to indicate we tried to
295 // parse it once and failed.
296 m_demangled.SetCString("");
297 }
298 }
299
300 return m_demangled;
301}
302
303ConstString Mangled::GetDisplayDemangledName() const {
304 return GetDemangledName();
305}
306
307bool Mangled::NameMatches(const RegularExpression &regex) const {
308 if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
309 return true;
310
311 ConstString demangled = GetDemangledName();
312 return demangled && regex.Execute(demangled.GetStringRef());
313}
314
315// Get the demangled name if there is one, else return the mangled name.
316ConstString Mangled::GetName(Mangled::NamePreference preference) const {
317 if (preference == ePreferMangled && m_mangled)
318 return m_mangled;
319
320 // Call the accessor to make sure we get a demangled name in case it hasn't
321 // been demangled yet...
322 ConstString demangled = GetDemangledName();
323
324 if (preference == ePreferDemangledWithoutArguments) {
325 if (Language *lang = Language::FindPlugin(GuessLanguage())) {
326 return lang->GetDemangledFunctionNameWithoutArguments(*this);
327 }
328 }
329 if (preference == ePreferDemangled) {
330 if (demangled)
331 return demangled;
332 return m_mangled;
333 }
334 return demangled;
335}
336
337// Dump a Mangled object to stream "s". We don't force our demangled name to be
338// computed currently (we don't use the accessor).
339void Mangled::Dump(Stream *s) const {
340 if (m_mangled) {
341 *s << ", mangled = " << m_mangled;
342 }
343 if (m_demangled) {
344 const char *demangled = m_demangled.AsCString();
345 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
346 }
347}
348
349// Dumps a debug version of this string with extra object and state information
350// to stream "s".
351void Mangled::DumpDebug(Stream *s) const {
352 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
353 static_cast<const void *>(this));
354 m_mangled.DumpDebug(s);
355 s->Printf(", demangled = ");
356 m_demangled.DumpDebug(s);
357}
358
359// Return the size in byte that this object takes in memory. The size includes
360// the size of the objects it owns, and not the strings that it references
361// because they are shared strings.
362size_t Mangled::MemorySize() const {
363 return m_mangled.MemorySize() + m_demangled.MemorySize();
364}
365
366// We "guess" the language because we can't determine a symbol's language from
367// it's name. For example, a Pascal symbol can be mangled using the C++
368// Itanium scheme, and defined in a compilation unit within the same module as
369// other C++ units. In addition, different targets could have different ways
370// of mangling names from a given language, likewise the compilation units
371// within those targets.
372lldb::LanguageType Mangled::GuessLanguage() const {
373 lldb::LanguageType result = lldb::eLanguageTypeUnknown;
374 // Ask each language plugin to check if the mangled name belongs to it.
375 Language::ForEach([this, &result](Language *l) {
376 if (l->SymbolNameFitsToLanguage(*this)) {
377 result = l->GetLanguageType();
378 return false;
379 }
380 return true;
381 });
382 return result;
383}
384
385// Dump OBJ to the supplied stream S.
386Stream &operator<<(Stream &s, const Mangled &obj) {
387 if (obj.GetMangledName())
388 s << "mangled = '" << obj.GetMangledName() << "'";
389
390 ConstString demangled = obj.GetDemangledName();
391 if (demangled)
392 s << ", demangled = '" << demangled << '\'';
393 else
394 s << ", demangled = <error>";
395 return s;
396}
397
// Tag that selects how a Mangled object is serialized. Encode() picks the
// smallest representation that still lets Decode() restore the object, and
// writes this tag as the first byte.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is present. Only the tag itself
  /// is written.
  Empty = 0u,
  /// Only a demangled name is present, so only that name is written.
  DemangledOnly = 1u,
  /// Only the mangled name is written. Used when there is no demangled name,
  /// or when the demangled name is the counterpart of the mangled name and
  /// can therefore be recomputed from it.
  MangledOnly = 2u,
  /// Both names are written because they are unrelated. This happens when
  /// the "mangled" name is not a true mangled name but is kept so a symbol
  /// can be looked up by name and type in the symbol table. Objective C
  /// symbols are an example: the mangled name is set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name is manually set to
  /// "NSValue". Demangling "OBJC_CLASS_$_NSValue" would fail, yet both names
  /// must be preserved.
  MangledAndDemangled = 3u
};
423
424bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
425 const StringTableReader &strtab) {
426 m_mangled.Clear();
427 m_demangled.Clear();
428 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
429 switch (encoding) {
430 case Empty:
431 return true;
432
433 case DemangledOnly:
434 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
435 return true;
436
437 case MangledOnly:
438 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
439 return true;
440
441 case MangledAndDemangled:
442 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
443 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
444 return true;
445 }
446 return false;
447}
448/// The encoding format for the Mangled object is as follows:
449///
450/// uint8_t encoding;
451/// char str1[]; (only if DemangledOnly, MangledOnly)
452/// char str2[]; (only if MangledAndDemangled)
453///
454/// The strings are stored as NULL terminated UTF8 strings and str1 and str2
455/// are only saved if we need them based on the encoding.
456///
457/// Some mangled names have a mangled name that can be demangled by the built
458/// in demanglers. These kinds of mangled objects know when the mangled and
459/// demangled names are the counterparts for each other. This is done because
460/// demangling is very expensive and avoiding demangling the same name twice
461/// saves us a lot of compute time. For these kinds of names we only need to
462/// save the mangled name and have the encoding set to "MangledOnly".
463///
464/// If a mangled obejct has only a demangled name, then we save only that string
465/// and have the encoding set to "DemangledOnly".
466///
467/// Some mangled objects have both mangled and demangled names, but the
468/// demangled name can not be computed from the mangled name. This is often used
469/// for runtime named, like Objective C runtime V2 and V3 names. Both these
470/// names must be saved and the encoding is set to "MangledAndDemangled".
471///
472/// For a Mangled object with no names, we only need to set the encoding to
473/// "Empty" and not store any string values.
474void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
475 MangledEncoding encoding = Empty;
476 if (m_mangled) {
477 encoding = MangledOnly;
478 if (m_demangled) {
479 // We have both mangled and demangled names. If the demangled name is the
480 // counterpart of the mangled name, then we only need to save the mangled
481 // named. If they are different, we need to save both.
482 ConstString s;
483 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
484 encoding = MangledAndDemangled;
485 }
486 } else if (m_demangled) {
487 encoding = DemangledOnly;
488 }
489 file.AppendU8(encoding);
490 switch (encoding) {
491 case Empty:
492 break;
493 case DemangledOnly:
494 file.AppendU32(strtab.Add(m_demangled));
495 break;
496 case MangledOnly:
497 file.AppendU32(strtab.Add(m_mangled));
498 break;
499 case MangledAndDemangled:
500 file.AppendU32(strtab.Add(m_mangled));
501 file.AppendU32(strtab.Add(m_demangled));
502 break;
503 }
504}
505

source code of lldb/source/Core/Mangled.cpp