//===--- DumpAST.cpp - Serialize clang AST to LSP -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 | |
9 | #include "DumpAST.h" |
10 | #include "Protocol.h" |
11 | #include "SourceCode.h" |
12 | #include "support/Logger.h" |
13 | #include "clang/AST/ASTTypeTraits.h" |
14 | #include "clang/AST/Expr.h" |
15 | #include "clang/AST/ExprCXX.h" |
16 | #include "clang/AST/NestedNameSpecifier.h" |
17 | #include "clang/AST/PrettyPrinter.h" |
18 | #include "clang/AST/RecursiveASTVisitor.h" |
19 | #include "clang/AST/TextNodeDumper.h" |
20 | #include "clang/AST/Type.h" |
21 | #include "clang/AST/TypeLoc.h" |
22 | #include "clang/Basic/Specifiers.h" |
23 | #include "clang/Tooling/Syntax/Tokens.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | #include "llvm/Support/raw_ostream.h" |
26 | #include <optional> |
27 | |
28 | namespace clang { |
29 | namespace clangd { |
30 | namespace { |
31 | |
32 | using llvm::raw_ostream; |
33 | template <typename Print> std::string toString(const Print &C) { |
34 | std::string Result; |
35 | llvm::raw_string_ostream OS(Result); |
36 | C(OS); |
37 | return std::move(OS.str()); |
38 | } |
39 | |
40 | bool isInjectedClassName(Decl *D) { |
41 | if (const auto *CRD = llvm::dyn_cast<CXXRecordDecl>(Val: D)) |
42 | return CRD->isInjectedClassName(); |
43 | return false; |
44 | } |
45 | |
46 | class DumpVisitor : public RecursiveASTVisitor<DumpVisitor> { |
47 | using Base = RecursiveASTVisitor<DumpVisitor>; |
48 | |
49 | const syntax::TokenBuffer &Tokens; |
50 | const ASTContext &Ctx; |
51 | |
52 | // Pointers are into 'children' vector. |
53 | // They remain valid because while a node is on the stack we only add |
54 | // descendants, not siblings. |
55 | std::vector<ASTNode *> Stack; |
56 | |
57 | // Generic logic used to handle traversal of all node kinds. |
58 | |
59 | template <typename T> |
60 | bool traverseNodePre(llvm::StringRef Role, const T &Node) { |
61 | if (Stack.empty()) { |
62 | assert(Root.role.empty()); |
63 | Stack.push_back(x: &Root); |
64 | } else { |
65 | Stack.back()->children.emplace_back(); |
66 | Stack.push_back(x: &Stack.back()->children.back()); |
67 | } |
68 | auto &N = *Stack.back(); |
69 | N.role = Role.str(); |
70 | N.kind = getKind(Node); |
71 | N.detail = getDetail(Node); |
72 | N.range = getRange(Node); |
73 | N.arcana = getArcana(Node); |
74 | return true; |
75 | } |
76 | bool traverseNodePost() { |
77 | assert(!Stack.empty()); |
78 | Stack.pop_back(); |
79 | return true; |
80 | } |
81 | template <typename T, typename Callable> |
82 | bool traverseNode(llvm::StringRef Role, const T &Node, const Callable &Body) { |
83 | traverseNodePre(Role, Node); |
84 | Body(); |
85 | return traverseNodePost(); |
86 | } |
87 | |
88 | // Range: most nodes have getSourceRange(), with a couple of exceptions. |
89 | // We only return it if it's valid at both ends and there are no macros. |
90 | |
91 | template <typename T> std::optional<Range> getRange(const T &Node) { |
92 | SourceRange SR = getSourceRange(Node); |
93 | auto Spelled = Tokens.spelledForExpanded(Tokens.expandedTokens(R: SR)); |
94 | if (!Spelled) |
95 | return std::nullopt; |
96 | return halfOpenToRange( |
97 | Tokens.sourceManager(), |
98 | CharSourceRange::getCharRange(Spelled->front().location(), |
99 | Spelled->back().endLocation())); |
100 | } |
101 | template <typename T, typename = decltype(std::declval<T>().getSourceRange())> |
102 | SourceRange getSourceRange(const T &Node) { |
103 | return Node.getSourceRange(); |
104 | } |
105 | template <typename T, |
106 | typename = decltype(std::declval<T *>()->getSourceRange())> |
107 | SourceRange getSourceRange(const T *Node) { |
108 | return Node->getSourceRange(); |
109 | } |
110 | // TemplateName doesn't have a real Loc node type. |
111 | SourceRange getSourceRange(const TemplateName &Node) { return SourceRange(); } |
112 | // Attr just uses a weird method name. Maybe we should fix it instead? |
113 | SourceRange getSourceRange(const Attr *Node) { return Node->getRange(); } |
114 | |
115 | // Kind is usually the class name, without the suffix ("Type" etc). |
116 | // Where there's a set of variants instead, we use the 'Kind' enum values. |
117 | |
118 | std::string getKind(const Decl *D) { return D->getDeclKindName(); } |
119 | std::string getKind(const Stmt *S) { |
120 | std::string Result = S->getStmtClassName(); |
121 | if (llvm::StringRef(Result).ends_with(Suffix: "Stmt" ) || |
122 | llvm::StringRef(Result).ends_with(Suffix: "Expr" )) |
123 | Result.resize(n: Result.size() - 4); |
124 | return Result; |
125 | } |
126 | std::string getKind(const TypeLoc &TL) { |
127 | std::string Result; |
128 | if (TL.getTypeLocClass() == TypeLoc::Qualified) |
129 | return "Qualified" ; |
130 | return TL.getType()->getTypeClassName(); |
131 | } |
132 | std::string getKind(const TemplateArgumentLoc &TAL) { |
133 | switch (TAL.getArgument().getKind()) { |
134 | #define TEMPLATE_ARGUMENT_KIND(X) \ |
135 | case TemplateArgument::X: \ |
136 | return #X |
137 | TEMPLATE_ARGUMENT_KIND(Null); |
138 | TEMPLATE_ARGUMENT_KIND(NullPtr); |
139 | TEMPLATE_ARGUMENT_KIND(Expression); |
140 | TEMPLATE_ARGUMENT_KIND(Integral); |
141 | TEMPLATE_ARGUMENT_KIND(Pack); |
142 | TEMPLATE_ARGUMENT_KIND(Type); |
143 | TEMPLATE_ARGUMENT_KIND(Declaration); |
144 | TEMPLATE_ARGUMENT_KIND(Template); |
145 | TEMPLATE_ARGUMENT_KIND(TemplateExpansion); |
146 | TEMPLATE_ARGUMENT_KIND(StructuralValue); |
147 | #undef TEMPLATE_ARGUMENT_KIND |
148 | } |
149 | llvm_unreachable("Unhandled ArgKind enum" ); |
150 | } |
151 | std::string getKind(const NestedNameSpecifierLoc &NNSL) { |
152 | assert(NNSL.getNestedNameSpecifier()); |
153 | switch (NNSL.getNestedNameSpecifier()->getKind()) { |
154 | #define NNS_KIND(X) \ |
155 | case NestedNameSpecifier::X: \ |
156 | return #X |
157 | NNS_KIND(Identifier); |
158 | NNS_KIND(Namespace); |
159 | NNS_KIND(TypeSpec); |
160 | NNS_KIND(TypeSpecWithTemplate); |
161 | NNS_KIND(Global); |
162 | NNS_KIND(Super); |
163 | NNS_KIND(NamespaceAlias); |
164 | #undef NNS_KIND |
165 | } |
166 | llvm_unreachable("Unhandled SpecifierKind enum" ); |
167 | } |
168 | std::string getKind(const CXXCtorInitializer *CCI) { |
169 | if (CCI->isBaseInitializer()) |
170 | return "BaseInitializer" ; |
171 | if (CCI->isDelegatingInitializer()) |
172 | return "DelegatingInitializer" ; |
173 | if (CCI->isAnyMemberInitializer()) |
174 | return "MemberInitializer" ; |
175 | llvm_unreachable("Unhandled CXXCtorInitializer type" ); |
176 | } |
177 | std::string getKind(const TemplateName &TN) { |
178 | switch (TN.getKind()) { |
179 | #define TEMPLATE_KIND(X) \ |
180 | case TemplateName::X: \ |
181 | return #X; |
182 | TEMPLATE_KIND(Template); |
183 | TEMPLATE_KIND(OverloadedTemplate); |
184 | TEMPLATE_KIND(AssumedTemplate); |
185 | TEMPLATE_KIND(QualifiedTemplate); |
186 | TEMPLATE_KIND(DependentTemplate); |
187 | TEMPLATE_KIND(SubstTemplateTemplateParm); |
188 | TEMPLATE_KIND(SubstTemplateTemplateParmPack); |
189 | TEMPLATE_KIND(UsingTemplate); |
190 | #undef TEMPLATE_KIND |
191 | } |
192 | llvm_unreachable("Unhandled NameKind enum" ); |
193 | } |
194 | std::string getKind(const Attr *A) { |
195 | switch (A->getKind()) { |
196 | #define ATTR(X) \ |
197 | case attr::X: \ |
198 | return #X; |
199 | #include "clang/Basic/AttrList.inc" |
200 | #undef ATTR |
201 | } |
202 | llvm_unreachable("Unhandled attr::Kind enum" ); |
203 | } |
204 | std::string getKind(const CXXBaseSpecifier &CBS) { |
205 | // There aren't really any variants of CXXBaseSpecifier. |
206 | // To avoid special cases in the API/UI, use public/private as the kind. |
207 | return getAccessSpelling(AS: CBS.getAccessSpecifier()).str(); |
208 | } |
209 | std::string getKind(const ConceptReference *CR) { |
210 | // Again there are no variants here. |
211 | // Kind is "Concept", role is "reference" |
212 | return "Concept" ; |
213 | } |
214 | |
215 | // Detail is the single most important fact about the node. |
216 | // Often this is the name, sometimes a "kind" enum like operators or casts. |
217 | // We should avoid unbounded text, like dumping parameter lists. |
218 | |
219 | std::string getDetail(const Decl *D) { |
220 | const auto *ND = dyn_cast<NamedDecl>(D); |
221 | if (!ND || llvm::isa_and_nonnull<CXXConstructorDecl>(ND->getAsFunction()) || |
222 | isa<CXXDestructorDecl>(ND)) |
223 | return "" ; |
224 | std::string Name = toString([&](raw_ostream &OS) { ND->printName(OS); }); |
225 | if (Name.empty()) |
226 | return "(anonymous)" ; |
227 | return Name; |
228 | } |
229 | std::string getDetail(const Stmt *S) { |
230 | if (const auto *DRE = dyn_cast<DeclRefExpr>(S)) |
231 | return DRE->getNameInfo().getAsString(); |
232 | if (const auto *DSDRE = dyn_cast<DependentScopeDeclRefExpr>(S)) |
233 | return DSDRE->getNameInfo().getAsString(); |
234 | if (const auto *ME = dyn_cast<MemberExpr>(S)) |
235 | return ME->getMemberNameInfo().getAsString(); |
236 | if (const auto *CE = dyn_cast<CastExpr>(S)) |
237 | return CE->getCastKindName(); |
238 | if (const auto *BO = dyn_cast<BinaryOperator>(S)) |
239 | return BO->getOpcodeStr().str(); |
240 | if (const auto *UO = dyn_cast<UnaryOperator>(S)) |
241 | return UnaryOperator::getOpcodeStr(Op: UO->getOpcode()).str(); |
242 | if (const auto *CCO = dyn_cast<CXXConstructExpr>(S)) |
243 | return CCO->getConstructor()->getNameAsString(); |
244 | if (const auto *CTE = dyn_cast<CXXThisExpr>(S)) { |
245 | bool Const = CTE->getType()->getPointeeType().isLocalConstQualified(); |
246 | if (CTE->isImplicit()) |
247 | return Const ? "const, implicit" : "implicit" ; |
248 | if (Const) |
249 | return "const" ; |
250 | return "" ; |
251 | } |
252 | if (isa<IntegerLiteral, FloatingLiteral, FixedPointLiteral, |
253 | CharacterLiteral, ImaginaryLiteral, CXXBoolLiteralExpr>(S)) |
254 | return toString([&](raw_ostream &OS) { |
255 | S->printPretty(OS, Helper: nullptr, Policy: Ctx.getPrintingPolicy()); |
256 | }); |
257 | if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(S)) |
258 | return MTE->isBoundToLvalueReference() ? "lvalue" : "rvalue" ; |
259 | return "" ; |
260 | } |
261 | std::string getDetail(const TypeLoc &TL) { |
262 | if (TL.getType().hasLocalQualifiers()) |
263 | return TL.getType().getLocalQualifiers().getAsString( |
264 | Policy: Ctx.getPrintingPolicy()); |
265 | if (const auto *TT = dyn_cast<TagType>(TL.getTypePtr())) |
266 | return getDetail(TT->getDecl()); |
267 | if (const auto *DT = dyn_cast<DeducedType>(TL.getTypePtr())) |
268 | if (DT->isDeduced()) |
269 | return DT->getDeducedType().getAsString(Ctx.getPrintingPolicy()); |
270 | if (const auto *BT = dyn_cast<BuiltinType>(TL.getTypePtr())) |
271 | return BT->getName(Ctx.getPrintingPolicy()).str(); |
272 | if (const auto *TTPT = dyn_cast<TemplateTypeParmType>(TL.getTypePtr())) |
273 | return getDetail(TTPT->getDecl()); |
274 | if (const auto *TT = dyn_cast<TypedefType>(TL.getTypePtr())) |
275 | return getDetail(TT->getDecl()); |
276 | return "" ; |
277 | } |
278 | std::string getDetail(const NestedNameSpecifierLoc &NNSL) { |
279 | const auto &NNS = *NNSL.getNestedNameSpecifier(); |
280 | switch (NNS.getKind()) { |
281 | case NestedNameSpecifier::Identifier: |
282 | return NNS.getAsIdentifier()->getName().str() + "::" ; |
283 | case NestedNameSpecifier::Namespace: |
284 | return NNS.getAsNamespace()->getNameAsString() + "::" ; |
285 | case NestedNameSpecifier::NamespaceAlias: |
286 | return NNS.getAsNamespaceAlias()->getNameAsString() + "::" ; |
287 | default: |
288 | return "" ; |
289 | } |
290 | } |
291 | std::string getDetail(const CXXCtorInitializer *CCI) { |
292 | if (FieldDecl *FD = CCI->getAnyMember()) |
293 | return getDetail(FD); |
294 | if (TypeLoc TL = CCI->getBaseClassLoc()) |
295 | return getDetail(TL); |
296 | return "" ; |
297 | } |
298 | std::string getDetail(const TemplateArgumentLoc &TAL) { |
299 | if (TAL.getArgument().getKind() == TemplateArgument::Integral) |
300 | return toString(I: TAL.getArgument().getAsIntegral(), Radix: 10); |
301 | return "" ; |
302 | } |
303 | std::string getDetail(const TemplateName &TN) { |
304 | return toString([&](raw_ostream &OS) { |
305 | TN.print(OS, Policy: Ctx.getPrintingPolicy(), Qual: TemplateName::Qualified::None); |
306 | }); |
307 | } |
308 | std::string getDetail(const Attr *A) { |
309 | return A->getAttrName() ? A->getNormalizedFullName() : A->getSpelling(); |
310 | } |
311 | std::string getDetail(const CXXBaseSpecifier &CBS) { |
312 | return CBS.isVirtual() ? "virtual" : "" ; |
313 | } |
314 | std::string getDetail(const ConceptReference *CR) { |
315 | return CR->getNamedConcept()->getNameAsString(); |
316 | } |
317 | |
318 | /// Arcana is produced by TextNodeDumper, for the types it supports. |
319 | |
320 | template <typename Dump> std::string dump(const Dump &D) { |
321 | return toString([&](raw_ostream &OS) { |
322 | TextNodeDumper Dumper(OS, Ctx, /*ShowColors=*/false); |
323 | D(Dumper); |
324 | }); |
325 | } |
326 | template <typename T> std::string getArcana(const T &N) { |
327 | return dump([&](TextNodeDumper &D) { D.Visit(N); }); |
328 | } |
329 | std::string getArcana(const NestedNameSpecifierLoc &NNS) { return "" ; } |
330 | std::string getArcana(const TemplateName &NNS) { return "" ; } |
331 | std::string getArcana(const CXXBaseSpecifier &CBS) { return "" ; } |
332 | std::string getArcana(const TemplateArgumentLoc &TAL) { |
333 | return dump([&](TextNodeDumper &D) { |
334 | D.Visit(TA: TAL.getArgument(), R: TAL.getSourceRange()); |
335 | }); |
336 | } |
337 | std::string getArcana(const TypeLoc &TL) { |
338 | return dump([&](TextNodeDumper &D) { D.Visit(T: TL.getType()); }); |
339 | } |
340 | |
341 | public: |
342 | ASTNode Root; |
343 | DumpVisitor(const syntax::TokenBuffer &Tokens, const ASTContext &Ctx) |
344 | : Tokens(Tokens), Ctx(Ctx) {} |
345 | |
346 | // Override traversal to record the nodes we care about. |
347 | // Generally, these are nodes with position information (TypeLoc, not Type). |
348 | |
349 | bool TraverseDecl(Decl *D) { |
350 | return !D || isInjectedClassName(D) || |
351 | traverseNode("declaration" , D, [&] { Base::TraverseDecl(D); }); |
352 | } |
353 | bool TraverseTypeLoc(TypeLoc TL) { |
354 | return !TL || traverseNode("type" , TL, [&] { Base::TraverseTypeLoc(TL); }); |
355 | } |
356 | bool TraverseTemplateName(const TemplateName &TN) { |
357 | return traverseNode("template name" , TN, |
358 | [&] { Base::TraverseTemplateName(Template: TN); }); |
359 | } |
360 | bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &TAL) { |
361 | return traverseNode("template argument" , TAL, |
362 | [&] { Base::TraverseTemplateArgumentLoc(ArgLoc: TAL); }); |
363 | } |
364 | bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNSL) { |
365 | return !NNSL || traverseNode("specifier" , NNSL, [&] { |
366 | Base::TraverseNestedNameSpecifierLoc(NNS: NNSL); |
367 | }); |
368 | } |
369 | bool TraverseConstructorInitializer(CXXCtorInitializer *CCI) { |
370 | return !CCI || traverseNode("constructor initializer" , CCI, [&] { |
371 | Base::TraverseConstructorInitializer(Init: CCI); |
372 | }); |
373 | } |
374 | bool TraverseAttr(Attr *A) { |
375 | return !A || traverseNode("attribute" , A, [&] { Base::TraverseAttr(At: A); }); |
376 | } |
377 | bool TraverseConceptReference(ConceptReference *C) { |
378 | return !C || traverseNode("reference" , C, |
379 | [&] { Base::TraverseConceptReference(CR: C); }); |
380 | } |
381 | bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &CBS) { |
382 | return traverseNode("base" , CBS, |
383 | [&] { Base::TraverseCXXBaseSpecifier(Base: CBS); }); |
384 | } |
385 | // Stmt is the same, but this form allows the data recursion optimization. |
386 | bool dataTraverseStmtPre(Stmt *S) { |
387 | return S && traverseNodePre(isa<Expr>(S) ? "expression" : "statement" , S); |
388 | } |
389 | bool dataTraverseStmtPost(Stmt *X) { return traverseNodePost(); } |
390 | |
391 | // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived |
392 | // TraverseTypeLoc is not called for the inner UnqualTypeLoc. |
393 | // This means we'd never see 'int' in 'const int'! Work around that here. |
394 | // (The reason for the behavior is to avoid traversing the nested Type twice, |
395 | // but we ignore TraverseType anyway). |
396 | bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QTL) { |
397 | return TraverseTypeLoc(TL: QTL.getUnqualifiedLoc()); |
398 | } |
399 | // Uninteresting parts of the AST that don't have locations within them. |
400 | bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; } |
401 | bool TraverseType(QualType) { return true; } |
402 | |
403 | // OpaqueValueExpr blocks traversal, we must explicitly traverse it. |
404 | bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) { |
405 | return TraverseStmt(E->getSourceExpr()); |
406 | } |
407 | // We only want to traverse the *syntactic form* to understand the selection. |
408 | bool TraversePseudoObjectExpr(PseudoObjectExpr *E) { |
409 | return TraverseStmt(E->getSyntacticForm()); |
410 | } |
411 | }; |
412 | |
413 | } // namespace |
414 | |
415 | ASTNode dumpAST(const DynTypedNode &N, const syntax::TokenBuffer &Tokens, |
416 | const ASTContext &Ctx) { |
417 | DumpVisitor V(Tokens, Ctx); |
418 | // DynTypedNode only works with const, RecursiveASTVisitor only non-const :-( |
419 | if (const auto *D = N.get<Decl>()) |
420 | V.TraverseDecl(D: const_cast<Decl *>(D)); |
421 | else if (const auto *S = N.get<Stmt>()) |
422 | V.TraverseStmt(const_cast<Stmt *>(S)); |
423 | else if (const auto *NNSL = N.get<NestedNameSpecifierLoc>()) |
424 | V.TraverseNestedNameSpecifierLoc( |
425 | NNSL: *const_cast<NestedNameSpecifierLoc *>(NNSL)); |
426 | else if (const auto *NNS = N.get<NestedNameSpecifier>()) |
427 | V.TraverseNestedNameSpecifier(const_cast<NestedNameSpecifier *>(NNS)); |
428 | else if (const auto *TL = N.get<TypeLoc>()) |
429 | V.TraverseTypeLoc(TL: *const_cast<TypeLoc *>(TL)); |
430 | else if (const auto *QT = N.get<QualType>()) |
431 | V.TraverseType(*const_cast<QualType *>(QT)); |
432 | else if (const auto *CCI = N.get<CXXCtorInitializer>()) |
433 | V.TraverseConstructorInitializer(CCI: const_cast<CXXCtorInitializer *>(CCI)); |
434 | else if (const auto *TAL = N.get<TemplateArgumentLoc>()) |
435 | V.TraverseTemplateArgumentLoc(TAL: *const_cast<TemplateArgumentLoc *>(TAL)); |
436 | else if (const auto *CBS = N.get<CXXBaseSpecifier>()) |
437 | V.TraverseCXXBaseSpecifier(CBS: *const_cast<CXXBaseSpecifier *>(CBS)); |
438 | else if (const auto *CR = N.get<ConceptReference>()) |
439 | V.TraverseConceptReference(C: const_cast<ConceptReference *>(CR)); |
440 | else |
441 | elog("dumpAST: unhandled DynTypedNode kind {0}" , |
442 | N.getNodeKind().asStringRef()); |
443 | return std::move(V.Root); |
444 | } |
445 | |
446 | } // namespace clangd |
447 | } // namespace clang |
448 | |