1 | //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "clang/Tooling/Syntax/BuildTree.h" |
9 | #include "clang/AST/ASTFwd.h" |
10 | #include "clang/AST/Decl.h" |
11 | #include "clang/AST/DeclBase.h" |
12 | #include "clang/AST/DeclCXX.h" |
13 | #include "clang/AST/DeclarationName.h" |
14 | #include "clang/AST/Expr.h" |
15 | #include "clang/AST/ExprCXX.h" |
16 | #include "clang/AST/IgnoreExpr.h" |
17 | #include "clang/AST/OperationKinds.h" |
18 | #include "clang/AST/RecursiveASTVisitor.h" |
19 | #include "clang/AST/Stmt.h" |
20 | #include "clang/AST/TypeLoc.h" |
21 | #include "clang/AST/TypeLocVisitor.h" |
22 | #include "clang/Basic/LLVM.h" |
23 | #include "clang/Basic/SourceLocation.h" |
24 | #include "clang/Basic/SourceManager.h" |
25 | #include "clang/Basic/Specifiers.h" |
26 | #include "clang/Basic/TokenKinds.h" |
27 | #include "clang/Lex/Lexer.h" |
28 | #include "clang/Lex/LiteralSupport.h" |
29 | #include "clang/Tooling/Syntax/Nodes.h" |
30 | #include "clang/Tooling/Syntax/TokenBufferTokenManager.h" |
31 | #include "clang/Tooling/Syntax/Tokens.h" |
32 | #include "clang/Tooling/Syntax/Tree.h" |
33 | #include "llvm/ADT/ArrayRef.h" |
34 | #include "llvm/ADT/DenseMap.h" |
35 | #include "llvm/ADT/PointerUnion.h" |
36 | #include "llvm/ADT/STLExtras.h" |
37 | #include "llvm/ADT/ScopeExit.h" |
38 | #include "llvm/ADT/SmallVector.h" |
39 | #include "llvm/Support/Allocator.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/Compiler.h" |
42 | #include "llvm/Support/FormatVariadic.h" |
43 | #include "llvm/Support/MemoryBuffer.h" |
44 | #include "llvm/Support/raw_ostream.h" |
45 | #include <cstddef> |
46 | #include <map> |
47 | |
48 | using namespace clang; |
49 | |
50 | // Ignores the implicit `CXXConstructExpr` for copy/move constructor calls |
51 | // generated by the compiler, as well as in implicit conversions like the one |
52 | // wrapping `1` in `X x = 1;`. |
53 | static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) { |
54 | if (auto *C = dyn_cast<CXXConstructExpr>(Val: E)) { |
55 | auto NumArgs = C->getNumArgs(); |
56 | if (NumArgs == 1 || (NumArgs > 1 && isa<CXXDefaultArgExpr>(Val: C->getArg(Arg: 1)))) { |
57 | Expr *A = C->getArg(Arg: 0); |
58 | if (C->getParenOrBraceRange().isInvalid()) |
59 | return A; |
60 | } |
61 | } |
62 | return E; |
63 | } |
64 | |
65 | // In: |
66 | // struct X { |
67 | // X(int) |
68 | // }; |
69 | // X x = X(1); |
70 | // Ignores the implicit `CXXFunctionalCastExpr` that wraps |
71 | // `CXXConstructExpr X(1)`. |
72 | static Expr *IgnoreCXXFunctionalCastExprWrappingConstructor(Expr *E) { |
73 | if (auto *F = dyn_cast<CXXFunctionalCastExpr>(Val: E)) { |
74 | if (F->getCastKind() == CK_ConstructorConversion) |
75 | return F->getSubExpr(); |
76 | } |
77 | return E; |
78 | } |
79 | |
80 | static Expr *IgnoreImplicit(Expr *E) { |
81 | return IgnoreExprNodes(E, Fns&: IgnoreImplicitSingleStep, |
82 | Fns&: IgnoreImplicitConstructorSingleStep, |
83 | Fns&: IgnoreCXXFunctionalCastExprWrappingConstructor); |
84 | } |
85 | |
86 | LLVM_ATTRIBUTE_UNUSED |
87 | static bool isImplicitExpr(Expr *E) { return IgnoreImplicit(E) != E; } |
88 | |
89 | namespace { |
90 | /// Get start location of the Declarator from the TypeLoc. |
91 | /// E.g.: |
92 | /// loc of `(` in `int (a)` |
93 | /// loc of `*` in `int *(a)` |
94 | /// loc of the first `(` in `int (*a)(int)` |
95 | /// loc of the `*` in `int *(a)(int)` |
96 | /// loc of the first `*` in `const int *const *volatile a;` |
97 | /// |
98 | /// It is non-trivial to get the start location because TypeLocs are stored |
99 | /// inside out. In the example above `*volatile` is the TypeLoc returned |
100 | /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` |
101 | /// returns. |
102 | struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { |
103 | SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { |
104 | auto L = Visit(TyLoc: T.getInnerLoc()); |
105 | if (L.isValid()) |
106 | return L; |
107 | return T.getLParenLoc(); |
108 | } |
109 | |
110 | // Types spelled in the prefix part of the declarator. |
111 | SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { |
112 | return HandlePointer(T); |
113 | } |
114 | |
115 | SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { |
116 | return HandlePointer(T); |
117 | } |
118 | |
119 | SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { |
120 | return HandlePointer(T); |
121 | } |
122 | |
123 | SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { |
124 | return HandlePointer(T); |
125 | } |
126 | |
127 | SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { |
128 | return HandlePointer(T); |
129 | } |
130 | |
131 | // All other cases are not important, as they are either part of declaration |
132 | // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on |
133 | // existing declarators (e.g. QualifiedTypeLoc). They cannot start the |
134 | // declarator themselves, but their underlying type can. |
135 | SourceLocation VisitTypeLoc(TypeLoc T) { |
136 | auto N = T.getNextTypeLoc(); |
137 | if (!N) |
138 | return SourceLocation(); |
139 | return Visit(TyLoc: N); |
140 | } |
141 | |
142 | SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { |
143 | if (T.getTypePtr()->hasTrailingReturn()) |
144 | return SourceLocation(); // avoid recursing into the suffix of declarator. |
145 | return VisitTypeLoc(T); |
146 | } |
147 | |
148 | private: |
149 | template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { |
150 | auto L = Visit(T.getPointeeLoc()); |
151 | if (L.isValid()) |
152 | return L; |
153 | return T.getLocalSourceRange().getBegin(); |
154 | } |
155 | }; |
156 | } // namespace |
157 | |
158 | static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args) { |
159 | auto FirstDefaultArg = |
160 | llvm::find_if(Range&: Args, P: [](auto It) { return isa<CXXDefaultArgExpr>(It); }); |
161 | return llvm::make_range(x: Args.begin(), y: FirstDefaultArg); |
162 | } |
163 | |
164 | static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { |
165 | switch (E.getOperator()) { |
166 | // Comparison |
167 | case OO_EqualEqual: |
168 | case OO_ExclaimEqual: |
169 | case OO_Greater: |
170 | case OO_GreaterEqual: |
171 | case OO_Less: |
172 | case OO_LessEqual: |
173 | case OO_Spaceship: |
174 | // Assignment |
175 | case OO_Equal: |
176 | case OO_SlashEqual: |
177 | case OO_PercentEqual: |
178 | case OO_CaretEqual: |
179 | case OO_PipeEqual: |
180 | case OO_LessLessEqual: |
181 | case OO_GreaterGreaterEqual: |
182 | case OO_PlusEqual: |
183 | case OO_MinusEqual: |
184 | case OO_StarEqual: |
185 | case OO_AmpEqual: |
186 | // Binary computation |
187 | case OO_Slash: |
188 | case OO_Percent: |
189 | case OO_Caret: |
190 | case OO_Pipe: |
191 | case OO_LessLess: |
192 | case OO_GreaterGreater: |
193 | case OO_AmpAmp: |
194 | case OO_PipePipe: |
195 | case OO_ArrowStar: |
196 | case OO_Comma: |
197 | return syntax::NodeKind::BinaryOperatorExpression; |
198 | case OO_Tilde: |
199 | case OO_Exclaim: |
200 | return syntax::NodeKind::PrefixUnaryOperatorExpression; |
201 | // Prefix/Postfix increment/decrement |
202 | case OO_PlusPlus: |
203 | case OO_MinusMinus: |
204 | switch (E.getNumArgs()) { |
205 | case 1: |
206 | return syntax::NodeKind::PrefixUnaryOperatorExpression; |
207 | case 2: |
208 | return syntax::NodeKind::PostfixUnaryOperatorExpression; |
209 | default: |
210 | llvm_unreachable("Invalid number of arguments for operator" ); |
211 | } |
212 | // Operators that can be unary or binary |
213 | case OO_Plus: |
214 | case OO_Minus: |
215 | case OO_Star: |
216 | case OO_Amp: |
217 | switch (E.getNumArgs()) { |
218 | case 1: |
219 | return syntax::NodeKind::PrefixUnaryOperatorExpression; |
220 | case 2: |
221 | return syntax::NodeKind::BinaryOperatorExpression; |
222 | default: |
223 | llvm_unreachable("Invalid number of arguments for operator" ); |
224 | } |
225 | return syntax::NodeKind::BinaryOperatorExpression; |
226 | // Not yet supported by SyntaxTree |
227 | case OO_New: |
228 | case OO_Delete: |
229 | case OO_Array_New: |
230 | case OO_Array_Delete: |
231 | case OO_Coawait: |
232 | case OO_Subscript: |
233 | case OO_Arrow: |
234 | return syntax::NodeKind::UnknownExpression; |
235 | case OO_Call: |
236 | return syntax::NodeKind::CallExpression; |
237 | case OO_Conditional: // not overloadable |
238 | case NUM_OVERLOADED_OPERATORS: |
239 | case OO_None: |
240 | llvm_unreachable("Not an overloadable operator" ); |
241 | } |
242 | llvm_unreachable("Unknown OverloadedOperatorKind enum" ); |
243 | } |
244 | |
245 | /// Get the start of the qualified name. In the examples below it gives the |
246 | /// location of the `^`: |
247 | /// `int ^a;` |
248 | /// `int *^a;` |
249 | /// `int ^a::S::f(){}` |
250 | static SourceLocation getQualifiedNameStart(NamedDecl *D) { |
251 | assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && |
252 | "only DeclaratorDecl and TypedefNameDecl are supported." ); |
253 | |
254 | auto DN = D->getDeclName(); |
255 | bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); |
256 | if (IsAnonymous) |
257 | return SourceLocation(); |
258 | |
259 | if (const auto *DD = dyn_cast<DeclaratorDecl>(Val: D)) { |
260 | if (DD->getQualifierLoc()) { |
261 | return DD->getQualifierLoc().getBeginLoc(); |
262 | } |
263 | } |
264 | |
265 | return D->getLocation(); |
266 | } |
267 | |
268 | /// Gets the range of the initializer inside an init-declarator C++ [dcl.decl]. |
269 | /// `int a;` -> range of ``, |
270 | /// `int *a = nullptr` -> range of `= nullptr`. |
271 | /// `int a{}` -> range of `{}`. |
272 | /// `int a()` -> range of `()`. |
273 | static SourceRange getInitializerRange(Decl *D) { |
274 | if (auto *V = dyn_cast<VarDecl>(Val: D)) { |
275 | auto *I = V->getInit(); |
276 | // Initializers in range-based-for are not part of the declarator |
277 | if (I && !V->isCXXForRangeDecl()) |
278 | return I->getSourceRange(); |
279 | } |
280 | |
281 | return SourceRange(); |
282 | } |
283 | |
284 | /// Gets the range of declarator as defined by the C++ grammar. E.g. |
285 | /// `int a;` -> range of `a`, |
286 | /// `int *a;` -> range of `*a`, |
287 | /// `int a[10];` -> range of `a[10]`, |
288 | /// `int a[1][2][3];` -> range of `a[1][2][3]`, |
289 | /// `int *a = nullptr` -> range of `*a = nullptr`. |
290 | /// `int S::f(){}` -> range of `S::f()`. |
291 | /// FIXME: \p Name must be a source range. |
292 | static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, |
293 | SourceLocation Name, |
294 | SourceRange Initializer) { |
295 | SourceLocation Start = GetStartLoc().Visit(TyLoc: T); |
296 | SourceLocation End = T.getEndLoc(); |
297 | if (Name.isValid()) { |
298 | if (Start.isInvalid()) |
299 | Start = Name; |
300 | // End of TypeLoc could be invalid if the type is invalid, fallback to the |
301 | // NameLoc. |
302 | if (End.isInvalid() || SM.isBeforeInTranslationUnit(LHS: End, RHS: Name)) |
303 | End = Name; |
304 | } |
305 | if (Initializer.isValid()) { |
306 | auto InitializerEnd = Initializer.getEnd(); |
307 | assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || |
308 | End == InitializerEnd); |
309 | End = InitializerEnd; |
310 | } |
311 | return SourceRange(Start, End); |
312 | } |
313 | |
314 | namespace { |
315 | /// All AST hierarchy roots that can be represented as pointers. |
316 | using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; |
317 | /// Maintains a mapping from AST to syntax tree nodes. This class will get more |
318 | /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. |
319 | /// FIXME: expose this as public API. |
320 | class ASTToSyntaxMapping { |
321 | public: |
322 | void add(ASTPtr From, syntax::Tree *To) { |
323 | assert(To != nullptr); |
324 | assert(!From.isNull()); |
325 | |
326 | bool Added = Nodes.insert(KV: {From, To}).second; |
327 | (void)Added; |
328 | assert(Added && "mapping added twice" ); |
329 | } |
330 | |
331 | void add(NestedNameSpecifierLoc From, syntax::Tree *To) { |
332 | assert(To != nullptr); |
333 | assert(From.hasQualifier()); |
334 | |
335 | bool Added = NNSNodes.insert(KV: {From, To}).second; |
336 | (void)Added; |
337 | assert(Added && "mapping added twice" ); |
338 | } |
339 | |
340 | syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(Val: P); } |
341 | |
342 | syntax::Tree *find(NestedNameSpecifierLoc P) const { |
343 | return NNSNodes.lookup(Val: P); |
344 | } |
345 | |
346 | private: |
347 | llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; |
348 | llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes; |
349 | }; |
350 | } // namespace |
351 | |
352 | /// A helper class for constructing the syntax tree while traversing a clang |
353 | /// AST. |
354 | /// |
355 | /// At each point of the traversal we maintain a list of pending nodes. |
356 | /// Initially all tokens are added as pending nodes. When processing a clang AST |
357 | /// node, the clients need to: |
358 | /// - create a corresponding syntax node, |
359 | /// - assign roles to all pending child nodes with 'markChild' and |
360 | /// 'markChildToken', |
361 | /// - replace the child nodes with the new syntax node in the pending list |
362 | /// with 'foldNode'. |
363 | /// |
364 | /// Note that all children are expected to be processed when building a node. |
365 | /// |
366 | /// Call finalize() to finish building the tree and consume the root node. |
367 | class syntax::TreeBuilder { |
368 | public: |
369 | TreeBuilder(syntax::Arena &Arena, TokenBufferTokenManager& TBTM) |
370 | : Arena(Arena), |
371 | TBTM(TBTM), |
372 | Pending(Arena, TBTM.tokenBuffer()) { |
373 | for (const auto &T : TBTM.tokenBuffer().expandedTokens()) |
374 | LocationToToken.insert(KV: {T.location(), &T}); |
375 | } |
376 | |
377 | llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } |
378 | const SourceManager &sourceManager() const { |
379 | return TBTM.sourceManager(); |
380 | } |
381 | |
382 | /// Populate children for \p New node, assuming it covers tokens from \p |
383 | /// Range. |
384 | void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { |
385 | assert(New); |
386 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: Range, Node: New); |
387 | if (From) |
388 | Mapping.add(From, To: New); |
389 | } |
390 | |
391 | void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { |
392 | // FIXME: add mapping for TypeLocs |
393 | foldNode(Range, New, From: nullptr); |
394 | } |
395 | |
396 | void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, |
397 | NestedNameSpecifierLoc From) { |
398 | assert(New); |
399 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: Range, Node: New); |
400 | if (From) |
401 | Mapping.add(From, To: New); |
402 | } |
403 | |
404 | /// Populate children for \p New list, assuming it covers tokens from a |
405 | /// subrange of \p SuperRange. |
406 | void foldList(ArrayRef<syntax::Token> SuperRange, syntax::List *New, |
407 | ASTPtr From) { |
408 | assert(New); |
409 | auto ListRange = Pending.shrinkToFitList(Range: SuperRange); |
410 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: ListRange, Node: New); |
411 | if (From) |
412 | Mapping.add(From, To: New); |
413 | } |
414 | |
415 | /// Notifies that we should not consume trailing semicolon when computing |
416 | /// token range of \p D. |
417 | void noticeDeclWithoutSemicolon(Decl *D); |
418 | |
419 | /// Mark the \p Child node with a corresponding \p Role. All marked children |
420 | /// should be consumed by foldNode. |
421 | /// When called on expressions (clang::Expr is derived from clang::Stmt), |
422 | /// wraps expressions into expression statement. |
423 | void markStmtChild(Stmt *Child, NodeRole Role); |
424 | /// Should be called for expressions in non-statement position to avoid |
425 | /// wrapping into expression statement. |
426 | void markExprChild(Expr *Child, NodeRole Role); |
427 | /// Set role for a token starting at \p Loc. |
428 | void markChildToken(SourceLocation Loc, NodeRole R); |
429 | /// Set role for \p T. |
430 | void markChildToken(const syntax::Token *T, NodeRole R); |
431 | |
432 | /// Set role for \p N. |
433 | void markChild(syntax::Node *N, NodeRole R); |
434 | /// Set role for the syntax node matching \p N. |
435 | void markChild(ASTPtr N, NodeRole R); |
436 | /// Set role for the syntax node matching \p N. |
437 | void markChild(NestedNameSpecifierLoc N, NodeRole R); |
438 | |
439 | /// Finish building the tree and consume the root node. |
440 | syntax::TranslationUnit *finalize() && { |
441 | auto Tokens = TBTM.tokenBuffer().expandedTokens(); |
442 | assert(!Tokens.empty()); |
443 | assert(Tokens.back().kind() == tok::eof); |
444 | |
445 | // Build the root of the tree, consuming all the children. |
446 | Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(), |
447 | new (Arena.getAllocator()) syntax::TranslationUnit); |
448 | |
449 | auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); |
450 | TU->assertInvariantsRecursive(); |
451 | return TU; |
452 | } |
453 | |
454 | /// Finds a token starting at \p L. The token must exist if \p L is valid. |
455 | const syntax::Token *findToken(SourceLocation L) const; |
456 | |
457 | /// Finds the syntax tokens corresponding to the \p SourceRange. |
458 | ArrayRef<syntax::Token> getRange(SourceRange Range) const { |
459 | assert(Range.isValid()); |
460 | return getRange(First: Range.getBegin(), Last: Range.getEnd()); |
461 | } |
462 | |
463 | /// Finds the syntax tokens corresponding to the passed source locations. |
464 | /// \p First is the start position of the first token and \p Last is the start |
465 | /// position of the last token. |
466 | ArrayRef<syntax::Token> getRange(SourceLocation First, |
467 | SourceLocation Last) const { |
468 | assert(First.isValid()); |
469 | assert(Last.isValid()); |
470 | assert(First == Last || |
471 | TBTM.sourceManager().isBeforeInTranslationUnit(First, Last)); |
472 | return llvm::ArrayRef(findToken(L: First), std::next(x: findToken(L: Last))); |
473 | } |
474 | |
475 | ArrayRef<syntax::Token> |
476 | getTemplateRange(const ClassTemplateSpecializationDecl *D) const { |
477 | auto Tokens = getRange(Range: D->getSourceRange()); |
478 | return maybeAppendSemicolon(Tokens, D); |
479 | } |
480 | |
481 | /// Returns true if \p D is the last declarator in a chain and is thus |
482 | /// reponsible for creating SimpleDeclaration for the whole chain. |
483 | bool isResponsibleForCreatingDeclaration(const Decl *D) const { |
484 | assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && |
485 | "only DeclaratorDecl and TypedefNameDecl are supported." ); |
486 | |
487 | const Decl *Next = D->getNextDeclInContext(); |
488 | |
489 | // There's no next sibling, this one is responsible. |
490 | if (Next == nullptr) { |
491 | return true; |
492 | } |
493 | |
494 | // Next sibling is not the same type, this one is responsible. |
495 | if (D->getKind() != Next->getKind()) { |
496 | return true; |
497 | } |
498 | // Next sibling doesn't begin at the same loc, it must be a different |
499 | // declaration, so this declarator is responsible. |
500 | if (Next->getBeginLoc() != D->getBeginLoc()) { |
501 | return true; |
502 | } |
503 | |
504 | // NextT is a member of the same declaration, and we need the last member to |
505 | // create declaration. This one is not responsible. |
506 | return false; |
507 | } |
508 | |
509 | ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { |
510 | ArrayRef<syntax::Token> Tokens; |
511 | // We want to drop the template parameters for specializations. |
512 | if (const auto *S = dyn_cast<TagDecl>(Val: D)) |
513 | Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); |
514 | else |
515 | Tokens = getRange(Range: D->getSourceRange()); |
516 | return maybeAppendSemicolon(Tokens, D); |
517 | } |
518 | |
519 | ArrayRef<syntax::Token> getExprRange(const Expr *E) const { |
520 | return getRange(E->getSourceRange()); |
521 | } |
522 | |
523 | /// Find the adjusted range for the statement, consuming the trailing |
524 | /// semicolon when needed. |
525 | ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { |
526 | auto Tokens = getRange(Range: S->getSourceRange()); |
527 | if (isa<CompoundStmt>(Val: S)) |
528 | return Tokens; |
529 | |
530 | // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and |
531 | // all statements that end with those. Consume this semicolon here. |
532 | if (Tokens.back().kind() == tok::semi) |
533 | return Tokens; |
534 | return withTrailingSemicolon(Tokens); |
535 | } |
536 | |
537 | private: |
538 | ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, |
539 | const Decl *D) const { |
540 | if (isa<NamespaceDecl>(Val: D)) |
541 | return Tokens; |
542 | if (DeclsWithoutSemicolons.count(V: D)) |
543 | return Tokens; |
544 | // FIXME: do not consume trailing semicolon on function definitions. |
545 | // Most declarations own a semicolon in syntax trees, but not in clang AST. |
546 | return withTrailingSemicolon(Tokens); |
547 | } |
548 | |
549 | ArrayRef<syntax::Token> |
550 | withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { |
551 | assert(!Tokens.empty()); |
552 | assert(Tokens.back().kind() != tok::eof); |
553 | // We never consume 'eof', so looking at the next token is ok. |
554 | if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) |
555 | return llvm::ArrayRef(Tokens.begin(), Tokens.end() + 1); |
556 | return Tokens; |
557 | } |
558 | |
559 | void setRole(syntax::Node *N, NodeRole R) { |
560 | assert(N->getRole() == NodeRole::Detached); |
561 | N->setRole(R); |
562 | } |
563 | |
564 | /// A collection of trees covering the input tokens. |
565 | /// When created, each tree corresponds to a single token in the file. |
566 | /// Clients call 'foldChildren' to attach one or more subtrees to a parent |
567 | /// node and update the list of trees accordingly. |
568 | /// |
569 | /// Ensures that added nodes properly nest and cover the whole token stream. |
570 | struct Forest { |
571 | Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) { |
572 | assert(!TB.expandedTokens().empty()); |
573 | assert(TB.expandedTokens().back().kind() == tok::eof); |
574 | // Create all leaf nodes. |
575 | // Note that we do not have 'eof' in the tree. |
576 | for (const auto &T : TB.expandedTokens().drop_back()) { |
577 | auto *L = new (A.getAllocator()) |
578 | syntax::Leaf(reinterpret_cast<TokenManager::Key>(&T)); |
579 | L->Original = true; |
580 | L->CanModify = TB.spelledForExpanded(Expanded: T).has_value(); |
581 | Trees.insert(position: Trees.end(), x: {&T, L}); |
582 | } |
583 | } |
584 | |
585 | void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { |
586 | assert(!Range.empty()); |
587 | auto It = Trees.lower_bound(x: Range.begin()); |
588 | assert(It != Trees.end() && "no node found" ); |
589 | assert(It->first == Range.begin() && "no child with the specified range" ); |
590 | assert((std::next(It) == Trees.end() || |
591 | std::next(It)->first == Range.end()) && |
592 | "no child with the specified range" ); |
593 | assert(It->second->getRole() == NodeRole::Detached && |
594 | "re-assigning role for a child" ); |
595 | It->second->setRole(Role); |
596 | } |
597 | |
598 | /// Shrink \p Range to a subrange that only contains tokens of a list. |
599 | /// List elements and delimiters should already have correct roles. |
600 | ArrayRef<syntax::Token> shrinkToFitList(ArrayRef<syntax::Token> Range) { |
601 | auto BeginChildren = Trees.lower_bound(x: Range.begin()); |
602 | assert((BeginChildren == Trees.end() || |
603 | BeginChildren->first == Range.begin()) && |
604 | "Range crosses boundaries of existing subtrees" ); |
605 | |
606 | auto EndChildren = Trees.lower_bound(x: Range.end()); |
607 | assert( |
608 | (EndChildren == Trees.end() || EndChildren->first == Range.end()) && |
609 | "Range crosses boundaries of existing subtrees" ); |
610 | |
611 | auto BelongsToList = [](decltype(Trees)::value_type KV) { |
612 | auto Role = KV.second->getRole(); |
613 | return Role == syntax::NodeRole::ListElement || |
614 | Role == syntax::NodeRole::ListDelimiter; |
615 | }; |
616 | |
617 | auto BeginListChildren = |
618 | std::find_if(first: BeginChildren, last: EndChildren, pred: BelongsToList); |
619 | |
620 | auto EndListChildren = |
621 | std::find_if_not(first: BeginListChildren, last: EndChildren, pred: BelongsToList); |
622 | |
623 | return ArrayRef<syntax::Token>(BeginListChildren->first, |
624 | EndListChildren->first); |
625 | } |
626 | |
627 | /// Add \p Node to the forest and attach child nodes based on \p Tokens. |
628 | void foldChildren(const syntax::TokenBuffer &TB, |
629 | ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) { |
630 | // Attach children to `Node`. |
631 | assert(Node->getFirstChild() == nullptr && "node already has children" ); |
632 | |
633 | auto *FirstToken = Tokens.begin(); |
634 | auto BeginChildren = Trees.lower_bound(x: FirstToken); |
635 | |
636 | assert((BeginChildren == Trees.end() || |
637 | BeginChildren->first == FirstToken) && |
638 | "fold crosses boundaries of existing subtrees" ); |
639 | auto EndChildren = Trees.lower_bound(x: Tokens.end()); |
640 | assert( |
641 | (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && |
642 | "fold crosses boundaries of existing subtrees" ); |
643 | |
644 | for (auto It = BeginChildren; It != EndChildren; ++It) { |
645 | auto *C = It->second; |
646 | if (C->getRole() == NodeRole::Detached) |
647 | C->setRole(NodeRole::Unknown); |
648 | Node->appendChildLowLevel(Child: C); |
649 | } |
650 | |
651 | // Mark that this node came from the AST and is backed by the source code. |
652 | Node->Original = true; |
653 | Node->CanModify = |
654 | TB.spelledForExpanded(Expanded: Tokens).has_value(); |
655 | |
656 | Trees.erase(first: BeginChildren, last: EndChildren); |
657 | Trees.insert(x: {FirstToken, Node}); |
658 | } |
659 | |
660 | // EXPECTS: all tokens were consumed and are owned by a single root node. |
661 | syntax::Node *finalize() && { |
662 | assert(Trees.size() == 1); |
663 | auto *Root = Trees.begin()->second; |
664 | Trees = {}; |
665 | return Root; |
666 | } |
667 | |
668 | std::string str(const syntax::TokenBufferTokenManager &STM) const { |
669 | std::string R; |
670 | for (auto It = Trees.begin(); It != Trees.end(); ++It) { |
671 | unsigned CoveredTokens = |
672 | It != Trees.end() |
673 | ? (std::next(x: It)->first - It->first) |
674 | : STM.tokenBuffer().expandedTokens().end() - It->first; |
675 | |
676 | R += std::string( |
677 | formatv(Fmt: "- '{0}' covers '{1}'+{2} tokens\n" , Vals: It->second->getKind(), |
678 | Vals: It->first->text(SM: STM.sourceManager()), Vals&: CoveredTokens)); |
679 | R += It->second->dump(SM: STM); |
680 | } |
681 | return R; |
682 | } |
683 | |
684 | private: |
685 | /// Maps from the start token to a subtree starting at that token. |
686 | /// Keys in the map are pointers into the array of expanded tokens, so |
687 | /// pointer order corresponds to the order of preprocessor tokens. |
688 | std::map<const syntax::Token *, syntax::Node *> Trees; |
689 | }; |
690 | |
691 | /// For debugging purposes. |
692 | std::string str() { return Pending.str(STM: TBTM); } |
693 | |
694 | syntax::Arena &Arena; |
695 | TokenBufferTokenManager& TBTM; |
696 | /// To quickly find tokens by their start location. |
697 | llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken; |
698 | Forest Pending; |
699 | llvm::DenseSet<Decl *> DeclsWithoutSemicolons; |
700 | ASTToSyntaxMapping Mapping; |
701 | }; |
702 | |
703 | namespace { |
704 | class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { |
705 | public: |
706 | explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) |
707 | : Builder(Builder), Context(Context) {} |
708 | |
709 | bool shouldTraversePostOrder() const { return true; } |
710 | |
711 | bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { |
712 | return processDeclaratorAndDeclaration(D: DD); |
713 | } |
714 | |
715 | bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { |
716 | return processDeclaratorAndDeclaration(D: TD); |
717 | } |
718 | |
719 | bool VisitDecl(Decl *D) { |
720 | assert(!D->isImplicit()); |
721 | Builder.foldNode(Range: Builder.getDeclarationRange(D), |
722 | New: new (allocator()) syntax::UnknownDeclaration(), From: D); |
723 | return true; |
724 | } |
725 | |
726 | // RAV does not call WalkUpFrom* on explicit instantiations, so we have to |
727 | // override Traverse. |
728 | // FIXME: make RAV call WalkUpFrom* instead. |
729 | bool |
730 | TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { |
731 | if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) |
732 | return false; |
733 | if (C->isExplicitSpecialization()) |
734 | return true; // we are only interested in explicit instantiations. |
735 | auto *Declaration = |
736 | cast<syntax::SimpleDeclaration>(Val: handleFreeStandingTagDecl(C)); |
737 | foldExplicitTemplateInstantiation( |
738 | Range: Builder.getTemplateRange(D: C), ExternKW: Builder.findToken(L: C->getExternLoc()), |
739 | TemplateKW: Builder.findToken(L: C->getTemplateKeywordLoc()), InnerDeclaration: Declaration, From: C); |
740 | return true; |
741 | } |
742 | |
743 | bool WalkUpFromTemplateDecl(TemplateDecl *S) { |
744 | foldTemplateDeclaration( |
745 | Builder.getDeclarationRange(S), |
746 | Builder.findToken(L: S->getTemplateParameters()->getTemplateLoc()), |
747 | Builder.getDeclarationRange(S->getTemplatedDecl()), S); |
748 | return true; |
749 | } |
750 | |
751 | bool WalkUpFromTagDecl(TagDecl *C) { |
752 | // FIXME: build the ClassSpecifier node. |
753 | if (!C->isFreeStanding()) { |
754 | assert(C->getNumTemplateParameterLists() == 0); |
755 | return true; |
756 | } |
757 | handleFreeStandingTagDecl(C); |
758 | return true; |
759 | } |
760 | |
761 | syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { |
762 | assert(C->isFreeStanding()); |
763 | // Class is a declaration specifier and needs a spanning declaration node. |
764 | auto DeclarationRange = Builder.getDeclarationRange(C); |
765 | syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; |
766 | Builder.foldNode(DeclarationRange, Result, nullptr); |
767 | |
768 | // Build TemplateDeclaration nodes if we had template parameters. |
769 | auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { |
770 | const auto *TemplateKW = Builder.findToken(L: L.getTemplateLoc()); |
771 | auto R = llvm::ArrayRef(TemplateKW, DeclarationRange.end()); |
772 | Result = |
773 | foldTemplateDeclaration(Range: R, TemplateKW, TemplatedDeclaration: DeclarationRange, From: nullptr); |
774 | DeclarationRange = R; |
775 | }; |
776 | if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(Val: C)) |
777 | ConsumeTemplateParameters(*S->getTemplateParameters()); |
778 | for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) |
779 | ConsumeTemplateParameters(*C->getTemplateParameterList(i: I - 1)); |
780 | return Result; |
781 | } |
782 | |
783 | bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { |
784 | // We do not want to call VisitDecl(), the declaration for translation |
785 | // unit is built by finalize(). |
786 | return true; |
787 | } |
788 | |
789 | bool WalkUpFromCompoundStmt(CompoundStmt *S) { |
790 | using NodeRole = syntax::NodeRole; |
791 | |
792 | Builder.markChildToken(Loc: S->getLBracLoc(), R: NodeRole::OpenParen); |
793 | for (auto *Child : S->body()) |
794 | Builder.markStmtChild(Child, Role: NodeRole::Statement); |
795 | Builder.markChildToken(Loc: S->getRBracLoc(), R: NodeRole::CloseParen); |
796 | |
797 | Builder.foldNode(Builder.getStmtRange(S), |
798 | new (allocator()) syntax::CompoundStatement, S); |
799 | return true; |
800 | } |
801 | |
802 | // Some statements are not yet handled by syntax trees. |
803 | bool WalkUpFromStmt(Stmt *S) { |
804 | Builder.foldNode(Range: Builder.getStmtRange(S), |
805 | New: new (allocator()) syntax::UnknownStatement, From: S); |
806 | return true; |
807 | } |
808 | |
809 | bool TraverseIfStmt(IfStmt *S) { |
810 | bool Result = [&, this]() { |
811 | if (S->getInit() && !TraverseStmt(S: S->getInit())) { |
812 | return false; |
813 | } |
814 | // In cases where the condition is an initialized declaration in a |
815 | // statement, we want to preserve the declaration and ignore the |
816 | // implicit condition expression in the syntax tree. |
817 | if (S->hasVarStorage()) { |
818 | if (!TraverseStmt(S: S->getConditionVariableDeclStmt())) |
819 | return false; |
820 | } else if (S->getCond() && !TraverseStmt(S->getCond())) |
821 | return false; |
822 | |
823 | if (S->getThen() && !TraverseStmt(S: S->getThen())) |
824 | return false; |
825 | if (S->getElse() && !TraverseStmt(S: S->getElse())) |
826 | return false; |
827 | return true; |
828 | }(); |
829 | WalkUpFromIfStmt(S); |
830 | return Result; |
831 | } |
832 | |
833 | bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { |
834 | // We override to traverse range initializer as VarDecl. |
835 | // RAV traverses it as a statement, we produce invalid node kinds in that |
836 | // case. |
837 | // FIXME: should do this in RAV instead? |
838 | bool Result = [&, this]() { |
839 | if (S->getInit() && !TraverseStmt(S: S->getInit())) |
840 | return false; |
841 | if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) |
842 | return false; |
843 | if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) |
844 | return false; |
845 | if (S->getBody() && !TraverseStmt(S: S->getBody())) |
846 | return false; |
847 | return true; |
848 | }(); |
849 | WalkUpFromCXXForRangeStmt(S); |
850 | return Result; |
851 | } |
852 | |
853 | bool TraverseStmt(Stmt *S) { |
854 | if (auto *DS = dyn_cast_or_null<DeclStmt>(Val: S)) { |
855 | // We want to consume the semicolon, make sure SimpleDeclaration does not. |
856 | for (auto *D : DS->decls()) |
857 | Builder.noticeDeclWithoutSemicolon(D); |
858 | } else if (auto *E = dyn_cast_or_null<Expr>(Val: S)) { |
859 | return RecursiveASTVisitor::TraverseStmt(IgnoreImplicit(E)); |
860 | } |
861 | return RecursiveASTVisitor::TraverseStmt(S); |
862 | } |
863 | |
864 | bool TraverseOpaqueValueExpr(OpaqueValueExpr *VE) { |
865 | // OpaqueValue doesn't correspond to concrete syntax, ignore it. |
866 | return true; |
867 | } |
868 | |
869 | // Some expressions are not yet handled by syntax trees. |
870 | bool WalkUpFromExpr(Expr *E) { |
871 | assert(!isImplicitExpr(E) && "should be handled by TraverseStmt" ); |
872 | Builder.foldNode(Builder.getExprRange(E), |
873 | new (allocator()) syntax::UnknownExpression, E); |
874 | return true; |
875 | } |
876 | |
877 | bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { |
878 | // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node |
879 | // referencing the location of the UDL suffix (`_w` in `1.2_w`). The |
880 | // UDL suffix location does not point to the beginning of a token, so we |
881 | // can't represent the UDL suffix as a separate syntax tree node. |
882 | |
883 | return WalkUpFromUserDefinedLiteral(S); |
884 | } |
885 | |
886 | syntax::UserDefinedLiteralExpression * |
887 | buildUserDefinedLiteral(UserDefinedLiteral *S) { |
888 | switch (S->getLiteralOperatorKind()) { |
889 | case UserDefinedLiteral::LOK_Integer: |
890 | return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; |
891 | case UserDefinedLiteral::LOK_Floating: |
892 | return new (allocator()) syntax::FloatUserDefinedLiteralExpression; |
893 | case UserDefinedLiteral::LOK_Character: |
894 | return new (allocator()) syntax::CharUserDefinedLiteralExpression; |
895 | case UserDefinedLiteral::LOK_String: |
896 | return new (allocator()) syntax::StringUserDefinedLiteralExpression; |
897 | case UserDefinedLiteral::LOK_Raw: |
898 | case UserDefinedLiteral::LOK_Template: |
899 | // For raw literal operator and numeric literal operator template we |
900 | // cannot get the type of the operand in the semantic AST. We get this |
901 | // information from the token. As integer and floating point have the same |
902 | // token kind, we run `NumericLiteralParser` again to distinguish them. |
903 | auto TokLoc = S->getBeginLoc(); |
904 | auto TokSpelling = |
905 | Builder.findToken(L: TokLoc)->text(SM: Context.getSourceManager()); |
906 | auto Literal = |
907 | NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), |
908 | Context.getLangOpts(), Context.getTargetInfo(), |
909 | Context.getDiagnostics()); |
910 | if (Literal.isIntegerLiteral()) |
911 | return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; |
912 | else { |
913 | assert(Literal.isFloatingLiteral()); |
914 | return new (allocator()) syntax::FloatUserDefinedLiteralExpression; |
915 | } |
916 | } |
917 | llvm_unreachable("Unknown literal operator kind." ); |
918 | } |
919 | |
920 | bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { |
921 | Builder.markChildToken(Loc: S->getBeginLoc(), R: syntax::NodeRole::LiteralToken); |
922 | Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); |
923 | return true; |
924 | } |
925 | |
926 | // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the |
927 | // `DependentTemplateSpecializationType` case. |
928 | /// Given a nested-name-specifier return the range for the last name |
929 | /// specifier. |
930 | /// |
931 | /// e.g. `std::T::template X<U>::` => `template X<U>::` |
932 | SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) { |
933 | auto SR = NNSLoc.getLocalSourceRange(); |
934 | |
935 | // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should* |
936 | // return the desired `SourceRange`, but there is a corner case. For a |
937 | // `DependentTemplateSpecializationType` this method returns its |
938 | // qualifiers as well, in other words in the example above this method |
939 | // returns `T::template X<U>::` instead of only `template X<U>::` |
940 | if (auto TL = NNSLoc.getTypeLoc()) { |
941 | if (auto DependentTL = |
942 | TL.getAs<DependentTemplateSpecializationTypeLoc>()) { |
943 | // The 'template' keyword is always present in dependent template |
944 | // specializations. Except in the case of incorrect code |
945 | // TODO: Treat the case of incorrect code. |
946 | SR.setBegin(DependentTL.getTemplateKeywordLoc()); |
947 | } |
948 | } |
949 | |
950 | return SR; |
951 | } |
952 | |
953 | syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) { |
954 | switch (NNS.getKind()) { |
955 | case NestedNameSpecifier::Global: |
956 | return syntax::NodeKind::GlobalNameSpecifier; |
957 | case NestedNameSpecifier::Namespace: |
958 | case NestedNameSpecifier::NamespaceAlias: |
959 | case NestedNameSpecifier::Identifier: |
960 | return syntax::NodeKind::IdentifierNameSpecifier; |
961 | case NestedNameSpecifier::TypeSpecWithTemplate: |
962 | return syntax::NodeKind::SimpleTemplateNameSpecifier; |
963 | case NestedNameSpecifier::TypeSpec: { |
964 | const auto *NNSType = NNS.getAsType(); |
965 | assert(NNSType); |
966 | if (isa<DecltypeType>(NNSType)) |
967 | return syntax::NodeKind::DecltypeNameSpecifier; |
968 | if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>( |
969 | NNSType)) |
970 | return syntax::NodeKind::SimpleTemplateNameSpecifier; |
971 | return syntax::NodeKind::IdentifierNameSpecifier; |
972 | } |
973 | default: |
974 | // FIXME: Support Microsoft's __super |
975 | llvm::report_fatal_error(reason: "We don't yet support the __super specifier" , |
976 | gen_crash_diag: true); |
977 | } |
978 | } |
979 | |
980 | syntax::NameSpecifier * |
981 | buildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) { |
982 | assert(NNSLoc.hasQualifier()); |
983 | auto NameSpecifierTokens = |
984 | Builder.getRange(Range: getLocalSourceRange(NNSLoc)).drop_back(); |
985 | switch (getNameSpecifierKind(NNS: *NNSLoc.getNestedNameSpecifier())) { |
986 | case syntax::NodeKind::GlobalNameSpecifier: |
987 | return new (allocator()) syntax::GlobalNameSpecifier; |
988 | case syntax::NodeKind::IdentifierNameSpecifier: { |
989 | assert(NameSpecifierTokens.size() == 1); |
990 | Builder.markChildToken(T: NameSpecifierTokens.begin(), |
991 | R: syntax::NodeRole::Unknown); |
992 | auto *NS = new (allocator()) syntax::IdentifierNameSpecifier; |
993 | Builder.foldNode(NameSpecifierTokens, NS, nullptr); |
994 | return NS; |
995 | } |
996 | case syntax::NodeKind::SimpleTemplateNameSpecifier: { |
997 | // TODO: Build `SimpleTemplateNameSpecifier` children and implement |
998 | // accessors to them. |
999 | // Be aware, we cannot do that simply by calling `TraverseTypeLoc`, |
1000 | // some `TypeLoc`s have inside them the previous name specifier and |
1001 | // we want to treat them independently. |
1002 | auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier; |
1003 | Builder.foldNode(NameSpecifierTokens, NS, nullptr); |
1004 | return NS; |
1005 | } |
1006 | case syntax::NodeKind::DecltypeNameSpecifier: { |
1007 | const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>(); |
1008 | if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL)) |
1009 | return nullptr; |
1010 | auto *NS = new (allocator()) syntax::DecltypeNameSpecifier; |
1011 | // TODO: Implement accessor to `DecltypeNameSpecifier` inner |
1012 | // `DecltypeTypeLoc`. |
1013 | // For that add mapping from `TypeLoc` to `syntax::Node*` then: |
1014 | // Builder.markChild(TypeLoc, syntax::NodeRole); |
1015 | Builder.foldNode(NameSpecifierTokens, NS, nullptr); |
1016 | return NS; |
1017 | } |
1018 | default: |
1019 | llvm_unreachable("getChildKind() does not return this value" ); |
1020 | } |
1021 | } |
1022 | |
1023 | // To build syntax tree nodes for NestedNameSpecifierLoc we override |
1024 | // Traverse instead of WalkUpFrom because we want to traverse the children |
1025 | // ourselves and build a list instead of a nested tree of name specifier |
1026 | // prefixes. |
1027 | bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) { |
1028 | if (!QualifierLoc) |
1029 | return true; |
1030 | for (auto It = QualifierLoc; It; It = It.getPrefix()) { |
1031 | auto *NS = buildNameSpecifier(It); |
1032 | if (!NS) |
1033 | return false; |
1034 | Builder.markChild(NS, syntax::NodeRole::ListElement); |
1035 | Builder.markChildToken(Loc: It.getEndLoc(), R: syntax::NodeRole::ListDelimiter); |
1036 | } |
1037 | Builder.foldNode(Range: Builder.getRange(Range: QualifierLoc.getSourceRange()), |
1038 | New: new (allocator()) syntax::NestedNameSpecifier, |
1039 | From: QualifierLoc); |
1040 | return true; |
1041 | } |
1042 | |
1043 | syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc, |
1044 | SourceLocation TemplateKeywordLoc, |
1045 | SourceRange UnqualifiedIdLoc, |
1046 | ASTPtr From) { |
1047 | if (QualifierLoc) { |
1048 | Builder.markChild(N: QualifierLoc, R: syntax::NodeRole::Qualifier); |
1049 | if (TemplateKeywordLoc.isValid()) |
1050 | Builder.markChildToken(Loc: TemplateKeywordLoc, |
1051 | R: syntax::NodeRole::TemplateKeyword); |
1052 | } |
1053 | |
1054 | auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId; |
1055 | Builder.foldNode(Range: Builder.getRange(Range: UnqualifiedIdLoc), New: TheUnqualifiedId, |
1056 | From: nullptr); |
1057 | Builder.markChild(N: TheUnqualifiedId, R: syntax::NodeRole::UnqualifiedId); |
1058 | |
1059 | auto IdExpressionBeginLoc = |
1060 | QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin(); |
1061 | |
1062 | auto *TheIdExpression = new (allocator()) syntax::IdExpression; |
1063 | Builder.foldNode( |
1064 | Builder.getRange(First: IdExpressionBeginLoc, Last: UnqualifiedIdLoc.getEnd()), |
1065 | TheIdExpression, From); |
1066 | |
1067 | return TheIdExpression; |
1068 | } |
1069 | |
1070 | bool WalkUpFromMemberExpr(MemberExpr *S) { |
1071 | // For `MemberExpr` with implicit `this->` we generate a simple |
1072 | // `id-expression` syntax node, beacuse an implicit `member-expression` is |
1073 | // syntactically undistinguishable from an `id-expression` |
1074 | if (S->isImplicitAccess()) { |
1075 | buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), |
1076 | SourceRange(S->getMemberLoc(), S->getEndLoc()), S); |
1077 | return true; |
1078 | } |
1079 | |
1080 | auto *TheIdExpression = buildIdExpression( |
1081 | S->getQualifierLoc(), S->getTemplateKeywordLoc(), |
1082 | SourceRange(S->getMemberLoc(), S->getEndLoc()), nullptr); |
1083 | |
1084 | Builder.markChild(TheIdExpression, syntax::NodeRole::Member); |
1085 | |
1086 | Builder.markExprChild(Child: S->getBase(), Role: syntax::NodeRole::Object); |
1087 | Builder.markChildToken(Loc: S->getOperatorLoc(), R: syntax::NodeRole::AccessToken); |
1088 | |
1089 | Builder.foldNode(Builder.getExprRange(S), |
1090 | new (allocator()) syntax::MemberExpression, S); |
1091 | return true; |
1092 | } |
1093 | |
1094 | bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { |
1095 | buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), |
1096 | SourceRange(S->getLocation(), S->getEndLoc()), S); |
1097 | |
1098 | return true; |
1099 | } |
1100 | |
1101 | // Same logic as DeclRefExpr. |
1102 | bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) { |
1103 | buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), |
1104 | SourceRange(S->getLocation(), S->getEndLoc()), S); |
1105 | |
1106 | return true; |
1107 | } |
1108 | |
1109 | bool WalkUpFromCXXThisExpr(CXXThisExpr *S) { |
1110 | if (!S->isImplicit()) { |
1111 | Builder.markChildToken(Loc: S->getLocation(), |
1112 | R: syntax::NodeRole::IntroducerKeyword); |
1113 | Builder.foldNode(Builder.getExprRange(S), |
1114 | new (allocator()) syntax::ThisExpression, S); |
1115 | } |
1116 | return true; |
1117 | } |
1118 | |
1119 | bool WalkUpFromParenExpr(ParenExpr *S) { |
1120 | Builder.markChildToken(Loc: S->getLParen(), R: syntax::NodeRole::OpenParen); |
1121 | Builder.markExprChild(Child: S->getSubExpr(), Role: syntax::NodeRole::SubExpression); |
1122 | Builder.markChildToken(Loc: S->getRParen(), R: syntax::NodeRole::CloseParen); |
1123 | Builder.foldNode(Builder.getExprRange(S), |
1124 | new (allocator()) syntax::ParenExpression, S); |
1125 | return true; |
1126 | } |
1127 | |
1128 | bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { |
1129 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1130 | Builder.foldNode(Builder.getExprRange(S), |
1131 | new (allocator()) syntax::IntegerLiteralExpression, S); |
1132 | return true; |
1133 | } |
1134 | |
1135 | bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { |
1136 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1137 | Builder.foldNode(Builder.getExprRange(S), |
1138 | new (allocator()) syntax::CharacterLiteralExpression, S); |
1139 | return true; |
1140 | } |
1141 | |
1142 | bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { |
1143 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1144 | Builder.foldNode(Builder.getExprRange(S), |
1145 | new (allocator()) syntax::FloatingLiteralExpression, S); |
1146 | return true; |
1147 | } |
1148 | |
1149 | bool WalkUpFromStringLiteral(StringLiteral *S) { |
1150 | Builder.markChildToken(Loc: S->getBeginLoc(), R: syntax::NodeRole::LiteralToken); |
1151 | Builder.foldNode(Builder.getExprRange(S), |
1152 | new (allocator()) syntax::StringLiteralExpression, S); |
1153 | return true; |
1154 | } |
1155 | |
1156 | bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { |
1157 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1158 | Builder.foldNode(Builder.getExprRange(S), |
1159 | new (allocator()) syntax::BoolLiteralExpression, S); |
1160 | return true; |
1161 | } |
1162 | |
1163 | bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { |
1164 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1165 | Builder.foldNode(Builder.getExprRange(S), |
1166 | new (allocator()) syntax::CxxNullPtrExpression, S); |
1167 | return true; |
1168 | } |
1169 | |
1170 | bool WalkUpFromUnaryOperator(UnaryOperator *S) { |
1171 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1172 | R: syntax::NodeRole::OperatorToken); |
1173 | Builder.markExprChild(Child: S->getSubExpr(), Role: syntax::NodeRole::Operand); |
1174 | |
1175 | if (S->isPostfix()) |
1176 | Builder.foldNode(Builder.getExprRange(S), |
1177 | new (allocator()) syntax::PostfixUnaryOperatorExpression, |
1178 | S); |
1179 | else |
1180 | Builder.foldNode(Builder.getExprRange(S), |
1181 | new (allocator()) syntax::PrefixUnaryOperatorExpression, |
1182 | S); |
1183 | |
1184 | return true; |
1185 | } |
1186 | |
1187 | bool WalkUpFromBinaryOperator(BinaryOperator *S) { |
1188 | Builder.markExprChild(Child: S->getLHS(), Role: syntax::NodeRole::LeftHandSide); |
1189 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1190 | R: syntax::NodeRole::OperatorToken); |
1191 | Builder.markExprChild(Child: S->getRHS(), Role: syntax::NodeRole::RightHandSide); |
1192 | Builder.foldNode(Builder.getExprRange(S), |
1193 | new (allocator()) syntax::BinaryOperatorExpression, S); |
1194 | return true; |
1195 | } |
1196 | |
1197 | /// Builds `CallArguments` syntax node from arguments that appear in source |
1198 | /// code, i.e. not default arguments. |
1199 | syntax::CallArguments * |
1200 | buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) { |
1201 | auto Args = dropDefaultArgs(Args: ArgsAndDefaultArgs); |
1202 | for (auto *Arg : Args) { |
1203 | Builder.markExprChild(Arg, syntax::NodeRole::ListElement); |
1204 | const auto *DelimiterToken = |
1205 | std::next(x: Builder.findToken(L: Arg->getEndLoc())); |
1206 | if (DelimiterToken->kind() == clang::tok::TokenKind::comma) |
1207 | Builder.markChildToken(T: DelimiterToken, R: syntax::NodeRole::ListDelimiter); |
1208 | } |
1209 | |
1210 | auto *Arguments = new (allocator()) syntax::CallArguments; |
1211 | if (!Args.empty()) |
1212 | Builder.foldNode(Builder.getRange((*Args.begin())->getBeginLoc(), |
1213 | (*(Args.end() - 1))->getEndLoc()), |
1214 | Arguments, nullptr); |
1215 | |
1216 | return Arguments; |
1217 | } |
1218 | |
1219 | bool WalkUpFromCallExpr(CallExpr *S) { |
1220 | Builder.markExprChild(Child: S->getCallee(), Role: syntax::NodeRole::Callee); |
1221 | |
1222 | const auto *LParenToken = |
1223 | std::next(Builder.findToken(L: S->getCallee()->getEndLoc())); |
1224 | // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have fixed |
1225 | // the test on decltype desctructors. |
1226 | if (LParenToken->kind() == clang::tok::l_paren) |
1227 | Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); |
1228 | |
1229 | Builder.markChild(N: buildCallArguments(ArgsAndDefaultArgs: S->arguments()), |
1230 | R: syntax::NodeRole::Arguments); |
1231 | |
1232 | Builder.markChildToken(Loc: S->getRParenLoc(), R: syntax::NodeRole::CloseParen); |
1233 | |
1234 | Builder.foldNode(Builder.getRange(S->getSourceRange()), |
1235 | new (allocator()) syntax::CallExpression, S); |
1236 | return true; |
1237 | } |
1238 | |
1239 | bool WalkUpFromCXXConstructExpr(CXXConstructExpr *S) { |
1240 | // Ignore the implicit calls to default constructors. |
1241 | if ((S->getNumArgs() == 0 || isa<CXXDefaultArgExpr>(Val: S->getArg(Arg: 0))) && |
1242 | S->getParenOrBraceRange().isInvalid()) |
1243 | return true; |
1244 | return RecursiveASTVisitor::WalkUpFromCXXConstructExpr(S); |
1245 | } |
1246 | |
1247 | bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { |
1248 | // To construct a syntax tree of the same shape for calls to built-in and |
1249 | // user-defined operators, ignore the `DeclRefExpr` that refers to the |
1250 | // operator and treat it as a simple token. Do that by traversing |
1251 | // arguments instead of children. |
1252 | for (auto *child : S->arguments()) { |
1253 | // A postfix unary operator is declared as taking two operands. The |
1254 | // second operand is used to distinguish from its prefix counterpart. In |
1255 | // the semantic AST this "phantom" operand is represented as a |
1256 | // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this |
1257 | // operand because it does not correspond to anything written in source |
1258 | // code. |
1259 | if (child->getSourceRange().isInvalid()) { |
1260 | assert(getOperatorNodeKind(*S) == |
1261 | syntax::NodeKind::PostfixUnaryOperatorExpression); |
1262 | continue; |
1263 | } |
1264 | if (!TraverseStmt(child)) |
1265 | return false; |
1266 | } |
1267 | return WalkUpFromCXXOperatorCallExpr(S); |
1268 | } |
1269 | |
1270 | bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { |
1271 | switch (getOperatorNodeKind(E: *S)) { |
1272 | case syntax::NodeKind::BinaryOperatorExpression: |
1273 | Builder.markExprChild(Child: S->getArg(0), Role: syntax::NodeRole::LeftHandSide); |
1274 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1275 | R: syntax::NodeRole::OperatorToken); |
1276 | Builder.markExprChild(Child: S->getArg(1), Role: syntax::NodeRole::RightHandSide); |
1277 | Builder.foldNode(Builder.getExprRange(S), |
1278 | new (allocator()) syntax::BinaryOperatorExpression, S); |
1279 | return true; |
1280 | case syntax::NodeKind::PrefixUnaryOperatorExpression: |
1281 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1282 | R: syntax::NodeRole::OperatorToken); |
1283 | Builder.markExprChild(Child: S->getArg(0), Role: syntax::NodeRole::Operand); |
1284 | Builder.foldNode(Builder.getExprRange(S), |
1285 | new (allocator()) syntax::PrefixUnaryOperatorExpression, |
1286 | S); |
1287 | return true; |
1288 | case syntax::NodeKind::PostfixUnaryOperatorExpression: |
1289 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1290 | R: syntax::NodeRole::OperatorToken); |
1291 | Builder.markExprChild(Child: S->getArg(0), Role: syntax::NodeRole::Operand); |
1292 | Builder.foldNode(Builder.getExprRange(S), |
1293 | new (allocator()) syntax::PostfixUnaryOperatorExpression, |
1294 | S); |
1295 | return true; |
1296 | case syntax::NodeKind::CallExpression: { |
1297 | Builder.markExprChild(Child: S->getArg(0), Role: syntax::NodeRole::Callee); |
1298 | |
1299 | const auto *LParenToken = |
1300 | std::next(Builder.findToken(L: S->getArg(0)->getEndLoc())); |
1301 | // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have |
1302 | // fixed the test on decltype desctructors. |
1303 | if (LParenToken->kind() == clang::tok::l_paren) |
1304 | Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); |
1305 | |
1306 | Builder.markChild(N: buildCallArguments(ArgsAndDefaultArgs: CallExpr::arg_range( |
1307 | S->arg_begin() + 1, S->arg_end())), |
1308 | R: syntax::NodeRole::Arguments); |
1309 | |
1310 | Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); |
1311 | |
1312 | Builder.foldNode(Builder.getRange(S->getSourceRange()), |
1313 | new (allocator()) syntax::CallExpression, S); |
1314 | return true; |
1315 | } |
1316 | case syntax::NodeKind::UnknownExpression: |
1317 | return WalkUpFromExpr(S); |
1318 | default: |
1319 | llvm_unreachable("getOperatorNodeKind() does not return this value" ); |
1320 | } |
1321 | } |
1322 | |
1323 | bool WalkUpFromCXXDefaultArgExpr(CXXDefaultArgExpr *S) { return true; } |
1324 | |
1325 | bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { |
1326 | auto Tokens = Builder.getDeclarationRange(S); |
1327 | if (Tokens.front().kind() == tok::coloncolon) { |
1328 | // Handle nested namespace definitions. Those start at '::' token, e.g. |
1329 | // namespace a^::b {} |
1330 | // FIXME: build corresponding nodes for the name of this namespace. |
1331 | return true; |
1332 | } |
1333 | Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); |
1334 | return true; |
1335 | } |
1336 | |
1337 | // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test |
1338 | // results. Find test coverage or remove it. |
1339 | bool TraverseParenTypeLoc(ParenTypeLoc L) { |
1340 | // We reverse order of traversal to get the proper syntax structure. |
1341 | if (!WalkUpFromParenTypeLoc(L)) |
1342 | return false; |
1343 | return TraverseTypeLoc(TL: L.getInnerLoc()); |
1344 | } |
1345 | |
1346 | bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { |
1347 | Builder.markChildToken(Loc: L.getLParenLoc(), R: syntax::NodeRole::OpenParen); |
1348 | Builder.markChildToken(Loc: L.getRParenLoc(), R: syntax::NodeRole::CloseParen); |
1349 | Builder.foldNode(Builder.getRange(First: L.getLParenLoc(), Last: L.getRParenLoc()), |
1350 | new (allocator()) syntax::ParenDeclarator, L); |
1351 | return true; |
1352 | } |
1353 | |
1354 | // Declarator chunks, they are produced by type locs and some clang::Decls. |
1355 | bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { |
1356 | Builder.markChildToken(Loc: L.getLBracketLoc(), R: syntax::NodeRole::OpenParen); |
1357 | Builder.markExprChild(Child: L.getSizeExpr(), Role: syntax::NodeRole::Size); |
1358 | Builder.markChildToken(Loc: L.getRBracketLoc(), R: syntax::NodeRole::CloseParen); |
1359 | Builder.foldNode(Builder.getRange(First: L.getLBracketLoc(), Last: L.getRBracketLoc()), |
1360 | new (allocator()) syntax::ArraySubscript, L); |
1361 | return true; |
1362 | } |
1363 | |
1364 | syntax::ParameterDeclarationList * |
1365 | buildParameterDeclarationList(ArrayRef<ParmVarDecl *> Params) { |
1366 | for (auto *P : Params) { |
1367 | Builder.markChild(P, syntax::NodeRole::ListElement); |
1368 | const auto *DelimiterToken = std::next(Builder.findToken(L: P->getEndLoc())); |
1369 | if (DelimiterToken->kind() == clang::tok::TokenKind::comma) |
1370 | Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); |
1371 | } |
1372 | auto *Parameters = new (allocator()) syntax::ParameterDeclarationList; |
1373 | if (!Params.empty()) |
1374 | Builder.foldNode(Builder.getRange(Params.front()->getBeginLoc(), |
1375 | Params.back()->getEndLoc()), |
1376 | Parameters, nullptr); |
1377 | return Parameters; |
1378 | } |
1379 | |
1380 | bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { |
1381 | Builder.markChildToken(Loc: L.getLParenLoc(), R: syntax::NodeRole::OpenParen); |
1382 | |
1383 | Builder.markChild(N: buildParameterDeclarationList(Params: L.getParams()), |
1384 | R: syntax::NodeRole::Parameters); |
1385 | |
1386 | Builder.markChildToken(Loc: L.getRParenLoc(), R: syntax::NodeRole::CloseParen); |
1387 | Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), |
1388 | new (allocator()) syntax::ParametersAndQualifiers, L); |
1389 | return true; |
1390 | } |
1391 | |
1392 | bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { |
1393 | if (!L.getTypePtr()->hasTrailingReturn()) |
1394 | return WalkUpFromFunctionTypeLoc(L); |
1395 | |
1396 | auto *TrailingReturnTokens = buildTrailingReturn(L); |
1397 | // Finish building the node for parameters. |
1398 | Builder.markChild(N: TrailingReturnTokens, R: syntax::NodeRole::TrailingReturn); |
1399 | return WalkUpFromFunctionTypeLoc(L); |
1400 | } |
1401 | |
1402 | bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) { |
1403 | // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds |
1404 | // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to |
1405 | // "(Y::*mp)" We thus reverse the order of traversal to get the proper |
1406 | // syntax structure. |
1407 | if (!WalkUpFromMemberPointerTypeLoc(L)) |
1408 | return false; |
1409 | return TraverseTypeLoc(TL: L.getPointeeLoc()); |
1410 | } |
1411 | |
1412 | bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { |
1413 | auto SR = L.getLocalSourceRange(); |
1414 | Builder.foldNode(Builder.getRange(Range: SR), |
1415 | new (allocator()) syntax::MemberPointer, L); |
1416 | return true; |
1417 | } |
1418 | |
1419 | // The code below is very regular, it could even be generated with some |
1420 | // preprocessor magic. We merely assign roles to the corresponding children |
1421 | // and fold resulting nodes. |
1422 | bool WalkUpFromDeclStmt(DeclStmt *S) { |
1423 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1424 | New: new (allocator()) syntax::DeclarationStatement, From: S); |
1425 | return true; |
1426 | } |
1427 | |
1428 | bool WalkUpFromNullStmt(NullStmt *S) { |
1429 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1430 | New: new (allocator()) syntax::EmptyStatement, From: S); |
1431 | return true; |
1432 | } |
1433 | |
1434 | bool WalkUpFromSwitchStmt(SwitchStmt *S) { |
1435 | Builder.markChildToken(Loc: S->getSwitchLoc(), |
1436 | R: syntax::NodeRole::IntroducerKeyword); |
1437 | Builder.markStmtChild(Child: S->getBody(), Role: syntax::NodeRole::BodyStatement); |
1438 | Builder.foldNode(Builder.getStmtRange(S), |
1439 | new (allocator()) syntax::SwitchStatement, S); |
1440 | return true; |
1441 | } |
1442 | |
1443 | bool WalkUpFromCaseStmt(CaseStmt *S) { |
1444 | Builder.markChildToken(S->getKeywordLoc(), |
1445 | syntax::NodeRole::IntroducerKeyword); |
1446 | Builder.markExprChild(Child: S->getLHS(), Role: syntax::NodeRole::CaseValue); |
1447 | Builder.markStmtChild(Child: S->getSubStmt(), Role: syntax::NodeRole::BodyStatement); |
1448 | Builder.foldNode(Builder.getStmtRange(S), |
1449 | new (allocator()) syntax::CaseStatement, S); |
1450 | return true; |
1451 | } |
1452 | |
1453 | bool WalkUpFromDefaultStmt(DefaultStmt *S) { |
1454 | Builder.markChildToken(Loc: S->getKeywordLoc(), |
1455 | R: syntax::NodeRole::IntroducerKeyword); |
1456 | Builder.markStmtChild(Child: S->getSubStmt(), Role: syntax::NodeRole::BodyStatement); |
1457 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1458 | New: new (allocator()) syntax::DefaultStatement, From: S); |
1459 | return true; |
1460 | } |
1461 | |
1462 | bool WalkUpFromIfStmt(IfStmt *S) { |
1463 | Builder.markChildToken(Loc: S->getIfLoc(), R: syntax::NodeRole::IntroducerKeyword); |
1464 | Stmt *ConditionStatement = S->getCond(); |
1465 | if (S->hasVarStorage()) |
1466 | ConditionStatement = S->getConditionVariableDeclStmt(); |
1467 | Builder.markStmtChild(Child: ConditionStatement, Role: syntax::NodeRole::Condition); |
1468 | Builder.markStmtChild(Child: S->getThen(), Role: syntax::NodeRole::ThenStatement); |
1469 | Builder.markChildToken(Loc: S->getElseLoc(), R: syntax::NodeRole::ElseKeyword); |
1470 | Builder.markStmtChild(Child: S->getElse(), Role: syntax::NodeRole::ElseStatement); |
1471 | Builder.foldNode(Builder.getStmtRange(S), |
1472 | new (allocator()) syntax::IfStatement, S); |
1473 | return true; |
1474 | } |
1475 | |
1476 | bool WalkUpFromForStmt(ForStmt *S) { |
1477 | Builder.markChildToken(Loc: S->getForLoc(), R: syntax::NodeRole::IntroducerKeyword); |
1478 | Builder.markStmtChild(Child: S->getBody(), Role: syntax::NodeRole::BodyStatement); |
1479 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1480 | New: new (allocator()) syntax::ForStatement, From: S); |
1481 | return true; |
1482 | } |
1483 | |
1484 | bool WalkUpFromWhileStmt(WhileStmt *S) { |
1485 | Builder.markChildToken(Loc: S->getWhileLoc(), |
1486 | R: syntax::NodeRole::IntroducerKeyword); |
1487 | Builder.markStmtChild(Child: S->getBody(), Role: syntax::NodeRole::BodyStatement); |
1488 | Builder.foldNode(Builder.getStmtRange(S), |
1489 | new (allocator()) syntax::WhileStatement, S); |
1490 | return true; |
1491 | } |
1492 | |
1493 | bool WalkUpFromContinueStmt(ContinueStmt *S) { |
1494 | Builder.markChildToken(Loc: S->getContinueLoc(), |
1495 | R: syntax::NodeRole::IntroducerKeyword); |
1496 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1497 | New: new (allocator()) syntax::ContinueStatement, From: S); |
1498 | return true; |
1499 | } |
1500 | |
1501 | bool WalkUpFromBreakStmt(BreakStmt *S) { |
1502 | Builder.markChildToken(Loc: S->getBreakLoc(), |
1503 | R: syntax::NodeRole::IntroducerKeyword); |
1504 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1505 | New: new (allocator()) syntax::BreakStatement, From: S); |
1506 | return true; |
1507 | } |
1508 | |
1509 | bool WalkUpFromReturnStmt(ReturnStmt *S) { |
1510 | Builder.markChildToken(Loc: S->getReturnLoc(), |
1511 | R: syntax::NodeRole::IntroducerKeyword); |
1512 | Builder.markExprChild(Child: S->getRetValue(), Role: syntax::NodeRole::ReturnValue); |
1513 | Builder.foldNode(Builder.getStmtRange(S), |
1514 | new (allocator()) syntax::ReturnStatement, S); |
1515 | return true; |
1516 | } |
1517 | |
1518 | bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { |
1519 | Builder.markChildToken(Loc: S->getForLoc(), R: syntax::NodeRole::IntroducerKeyword); |
1520 | Builder.markStmtChild(Child: S->getBody(), Role: syntax::NodeRole::BodyStatement); |
1521 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1522 | New: new (allocator()) syntax::RangeBasedForStatement, From: S); |
1523 | return true; |
1524 | } |
1525 | |
1526 | bool WalkUpFromEmptyDecl(EmptyDecl *S) { |
1527 | Builder.foldNode(Builder.getDeclarationRange(S), |
1528 | new (allocator()) syntax::EmptyDeclaration, S); |
1529 | return true; |
1530 | } |
1531 | |
1532 | bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { |
1533 | Builder.markExprChild(Child: S->getAssertExpr(), Role: syntax::NodeRole::Condition); |
1534 | Builder.markExprChild(Child: S->getMessage(), Role: syntax::NodeRole::Message); |
1535 | Builder.foldNode(Builder.getDeclarationRange(S), |
1536 | new (allocator()) syntax::StaticAssertDeclaration, S); |
1537 | return true; |
1538 | } |
1539 | |
1540 | bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { |
1541 | Builder.foldNode(Builder.getDeclarationRange(S), |
1542 | new (allocator()) syntax::LinkageSpecificationDeclaration, |
1543 | S); |
1544 | return true; |
1545 | } |
1546 | |
1547 | bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { |
1548 | Builder.foldNode(Builder.getDeclarationRange(S), |
1549 | new (allocator()) syntax::NamespaceAliasDefinition, S); |
1550 | return true; |
1551 | } |
1552 | |
1553 | bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { |
1554 | Builder.foldNode(Builder.getDeclarationRange(S), |
1555 | new (allocator()) syntax::UsingNamespaceDirective, S); |
1556 | return true; |
1557 | } |
1558 | |
1559 | bool WalkUpFromUsingDecl(UsingDecl *S) { |
1560 | Builder.foldNode(Builder.getDeclarationRange(S), |
1561 | new (allocator()) syntax::UsingDeclaration, S); |
1562 | return true; |
1563 | } |
1564 | |
1565 | bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { |
1566 | Builder.foldNode(Builder.getDeclarationRange(S), |
1567 | new (allocator()) syntax::UsingDeclaration, S); |
1568 | return true; |
1569 | } |
1570 | |
1571 | bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { |
1572 | Builder.foldNode(Builder.getDeclarationRange(S), |
1573 | new (allocator()) syntax::UsingDeclaration, S); |
1574 | return true; |
1575 | } |
1576 | |
1577 | bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { |
1578 | Builder.foldNode(Builder.getDeclarationRange(S), |
1579 | new (allocator()) syntax::TypeAliasDeclaration, S); |
1580 | return true; |
1581 | } |
1582 | |
1583 | private: |
1584 | /// Folds SimpleDeclarator node (if present) and in case this is the last |
1585 | /// declarator in the chain it also folds SimpleDeclaration node. |
1586 | template <class T> bool processDeclaratorAndDeclaration(T *D) { |
1587 | auto Range = getDeclaratorRange( |
1588 | Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(), |
1589 | getQualifiedNameStart(D), getInitializerRange(D)); |
1590 | |
1591 | // There doesn't have to be a declarator (e.g. `void foo(int)` only has |
1592 | // declaration, but no declarator). |
1593 | if (!Range.getBegin().isValid()) { |
1594 | Builder.markChild(N: new (allocator()) syntax::DeclaratorList, |
1595 | R: syntax::NodeRole::Declarators); |
1596 | Builder.foldNode(Builder.getDeclarationRange(D), |
1597 | new (allocator()) syntax::SimpleDeclaration, D); |
1598 | return true; |
1599 | } |
1600 | |
1601 | auto *N = new (allocator()) syntax::SimpleDeclarator; |
1602 | Builder.foldNode(Builder.getRange(Range), N, nullptr); |
1603 | Builder.markChild(N, R: syntax::NodeRole::ListElement); |
1604 | |
1605 | if (!Builder.isResponsibleForCreatingDeclaration(D)) { |
1606 | // If this is not the last declarator in the declaration we expect a |
1607 | // delimiter after it. |
1608 | const auto *DelimiterToken = std::next(Builder.findToken(L: Range.getEnd())); |
1609 | if (DelimiterToken->kind() == clang::tok::TokenKind::comma) |
1610 | Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); |
1611 | } else { |
1612 | auto *DL = new (allocator()) syntax::DeclaratorList; |
1613 | auto DeclarationRange = Builder.getDeclarationRange(D); |
1614 | Builder.foldList(SuperRange: DeclarationRange, New: DL, From: nullptr); |
1615 | |
1616 | Builder.markChild(N: DL, R: syntax::NodeRole::Declarators); |
1617 | Builder.foldNode(DeclarationRange, |
1618 | new (allocator()) syntax::SimpleDeclaration, D); |
1619 | } |
1620 | return true; |
1621 | } |
1622 | |
1623 | /// Returns the range of the built node. |
1624 | syntax::TrailingReturnType *buildTrailingReturn(FunctionProtoTypeLoc L) { |
1625 | assert(L.getTypePtr()->hasTrailingReturn()); |
1626 | |
1627 | auto ReturnedType = L.getReturnLoc(); |
1628 | // Build node for the declarator, if any. |
1629 | auto ReturnDeclaratorRange = SourceRange(GetStartLoc().Visit(ReturnedType), |
1630 | ReturnedType.getEndLoc()); |
1631 | syntax::SimpleDeclarator *ReturnDeclarator = nullptr; |
1632 | if (ReturnDeclaratorRange.isValid()) { |
1633 | ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; |
1634 | Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), |
1635 | ReturnDeclarator, nullptr); |
1636 | } |
1637 | |
1638 | // Build node for trailing return type. |
1639 | auto Return = Builder.getRange(ReturnedType.getSourceRange()); |
1640 | const auto *Arrow = Return.begin() - 1; |
1641 | assert(Arrow->kind() == tok::arrow); |
1642 | auto Tokens = llvm::ArrayRef(Arrow, Return.end()); |
1643 | Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); |
1644 | if (ReturnDeclarator) |
1645 | Builder.markChild(N: ReturnDeclarator, R: syntax::NodeRole::Declarator); |
1646 | auto *R = new (allocator()) syntax::TrailingReturnType; |
1647 | Builder.foldNode(Tokens, R, L); |
1648 | return R; |
1649 | } |
1650 | |
1651 | void foldExplicitTemplateInstantiation( |
1652 | ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, |
1653 | const syntax::Token *TemplateKW, |
1654 | syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { |
1655 | assert(!ExternKW || ExternKW->kind() == tok::kw_extern); |
1656 | assert(TemplateKW && TemplateKW->kind() == tok::kw_template); |
1657 | Builder.markChildToken(T: ExternKW, R: syntax::NodeRole::ExternKeyword); |
1658 | Builder.markChildToken(T: TemplateKW, R: syntax::NodeRole::IntroducerKeyword); |
1659 | Builder.markChild(N: InnerDeclaration, R: syntax::NodeRole::Declaration); |
1660 | Builder.foldNode( |
1661 | Range, New: new (allocator()) syntax::ExplicitTemplateInstantiation, From); |
1662 | } |
1663 | |
1664 | syntax::TemplateDeclaration *foldTemplateDeclaration( |
1665 | ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, |
1666 | ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { |
1667 | assert(TemplateKW && TemplateKW->kind() == tok::kw_template); |
1668 | Builder.markChildToken(T: TemplateKW, R: syntax::NodeRole::IntroducerKeyword); |
1669 | |
1670 | auto *N = new (allocator()) syntax::TemplateDeclaration; |
1671 | Builder.foldNode(Range, New: N, From); |
1672 | Builder.markChild(N, R: syntax::NodeRole::Declaration); |
1673 | return N; |
1674 | } |
1675 | |
1676 | /// A small helper to save some typing. |
1677 | llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } |
1678 | |
1679 | syntax::TreeBuilder &Builder; |
1680 | const ASTContext &Context; |
1681 | }; |
1682 | } // namespace |
1683 | |
1684 | void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { |
1685 | DeclsWithoutSemicolons.insert(V: D); |
1686 | } |
1687 | |
1688 | void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { |
1689 | if (Loc.isInvalid()) |
1690 | return; |
1691 | Pending.assignRole(Range: *findToken(L: Loc), Role); |
1692 | } |
1693 | |
1694 | void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { |
1695 | if (!T) |
1696 | return; |
1697 | Pending.assignRole(Range: *T, Role: R); |
1698 | } |
1699 | |
1700 | void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { |
1701 | assert(N); |
1702 | setRole(N, R); |
1703 | } |
1704 | |
1705 | void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { |
1706 | auto *SN = Mapping.find(P: N); |
1707 | assert(SN != nullptr); |
1708 | setRole(N: SN, R); |
1709 | } |
1710 | void syntax::TreeBuilder::markChild(NestedNameSpecifierLoc NNSLoc, NodeRole R) { |
1711 | auto *SN = Mapping.find(P: NNSLoc); |
1712 | assert(SN != nullptr); |
1713 | setRole(N: SN, R); |
1714 | } |
1715 | |
1716 | void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { |
1717 | if (!Child) |
1718 | return; |
1719 | |
1720 | syntax::Tree *ChildNode; |
1721 | if (Expr *ChildExpr = dyn_cast<Expr>(Val: Child)) { |
1722 | // This is an expression in a statement position, consume the trailing |
1723 | // semicolon and form an 'ExpressionStatement' node. |
1724 | markExprChild(Child: ChildExpr, Role: NodeRole::Expression); |
1725 | ChildNode = new (allocator()) syntax::ExpressionStatement; |
1726 | // (!) 'getStmtRange()' ensures this covers a trailing semicolon. |
1727 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: getStmtRange(S: Child), Node: ChildNode); |
1728 | } else { |
1729 | ChildNode = Mapping.find(P: Child); |
1730 | } |
1731 | assert(ChildNode != nullptr); |
1732 | setRole(N: ChildNode, R: Role); |
1733 | } |
1734 | |
1735 | void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { |
1736 | if (!Child) |
1737 | return; |
1738 | Child = IgnoreImplicit(E: Child); |
1739 | |
1740 | syntax::Tree *ChildNode = Mapping.find(Child); |
1741 | assert(ChildNode != nullptr); |
1742 | setRole(N: ChildNode, R: Role); |
1743 | } |
1744 | |
1745 | const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { |
1746 | if (L.isInvalid()) |
1747 | return nullptr; |
1748 | auto It = LocationToToken.find(Val: L); |
1749 | assert(It != LocationToToken.end()); |
1750 | return It->second; |
1751 | } |
1752 | |
1753 | syntax::TranslationUnit *syntax::buildSyntaxTree(Arena &A, |
1754 | TokenBufferTokenManager& TBTM, |
1755 | ASTContext &Context) { |
1756 | TreeBuilder Builder(A, TBTM); |
1757 | BuildTreeVisitor(Context, Builder).TraverseAST(AST&: Context); |
1758 | return std::move(Builder).finalize(); |
1759 | } |
1760 | |