//===- Parser.cpp - Matcher expression parser -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//
13
14#include "clang/ASTMatchers/Dynamic/Parser.h"
15#include "clang/ASTMatchers/ASTMatchersInternal.h"
16#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17#include "clang/ASTMatchers/Dynamic/Registry.h"
18#include "clang/Basic/CharInfo.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/ManagedStatic.h"
22#include <cassert>
23#include <cerrno>
24#include <cstddef>
25#include <cstdlib>
26#include <optional>
27#include <string>
28#include <utility>
29#include <vector>
30
31namespace clang {
32namespace ast_matchers {
33namespace dynamic {
34
35/// Simple structure to hold information for one token from the parser.
36struct Parser::TokenInfo {
37 /// Different possible tokens.
38 enum TokenKind {
39 TK_Eof,
40 TK_NewLine,
41 TK_OpenParen,
42 TK_CloseParen,
43 TK_Comma,
44 TK_Period,
45 TK_Literal,
46 TK_Ident,
47 TK_InvalidChar,
48 TK_Error,
49 TK_CodeCompletion
50 };
51
52 /// Some known identifiers.
53 static const char* const ID_Bind;
54 static const char *const ID_With;
55
56 TokenInfo() = default;
57
58 StringRef Text;
59 TokenKind Kind = TK_Eof;
60 SourceRange Range;
61 VariantValue Value;
62};
63
64const char* const Parser::TokenInfo::ID_Bind = "bind";
65const char *const Parser::TokenInfo::ID_With = "with";
66
67/// Simple tokenizer for the parser.
68class Parser::CodeTokenizer {
69public:
70 explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
71 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
72 NextToken = getNextToken();
73 }
74
75 CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
76 unsigned CodeCompletionOffset)
77 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
78 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
79 NextToken = getNextToken();
80 }
81
82 /// Returns but doesn't consume the next token.
83 const TokenInfo &peekNextToken() const { return NextToken; }
84
85 /// Consumes and returns the next token.
86 TokenInfo consumeNextToken() {
87 TokenInfo ThisToken = NextToken;
88 NextToken = getNextToken();
89 return ThisToken;
90 }
91
92 TokenInfo SkipNewlines() {
93 while (NextToken.Kind == TokenInfo::TK_NewLine)
94 NextToken = getNextToken();
95 return NextToken;
96 }
97
98 TokenInfo consumeNextTokenIgnoreNewlines() {
99 SkipNewlines();
100 if (NextToken.Kind == TokenInfo::TK_Eof)
101 return NextToken;
102 return consumeNextToken();
103 }
104
105 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
106
107private:
108 TokenInfo getNextToken() {
109 consumeWhitespace();
110 TokenInfo Result;
111 Result.Range.Start = currentLocation();
112
113 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
114 Result.Kind = TokenInfo::TK_CodeCompletion;
115 Result.Text = StringRef(CodeCompletionLocation, 0);
116 CodeCompletionLocation = nullptr;
117 return Result;
118 }
119
120 if (Code.empty()) {
121 Result.Kind = TokenInfo::TK_Eof;
122 Result.Text = "";
123 return Result;
124 }
125
126 switch (Code[0]) {
127 case '#':
128 Code = Code.drop_until(F: [](char c) { return c == '\n'; });
129 return getNextToken();
130 case ',':
131 Result.Kind = TokenInfo::TK_Comma;
132 Result.Text = Code.substr(Start: 0, N: 1);
133 Code = Code.drop_front();
134 break;
135 case '.':
136 Result.Kind = TokenInfo::TK_Period;
137 Result.Text = Code.substr(Start: 0, N: 1);
138 Code = Code.drop_front();
139 break;
140 case '\n':
141 ++Line;
142 StartOfLine = Code.drop_front();
143 Result.Kind = TokenInfo::TK_NewLine;
144 Result.Text = Code.substr(Start: 0, N: 1);
145 Code = Code.drop_front();
146 break;
147 case '(':
148 Result.Kind = TokenInfo::TK_OpenParen;
149 Result.Text = Code.substr(Start: 0, N: 1);
150 Code = Code.drop_front();
151 break;
152 case ')':
153 Result.Kind = TokenInfo::TK_CloseParen;
154 Result.Text = Code.substr(Start: 0, N: 1);
155 Code = Code.drop_front();
156 break;
157
158 case '"':
159 case '\'':
160 // Parse a string literal.
161 consumeStringLiteral(Result: &Result);
162 break;
163
164 case '0': case '1': case '2': case '3': case '4':
165 case '5': case '6': case '7': case '8': case '9':
166 // Parse an unsigned and float literal.
167 consumeNumberLiteral(Result: &Result);
168 break;
169
170 default:
171 if (isAlphanumeric(c: Code[0])) {
172 // Parse an identifier
173 size_t TokenLength = 1;
174 while (true) {
175 // A code completion location in/immediately after an identifier will
176 // cause the portion of the identifier before the code completion
177 // location to become a code completion token.
178 if (CodeCompletionLocation == Code.data() + TokenLength) {
179 CodeCompletionLocation = nullptr;
180 Result.Kind = TokenInfo::TK_CodeCompletion;
181 Result.Text = Code.substr(Start: 0, N: TokenLength);
182 Code = Code.drop_front(N: TokenLength);
183 return Result;
184 }
185 if (TokenLength == Code.size() || !isAlphanumeric(c: Code[TokenLength]))
186 break;
187 ++TokenLength;
188 }
189 if (TokenLength == 4 && Code.starts_with(Prefix: "true")) {
190 Result.Kind = TokenInfo::TK_Literal;
191 Result.Value = true;
192 } else if (TokenLength == 5 && Code.starts_with(Prefix: "false")) {
193 Result.Kind = TokenInfo::TK_Literal;
194 Result.Value = false;
195 } else {
196 Result.Kind = TokenInfo::TK_Ident;
197 Result.Text = Code.substr(Start: 0, N: TokenLength);
198 }
199 Code = Code.drop_front(N: TokenLength);
200 } else {
201 Result.Kind = TokenInfo::TK_InvalidChar;
202 Result.Text = Code.substr(Start: 0, N: 1);
203 Code = Code.drop_front(N: 1);
204 }
205 break;
206 }
207
208 Result.Range.End = currentLocation();
209 return Result;
210 }
211
212 /// Consume an unsigned and float literal.
213 void consumeNumberLiteral(TokenInfo *Result) {
214 bool isFloatingLiteral = false;
215 unsigned Length = 1;
216 if (Code.size() > 1) {
217 // Consume the 'x' or 'b' radix modifier, if present.
218 switch (toLowercase(c: Code[1])) {
219 case 'x': case 'b': Length = 2;
220 }
221 }
222 while (Length < Code.size() && isHexDigit(c: Code[Length]))
223 ++Length;
224
225 // Try to recognize a floating point literal.
226 while (Length < Code.size()) {
227 char c = Code[Length];
228 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
229 isFloatingLiteral = true;
230 Length++;
231 } else {
232 break;
233 }
234 }
235
236 Result->Text = Code.substr(Start: 0, N: Length);
237 Code = Code.drop_front(N: Length);
238
239 if (isFloatingLiteral) {
240 char *end;
241 errno = 0;
242 std::string Text = Result->Text.str();
243 double doubleValue = strtod(nptr: Text.c_str(), endptr: &end);
244 if (*end == 0 && errno == 0) {
245 Result->Kind = TokenInfo::TK_Literal;
246 Result->Value = doubleValue;
247 return;
248 }
249 } else {
250 unsigned Value;
251 if (!Result->Text.getAsInteger(Radix: 0, Result&: Value)) {
252 Result->Kind = TokenInfo::TK_Literal;
253 Result->Value = Value;
254 return;
255 }
256 }
257
258 SourceRange Range;
259 Range.Start = Result->Range.Start;
260 Range.End = currentLocation();
261 Error->addError(Range, Error: Error->ET_ParserNumberError) << Result->Text;
262 Result->Kind = TokenInfo::TK_Error;
263 }
264
265 /// Consume a string literal.
266 ///
267 /// \c Code must be positioned at the start of the literal (the opening
268 /// quote). Consumed until it finds the same closing quote character.
269 void consumeStringLiteral(TokenInfo *Result) {
270 bool InEscape = false;
271 const char Marker = Code[0];
272 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
273 if (InEscape) {
274 InEscape = false;
275 continue;
276 }
277 if (Code[Length] == '\\') {
278 InEscape = true;
279 continue;
280 }
281 if (Code[Length] == Marker) {
282 Result->Kind = TokenInfo::TK_Literal;
283 Result->Text = Code.substr(Start: 0, N: Length + 1);
284 Result->Value = Code.substr(Start: 1, N: Length - 1);
285 Code = Code.drop_front(N: Length + 1);
286 return;
287 }
288 }
289
290 StringRef ErrorText = Code;
291 Code = Code.drop_front(N: Code.size());
292 SourceRange Range;
293 Range.Start = Result->Range.Start;
294 Range.End = currentLocation();
295 Error->addError(Range, Error: Error->ET_ParserStringError) << ErrorText;
296 Result->Kind = TokenInfo::TK_Error;
297 }
298
299 /// Consume all leading whitespace from \c Code.
300 void consumeWhitespace() {
301 // Don't trim newlines.
302 Code = Code.ltrim(Chars: " \t\v\f\r");
303 }
304
305 SourceLocation currentLocation() {
306 SourceLocation Location;
307 Location.Line = Line;
308 Location.Column = Code.data() - StartOfLine.data() + 1;
309 return Location;
310 }
311
312 StringRef &Code;
313 StringRef StartOfLine;
314 unsigned Line = 1;
315 Diagnostics *Error;
316 TokenInfo NextToken;
317 const char *CodeCompletionLocation = nullptr;
318};
319
320Parser::Sema::~Sema() = default;
321
322std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
323 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
324 return {};
325}
326
327std::vector<MatcherCompletion>
328Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
329 return {};
330}
331
332struct Parser::ScopedContextEntry {
333 Parser *P;
334
335 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
336 P->ContextStack.push_back(x: std::make_pair(x&: C, y: 0u));
337 }
338
339 ~ScopedContextEntry() {
340 P->ContextStack.pop_back();
341 }
342
343 void nextArg() {
344 ++P->ContextStack.back().second;
345 }
346};
347
348/// Parse expressions that start with an identifier.
349///
350/// This function can parse named values and matchers.
351/// In case of failure it will try to determine the user's intent to give
352/// an appropriate error message.
353bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
354 const TokenInfo NameToken = Tokenizer->consumeNextToken();
355
356 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
357 // Parse as a named value.
358 if (const VariantValue NamedValue =
359 NamedValues ? NamedValues->lookup(Key: NameToken.Text)
360 : VariantValue()) {
361
362 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
363 *Value = NamedValue;
364 return true;
365 }
366
367 std::string BindID;
368 Tokenizer->consumeNextToken();
369 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
370 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
371 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
372 return false;
373 }
374
375 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
376 (ChainCallToken.Text != TokenInfo::ID_Bind &&
377 ChainCallToken.Text != TokenInfo::ID_With)) {
378 Error->addError(Range: ChainCallToken.Range,
379 Error: Error->ET_ParserMalformedChainedExpr);
380 return false;
381 }
382 if (ChainCallToken.Text == TokenInfo::ID_With) {
383
384 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
385 NameToken.Text, NameToken.Range);
386
387 Error->addError(Range: ChainCallToken.Range,
388 Error: Error->ET_RegistryMatcherNoWithSupport);
389 return false;
390 }
391 if (!parseBindID(BindID))
392 return false;
393
394 assert(NamedValue.isMatcher());
395 std::optional<DynTypedMatcher> Result =
396 NamedValue.getMatcher().getSingleMatcher();
397 if (Result) {
398 std::optional<DynTypedMatcher> Bound = Result->tryBind(ID: BindID);
399 if (Bound) {
400 *Value = VariantMatcher::SingleMatcher(Matcher: *Bound);
401 return true;
402 }
403 }
404 return false;
405 }
406
407 if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
408 Error->addError(Range: Tokenizer->peekNextToken().Range,
409 Error: Error->ET_ParserNoOpenParen)
410 << "NewLine";
411 return false;
412 }
413
414 // If the syntax is correct and the name is not a matcher either, report
415 // unknown named value.
416 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
417 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
418 Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
419 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
420 !S->lookupMatcherCtor(MatcherName: NameToken.Text)) {
421 Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryValueNotFound)
422 << NameToken.Text;
423 return false;
424 }
425 // Otherwise, fallback to the matcher parser.
426 }
427
428 Tokenizer->SkipNewlines();
429
430 assert(NameToken.Kind == TokenInfo::TK_Ident);
431 TokenInfo OpenToken = Tokenizer->consumeNextToken();
432 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
433 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoOpenParen)
434 << OpenToken.Text;
435 return false;
436 }
437
438 std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(MatcherName: NameToken.Text);
439
440 // Parse as a matcher expression.
441 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
442}
443
444bool Parser::parseBindID(std::string &BindID) {
445 // Parse the parenthesized argument to .bind("foo")
446 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
447 const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
448 const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
449
450 // TODO: We could use different error codes for each/some to be more
451 // explicit about the syntax error.
452 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
453 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserMalformedBindExpr);
454 return false;
455 }
456 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
457 Error->addError(Range: IDToken.Range, Error: Error->ET_ParserMalformedBindExpr);
458 return false;
459 }
460 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
461 Error->addError(Range: CloseToken.Range, Error: Error->ET_ParserMalformedBindExpr);
462 return false;
463 }
464 BindID = IDToken.Value.getString();
465 return true;
466}
467
468bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
469 const TokenInfo &OpenToken,
470 VariantValue *Value) {
471 std::vector<ParserValue> Args;
472 TokenInfo EndToken;
473
474 Tokenizer->SkipNewlines();
475
476 {
477 ScopedContextEntry SCE(this, Ctor);
478
479 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
480 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
481 // End of args.
482 EndToken = Tokenizer->consumeNextToken();
483 break;
484 }
485 if (!Args.empty()) {
486 // We must find a , token to continue.
487 TokenInfo CommaToken = Tokenizer->consumeNextToken();
488 if (CommaToken.Kind != TokenInfo::TK_Comma) {
489 Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma)
490 << CommaToken.Text;
491 return false;
492 }
493 }
494
495 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
496 NameToken.Text, NameToken.Range,
497 Args.size() + 1);
498 ParserValue ArgValue;
499 Tokenizer->SkipNewlines();
500
501 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
502 addExpressionCompletions();
503 return false;
504 }
505
506 TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();
507
508 if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
509 Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher)
510 << NameToken.Text;
511 return false;
512 }
513
514 ArgValue.Text = NodeMatcherToken.Text;
515 ArgValue.Range = NodeMatcherToken.Range;
516
517 std::optional<MatcherCtor> MappedMatcher =
518 S->lookupMatcherCtor(MatcherName: ArgValue.Text);
519
520 if (!MappedMatcher) {
521 Error->addError(Range: NodeMatcherToken.Range,
522 Error: Error->ET_RegistryMatcherNotFound)
523 << NodeMatcherToken.Text;
524 return false;
525 }
526
527 ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);
528
529 if (NK.isNone()) {
530 Error->addError(Range: NodeMatcherToken.Range,
531 Error: Error->ET_RegistryNonNodeMatcher)
532 << NodeMatcherToken.Text;
533 return false;
534 }
535
536 ArgValue.Value = NK;
537
538 Tokenizer->SkipNewlines();
539 Args.push_back(x: ArgValue);
540
541 SCE.nextArg();
542 }
543 }
544
545 if (EndToken.Kind == TokenInfo::TK_Eof) {
546 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen);
547 return false;
548 }
549
550 internal::MatcherDescriptorPtr BuiltCtor =
551 S->buildMatcherCtor(Ctor, NameRange: NameToken.Range, Args, Error);
552
553 if (!BuiltCtor.get()) {
554 Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher)
555 << NameToken.Text;
556 return false;
557 }
558
559 std::string BindID;
560 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
561 Tokenizer->consumeNextToken();
562 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
563 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
564 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
565 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("with(", "with", 1));
566 return false;
567 }
568 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
569 (ChainCallToken.Text != TokenInfo::ID_Bind &&
570 ChainCallToken.Text != TokenInfo::ID_With)) {
571 Error->addError(Range: ChainCallToken.Range,
572 Error: Error->ET_ParserMalformedChainedExpr);
573 return false;
574 }
575 if (ChainCallToken.Text == TokenInfo::ID_Bind) {
576 if (!parseBindID(BindID))
577 return false;
578 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
579 NameToken.Text, NameToken.Range);
580 SourceRange MatcherRange = NameToken.Range;
581 MatcherRange.End = ChainCallToken.Range.End;
582 VariantMatcher Result = S->actOnMatcherExpression(
583 Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error);
584 if (Result.isNull())
585 return false;
586
587 *Value = Result;
588 return true;
589 } else if (ChainCallToken.Text == TokenInfo::ID_With) {
590 Tokenizer->SkipNewlines();
591
592 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
593 StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
594 ? StringRef("EOF")
595 : Tokenizer->peekNextToken().Text;
596 Error->addError(Range: Tokenizer->peekNextToken().Range,
597 Error: Error->ET_ParserNoOpenParen)
598 << ErrTxt;
599 return false;
600 }
601
602 TokenInfo WithOpenToken = Tokenizer->consumeNextToken();
603
604 return parseMatcherExpressionImpl(NameToken, OpenToken: WithOpenToken,
605 Ctor: BuiltCtor.get(), Value);
606 }
607 }
608
609 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
610 NameToken.Text, NameToken.Range);
611 SourceRange MatcherRange = NameToken.Range;
612 MatcherRange.End = EndToken.Range.End;
613 VariantMatcher Result = S->actOnMatcherExpression(
614 Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error);
615 if (Result.isNull())
616 return false;
617
618 *Value = Result;
619 return true;
620}
621
622/// Parse and validate a matcher expression.
623/// \return \c true on success, in which case \c Value has the matcher parsed.
624/// If the input is malformed, or some argument has an error, it
625/// returns \c false.
626bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
627 const TokenInfo &OpenToken,
628 std::optional<MatcherCtor> Ctor,
629 VariantValue *Value) {
630 if (!Ctor) {
631 Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryMatcherNotFound)
632 << NameToken.Text;
633 // Do not return here. We need to continue to give completion suggestions.
634 }
635
636 if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
637 return parseMatcherBuilder(Ctor: *Ctor, NameToken, OpenToken, Value);
638
639 std::vector<ParserValue> Args;
640 TokenInfo EndToken;
641
642 Tokenizer->SkipNewlines();
643
644 {
645 ScopedContextEntry SCE(this, Ctor.value_or(u: nullptr));
646
647 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
648 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
649 // End of args.
650 EndToken = Tokenizer->consumeNextToken();
651 break;
652 }
653 if (!Args.empty()) {
654 // We must find a , token to continue.
655 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
656 if (CommaToken.Kind != TokenInfo::TK_Comma) {
657 Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma)
658 << CommaToken.Text;
659 return false;
660 }
661 }
662
663 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
664 NameToken.Text, NameToken.Range,
665 Args.size() + 1);
666 ParserValue ArgValue;
667 Tokenizer->SkipNewlines();
668 ArgValue.Text = Tokenizer->peekNextToken().Text;
669 ArgValue.Range = Tokenizer->peekNextToken().Range;
670 if (!parseExpressionImpl(Value: &ArgValue.Value)) {
671 return false;
672 }
673
674 Tokenizer->SkipNewlines();
675 Args.push_back(x: ArgValue);
676 SCE.nextArg();
677 }
678 }
679
680 if (EndToken.Kind == TokenInfo::TK_Eof) {
681 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen);
682 return false;
683 }
684
685 std::string BindID;
686 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
687 Tokenizer->consumeNextToken();
688 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
689 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
690 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
691 return false;
692 }
693
694 if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
695 Error->addError(Range: ChainCallToken.Range,
696 Error: Error->ET_ParserMalformedChainedExpr);
697 return false;
698 }
699 if (ChainCallToken.Text == TokenInfo::ID_With) {
700
701 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
702 NameToken.Text, NameToken.Range);
703
704 Error->addError(Range: ChainCallToken.Range,
705 Error: Error->ET_RegistryMatcherNoWithSupport);
706 return false;
707 }
708 if (ChainCallToken.Text != TokenInfo::ID_Bind) {
709 Error->addError(Range: ChainCallToken.Range,
710 Error: Error->ET_ParserMalformedChainedExpr);
711 return false;
712 }
713 if (!parseBindID(BindID))
714 return false;
715 }
716
717 if (!Ctor)
718 return false;
719
720 // Merge the start and end infos.
721 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
722 NameToken.Text, NameToken.Range);
723 SourceRange MatcherRange = NameToken.Range;
724 MatcherRange.End = EndToken.Range.End;
725 VariantMatcher Result = S->actOnMatcherExpression(
726 Ctor: *Ctor, NameRange: MatcherRange, BindID, Args, Error);
727 if (Result.isNull()) return false;
728
729 *Value = Result;
730 return true;
731}
732
733// If the prefix of this completion matches the completion token, add it to
734// Completions minus the prefix.
735void Parser::addCompletion(const TokenInfo &CompToken,
736 const MatcherCompletion& Completion) {
737 if (StringRef(Completion.TypedText).starts_with(Prefix: CompToken.Text) &&
738 Completion.Specificity > 0) {
739 Completions.emplace_back(args: Completion.TypedText.substr(pos: CompToken.Text.size()),
740 args: Completion.MatcherDecl, args: Completion.Specificity);
741 }
742}
743
744std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
745 ArrayRef<ArgKind> AcceptedTypes) {
746 if (!NamedValues) return std::vector<MatcherCompletion>();
747 std::vector<MatcherCompletion> Result;
748 for (const auto &Entry : *NamedValues) {
749 unsigned Specificity;
750 if (Entry.getValue().isConvertibleTo(Kinds: AcceptedTypes, Specificity: &Specificity)) {
751 std::string Decl =
752 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
753 Result.emplace_back(args: Entry.getKey(), args&: Decl, args&: Specificity);
754 }
755 }
756 return Result;
757}
758
759void Parser::addExpressionCompletions() {
760 const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
761 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
762
763 // We cannot complete code if there is an invalid element on the context
764 // stack.
765 for (ContextStackTy::iterator I = ContextStack.begin(),
766 E = ContextStack.end();
767 I != E; ++I) {
768 if (!I->first)
769 return;
770 }
771
772 auto AcceptedTypes = S->getAcceptedCompletionTypes(Context: ContextStack);
773 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
774 addCompletion(CompToken, Completion);
775 }
776
777 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
778 addCompletion(CompToken, Completion);
779 }
780}
781
782/// Parse an <Expression>
783bool Parser::parseExpressionImpl(VariantValue *Value) {
784 switch (Tokenizer->nextTokenKind()) {
785 case TokenInfo::TK_Literal:
786 *Value = Tokenizer->consumeNextToken().Value;
787 return true;
788
789 case TokenInfo::TK_Ident:
790 return parseIdentifierPrefixImpl(Value);
791
792 case TokenInfo::TK_CodeCompletion:
793 addExpressionCompletions();
794 return false;
795
796 case TokenInfo::TK_Eof:
797 Error->addError(Range: Tokenizer->consumeNextToken().Range,
798 Error: Error->ET_ParserNoCode);
799 return false;
800
801 case TokenInfo::TK_Error:
802 // This error was already reported by the tokenizer.
803 return false;
804 case TokenInfo::TK_NewLine:
805 case TokenInfo::TK_OpenParen:
806 case TokenInfo::TK_CloseParen:
807 case TokenInfo::TK_Comma:
808 case TokenInfo::TK_Period:
809 case TokenInfo::TK_InvalidChar:
810 const TokenInfo Token = Tokenizer->consumeNextToken();
811 Error->addError(Range: Token.Range, Error: Error->ET_ParserInvalidToken)
812 << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
813 return false;
814 }
815
816 llvm_unreachable("Unknown token kind.");
817}
818
819static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
820
821Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
822 const NamedValueMap *NamedValues, Diagnostics *Error)
823 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
824 NamedValues(NamedValues), Error(Error) {}
825
826Parser::RegistrySema::~RegistrySema() = default;
827
828std::optional<MatcherCtor>
829Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
830 return Registry::lookupMatcherCtor(MatcherName);
831}
832
833VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
834 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
835 ArrayRef<ParserValue> Args, Diagnostics *Error) {
836 if (BindID.empty()) {
837 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
838 } else {
839 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
840 Error);
841 }
842}
843
844std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
845 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
846 return Registry::getAcceptedCompletionTypes(Context);
847}
848
849std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
850 ArrayRef<ArgKind> AcceptedTypes) {
851 return Registry::getMatcherCompletions(AcceptedTypes);
852}
853
854bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
855 return Registry::isBuilderMatcher(Ctor);
856}
857
858ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
859 return Registry::nodeMatcherType(Ctor);
860}
861
862internal::MatcherDescriptorPtr
863Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
864 ArrayRef<ParserValue> Args,
865 Diagnostics *Error) const {
866 return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
867}
868
869bool Parser::parseExpression(StringRef &Code, Sema *S,
870 const NamedValueMap *NamedValues,
871 VariantValue *Value, Diagnostics *Error) {
872 CodeTokenizer Tokenizer(Code, Error);
873 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
874 return false;
875 auto NT = Tokenizer.peekNextToken();
876 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
877 Error->addError(Range: Tokenizer.peekNextToken().Range,
878 Error: Error->ET_ParserTrailingCode);
879 return false;
880 }
881 return true;
882}
883
884std::vector<MatcherCompletion>
885Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
886 const NamedValueMap *NamedValues) {
887 Diagnostics Error;
888 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
889 Parser P(&Tokenizer, S, NamedValues, &Error);
890 VariantValue Dummy;
891 P.parseExpressionImpl(Value: &Dummy);
892
893 // Sort by specificity, then by name.
894 llvm::sort(C&: P.Completions,
895 Comp: [](const MatcherCompletion &A, const MatcherCompletion &B) {
896 if (A.Specificity != B.Specificity)
897 return A.Specificity > B.Specificity;
898 return A.TypedText < B.TypedText;
899 });
900
901 return P.Completions;
902}
903
904std::optional<DynTypedMatcher>
905Parser::parseMatcherExpression(StringRef &Code, Sema *S,
906 const NamedValueMap *NamedValues,
907 Diagnostics *Error) {
908 VariantValue Value;
909 if (!parseExpression(Code, S, NamedValues, Value: &Value, Error))
910 return std::nullopt;
911 if (!Value.isMatcher()) {
912 Error->addError(Range: SourceRange(), Error: Error->ET_ParserNotAMatcher);
913 return std::nullopt;
914 }
915 std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher();
916 if (!Result) {
917 Error->addError(Range: SourceRange(), Error: Error->ET_ParserOverloadedType)
918 << Value.getTypeAsString();
919 }
920 return Result;
921}
922
923} // namespace dynamic
924} // namespace ast_matchers
925} // namespace clang
926

// Provided by KDAB
//
// Privacy Policy
// Update your C++ knowledge – Modern C++11/14/17 Training
// Find out more
//
// source code of clang/lib/ASTMatchers/Dynamic/Parser.cpp