1 | //===- Parser.cpp - Matcher expression parser -----------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// Recursive parser implementation for the matcher expression grammar. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/ASTMatchers/Dynamic/Parser.h" |
15 | #include "clang/ASTMatchers/ASTMatchersInternal.h" |
16 | #include "clang/ASTMatchers/Dynamic/Diagnostics.h" |
17 | #include "clang/ASTMatchers/Dynamic/Registry.h" |
18 | #include "clang/Basic/CharInfo.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include "llvm/Support/ErrorHandling.h" |
21 | #include "llvm/Support/ManagedStatic.h" |
22 | #include <cassert> |
23 | #include <cerrno> |
24 | #include <cstddef> |
25 | #include <cstdlib> |
26 | #include <optional> |
27 | #include <string> |
28 | #include <utility> |
29 | #include <vector> |
30 | |
31 | namespace clang { |
32 | namespace ast_matchers { |
33 | namespace dynamic { |
34 | |
35 | /// Simple structure to hold information for one token from the parser. |
36 | struct Parser::TokenInfo { |
37 | /// Different possible tokens. |
38 | enum TokenKind { |
39 | TK_Eof, |
40 | TK_NewLine, |
41 | TK_OpenParen, |
42 | TK_CloseParen, |
43 | TK_Comma, |
44 | TK_Period, |
45 | TK_Literal, |
46 | TK_Ident, |
47 | TK_InvalidChar, |
48 | TK_Error, |
49 | TK_CodeCompletion |
50 | }; |
51 | |
52 | /// Some known identifiers. |
53 | static const char* const ID_Bind; |
54 | static const char *const ID_With; |
55 | |
56 | TokenInfo() = default; |
57 | |
58 | StringRef Text; |
59 | TokenKind Kind = TK_Eof; |
60 | SourceRange Range; |
61 | VariantValue Value; |
62 | }; |
63 | |
64 | const char* const Parser::TokenInfo::ID_Bind = "bind"; |
65 | const char *const Parser::TokenInfo::ID_With = "with"; |
66 | |
67 | /// Simple tokenizer for the parser. |
68 | class Parser::CodeTokenizer { |
69 | public: |
70 | explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error) |
71 | : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) { |
72 | NextToken = getNextToken(); |
73 | } |
74 | |
75 | CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, |
76 | unsigned CodeCompletionOffset) |
77 | : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error), |
78 | CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { |
79 | NextToken = getNextToken(); |
80 | } |
81 | |
82 | /// Returns but doesn't consume the next token. |
83 | const TokenInfo &peekNextToken() const { return NextToken; } |
84 | |
85 | /// Consumes and returns the next token. |
86 | TokenInfo consumeNextToken() { |
87 | TokenInfo ThisToken = NextToken; |
88 | NextToken = getNextToken(); |
89 | return ThisToken; |
90 | } |
91 | |
92 | TokenInfo SkipNewlines() { |
93 | while (NextToken.Kind == TokenInfo::TK_NewLine) |
94 | NextToken = getNextToken(); |
95 | return NextToken; |
96 | } |
97 | |
98 | TokenInfo consumeNextTokenIgnoreNewlines() { |
99 | SkipNewlines(); |
100 | if (NextToken.Kind == TokenInfo::TK_Eof) |
101 | return NextToken; |
102 | return consumeNextToken(); |
103 | } |
104 | |
105 | TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } |
106 | |
107 | private: |
108 | TokenInfo getNextToken() { |
109 | consumeWhitespace(); |
110 | TokenInfo Result; |
111 | Result.Range.Start = currentLocation(); |
112 | |
113 | if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { |
114 | Result.Kind = TokenInfo::TK_CodeCompletion; |
115 | Result.Text = StringRef(CodeCompletionLocation, 0); |
116 | CodeCompletionLocation = nullptr; |
117 | return Result; |
118 | } |
119 | |
120 | if (Code.empty()) { |
121 | Result.Kind = TokenInfo::TK_Eof; |
122 | Result.Text = ""; |
123 | return Result; |
124 | } |
125 | |
126 | switch (Code[0]) { |
127 | case '#': |
128 | Code = Code.drop_until(F: [](char c) { return c == '\n'; }); |
129 | return getNextToken(); |
130 | case ',': |
131 | Result.Kind = TokenInfo::TK_Comma; |
132 | Result.Text = Code.substr(Start: 0, N: 1); |
133 | Code = Code.drop_front(); |
134 | break; |
135 | case '.': |
136 | Result.Kind = TokenInfo::TK_Period; |
137 | Result.Text = Code.substr(Start: 0, N: 1); |
138 | Code = Code.drop_front(); |
139 | break; |
140 | case '\n': |
141 | ++Line; |
142 | StartOfLine = Code.drop_front(); |
143 | Result.Kind = TokenInfo::TK_NewLine; |
144 | Result.Text = Code.substr(Start: 0, N: 1); |
145 | Code = Code.drop_front(); |
146 | break; |
147 | case '(': |
148 | Result.Kind = TokenInfo::TK_OpenParen; |
149 | Result.Text = Code.substr(Start: 0, N: 1); |
150 | Code = Code.drop_front(); |
151 | break; |
152 | case ')': |
153 | Result.Kind = TokenInfo::TK_CloseParen; |
154 | Result.Text = Code.substr(Start: 0, N: 1); |
155 | Code = Code.drop_front(); |
156 | break; |
157 | |
158 | case '"': |
159 | case '\'': |
160 | // Parse a string literal. |
161 | consumeStringLiteral(Result: &Result); |
162 | break; |
163 | |
164 | case '0': case '1': case '2': case '3': case '4': |
165 | case '5': case '6': case '7': case '8': case '9': |
166 | // Parse an unsigned and float literal. |
167 | consumeNumberLiteral(Result: &Result); |
168 | break; |
169 | |
170 | default: |
171 | if (isAlphanumeric(c: Code[0])) { |
172 | // Parse an identifier |
173 | size_t TokenLength = 1; |
174 | while (true) { |
175 | // A code completion location in/immediately after an identifier will |
176 | // cause the portion of the identifier before the code completion |
177 | // location to become a code completion token. |
178 | if (CodeCompletionLocation == Code.data() + TokenLength) { |
179 | CodeCompletionLocation = nullptr; |
180 | Result.Kind = TokenInfo::TK_CodeCompletion; |
181 | Result.Text = Code.substr(Start: 0, N: TokenLength); |
182 | Code = Code.drop_front(N: TokenLength); |
183 | return Result; |
184 | } |
185 | if (TokenLength == Code.size() || !isAlphanumeric(c: Code[TokenLength])) |
186 | break; |
187 | ++TokenLength; |
188 | } |
189 | if (TokenLength == 4 && Code.starts_with(Prefix: "true")) { |
190 | Result.Kind = TokenInfo::TK_Literal; |
191 | Result.Value = true; |
192 | } else if (TokenLength == 5 && Code.starts_with(Prefix: "false")) { |
193 | Result.Kind = TokenInfo::TK_Literal; |
194 | Result.Value = false; |
195 | } else { |
196 | Result.Kind = TokenInfo::TK_Ident; |
197 | Result.Text = Code.substr(Start: 0, N: TokenLength); |
198 | } |
199 | Code = Code.drop_front(N: TokenLength); |
200 | } else { |
201 | Result.Kind = TokenInfo::TK_InvalidChar; |
202 | Result.Text = Code.substr(Start: 0, N: 1); |
203 | Code = Code.drop_front(N: 1); |
204 | } |
205 | break; |
206 | } |
207 | |
208 | Result.Range.End = currentLocation(); |
209 | return Result; |
210 | } |
211 | |
212 | /// Consume an unsigned and float literal. |
213 | void consumeNumberLiteral(TokenInfo *Result) { |
214 | bool isFloatingLiteral = false; |
215 | unsigned Length = 1; |
216 | if (Code.size() > 1) { |
217 | // Consume the 'x' or 'b' radix modifier, if present. |
218 | switch (toLowercase(c: Code[1])) { |
219 | case 'x': case 'b': Length = 2; |
220 | } |
221 | } |
222 | while (Length < Code.size() && isHexDigit(c: Code[Length])) |
223 | ++Length; |
224 | |
225 | // Try to recognize a floating point literal. |
226 | while (Length < Code.size()) { |
227 | char c = Code[Length]; |
228 | if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) { |
229 | isFloatingLiteral = true; |
230 | Length++; |
231 | } else { |
232 | break; |
233 | } |
234 | } |
235 | |
236 | Result->Text = Code.substr(Start: 0, N: Length); |
237 | Code = Code.drop_front(N: Length); |
238 | |
239 | if (isFloatingLiteral) { |
240 | char *end; |
241 | errno = 0; |
242 | std::string Text = Result->Text.str(); |
243 | double doubleValue = strtod(nptr: Text.c_str(), endptr: &end); |
244 | if (*end == 0 && errno == 0) { |
245 | Result->Kind = TokenInfo::TK_Literal; |
246 | Result->Value = doubleValue; |
247 | return; |
248 | } |
249 | } else { |
250 | unsigned Value; |
251 | if (!Result->Text.getAsInteger(Radix: 0, Result&: Value)) { |
252 | Result->Kind = TokenInfo::TK_Literal; |
253 | Result->Value = Value; |
254 | return; |
255 | } |
256 | } |
257 | |
258 | SourceRange Range; |
259 | Range.Start = Result->Range.Start; |
260 | Range.End = currentLocation(); |
261 | Error->addError(Range, Error: Error->ET_ParserNumberError) << Result->Text; |
262 | Result->Kind = TokenInfo::TK_Error; |
263 | } |
264 | |
265 | /// Consume a string literal. |
266 | /// |
267 | /// \c Code must be positioned at the start of the literal (the opening |
268 | /// quote). Consumed until it finds the same closing quote character. |
269 | void consumeStringLiteral(TokenInfo *Result) { |
270 | bool InEscape = false; |
271 | const char Marker = Code[0]; |
272 | for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { |
273 | if (InEscape) { |
274 | InEscape = false; |
275 | continue; |
276 | } |
277 | if (Code[Length] == '\\') { |
278 | InEscape = true; |
279 | continue; |
280 | } |
281 | if (Code[Length] == Marker) { |
282 | Result->Kind = TokenInfo::TK_Literal; |
283 | Result->Text = Code.substr(Start: 0, N: Length + 1); |
284 | Result->Value = Code.substr(Start: 1, N: Length - 1); |
285 | Code = Code.drop_front(N: Length + 1); |
286 | return; |
287 | } |
288 | } |
289 | |
290 | StringRef ErrorText = Code; |
291 | Code = Code.drop_front(N: Code.size()); |
292 | SourceRange Range; |
293 | Range.Start = Result->Range.Start; |
294 | Range.End = currentLocation(); |
295 | Error->addError(Range, Error: Error->ET_ParserStringError) << ErrorText; |
296 | Result->Kind = TokenInfo::TK_Error; |
297 | } |
298 | |
299 | /// Consume all leading whitespace from \c Code. |
300 | void consumeWhitespace() { |
301 | // Don't trim newlines. |
302 | Code = Code.ltrim(Chars: " \t\v\f\r"); |
303 | } |
304 | |
305 | SourceLocation currentLocation() { |
306 | SourceLocation Location; |
307 | Location.Line = Line; |
308 | Location.Column = Code.data() - StartOfLine.data() + 1; |
309 | return Location; |
310 | } |
311 | |
312 | StringRef &Code; |
313 | StringRef StartOfLine; |
314 | unsigned Line = 1; |
315 | Diagnostics *Error; |
316 | TokenInfo NextToken; |
317 | const char *CodeCompletionLocation = nullptr; |
318 | }; |
319 | |
320 | Parser::Sema::~Sema() = default; |
321 | |
322 | std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes( |
323 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { |
324 | return {}; |
325 | } |
326 | |
327 | std::vector<MatcherCompletion> |
328 | Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) { |
329 | return {}; |
330 | } |
331 | |
332 | struct Parser::ScopedContextEntry { |
333 | Parser *P; |
334 | |
335 | ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { |
336 | P->ContextStack.push_back(x: std::make_pair(x&: C, y: 0u)); |
337 | } |
338 | |
339 | ~ScopedContextEntry() { |
340 | P->ContextStack.pop_back(); |
341 | } |
342 | |
343 | void nextArg() { |
344 | ++P->ContextStack.back().second; |
345 | } |
346 | }; |
347 | |
348 | /// Parse expressions that start with an identifier. |
349 | /// |
350 | /// This function can parse named values and matchers. |
351 | /// In case of failure it will try to determine the user's intent to give |
352 | /// an appropriate error message. |
353 | bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { |
354 | const TokenInfo NameToken = Tokenizer->consumeNextToken(); |
355 | |
356 | if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { |
357 | // Parse as a named value. |
358 | if (const VariantValue NamedValue = |
359 | NamedValues ? NamedValues->lookup(Key: NameToken.Text) |
360 | : VariantValue()) { |
361 | |
362 | if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) { |
363 | *Value = NamedValue; |
364 | return true; |
365 | } |
366 | |
367 | std::string BindID; |
368 | Tokenizer->consumeNextToken(); |
369 | TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); |
370 | if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { |
371 | addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1)); |
372 | return false; |
373 | } |
374 | |
375 | if (ChainCallToken.Kind != TokenInfo::TK_Ident || |
376 | (ChainCallToken.Text != TokenInfo::ID_Bind && |
377 | ChainCallToken.Text != TokenInfo::ID_With)) { |
378 | Error->addError(Range: ChainCallToken.Range, |
379 | Error: Error->ET_ParserMalformedChainedExpr); |
380 | return false; |
381 | } |
382 | if (ChainCallToken.Text == TokenInfo::ID_With) { |
383 | |
384 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
385 | NameToken.Text, NameToken.Range); |
386 | |
387 | Error->addError(Range: ChainCallToken.Range, |
388 | Error: Error->ET_RegistryMatcherNoWithSupport); |
389 | return false; |
390 | } |
391 | if (!parseBindID(BindID)) |
392 | return false; |
393 | |
394 | assert(NamedValue.isMatcher()); |
395 | std::optional<DynTypedMatcher> Result = |
396 | NamedValue.getMatcher().getSingleMatcher(); |
397 | if (Result) { |
398 | std::optional<DynTypedMatcher> Bound = Result->tryBind(ID: BindID); |
399 | if (Bound) { |
400 | *Value = VariantMatcher::SingleMatcher(Matcher: *Bound); |
401 | return true; |
402 | } |
403 | } |
404 | return false; |
405 | } |
406 | |
407 | if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) { |
408 | Error->addError(Range: Tokenizer->peekNextToken().Range, |
409 | Error: Error->ET_ParserNoOpenParen) |
410 | << "NewLine"; |
411 | return false; |
412 | } |
413 | |
414 | // If the syntax is correct and the name is not a matcher either, report |
415 | // unknown named value. |
416 | if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || |
417 | Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || |
418 | Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine || |
419 | Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && |
420 | !S->lookupMatcherCtor(MatcherName: NameToken.Text)) { |
421 | Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryValueNotFound) |
422 | << NameToken.Text; |
423 | return false; |
424 | } |
425 | // Otherwise, fallback to the matcher parser. |
426 | } |
427 | |
428 | Tokenizer->SkipNewlines(); |
429 | |
430 | assert(NameToken.Kind == TokenInfo::TK_Ident); |
431 | TokenInfo OpenToken = Tokenizer->consumeNextToken(); |
432 | if (OpenToken.Kind != TokenInfo::TK_OpenParen) { |
433 | Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoOpenParen) |
434 | << OpenToken.Text; |
435 | return false; |
436 | } |
437 | |
438 | std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(MatcherName: NameToken.Text); |
439 | |
440 | // Parse as a matcher expression. |
441 | return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value); |
442 | } |
443 | |
444 | bool Parser::parseBindID(std::string &BindID) { |
445 | // Parse the parenthesized argument to .bind("foo") |
446 | const TokenInfo OpenToken = Tokenizer->consumeNextToken(); |
447 | const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines(); |
448 | const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines(); |
449 | |
450 | // TODO: We could use different error codes for each/some to be more |
451 | // explicit about the syntax error. |
452 | if (OpenToken.Kind != TokenInfo::TK_OpenParen) { |
453 | Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserMalformedBindExpr); |
454 | return false; |
455 | } |
456 | if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { |
457 | Error->addError(Range: IDToken.Range, Error: Error->ET_ParserMalformedBindExpr); |
458 | return false; |
459 | } |
460 | if (CloseToken.Kind != TokenInfo::TK_CloseParen) { |
461 | Error->addError(Range: CloseToken.Range, Error: Error->ET_ParserMalformedBindExpr); |
462 | return false; |
463 | } |
464 | BindID = IDToken.Value.getString(); |
465 | return true; |
466 | } |
467 | |
468 | bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken, |
469 | const TokenInfo &OpenToken, |
470 | VariantValue *Value) { |
471 | std::vector<ParserValue> Args; |
472 | TokenInfo EndToken; |
473 | |
474 | Tokenizer->SkipNewlines(); |
475 | |
476 | { |
477 | ScopedContextEntry SCE(this, Ctor); |
478 | |
479 | while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { |
480 | if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { |
481 | // End of args. |
482 | EndToken = Tokenizer->consumeNextToken(); |
483 | break; |
484 | } |
485 | if (!Args.empty()) { |
486 | // We must find a , token to continue. |
487 | TokenInfo CommaToken = Tokenizer->consumeNextToken(); |
488 | if (CommaToken.Kind != TokenInfo::TK_Comma) { |
489 | Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma) |
490 | << CommaToken.Text; |
491 | return false; |
492 | } |
493 | } |
494 | |
495 | Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, |
496 | NameToken.Text, NameToken.Range, |
497 | Args.size() + 1); |
498 | ParserValue ArgValue; |
499 | Tokenizer->SkipNewlines(); |
500 | |
501 | if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) { |
502 | addExpressionCompletions(); |
503 | return false; |
504 | } |
505 | |
506 | TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken(); |
507 | |
508 | if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) { |
509 | Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher) |
510 | << NameToken.Text; |
511 | return false; |
512 | } |
513 | |
514 | ArgValue.Text = NodeMatcherToken.Text; |
515 | ArgValue.Range = NodeMatcherToken.Range; |
516 | |
517 | std::optional<MatcherCtor> MappedMatcher = |
518 | S->lookupMatcherCtor(MatcherName: ArgValue.Text); |
519 | |
520 | if (!MappedMatcher) { |
521 | Error->addError(Range: NodeMatcherToken.Range, |
522 | Error: Error->ET_RegistryMatcherNotFound) |
523 | << NodeMatcherToken.Text; |
524 | return false; |
525 | } |
526 | |
527 | ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher); |
528 | |
529 | if (NK.isNone()) { |
530 | Error->addError(Range: NodeMatcherToken.Range, |
531 | Error: Error->ET_RegistryNonNodeMatcher) |
532 | << NodeMatcherToken.Text; |
533 | return false; |
534 | } |
535 | |
536 | ArgValue.Value = NK; |
537 | |
538 | Tokenizer->SkipNewlines(); |
539 | Args.push_back(x: ArgValue); |
540 | |
541 | SCE.nextArg(); |
542 | } |
543 | } |
544 | |
545 | if (EndToken.Kind == TokenInfo::TK_Eof) { |
546 | Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen); |
547 | return false; |
548 | } |
549 | |
550 | internal::MatcherDescriptorPtr BuiltCtor = |
551 | S->buildMatcherCtor(Ctor, NameRange: NameToken.Range, Args, Error); |
552 | |
553 | if (!BuiltCtor.get()) { |
554 | Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher) |
555 | << NameToken.Text; |
556 | return false; |
557 | } |
558 | |
559 | std::string BindID; |
560 | if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { |
561 | Tokenizer->consumeNextToken(); |
562 | TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); |
563 | if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { |
564 | addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1)); |
565 | addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("with(", "with", 1)); |
566 | return false; |
567 | } |
568 | if (ChainCallToken.Kind != TokenInfo::TK_Ident || |
569 | (ChainCallToken.Text != TokenInfo::ID_Bind && |
570 | ChainCallToken.Text != TokenInfo::ID_With)) { |
571 | Error->addError(Range: ChainCallToken.Range, |
572 | Error: Error->ET_ParserMalformedChainedExpr); |
573 | return false; |
574 | } |
575 | if (ChainCallToken.Text == TokenInfo::ID_Bind) { |
576 | if (!parseBindID(BindID)) |
577 | return false; |
578 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
579 | NameToken.Text, NameToken.Range); |
580 | SourceRange MatcherRange = NameToken.Range; |
581 | MatcherRange.End = ChainCallToken.Range.End; |
582 | VariantMatcher Result = S->actOnMatcherExpression( |
583 | Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error); |
584 | if (Result.isNull()) |
585 | return false; |
586 | |
587 | *Value = Result; |
588 | return true; |
589 | } else if (ChainCallToken.Text == TokenInfo::ID_With) { |
590 | Tokenizer->SkipNewlines(); |
591 | |
592 | if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { |
593 | StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof |
594 | ? StringRef("EOF") |
595 | : Tokenizer->peekNextToken().Text; |
596 | Error->addError(Range: Tokenizer->peekNextToken().Range, |
597 | Error: Error->ET_ParserNoOpenParen) |
598 | << ErrTxt; |
599 | return false; |
600 | } |
601 | |
602 | TokenInfo WithOpenToken = Tokenizer->consumeNextToken(); |
603 | |
604 | return parseMatcherExpressionImpl(NameToken, OpenToken: WithOpenToken, |
605 | Ctor: BuiltCtor.get(), Value); |
606 | } |
607 | } |
608 | |
609 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
610 | NameToken.Text, NameToken.Range); |
611 | SourceRange MatcherRange = NameToken.Range; |
612 | MatcherRange.End = EndToken.Range.End; |
613 | VariantMatcher Result = S->actOnMatcherExpression( |
614 | Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error); |
615 | if (Result.isNull()) |
616 | return false; |
617 | |
618 | *Value = Result; |
619 | return true; |
620 | } |
621 | |
622 | /// Parse and validate a matcher expression. |
623 | /// \return \c true on success, in which case \c Value has the matcher parsed. |
624 | /// If the input is malformed, or some argument has an error, it |
625 | /// returns \c false. |
626 | bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, |
627 | const TokenInfo &OpenToken, |
628 | std::optional<MatcherCtor> Ctor, |
629 | VariantValue *Value) { |
630 | if (!Ctor) { |
631 | Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryMatcherNotFound) |
632 | << NameToken.Text; |
633 | // Do not return here. We need to continue to give completion suggestions. |
634 | } |
635 | |
636 | if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor)) |
637 | return parseMatcherBuilder(Ctor: *Ctor, NameToken, OpenToken, Value); |
638 | |
639 | std::vector<ParserValue> Args; |
640 | TokenInfo EndToken; |
641 | |
642 | Tokenizer->SkipNewlines(); |
643 | |
644 | { |
645 | ScopedContextEntry SCE(this, Ctor.value_or(u: nullptr)); |
646 | |
647 | while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { |
648 | if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { |
649 | // End of args. |
650 | EndToken = Tokenizer->consumeNextToken(); |
651 | break; |
652 | } |
653 | if (!Args.empty()) { |
654 | // We must find a , token to continue. |
655 | const TokenInfo CommaToken = Tokenizer->consumeNextToken(); |
656 | if (CommaToken.Kind != TokenInfo::TK_Comma) { |
657 | Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma) |
658 | << CommaToken.Text; |
659 | return false; |
660 | } |
661 | } |
662 | |
663 | Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, |
664 | NameToken.Text, NameToken.Range, |
665 | Args.size() + 1); |
666 | ParserValue ArgValue; |
667 | Tokenizer->SkipNewlines(); |
668 | ArgValue.Text = Tokenizer->peekNextToken().Text; |
669 | ArgValue.Range = Tokenizer->peekNextToken().Range; |
670 | if (!parseExpressionImpl(Value: &ArgValue.Value)) { |
671 | return false; |
672 | } |
673 | |
674 | Tokenizer->SkipNewlines(); |
675 | Args.push_back(x: ArgValue); |
676 | SCE.nextArg(); |
677 | } |
678 | } |
679 | |
680 | if (EndToken.Kind == TokenInfo::TK_Eof) { |
681 | Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen); |
682 | return false; |
683 | } |
684 | |
685 | std::string BindID; |
686 | if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { |
687 | Tokenizer->consumeNextToken(); |
688 | TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); |
689 | if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { |
690 | addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1)); |
691 | return false; |
692 | } |
693 | |
694 | if (ChainCallToken.Kind != TokenInfo::TK_Ident) { |
695 | Error->addError(Range: ChainCallToken.Range, |
696 | Error: Error->ET_ParserMalformedChainedExpr); |
697 | return false; |
698 | } |
699 | if (ChainCallToken.Text == TokenInfo::ID_With) { |
700 | |
701 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
702 | NameToken.Text, NameToken.Range); |
703 | |
704 | Error->addError(Range: ChainCallToken.Range, |
705 | Error: Error->ET_RegistryMatcherNoWithSupport); |
706 | return false; |
707 | } |
708 | if (ChainCallToken.Text != TokenInfo::ID_Bind) { |
709 | Error->addError(Range: ChainCallToken.Range, |
710 | Error: Error->ET_ParserMalformedChainedExpr); |
711 | return false; |
712 | } |
713 | if (!parseBindID(BindID)) |
714 | return false; |
715 | } |
716 | |
717 | if (!Ctor) |
718 | return false; |
719 | |
720 | // Merge the start and end infos. |
721 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
722 | NameToken.Text, NameToken.Range); |
723 | SourceRange MatcherRange = NameToken.Range; |
724 | MatcherRange.End = EndToken.Range.End; |
725 | VariantMatcher Result = S->actOnMatcherExpression( |
726 | Ctor: *Ctor, NameRange: MatcherRange, BindID, Args, Error); |
727 | if (Result.isNull()) return false; |
728 | |
729 | *Value = Result; |
730 | return true; |
731 | } |
732 | |
733 | // If the prefix of this completion matches the completion token, add it to |
734 | // Completions minus the prefix. |
735 | void Parser::addCompletion(const TokenInfo &CompToken, |
736 | const MatcherCompletion& Completion) { |
737 | if (StringRef(Completion.TypedText).starts_with(Prefix: CompToken.Text) && |
738 | Completion.Specificity > 0) { |
739 | Completions.emplace_back(args: Completion.TypedText.substr(pos: CompToken.Text.size()), |
740 | args: Completion.MatcherDecl, args: Completion.Specificity); |
741 | } |
742 | } |
743 | |
744 | std::vector<MatcherCompletion> Parser::getNamedValueCompletions( |
745 | ArrayRef<ArgKind> AcceptedTypes) { |
746 | if (!NamedValues) return std::vector<MatcherCompletion>(); |
747 | std::vector<MatcherCompletion> Result; |
748 | for (const auto &Entry : *NamedValues) { |
749 | unsigned Specificity; |
750 | if (Entry.getValue().isConvertibleTo(Kinds: AcceptedTypes, Specificity: &Specificity)) { |
751 | std::string Decl = |
752 | (Entry.getValue().getTypeAsString() + " "+ Entry.getKey()).str(); |
753 | Result.emplace_back(args: Entry.getKey(), args&: Decl, args&: Specificity); |
754 | } |
755 | } |
756 | return Result; |
757 | } |
758 | |
759 | void Parser::addExpressionCompletions() { |
760 | const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines(); |
761 | assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); |
762 | |
763 | // We cannot complete code if there is an invalid element on the context |
764 | // stack. |
765 | for (ContextStackTy::iterator I = ContextStack.begin(), |
766 | E = ContextStack.end(); |
767 | I != E; ++I) { |
768 | if (!I->first) |
769 | return; |
770 | } |
771 | |
772 | auto AcceptedTypes = S->getAcceptedCompletionTypes(Context: ContextStack); |
773 | for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) { |
774 | addCompletion(CompToken, Completion); |
775 | } |
776 | |
777 | for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) { |
778 | addCompletion(CompToken, Completion); |
779 | } |
780 | } |
781 | |
782 | /// Parse an <Expression> |
783 | bool Parser::parseExpressionImpl(VariantValue *Value) { |
784 | switch (Tokenizer->nextTokenKind()) { |
785 | case TokenInfo::TK_Literal: |
786 | *Value = Tokenizer->consumeNextToken().Value; |
787 | return true; |
788 | |
789 | case TokenInfo::TK_Ident: |
790 | return parseIdentifierPrefixImpl(Value); |
791 | |
792 | case TokenInfo::TK_CodeCompletion: |
793 | addExpressionCompletions(); |
794 | return false; |
795 | |
796 | case TokenInfo::TK_Eof: |
797 | Error->addError(Range: Tokenizer->consumeNextToken().Range, |
798 | Error: Error->ET_ParserNoCode); |
799 | return false; |
800 | |
801 | case TokenInfo::TK_Error: |
802 | // This error was already reported by the tokenizer. |
803 | return false; |
804 | case TokenInfo::TK_NewLine: |
805 | case TokenInfo::TK_OpenParen: |
806 | case TokenInfo::TK_CloseParen: |
807 | case TokenInfo::TK_Comma: |
808 | case TokenInfo::TK_Period: |
809 | case TokenInfo::TK_InvalidChar: |
810 | const TokenInfo Token = Tokenizer->consumeNextToken(); |
811 | Error->addError(Range: Token.Range, Error: Error->ET_ParserInvalidToken) |
812 | << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine": Token.Text); |
813 | return false; |
814 | } |
815 | |
816 | llvm_unreachable("Unknown token kind."); |
817 | } |
818 | |
819 | static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema; |
820 | |
821 | Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, |
822 | const NamedValueMap *NamedValues, Diagnostics *Error) |
823 | : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema), |
824 | NamedValues(NamedValues), Error(Error) {} |
825 | |
826 | Parser::RegistrySema::~RegistrySema() = default; |
827 | |
828 | std::optional<MatcherCtor> |
829 | Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { |
830 | return Registry::lookupMatcherCtor(MatcherName); |
831 | } |
832 | |
833 | VariantMatcher Parser::RegistrySema::actOnMatcherExpression( |
834 | MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, |
835 | ArrayRef<ParserValue> Args, Diagnostics *Error) { |
836 | if (BindID.empty()) { |
837 | return Registry::constructMatcher(Ctor, NameRange, Args, Error); |
838 | } else { |
839 | return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, |
840 | Error); |
841 | } |
842 | } |
843 | |
844 | std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes( |
845 | ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { |
846 | return Registry::getAcceptedCompletionTypes(Context); |
847 | } |
848 | |
849 | std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( |
850 | ArrayRef<ArgKind> AcceptedTypes) { |
851 | return Registry::getMatcherCompletions(AcceptedTypes); |
852 | } |
853 | |
854 | bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const { |
855 | return Registry::isBuilderMatcher(Ctor); |
856 | } |
857 | |
858 | ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const { |
859 | return Registry::nodeMatcherType(Ctor); |
860 | } |
861 | |
862 | internal::MatcherDescriptorPtr |
863 | Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange, |
864 | ArrayRef<ParserValue> Args, |
865 | Diagnostics *Error) const { |
866 | return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error); |
867 | } |
868 | |
869 | bool Parser::parseExpression(StringRef &Code, Sema *S, |
870 | const NamedValueMap *NamedValues, |
871 | VariantValue *Value, Diagnostics *Error) { |
872 | CodeTokenizer Tokenizer(Code, Error); |
873 | if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) |
874 | return false; |
875 | auto NT = Tokenizer.peekNextToken(); |
876 | if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) { |
877 | Error->addError(Range: Tokenizer.peekNextToken().Range, |
878 | Error: Error->ET_ParserTrailingCode); |
879 | return false; |
880 | } |
881 | return true; |
882 | } |
883 | |
884 | std::vector<MatcherCompletion> |
885 | Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, |
886 | const NamedValueMap *NamedValues) { |
887 | Diagnostics Error; |
888 | CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); |
889 | Parser P(&Tokenizer, S, NamedValues, &Error); |
890 | VariantValue Dummy; |
891 | P.parseExpressionImpl(Value: &Dummy); |
892 | |
893 | // Sort by specificity, then by name. |
894 | llvm::sort(C&: P.Completions, |
895 | Comp: [](const MatcherCompletion &A, const MatcherCompletion &B) { |
896 | if (A.Specificity != B.Specificity) |
897 | return A.Specificity > B.Specificity; |
898 | return A.TypedText < B.TypedText; |
899 | }); |
900 | |
901 | return P.Completions; |
902 | } |
903 | |
904 | std::optional<DynTypedMatcher> |
905 | Parser::parseMatcherExpression(StringRef &Code, Sema *S, |
906 | const NamedValueMap *NamedValues, |
907 | Diagnostics *Error) { |
908 | VariantValue Value; |
909 | if (!parseExpression(Code, S, NamedValues, Value: &Value, Error)) |
910 | return std::nullopt; |
911 | if (!Value.isMatcher()) { |
912 | Error->addError(Range: SourceRange(), Error: Error->ET_ParserNotAMatcher); |
913 | return std::nullopt; |
914 | } |
915 | std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher(); |
916 | if (!Result) { |
917 | Error->addError(Range: SourceRange(), Error: Error->ET_ParserOverloadedType) |
918 | << Value.getTypeAsString(); |
919 | } |
920 | return Result; |
921 | } |
922 | |
923 | } // namespace dynamic |
924 | } // namespace ast_matchers |
925 | } // namespace clang |
926 |
Definitions
- TokenInfo
- TokenKind
- TokenInfo
- ID_Bind
- ID_With
- CodeTokenizer
- CodeTokenizer
- CodeTokenizer
- peekNextToken
- consumeNextToken
- SkipNewlines
- consumeNextTokenIgnoreNewlines
- nextTokenKind
- getNextToken
- consumeNumberLiteral
- consumeStringLiteral
- consumeWhitespace
- currentLocation
- ~Sema
- getAcceptedCompletionTypes
- getMatcherCompletions
- ScopedContextEntry
- ScopedContextEntry
- ~ScopedContextEntry
- nextArg
- parseIdentifierPrefixImpl
- parseBindID
- parseMatcherBuilder
- parseMatcherExpressionImpl
- addCompletion
- getNamedValueCompletions
- addExpressionCompletions
- parseExpressionImpl
- DefaultRegistrySema
- Parser
- ~RegistrySema
- lookupMatcherCtor
- actOnMatcherExpression
- getAcceptedCompletionTypes
- getMatcherCompletions
- isBuilderMatcher
- nodeMatcherType
- buildMatcherCtor
- parseExpression
- completeExpression
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more