1 | //===--- Parser.h - Matcher expression parser -------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Simple matcher expression parser. |
10 | // |
11 | // This file contains the Parser class, which is responsible for parsing |
12 | // expressions in a specific format: matcherName(Arg0, Arg1, ..., ArgN). The |
13 | // parser can also interpret simple types, like strings. |
14 | // |
15 | // The actual processing of the matchers is handled by a Sema object that is |
16 | // provided to the parser. |
17 | // |
18 | // The grammar for the supported expressions is as follows: |
19 | // <Expression> := <Literal> | <MatcherExpression> |
20 | // <Literal> := <StringLiteral> | <NumericLiteral> | <BooleanLiteral> |
21 | // <StringLiteral> := "quoted string" |
22 | // <BooleanLiteral> := "true" | "false" |
23 | // <NumericLiteral> := [0-9]+ |
24 | // <MatcherExpression> := <MatcherName>(<ArgumentList>) |
25 | // <MatcherName> := [a-zA-Z]+ |
26 | // <ArgumentList> := <Expression> | <Expression>,<ArgumentList> |
27 | // |
28 | //===----------------------------------------------------------------------===// |
29 | |
30 | #ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
31 | #define MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
32 | |
33 | #include "Diagnostics.h" |
34 | #include "RegistryManager.h" |
35 | #include "llvm/ADT/ArrayRef.h" |
36 | #include "llvm/ADT/StringMap.h" |
37 | #include "llvm/ADT/StringRef.h" |
38 | #include <memory> |
39 | #include <vector> |
40 | |
41 | namespace mlir::query::matcher::internal { |
42 | |
43 | // Matcher expression parser. |
44 | class Parser { |
45 | public: |
46 | // Different possible tokens. |
47 | enum class TokenKind { |
48 | Eof, |
49 | NewLine, |
50 | OpenParen, |
51 | CloseParen, |
52 | Comma, |
53 | Period, |
54 | Literal, |
55 | Ident, |
56 | InvalidChar, |
57 | CodeCompletion, |
58 | Error |
59 | }; |
60 | |
61 | // Interface to connect the parser with the registry and more. The parser uses |
62 | // the Sema instance passed into parseMatcherExpression() to handle all |
63 | // matcher tokens. |
64 | class Sema { |
65 | public: |
66 | virtual ~Sema(); |
67 | |
68 | // Process a matcher expression. The caller takes ownership of the Matcher |
69 | // object returned. |
70 | virtual VariantMatcher actOnMatcherExpression( |
71 | MatcherCtor ctor, SourceRange nameRange, llvm::StringRef functionName, |
72 | llvm::ArrayRef<ParserValue> args, Diagnostics *error) = 0; |
73 | |
74 | // Look up a matcher by name in the matcher name found by the parser. |
75 | virtual std::optional<MatcherCtor> |
76 | lookupMatcherCtor(llvm::StringRef matcherName) = 0; |
77 | |
78 | // Compute the list of completion types for Context. |
79 | virtual std::vector<ArgKind> getAcceptedCompletionTypes( |
80 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context); |
81 | |
82 | // Compute the list of completions that match any of acceptedTypes. |
83 | virtual std::vector<MatcherCompletion> |
84 | getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes); |
85 | }; |
86 | |
87 | // An implementation of the Sema interface that uses the matcher registry to |
88 | // process tokens. |
89 | class RegistrySema : public Parser::Sema { |
90 | public: |
91 | RegistrySema(const Registry &matcherRegistry) |
92 | : matcherRegistry(matcherRegistry) {} |
93 | ~RegistrySema() override; |
94 | |
95 | std::optional<MatcherCtor> |
96 | lookupMatcherCtor(llvm::StringRef matcherName) override; |
97 | |
98 | VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
99 | SourceRange NameRange, |
100 | StringRef functionName, |
101 | ArrayRef<ParserValue> Args, |
102 | Diagnostics *Error) override; |
103 | |
104 | std::vector<ArgKind> getAcceptedCompletionTypes( |
105 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> context) override; |
106 | |
107 | std::vector<MatcherCompletion> |
108 | getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes) override; |
109 | |
110 | private: |
111 | const Registry &matcherRegistry; |
112 | }; |
113 | |
114 | using NamedValueMap = llvm::StringMap<VariantValue>; |
115 | |
116 | // Methods to parse a matcher expression and return a DynMatcher object, |
117 | // transferring ownership to the caller. |
118 | static std::optional<DynMatcher> |
119 | parseMatcherExpression(llvm::StringRef &matcherCode, |
120 | const Registry &matcherRegistry, |
121 | const NamedValueMap *namedValues, Diagnostics *error); |
122 | static std::optional<DynMatcher> |
123 | parseMatcherExpression(llvm::StringRef &matcherCode, |
124 | const Registry &matcherRegistry, Diagnostics *error) { |
125 | return parseMatcherExpression(matcherCode, matcherRegistry, namedValues: nullptr, error); |
126 | } |
127 | |
128 | // Methods to parse any expression supported by this parser. |
129 | static bool parseExpression(llvm::StringRef &code, |
130 | const Registry &matcherRegistry, |
131 | const NamedValueMap *namedValues, |
132 | VariantValue *value, Diagnostics *error); |
133 | |
134 | static bool parseExpression(llvm::StringRef &code, |
135 | const Registry &matcherRegistry, |
136 | VariantValue *value, Diagnostics *error) { |
137 | return parseExpression(code, matcherRegistry, namedValues: nullptr, value, error); |
138 | } |
139 | |
140 | // Methods to complete an expression at a given offset. |
141 | static std::vector<MatcherCompletion> |
142 | completeExpression(llvm::StringRef &code, unsigned completionOffset, |
143 | const Registry &matcherRegistry, |
144 | const NamedValueMap *namedValues); |
145 | static std::vector<MatcherCompletion> |
146 | completeExpression(llvm::StringRef &code, unsigned completionOffset, |
147 | const Registry &matcherRegistry) { |
148 | return completeExpression(code, completionOffset, matcherRegistry, namedValues: nullptr); |
149 | } |
150 | |
151 | private: |
152 | class CodeTokenizer; |
153 | struct ScopedContextEntry; |
154 | struct TokenInfo; |
155 | |
156 | Parser(CodeTokenizer *tokenizer, const Registry &matcherRegistry, |
157 | const NamedValueMap *namedValues, Diagnostics *error); |
158 | |
159 | bool parseChainedExpression(std::string &argument); |
160 | |
161 | bool parseExpressionImpl(VariantValue *value); |
162 | |
163 | bool parseMatcherArgs(std::vector<ParserValue> &args, MatcherCtor ctor, |
164 | const TokenInfo &nameToken, TokenInfo &endToken); |
165 | |
166 | bool parseMatcherExpressionImpl(const TokenInfo &nameToken, |
167 | const TokenInfo &openToken, |
168 | std::optional<MatcherCtor> ctor, |
169 | VariantValue *value); |
170 | |
171 | bool parseIdentifierPrefixImpl(VariantValue *value); |
172 | |
173 | void addCompletion(const TokenInfo &compToken, |
174 | const MatcherCompletion &completion); |
175 | void addExpressionCompletions(); |
176 | |
177 | std::vector<MatcherCompletion> |
178 | getNamedValueCompletions(llvm::ArrayRef<ArgKind> acceptedTypes); |
179 | |
180 | CodeTokenizer *const tokenizer; |
181 | std::unique_ptr<RegistrySema> sema; |
182 | const NamedValueMap *const namedValues; |
183 | Diagnostics *const error; |
184 | |
185 | using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>; |
186 | |
187 | ContextStackTy contextStack; |
188 | std::vector<MatcherCompletion> completions; |
189 | }; |
190 | |
191 | } // namespace mlir::query::matcher::internal |
192 | |
193 | #endif // MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
194 | |