1 | //===--- Parser.h - Matcher expression parser -------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Simple matcher expression parser. |
10 | // |
11 | // This file contains the Parser class, which is responsible for parsing |
12 | // expressions in a specific format: matcherName(Arg0, Arg1, ..., ArgN). The |
13 | // parser can also interpret simple types, like strings. |
14 | // |
15 | // The actual processing of the matchers is handled by a Sema object that is |
16 | // provided to the parser. |
17 | // |
18 | // The grammar for the supported expressions is as follows: |
19 | // <Expression> := <StringLiteral> | <MatcherExpression> |
20 | // <StringLiteral> := "quoted string" |
21 | // <MatcherExpression> := <MatcherName>(<ArgumentList>) |
22 | // <MatcherName> := [a-zA-Z]+ |
23 | // <ArgumentList> := <Expression> | <Expression>,<ArgumentList> |
24 | // |
25 | //===----------------------------------------------------------------------===// |
26 | |
27 | #ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
28 | #define MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
29 | |
#include "Diagnostics.h"
#include "RegistryManager.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
37 | |
38 | namespace mlir::query::matcher::internal { |
39 | |
40 | // Matcher expression parser. |
41 | class Parser { |
42 | public: |
43 | // Different possible tokens. |
44 | enum class TokenKind { |
45 | Eof, |
46 | NewLine, |
47 | OpenParen, |
48 | CloseParen, |
49 | Comma, |
50 | Period, |
51 | Literal, |
52 | Ident, |
53 | InvalidChar, |
54 | CodeCompletion, |
55 | Error |
56 | }; |
57 | |
58 | // Interface to connect the parser with the registry and more. The parser uses |
59 | // the Sema instance passed into parseMatcherExpression() to handle all |
60 | // matcher tokens. |
61 | class Sema { |
62 | public: |
63 | virtual ~Sema(); |
64 | |
65 | // Process a matcher expression. The caller takes ownership of the Matcher |
66 | // object returned. |
67 | virtual VariantMatcher actOnMatcherExpression( |
68 | MatcherCtor ctor, SourceRange nameRange, llvm::StringRef functionName, |
69 | llvm::ArrayRef<ParserValue> args, Diagnostics *error) = 0; |
70 | |
71 | // Look up a matcher by name in the matcher name found by the parser. |
72 | virtual std::optional<MatcherCtor> |
73 | lookupMatcherCtor(llvm::StringRef matcherName) = 0; |
74 | |
75 | // Compute the list of completion types for Context. |
76 | virtual std::vector<ArgKind> getAcceptedCompletionTypes( |
77 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context); |
78 | |
79 | // Compute the list of completions that match any of acceptedTypes. |
80 | virtual std::vector<MatcherCompletion> |
81 | getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes); |
82 | }; |
83 | |
84 | // An implementation of the Sema interface that uses the matcher registry to |
85 | // process tokens. |
86 | class RegistrySema : public Parser::Sema { |
87 | public: |
88 | RegistrySema(const Registry &matcherRegistry) |
89 | : matcherRegistry(matcherRegistry) {} |
90 | ~RegistrySema() override; |
91 | |
92 | std::optional<MatcherCtor> |
93 | lookupMatcherCtor(llvm::StringRef matcherName) override; |
94 | |
95 | VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
96 | SourceRange NameRange, |
97 | StringRef functionName, |
98 | ArrayRef<ParserValue> Args, |
99 | Diagnostics *Error) override; |
100 | |
101 | std::vector<ArgKind> getAcceptedCompletionTypes( |
102 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> context) override; |
103 | |
104 | std::vector<MatcherCompletion> |
105 | getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes) override; |
106 | |
107 | private: |
108 | const Registry &matcherRegistry; |
109 | }; |
110 | |
111 | using NamedValueMap = llvm::StringMap<VariantValue>; |
112 | |
113 | // Methods to parse a matcher expression and return a DynMatcher object, |
114 | // transferring ownership to the caller. |
115 | static std::optional<DynMatcher> |
116 | parseMatcherExpression(llvm::StringRef &matcherCode, |
117 | const Registry &matcherRegistry, |
118 | const NamedValueMap *namedValues, Diagnostics *error); |
119 | static std::optional<DynMatcher> |
120 | parseMatcherExpression(llvm::StringRef &matcherCode, |
121 | const Registry &matcherRegistry, Diagnostics *error) { |
122 | return parseMatcherExpression(matcherCode, matcherRegistry, namedValues: nullptr, error); |
123 | } |
124 | |
125 | // Methods to parse any expression supported by this parser. |
126 | static bool parseExpression(llvm::StringRef &code, |
127 | const Registry &matcherRegistry, |
128 | const NamedValueMap *namedValues, |
129 | VariantValue *value, Diagnostics *error); |
130 | |
131 | static bool parseExpression(llvm::StringRef &code, |
132 | const Registry &matcherRegistry, |
133 | VariantValue *value, Diagnostics *error) { |
134 | return parseExpression(code, matcherRegistry, namedValues: nullptr, value, error); |
135 | } |
136 | |
137 | // Methods to complete an expression at a given offset. |
138 | static std::vector<MatcherCompletion> |
139 | completeExpression(llvm::StringRef &code, unsigned completionOffset, |
140 | const Registry &matcherRegistry, |
141 | const NamedValueMap *namedValues); |
142 | static std::vector<MatcherCompletion> |
143 | completeExpression(llvm::StringRef &code, unsigned completionOffset, |
144 | const Registry &matcherRegistry) { |
145 | return completeExpression(code, completionOffset, matcherRegistry, namedValues: nullptr); |
146 | } |
147 | |
148 | private: |
149 | class CodeTokenizer; |
150 | struct ScopedContextEntry; |
151 | struct TokenInfo; |
152 | |
153 | Parser(CodeTokenizer *tokenizer, const Registry &matcherRegistry, |
154 | const NamedValueMap *namedValues, Diagnostics *error); |
155 | |
156 | bool parseChainedExpression(std::string &argument); |
157 | |
158 | bool parseExpressionImpl(VariantValue *value); |
159 | |
160 | bool parseMatcherArgs(std::vector<ParserValue> &args, MatcherCtor ctor, |
161 | const TokenInfo &nameToken, TokenInfo &endToken); |
162 | |
163 | bool parseMatcherExpressionImpl(const TokenInfo &nameToken, |
164 | const TokenInfo &openToken, |
165 | std::optional<MatcherCtor> ctor, |
166 | VariantValue *value); |
167 | |
168 | bool parseIdentifierPrefixImpl(VariantValue *value); |
169 | |
170 | void addCompletion(const TokenInfo &compToken, |
171 | const MatcherCompletion &completion); |
172 | void addExpressionCompletions(); |
173 | |
174 | std::vector<MatcherCompletion> |
175 | getNamedValueCompletions(llvm::ArrayRef<ArgKind> acceptedTypes); |
176 | |
177 | CodeTokenizer *const tokenizer; |
178 | std::unique_ptr<RegistrySema> sema; |
179 | const NamedValueMap *const namedValues; |
180 | Diagnostics *const error; |
181 | |
182 | using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>; |
183 | |
184 | ContextStackTy contextStack; |
185 | std::vector<MatcherCompletion> completions; |
186 | }; |
187 | |
188 | } // namespace mlir::query::matcher::internal |
189 | |
190 | #endif // MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
191 | |