| 1 | //===--- Parser.h - Matcher expression parser -------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Simple matcher expression parser. |
| 10 | // |
| 11 | // This file contains the Parser class, which is responsible for parsing |
| 12 | // expressions in a specific format: matcherName(Arg0, Arg1, ..., ArgN). The |
| 13 | // parser can also interpret simple types, like strings. |
| 14 | // |
| 15 | // The actual processing of the matchers is handled by a Sema object that is |
| 16 | // provided to the parser. |
| 17 | // |
| 18 | // The grammar for the supported expressions is as follows: |
| 19 | // <Expression> := <Literal> | <MatcherExpression> |
| 20 | // <Literal> := <StringLiteral> | <NumericLiteral> | <BooleanLiteral> |
| 21 | // <StringLiteral> := "quoted string" |
| 22 | // <BooleanLiteral> := "true" | "false" |
| 23 | // <NumericLiteral> := [0-9]+ |
| 24 | // <MatcherExpression> := <MatcherName>(<ArgumentList>) |
| 25 | // <MatcherName> := [a-zA-Z]+ |
| 26 | // <ArgumentList> := <Expression> | <Expression>,<ArgumentList> |
| 27 | // |
| 28 | //===----------------------------------------------------------------------===// |
| 29 | |
| 30 | #ifndef MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
| 31 | #define MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
| 32 | |
| 33 | #include "Diagnostics.h" |
| 34 | #include "RegistryManager.h" |
| 35 | #include "llvm/ADT/ArrayRef.h" |
| 36 | #include "llvm/ADT/StringMap.h" |
| 37 | #include "llvm/ADT/StringRef.h" |
| 38 | #include <memory> |
| 39 | #include <vector> |
| 40 | |
| 41 | namespace mlir::query::matcher::internal { |
| 42 | |
| 43 | // Matcher expression parser. |
| 44 | class Parser { |
| 45 | public: |
| 46 | // Different possible tokens. |
| 47 | enum class TokenKind { |
| 48 | Eof, |
| 49 | NewLine, |
| 50 | OpenParen, |
| 51 | CloseParen, |
| 52 | Comma, |
| 53 | Period, |
| 54 | Literal, |
| 55 | Ident, |
| 56 | InvalidChar, |
| 57 | CodeCompletion, |
| 58 | Error |
| 59 | }; |
| 60 | |
| 61 | // Interface to connect the parser with the registry and more. The parser uses |
| 62 | // the Sema instance passed into parseMatcherExpression() to handle all |
| 63 | // matcher tokens. |
| 64 | class Sema { |
| 65 | public: |
| 66 | virtual ~Sema(); |
| 67 | |
| 68 | // Process a matcher expression. The caller takes ownership of the Matcher |
| 69 | // object returned. |
| 70 | virtual VariantMatcher actOnMatcherExpression( |
| 71 | MatcherCtor ctor, SourceRange nameRange, llvm::StringRef functionName, |
| 72 | llvm::ArrayRef<ParserValue> args, Diagnostics *error) = 0; |
| 73 | |
| 74 | // Look up a matcher by name in the matcher name found by the parser. |
| 75 | virtual std::optional<MatcherCtor> |
| 76 | lookupMatcherCtor(llvm::StringRef matcherName) = 0; |
| 77 | |
| 78 | // Compute the list of completion types for Context. |
| 79 | virtual std::vector<ArgKind> getAcceptedCompletionTypes( |
| 80 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context); |
| 81 | |
| 82 | // Compute the list of completions that match any of acceptedTypes. |
| 83 | virtual std::vector<MatcherCompletion> |
| 84 | getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes); |
| 85 | }; |
| 86 | |
| 87 | // An implementation of the Sema interface that uses the matcher registry to |
| 88 | // process tokens. |
| 89 | class RegistrySema : public Parser::Sema { |
| 90 | public: |
| 91 | RegistrySema(const Registry &matcherRegistry) |
| 92 | : matcherRegistry(matcherRegistry) {} |
| 93 | ~RegistrySema() override; |
| 94 | |
| 95 | std::optional<MatcherCtor> |
| 96 | lookupMatcherCtor(llvm::StringRef matcherName) override; |
| 97 | |
| 98 | VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
| 99 | SourceRange NameRange, |
| 100 | StringRef functionName, |
| 101 | ArrayRef<ParserValue> Args, |
| 102 | Diagnostics *Error) override; |
| 103 | |
| 104 | std::vector<ArgKind> getAcceptedCompletionTypes( |
| 105 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> context) override; |
| 106 | |
| 107 | std::vector<MatcherCompletion> |
| 108 | getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes) override; |
| 109 | |
| 110 | private: |
| 111 | const Registry &matcherRegistry; |
| 112 | }; |
| 113 | |
| 114 | using NamedValueMap = llvm::StringMap<VariantValue>; |
| 115 | |
| 116 | // Methods to parse a matcher expression and return a DynMatcher object, |
| 117 | // transferring ownership to the caller. |
| 118 | static std::optional<DynMatcher> |
| 119 | parseMatcherExpression(llvm::StringRef &matcherCode, |
| 120 | const Registry &matcherRegistry, |
| 121 | const NamedValueMap *namedValues, Diagnostics *error); |
| 122 | static std::optional<DynMatcher> |
| 123 | parseMatcherExpression(llvm::StringRef &matcherCode, |
| 124 | const Registry &matcherRegistry, Diagnostics *error) { |
| 125 | return parseMatcherExpression(matcherCode, matcherRegistry, namedValues: nullptr, error); |
| 126 | } |
| 127 | |
| 128 | // Methods to parse any expression supported by this parser. |
| 129 | static bool parseExpression(llvm::StringRef &code, |
| 130 | const Registry &matcherRegistry, |
| 131 | const NamedValueMap *namedValues, |
| 132 | VariantValue *value, Diagnostics *error); |
| 133 | |
| 134 | static bool parseExpression(llvm::StringRef &code, |
| 135 | const Registry &matcherRegistry, |
| 136 | VariantValue *value, Diagnostics *error) { |
| 137 | return parseExpression(code, matcherRegistry, namedValues: nullptr, value, error); |
| 138 | } |
| 139 | |
| 140 | // Methods to complete an expression at a given offset. |
| 141 | static std::vector<MatcherCompletion> |
| 142 | completeExpression(llvm::StringRef &code, unsigned completionOffset, |
| 143 | const Registry &matcherRegistry, |
| 144 | const NamedValueMap *namedValues); |
| 145 | static std::vector<MatcherCompletion> |
| 146 | completeExpression(llvm::StringRef &code, unsigned completionOffset, |
| 147 | const Registry &matcherRegistry) { |
| 148 | return completeExpression(code, completionOffset, matcherRegistry, namedValues: nullptr); |
| 149 | } |
| 150 | |
| 151 | private: |
| 152 | class CodeTokenizer; |
| 153 | struct ScopedContextEntry; |
| 154 | struct TokenInfo; |
| 155 | |
| 156 | Parser(CodeTokenizer *tokenizer, const Registry &matcherRegistry, |
| 157 | const NamedValueMap *namedValues, Diagnostics *error); |
| 158 | |
| 159 | bool parseChainedExpression(std::string &argument); |
| 160 | |
| 161 | bool parseExpressionImpl(VariantValue *value); |
| 162 | |
| 163 | bool parseMatcherArgs(std::vector<ParserValue> &args, MatcherCtor ctor, |
| 164 | const TokenInfo &nameToken, TokenInfo &endToken); |
| 165 | |
| 166 | bool parseMatcherExpressionImpl(const TokenInfo &nameToken, |
| 167 | const TokenInfo &openToken, |
| 168 | std::optional<MatcherCtor> ctor, |
| 169 | VariantValue *value); |
| 170 | |
| 171 | bool parseIdentifierPrefixImpl(VariantValue *value); |
| 172 | |
| 173 | void addCompletion(const TokenInfo &compToken, |
| 174 | const MatcherCompletion &completion); |
| 175 | void addExpressionCompletions(); |
| 176 | |
| 177 | std::vector<MatcherCompletion> |
| 178 | getNamedValueCompletions(llvm::ArrayRef<ArgKind> acceptedTypes); |
| 179 | |
| 180 | CodeTokenizer *const tokenizer; |
| 181 | std::unique_ptr<RegistrySema> sema; |
| 182 | const NamedValueMap *const namedValues; |
| 183 | Diagnostics *const error; |
| 184 | |
| 185 | using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>; |
| 186 | |
| 187 | ContextStackTy contextStack; |
| 188 | std::vector<MatcherCompletion> completions; |
| 189 | }; |
| 190 | |
| 191 | } // namespace mlir::query::matcher::internal |
| 192 | |
| 193 | #endif // MLIR_TOOLS_MLIRQUERY_MATCHER_PARSER_H |
| 194 | |