1 | //===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_ |
10 | #define LIB_TOOLS_PDLL_PARSER_LEXER_H_ |
11 | |
12 | #include "mlir/Support/LLVM.h" |
13 | #include "llvm/ADT/StringRef.h" |
14 | #include "llvm/Support/SMLoc.h" |
15 | |
16 | namespace llvm { |
17 | class SourceMgr; |
18 | } // namespace llvm |
19 | |
20 | namespace mlir { |
21 | namespace pdll { |
22 | class CodeCompleteContext; |
23 | |
24 | namespace ast { |
25 | class DiagnosticEngine; |
26 | } // namespace ast |
27 | |
28 | //===----------------------------------------------------------------------===// |
29 | // Token |
30 | //===----------------------------------------------------------------------===// |
31 | |
32 | class Token { |
33 | public: |
34 | enum Kind { |
35 | /// Markers. |
36 | eof, |
37 | error, |
38 | /// Token signifying a code completion location. |
39 | code_complete, |
40 | /// Token signifying a code completion location within a string. |
41 | code_complete_string, |
42 | |
43 | /// Keywords. |
44 | KW_BEGIN, |
45 | /// Dependent keywords, i.e. those that are treated as keywords depending on |
46 | /// the current parser context. |
47 | KW_DEPENDENT_BEGIN, |
48 | kw_attr, |
49 | kw_op, |
50 | kw_type, |
51 | KW_DEPENDENT_END, |
52 | |
53 | /// General keywords. |
54 | kw_Attr, |
55 | kw_erase, |
56 | kw_let, |
57 | kw_Constraint, |
58 | kw_not, |
59 | kw_Op, |
60 | kw_OpName, |
61 | kw_Pattern, |
62 | kw_replace, |
63 | kw_return, |
64 | kw_rewrite, |
65 | kw_Rewrite, |
66 | kw_Type, |
67 | kw_TypeRange, |
68 | kw_Value, |
69 | kw_ValueRange, |
70 | kw_with, |
71 | KW_END, |
72 | |
73 | /// Punctuation. |
74 | arrow, |
75 | colon, |
76 | comma, |
77 | dot, |
78 | equal, |
79 | equal_arrow, |
80 | semicolon, |
81 | /// Paired punctuation. |
82 | less, |
83 | greater, |
84 | l_brace, |
85 | r_brace, |
86 | l_paren, |
87 | r_paren, |
88 | l_square, |
89 | r_square, |
90 | underscore, |
91 | |
92 | /// Tokens. |
93 | directive, |
94 | identifier, |
95 | integer, |
96 | string_block, |
97 | string |
98 | }; |
99 | Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} |
100 | |
101 | /// Given a token containing a string literal, return its value, including |
102 | /// removing the quote characters and unescaping the contents of the string. |
103 | std::string getStringValue() const; |
104 | |
105 | /// Returns true if the current token is a string literal. |
106 | bool isString() const { return isAny(k1: Token::string, k2: Token::string_block); } |
107 | |
108 | /// Returns true if the current token is a keyword. |
109 | bool isKeyword() const { |
110 | return kind > Token::KW_BEGIN && kind < Token::KW_END; |
111 | } |
112 | |
113 | /// Returns true if the current token is a keyword in a dependent context, and |
114 | /// in any other situation (e.g. variable names) may be treated as an |
115 | /// identifier. |
116 | bool isDependentKeyword() const { |
117 | return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END; |
118 | } |
119 | |
120 | /// Return the bytes that make up this token. |
121 | StringRef getSpelling() const { return spelling; } |
122 | |
123 | /// Return the kind of this token. |
124 | Kind getKind() const { return kind; } |
125 | |
126 | /// Return true if this token is one of the specified kinds. |
127 | bool isAny(Kind k1, Kind k2) const { return is(k: k1) || is(k: k2); } |
128 | template <typename... T> |
129 | bool isAny(Kind k1, Kind k2, Kind k3, T... others) const { |
130 | return is(k: k1) || isAny(k2, k3, others...); |
131 | } |
132 | |
133 | /// Return if the token does not have the given kind. |
134 | bool isNot(Kind k) const { return k != kind; } |
135 | template <typename... T> |
136 | bool isNot(Kind k1, Kind k2, T... others) const { |
137 | return !isAny(k1, k2, others...); |
138 | } |
139 | |
140 | /// Return if the token has the given kind. |
141 | bool is(Kind k) const { return kind == k; } |
142 | |
143 | /// Return a location for the start of this token. |
144 | SMLoc getStartLoc() const { return SMLoc::getFromPointer(Ptr: spelling.data()); } |
145 | /// Return a location at the end of this token. |
146 | SMLoc getEndLoc() const { |
147 | return SMLoc::getFromPointer(Ptr: spelling.data() + spelling.size()); |
148 | } |
149 | /// Return a location for the range of this token. |
150 | SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); } |
151 | |
152 | private: |
153 | /// Discriminator that indicates the kind of token this is. |
154 | Kind kind; |
155 | |
156 | /// A reference to the entire token contents; this is always a pointer into |
157 | /// a memory buffer owned by the source manager. |
158 | StringRef spelling; |
159 | }; |
160 | |
161 | //===----------------------------------------------------------------------===// |
162 | // Lexer |
163 | //===----------------------------------------------------------------------===// |
164 | |
165 | class Lexer { |
166 | public: |
167 | Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine, |
168 | CodeCompleteContext *codeCompleteContext); |
169 | ~Lexer(); |
170 | |
171 | /// Return a reference to the source manager used by the lexer. |
172 | llvm::SourceMgr &getSourceMgr() { return srcMgr; } |
173 | |
174 | /// Return a reference to the diagnostic engine used by the lexer. |
175 | ast::DiagnosticEngine &getDiagEngine() { return diagEngine; } |
176 | |
177 | /// Push an include of the given file. This will cause the lexer to start |
178 | /// processing the provided file. Returns failure if the file could not be |
179 | /// opened, success otherwise. |
180 | LogicalResult pushInclude(StringRef filename, SMRange includeLoc); |
181 | |
182 | /// Lex the next token and return it. |
183 | Token lexToken(); |
184 | |
185 | /// Change the position of the lexer cursor. The next token we lex will start |
186 | /// at the designated point in the input. |
187 | void resetPointer(const char *newPointer) { curPtr = newPointer; } |
188 | |
189 | /// Emit an error to the lexer with the given location and message. |
190 | Token emitError(SMRange loc, const Twine &msg); |
191 | Token emitError(const char *loc, const Twine &msg); |
192 | Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc, |
193 | const Twine ¬e); |
194 | |
195 | private: |
196 | Token formToken(Token::Kind kind, const char *tokStart) { |
197 | return Token(kind, StringRef(tokStart, curPtr - tokStart)); |
198 | } |
199 | |
200 | /// Return the next character in the stream. |
201 | int getNextChar(); |
202 | |
203 | /// Lex methods. |
204 | void (); |
205 | Token lexDirective(const char *tokStart); |
206 | Token lexIdentifier(const char *tokStart); |
207 | Token lexNumber(const char *tokStart); |
208 | Token lexString(const char *tokStart, bool isStringBlock); |
209 | |
210 | llvm::SourceMgr &srcMgr; |
211 | int curBufferID; |
212 | StringRef curBuffer; |
213 | const char *curPtr; |
214 | |
215 | /// The engine used to emit diagnostics during lexing/parsing. |
216 | ast::DiagnosticEngine &diagEngine; |
217 | |
218 | /// A flag indicating if we added a default diagnostic handler to the provided |
219 | /// diagEngine. |
220 | bool addedHandlerToDiagEngine; |
221 | |
222 | /// The optional code completion point within the input file. |
223 | const char *codeCompletionLocation; |
224 | }; |
225 | } // namespace pdll |
226 | } // namespace mlir |
227 | |
228 | #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_ |
229 | |