1 | //===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_ |
10 | #define LIB_TOOLS_PDLL_PARSER_LEXER_H_ |
11 | |
12 | #include "mlir/Support/LLVM.h" |
13 | #include "llvm/ADT/StringRef.h" |
14 | #include "llvm/Support/SMLoc.h" |
15 | |
16 | namespace llvm { |
17 | class SourceMgr; |
18 | } // namespace llvm |
19 | |
20 | namespace mlir { |
21 | struct LogicalResult; |
22 | |
23 | namespace pdll { |
24 | class CodeCompleteContext; |
25 | |
26 | namespace ast { |
27 | class DiagnosticEngine; |
28 | } // namespace ast |
29 | |
30 | //===----------------------------------------------------------------------===// |
31 | // Token |
32 | //===----------------------------------------------------------------------===// |
33 | |
34 | class Token { |
35 | public: |
36 | enum Kind { |
37 | /// Markers. |
38 | eof, |
39 | error, |
40 | /// Token signifying a code completion location. |
41 | code_complete, |
42 | /// Token signifying a code completion location within a string. |
43 | code_complete_string, |
44 | |
45 | /// Keywords. |
46 | KW_BEGIN, |
47 | /// Dependent keywords, i.e. those that are treated as keywords depending on |
48 | /// the current parser context. |
49 | KW_DEPENDENT_BEGIN, |
50 | kw_attr, |
51 | kw_op, |
52 | kw_type, |
53 | KW_DEPENDENT_END, |
54 | |
55 | /// General keywords. |
56 | kw_Attr, |
57 | kw_erase, |
58 | kw_let, |
59 | kw_Constraint, |
60 | kw_not, |
61 | kw_Op, |
62 | kw_OpName, |
63 | kw_Pattern, |
64 | kw_replace, |
65 | kw_return, |
66 | kw_rewrite, |
67 | kw_Rewrite, |
68 | kw_Type, |
69 | kw_TypeRange, |
70 | kw_Value, |
71 | kw_ValueRange, |
72 | kw_with, |
73 | KW_END, |
74 | |
75 | /// Punctuation. |
76 | arrow, |
77 | colon, |
78 | comma, |
79 | dot, |
80 | equal, |
81 | equal_arrow, |
82 | semicolon, |
83 | /// Paired punctuation. |
84 | less, |
85 | greater, |
86 | l_brace, |
87 | r_brace, |
88 | l_paren, |
89 | r_paren, |
90 | l_square, |
91 | r_square, |
92 | underscore, |
93 | |
94 | /// Tokens. |
95 | directive, |
96 | identifier, |
97 | integer, |
98 | string_block, |
99 | string |
100 | }; |
101 | Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} |
102 | |
103 | /// Given a token containing a string literal, return its value, including |
104 | /// removing the quote characters and unescaping the contents of the string. |
105 | std::string getStringValue() const; |
106 | |
107 | /// Returns true if the current token is a string literal. |
108 | bool isString() const { return isAny(k1: Token::string, k2: Token::string_block); } |
109 | |
110 | /// Returns true if the current token is a keyword. |
111 | bool isKeyword() const { |
112 | return kind > Token::KW_BEGIN && kind < Token::KW_END; |
113 | } |
114 | |
115 | /// Returns true if the current token is a keyword in a dependent context, and |
116 | /// in any other situation (e.g. variable names) may be treated as an |
117 | /// identifier. |
118 | bool isDependentKeyword() const { |
119 | return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END; |
120 | } |
121 | |
122 | /// Return the bytes that make up this token. |
123 | StringRef getSpelling() const { return spelling; } |
124 | |
125 | /// Return the kind of this token. |
126 | Kind getKind() const { return kind; } |
127 | |
128 | /// Return true if this token is one of the specified kinds. |
129 | bool isAny(Kind k1, Kind k2) const { return is(k: k1) || is(k: k2); } |
130 | template <typename... T> |
131 | bool isAny(Kind k1, Kind k2, Kind k3, T... others) const { |
132 | return is(k: k1) || isAny(k2, k3, others...); |
133 | } |
134 | |
135 | /// Return if the token does not have the given kind. |
136 | bool isNot(Kind k) const { return k != kind; } |
137 | template <typename... T> |
138 | bool isNot(Kind k1, Kind k2, T... others) const { |
139 | return !isAny(k1, k2, others...); |
140 | } |
141 | |
142 | /// Return if the token has the given kind. |
143 | bool is(Kind k) const { return kind == k; } |
144 | |
145 | /// Return a location for the start of this token. |
146 | SMLoc getStartLoc() const { return SMLoc::getFromPointer(Ptr: spelling.data()); } |
147 | /// Return a location at the end of this token. |
148 | SMLoc getEndLoc() const { |
149 | return SMLoc::getFromPointer(Ptr: spelling.data() + spelling.size()); |
150 | } |
151 | /// Return a location for the range of this token. |
152 | SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); } |
153 | |
154 | private: |
155 | /// Discriminator that indicates the kind of token this is. |
156 | Kind kind; |
157 | |
158 | /// A reference to the entire token contents; this is always a pointer into |
159 | /// a memory buffer owned by the source manager. |
160 | StringRef spelling; |
161 | }; |
162 | |
163 | //===----------------------------------------------------------------------===// |
164 | // Lexer |
165 | //===----------------------------------------------------------------------===// |
166 | |
167 | class Lexer { |
168 | public: |
169 | Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine, |
170 | CodeCompleteContext *codeCompleteContext); |
171 | ~Lexer(); |
172 | |
173 | /// Return a reference to the source manager used by the lexer. |
174 | llvm::SourceMgr &getSourceMgr() { return srcMgr; } |
175 | |
176 | /// Return a reference to the diagnostic engine used by the lexer. |
177 | ast::DiagnosticEngine &getDiagEngine() { return diagEngine; } |
178 | |
179 | /// Push an include of the given file. This will cause the lexer to start |
180 | /// processing the provided file. Returns failure if the file could not be |
181 | /// opened, success otherwise. |
182 | LogicalResult pushInclude(StringRef filename, SMRange includeLoc); |
183 | |
184 | /// Lex the next token and return it. |
185 | Token lexToken(); |
186 | |
187 | /// Change the position of the lexer cursor. The next token we lex will start |
188 | /// at the designated point in the input. |
189 | void resetPointer(const char *newPointer) { curPtr = newPointer; } |
190 | |
191 | /// Emit an error to the lexer with the given location and message. |
192 | Token emitError(SMRange loc, const Twine &msg); |
193 | Token emitError(const char *loc, const Twine &msg); |
194 | Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc, |
195 | const Twine ¬e); |
196 | |
197 | private: |
198 | Token formToken(Token::Kind kind, const char *tokStart) { |
199 | return Token(kind, StringRef(tokStart, curPtr - tokStart)); |
200 | } |
201 | |
202 | /// Return the next character in the stream. |
203 | int getNextChar(); |
204 | |
205 | /// Lex methods. |
206 | void (); |
207 | Token lexDirective(const char *tokStart); |
208 | Token lexIdentifier(const char *tokStart); |
209 | Token lexNumber(const char *tokStart); |
210 | Token lexString(const char *tokStart, bool isStringBlock); |
211 | |
212 | llvm::SourceMgr &srcMgr; |
213 | int curBufferID; |
214 | StringRef curBuffer; |
215 | const char *curPtr; |
216 | |
217 | /// The engine used to emit diagnostics during lexing/parsing. |
218 | ast::DiagnosticEngine &diagEngine; |
219 | |
220 | /// A flag indicating if we added a default diagnostic handler to the provided |
221 | /// diagEngine. |
222 | bool addedHandlerToDiagEngine; |
223 | |
224 | /// The optional code completion point within the input file. |
225 | const char *codeCompletionLocation; |
226 | }; |
227 | } // namespace pdll |
228 | } // namespace mlir |
229 | |
230 | #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_ |
231 | |