1//===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_
10#define LIB_TOOLS_PDLL_PARSER_LEXER_H_
11
12#include "mlir/Support/LLVM.h"
13#include "llvm/ADT/StringRef.h"
14#include "llvm/Support/SMLoc.h"
15
16namespace llvm {
17class SourceMgr;
18} // namespace llvm
19
20namespace mlir {
21struct LogicalResult;
22
23namespace pdll {
24class CodeCompleteContext;
25
26namespace ast {
27class DiagnosticEngine;
28} // namespace ast
29
30//===----------------------------------------------------------------------===//
31// Token
32//===----------------------------------------------------------------------===//
33
34class Token {
35public:
36 enum Kind {
37 /// Markers.
38 eof,
39 error,
40 /// Token signifying a code completion location.
41 code_complete,
42 /// Token signifying a code completion location within a string.
43 code_complete_string,
44
45 /// Keywords.
46 KW_BEGIN,
47 /// Dependent keywords, i.e. those that are treated as keywords depending on
48 /// the current parser context.
49 KW_DEPENDENT_BEGIN,
50 kw_attr,
51 kw_op,
52 kw_type,
53 KW_DEPENDENT_END,
54
55 /// General keywords.
56 kw_Attr,
57 kw_erase,
58 kw_let,
59 kw_Constraint,
60 kw_not,
61 kw_Op,
62 kw_OpName,
63 kw_Pattern,
64 kw_replace,
65 kw_return,
66 kw_rewrite,
67 kw_Rewrite,
68 kw_Type,
69 kw_TypeRange,
70 kw_Value,
71 kw_ValueRange,
72 kw_with,
73 KW_END,
74
75 /// Punctuation.
76 arrow,
77 colon,
78 comma,
79 dot,
80 equal,
81 equal_arrow,
82 semicolon,
83 /// Paired punctuation.
84 less,
85 greater,
86 l_brace,
87 r_brace,
88 l_paren,
89 r_paren,
90 l_square,
91 r_square,
92 underscore,
93
94 /// Tokens.
95 directive,
96 identifier,
97 integer,
98 string_block,
99 string
100 };
101 Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
102
103 /// Given a token containing a string literal, return its value, including
104 /// removing the quote characters and unescaping the contents of the string.
105 std::string getStringValue() const;
106
107 /// Returns true if the current token is a string literal.
108 bool isString() const { return isAny(k1: Token::string, k2: Token::string_block); }
109
110 /// Returns true if the current token is a keyword.
111 bool isKeyword() const {
112 return kind > Token::KW_BEGIN && kind < Token::KW_END;
113 }
114
115 /// Returns true if the current token is a keyword in a dependent context, and
116 /// in any other situation (e.g. variable names) may be treated as an
117 /// identifier.
118 bool isDependentKeyword() const {
119 return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END;
120 }
121
122 /// Return the bytes that make up this token.
123 StringRef getSpelling() const { return spelling; }
124
125 /// Return the kind of this token.
126 Kind getKind() const { return kind; }
127
128 /// Return true if this token is one of the specified kinds.
129 bool isAny(Kind k1, Kind k2) const { return is(k: k1) || is(k: k2); }
130 template <typename... T>
131 bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
132 return is(k: k1) || isAny(k2, k3, others...);
133 }
134
135 /// Return if the token does not have the given kind.
136 bool isNot(Kind k) const { return k != kind; }
137 template <typename... T>
138 bool isNot(Kind k1, Kind k2, T... others) const {
139 return !isAny(k1, k2, others...);
140 }
141
142 /// Return if the token has the given kind.
143 bool is(Kind k) const { return kind == k; }
144
145 /// Return a location for the start of this token.
146 SMLoc getStartLoc() const { return SMLoc::getFromPointer(Ptr: spelling.data()); }
147 /// Return a location at the end of this token.
148 SMLoc getEndLoc() const {
149 return SMLoc::getFromPointer(Ptr: spelling.data() + spelling.size());
150 }
151 /// Return a location for the range of this token.
152 SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); }
153
154private:
155 /// Discriminator that indicates the kind of token this is.
156 Kind kind;
157
158 /// A reference to the entire token contents; this is always a pointer into
159 /// a memory buffer owned by the source manager.
160 StringRef spelling;
161};
162
163//===----------------------------------------------------------------------===//
164// Lexer
165//===----------------------------------------------------------------------===//
166
167class Lexer {
168public:
169 Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
170 CodeCompleteContext *codeCompleteContext);
171 ~Lexer();
172
173 /// Return a reference to the source manager used by the lexer.
174 llvm::SourceMgr &getSourceMgr() { return srcMgr; }
175
176 /// Return a reference to the diagnostic engine used by the lexer.
177 ast::DiagnosticEngine &getDiagEngine() { return diagEngine; }
178
179 /// Push an include of the given file. This will cause the lexer to start
180 /// processing the provided file. Returns failure if the file could not be
181 /// opened, success otherwise.
182 LogicalResult pushInclude(StringRef filename, SMRange includeLoc);
183
184 /// Lex the next token and return it.
185 Token lexToken();
186
187 /// Change the position of the lexer cursor. The next token we lex will start
188 /// at the designated point in the input.
189 void resetPointer(const char *newPointer) { curPtr = newPointer; }
190
191 /// Emit an error to the lexer with the given location and message.
192 Token emitError(SMRange loc, const Twine &msg);
193 Token emitError(const char *loc, const Twine &msg);
194 Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
195 const Twine &note);
196
197private:
198 Token formToken(Token::Kind kind, const char *tokStart) {
199 return Token(kind, StringRef(tokStart, curPtr - tokStart));
200 }
201
202 /// Return the next character in the stream.
203 int getNextChar();
204
205 /// Lex methods.
206 void lexComment();
207 Token lexDirective(const char *tokStart);
208 Token lexIdentifier(const char *tokStart);
209 Token lexNumber(const char *tokStart);
210 Token lexString(const char *tokStart, bool isStringBlock);
211
212 llvm::SourceMgr &srcMgr;
213 int curBufferID;
214 StringRef curBuffer;
215 const char *curPtr;
216
217 /// The engine used to emit diagnostics during lexing/parsing.
218 ast::DiagnosticEngine &diagEngine;
219
220 /// A flag indicating if we added a default diagnostic handler to the provided
221 /// diagEngine.
222 bool addedHandlerToDiagEngine;
223
224 /// The optional code completion point within the input file.
225 const char *codeCompletionLocation;
226};
227} // namespace pdll
228} // namespace mlir
229
230#endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_
231

// Source: mlir/lib/Tools/PDLL/Parser/Lexer.h