1 | //===- Token.cpp - MLIR Token Implementation ------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the Token class for the MLIR textual form. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "Token.h" |
14 | #include "mlir/Support/LLVM.h" |
15 | #include "llvm/ADT/StringExtras.h" |
16 | #include "llvm/Support/ErrorHandling.h" |
17 | #include <cassert> |
18 | #include <cstdint> |
19 | #include <optional> |
20 | #include <string> |
21 | |
22 | using namespace mlir; |
23 | |
24 | SMLoc Token::getLoc() const { return SMLoc::getFromPointer(Ptr: spelling.data()); } |
25 | |
26 | SMLoc Token::getEndLoc() const { |
27 | return SMLoc::getFromPointer(Ptr: spelling.data() + spelling.size()); |
28 | } |
29 | |
30 | SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); } |
31 | |
32 | /// For an integer token, return its value as an unsigned. If it doesn't fit, |
33 | /// return std::nullopt. |
34 | std::optional<unsigned> Token::getUnsignedIntegerValue() const { |
35 | bool isHex = spelling.size() > 1 && spelling[1] == 'x'; |
36 | |
37 | unsigned result = 0; |
38 | if (spelling.getAsInteger(Radix: isHex ? 0 : 10, Result&: result)) |
39 | return std::nullopt; |
40 | return result; |
41 | } |
42 | |
43 | /// For an integer token, return its value as a uint64_t. If it doesn't fit, |
44 | /// return std::nullopt. |
45 | std::optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) { |
46 | bool isHex = spelling.size() > 1 && spelling[1] == 'x'; |
47 | |
48 | uint64_t result = 0; |
49 | if (spelling.getAsInteger(Radix: isHex ? 0 : 10, Result&: result)) |
50 | return std::nullopt; |
51 | return result; |
52 | } |
53 | |
54 | /// For a floatliteral, return its value as a double. Return std::nullopt if the |
55 | /// value underflows or overflows. |
56 | std::optional<double> Token::getFloatingPointValue() const { |
57 | double result = 0; |
58 | if (spelling.getAsDouble(Result&: result)) |
59 | return std::nullopt; |
60 | return result; |
61 | } |
62 | |
63 | /// For an inttype token, return its bitwidth. |
64 | std::optional<unsigned> Token::getIntTypeBitwidth() const { |
65 | assert(getKind() == inttype); |
66 | unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2); |
67 | unsigned result = 0; |
68 | if (spelling.drop_front(N: bitwidthStart).getAsInteger(Radix: 10, Result&: result)) |
69 | return std::nullopt; |
70 | return result; |
71 | } |
72 | |
73 | std::optional<bool> Token::getIntTypeSignedness() const { |
74 | assert(getKind() == inttype); |
75 | if (spelling[0] == 'i') |
76 | return std::nullopt; |
77 | if (spelling[0] == 's') |
78 | return true; |
79 | assert(spelling[0] == 'u'); |
80 | return false; |
81 | } |
82 | |
83 | /// Given a token containing a string literal, return its value, including |
84 | /// removing the quote characters and unescaping the contents of the string. The |
85 | /// lexer has already verified that this token is valid. |
86 | std::string Token::getStringValue() const { |
87 | assert(getKind() == string || getKind() == code_complete || |
88 | (getKind() == at_identifier && getSpelling()[1] == '"')); |
89 | // Start by dropping the quotes. |
90 | StringRef bytes = getSpelling().drop_front(); |
91 | if (getKind() != Token::code_complete) { |
92 | bytes = bytes.drop_back(); |
93 | if (getKind() == at_identifier) |
94 | bytes = bytes.drop_front(); |
95 | } |
96 | |
97 | std::string result; |
98 | result.reserve(res: bytes.size()); |
99 | for (unsigned i = 0, e = bytes.size(); i != e;) { |
100 | auto c = bytes[i++]; |
101 | if (c != '\\') { |
102 | result.push_back(c: c); |
103 | continue; |
104 | } |
105 | |
106 | assert(i + 1 <= e && "invalid string should be caught by lexer" ); |
107 | auto c1 = bytes[i++]; |
108 | switch (c1) { |
109 | case '"': |
110 | case '\\': |
111 | result.push_back(c: c1); |
112 | continue; |
113 | case 'n': |
114 | result.push_back(c: '\n'); |
115 | continue; |
116 | case 't': |
117 | result.push_back(c: '\t'); |
118 | continue; |
119 | default: |
120 | break; |
121 | } |
122 | |
123 | assert(i + 1 <= e && "invalid string should be caught by lexer" ); |
124 | auto c2 = bytes[i++]; |
125 | |
126 | assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape" ); |
127 | result.push_back(c: (llvm::hexDigitValue(C: c1) << 4) | llvm::hexDigitValue(C: c2)); |
128 | } |
129 | |
130 | return result; |
131 | } |
132 | |
133 | /// Given a token containing a hex string literal, return its value or |
134 | /// std::nullopt if the token does not contain a valid hex string. |
135 | std::optional<std::string> Token::getHexStringValue() const { |
136 | assert(getKind() == string); |
137 | |
138 | // Get the internal string data, without the quotes. |
139 | StringRef bytes = getSpelling().drop_front().drop_back(); |
140 | |
141 | // Try to extract the binary data from the hex string. We expect the hex |
142 | // string to start with `0x` and have an even number of hex nibbles (nibbles |
143 | // should come in pairs). |
144 | std::string hex; |
145 | if (!bytes.consume_front(Prefix: "0x" ) || (bytes.size() & 1) || |
146 | !llvm::tryGetFromHex(Input: bytes, Output&: hex)) |
147 | return std::nullopt; |
148 | return hex; |
149 | } |
150 | |
151 | /// Given a token containing a symbol reference, return the unescaped string |
152 | /// value. |
153 | std::string Token::getSymbolReference() const { |
154 | assert(is(Token::at_identifier) && "expected valid @-identifier" ); |
155 | StringRef nameStr = getSpelling().drop_front(); |
156 | |
157 | // Check to see if the reference is a string literal, or a bare identifier. |
158 | if (nameStr.front() == '"') |
159 | return getStringValue(); |
160 | return std::string(nameStr); |
161 | } |
162 | |
163 | /// Given a hash_identifier token like #123, try to parse the number out of |
164 | /// the identifier, returning std::nullopt if it is a named identifier like #x |
165 | /// or if the integer doesn't fit. |
166 | std::optional<unsigned> Token::getHashIdentifierNumber() const { |
167 | assert(getKind() == hash_identifier); |
168 | unsigned result = 0; |
169 | if (spelling.drop_front().getAsInteger(Radix: 10, Result&: result)) |
170 | return std::nullopt; |
171 | return result; |
172 | } |
173 | |
174 | /// Given a punctuation or keyword token kind, return the spelling of the |
175 | /// token as a string. Warning: This will abort on markers, identifiers and |
176 | /// literal tokens since they have no fixed spelling. |
177 | StringRef Token::getTokenSpelling(Kind kind) { |
178 | switch (kind) { |
179 | default: |
180 | llvm_unreachable("This token kind has no fixed spelling" ); |
181 | #define TOK_PUNCTUATION(NAME, SPELLING) \ |
182 | case NAME: \ |
183 | return SPELLING; |
184 | #define TOK_KEYWORD(SPELLING) \ |
185 | case kw_##SPELLING: \ |
186 | return #SPELLING; |
187 | #include "TokenKinds.def" |
188 | } |
189 | } |
190 | |
191 | /// Return true if this is one of the keyword token kinds (e.g. kw_if). |
192 | bool Token::isKeyword() const { |
193 | switch (kind) { |
194 | default: |
195 | return false; |
196 | #define TOK_KEYWORD(SPELLING) \ |
197 | case kw_##SPELLING: \ |
198 | return true; |
199 | #include "TokenKinds.def" |
200 | } |
201 | } |
202 | |
203 | bool Token::isCodeCompletionFor(Kind kind) const { |
204 | if (!isCodeCompletion() || spelling.empty()) |
205 | return false; |
206 | switch (kind) { |
207 | case Kind::string: |
208 | return spelling[0] == '"'; |
209 | case Kind::hash_identifier: |
210 | return spelling[0] == '#'; |
211 | case Kind::percent_identifier: |
212 | return spelling[0] == '%'; |
213 | case Kind::caret_identifier: |
214 | return spelling[0] == '^'; |
215 | case Kind::exclamation_identifier: |
216 | return spelling[0] == '!'; |
217 | default: |
218 | return false; |
219 | } |
220 | } |
221 | |