1//===- Token.cpp - MLIR Token Implementation ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Token class for the MLIR textual form.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Token.h"
14#include "mlir/Support/LLVM.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/Support/ErrorHandling.h"
17#include <cassert>
18#include <cstdint>
19#include <optional>
20#include <string>
21
22using namespace mlir;
23
24SMLoc Token::getLoc() const { return SMLoc::getFromPointer(Ptr: spelling.data()); }
25
26SMLoc Token::getEndLoc() const {
27 return SMLoc::getFromPointer(Ptr: spelling.data() + spelling.size());
28}
29
30SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
31
32/// For an integer token, return its value as an unsigned. If it doesn't fit,
33/// return std::nullopt.
34std::optional<unsigned> Token::getUnsignedIntegerValue() const {
35 bool isHex = spelling.size() > 1 && spelling[1] == 'x';
36
37 unsigned result = 0;
38 if (spelling.getAsInteger(Radix: isHex ? 0 : 10, Result&: result))
39 return std::nullopt;
40 return result;
41}
42
43/// For an integer token, return its value as a uint64_t. If it doesn't fit,
44/// return std::nullopt.
45std::optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
46 bool isHex = spelling.size() > 1 && spelling[1] == 'x';
47
48 uint64_t result = 0;
49 if (spelling.getAsInteger(Radix: isHex ? 0 : 10, Result&: result))
50 return std::nullopt;
51 return result;
52}
53
54/// For a floatliteral, return its value as a double. Return std::nullopt if the
55/// value underflows or overflows.
56std::optional<double> Token::getFloatingPointValue() const {
57 double result = 0;
58 if (spelling.getAsDouble(Result&: result))
59 return std::nullopt;
60 return result;
61}
62
63/// For an inttype token, return its bitwidth.
64std::optional<unsigned> Token::getIntTypeBitwidth() const {
65 assert(getKind() == inttype);
66 unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
67 unsigned result = 0;
68 if (spelling.drop_front(N: bitwidthStart).getAsInteger(Radix: 10, Result&: result))
69 return std::nullopt;
70 return result;
71}
72
73std::optional<bool> Token::getIntTypeSignedness() const {
74 assert(getKind() == inttype);
75 if (spelling[0] == 'i')
76 return std::nullopt;
77 if (spelling[0] == 's')
78 return true;
79 assert(spelling[0] == 'u');
80 return false;
81}
82
83/// Given a token containing a string literal, return its value, including
84/// removing the quote characters and unescaping the contents of the string. The
85/// lexer has already verified that this token is valid.
86std::string Token::getStringValue() const {
87 assert(getKind() == string || getKind() == code_complete ||
88 (getKind() == at_identifier && getSpelling()[1] == '"'));
89 // Start by dropping the quotes.
90 StringRef bytes = getSpelling().drop_front();
91 if (getKind() != Token::code_complete) {
92 bytes = bytes.drop_back();
93 if (getKind() == at_identifier)
94 bytes = bytes.drop_front();
95 }
96
97 std::string result;
98 result.reserve(res: bytes.size());
99 for (unsigned i = 0, e = bytes.size(); i != e;) {
100 auto c = bytes[i++];
101 if (c != '\\') {
102 result.push_back(c: c);
103 continue;
104 }
105
106 assert(i + 1 <= e && "invalid string should be caught by lexer");
107 auto c1 = bytes[i++];
108 switch (c1) {
109 case '"':
110 case '\\':
111 result.push_back(c: c1);
112 continue;
113 case 'n':
114 result.push_back(c: '\n');
115 continue;
116 case 't':
117 result.push_back(c: '\t');
118 continue;
119 default:
120 break;
121 }
122
123 assert(i + 1 <= e && "invalid string should be caught by lexer");
124 auto c2 = bytes[i++];
125
126 assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
127 result.push_back(c: (llvm::hexDigitValue(C: c1) << 4) | llvm::hexDigitValue(C: c2));
128 }
129
130 return result;
131}
132
133/// Given a token containing a hex string literal, return its value or
134/// std::nullopt if the token does not contain a valid hex string.
135std::optional<std::string> Token::getHexStringValue() const {
136 assert(getKind() == string);
137
138 // Get the internal string data, without the quotes.
139 StringRef bytes = getSpelling().drop_front().drop_back();
140
141 // Try to extract the binary data from the hex string. We expect the hex
142 // string to start with `0x` and have an even number of hex nibbles (nibbles
143 // should come in pairs).
144 std::string hex;
145 if (!bytes.consume_front(Prefix: "0x") || (bytes.size() & 1) ||
146 !llvm::tryGetFromHex(Input: bytes, Output&: hex))
147 return std::nullopt;
148 return hex;
149}
150
151/// Given a token containing a symbol reference, return the unescaped string
152/// value.
153std::string Token::getSymbolReference() const {
154 assert(is(Token::at_identifier) && "expected valid @-identifier");
155 StringRef nameStr = getSpelling().drop_front();
156
157 // Check to see if the reference is a string literal, or a bare identifier.
158 if (nameStr.front() == '"')
159 return getStringValue();
160 return std::string(nameStr);
161}
162
163/// Given a hash_identifier token like #123, try to parse the number out of
164/// the identifier, returning std::nullopt if it is a named identifier like #x
165/// or if the integer doesn't fit.
166std::optional<unsigned> Token::getHashIdentifierNumber() const {
167 assert(getKind() == hash_identifier);
168 unsigned result = 0;
169 if (spelling.drop_front().getAsInteger(Radix: 10, Result&: result))
170 return std::nullopt;
171 return result;
172}
173
174/// Given a punctuation or keyword token kind, return the spelling of the
175/// token as a string. Warning: This will abort on markers, identifiers and
176/// literal tokens since they have no fixed spelling.
177StringRef Token::getTokenSpelling(Kind kind) {
178 switch (kind) {
179 default:
180 llvm_unreachable("This token kind has no fixed spelling");
181#define TOK_PUNCTUATION(NAME, SPELLING) \
182 case NAME: \
183 return SPELLING;
184#define TOK_KEYWORD(SPELLING) \
185 case kw_##SPELLING: \
186 return #SPELLING;
187#include "TokenKinds.def"
188 }
189}
190
191/// Return true if this is one of the keyword token kinds (e.g. kw_if).
192bool Token::isKeyword() const {
193 switch (kind) {
194 default:
195 return false;
196#define TOK_KEYWORD(SPELLING) \
197 case kw_##SPELLING: \
198 return true;
199#include "TokenKinds.def"
200 }
201}
202
203bool Token::isCodeCompletionFor(Kind kind) const {
204 if (!isCodeCompletion() || spelling.empty())
205 return false;
206 switch (kind) {
207 case Kind::string:
208 return spelling[0] == '"';
209 case Kind::hash_identifier:
210 return spelling[0] == '#';
211 case Kind::percent_identifier:
212 return spelling[0] == '%';
213 case Kind::caret_identifier:
214 return spelling[0] == '^';
215 case Kind::exclamation_identifier:
216 return spelling[0] == '!';
217 default:
218 return false;
219 }
220}
221

// Source: mlir/lib/AsmParser/Token.cpp