1 | //===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef MLIR_LIB_PARSER_TOKEN_H |
10 | #define MLIR_LIB_PARSER_TOKEN_H |
11 | |
12 | #include "mlir/Support/LLVM.h" |
13 | #include "llvm/ADT/StringRef.h" |
14 | #include "llvm/Support/SMLoc.h" |
15 | #include <optional> |
16 | |
17 | namespace mlir { |
18 | |
19 | /// This represents a token in the MLIR syntax. |
20 | class Token { |
21 | public: |
22 | enum Kind { |
23 | #define TOK_MARKER(NAME) NAME, |
24 | #define TOK_IDENTIFIER(NAME) NAME, |
25 | #define TOK_LITERAL(NAME) NAME, |
26 | #define TOK_PUNCTUATION(NAME, SPELLING) NAME, |
27 | #define TOK_KEYWORD(SPELLING) kw_##SPELLING, |
28 | #include "TokenKinds.def" |
29 | }; |
30 | |
31 | Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} |
32 | |
33 | // Return the bytes that make up this token. |
34 | StringRef getSpelling() const { return spelling; } |
35 | |
36 | // Token classification. |
37 | Kind getKind() const { return kind; } |
38 | bool is(Kind k) const { return kind == k; } |
39 | |
40 | bool isAny(Kind k1, Kind k2) const { return is(k: k1) || is(k: k2); } |
41 | |
42 | /// Return true if this token is one of the specified kinds. |
43 | template <typename... T> |
44 | bool isAny(Kind k1, Kind k2, Kind k3, T... others) const { |
45 | if (is(k: k1)) |
46 | return true; |
47 | return isAny(k2, k3, others...); |
48 | } |
49 | |
50 | bool isNot(Kind k) const { return kind != k; } |
51 | |
52 | /// Return true if this token isn't one of the specified kinds. |
53 | template <typename... T> |
54 | bool isNot(Kind k1, Kind k2, T... others) const { |
55 | return !isAny(k1, k2, others...); |
56 | } |
57 | |
58 | /// Return true if this is one of the keyword token kinds (e.g. kw_if). |
59 | bool isKeyword() const; |
60 | |
61 | /// Returns true if the current token represents a code completion. |
62 | bool isCodeCompletion() const { return is(k: code_complete); } |
63 | |
64 | /// Returns true if the current token represents a code completion for the |
65 | /// "normal" token type. |
66 | bool isCodeCompletionFor(Kind kind) const; |
67 | |
68 | /// Returns true if the current token is the given type, or represents a code |
69 | /// completion for that type. |
70 | bool isOrIsCodeCompletionFor(Kind kind) const { |
71 | return is(k: kind) || isCodeCompletionFor(kind); |
72 | } |
73 | |
74 | // Helpers to decode specific sorts of tokens. |
75 | |
76 | /// For an integer token, return its value as an unsigned. If it doesn't fit, |
77 | /// return std::nullopt. |
78 | std::optional<unsigned> getUnsignedIntegerValue() const; |
79 | |
80 | /// For an integer token, return its value as an uint64_t. If it doesn't fit, |
81 | /// return std::nullopt. |
82 | static std::optional<uint64_t> getUInt64IntegerValue(StringRef spelling); |
83 | std::optional<uint64_t> getUInt64IntegerValue() const { |
84 | return getUInt64IntegerValue(spelling: getSpelling()); |
85 | } |
86 | |
87 | /// For a floatliteral token, return its value as a double. Returns |
88 | /// std::nullopt in the case of underflow or overflow. |
89 | std::optional<double> getFloatingPointValue() const; |
90 | |
91 | /// For an inttype token, return its bitwidth. |
92 | std::optional<unsigned> getIntTypeBitwidth() const; |
93 | |
94 | /// For an inttype token, return its signedness semantics: std::nullopt means |
95 | /// no signedness semantics; true means signed integer type; false means |
96 | /// unsigned integer type. |
97 | std::optional<bool> getIntTypeSignedness() const; |
98 | |
99 | /// Given a hash_identifier token like #123, try to parse the number out of |
100 | /// the identifier, returning std::nullopt if it is a named identifier like #x |
101 | /// or if the integer doesn't fit. |
102 | std::optional<unsigned> getHashIdentifierNumber() const; |
103 | |
104 | /// Given a token containing a string literal, return its value, including |
105 | /// removing the quote characters and unescaping the contents of the string. |
106 | std::string getStringValue() const; |
107 | |
108 | /// Given a token containing a hex string literal, return its value or |
109 | /// std::nullopt if the token does not contain a valid hex string. A hex |
110 | /// string literal is a string starting with `0x` and only containing hex |
111 | /// digits. |
112 | std::optional<std::string> getHexStringValue() const; |
113 | |
114 | /// Given a token containing a symbol reference, return the unescaped string |
115 | /// value. |
116 | std::string getSymbolReference() const; |
117 | |
118 | // Location processing. |
119 | SMLoc getLoc() const; |
120 | SMLoc getEndLoc() const; |
121 | SMRange getLocRange() const; |
122 | |
123 | /// Given a punctuation or keyword token kind, return the spelling of the |
124 | /// token as a string. Warning: This will abort on markers, identifiers and |
125 | /// literal tokens since they have no fixed spelling. |
126 | static StringRef getTokenSpelling(Kind kind); |
127 | |
128 | private: |
129 | /// Discriminator that indicates the sort of token this is. |
130 | Kind kind; |
131 | |
132 | /// A reference to the entire token contents; this is always a pointer into |
133 | /// a memory buffer owned by the source manager. |
134 | StringRef spelling; |
135 | }; |
136 | |
137 | } // namespace mlir |
138 | |
139 | #endif // MLIR_LIB_PARSER_TOKEN_H |
140 | |