| 1 | //===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef MLIR_LIB_PARSER_TOKEN_H |
| 10 | #define MLIR_LIB_PARSER_TOKEN_H |
| 11 | |
| 12 | #include "mlir/Support/LLVM.h" |
| 13 | #include "llvm/ADT/StringRef.h" |
| 14 | #include "llvm/Support/SMLoc.h" |
| 15 | #include <optional> |
| 16 | |
| 17 | namespace mlir { |
| 18 | |
| 19 | /// This represents a token in the MLIR syntax. |
| 20 | class Token { |
| 21 | public: |
| 22 | enum Kind { |
| 23 | #define TOK_MARKER(NAME) NAME, |
| 24 | #define TOK_IDENTIFIER(NAME) NAME, |
| 25 | #define TOK_LITERAL(NAME) NAME, |
| 26 | #define TOK_PUNCTUATION(NAME, SPELLING) NAME, |
| 27 | #define TOK_KEYWORD(SPELLING) kw_##SPELLING, |
| 28 | #include "TokenKinds.def" |
| 29 | }; |
| 30 | |
| 31 | Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} |
| 32 | |
| 33 | // Return the bytes that make up this token. |
| 34 | StringRef getSpelling() const { return spelling; } |
| 35 | |
| 36 | // Token classification. |
| 37 | Kind getKind() const { return kind; } |
| 38 | bool is(Kind k) const { return kind == k; } |
| 39 | |
| 40 | bool isAny(Kind k1, Kind k2) const { return is(k: k1) || is(k: k2); } |
| 41 | |
| 42 | /// Return true if this token is one of the specified kinds. |
| 43 | template <typename... T> |
| 44 | bool isAny(Kind k1, Kind k2, Kind k3, T... others) const { |
| 45 | if (is(k: k1)) |
| 46 | return true; |
| 47 | return isAny(k2, k3, others...); |
| 48 | } |
| 49 | |
| 50 | bool isNot(Kind k) const { return kind != k; } |
| 51 | |
| 52 | /// Return true if this token isn't one of the specified kinds. |
| 53 | template <typename... T> |
| 54 | bool isNot(Kind k1, Kind k2, T... others) const { |
| 55 | return !isAny(k1, k2, others...); |
| 56 | } |
| 57 | |
| 58 | /// Return true if this is one of the keyword token kinds (e.g. kw_if). |
| 59 | bool isKeyword() const; |
| 60 | |
| 61 | /// Returns true if the current token represents a code completion. |
| 62 | bool isCodeCompletion() const { return is(k: code_complete); } |
| 63 | |
| 64 | /// Returns true if the current token represents a code completion for the |
| 65 | /// "normal" token type. |
| 66 | bool isCodeCompletionFor(Kind kind) const; |
| 67 | |
| 68 | /// Returns true if the current token is the given type, or represents a code |
| 69 | /// completion for that type. |
| 70 | bool isOrIsCodeCompletionFor(Kind kind) const { |
| 71 | return is(k: kind) || isCodeCompletionFor(kind); |
| 72 | } |
| 73 | |
| 74 | // Helpers to decode specific sorts of tokens. |
| 75 | |
| 76 | /// For an integer token, return its value as an unsigned. If it doesn't fit, |
| 77 | /// return std::nullopt. |
| 78 | std::optional<unsigned> getUnsignedIntegerValue() const; |
| 79 | |
| 80 | /// For an integer token, return its value as an uint64_t. If it doesn't fit, |
| 81 | /// return std::nullopt. |
| 82 | static std::optional<uint64_t> getUInt64IntegerValue(StringRef spelling); |
| 83 | std::optional<uint64_t> getUInt64IntegerValue() const { |
| 84 | return getUInt64IntegerValue(spelling: getSpelling()); |
| 85 | } |
| 86 | |
| 87 | /// For a floatliteral token, return its value as a double. Returns |
| 88 | /// std::nullopt in the case of underflow or overflow. |
| 89 | std::optional<double> getFloatingPointValue() const; |
| 90 | |
| 91 | /// For an inttype token, return its bitwidth. |
| 92 | std::optional<unsigned> getIntTypeBitwidth() const; |
| 93 | |
| 94 | /// For an inttype token, return its signedness semantics: std::nullopt means |
| 95 | /// no signedness semantics; true means signed integer type; false means |
| 96 | /// unsigned integer type. |
| 97 | std::optional<bool> getIntTypeSignedness() const; |
| 98 | |
| 99 | /// Given a hash_identifier token like #123, try to parse the number out of |
| 100 | /// the identifier, returning std::nullopt if it is a named identifier like #x |
| 101 | /// or if the integer doesn't fit. |
| 102 | std::optional<unsigned> getHashIdentifierNumber() const; |
| 103 | |
| 104 | /// Given a token containing a string literal, return its value, including |
| 105 | /// removing the quote characters and unescaping the contents of the string. |
| 106 | std::string getStringValue() const; |
| 107 | |
| 108 | /// Given a token containing a hex string literal, return its value or |
| 109 | /// std::nullopt if the token does not contain a valid hex string. A hex |
| 110 | /// string literal is a string starting with `0x` and only containing hex |
| 111 | /// digits. |
| 112 | std::optional<std::string> getHexStringValue() const; |
| 113 | |
| 114 | /// Given a token containing a symbol reference, return the unescaped string |
| 115 | /// value. |
| 116 | std::string getSymbolReference() const; |
| 117 | |
| 118 | // Location processing. |
| 119 | SMLoc getLoc() const; |
| 120 | SMLoc getEndLoc() const; |
| 121 | SMRange getLocRange() const; |
| 122 | |
| 123 | /// Given a punctuation or keyword token kind, return the spelling of the |
| 124 | /// token as a string. Warning: This will abort on markers, identifiers and |
| 125 | /// literal tokens since they have no fixed spelling. |
| 126 | static StringRef getTokenSpelling(Kind kind); |
| 127 | |
| 128 | private: |
| 129 | /// Discriminator that indicates the sort of token this is. |
| 130 | Kind kind; |
| 131 | |
| 132 | /// A reference to the entire token contents; this is always a pointer into |
| 133 | /// a memory buffer owned by the source manager. |
| 134 | StringRef spelling; |
| 135 | }; |
| 136 | |
| 137 | } // namespace mlir |
| 138 | |
| 139 | #endif // MLIR_LIB_PARSER_TOKEN_H |
| 140 | |