Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===--- Token.h - Token interface ------------------------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the Token interface. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_CLANG_LEX_TOKEN_H |
14 | #define LLVM_CLANG_LEX_TOKEN_H |
15 | |
16 | #include "clang/Basic/SourceLocation.h" |
17 | #include "clang/Basic/TokenKinds.h" |
18 | #include "llvm/ADT/ArrayRef.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include <cassert> |
21 | |
22 | namespace clang { |
23 | |
24 | class IdentifierInfo; |
25 | |
26 | /// Token - This structure provides full information about a lexed token. |
27 | /// It is not intended to be space efficient, it is intended to return as much |
28 | /// information as possible about each returned token. This is expected to be |
29 | /// compressed into a smaller form if memory footprint is important. |
30 | /// |
31 | /// The parser can create a special "annotation token" representing a stream of |
32 | /// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>" |
33 | /// can be represented by a single typename annotation token that carries |
34 | /// information about the SourceRange of the tokens and the type object. |
35 | class Token { |
36 | /// The location of the token. This is actually a SourceLocation. |
37 | SourceLocation::UIntTy Loc; |
38 | |
39 | // Conceptually these next two fields could be in a union. However, this |
40 | // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical |
41 | // routine. Keeping as separate members with casts until a more beautiful fix |
42 | // presents itself. |
43 | |
44 | /// UintData - This holds either the length of the token text, when |
45 | /// a normal token, or the end of the SourceRange when an annotation |
46 | /// token. |
47 | SourceLocation::UIntTy UintData; |
48 | |
49 | /// PtrData - This is a union of four different pointer types, which depends |
50 | /// on what type of token this is: |
51 | /// Identifiers, keywords, etc: |
52 | /// This is an IdentifierInfo*, which contains the uniqued identifier |
53 | /// spelling. |
54 | /// Literals: isLiteral() returns true. |
55 | /// This is a pointer to the start of the token in a text buffer, which |
56 | /// may be dirty (have trigraphs / escaped newlines). |
57 | /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). |
58 | /// This is a pointer to sema-specific data for the annotation token. |
59 | /// Eof: |
60 | // This is a pointer to a Decl. |
61 | /// Other: |
62 | /// This is null. |
63 | void *PtrData; |
64 | |
65 | /// Kind - The actual flavor of token this is. |
66 | tok::TokenKind Kind; |
67 | |
68 | /// Flags - Bits we track about this token, members of the TokenFlags enum. |
69 | unsigned short Flags; |
70 | |
71 | public: |
72 | // Various flags set per token: |
73 | enum TokenFlags { |
74 | StartOfLine = 0x01, // At start of line or only after whitespace |
75 | // (considering the line after macro expansion). |
76 | LeadingSpace = 0x02, // Whitespace exists before this token (considering |
77 | // whitespace after macro expansion). |
78 | DisableExpand = 0x04, // This identifier may never be macro expanded. |
79 | NeedsCleaning = 0x08, // Contained an escaped newline or trigraph. |
80 | LeadingEmptyMacro = 0x10, // Empty macro exists before this token. |
81 | HasUDSuffix = 0x20, // This string or character literal has a ud-suffix. |
82 | HasUCN = 0x40, // This identifier contains a UCN. |
83 | IgnoredComma = 0x80, // This comma is not a macro argument separator (MS). |
84 | StringifiedInMacro = 0x100, // This string or character literal is formed by |
85 | // macro stringizing or charizing operator. |
86 | CommaAfterElided = 0x200, // The comma following this token was elided (MS). |
87 | IsEditorPlaceholder = 0x400, // This identifier is a placeholder. |
88 | IsReinjected = 0x800, // A phase 4 token that was produced before and |
89 | // re-added, e.g. via EnterTokenStream. Annotation |
90 | // tokens are *not* reinjected. |
91 | }; |
92 | |
93 | tok::TokenKind getKind() const { return Kind; } |
94 | void setKind(tok::TokenKind K) { Kind = K; } |
95 | |
96 | /// is/isNot - Predicates to check if this token is a specific kind, as in |
97 | /// "if (Tok.is(tok::l_brace)) {...}". |
98 | bool is(tok::TokenKind K) const { return Kind == K; } |
99 | bool isNot(tok::TokenKind K) const { return Kind != K; } |
100 | bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { |
101 | return is(K1) || is(K2); |
102 | } |
103 | template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const { |
104 | return is(K1) || isOneOf(Ks...); |
105 | } |
106 | |
107 | /// Return true if this is a raw identifier (when lexing |
108 | /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode). |
109 | bool isAnyIdentifier() const { |
110 | return tok::isAnyIdentifier(getKind()); |
111 | } |
112 | |
113 | /// Return true if this is a "literal", like a numeric |
114 | /// constant, string, etc. |
115 | bool isLiteral() const { |
116 | return tok::isLiteral(getKind()); |
117 | } |
118 | |
119 | /// Return true if this is any of tok::annot_* kind tokens. |
120 | bool isAnnotation() const { |
121 | return tok::isAnnotation(getKind()); |
122 | } |
123 | |
124 | /// Return a source location identifier for the specified |
125 | /// offset in the current file. |
126 | SourceLocation getLocation() const { |
127 | return SourceLocation::getFromRawEncoding(Loc); |
128 | } |
129 | unsigned getLength() const { |
130 | assert(!isAnnotation() && "Annotation tokens have no length field"); |
131 | return UintData; |
132 | } |
133 | |
134 | void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); } |
135 | void setLength(unsigned Len) { |
136 | assert(!isAnnotation() && "Annotation tokens have no length field"); |
137 | UintData = Len; |
138 | } |
139 | |
140 | SourceLocation getAnnotationEndLoc() const { |
141 | assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); |
142 | return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc); |
143 | } |
144 | void setAnnotationEndLoc(SourceLocation L) { |
145 | assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); |
146 | UintData = L.getRawEncoding(); |
147 | } |
148 | |
149 | SourceLocation getLastLoc() const { |
150 | return isAnnotation() ? getAnnotationEndLoc() : getLocation(); |
151 | } |
152 | |
153 | SourceLocation getEndLoc() const { |
154 | return isAnnotation() ? getAnnotationEndLoc() |
155 | : getLocation().getLocWithOffset(getLength()); |
156 | } |
157 | |
158 | /// SourceRange of the group of tokens that this annotation token |
159 | /// represents. |
160 | SourceRange getAnnotationRange() const { |
161 | return SourceRange(getLocation(), getAnnotationEndLoc()); |
162 | } |
163 | void setAnnotationRange(SourceRange R) { |
164 | setLocation(R.getBegin()); |
165 | setAnnotationEndLoc(R.getEnd()); |
166 | } |
167 | |
168 | const char *getName() const { return tok::getTokenName(Kind); } |
169 | |
170 | /// Reset all flags to cleared. |
171 | void startToken() { |
172 | Kind = tok::unknown; |
173 | Flags = 0; |
174 | PtrData = nullptr; |
175 | UintData = 0; |
176 | Loc = SourceLocation().getRawEncoding(); |
177 | } |
178 | |
179 | bool hasPtrData() const { return PtrData != nullptr; } |
180 | |
181 | IdentifierInfo *getIdentifierInfo() const { |
182 | assert(isNot(tok::raw_identifier) && |
183 | "getIdentifierInfo() on a tok::raw_identifier token!"); |
184 | assert(!isAnnotation() && |
185 | "getIdentifierInfo() on an annotation token!"); |
186 | if (isLiteral()) return nullptr; |
187 | if (is(tok::eof)) return nullptr; |
188 | return (IdentifierInfo*) PtrData; |
189 | } |
190 | void setIdentifierInfo(IdentifierInfo *II) { |
191 | PtrData = (void*) II; |
192 | } |
193 | |
194 | const void *getEofData() const { |
195 | assert(is(tok::eof)); |
196 | return reinterpret_cast<const void *>(PtrData); |
197 | } |
198 | void setEofData(const void *D) { |
199 | assert(is(tok::eof)); |
200 | assert(!PtrData); |
201 | PtrData = const_cast<void *>(D); |
202 | } |
203 | |
204 | /// getRawIdentifier - For a raw identifier token (i.e., an identifier |
205 | /// lexed in raw mode), returns a reference to the text substring in the |
206 | /// buffer if known. |
207 | StringRef getRawIdentifier() const { |
208 | assert(is(tok::raw_identifier)); |
209 | return StringRef(reinterpret_cast<const char *>(PtrData), getLength()); |
210 | } |
211 | void setRawIdentifierData(const char *Ptr) { |
212 | assert(is(tok::raw_identifier)); |
213 | PtrData = const_cast<char*>(Ptr); |
214 | } |
215 | |
216 | /// getLiteralData - For a literal token (numeric constant, string, etc), this |
217 | /// returns a pointer to the start of it in the text buffer if known, null |
218 | /// otherwise. |
219 | const char *getLiteralData() const { |
220 | assert(isLiteral() && "Cannot get literal data of non-literal"); |
221 | return reinterpret_cast<const char*>(PtrData); |
222 | } |
223 | void setLiteralData(const char *Ptr) { |
224 | assert(isLiteral() && "Cannot set literal data of non-literal"); |
225 | PtrData = const_cast<char*>(Ptr); |
226 | } |
227 | |
228 | void *getAnnotationValue() const { |
229 | assert(isAnnotation() && "Used AnnotVal on non-annotation token"); |
230 | return PtrData; |
231 | } |
232 | void setAnnotationValue(void *val) { |
233 | assert(isAnnotation() && "Used AnnotVal on non-annotation token"); |
234 | PtrData = val; |
235 | } |
236 | |
237 | /// Set the specified flag. |
238 | void setFlag(TokenFlags Flag) { |
239 | Flags |= Flag; |
240 | } |
241 | |
242 | /// Get the specified flag. |
243 | bool getFlag(TokenFlags Flag) const { |
244 | return (Flags & Flag) != 0; |
245 | } |
246 | |
247 | /// Unset the specified flag. |
248 | void clearFlag(TokenFlags Flag) { |
249 | Flags &= ~Flag; |
250 | } |
251 | |
252 | /// Return the internal represtation of the flags. |
253 | /// |
254 | /// This is only intended for low-level operations such as writing tokens to |
255 | /// disk. |
256 | unsigned getFlags() const { |
257 | return Flags; |
258 | } |
259 | |
260 | /// Set a flag to either true or false. |
261 | void setFlagValue(TokenFlags Flag, bool Val) { |
262 | if (Val) |
263 | setFlag(Flag); |
264 | else |
265 | clearFlag(Flag); |
266 | } |
267 | |
268 | /// isAtStartOfLine - Return true if this token is at the start of a line. |
269 | /// |
270 | bool isAtStartOfLine() const { return getFlag(StartOfLine); } |
271 | |
272 | /// Return true if this token has whitespace before it. |
273 | /// |
274 | bool hasLeadingSpace() const { return getFlag(LeadingSpace); } |
275 | |
276 | /// Return true if this identifier token should never |
277 | /// be expanded in the future, due to C99 6.10.3.4p2. |
278 | bool isExpandDisabled() const { return getFlag(DisableExpand); } |
279 | |
280 | /// Return true if we have an ObjC keyword identifier. |
281 | bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; |
282 | |
283 | /// Return the ObjC keyword kind. |
284 | tok::ObjCKeywordKind getObjCKeywordID() const; |
285 | |
286 | /// Return true if this token has trigraphs or escaped newlines in it. |
287 | bool needsCleaning() const { return getFlag(NeedsCleaning); } |
288 | |
289 | /// Return true if this token has an empty macro before it. |
290 | /// |
291 | bool hasLeadingEmptyMacro() const { return getFlag(LeadingEmptyMacro); } |
292 | |
293 | /// Return true if this token is a string or character literal which |
294 | /// has a ud-suffix. |
295 | bool hasUDSuffix() const { return getFlag(HasUDSuffix); } |
296 | |
297 | /// Returns true if this token contains a universal character name. |
298 | bool hasUCN() const { return getFlag(HasUCN); } |
299 | |
300 | /// Returns true if this token is formed by macro by stringizing or charizing |
301 | /// operator. |
302 | bool stringifiedInMacro() const { return getFlag(StringifiedInMacro); } |
303 | |
304 | /// Returns true if the comma after this token was elided. |
305 | bool commaAfterElided() const { return getFlag(CommaAfterElided); } |
306 | |
307 | /// Returns true if this token is an editor placeholder. |
308 | /// |
309 | /// Editor placeholders are produced by the code-completion engine and are |
310 | /// represented as characters between '<#' and '#>' in the source code. The |
311 | /// lexer uses identifier tokens to represent placeholders. |
312 | bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); } |
313 | }; |
314 | |
315 | /// Information about the conditional stack (\#if directives) |
316 | /// currently active. |
317 | struct PPConditionalInfo { |
318 | /// Location where the conditional started. |
319 | SourceLocation IfLoc; |
320 | |
321 | /// True if this was contained in a skipping directive, e.g., |
322 | /// in a "\#if 0" block. |
323 | bool WasSkipping; |
324 | |
325 | /// True if we have emitted tokens already, and now we're in |
326 | /// an \#else block or something. Only useful in Skipping blocks. |
327 | bool FoundNonSkip; |
328 | |
329 | /// True if we've seen a \#else in this block. If so, |
330 | /// \#elif/\#else directives are not allowed. |
331 | bool FoundElse; |
332 | }; |
333 | |
334 | // Extra information needed for annonation tokens. |
335 | struct PragmaLoopHintInfo { |
336 | Token PragmaName; |
337 | Token Option; |
338 | ArrayRef<Token> Toks; |
339 | }; |
340 | } // end namespace clang |
341 | |
342 | #endif // LLVM_CLANG_LEX_TOKEN_H |
343 |
Warning: This file is not a C or C++ file. It does not have highlighting.