Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the NumericLiteralParser, CharLiteralParser, and |
10 | // StringLiteralParser interfaces. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H |
15 | #define LLVM_CLANG_LEX_LITERALSUPPORT_H |
16 | |
17 | #include "clang/Basic/CharInfo.h" |
18 | #include "clang/Basic/LLVM.h" |
19 | #include "clang/Basic/TokenKinds.h" |
20 | #include "llvm/ADT/APFloat.h" |
21 | #include "llvm/ADT/ArrayRef.h" |
22 | #include "llvm/ADT/SmallString.h" |
23 | #include "llvm/ADT/StringRef.h" |
24 | #include "llvm/Support/DataTypes.h" |
25 | |
26 | namespace clang { |
27 | |
// Forward declarations: the interfaces below only use these types by
// reference, by pointer, or in declarations, so their full definitions are
// not needed in this header.
class DiagnosticsEngine;
class LangOptions;
class Preprocessor;
class SourceLocation;
class SourceManager;
class TargetInfo;
class Token;
35 | |
36 | /// Copy characters from Input to Buf, expanding any UCNs. |
37 | void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); |
38 | |
39 | /// NumericLiteralParser - This performs strict semantic analysis of the content |
40 | /// of a ppnumber, classifying it as either integer, floating, or erroneous, |
41 | /// determines the radix of the value and can convert it to a useful value. |
42 | class NumericLiteralParser { |
43 | const SourceManager &SM; |
44 | const LangOptions &LangOpts; |
45 | DiagnosticsEngine &Diags; |
46 | |
47 | const char *const ThisTokBegin; |
48 | const char *const ThisTokEnd; |
49 | const char *DigitsBegin, *SuffixBegin; // markers |
50 | const char *s; // cursor |
51 | |
52 | unsigned radix; |
53 | |
54 | bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix; |
55 | |
56 | SmallString<32> UDSuffixBuf; |
57 | |
58 | public: |
59 | NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, |
60 | const SourceManager &SM, const LangOptions &LangOpts, |
61 | const TargetInfo &Target, DiagnosticsEngine &Diags); |
62 | bool hadError : 1; |
63 | bool isUnsigned : 1; |
64 | bool isLong : 1; // This is *not* set for long long. |
65 | bool isLongLong : 1; |
66 | bool isSizeT : 1; // 1z, 1uz (C++2b) |
67 | bool isHalf : 1; // 1.0h |
68 | bool isFloat : 1; // 1.0f |
69 | bool isImaginary : 1; // 1.0i |
70 | bool isFloat16 : 1; // 1.0f16 |
71 | bool isFloat128 : 1; // 1.0q |
72 | bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr |
73 | bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk |
74 | bool isBitInt : 1; // 1wb, 1uwb (C2x) |
75 | uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. |
76 | |
77 | |
78 | bool isFixedPointLiteral() const { |
79 | return (saw_period || saw_exponent) && saw_fixed_point_suffix; |
80 | } |
81 | |
82 | bool isIntegerLiteral() const { |
83 | return !saw_period && !saw_exponent && !isFixedPointLiteral(); |
84 | } |
85 | bool isFloatingLiteral() const { |
86 | return (saw_period || saw_exponent) && !isFixedPointLiteral(); |
87 | } |
88 | |
89 | bool hasUDSuffix() const { |
90 | return saw_ud_suffix; |
91 | } |
92 | StringRef getUDSuffix() const { |
93 | assert(saw_ud_suffix); |
94 | return UDSuffixBuf; |
95 | } |
96 | unsigned getUDSuffixOffset() const { |
97 | assert(saw_ud_suffix); |
98 | return SuffixBegin - ThisTokBegin; |
99 | } |
100 | |
101 | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
102 | |
103 | unsigned getRadix() const { return radix; } |
104 | |
105 | /// GetIntegerValue - Convert this numeric literal value to an APInt that |
106 | /// matches Val's input width. If there is an overflow (i.e., if the unsigned |
107 | /// value read is larger than the APInt's bits will hold), set Val to the low |
108 | /// bits of the result and return true. Otherwise, return false. |
109 | bool GetIntegerValue(llvm::APInt &Val); |
110 | |
111 | /// GetFloatValue - Convert this numeric literal to a floating value, using |
112 | /// the specified APFloat fltSemantics (specifying float, double, etc). |
113 | /// The optional bool isExact (passed-by-reference) has its value |
114 | /// set to true if the returned APFloat can represent the number in the |
115 | /// literal exactly, and false otherwise. |
116 | llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); |
117 | |
118 | /// GetFixedPointValue - Convert this numeric literal value into a |
119 | /// scaled integer that represents this value. Returns true if an overflow |
120 | /// occurred when calculating the integral part of the scaled integer or |
121 | /// calculating the digit sequence of the exponent. |
122 | bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale); |
123 | |
124 | /// Get the digits that comprise the literal. This excludes any prefix or |
125 | /// suffix associated with the literal. |
126 | StringRef getLiteralDigits() const { |
127 | assert(!hadError && "cannot reliably get the literal digits with an error"); |
128 | return StringRef(DigitsBegin, SuffixBegin - DigitsBegin); |
129 | } |
130 | |
131 | private: |
132 | |
133 | void ParseNumberStartingWithZero(SourceLocation TokLoc); |
134 | void ParseDecimalOrOctalCommon(SourceLocation TokLoc); |
135 | |
136 | static bool isDigitSeparator(char C) { return C == '\''; } |
137 | |
138 | /// Determine whether the sequence of characters [Start, End) contains |
139 | /// any real digits (not digit separators). |
140 | bool containsDigits(const char *Start, const char *End) { |
141 | return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0])); |
142 | } |
143 | |
144 | enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; |
145 | |
146 | /// Ensure that we don't have a digit separator here. |
147 | void checkSeparator(SourceLocation TokLoc, const char *Pos, |
148 | CheckSeparatorKind IsAfterDigits); |
149 | |
150 | /// SkipHexDigits - Read and skip over any hex digits, up to End. |
151 | /// Return a pointer to the first non-hex digit or End. |
152 | const char *SkipHexDigits(const char *ptr) { |
153 | while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) |
154 | ptr++; |
155 | return ptr; |
156 | } |
157 | |
158 | /// SkipOctalDigits - Read and skip over any octal digits, up to End. |
159 | /// Return a pointer to the first non-hex digit or End. |
160 | const char *SkipOctalDigits(const char *ptr) { |
161 | while (ptr != ThisTokEnd && |
162 | ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) |
163 | ptr++; |
164 | return ptr; |
165 | } |
166 | |
167 | /// SkipDigits - Read and skip over any digits, up to End. |
168 | /// Return a pointer to the first non-hex digit or End. |
169 | const char *SkipDigits(const char *ptr) { |
170 | while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) |
171 | ptr++; |
172 | return ptr; |
173 | } |
174 | |
175 | /// SkipBinaryDigits - Read and skip over any binary digits, up to End. |
176 | /// Return a pointer to the first non-binary digit or End. |
177 | const char *SkipBinaryDigits(const char *ptr) { |
178 | while (ptr != ThisTokEnd && |
179 | (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) |
180 | ptr++; |
181 | return ptr; |
182 | } |
183 | |
184 | }; |
185 | |
186 | /// CharLiteralParser - Perform interpretation and semantic analysis of a |
187 | /// character literal. |
188 | class CharLiteralParser { |
189 | uint64_t Value; |
190 | tok::TokenKind Kind; |
191 | bool IsMultiChar; |
192 | bool HadError; |
193 | SmallString<32> UDSuffixBuf; |
194 | unsigned UDSuffixOffset; |
195 | public: |
196 | CharLiteralParser(const char *begin, const char *end, |
197 | SourceLocation Loc, Preprocessor &PP, |
198 | tok::TokenKind kind); |
199 | |
200 | bool hadError() const { return HadError; } |
201 | bool isOrdinary() const { return Kind == tok::char_constant; } |
202 | bool isWide() const { return Kind == tok::wide_char_constant; } |
203 | bool isUTF8() const { return Kind == tok::utf8_char_constant; } |
204 | bool isUTF16() const { return Kind == tok::utf16_char_constant; } |
205 | bool isUTF32() const { return Kind == tok::utf32_char_constant; } |
206 | bool isMultiChar() const { return IsMultiChar; } |
207 | uint64_t getValue() const { return Value; } |
208 | StringRef getUDSuffix() const { return UDSuffixBuf; } |
209 | unsigned getUDSuffixOffset() const { |
210 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
211 | return UDSuffixOffset; |
212 | } |
213 | }; |
214 | |
215 | /// StringLiteralParser - This decodes string escape characters and performs |
216 | /// wide string analysis and Translation Phase #6 (concatenation of string |
217 | /// literals) (C99 5.1.1.2p1). |
218 | class StringLiteralParser { |
219 | const SourceManager &SM; |
220 | const LangOptions &Features; |
221 | const TargetInfo &Target; |
222 | DiagnosticsEngine *Diags; |
223 | |
224 | unsigned MaxTokenLength; |
225 | unsigned SizeBound; |
226 | unsigned CharByteWidth; |
227 | tok::TokenKind Kind; |
228 | SmallString<512> ResultBuf; |
229 | char *ResultPtr; // cursor |
230 | SmallString<32> UDSuffixBuf; |
231 | unsigned UDSuffixToken; |
232 | unsigned UDSuffixOffset; |
233 | public: |
234 | StringLiteralParser(ArrayRef<Token> StringToks, |
235 | Preprocessor &PP); |
236 | StringLiteralParser(ArrayRef<Token> StringToks, |
237 | const SourceManager &sm, const LangOptions &features, |
238 | const TargetInfo &target, |
239 | DiagnosticsEngine *diags = nullptr) |
240 | : SM(sm), Features(features), Target(target), Diags(diags), |
241 | MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), |
242 | ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { |
243 | init(StringToks); |
244 | } |
245 | |
246 | |
247 | bool hadError; |
248 | bool Pascal; |
249 | |
250 | StringRef GetString() const { |
251 | return StringRef(ResultBuf.data(), GetStringLength()); |
252 | } |
253 | unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } |
254 | |
255 | unsigned GetNumStringChars() const { |
256 | return GetStringLength() / CharByteWidth; |
257 | } |
258 | /// getOffsetOfStringByte - This function returns the offset of the |
259 | /// specified byte of the string data represented by Token. This handles |
260 | /// advancing over escape sequences in the string. |
261 | /// |
262 | /// If the Diagnostics pointer is non-null, then this will do semantic |
263 | /// checking of the string literal and emit errors and warnings. |
264 | unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; |
265 | |
266 | bool isOrdinary() const { return Kind == tok::string_literal; } |
267 | bool isWide() const { return Kind == tok::wide_string_literal; } |
268 | bool isUTF8() const { return Kind == tok::utf8_string_literal; } |
269 | bool isUTF16() const { return Kind == tok::utf16_string_literal; } |
270 | bool isUTF32() const { return Kind == tok::utf32_string_literal; } |
271 | bool isPascal() const { return Pascal; } |
272 | |
273 | StringRef getUDSuffix() const { return UDSuffixBuf; } |
274 | |
275 | /// Get the index of a token containing a ud-suffix. |
276 | unsigned getUDSuffixToken() const { |
277 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
278 | return UDSuffixToken; |
279 | } |
280 | /// Get the spelling offset of the first byte of the ud-suffix. |
281 | unsigned getUDSuffixOffset() const { |
282 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
283 | return UDSuffixOffset; |
284 | } |
285 | |
286 | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
287 | |
288 | private: |
289 | void init(ArrayRef<Token> StringToks); |
290 | bool CopyStringFragment(const Token &Tok, const char *TokBegin, |
291 | StringRef Fragment); |
292 | void DiagnoseLexingError(SourceLocation Loc); |
293 | }; |
294 | |
295 | } // end namespace clang |
296 | |
297 | #endif |
298 |
Warning: This file is not a C or C++ file. It does not have highlighting.