1 | //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Lex/Lexer.h" |
10 | #include "clang/Basic/Diagnostic.h" |
11 | #include "clang/Basic/DiagnosticOptions.h" |
12 | #include "clang/Basic/FileManager.h" |
13 | #include "clang/Basic/LangOptions.h" |
14 | #include "clang/Basic/SourceLocation.h" |
15 | #include "clang/Basic/SourceManager.h" |
16 | #include "clang/Basic/TargetInfo.h" |
17 | #include "clang/Basic/TargetOptions.h" |
18 | #include "clang/Basic/TokenKinds.h" |
19 | #include "clang/Lex/HeaderSearch.h" |
20 | #include "clang/Lex/HeaderSearchOptions.h" |
21 | #include "clang/Lex/LiteralSupport.h" |
22 | #include "clang/Lex/MacroArgs.h" |
23 | #include "clang/Lex/MacroInfo.h" |
24 | #include "clang/Lex/ModuleLoader.h" |
25 | #include "clang/Lex/Preprocessor.h" |
26 | #include "clang/Lex/PreprocessorOptions.h" |
27 | #include "llvm/ADT/ArrayRef.h" |
28 | #include "llvm/ADT/StringRef.h" |
29 | #include "llvm/Testing/Annotations/Annotations.h" |
30 | #include "gmock/gmock.h" |
31 | #include "gtest/gtest.h" |
32 | #include <memory> |
33 | #include <string> |
34 | #include <vector> |
35 | |
36 | namespace { |
37 | using namespace clang; |
38 | using testing::ElementsAre; |
39 | |
40 | // The test fixture. |
41 | class LexerTest : public ::testing::Test { |
42 | protected: |
43 | LexerTest() |
44 | : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), |
45 | Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()), |
46 | SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) { |
47 | TargetOpts->Triple = "x86_64-apple-darwin11.1.0" ; |
48 | Target = TargetInfo::CreateTargetInfo(Diags, Opts&: *TargetOpts); |
49 | } |
50 | |
51 | std::unique_ptr<Preprocessor> CreatePP(StringRef Source, |
52 | TrivialModuleLoader &ModLoader) { |
53 | std::unique_ptr<llvm::MemoryBuffer> Buf = |
54 | llvm::MemoryBuffer::getMemBuffer(InputData: Source); |
55 | SourceMgr.setMainFileID(SourceMgr.createFileID(Buffer: std::move(Buf))); |
56 | |
57 | HeaderSearchOptions HSOpts; |
58 | HeaderSearch (HSOpts, SourceMgr, Diags, LangOpts, Target.get()); |
59 | PreprocessorOptions PPOpts; |
60 | std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( |
61 | args&: PPOpts, args&: Diags, args&: LangOpts, args&: SourceMgr, args&: HeaderInfo, args&: ModLoader, |
62 | /*IILookup =*/args: nullptr, |
63 | /*OwnsHeaderSearch =*/args: false); |
64 | PP->Initialize(Target: *Target); |
65 | PP->EnterMainSourceFile(); |
66 | return PP; |
67 | } |
68 | |
69 | std::vector<Token> Lex(StringRef Source) { |
70 | TrivialModuleLoader ModLoader; |
71 | PP = CreatePP(Source, ModLoader); |
72 | |
73 | std::vector<Token> toks; |
74 | PP->LexTokensUntilEOF(Tokens: &toks); |
75 | |
76 | return toks; |
77 | } |
78 | |
79 | std::vector<Token> CheckLex(StringRef Source, |
80 | ArrayRef<tok::TokenKind> ExpectedTokens) { |
81 | auto toks = Lex(Source); |
82 | EXPECT_EQ(ExpectedTokens.size(), toks.size()); |
83 | for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { |
84 | EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); |
85 | } |
86 | |
87 | return toks; |
88 | } |
89 | |
90 | std::string getSourceText(Token Begin, Token End) { |
91 | bool Invalid; |
92 | StringRef Str = |
93 | Lexer::getSourceText(Range: CharSourceRange::getTokenRange(R: SourceRange( |
94 | Begin.getLocation(), End.getLocation())), |
95 | SM: SourceMgr, LangOpts, Invalid: &Invalid); |
96 | if (Invalid) |
97 | return "<INVALID>" ; |
98 | return std::string(Str); |
99 | } |
100 | |
101 | FileSystemOptions FileMgrOpts; |
102 | FileManager FileMgr; |
103 | IntrusiveRefCntPtr<DiagnosticIDs> DiagID; |
104 | DiagnosticOptions DiagOpts; |
105 | DiagnosticsEngine Diags; |
106 | SourceManager SourceMgr; |
107 | LangOptions LangOpts; |
108 | std::shared_ptr<TargetOptions> TargetOpts; |
109 | IntrusiveRefCntPtr<TargetInfo> Target; |
110 | std::unique_ptr<Preprocessor> PP; |
111 | }; |
112 | |
113 | TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { |
114 | std::vector<tok::TokenKind> ExpectedTokens; |
115 | ExpectedTokens.push_back(x: tok::identifier); |
116 | ExpectedTokens.push_back(x: tok::l_paren); |
117 | ExpectedTokens.push_back(x: tok::identifier); |
118 | ExpectedTokens.push_back(x: tok::r_paren); |
119 | |
120 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
121 | "M(f(M(i)))" , |
122 | ExpectedTokens); |
123 | |
124 | EXPECT_EQ("M(i)" , getSourceText(toks[2], toks[2])); |
125 | } |
126 | |
127 | TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { |
128 | std::vector<tok::TokenKind> ExpectedTokens; |
129 | ExpectedTokens.push_back(x: tok::identifier); |
130 | ExpectedTokens.push_back(x: tok::identifier); |
131 | |
132 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
133 | "M(M(i) c)" , |
134 | ExpectedTokens); |
135 | |
136 | EXPECT_EQ("M(i)" , getSourceText(toks[0], toks[0])); |
137 | } |
138 | |
139 | TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { |
140 | std::vector<tok::TokenKind> ExpectedTokens; |
141 | ExpectedTokens.push_back(x: tok::identifier); |
142 | ExpectedTokens.push_back(x: tok::identifier); |
143 | ExpectedTokens.push_back(x: tok::identifier); |
144 | |
145 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
146 | "M(c c M(i))" , |
147 | ExpectedTokens); |
148 | |
149 | EXPECT_EQ("c M(i)" , getSourceText(toks[1], toks[2])); |
150 | } |
151 | |
152 | TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { |
153 | std::vector<tok::TokenKind> ExpectedTokens; |
154 | ExpectedTokens.push_back(x: tok::identifier); |
155 | ExpectedTokens.push_back(x: tok::identifier); |
156 | ExpectedTokens.push_back(x: tok::identifier); |
157 | |
158 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
159 | "M(M(i) c c)" , |
160 | ExpectedTokens); |
161 | |
162 | EXPECT_EQ("M(i) c" , getSourceText(toks[0], toks[1])); |
163 | } |
164 | |
165 | TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { |
166 | std::vector<tok::TokenKind> ExpectedTokens; |
167 | ExpectedTokens.push_back(x: tok::identifier); |
168 | ExpectedTokens.push_back(x: tok::identifier); |
169 | ExpectedTokens.push_back(x: tok::identifier); |
170 | ExpectedTokens.push_back(x: tok::identifier); |
171 | |
172 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
173 | "M(c M(i)) M(M(i) c)" , |
174 | ExpectedTokens); |
175 | |
176 | EXPECT_EQ("<INVALID>" , getSourceText(toks[1], toks[2])); |
177 | } |
178 | |
179 | TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { |
180 | std::vector<tok::TokenKind> ExpectedTokens; |
181 | ExpectedTokens.push_back(x: tok::identifier); |
182 | ExpectedTokens.push_back(x: tok::l_paren); |
183 | ExpectedTokens.push_back(x: tok::identifier); |
184 | ExpectedTokens.push_back(x: tok::r_paren); |
185 | |
186 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
187 | "#define C(x) M(x##c)\n" |
188 | "M(f(C(i)))" , |
189 | ExpectedTokens); |
190 | |
191 | EXPECT_EQ("C(i)" , getSourceText(toks[2], toks[2])); |
192 | } |
193 | |
194 | TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { |
195 | std::vector<tok::TokenKind> ExpectedTokens; |
196 | ExpectedTokens.push_back(x: tok::identifier); |
197 | ExpectedTokens.push_back(x: tok::l_paren); |
198 | ExpectedTokens.push_back(x: tok::identifier); |
199 | ExpectedTokens.push_back(x: tok::r_paren); |
200 | |
201 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
202 | "f(M(M(i)))" , |
203 | ExpectedTokens); |
204 | EXPECT_EQ("M(M(i))" , getSourceText(toks[2], toks[2])); |
205 | } |
206 | |
207 | TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { |
208 | std::vector<tok::TokenKind> ExpectedTokens; |
209 | ExpectedTokens.push_back(x: tok::identifier); |
210 | ExpectedTokens.push_back(x: tok::l_paren); |
211 | ExpectedTokens.push_back(x: tok::identifier); |
212 | ExpectedTokens.push_back(x: tok::r_paren); |
213 | |
214 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
215 | "M(f(i))" , |
216 | ExpectedTokens); |
217 | EXPECT_EQ("i" , getSourceText(toks[2], toks[2])); |
218 | } |
219 | |
220 | TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { |
221 | std::vector<tok::TokenKind> ExpectedTokens; |
222 | ExpectedTokens.push_back(x: tok::identifier); |
223 | ExpectedTokens.push_back(x: tok::l_paren); |
224 | ExpectedTokens.push_back(x: tok::identifier); |
225 | ExpectedTokens.push_back(x: tok::r_paren); |
226 | |
227 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
228 | "#define C(x) x\n" |
229 | "f(C(M(i)))" , |
230 | ExpectedTokens); |
231 | EXPECT_EQ("C(M(i))" , getSourceText(toks[2], toks[2])); |
232 | } |
233 | |
234 | TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { |
235 | std::vector<tok::TokenKind> ExpectedTokens; |
236 | ExpectedTokens.push_back(x: tok::identifier); |
237 | ExpectedTokens.push_back(x: tok::l_paren); |
238 | ExpectedTokens.push_back(x: tok::identifier); |
239 | ExpectedTokens.push_back(x: tok::identifier); |
240 | ExpectedTokens.push_back(x: tok::r_paren); |
241 | |
242 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
243 | "#define C(x) c x\n" |
244 | "f(C(M(i)))" , |
245 | ExpectedTokens); |
246 | EXPECT_EQ("M(i)" , getSourceText(toks[3], toks[3])); |
247 | } |
248 | |
249 | TEST_F(LexerTest, GetSourceTextExpandsRecursively) { |
250 | std::vector<tok::TokenKind> ExpectedTokens; |
251 | ExpectedTokens.push_back(x: tok::identifier); |
252 | ExpectedTokens.push_back(x: tok::identifier); |
253 | ExpectedTokens.push_back(x: tok::l_paren); |
254 | ExpectedTokens.push_back(x: tok::identifier); |
255 | ExpectedTokens.push_back(x: tok::r_paren); |
256 | |
257 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
258 | "#define C(x) c M(x)\n" |
259 | "C(f(M(i)))" , |
260 | ExpectedTokens); |
261 | EXPECT_EQ("M(i)" , getSourceText(toks[3], toks[3])); |
262 | } |
263 | |
// Exercises several Lexer location APIs on macro-expanded tokens:
// isAtStartOfMacroExpansion / isAtEndOfMacroExpansion, makeFileCharRange,
// getSourceText, and getImmediateMacroName.
TEST_F(LexerTest, LexAPI) {
  std::vector<tok::TokenKind> ExpectedTokens;
  // Line 1 (after the #defines)
  ExpectedTokens.push_back(x: tok::l_square);
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::r_square);
  ExpectedTokens.push_back(x: tok::l_square);
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::r_square);
  // Line 2
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::identifier);

  std::vector<Token> toks = CheckLex(Source: "#define M(x) [x]\n"
                                     "#define N(x) x\n"
                                     "#define INN(x) x\n"
                                     "#define NOF1 INN(val)\n"
                                     "#define NOF2 val\n"
                                     "M(foo) N([bar])\n"
                                     "N(INN(val)) N(NOF1) N(NOF2) N(val)" ,
                                     ExpectedTokens);

  // toks[0..2] are `[foo]` produced by M(foo): all three share the expansion
  // range of the M(foo) invocation.
  SourceLocation lsqrLoc = toks[0].getLocation();
  SourceLocation idLoc = toks[1].getLocation();
  SourceLocation rsqrLoc = toks[2].getLocation();
  CharSourceRange macroRange = SourceMgr.getExpansionRange(Loc: lsqrLoc);

  SourceLocation Loc;
  // `[` is the first token of M's expansion; `]` is the last.
  EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getBegin());
  EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getEnd());
  EXPECT_TRUE(macroRange.isTokenRange());

  // Partial spans of a macro expansion cannot be mapped to a file range...
  CharSourceRange range = Lexer::makeFileCharRange(
      Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: idLoc), SM: SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: idLoc, E: rsqrLoc),
                                   SM: SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  // ...but the full span maps to the char range of the M(foo) invocation.
  range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc),
                                   SM: SourceMgr, LangOpts);
  EXPECT_TRUE(!range.isTokenRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(macroRange.getBegin(),
                        macroRange.getEnd().getLocWithOffset(1)));

  StringRef text = Lexer::getSourceText(
      Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc),
      SM: SourceMgr, LangOpts);
  EXPECT_EQ(text, "M(foo)" );

  // toks[3..5] are `[bar]` — spelled directly in N's argument, so spelling
  // locations are real file locations.
  SourceLocation macroLsqrLoc = toks[3].getLocation();
  SourceLocation macroIdLoc = toks[4].getLocation();
  SourceLocation macroRsqrLoc = toks[5].getLocation();
  SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(Loc: macroLsqrLoc);
  SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(Loc: macroIdLoc);
  SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(Loc: macroRsqrLoc);

  // Sub-ranges of the argument map back to the spelled characters
  // (offset 3 = length of "bar", offset 1 = length of "]").
  range = Lexer::makeFileCharRange(
      Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroIdLoc),
      SM: SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
            range.getAsRange());

  range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: macroIdLoc, E: macroRsqrLoc),
                                   SM: SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
            range.getAsRange());

  // The full argument maps to the expansion range of the N(...) invocation.
  macroRange = SourceMgr.getExpansionRange(Loc: macroLsqrLoc);
  range = Lexer::makeFileCharRange(
      Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroRsqrLoc),
      SM: SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
            range.getAsRange());

  text = Lexer::getSourceText(
      Range: CharSourceRange::getTokenRange(R: SourceRange(macroLsqrLoc, macroIdLoc)),
      SM: SourceMgr, LangOpts);
  EXPECT_EQ(text, "[bar" );


  // toks[6..9] are the four `val` tokens on line 2; check which macro each
  // one is immediately expanded from.
  SourceLocation idLoc1 = toks[6].getLocation();
  SourceLocation idLoc2 = toks[7].getLocation();
  SourceLocation idLoc3 = toks[8].getLocation();
  SourceLocation idLoc4 = toks[9].getLocation();
  EXPECT_EQ("INN" , Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
  EXPECT_EQ("INN" , Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
  EXPECT_EQ("NOF2" , Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
  EXPECT_EQ("N" , Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
}
360 | |
// Verifies that makeFileCharRange handles tokens split by the parser
// (`>>` split into `>` `>`), both in file text and inside macro expansions.
TEST_F(LexerTest, HandlesSplitTokens) {
  std::vector<tok::TokenKind> ExpectedTokens;
  // Line 1 (after the #defines)
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::less);
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::less);
  ExpectedTokens.push_back(x: tok::greatergreater);
  // Line 2
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::less);
  ExpectedTokens.push_back(x: tok::identifier);
  ExpectedTokens.push_back(x: tok::less);
  ExpectedTokens.push_back(x: tok::greatergreater);

  std::vector<Token> toks = CheckLex(Source: "#define TY ty\n"
                                     "#define RANGLE ty<ty<>>\n"
                                     "TY<ty<>>\n"
                                     "RANGLE" ,
                                     ExpectedTokens);

  SourceLocation outerTyLoc = toks[0].getLocation();
  SourceLocation innerTyLoc = toks[2].getLocation();
  SourceLocation gtgtLoc = toks[4].getLocation();
  // Split the token to simulate the action of the parser and force creation of
  // an `ExpansionTokenRange`.
  SourceLocation rangleLoc = PP->SplitToken(TokLoc: gtgtLoc, Length: 1);

  // Verify that it only captures the first greater-then and not the second one.
  CharSourceRange range = Lexer::makeFileCharRange(
      Range: CharSourceRange::getTokenRange(B: innerTyLoc, E: rangleLoc), SM: SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));

  // Verify case where range begins in a macro expansion.
  // (`TY` expands to `ty`, so the range's begin must be mapped through
  // getExpansionLoc back to the file.)
  range = Lexer::makeFileCharRange(
      Range: CharSourceRange::getTokenRange(B: outerTyLoc, E: rangleLoc), SM: SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
                        gtgtLoc.getLocWithOffset(1)));

  // Line 2: the `>>` lives entirely inside RANGLE's expansion.
  SourceLocation macroInnerTyLoc = toks[7].getLocation();
  SourceLocation macroGtgtLoc = toks[9].getLocation();
  // Split the token to simulate the action of the parser and force creation of
  // an `ExpansionTokenRange`.
  SourceLocation macroRAngleLoc = PP->SplitToken(TokLoc: macroGtgtLoc, Length: 1);

  // Verify that it fails (because it only captures the first greater-then and
  // not the second one, so it doesn't span the entire macro expansion).
  range = Lexer::makeFileCharRange(
      Range: CharSourceRange::getTokenRange(B: macroInnerTyLoc, E: macroRAngleLoc),
      SM: SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
}
419 | |
// Regression test: macro arguments originating from different macro files
// must not be merged into one expansion file with an inflated size.
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
  std::vector<Token> toks =
      Lex(Source: "#define helper1 0\n"
          "void helper2(const char *, ...);\n"
          "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
          "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
          "void f1() { M2(\"a\", \"b\"); }" );

  // Check the file corresponding to the "helper1" macro arg in M2.
  //
  // The lexer used to report its size as 31, meaning that the end of the
  // expansion would be on the *next line* (just past `M2("a", "b")`). Make
  // sure that we get the correct end location (the comma after "helper1").
  // toks[20] is the `0` token that `helper1` expands to inside M2's argument.
  SourceLocation helper1ArgLoc = toks[20].getLocation();
  EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
}
436 | |
// Builds a 3-parameter function-like macro by hand, feeds it three arguments
// separated by eof markers, and checks that stringifying each argument works
// while stringifying a 4th (nonexistent) argument asserts.
TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP(Source: "\"StrArg\", 5, 'C'" , ModLoader);

  llvm::BumpPtrAllocator Allocator;
  // Parameter identifiers are never inspected here; only the count matters.
  std::array<IdentifierInfo *, 3> ParamList;
  MacroInfo *MI = PP->AllocateMacroInfo(L: {});
  MI->setIsFunctionLike();
  MI->setParameterList(List: ParamList, PPAllocator&: Allocator);
  EXPECT_EQ(3u, MI->getNumParams());
  EXPECT_TRUE(MI->isFunctionLike());

  // MacroArgs expects each argument's token stream to be terminated by an
  // eof token; commas in the source become those separators.
  Token Eof;
  Eof.setKind(tok::eof);
  std::vector<Token> ArgTokens;
  while (1) {
    Token tok;
    PP->Lex(Result&: tok);
    if (tok.is(K: tok::eof)) {
      ArgTokens.push_back(x: Eof);
      break;
    }
    if (tok.is(K: tok::comma))
      ArgTokens.push_back(x: Eof);
    else
      ArgTokens.push_back(x: tok);
  }

  // MacroArgs must be destroyed through MacroArgs::destroy(PP), not delete.
  auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(PP&: *PP); };
  std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
      MacroArgs::create(MI, UnexpArgTokens: ArgTokens, VarargsElided: false, PP&: *PP), MacroArgsDeleter);
  auto StringifyArg = [&](int ArgNo) {
    return MA->StringifyArgument(ArgToks: MA->getUnexpArgument(Arg: ArgNo), PP&: *PP,
                                 /*Charify=*/false, ExpansionLocStart: {}, ExpansionLocEnd: {});
  };
  Token Result = StringifyArg(0);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"\\\"StrArg\\\"\"" , Result.getLiteralData());
  Result = StringifyArg(1);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"5\"" , Result.getLiteralData());
  Result = StringifyArg(2);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"'C'\"" , Result.getLiteralData());
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  // Asking for a 4th argument must trip the "Invalid arg #" assertion,
  // proving no extra slots were allocated.
  EXPECT_DEATH(StringifyArg(3), "Invalid arg #" );
#endif
}
485 | |
486 | TEST_F(LexerTest, IsNewLineEscapedValid) { |
487 | auto hasNewLineEscaped = [](const char *S) { |
488 | return Lexer::isNewLineEscaped(BufferStart: S, Str: S + strlen(s: S) - 1); |
489 | }; |
490 | |
491 | EXPECT_TRUE(hasNewLineEscaped("\\\r" )); |
492 | EXPECT_TRUE(hasNewLineEscaped("\\\n" )); |
493 | EXPECT_TRUE(hasNewLineEscaped("\\\r\n" )); |
494 | EXPECT_TRUE(hasNewLineEscaped("\\\n\r" )); |
495 | EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r" )); |
496 | EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n" )); |
497 | |
498 | EXPECT_FALSE(hasNewLineEscaped("\\\r\r" )); |
499 | EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n" )); |
500 | EXPECT_FALSE(hasNewLineEscaped("\\\n\n" )); |
501 | EXPECT_FALSE(hasNewLineEscaped("\r" )); |
502 | EXPECT_FALSE(hasNewLineEscaped("\n" )); |
503 | EXPECT_FALSE(hasNewLineEscaped("\r\n" )); |
504 | EXPECT_FALSE(hasNewLineEscaped("\n\r" )); |
505 | EXPECT_FALSE(hasNewLineEscaped("\r\r" )); |
506 | EXPECT_FALSE(hasNewLineEscaped("\n\n" )); |
507 | } |
508 | |
509 | TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { |
510 | // Each line should have the same length for |
511 | // further offset calculation to be more straightforward. |
512 | const unsigned IdentifierLength = 8; |
513 | std::string TextToLex = "rabarbar\n" |
514 | "foo\\\nbar\n" |
515 | "foo\\\rbar\n" |
516 | "fo\\\r\nbar\n" |
517 | "foo\\\n\rba\n" ; |
518 | std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; |
519 | std::vector<Token> LexedTokens = CheckLex(Source: TextToLex, ExpectedTokens); |
520 | |
521 | for (const Token &Tok : LexedTokens) { |
522 | std::pair<FileID, unsigned> OriginalLocation = |
523 | SourceMgr.getDecomposedLoc(Loc: Tok.getLocation()); |
524 | for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { |
525 | SourceLocation LookupLocation = |
526 | Tok.getLocation().getLocWithOffset(Offset); |
527 | |
528 | std::pair<FileID, unsigned> FoundLocation = |
529 | SourceMgr.getDecomposedExpansionLoc( |
530 | Loc: Lexer::GetBeginningOfToken(Loc: LookupLocation, SM: SourceMgr, LangOpts)); |
531 | |
532 | // Check that location returned by the GetBeginningOfToken |
533 | // is the same as original token location reported by Lexer. |
534 | EXPECT_EQ(FoundLocation.second, OriginalLocation.second); |
535 | } |
536 | } |
537 | } |
538 | |
539 | TEST_F(LexerTest, AvoidPastEndOfStringDereference) { |
540 | EXPECT_TRUE(Lex(" // \\\n" ).empty()); |
541 | EXPECT_TRUE(Lex("#include <\\\\" ).empty()); |
542 | EXPECT_TRUE(Lex("#include <\\\\\n" ).empty()); |
543 | } |
544 | |
// Checks both Lexer::Stringify overloads (returning std::string and mutating
// a SmallVectorImpl<char> in place) on multi-line raw strings containing
// quotes and backslash sequences.
TEST_F(LexerTest, StringizingRasString) {
  // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
  std::string String1 = R"(foo
  {"bar":[]}
  baz)" ;
  // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
  SmallString<128> String2;
  String2 += String1.c_str();

  // Corner cases.
  std::string String3 = R"(\
  \n
  \\n
  \\)" ;
  SmallString<128> String4;
  String4 += String3.c_str();
  std::string String5 = R"(a\


  \\b)" ;
  SmallString<128> String6;
  String6 += String5.c_str();

  // Run both overloads over identical inputs; results must agree.
  String1 = Lexer::Stringify(Str: StringRef(String1));
  Lexer::Stringify(Str&: String2);
  String3 = Lexer::Stringify(Str: StringRef(String3));
  Lexer::Stringify(Str&: String4);
  String5 = Lexer::Stringify(Str: StringRef(String5));
  Lexer::Stringify(Str&: String6);

  EXPECT_EQ(String1, R"(foo\n  {\"bar\":[]}\n  baz)" );
  EXPECT_EQ(String2, R"(foo\n  {\"bar\":[]}\n  baz)" );
  EXPECT_EQ(String3, R"(\\\n  \\n\n  \\\\n\n  \\\\)" );
  EXPECT_EQ(String4, R"(\\\n  \\n\n  \\\\n\n  \\\\)" );
  EXPECT_EQ(String5, R"(a\\\n\n\n  \\\\b)" );
  EXPECT_EQ(String6, R"(a\\\n\n\n  \\\\b)" );
}
582 | |
583 | TEST_F(LexerTest, CharRangeOffByOne) { |
584 | std::vector<Token> toks = Lex(Source: R"(#define MOO 1 |
585 | void foo() { MOO; })" ); |
586 | const Token &moo = toks[5]; |
587 | |
588 | EXPECT_EQ(getSourceText(moo, moo), "MOO" ); |
589 | |
590 | SourceRange R{moo.getLocation(), moo.getLocation()}; |
591 | |
592 | EXPECT_TRUE( |
593 | Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts)); |
594 | EXPECT_TRUE( |
595 | Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts)); |
596 | |
597 | CharSourceRange CR = Lexer::getAsCharRange(Range: R, SM: SourceMgr, LangOpts); |
598 | |
599 | EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO" ); // Was "MO". |
600 | } |
601 | |
602 | TEST_F(LexerTest, FindNextToken) { |
603 | Lex(Source: "int abcd = 0;\n" |
604 | "// A comment.\n" |
605 | "int xyz = abcd;\n" ); |
606 | std::vector<std::string> GeneratedByNextToken; |
607 | SourceLocation Loc = |
608 | SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID()); |
609 | while (true) { |
610 | auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts); |
611 | ASSERT_TRUE(T); |
612 | if (T->is(K: tok::eof)) |
613 | break; |
614 | GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
615 | Loc = T->getLocation(); |
616 | } |
617 | EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd" , "=" , "0" , ";" , "int" , |
618 | "xyz" , "=" , "abcd" , ";" )); |
619 | } |
620 | |
621 | TEST_F(LexerTest, FindNextTokenIncludingComments) { |
622 | Lex(Source: "int abcd = 0;\n" |
623 | "// A comment.\n" |
624 | "int xyz = abcd;\n" ); |
625 | std::vector<std::string> GeneratedByNextToken; |
626 | SourceLocation Loc = |
627 | SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID()); |
628 | while (true) { |
629 | auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true); |
630 | ASSERT_TRUE(T); |
631 | if (T->is(K: tok::eof)) |
632 | break; |
633 | GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
634 | Loc = T->getLocation(); |
635 | } |
636 | EXPECT_THAT(GeneratedByNextToken, |
637 | ElementsAre("abcd" , "=" , "0" , ";" , "// A comment." , "int" , "xyz" , |
638 | "=" , "abcd" , ";" )); |
639 | } |
640 | |
641 | TEST_F(LexerTest, FindPreviousToken) { |
642 | Lex(Source: "int abcd = 0;\n" |
643 | "// A comment.\n" |
644 | "int xyz = abcd;\n" ); |
645 | std::vector<std::string> GeneratedByPrevToken; |
646 | SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID()); |
647 | while (true) { |
648 | auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: false); |
649 | if (!T.has_value()) |
650 | break; |
651 | GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
652 | Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts); |
653 | } |
654 | EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";" , "abcd" , "=" , "xyz" , "int" , |
655 | ";" , "0" , "=" , "abcd" , "int" )); |
656 | } |
657 | |
658 | TEST_F(LexerTest, FindPreviousTokenIncludingComments) { |
659 | Lex(Source: "int abcd = 0;\n" |
660 | "// A comment.\n" |
661 | "int xyz = abcd;\n" ); |
662 | std::vector<std::string> GeneratedByPrevToken; |
663 | SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID()); |
664 | while (true) { |
665 | auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true); |
666 | if (!T.has_value()) |
667 | break; |
668 | GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
669 | Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts); |
670 | } |
671 | EXPECT_THAT(GeneratedByPrevToken, |
672 | ElementsAre(";" , "abcd" , "=" , "xyz" , "int" , "// A comment." , ";" , |
673 | "0" , "=" , "abcd" , "int" )); |
674 | } |
675 | |
676 | TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { |
677 | TrivialModuleLoader ModLoader; |
678 | auto PP = CreatePP(Source: "" , ModLoader); |
679 | PP->LexTokensUntilEOF(); |
680 | EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), |
681 | 1U); |
682 | } |
683 | |
684 | TEST_F(LexerTest, RawAndNormalLexSameForLineComments) { |
685 | const llvm::StringLiteral Source = R"cpp( |
686 | // First line comment. |
687 | //* Second line comment which is ambigious. |
688 | ; // Have a non-comment token to make sure something is lexed. |
689 | )cpp" ; |
690 | LangOpts.LineComment = false; |
691 | auto Toks = Lex(Source); |
692 | auto &SM = PP->getSourceManager(); |
693 | auto SrcBuffer = SM.getBufferData(FID: SM.getMainFileID()); |
694 | Lexer L(SM.getLocForStartOfFile(FID: SM.getMainFileID()), PP->getLangOpts(), |
695 | SrcBuffer.data(), SrcBuffer.data(), |
696 | SrcBuffer.data() + SrcBuffer.size()); |
697 | |
698 | auto ToksView = llvm::ArrayRef(Toks); |
699 | clang::Token T; |
700 | EXPECT_FALSE(ToksView.empty()); |
701 | while (!L.LexFromRawLexer(Result&: T)) { |
702 | ASSERT_TRUE(!ToksView.empty()); |
703 | EXPECT_EQ(T.getKind(), ToksView.front().getKind()); |
704 | ToksView = ToksView.drop_front(); |
705 | } |
706 | EXPECT_TRUE(ToksView.empty()); |
707 | } |
708 | |
// Verifies that getRawToken with IgnoreWhiteSpace=false fails when pointed
// at the whitespace that follows an escaped newline in a macro definition.
TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) {
  const llvm::StringLiteral Source = R"cc(
  #define ONE \
    1

  int i = ONE;
)cc" ;
  std::vector<Token> Toks =
      CheckLex(Source, ExpectedTokens: {tok::kw_int, tok::identifier, tok::equal,
                               tok::numeric_constant, tok::semi});

  // Set up by getting the raw token for the `1` in the macro definition.
  const Token &OneExpanded = Toks[3];
  Token Tok;
  ASSERT_FALSE(
      Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts));
  // The `ONE`.
  ASSERT_EQ(Tok.getKind(), tok::raw_identifier);
  ASSERT_FALSE(
      Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()),
                         Tok, SourceMgr, LangOpts));
  // The `1` in the macro definition.
  ASSERT_EQ(Tok.getKind(), tok::numeric_constant);

  // Go back 4 characters: two spaces, one newline, and the backslash.
  SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(Offset: -4);
  // Expect true (=failure) because the whitespace immediately after the
  // escaped newline is not ignored.
  EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts,
                                 /*IgnoreWhiteSpace=*/false));
}
740 | |
741 | TEST(LexerPreambleTest, PreambleBounds) { |
742 | std::vector<std::string> Cases = { |
743 | R"cc([[ |
744 | #include <foo> |
745 | ]]int bar; |
746 | )cc" , |
747 | R"cc([[ |
748 | #include <foo> |
749 | ]])cc" , |
750 | R"cc([[ |
751 | // leading comment |
752 | #include <foo> |
753 | ]]// trailing comment |
754 | int bar; |
755 | )cc" , |
756 | R"cc([[ |
757 | module; |
758 | #include <foo> |
759 | ]]module bar; |
760 | int x; |
761 | )cc" , |
762 | }; |
763 | for (const auto& Case : Cases) { |
764 | llvm::Annotations A(Case); |
765 | clang::LangOptions LangOpts; |
766 | LangOpts.CPlusPlusModules = true; |
767 | auto Bounds = Lexer::ComputePreamble(Buffer: A.code(), LangOpts); |
768 | EXPECT_EQ(Bounds.Size, A.range().End) << Case; |
769 | } |
770 | } |
771 | |
772 | } // anonymous namespace |
773 | |