1 | //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Lex/Lexer.h" |
10 | #include "clang/Basic/Diagnostic.h" |
11 | #include "clang/Basic/DiagnosticOptions.h" |
12 | #include "clang/Basic/FileManager.h" |
13 | #include "clang/Basic/LangOptions.h" |
14 | #include "clang/Basic/SourceLocation.h" |
15 | #include "clang/Basic/SourceManager.h" |
16 | #include "clang/Basic/TargetInfo.h" |
17 | #include "clang/Basic/TargetOptions.h" |
18 | #include "clang/Basic/TokenKinds.h" |
19 | #include "clang/Lex/HeaderSearch.h" |
20 | #include "clang/Lex/HeaderSearchOptions.h" |
21 | #include "clang/Lex/LiteralSupport.h" |
22 | #include "clang/Lex/MacroArgs.h" |
23 | #include "clang/Lex/MacroInfo.h" |
24 | #include "clang/Lex/ModuleLoader.h" |
25 | #include "clang/Lex/Preprocessor.h" |
26 | #include "clang/Lex/PreprocessorOptions.h" |
27 | #include "llvm/ADT/ArrayRef.h" |
28 | #include "llvm/ADT/StringRef.h" |
29 | #include "llvm/Testing/Annotations/Annotations.h" |
30 | #include "gmock/gmock.h" |
31 | #include "gtest/gtest.h" |
32 | #include <memory> |
33 | #include <string> |
34 | #include <vector> |
35 | |
36 | namespace { |
37 | using namespace clang; |
38 | using testing::ElementsAre; |
39 | |
40 | // The test fixture. |
41 | class LexerTest : public ::testing::Test { |
42 | protected: |
43 | LexerTest() |
44 | : FileMgr(FileMgrOpts), |
45 | DiagID(new DiagnosticIDs()), |
46 | Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), |
47 | SourceMgr(Diags, FileMgr), |
48 | TargetOpts(new TargetOptions) |
49 | { |
50 | TargetOpts->Triple = "x86_64-apple-darwin11.1.0" ; |
51 | Target = TargetInfo::CreateTargetInfo(Diags, Opts: TargetOpts); |
52 | } |
53 | |
54 | std::unique_ptr<Preprocessor> CreatePP(StringRef Source, |
55 | TrivialModuleLoader &ModLoader) { |
56 | std::unique_ptr<llvm::MemoryBuffer> Buf = |
57 | llvm::MemoryBuffer::getMemBuffer(InputData: Source); |
58 | SourceMgr.setMainFileID(SourceMgr.createFileID(Buffer: std::move(Buf))); |
59 | |
60 | HeaderSearch (std::make_shared<HeaderSearchOptions>(), SourceMgr, |
61 | Diags, LangOpts, Target.get()); |
62 | std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( |
63 | args: std::make_shared<PreprocessorOptions>(), args&: Diags, args&: LangOpts, args&: SourceMgr, |
64 | args&: HeaderInfo, args&: ModLoader, |
65 | /*IILookup =*/args: nullptr, |
66 | /*OwnsHeaderSearch =*/args: false); |
67 | PP->Initialize(Target: *Target); |
68 | PP->EnterMainSourceFile(); |
69 | return PP; |
70 | } |
71 | |
72 | std::vector<Token> Lex(StringRef Source) { |
73 | TrivialModuleLoader ModLoader; |
74 | PP = CreatePP(Source, ModLoader); |
75 | |
76 | std::vector<Token> toks; |
77 | PP->LexTokensUntilEOF(Tokens: &toks); |
78 | |
79 | return toks; |
80 | } |
81 | |
82 | std::vector<Token> CheckLex(StringRef Source, |
83 | ArrayRef<tok::TokenKind> ExpectedTokens) { |
84 | auto toks = Lex(Source); |
85 | EXPECT_EQ(ExpectedTokens.size(), toks.size()); |
86 | for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { |
87 | EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); |
88 | } |
89 | |
90 | return toks; |
91 | } |
92 | |
93 | std::string getSourceText(Token Begin, Token End) { |
94 | bool Invalid; |
95 | StringRef Str = |
96 | Lexer::getSourceText(Range: CharSourceRange::getTokenRange(R: SourceRange( |
97 | Begin.getLocation(), End.getLocation())), |
98 | SM: SourceMgr, LangOpts, Invalid: &Invalid); |
99 | if (Invalid) |
100 | return "<INVALID>" ; |
101 | return std::string(Str); |
102 | } |
103 | |
104 | FileSystemOptions FileMgrOpts; |
105 | FileManager FileMgr; |
106 | IntrusiveRefCntPtr<DiagnosticIDs> DiagID; |
107 | DiagnosticsEngine Diags; |
108 | SourceManager SourceMgr; |
109 | LangOptions LangOpts; |
110 | std::shared_ptr<TargetOptions> TargetOpts; |
111 | IntrusiveRefCntPtr<TargetInfo> Target; |
112 | std::unique_ptr<Preprocessor> PP; |
113 | }; |
114 | |
115 | TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { |
116 | std::vector<tok::TokenKind> ExpectedTokens; |
117 | ExpectedTokens.push_back(x: tok::identifier); |
118 | ExpectedTokens.push_back(x: tok::l_paren); |
119 | ExpectedTokens.push_back(x: tok::identifier); |
120 | ExpectedTokens.push_back(x: tok::r_paren); |
121 | |
122 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
123 | "M(f(M(i)))" , |
124 | ExpectedTokens); |
125 | |
126 | EXPECT_EQ("M(i)" , getSourceText(toks[2], toks[2])); |
127 | } |
128 | |
129 | TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { |
130 | std::vector<tok::TokenKind> ExpectedTokens; |
131 | ExpectedTokens.push_back(x: tok::identifier); |
132 | ExpectedTokens.push_back(x: tok::identifier); |
133 | |
134 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
135 | "M(M(i) c)" , |
136 | ExpectedTokens); |
137 | |
138 | EXPECT_EQ("M(i)" , getSourceText(toks[0], toks[0])); |
139 | } |
140 | |
141 | TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { |
142 | std::vector<tok::TokenKind> ExpectedTokens; |
143 | ExpectedTokens.push_back(x: tok::identifier); |
144 | ExpectedTokens.push_back(x: tok::identifier); |
145 | ExpectedTokens.push_back(x: tok::identifier); |
146 | |
147 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
148 | "M(c c M(i))" , |
149 | ExpectedTokens); |
150 | |
151 | EXPECT_EQ("c M(i)" , getSourceText(toks[1], toks[2])); |
152 | } |
153 | |
154 | TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { |
155 | std::vector<tok::TokenKind> ExpectedTokens; |
156 | ExpectedTokens.push_back(x: tok::identifier); |
157 | ExpectedTokens.push_back(x: tok::identifier); |
158 | ExpectedTokens.push_back(x: tok::identifier); |
159 | |
160 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
161 | "M(M(i) c c)" , |
162 | ExpectedTokens); |
163 | |
164 | EXPECT_EQ("M(i) c" , getSourceText(toks[0], toks[1])); |
165 | } |
166 | |
167 | TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { |
168 | std::vector<tok::TokenKind> ExpectedTokens; |
169 | ExpectedTokens.push_back(x: tok::identifier); |
170 | ExpectedTokens.push_back(x: tok::identifier); |
171 | ExpectedTokens.push_back(x: tok::identifier); |
172 | ExpectedTokens.push_back(x: tok::identifier); |
173 | |
174 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
175 | "M(c M(i)) M(M(i) c)" , |
176 | ExpectedTokens); |
177 | |
178 | EXPECT_EQ("<INVALID>" , getSourceText(toks[1], toks[2])); |
179 | } |
180 | |
181 | TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { |
182 | std::vector<tok::TokenKind> ExpectedTokens; |
183 | ExpectedTokens.push_back(x: tok::identifier); |
184 | ExpectedTokens.push_back(x: tok::l_paren); |
185 | ExpectedTokens.push_back(x: tok::identifier); |
186 | ExpectedTokens.push_back(x: tok::r_paren); |
187 | |
188 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
189 | "#define C(x) M(x##c)\n" |
190 | "M(f(C(i)))" , |
191 | ExpectedTokens); |
192 | |
193 | EXPECT_EQ("C(i)" , getSourceText(toks[2], toks[2])); |
194 | } |
195 | |
196 | TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { |
197 | std::vector<tok::TokenKind> ExpectedTokens; |
198 | ExpectedTokens.push_back(x: tok::identifier); |
199 | ExpectedTokens.push_back(x: tok::l_paren); |
200 | ExpectedTokens.push_back(x: tok::identifier); |
201 | ExpectedTokens.push_back(x: tok::r_paren); |
202 | |
203 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
204 | "f(M(M(i)))" , |
205 | ExpectedTokens); |
206 | EXPECT_EQ("M(M(i))" , getSourceText(toks[2], toks[2])); |
207 | } |
208 | |
209 | TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { |
210 | std::vector<tok::TokenKind> ExpectedTokens; |
211 | ExpectedTokens.push_back(x: tok::identifier); |
212 | ExpectedTokens.push_back(x: tok::l_paren); |
213 | ExpectedTokens.push_back(x: tok::identifier); |
214 | ExpectedTokens.push_back(x: tok::r_paren); |
215 | |
216 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
217 | "M(f(i))" , |
218 | ExpectedTokens); |
219 | EXPECT_EQ("i" , getSourceText(toks[2], toks[2])); |
220 | } |
221 | |
222 | TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { |
223 | std::vector<tok::TokenKind> ExpectedTokens; |
224 | ExpectedTokens.push_back(x: tok::identifier); |
225 | ExpectedTokens.push_back(x: tok::l_paren); |
226 | ExpectedTokens.push_back(x: tok::identifier); |
227 | ExpectedTokens.push_back(x: tok::r_paren); |
228 | |
229 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
230 | "#define C(x) x\n" |
231 | "f(C(M(i)))" , |
232 | ExpectedTokens); |
233 | EXPECT_EQ("C(M(i))" , getSourceText(toks[2], toks[2])); |
234 | } |
235 | |
236 | TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { |
237 | std::vector<tok::TokenKind> ExpectedTokens; |
238 | ExpectedTokens.push_back(x: tok::identifier); |
239 | ExpectedTokens.push_back(x: tok::l_paren); |
240 | ExpectedTokens.push_back(x: tok::identifier); |
241 | ExpectedTokens.push_back(x: tok::identifier); |
242 | ExpectedTokens.push_back(x: tok::r_paren); |
243 | |
244 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
245 | "#define C(x) c x\n" |
246 | "f(C(M(i)))" , |
247 | ExpectedTokens); |
248 | EXPECT_EQ("M(i)" , getSourceText(toks[3], toks[3])); |
249 | } |
250 | |
251 | TEST_F(LexerTest, GetSourceTextExpandsRecursively) { |
252 | std::vector<tok::TokenKind> ExpectedTokens; |
253 | ExpectedTokens.push_back(x: tok::identifier); |
254 | ExpectedTokens.push_back(x: tok::identifier); |
255 | ExpectedTokens.push_back(x: tok::l_paren); |
256 | ExpectedTokens.push_back(x: tok::identifier); |
257 | ExpectedTokens.push_back(x: tok::r_paren); |
258 | |
259 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
260 | "#define C(x) c M(x)\n" |
261 | "C(f(M(i)))" , |
262 | ExpectedTokens); |
263 | EXPECT_EQ("M(i)" , getSourceText(toks[3], toks[3])); |
264 | } |
265 | |
266 | TEST_F(LexerTest, LexAPI) { |
267 | std::vector<tok::TokenKind> ExpectedTokens; |
268 | // Line 1 (after the #defines) |
269 | ExpectedTokens.push_back(x: tok::l_square); |
270 | ExpectedTokens.push_back(x: tok::identifier); |
271 | ExpectedTokens.push_back(x: tok::r_square); |
272 | ExpectedTokens.push_back(x: tok::l_square); |
273 | ExpectedTokens.push_back(x: tok::identifier); |
274 | ExpectedTokens.push_back(x: tok::r_square); |
275 | // Line 2 |
276 | ExpectedTokens.push_back(x: tok::identifier); |
277 | ExpectedTokens.push_back(x: tok::identifier); |
278 | ExpectedTokens.push_back(x: tok::identifier); |
279 | ExpectedTokens.push_back(x: tok::identifier); |
280 | |
281 | std::vector<Token> toks = CheckLex(Source: "#define M(x) [x]\n" |
282 | "#define N(x) x\n" |
283 | "#define INN(x) x\n" |
284 | "#define NOF1 INN(val)\n" |
285 | "#define NOF2 val\n" |
286 | "M(foo) N([bar])\n" |
287 | "N(INN(val)) N(NOF1) N(NOF2) N(val)" , |
288 | ExpectedTokens); |
289 | |
290 | SourceLocation lsqrLoc = toks[0].getLocation(); |
291 | SourceLocation idLoc = toks[1].getLocation(); |
292 | SourceLocation rsqrLoc = toks[2].getLocation(); |
293 | CharSourceRange macroRange = SourceMgr.getExpansionRange(Loc: lsqrLoc); |
294 | |
295 | SourceLocation Loc; |
296 | EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc)); |
297 | EXPECT_EQ(Loc, macroRange.getBegin()); |
298 | EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts)); |
299 | EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts)); |
300 | EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc)); |
301 | EXPECT_EQ(Loc, macroRange.getEnd()); |
302 | EXPECT_TRUE(macroRange.isTokenRange()); |
303 | |
304 | CharSourceRange range = Lexer::makeFileCharRange( |
305 | Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: idLoc), SM: SourceMgr, LangOpts); |
306 | EXPECT_TRUE(range.isInvalid()); |
307 | range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: idLoc, E: rsqrLoc), |
308 | SM: SourceMgr, LangOpts); |
309 | EXPECT_TRUE(range.isInvalid()); |
310 | range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc), |
311 | SM: SourceMgr, LangOpts); |
312 | EXPECT_TRUE(!range.isTokenRange()); |
313 | EXPECT_EQ(range.getAsRange(), |
314 | SourceRange(macroRange.getBegin(), |
315 | macroRange.getEnd().getLocWithOffset(1))); |
316 | |
317 | StringRef text = Lexer::getSourceText( |
318 | Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc), |
319 | SM: SourceMgr, LangOpts); |
320 | EXPECT_EQ(text, "M(foo)" ); |
321 | |
322 | SourceLocation macroLsqrLoc = toks[3].getLocation(); |
323 | SourceLocation macroIdLoc = toks[4].getLocation(); |
324 | SourceLocation macroRsqrLoc = toks[5].getLocation(); |
325 | SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(Loc: macroLsqrLoc); |
326 | SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(Loc: macroIdLoc); |
327 | SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(Loc: macroRsqrLoc); |
328 | |
329 | range = Lexer::makeFileCharRange( |
330 | Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroIdLoc), |
331 | SM: SourceMgr, LangOpts); |
332 | EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)), |
333 | range.getAsRange()); |
334 | |
335 | range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: macroIdLoc, E: macroRsqrLoc), |
336 | SM: SourceMgr, LangOpts); |
337 | EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)), |
338 | range.getAsRange()); |
339 | |
340 | macroRange = SourceMgr.getExpansionRange(Loc: macroLsqrLoc); |
341 | range = Lexer::makeFileCharRange( |
342 | Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroRsqrLoc), |
343 | SM: SourceMgr, LangOpts); |
344 | EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)), |
345 | range.getAsRange()); |
346 | |
347 | text = Lexer::getSourceText( |
348 | Range: CharSourceRange::getTokenRange(R: SourceRange(macroLsqrLoc, macroIdLoc)), |
349 | SM: SourceMgr, LangOpts); |
350 | EXPECT_EQ(text, "[bar" ); |
351 | |
352 | |
353 | SourceLocation idLoc1 = toks[6].getLocation(); |
354 | SourceLocation idLoc2 = toks[7].getLocation(); |
355 | SourceLocation idLoc3 = toks[8].getLocation(); |
356 | SourceLocation idLoc4 = toks[9].getLocation(); |
357 | EXPECT_EQ("INN" , Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts)); |
358 | EXPECT_EQ("INN" , Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts)); |
359 | EXPECT_EQ("NOF2" , Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts)); |
360 | EXPECT_EQ("N" , Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts)); |
361 | } |
362 | |
363 | TEST_F(LexerTest, HandlesSplitTokens) { |
364 | std::vector<tok::TokenKind> ExpectedTokens; |
365 | // Line 1 (after the #defines) |
366 | ExpectedTokens.push_back(x: tok::identifier); |
367 | ExpectedTokens.push_back(x: tok::less); |
368 | ExpectedTokens.push_back(x: tok::identifier); |
369 | ExpectedTokens.push_back(x: tok::less); |
370 | ExpectedTokens.push_back(x: tok::greatergreater); |
371 | // Line 2 |
372 | ExpectedTokens.push_back(x: tok::identifier); |
373 | ExpectedTokens.push_back(x: tok::less); |
374 | ExpectedTokens.push_back(x: tok::identifier); |
375 | ExpectedTokens.push_back(x: tok::less); |
376 | ExpectedTokens.push_back(x: tok::greatergreater); |
377 | |
378 | std::vector<Token> toks = CheckLex(Source: "#define TY ty\n" |
379 | "#define RANGLE ty<ty<>>\n" |
380 | "TY<ty<>>\n" |
381 | "RANGLE" , |
382 | ExpectedTokens); |
383 | |
384 | SourceLocation outerTyLoc = toks[0].getLocation(); |
385 | SourceLocation innerTyLoc = toks[2].getLocation(); |
386 | SourceLocation gtgtLoc = toks[4].getLocation(); |
387 | // Split the token to simulate the action of the parser and force creation of |
388 | // an `ExpansionTokenRange`. |
389 | SourceLocation rangleLoc = PP->SplitToken(TokLoc: gtgtLoc, Length: 1); |
390 | |
391 | // Verify that it only captures the first greater-then and not the second one. |
392 | CharSourceRange range = Lexer::makeFileCharRange( |
393 | Range: CharSourceRange::getTokenRange(B: innerTyLoc, E: rangleLoc), SM: SourceMgr, |
394 | LangOpts); |
395 | EXPECT_TRUE(range.isCharRange()); |
396 | EXPECT_EQ(range.getAsRange(), |
397 | SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1))); |
398 | |
399 | // Verify case where range begins in a macro expansion. |
400 | range = Lexer::makeFileCharRange( |
401 | Range: CharSourceRange::getTokenRange(B: outerTyLoc, E: rangleLoc), SM: SourceMgr, |
402 | LangOpts); |
403 | EXPECT_TRUE(range.isCharRange()); |
404 | EXPECT_EQ(range.getAsRange(), |
405 | SourceRange(SourceMgr.getExpansionLoc(outerTyLoc), |
406 | gtgtLoc.getLocWithOffset(1))); |
407 | |
408 | SourceLocation macroInnerTyLoc = toks[7].getLocation(); |
409 | SourceLocation macroGtgtLoc = toks[9].getLocation(); |
410 | // Split the token to simulate the action of the parser and force creation of |
411 | // an `ExpansionTokenRange`. |
412 | SourceLocation macroRAngleLoc = PP->SplitToken(TokLoc: macroGtgtLoc, Length: 1); |
413 | |
414 | // Verify that it fails (because it only captures the first greater-then and |
415 | // not the second one, so it doesn't span the entire macro expansion). |
416 | range = Lexer::makeFileCharRange( |
417 | Range: CharSourceRange::getTokenRange(B: macroInnerTyLoc, E: macroRAngleLoc), |
418 | SM: SourceMgr, LangOpts); |
419 | EXPECT_TRUE(range.isInvalid()); |
420 | } |
421 | |
422 | TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { |
423 | std::vector<Token> toks = |
424 | Lex(Source: "#define helper1 0\n" |
425 | "void helper2(const char *, ...);\n" |
426 | "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n" |
427 | "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n" |
428 | "void f1() { M2(\"a\", \"b\"); }" ); |
429 | |
430 | // Check the file corresponding to the "helper1" macro arg in M2. |
431 | // |
432 | // The lexer used to report its size as 31, meaning that the end of the |
433 | // expansion would be on the *next line* (just past `M2("a", "b")`). Make |
434 | // sure that we get the correct end location (the comma after "helper1"). |
435 | SourceLocation helper1ArgLoc = toks[20].getLocation(); |
436 | EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); |
437 | } |
438 | |
439 | TEST_F(LexerTest, DontOverallocateStringifyArgs) { |
440 | TrivialModuleLoader ModLoader; |
441 | auto PP = CreatePP(Source: "\"StrArg\", 5, 'C'" , ModLoader); |
442 | |
443 | llvm::BumpPtrAllocator Allocator; |
444 | std::array<IdentifierInfo *, 3> ParamList; |
445 | MacroInfo *MI = PP->AllocateMacroInfo(L: {}); |
446 | MI->setIsFunctionLike(); |
447 | MI->setParameterList(List: ParamList, PPAllocator&: Allocator); |
448 | EXPECT_EQ(3u, MI->getNumParams()); |
449 | EXPECT_TRUE(MI->isFunctionLike()); |
450 | |
451 | Token Eof; |
452 | Eof.setKind(tok::eof); |
453 | std::vector<Token> ArgTokens; |
454 | while (1) { |
455 | Token tok; |
456 | PP->Lex(Result&: tok); |
457 | if (tok.is(K: tok::eof)) { |
458 | ArgTokens.push_back(x: Eof); |
459 | break; |
460 | } |
461 | if (tok.is(K: tok::comma)) |
462 | ArgTokens.push_back(x: Eof); |
463 | else |
464 | ArgTokens.push_back(x: tok); |
465 | } |
466 | |
467 | auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(PP&: *PP); }; |
468 | std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA( |
469 | MacroArgs::create(MI, UnexpArgTokens: ArgTokens, VarargsElided: false, PP&: *PP), MacroArgsDeleter); |
470 | auto StringifyArg = [&](int ArgNo) { |
471 | return MA->StringifyArgument(ArgToks: MA->getUnexpArgument(Arg: ArgNo), PP&: *PP, |
472 | /*Charify=*/false, ExpansionLocStart: {}, ExpansionLocEnd: {}); |
473 | }; |
474 | Token Result = StringifyArg(0); |
475 | EXPECT_EQ(tok::string_literal, Result.getKind()); |
476 | EXPECT_STREQ("\"\\\"StrArg\\\"\"" , Result.getLiteralData()); |
477 | Result = StringifyArg(1); |
478 | EXPECT_EQ(tok::string_literal, Result.getKind()); |
479 | EXPECT_STREQ("\"5\"" , Result.getLiteralData()); |
480 | Result = StringifyArg(2); |
481 | EXPECT_EQ(tok::string_literal, Result.getKind()); |
482 | EXPECT_STREQ("\"'C'\"" , Result.getLiteralData()); |
483 | #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST |
484 | EXPECT_DEATH(StringifyArg(3), "Invalid arg #" ); |
485 | #endif |
486 | } |
487 | |
488 | TEST_F(LexerTest, IsNewLineEscapedValid) { |
489 | auto hasNewLineEscaped = [](const char *S) { |
490 | return Lexer::isNewLineEscaped(BufferStart: S, Str: S + strlen(s: S) - 1); |
491 | }; |
492 | |
493 | EXPECT_TRUE(hasNewLineEscaped("\\\r" )); |
494 | EXPECT_TRUE(hasNewLineEscaped("\\\n" )); |
495 | EXPECT_TRUE(hasNewLineEscaped("\\\r\n" )); |
496 | EXPECT_TRUE(hasNewLineEscaped("\\\n\r" )); |
497 | EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r" )); |
498 | EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n" )); |
499 | |
500 | EXPECT_FALSE(hasNewLineEscaped("\\\r\r" )); |
501 | EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n" )); |
502 | EXPECT_FALSE(hasNewLineEscaped("\\\n\n" )); |
503 | EXPECT_FALSE(hasNewLineEscaped("\r" )); |
504 | EXPECT_FALSE(hasNewLineEscaped("\n" )); |
505 | EXPECT_FALSE(hasNewLineEscaped("\r\n" )); |
506 | EXPECT_FALSE(hasNewLineEscaped("\n\r" )); |
507 | EXPECT_FALSE(hasNewLineEscaped("\r\r" )); |
508 | EXPECT_FALSE(hasNewLineEscaped("\n\n" )); |
509 | } |
510 | |
511 | TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { |
512 | // Each line should have the same length for |
513 | // further offset calculation to be more straightforward. |
514 | const unsigned IdentifierLength = 8; |
515 | std::string TextToLex = "rabarbar\n" |
516 | "foo\\\nbar\n" |
517 | "foo\\\rbar\n" |
518 | "fo\\\r\nbar\n" |
519 | "foo\\\n\rba\n" ; |
520 | std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; |
521 | std::vector<Token> LexedTokens = CheckLex(Source: TextToLex, ExpectedTokens); |
522 | |
523 | for (const Token &Tok : LexedTokens) { |
524 | std::pair<FileID, unsigned> OriginalLocation = |
525 | SourceMgr.getDecomposedLoc(Loc: Tok.getLocation()); |
526 | for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { |
527 | SourceLocation LookupLocation = |
528 | Tok.getLocation().getLocWithOffset(Offset); |
529 | |
530 | std::pair<FileID, unsigned> FoundLocation = |
531 | SourceMgr.getDecomposedExpansionLoc( |
532 | Loc: Lexer::GetBeginningOfToken(Loc: LookupLocation, SM: SourceMgr, LangOpts)); |
533 | |
534 | // Check that location returned by the GetBeginningOfToken |
535 | // is the same as original token location reported by Lexer. |
536 | EXPECT_EQ(FoundLocation.second, OriginalLocation.second); |
537 | } |
538 | } |
539 | } |
540 | |
541 | TEST_F(LexerTest, AvoidPastEndOfStringDereference) { |
542 | EXPECT_TRUE(Lex(" // \\\n" ).empty()); |
543 | EXPECT_TRUE(Lex("#include <\\\\" ).empty()); |
544 | EXPECT_TRUE(Lex("#include <\\\\\n" ).empty()); |
545 | } |
546 | |
547 | TEST_F(LexerTest, StringizingRasString) { |
548 | // For "std::string Lexer::Stringify(StringRef Str, bool Charify)". |
549 | std::string String1 = R"(foo |
550 | {"bar":[]} |
551 | baz)" ; |
552 | // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)". |
553 | SmallString<128> String2; |
554 | String2 += String1.c_str(); |
555 | |
556 | // Corner cases. |
557 | std::string String3 = R"(\ |
558 | \n |
559 | \\n |
560 | \\)" ; |
561 | SmallString<128> String4; |
562 | String4 += String3.c_str(); |
563 | std::string String5 = R"(a\ |
564 | |
565 | |
566 | \\b)" ; |
567 | SmallString<128> String6; |
568 | String6 += String5.c_str(); |
569 | |
570 | String1 = Lexer::Stringify(Str: StringRef(String1)); |
571 | Lexer::Stringify(Str&: String2); |
572 | String3 = Lexer::Stringify(Str: StringRef(String3)); |
573 | Lexer::Stringify(Str&: String4); |
574 | String5 = Lexer::Stringify(Str: StringRef(String5)); |
575 | Lexer::Stringify(Str&: String6); |
576 | |
577 | EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)" ); |
578 | EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)" ); |
579 | EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)" ); |
580 | EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)" ); |
581 | EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)" ); |
582 | EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)" ); |
583 | } |
584 | |
585 | TEST_F(LexerTest, CharRangeOffByOne) { |
586 | std::vector<Token> toks = Lex(Source: R"(#define MOO 1 |
587 | void foo() { MOO; })" ); |
588 | const Token &moo = toks[5]; |
589 | |
590 | EXPECT_EQ(getSourceText(moo, moo), "MOO" ); |
591 | |
592 | SourceRange R{moo.getLocation(), moo.getLocation()}; |
593 | |
594 | EXPECT_TRUE( |
595 | Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts)); |
596 | EXPECT_TRUE( |
597 | Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts)); |
598 | |
599 | CharSourceRange CR = Lexer::getAsCharRange(Range: R, SM: SourceMgr, LangOpts); |
600 | |
601 | EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO" ); // Was "MO". |
602 | } |
603 | |
604 | TEST_F(LexerTest, FindNextToken) { |
605 | Lex(Source: "int abcd = 0;\n" |
606 | "int xyz = abcd;\n" ); |
607 | std::vector<std::string> GeneratedByNextToken; |
608 | SourceLocation Loc = |
609 | SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID()); |
610 | while (true) { |
611 | auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts); |
612 | ASSERT_TRUE(T); |
613 | if (T->is(K: tok::eof)) |
614 | break; |
615 | GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
616 | Loc = T->getLocation(); |
617 | } |
618 | EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd" , "=" , "0" , ";" , "int" , |
619 | "xyz" , "=" , "abcd" , ";" )); |
620 | } |
621 | |
622 | TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { |
623 | TrivialModuleLoader ModLoader; |
624 | auto PP = CreatePP(Source: "" , ModLoader); |
625 | PP->LexTokensUntilEOF(); |
626 | EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), |
627 | 1U); |
628 | } |
629 | |
630 | TEST_F(LexerTest, RawAndNormalLexSameForLineComments) { |
631 | const llvm::StringLiteral Source = R"cpp( |
632 | // First line comment. |
633 | //* Second line comment which is ambigious. |
634 | ; // Have a non-comment token to make sure something is lexed. |
635 | )cpp" ; |
636 | LangOpts.LineComment = false; |
637 | auto Toks = Lex(Source); |
638 | auto &SM = PP->getSourceManager(); |
639 | auto SrcBuffer = SM.getBufferData(FID: SM.getMainFileID()); |
640 | Lexer L(SM.getLocForStartOfFile(FID: SM.getMainFileID()), PP->getLangOpts(), |
641 | SrcBuffer.data(), SrcBuffer.data(), |
642 | SrcBuffer.data() + SrcBuffer.size()); |
643 | |
644 | auto ToksView = llvm::ArrayRef(Toks); |
645 | clang::Token T; |
646 | EXPECT_FALSE(ToksView.empty()); |
647 | while (!L.LexFromRawLexer(Result&: T)) { |
648 | ASSERT_TRUE(!ToksView.empty()); |
649 | EXPECT_EQ(T.getKind(), ToksView.front().getKind()); |
650 | ToksView = ToksView.drop_front(); |
651 | } |
652 | EXPECT_TRUE(ToksView.empty()); |
653 | } |
654 | |
655 | TEST(LexerPreambleTest, PreambleBounds) { |
656 | std::vector<std::string> Cases = { |
657 | R"cc([[ |
658 | #include <foo> |
659 | ]]int bar; |
660 | )cc" , |
661 | R"cc([[ |
662 | #include <foo> |
663 | ]])cc" , |
664 | R"cc([[ |
665 | // leading comment |
666 | #include <foo> |
667 | ]]// trailing comment |
668 | int bar; |
669 | )cc" , |
670 | R"cc([[ |
671 | module; |
672 | #include <foo> |
673 | ]]module bar; |
674 | int x; |
675 | )cc" , |
676 | }; |
677 | for (const auto& Case : Cases) { |
678 | llvm::Annotations A(Case); |
679 | clang::LangOptions LangOpts; |
680 | LangOpts.CPlusPlusModules = true; |
681 | auto Bounds = Lexer::ComputePreamble(Buffer: A.code(), LangOpts); |
682 | EXPECT_EQ(Bounds.Size, A.range().End) << Case; |
683 | } |
684 | } |
685 | |
686 | } // anonymous namespace |
687 | |