//===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "clang/Lex/Lexer.h"
10#include "clang/Basic/Diagnostic.h"
11#include "clang/Basic/DiagnosticOptions.h"
12#include "clang/Basic/FileManager.h"
13#include "clang/Basic/LangOptions.h"
14#include "clang/Basic/SourceLocation.h"
15#include "clang/Basic/SourceManager.h"
16#include "clang/Basic/TargetInfo.h"
17#include "clang/Basic/TargetOptions.h"
18#include "clang/Basic/TokenKinds.h"
19#include "clang/Lex/HeaderSearch.h"
20#include "clang/Lex/HeaderSearchOptions.h"
21#include "clang/Lex/LiteralSupport.h"
22#include "clang/Lex/MacroArgs.h"
23#include "clang/Lex/MacroInfo.h"
24#include "clang/Lex/ModuleLoader.h"
25#include "clang/Lex/Preprocessor.h"
26#include "clang/Lex/PreprocessorOptions.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/Testing/Annotations/Annotations.h"
30#include "gmock/gmock.h"
31#include "gtest/gtest.h"
32#include <memory>
33#include <string>
34#include <vector>
35
36namespace {
37using namespace clang;
38using testing::ElementsAre;
39
40// The test fixture.
41class LexerTest : public ::testing::Test {
42protected:
43 LexerTest()
44 : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
45 Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
46 SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
47 TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
48 Target = TargetInfo::CreateTargetInfo(Diags, Opts&: *TargetOpts);
49 }
50
51 std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
52 TrivialModuleLoader &ModLoader) {
53 std::unique_ptr<llvm::MemoryBuffer> Buf =
54 llvm::MemoryBuffer::getMemBuffer(InputData: Source);
55 SourceMgr.setMainFileID(SourceMgr.createFileID(Buffer: std::move(Buf)));
56
57 HeaderSearchOptions HSOpts;
58 HeaderSearch HeaderInfo(HSOpts, SourceMgr, Diags, LangOpts, Target.get());
59 PreprocessorOptions PPOpts;
60 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
61 args&: PPOpts, args&: Diags, args&: LangOpts, args&: SourceMgr, args&: HeaderInfo, args&: ModLoader,
62 /*IILookup =*/args: nullptr,
63 /*OwnsHeaderSearch =*/args: false);
64 if (!PreDefines.empty())
65 PP->setPredefines(PreDefines);
66 PP->Initialize(Target: *Target);
67 PP->EnterMainSourceFile();
68 return PP;
69 }
70
71 std::vector<Token> Lex(StringRef Source) {
72 TrivialModuleLoader ModLoader;
73 PP = CreatePP(Source, ModLoader);
74
75 std::vector<Token> toks;
76 PP->LexTokensUntilEOF(Tokens: &toks);
77
78 return toks;
79 }
80
81 std::vector<Token> CheckLex(StringRef Source,
82 ArrayRef<tok::TokenKind> ExpectedTokens) {
83 auto toks = Lex(Source);
84 EXPECT_EQ(ExpectedTokens.size(), toks.size());
85 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
86 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
87 }
88
89 return toks;
90 }
91
92 std::string getSourceText(Token Begin, Token End) {
93 bool Invalid;
94 StringRef Str =
95 Lexer::getSourceText(Range: CharSourceRange::getTokenRange(R: SourceRange(
96 Begin.getLocation(), End.getLocation())),
97 SM: SourceMgr, LangOpts, Invalid: &Invalid);
98 if (Invalid)
99 return "<INVALID>";
100 return std::string(Str);
101 }
102
103 FileSystemOptions FileMgrOpts;
104 FileManager FileMgr;
105 IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
106 DiagnosticOptions DiagOpts;
107 DiagnosticsEngine Diags;
108 SourceManager SourceMgr;
109 LangOptions LangOpts;
110 std::shared_ptr<TargetOptions> TargetOpts;
111 IntrusiveRefCntPtr<TargetInfo> Target;
112 std::unique_ptr<Preprocessor> PP;
113 std::string PreDefines;
114};
115
116TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
117 std::vector<tok::TokenKind> ExpectedTokens;
118 ExpectedTokens.push_back(x: tok::identifier);
119 ExpectedTokens.push_back(x: tok::l_paren);
120 ExpectedTokens.push_back(x: tok::identifier);
121 ExpectedTokens.push_back(x: tok::r_paren);
122
123 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
124 "M(f(M(i)))",
125 ExpectedTokens);
126
127 EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
128}
129
130TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
131 std::vector<tok::TokenKind> ExpectedTokens;
132 ExpectedTokens.push_back(x: tok::identifier);
133 ExpectedTokens.push_back(x: tok::identifier);
134
135 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
136 "M(M(i) c)",
137 ExpectedTokens);
138
139 EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
140}
141
142TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
143 std::vector<tok::TokenKind> ExpectedTokens;
144 ExpectedTokens.push_back(x: tok::identifier);
145 ExpectedTokens.push_back(x: tok::identifier);
146 ExpectedTokens.push_back(x: tok::identifier);
147
148 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
149 "M(c c M(i))",
150 ExpectedTokens);
151
152 EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
153}
154
155TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
156 std::vector<tok::TokenKind> ExpectedTokens;
157 ExpectedTokens.push_back(x: tok::identifier);
158 ExpectedTokens.push_back(x: tok::identifier);
159 ExpectedTokens.push_back(x: tok::identifier);
160
161 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
162 "M(M(i) c c)",
163 ExpectedTokens);
164
165 EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
166}
167
168TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
169 std::vector<tok::TokenKind> ExpectedTokens;
170 ExpectedTokens.push_back(x: tok::identifier);
171 ExpectedTokens.push_back(x: tok::identifier);
172 ExpectedTokens.push_back(x: tok::identifier);
173 ExpectedTokens.push_back(x: tok::identifier);
174
175 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
176 "M(c M(i)) M(M(i) c)",
177 ExpectedTokens);
178
179 EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
180}
181
182TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
183 std::vector<tok::TokenKind> ExpectedTokens;
184 ExpectedTokens.push_back(x: tok::identifier);
185 ExpectedTokens.push_back(x: tok::l_paren);
186 ExpectedTokens.push_back(x: tok::identifier);
187 ExpectedTokens.push_back(x: tok::r_paren);
188
189 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
190 "#define C(x) M(x##c)\n"
191 "M(f(C(i)))",
192 ExpectedTokens);
193
194 EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
195}
196
197TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
198 std::vector<tok::TokenKind> ExpectedTokens;
199 ExpectedTokens.push_back(x: tok::identifier);
200 ExpectedTokens.push_back(x: tok::l_paren);
201 ExpectedTokens.push_back(x: tok::identifier);
202 ExpectedTokens.push_back(x: tok::r_paren);
203
204 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
205 "f(M(M(i)))",
206 ExpectedTokens);
207 EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
208}
209
210TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
211 std::vector<tok::TokenKind> ExpectedTokens;
212 ExpectedTokens.push_back(x: tok::identifier);
213 ExpectedTokens.push_back(x: tok::l_paren);
214 ExpectedTokens.push_back(x: tok::identifier);
215 ExpectedTokens.push_back(x: tok::r_paren);
216
217 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
218 "M(f(i))",
219 ExpectedTokens);
220 EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
221}
222
223TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
224 std::vector<tok::TokenKind> ExpectedTokens;
225 ExpectedTokens.push_back(x: tok::identifier);
226 ExpectedTokens.push_back(x: tok::l_paren);
227 ExpectedTokens.push_back(x: tok::identifier);
228 ExpectedTokens.push_back(x: tok::r_paren);
229
230 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
231 "#define C(x) x\n"
232 "f(C(M(i)))",
233 ExpectedTokens);
234 EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
235}
236
237TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
238 std::vector<tok::TokenKind> ExpectedTokens;
239 ExpectedTokens.push_back(x: tok::identifier);
240 ExpectedTokens.push_back(x: tok::l_paren);
241 ExpectedTokens.push_back(x: tok::identifier);
242 ExpectedTokens.push_back(x: tok::identifier);
243 ExpectedTokens.push_back(x: tok::r_paren);
244
245 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
246 "#define C(x) c x\n"
247 "f(C(M(i)))",
248 ExpectedTokens);
249 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
250}
251
252TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
253 std::vector<tok::TokenKind> ExpectedTokens;
254 ExpectedTokens.push_back(x: tok::identifier);
255 ExpectedTokens.push_back(x: tok::identifier);
256 ExpectedTokens.push_back(x: tok::l_paren);
257 ExpectedTokens.push_back(x: tok::identifier);
258 ExpectedTokens.push_back(x: tok::r_paren);
259
260 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
261 "#define C(x) c M(x)\n"
262 "C(f(M(i)))",
263 ExpectedTokens);
264 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
265}
266
267TEST_F(LexerTest, LexAPI) {
268 std::vector<tok::TokenKind> ExpectedTokens;
269 // Line 1 (after the #defines)
270 ExpectedTokens.push_back(x: tok::l_square);
271 ExpectedTokens.push_back(x: tok::identifier);
272 ExpectedTokens.push_back(x: tok::r_square);
273 ExpectedTokens.push_back(x: tok::l_square);
274 ExpectedTokens.push_back(x: tok::identifier);
275 ExpectedTokens.push_back(x: tok::r_square);
276 // Line 2
277 ExpectedTokens.push_back(x: tok::identifier);
278 ExpectedTokens.push_back(x: tok::identifier);
279 ExpectedTokens.push_back(x: tok::identifier);
280 ExpectedTokens.push_back(x: tok::identifier);
281
282 std::vector<Token> toks = CheckLex(Source: "#define M(x) [x]\n"
283 "#define N(x) x\n"
284 "#define INN(x) x\n"
285 "#define NOF1 INN(val)\n"
286 "#define NOF2 val\n"
287 "M(foo) N([bar])\n"
288 "N(INN(val)) N(NOF1) N(NOF2) N(val)",
289 ExpectedTokens);
290
291 SourceLocation lsqrLoc = toks[0].getLocation();
292 SourceLocation idLoc = toks[1].getLocation();
293 SourceLocation rsqrLoc = toks[2].getLocation();
294 CharSourceRange macroRange = SourceMgr.getExpansionRange(Loc: lsqrLoc);
295
296 SourceLocation Loc;
297 EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
298 EXPECT_EQ(Loc, macroRange.getBegin());
299 EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
300 EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
301 EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
302 EXPECT_EQ(Loc, macroRange.getEnd());
303 EXPECT_TRUE(macroRange.isTokenRange());
304
305 CharSourceRange range = Lexer::makeFileCharRange(
306 Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: idLoc), SM: SourceMgr, LangOpts);
307 EXPECT_TRUE(range.isInvalid());
308 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: idLoc, E: rsqrLoc),
309 SM: SourceMgr, LangOpts);
310 EXPECT_TRUE(range.isInvalid());
311 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc),
312 SM: SourceMgr, LangOpts);
313 EXPECT_TRUE(!range.isTokenRange());
314 EXPECT_EQ(range.getAsRange(),
315 SourceRange(macroRange.getBegin(),
316 macroRange.getEnd().getLocWithOffset(1)));
317
318 StringRef text = Lexer::getSourceText(
319 Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc),
320 SM: SourceMgr, LangOpts);
321 EXPECT_EQ(text, "M(foo)");
322
323 SourceLocation macroLsqrLoc = toks[3].getLocation();
324 SourceLocation macroIdLoc = toks[4].getLocation();
325 SourceLocation macroRsqrLoc = toks[5].getLocation();
326 SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(Loc: macroLsqrLoc);
327 SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(Loc: macroIdLoc);
328 SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(Loc: macroRsqrLoc);
329
330 range = Lexer::makeFileCharRange(
331 Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroIdLoc),
332 SM: SourceMgr, LangOpts);
333 EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
334 range.getAsRange());
335
336 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: macroIdLoc, E: macroRsqrLoc),
337 SM: SourceMgr, LangOpts);
338 EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
339 range.getAsRange());
340
341 macroRange = SourceMgr.getExpansionRange(Loc: macroLsqrLoc);
342 range = Lexer::makeFileCharRange(
343 Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroRsqrLoc),
344 SM: SourceMgr, LangOpts);
345 EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
346 range.getAsRange());
347
348 text = Lexer::getSourceText(
349 Range: CharSourceRange::getTokenRange(R: SourceRange(macroLsqrLoc, macroIdLoc)),
350 SM: SourceMgr, LangOpts);
351 EXPECT_EQ(text, "[bar");
352
353
354 SourceLocation idLoc1 = toks[6].getLocation();
355 SourceLocation idLoc2 = toks[7].getLocation();
356 SourceLocation idLoc3 = toks[8].getLocation();
357 SourceLocation idLoc4 = toks[9].getLocation();
358 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
359 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
360 EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
361 EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
362}
363
364TEST_F(LexerTest, HandlesSplitTokens) {
365 std::vector<tok::TokenKind> ExpectedTokens;
366 // Line 1 (after the #defines)
367 ExpectedTokens.push_back(x: tok::identifier);
368 ExpectedTokens.push_back(x: tok::less);
369 ExpectedTokens.push_back(x: tok::identifier);
370 ExpectedTokens.push_back(x: tok::less);
371 ExpectedTokens.push_back(x: tok::greatergreater);
372 // Line 2
373 ExpectedTokens.push_back(x: tok::identifier);
374 ExpectedTokens.push_back(x: tok::less);
375 ExpectedTokens.push_back(x: tok::identifier);
376 ExpectedTokens.push_back(x: tok::less);
377 ExpectedTokens.push_back(x: tok::greatergreater);
378
379 std::vector<Token> toks = CheckLex(Source: "#define TY ty\n"
380 "#define RANGLE ty<ty<>>\n"
381 "TY<ty<>>\n"
382 "RANGLE",
383 ExpectedTokens);
384
385 SourceLocation outerTyLoc = toks[0].getLocation();
386 SourceLocation innerTyLoc = toks[2].getLocation();
387 SourceLocation gtgtLoc = toks[4].getLocation();
388 // Split the token to simulate the action of the parser and force creation of
389 // an `ExpansionTokenRange`.
390 SourceLocation rangleLoc = PP->SplitToken(TokLoc: gtgtLoc, Length: 1);
391
392 // Verify that it only captures the first greater-then and not the second one.
393 CharSourceRange range = Lexer::makeFileCharRange(
394 Range: CharSourceRange::getTokenRange(B: innerTyLoc, E: rangleLoc), SM: SourceMgr,
395 LangOpts);
396 EXPECT_TRUE(range.isCharRange());
397 EXPECT_EQ(range.getAsRange(),
398 SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
399
400 // Verify case where range begins in a macro expansion.
401 range = Lexer::makeFileCharRange(
402 Range: CharSourceRange::getTokenRange(B: outerTyLoc, E: rangleLoc), SM: SourceMgr,
403 LangOpts);
404 EXPECT_TRUE(range.isCharRange());
405 EXPECT_EQ(range.getAsRange(),
406 SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
407 gtgtLoc.getLocWithOffset(1)));
408
409 SourceLocation macroInnerTyLoc = toks[7].getLocation();
410 SourceLocation macroGtgtLoc = toks[9].getLocation();
411 // Split the token to simulate the action of the parser and force creation of
412 // an `ExpansionTokenRange`.
413 SourceLocation macroRAngleLoc = PP->SplitToken(TokLoc: macroGtgtLoc, Length: 1);
414
415 // Verify that it fails (because it only captures the first greater-then and
416 // not the second one, so it doesn't span the entire macro expansion).
417 range = Lexer::makeFileCharRange(
418 Range: CharSourceRange::getTokenRange(B: macroInnerTyLoc, E: macroRAngleLoc),
419 SM: SourceMgr, LangOpts);
420 EXPECT_TRUE(range.isInvalid());
421}
422
423TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
424 std::vector<Token> toks =
425 Lex(Source: "#define helper1 0\n"
426 "void helper2(const char *, ...);\n"
427 "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
428 "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
429 "void f1() { M2(\"a\", \"b\"); }");
430
431 // Check the file corresponding to the "helper1" macro arg in M2.
432 //
433 // The lexer used to report its size as 31, meaning that the end of the
434 // expansion would be on the *next line* (just past `M2("a", "b")`). Make
435 // sure that we get the correct end location (the comma after "helper1").
436 SourceLocation helper1ArgLoc = toks[20].getLocation();
437 EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
438}
439
440TEST_F(LexerTest, DontOverallocateStringifyArgs) {
441 TrivialModuleLoader ModLoader;
442 auto PP = CreatePP(Source: "\"StrArg\", 5, 'C'", ModLoader);
443
444 llvm::BumpPtrAllocator Allocator;
445 std::array<IdentifierInfo *, 3> ParamList;
446 MacroInfo *MI = PP->AllocateMacroInfo(L: {});
447 MI->setIsFunctionLike();
448 MI->setParameterList(List: ParamList, PPAllocator&: Allocator);
449 EXPECT_EQ(3u, MI->getNumParams());
450 EXPECT_TRUE(MI->isFunctionLike());
451
452 Token Eof;
453 Eof.setKind(tok::eof);
454 std::vector<Token> ArgTokens;
455 while (1) {
456 Token tok;
457 PP->Lex(Result&: tok);
458 if (tok.is(K: tok::eof)) {
459 ArgTokens.push_back(x: Eof);
460 break;
461 }
462 if (tok.is(K: tok::comma))
463 ArgTokens.push_back(x: Eof);
464 else
465 ArgTokens.push_back(x: tok);
466 }
467
468 auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(PP&: *PP); };
469 std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
470 MacroArgs::create(MI, UnexpArgTokens: ArgTokens, VarargsElided: false, PP&: *PP), MacroArgsDeleter);
471 auto StringifyArg = [&](int ArgNo) {
472 return MA->StringifyArgument(ArgToks: MA->getUnexpArgument(Arg: ArgNo), PP&: *PP,
473 /*Charify=*/false, ExpansionLocStart: {}, ExpansionLocEnd: {});
474 };
475 Token Result = StringifyArg(0);
476 EXPECT_EQ(tok::string_literal, Result.getKind());
477 EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
478 Result = StringifyArg(1);
479 EXPECT_EQ(tok::string_literal, Result.getKind());
480 EXPECT_STREQ("\"5\"", Result.getLiteralData());
481 Result = StringifyArg(2);
482 EXPECT_EQ(tok::string_literal, Result.getKind());
483 EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
484#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
485 EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
486#endif
487}
488
489TEST_F(LexerTest, IsNewLineEscapedValid) {
490 auto hasNewLineEscaped = [](const char *S) {
491 return Lexer::isNewLineEscaped(BufferStart: S, Str: S + strlen(s: S) - 1);
492 };
493
494 EXPECT_TRUE(hasNewLineEscaped("\\\r"));
495 EXPECT_TRUE(hasNewLineEscaped("\\\n"));
496 EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
497 EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
498 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
499 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
500
501 EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
502 EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
503 EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
504 EXPECT_FALSE(hasNewLineEscaped("\r"));
505 EXPECT_FALSE(hasNewLineEscaped("\n"));
506 EXPECT_FALSE(hasNewLineEscaped("\r\n"));
507 EXPECT_FALSE(hasNewLineEscaped("\n\r"));
508 EXPECT_FALSE(hasNewLineEscaped("\r\r"));
509 EXPECT_FALSE(hasNewLineEscaped("\n\n"));
510}
511
512TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
513 // Each line should have the same length for
514 // further offset calculation to be more straightforward.
515 const unsigned IdentifierLength = 8;
516 std::string TextToLex = "rabarbar\n"
517 "foo\\\nbar\n"
518 "foo\\\rbar\n"
519 "fo\\\r\nbar\n"
520 "foo\\\n\rba\n";
521 std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
522 std::vector<Token> LexedTokens = CheckLex(Source: TextToLex, ExpectedTokens);
523
524 for (const Token &Tok : LexedTokens) {
525 FileIDAndOffset OriginalLocation =
526 SourceMgr.getDecomposedLoc(Loc: Tok.getLocation());
527 for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
528 SourceLocation LookupLocation =
529 Tok.getLocation().getLocWithOffset(Offset);
530
531 FileIDAndOffset FoundLocation = SourceMgr.getDecomposedExpansionLoc(
532 Loc: Lexer::GetBeginningOfToken(Loc: LookupLocation, SM: SourceMgr, LangOpts));
533
534 // Check that location returned by the GetBeginningOfToken
535 // is the same as original token location reported by Lexer.
536 EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
537 }
538 }
539}
540
541TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
542 EXPECT_TRUE(Lex(" // \\\n").empty());
543 EXPECT_TRUE(Lex("#include <\\\\").empty());
544 EXPECT_TRUE(Lex("#include <\\\\\n").empty());
545}
546
547TEST_F(LexerTest, StringizingRasString) {
548 // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
549 std::string String1 = R"(foo
550 {"bar":[]}
551 baz)";
552 // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
553 SmallString<128> String2;
554 String2 += String1.c_str();
555
556 // Corner cases.
557 std::string String3 = R"(\
558 \n
559 \\n
560 \\)";
561 SmallString<128> String4;
562 String4 += String3.c_str();
563 std::string String5 = R"(a\
564
565
566 \\b)";
567 SmallString<128> String6;
568 String6 += String5.c_str();
569
570 String1 = Lexer::Stringify(Str: StringRef(String1));
571 Lexer::Stringify(Str&: String2);
572 String3 = Lexer::Stringify(Str: StringRef(String3));
573 Lexer::Stringify(Str&: String4);
574 String5 = Lexer::Stringify(Str: StringRef(String5));
575 Lexer::Stringify(Str&: String6);
576
577 EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)");
578 EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)");
579 EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)");
580 EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)");
581 EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)");
582 EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)");
583}
584
585TEST_F(LexerTest, CharRangeOffByOne) {
586 std::vector<Token> toks = Lex(Source: R"(#define MOO 1
587 void foo() { MOO; })");
588 const Token &moo = toks[5];
589
590 EXPECT_EQ(getSourceText(moo, moo), "MOO");
591
592 SourceRange R{moo.getLocation(), moo.getLocation()};
593
594 EXPECT_TRUE(
595 Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
596 EXPECT_TRUE(
597 Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));
598
599 CharSourceRange CR = Lexer::getAsCharRange(Range: R, SM: SourceMgr, LangOpts);
600
601 EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
602}
603
604TEST_F(LexerTest, FindNextToken) {
605 Lex(Source: "int abcd = 0;\n"
606 "// A comment.\n"
607 "int xyz = abcd;\n");
608 std::vector<std::string> GeneratedByNextToken;
609 SourceLocation Loc =
610 SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID());
611 while (true) {
612 auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts);
613 ASSERT_TRUE(T);
614 if (T->is(K: tok::eof))
615 break;
616 GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T));
617 Loc = T->getLocation();
618 }
619 EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int",
620 "xyz", "=", "abcd", ";"));
621}
622
623TEST_F(LexerTest, FindNextTokenIncludingComments) {
624 Lex(Source: "int abcd = 0;\n"
625 "// A comment.\n"
626 "int xyz = abcd;\n");
627 std::vector<std::string> GeneratedByNextToken;
628 SourceLocation Loc =
629 SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID());
630 while (true) {
631 auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true);
632 ASSERT_TRUE(T);
633 if (T->is(K: tok::eof))
634 break;
635 GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T));
636 Loc = T->getLocation();
637 }
638 EXPECT_THAT(GeneratedByNextToken,
639 ElementsAre("abcd", "=", "0", ";", "// A comment.", "int", "xyz",
640 "=", "abcd", ";"));
641}
642
643TEST_F(LexerTest, FindPreviousToken) {
644 Lex(Source: "int abcd = 0;\n"
645 "// A comment.\n"
646 "int xyz = abcd;\n");
647 std::vector<std::string> GeneratedByPrevToken;
648 SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID());
649 while (true) {
650 auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: false);
651 if (!T.has_value())
652 break;
653 GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T));
654 Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts);
655 }
656 EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";", "abcd", "=", "xyz", "int",
657 ";", "0", "=", "abcd", "int"));
658}
659
660TEST_F(LexerTest, FindPreviousTokenIncludingComments) {
661 Lex(Source: "int abcd = 0;\n"
662 "// A comment.\n"
663 "int xyz = abcd;\n");
664 std::vector<std::string> GeneratedByPrevToken;
665 SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID());
666 while (true) {
667 auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true);
668 if (!T.has_value())
669 break;
670 GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T));
671 Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts);
672 }
673 EXPECT_THAT(GeneratedByPrevToken,
674 ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";",
675 "0", "=", "abcd", "int"));
676}
677
678TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
679 TrivialModuleLoader ModLoader;
680 auto PP = CreatePP(Source: "", ModLoader);
681 PP->LexTokensUntilEOF();
682 EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()),
683 1U);
684}
685
686TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
687 const llvm::StringLiteral Source = R"cpp(
688 // First line comment.
689 //* Second line comment which is ambigious.
690 ; // Have a non-comment token to make sure something is lexed.
691 )cpp";
692 LangOpts.LineComment = false;
693 auto Toks = Lex(Source);
694 auto &SM = PP->getSourceManager();
695 auto SrcBuffer = SM.getBufferData(FID: SM.getMainFileID());
696 Lexer L(SM.getLocForStartOfFile(FID: SM.getMainFileID()), PP->getLangOpts(),
697 SrcBuffer.data(), SrcBuffer.data(),
698 SrcBuffer.data() + SrcBuffer.size());
699
700 auto ToksView = llvm::ArrayRef(Toks);
701 clang::Token T;
702 EXPECT_FALSE(ToksView.empty());
703 while (!L.LexFromRawLexer(Result&: T)) {
704 ASSERT_TRUE(!ToksView.empty());
705 EXPECT_EQ(T.getKind(), ToksView.front().getKind());
706 ToksView = ToksView.drop_front();
707 }
708 EXPECT_TRUE(ToksView.empty());
709}
710
711TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) {
712 const llvm::StringLiteral Source = R"cc(
713 #define ONE \
714 1
715
716 int i = ONE;
717 )cc";
718 std::vector<Token> Toks =
719 CheckLex(Source, ExpectedTokens: {tok::kw_int, tok::identifier, tok::equal,
720 tok::numeric_constant, tok::semi});
721
722 // Set up by getting the raw token for the `1` in the macro definition.
723 const Token &OneExpanded = Toks[3];
724 Token Tok;
725 ASSERT_FALSE(
726 Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts));
727 // The `ONE`.
728 ASSERT_EQ(Tok.getKind(), tok::raw_identifier);
729 ASSERT_FALSE(
730 Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()),
731 Tok, SourceMgr, LangOpts));
732 // The `1` in the macro definition.
733 ASSERT_EQ(Tok.getKind(), tok::numeric_constant);
734
735 // Go back 4 characters: two spaces, one newline, and the backslash.
736 SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(Offset: -4);
737 // Expect true (=failure) because the whitespace immediately after the
738 // escaped newline is not ignored.
739 EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts,
740 /*IgnoreWhiteSpace=*/false));
741}
742
743TEST(LexerPreambleTest, PreambleBounds) {
744 std::vector<std::string> Cases = {
745 R"cc([[
746 #include <foo>
747 ]]int bar;
748 )cc",
749 R"cc([[
750 #include <foo>
751 ]])cc",
752 R"cc([[
753 // leading comment
754 #include <foo>
755 ]]// trailing comment
756 int bar;
757 )cc",
758 R"cc([[
759 module;
760 #include <foo>
761 ]]module bar;
762 int x;
763 )cc",
764 };
765 for (const auto& Case : Cases) {
766 llvm::Annotations A(Case);
767 clang::LangOptions LangOpts;
768 LangOpts.CPlusPlusModules = true;
769 auto Bounds = Lexer::ComputePreamble(Buffer: A.code(), LangOpts);
770 EXPECT_EQ(Bounds.Size, A.range().End) << Case;
771 }
772}
773
774TEST_F(LexerTest, CheckFirstPPToken) {
775 LangOpts.CPlusPlusModules = true;
776 {
777 TrivialModuleLoader ModLoader;
778 auto PP = CreatePP(Source: "// This is a comment\n"
779 "int a;",
780 ModLoader);
781 Token Tok;
782 PP->Lex(Result&: Tok);
783 EXPECT_TRUE(Tok.is(tok::kw_int));
784 EXPECT_TRUE(PP->getMainFileFirstPPTokenLoc().isValid());
785 EXPECT_EQ(PP->getMainFileFirstPPTokenLoc(), Tok.getLocation());
786 }
787 {
788 TrivialModuleLoader ModLoader;
789 auto PP = CreatePP(Source: "// This is a comment\n"
790 "#define FOO int\n"
791 "FOO a;",
792 ModLoader);
793 Token Tok;
794 PP->Lex(Result&: Tok);
795 EXPECT_TRUE(Tok.is(tok::kw_int));
796 EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok,
797 PP->getSourceManager(), PP->getLangOpts(),
798 /*IgnoreWhiteSpace=*/false));
799 EXPECT_TRUE(PP->getMainFileFirstPPTokenLoc() == Tok.getLocation());
800 EXPECT_TRUE(Tok.is(tok::hash));
801 }
802
803 {
804 PreDefines = "#define FOO int\n";
805 TrivialModuleLoader ModLoader;
806 auto PP = CreatePP(Source: "// This is a comment\n"
807 "FOO a;",
808 ModLoader);
809 Token Tok;
810 PP->Lex(Result&: Tok);
811 EXPECT_TRUE(Tok.is(tok::kw_int));
812 EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok,
813 PP->getSourceManager(), PP->getLangOpts(),
814 /*IgnoreWhiteSpace=*/false));
815 EXPECT_TRUE(PP->getMainFileFirstPPTokenLoc() == Tok.getLocation());
816 EXPECT_TRUE(Tok.is(tok::raw_identifier));
817 EXPECT_TRUE(Tok.getRawIdentifier() == "FOO");
818 }
819}
820} // anonymous namespace
821

source code of clang/unittests/Lex/LexerTest.cpp