1//===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Lex/Lexer.h"
10#include "clang/Basic/Diagnostic.h"
11#include "clang/Basic/DiagnosticOptions.h"
12#include "clang/Basic/FileManager.h"
13#include "clang/Basic/LangOptions.h"
14#include "clang/Basic/SourceLocation.h"
15#include "clang/Basic/SourceManager.h"
16#include "clang/Basic/TargetInfo.h"
17#include "clang/Basic/TargetOptions.h"
18#include "clang/Basic/TokenKinds.h"
19#include "clang/Lex/HeaderSearch.h"
20#include "clang/Lex/HeaderSearchOptions.h"
21#include "clang/Lex/LiteralSupport.h"
22#include "clang/Lex/MacroArgs.h"
23#include "clang/Lex/MacroInfo.h"
24#include "clang/Lex/ModuleLoader.h"
25#include "clang/Lex/Preprocessor.h"
26#include "clang/Lex/PreprocessorOptions.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/Testing/Annotations/Annotations.h"
30#include "gmock/gmock.h"
31#include "gtest/gtest.h"
32#include <memory>
33#include <string>
34#include <vector>
35
36namespace {
37using namespace clang;
38using testing::ElementsAre;
39
40// The test fixture.
41class LexerTest : public ::testing::Test {
42protected:
43 LexerTest()
44 : FileMgr(FileMgrOpts),
45 DiagID(new DiagnosticIDs()),
46 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
47 SourceMgr(Diags, FileMgr),
48 TargetOpts(new TargetOptions)
49 {
50 TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
51 Target = TargetInfo::CreateTargetInfo(Diags, Opts: TargetOpts);
52 }
53
54 std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
55 TrivialModuleLoader &ModLoader) {
56 std::unique_ptr<llvm::MemoryBuffer> Buf =
57 llvm::MemoryBuffer::getMemBuffer(InputData: Source);
58 SourceMgr.setMainFileID(SourceMgr.createFileID(Buffer: std::move(Buf)));
59
60 HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
61 Diags, LangOpts, Target.get());
62 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
63 args: std::make_shared<PreprocessorOptions>(), args&: Diags, args&: LangOpts, args&: SourceMgr,
64 args&: HeaderInfo, args&: ModLoader,
65 /*IILookup =*/args: nullptr,
66 /*OwnsHeaderSearch =*/args: false);
67 PP->Initialize(Target: *Target);
68 PP->EnterMainSourceFile();
69 return PP;
70 }
71
72 std::vector<Token> Lex(StringRef Source) {
73 TrivialModuleLoader ModLoader;
74 PP = CreatePP(Source, ModLoader);
75
76 std::vector<Token> toks;
77 PP->LexTokensUntilEOF(Tokens: &toks);
78
79 return toks;
80 }
81
82 std::vector<Token> CheckLex(StringRef Source,
83 ArrayRef<tok::TokenKind> ExpectedTokens) {
84 auto toks = Lex(Source);
85 EXPECT_EQ(ExpectedTokens.size(), toks.size());
86 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
87 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
88 }
89
90 return toks;
91 }
92
93 std::string getSourceText(Token Begin, Token End) {
94 bool Invalid;
95 StringRef Str =
96 Lexer::getSourceText(Range: CharSourceRange::getTokenRange(R: SourceRange(
97 Begin.getLocation(), End.getLocation())),
98 SM: SourceMgr, LangOpts, Invalid: &Invalid);
99 if (Invalid)
100 return "<INVALID>";
101 return std::string(Str);
102 }
103
104 FileSystemOptions FileMgrOpts;
105 FileManager FileMgr;
106 IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
107 DiagnosticsEngine Diags;
108 SourceManager SourceMgr;
109 LangOptions LangOpts;
110 std::shared_ptr<TargetOptions> TargetOpts;
111 IntrusiveRefCntPtr<TargetInfo> Target;
112 std::unique_ptr<Preprocessor> PP;
113};
114
115TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
116 std::vector<tok::TokenKind> ExpectedTokens;
117 ExpectedTokens.push_back(x: tok::identifier);
118 ExpectedTokens.push_back(x: tok::l_paren);
119 ExpectedTokens.push_back(x: tok::identifier);
120 ExpectedTokens.push_back(x: tok::r_paren);
121
122 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
123 "M(f(M(i)))",
124 ExpectedTokens);
125
126 EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
127}
128
129TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
130 std::vector<tok::TokenKind> ExpectedTokens;
131 ExpectedTokens.push_back(x: tok::identifier);
132 ExpectedTokens.push_back(x: tok::identifier);
133
134 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
135 "M(M(i) c)",
136 ExpectedTokens);
137
138 EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
139}
140
141TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
142 std::vector<tok::TokenKind> ExpectedTokens;
143 ExpectedTokens.push_back(x: tok::identifier);
144 ExpectedTokens.push_back(x: tok::identifier);
145 ExpectedTokens.push_back(x: tok::identifier);
146
147 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
148 "M(c c M(i))",
149 ExpectedTokens);
150
151 EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
152}
153
154TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
155 std::vector<tok::TokenKind> ExpectedTokens;
156 ExpectedTokens.push_back(x: tok::identifier);
157 ExpectedTokens.push_back(x: tok::identifier);
158 ExpectedTokens.push_back(x: tok::identifier);
159
160 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
161 "M(M(i) c c)",
162 ExpectedTokens);
163
164 EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
165}
166
167TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
168 std::vector<tok::TokenKind> ExpectedTokens;
169 ExpectedTokens.push_back(x: tok::identifier);
170 ExpectedTokens.push_back(x: tok::identifier);
171 ExpectedTokens.push_back(x: tok::identifier);
172 ExpectedTokens.push_back(x: tok::identifier);
173
174 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
175 "M(c M(i)) M(M(i) c)",
176 ExpectedTokens);
177
178 EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
179}
180
181TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
182 std::vector<tok::TokenKind> ExpectedTokens;
183 ExpectedTokens.push_back(x: tok::identifier);
184 ExpectedTokens.push_back(x: tok::l_paren);
185 ExpectedTokens.push_back(x: tok::identifier);
186 ExpectedTokens.push_back(x: tok::r_paren);
187
188 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
189 "#define C(x) M(x##c)\n"
190 "M(f(C(i)))",
191 ExpectedTokens);
192
193 EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
194}
195
196TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
197 std::vector<tok::TokenKind> ExpectedTokens;
198 ExpectedTokens.push_back(x: tok::identifier);
199 ExpectedTokens.push_back(x: tok::l_paren);
200 ExpectedTokens.push_back(x: tok::identifier);
201 ExpectedTokens.push_back(x: tok::r_paren);
202
203 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
204 "f(M(M(i)))",
205 ExpectedTokens);
206 EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
207}
208
209TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
210 std::vector<tok::TokenKind> ExpectedTokens;
211 ExpectedTokens.push_back(x: tok::identifier);
212 ExpectedTokens.push_back(x: tok::l_paren);
213 ExpectedTokens.push_back(x: tok::identifier);
214 ExpectedTokens.push_back(x: tok::r_paren);
215
216 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
217 "M(f(i))",
218 ExpectedTokens);
219 EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
220}
221
222TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
223 std::vector<tok::TokenKind> ExpectedTokens;
224 ExpectedTokens.push_back(x: tok::identifier);
225 ExpectedTokens.push_back(x: tok::l_paren);
226 ExpectedTokens.push_back(x: tok::identifier);
227 ExpectedTokens.push_back(x: tok::r_paren);
228
229 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
230 "#define C(x) x\n"
231 "f(C(M(i)))",
232 ExpectedTokens);
233 EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
234}
235
236TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
237 std::vector<tok::TokenKind> ExpectedTokens;
238 ExpectedTokens.push_back(x: tok::identifier);
239 ExpectedTokens.push_back(x: tok::l_paren);
240 ExpectedTokens.push_back(x: tok::identifier);
241 ExpectedTokens.push_back(x: tok::identifier);
242 ExpectedTokens.push_back(x: tok::r_paren);
243
244 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
245 "#define C(x) c x\n"
246 "f(C(M(i)))",
247 ExpectedTokens);
248 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
249}
250
251TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
252 std::vector<tok::TokenKind> ExpectedTokens;
253 ExpectedTokens.push_back(x: tok::identifier);
254 ExpectedTokens.push_back(x: tok::identifier);
255 ExpectedTokens.push_back(x: tok::l_paren);
256 ExpectedTokens.push_back(x: tok::identifier);
257 ExpectedTokens.push_back(x: tok::r_paren);
258
259 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
260 "#define C(x) c M(x)\n"
261 "C(f(M(i)))",
262 ExpectedTokens);
263 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
264}
265
266TEST_F(LexerTest, LexAPI) {
267 std::vector<tok::TokenKind> ExpectedTokens;
268 // Line 1 (after the #defines)
269 ExpectedTokens.push_back(x: tok::l_square);
270 ExpectedTokens.push_back(x: tok::identifier);
271 ExpectedTokens.push_back(x: tok::r_square);
272 ExpectedTokens.push_back(x: tok::l_square);
273 ExpectedTokens.push_back(x: tok::identifier);
274 ExpectedTokens.push_back(x: tok::r_square);
275 // Line 2
276 ExpectedTokens.push_back(x: tok::identifier);
277 ExpectedTokens.push_back(x: tok::identifier);
278 ExpectedTokens.push_back(x: tok::identifier);
279 ExpectedTokens.push_back(x: tok::identifier);
280
281 std::vector<Token> toks = CheckLex(Source: "#define M(x) [x]\n"
282 "#define N(x) x\n"
283 "#define INN(x) x\n"
284 "#define NOF1 INN(val)\n"
285 "#define NOF2 val\n"
286 "M(foo) N([bar])\n"
287 "N(INN(val)) N(NOF1) N(NOF2) N(val)",
288 ExpectedTokens);
289
290 SourceLocation lsqrLoc = toks[0].getLocation();
291 SourceLocation idLoc = toks[1].getLocation();
292 SourceLocation rsqrLoc = toks[2].getLocation();
293 CharSourceRange macroRange = SourceMgr.getExpansionRange(Loc: lsqrLoc);
294
295 SourceLocation Loc;
296 EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
297 EXPECT_EQ(Loc, macroRange.getBegin());
298 EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
299 EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
300 EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
301 EXPECT_EQ(Loc, macroRange.getEnd());
302 EXPECT_TRUE(macroRange.isTokenRange());
303
304 CharSourceRange range = Lexer::makeFileCharRange(
305 Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: idLoc), SM: SourceMgr, LangOpts);
306 EXPECT_TRUE(range.isInvalid());
307 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: idLoc, E: rsqrLoc),
308 SM: SourceMgr, LangOpts);
309 EXPECT_TRUE(range.isInvalid());
310 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc),
311 SM: SourceMgr, LangOpts);
312 EXPECT_TRUE(!range.isTokenRange());
313 EXPECT_EQ(range.getAsRange(),
314 SourceRange(macroRange.getBegin(),
315 macroRange.getEnd().getLocWithOffset(1)));
316
317 StringRef text = Lexer::getSourceText(
318 Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc),
319 SM: SourceMgr, LangOpts);
320 EXPECT_EQ(text, "M(foo)");
321
322 SourceLocation macroLsqrLoc = toks[3].getLocation();
323 SourceLocation macroIdLoc = toks[4].getLocation();
324 SourceLocation macroRsqrLoc = toks[5].getLocation();
325 SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(Loc: macroLsqrLoc);
326 SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(Loc: macroIdLoc);
327 SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(Loc: macroRsqrLoc);
328
329 range = Lexer::makeFileCharRange(
330 Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroIdLoc),
331 SM: SourceMgr, LangOpts);
332 EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
333 range.getAsRange());
334
335 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: macroIdLoc, E: macroRsqrLoc),
336 SM: SourceMgr, LangOpts);
337 EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
338 range.getAsRange());
339
340 macroRange = SourceMgr.getExpansionRange(Loc: macroLsqrLoc);
341 range = Lexer::makeFileCharRange(
342 Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroRsqrLoc),
343 SM: SourceMgr, LangOpts);
344 EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
345 range.getAsRange());
346
347 text = Lexer::getSourceText(
348 Range: CharSourceRange::getTokenRange(R: SourceRange(macroLsqrLoc, macroIdLoc)),
349 SM: SourceMgr, LangOpts);
350 EXPECT_EQ(text, "[bar");
351
352
353 SourceLocation idLoc1 = toks[6].getLocation();
354 SourceLocation idLoc2 = toks[7].getLocation();
355 SourceLocation idLoc3 = toks[8].getLocation();
356 SourceLocation idLoc4 = toks[9].getLocation();
357 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
358 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
359 EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
360 EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
361}
362
363TEST_F(LexerTest, HandlesSplitTokens) {
364 std::vector<tok::TokenKind> ExpectedTokens;
365 // Line 1 (after the #defines)
366 ExpectedTokens.push_back(x: tok::identifier);
367 ExpectedTokens.push_back(x: tok::less);
368 ExpectedTokens.push_back(x: tok::identifier);
369 ExpectedTokens.push_back(x: tok::less);
370 ExpectedTokens.push_back(x: tok::greatergreater);
371 // Line 2
372 ExpectedTokens.push_back(x: tok::identifier);
373 ExpectedTokens.push_back(x: tok::less);
374 ExpectedTokens.push_back(x: tok::identifier);
375 ExpectedTokens.push_back(x: tok::less);
376 ExpectedTokens.push_back(x: tok::greatergreater);
377
378 std::vector<Token> toks = CheckLex(Source: "#define TY ty\n"
379 "#define RANGLE ty<ty<>>\n"
380 "TY<ty<>>\n"
381 "RANGLE",
382 ExpectedTokens);
383
384 SourceLocation outerTyLoc = toks[0].getLocation();
385 SourceLocation innerTyLoc = toks[2].getLocation();
386 SourceLocation gtgtLoc = toks[4].getLocation();
387 // Split the token to simulate the action of the parser and force creation of
388 // an `ExpansionTokenRange`.
389 SourceLocation rangleLoc = PP->SplitToken(TokLoc: gtgtLoc, Length: 1);
390
391 // Verify that it only captures the first greater-then and not the second one.
392 CharSourceRange range = Lexer::makeFileCharRange(
393 Range: CharSourceRange::getTokenRange(B: innerTyLoc, E: rangleLoc), SM: SourceMgr,
394 LangOpts);
395 EXPECT_TRUE(range.isCharRange());
396 EXPECT_EQ(range.getAsRange(),
397 SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
398
399 // Verify case where range begins in a macro expansion.
400 range = Lexer::makeFileCharRange(
401 Range: CharSourceRange::getTokenRange(B: outerTyLoc, E: rangleLoc), SM: SourceMgr,
402 LangOpts);
403 EXPECT_TRUE(range.isCharRange());
404 EXPECT_EQ(range.getAsRange(),
405 SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
406 gtgtLoc.getLocWithOffset(1)));
407
408 SourceLocation macroInnerTyLoc = toks[7].getLocation();
409 SourceLocation macroGtgtLoc = toks[9].getLocation();
410 // Split the token to simulate the action of the parser and force creation of
411 // an `ExpansionTokenRange`.
412 SourceLocation macroRAngleLoc = PP->SplitToken(TokLoc: macroGtgtLoc, Length: 1);
413
414 // Verify that it fails (because it only captures the first greater-then and
415 // not the second one, so it doesn't span the entire macro expansion).
416 range = Lexer::makeFileCharRange(
417 Range: CharSourceRange::getTokenRange(B: macroInnerTyLoc, E: macroRAngleLoc),
418 SM: SourceMgr, LangOpts);
419 EXPECT_TRUE(range.isInvalid());
420}
421
422TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
423 std::vector<Token> toks =
424 Lex(Source: "#define helper1 0\n"
425 "void helper2(const char *, ...);\n"
426 "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
427 "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
428 "void f1() { M2(\"a\", \"b\"); }");
429
430 // Check the file corresponding to the "helper1" macro arg in M2.
431 //
432 // The lexer used to report its size as 31, meaning that the end of the
433 // expansion would be on the *next line* (just past `M2("a", "b")`). Make
434 // sure that we get the correct end location (the comma after "helper1").
435 SourceLocation helper1ArgLoc = toks[20].getLocation();
436 EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
437}
438
439TEST_F(LexerTest, DontOverallocateStringifyArgs) {
440 TrivialModuleLoader ModLoader;
441 auto PP = CreatePP(Source: "\"StrArg\", 5, 'C'", ModLoader);
442
443 llvm::BumpPtrAllocator Allocator;
444 std::array<IdentifierInfo *, 3> ParamList;
445 MacroInfo *MI = PP->AllocateMacroInfo(L: {});
446 MI->setIsFunctionLike();
447 MI->setParameterList(List: ParamList, PPAllocator&: Allocator);
448 EXPECT_EQ(3u, MI->getNumParams());
449 EXPECT_TRUE(MI->isFunctionLike());
450
451 Token Eof;
452 Eof.setKind(tok::eof);
453 std::vector<Token> ArgTokens;
454 while (1) {
455 Token tok;
456 PP->Lex(Result&: tok);
457 if (tok.is(K: tok::eof)) {
458 ArgTokens.push_back(x: Eof);
459 break;
460 }
461 if (tok.is(K: tok::comma))
462 ArgTokens.push_back(x: Eof);
463 else
464 ArgTokens.push_back(x: tok);
465 }
466
467 auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(PP&: *PP); };
468 std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
469 MacroArgs::create(MI, UnexpArgTokens: ArgTokens, VarargsElided: false, PP&: *PP), MacroArgsDeleter);
470 auto StringifyArg = [&](int ArgNo) {
471 return MA->StringifyArgument(ArgToks: MA->getUnexpArgument(Arg: ArgNo), PP&: *PP,
472 /*Charify=*/false, ExpansionLocStart: {}, ExpansionLocEnd: {});
473 };
474 Token Result = StringifyArg(0);
475 EXPECT_EQ(tok::string_literal, Result.getKind());
476 EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
477 Result = StringifyArg(1);
478 EXPECT_EQ(tok::string_literal, Result.getKind());
479 EXPECT_STREQ("\"5\"", Result.getLiteralData());
480 Result = StringifyArg(2);
481 EXPECT_EQ(tok::string_literal, Result.getKind());
482 EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
483#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
484 EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
485#endif
486}
487
488TEST_F(LexerTest, IsNewLineEscapedValid) {
489 auto hasNewLineEscaped = [](const char *S) {
490 return Lexer::isNewLineEscaped(BufferStart: S, Str: S + strlen(s: S) - 1);
491 };
492
493 EXPECT_TRUE(hasNewLineEscaped("\\\r"));
494 EXPECT_TRUE(hasNewLineEscaped("\\\n"));
495 EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
496 EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
497 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
498 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
499
500 EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
501 EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
502 EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
503 EXPECT_FALSE(hasNewLineEscaped("\r"));
504 EXPECT_FALSE(hasNewLineEscaped("\n"));
505 EXPECT_FALSE(hasNewLineEscaped("\r\n"));
506 EXPECT_FALSE(hasNewLineEscaped("\n\r"));
507 EXPECT_FALSE(hasNewLineEscaped("\r\r"));
508 EXPECT_FALSE(hasNewLineEscaped("\n\n"));
509}
510
511TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
512 // Each line should have the same length for
513 // further offset calculation to be more straightforward.
514 const unsigned IdentifierLength = 8;
515 std::string TextToLex = "rabarbar\n"
516 "foo\\\nbar\n"
517 "foo\\\rbar\n"
518 "fo\\\r\nbar\n"
519 "foo\\\n\rba\n";
520 std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
521 std::vector<Token> LexedTokens = CheckLex(Source: TextToLex, ExpectedTokens);
522
523 for (const Token &Tok : LexedTokens) {
524 std::pair<FileID, unsigned> OriginalLocation =
525 SourceMgr.getDecomposedLoc(Loc: Tok.getLocation());
526 for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
527 SourceLocation LookupLocation =
528 Tok.getLocation().getLocWithOffset(Offset);
529
530 std::pair<FileID, unsigned> FoundLocation =
531 SourceMgr.getDecomposedExpansionLoc(
532 Loc: Lexer::GetBeginningOfToken(Loc: LookupLocation, SM: SourceMgr, LangOpts));
533
534 // Check that location returned by the GetBeginningOfToken
535 // is the same as original token location reported by Lexer.
536 EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
537 }
538 }
539}
540
541TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
542 EXPECT_TRUE(Lex(" // \\\n").empty());
543 EXPECT_TRUE(Lex("#include <\\\\").empty());
544 EXPECT_TRUE(Lex("#include <\\\\\n").empty());
545}
546
547TEST_F(LexerTest, StringizingRasString) {
548 // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
549 std::string String1 = R"(foo
550 {"bar":[]}
551 baz)";
552 // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
553 SmallString<128> String2;
554 String2 += String1.c_str();
555
556 // Corner cases.
557 std::string String3 = R"(\
558 \n
559 \\n
560 \\)";
561 SmallString<128> String4;
562 String4 += String3.c_str();
563 std::string String5 = R"(a\
564
565
566 \\b)";
567 SmallString<128> String6;
568 String6 += String5.c_str();
569
570 String1 = Lexer::Stringify(Str: StringRef(String1));
571 Lexer::Stringify(Str&: String2);
572 String3 = Lexer::Stringify(Str: StringRef(String3));
573 Lexer::Stringify(Str&: String4);
574 String5 = Lexer::Stringify(Str: StringRef(String5));
575 Lexer::Stringify(Str&: String6);
576
577 EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)");
578 EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)");
579 EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)");
580 EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)");
581 EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)");
582 EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)");
583}
584
585TEST_F(LexerTest, CharRangeOffByOne) {
586 std::vector<Token> toks = Lex(Source: R"(#define MOO 1
587 void foo() { MOO; })");
588 const Token &moo = toks[5];
589
590 EXPECT_EQ(getSourceText(moo, moo), "MOO");
591
592 SourceRange R{moo.getLocation(), moo.getLocation()};
593
594 EXPECT_TRUE(
595 Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
596 EXPECT_TRUE(
597 Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));
598
599 CharSourceRange CR = Lexer::getAsCharRange(Range: R, SM: SourceMgr, LangOpts);
600
601 EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
602}
603
604TEST_F(LexerTest, FindNextToken) {
605 Lex(Source: "int abcd = 0;\n"
606 "int xyz = abcd;\n");
607 std::vector<std::string> GeneratedByNextToken;
608 SourceLocation Loc =
609 SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID());
610 while (true) {
611 auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts);
612 ASSERT_TRUE(T);
613 if (T->is(K: tok::eof))
614 break;
615 GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T));
616 Loc = T->getLocation();
617 }
618 EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int",
619 "xyz", "=", "abcd", ";"));
620}
621
622TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
623 TrivialModuleLoader ModLoader;
624 auto PP = CreatePP(Source: "", ModLoader);
625 PP->LexTokensUntilEOF();
626 EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()),
627 1U);
628}
629
630TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
631 const llvm::StringLiteral Source = R"cpp(
632 // First line comment.
633 //* Second line comment which is ambigious.
634 ; // Have a non-comment token to make sure something is lexed.
635 )cpp";
636 LangOpts.LineComment = false;
637 auto Toks = Lex(Source);
638 auto &SM = PP->getSourceManager();
639 auto SrcBuffer = SM.getBufferData(FID: SM.getMainFileID());
640 Lexer L(SM.getLocForStartOfFile(FID: SM.getMainFileID()), PP->getLangOpts(),
641 SrcBuffer.data(), SrcBuffer.data(),
642 SrcBuffer.data() + SrcBuffer.size());
643
644 auto ToksView = llvm::ArrayRef(Toks);
645 clang::Token T;
646 EXPECT_FALSE(ToksView.empty());
647 while (!L.LexFromRawLexer(Result&: T)) {
648 ASSERT_TRUE(!ToksView.empty());
649 EXPECT_EQ(T.getKind(), ToksView.front().getKind());
650 ToksView = ToksView.drop_front();
651 }
652 EXPECT_TRUE(ToksView.empty());
653}
654
655TEST(LexerPreambleTest, PreambleBounds) {
656 std::vector<std::string> Cases = {
657 R"cc([[
658 #include <foo>
659 ]]int bar;
660 )cc",
661 R"cc([[
662 #include <foo>
663 ]])cc",
664 R"cc([[
665 // leading comment
666 #include <foo>
667 ]]// trailing comment
668 int bar;
669 )cc",
670 R"cc([[
671 module;
672 #include <foo>
673 ]]module bar;
674 int x;
675 )cc",
676 };
677 for (const auto& Case : Cases) {
678 llvm::Annotations A(Case);
679 clang::LangOptions LangOpts;
680 LangOpts.CPlusPlusModules = true;
681 auto Bounds = Lexer::ComputePreamble(Buffer: A.code(), LangOpts);
682 EXPECT_EQ(Bounds.Size, A.range().End) << Case;
683 }
684}
685
686} // anonymous namespace
687

// Source: clang/unittests/Lex/LexerTest.cpp