1//===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Lex/Lexer.h"
10#include "clang/Basic/Diagnostic.h"
11#include "clang/Basic/DiagnosticOptions.h"
12#include "clang/Basic/FileManager.h"
13#include "clang/Basic/LangOptions.h"
14#include "clang/Basic/SourceLocation.h"
15#include "clang/Basic/SourceManager.h"
16#include "clang/Basic/TargetInfo.h"
17#include "clang/Basic/TargetOptions.h"
18#include "clang/Basic/TokenKinds.h"
19#include "clang/Lex/HeaderSearch.h"
20#include "clang/Lex/HeaderSearchOptions.h"
21#include "clang/Lex/LiteralSupport.h"
22#include "clang/Lex/MacroArgs.h"
23#include "clang/Lex/MacroInfo.h"
24#include "clang/Lex/ModuleLoader.h"
25#include "clang/Lex/Preprocessor.h"
26#include "clang/Lex/PreprocessorOptions.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/Testing/Annotations/Annotations.h"
30#include "gmock/gmock.h"
31#include "gtest/gtest.h"
32#include <memory>
33#include <string>
34#include <vector>
35
36namespace {
37using namespace clang;
38using testing::ElementsAre;
39
40// The test fixture.
41class LexerTest : public ::testing::Test {
42protected:
43 LexerTest()
44 : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
45 Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
46 SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
47 TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
48 Target = TargetInfo::CreateTargetInfo(Diags, Opts&: *TargetOpts);
49 }
50
51 std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
52 TrivialModuleLoader &ModLoader) {
53 std::unique_ptr<llvm::MemoryBuffer> Buf =
54 llvm::MemoryBuffer::getMemBuffer(InputData: Source);
55 SourceMgr.setMainFileID(SourceMgr.createFileID(Buffer: std::move(Buf)));
56
57 HeaderSearchOptions HSOpts;
58 HeaderSearch HeaderInfo(HSOpts, SourceMgr, Diags, LangOpts, Target.get());
59 PreprocessorOptions PPOpts;
60 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
61 args&: PPOpts, args&: Diags, args&: LangOpts, args&: SourceMgr, args&: HeaderInfo, args&: ModLoader,
62 /*IILookup =*/args: nullptr,
63 /*OwnsHeaderSearch =*/args: false);
64 PP->Initialize(Target: *Target);
65 PP->EnterMainSourceFile();
66 return PP;
67 }
68
69 std::vector<Token> Lex(StringRef Source) {
70 TrivialModuleLoader ModLoader;
71 PP = CreatePP(Source, ModLoader);
72
73 std::vector<Token> toks;
74 PP->LexTokensUntilEOF(Tokens: &toks);
75
76 return toks;
77 }
78
79 std::vector<Token> CheckLex(StringRef Source,
80 ArrayRef<tok::TokenKind> ExpectedTokens) {
81 auto toks = Lex(Source);
82 EXPECT_EQ(ExpectedTokens.size(), toks.size());
83 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
84 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
85 }
86
87 return toks;
88 }
89
90 std::string getSourceText(Token Begin, Token End) {
91 bool Invalid;
92 StringRef Str =
93 Lexer::getSourceText(Range: CharSourceRange::getTokenRange(R: SourceRange(
94 Begin.getLocation(), End.getLocation())),
95 SM: SourceMgr, LangOpts, Invalid: &Invalid);
96 if (Invalid)
97 return "<INVALID>";
98 return std::string(Str);
99 }
100
101 FileSystemOptions FileMgrOpts;
102 FileManager FileMgr;
103 IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
104 DiagnosticOptions DiagOpts;
105 DiagnosticsEngine Diags;
106 SourceManager SourceMgr;
107 LangOptions LangOpts;
108 std::shared_ptr<TargetOptions> TargetOpts;
109 IntrusiveRefCntPtr<TargetInfo> Target;
110 std::unique_ptr<Preprocessor> PP;
111};
112
113TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
114 std::vector<tok::TokenKind> ExpectedTokens;
115 ExpectedTokens.push_back(x: tok::identifier);
116 ExpectedTokens.push_back(x: tok::l_paren);
117 ExpectedTokens.push_back(x: tok::identifier);
118 ExpectedTokens.push_back(x: tok::r_paren);
119
120 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
121 "M(f(M(i)))",
122 ExpectedTokens);
123
124 EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
125}
126
127TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
128 std::vector<tok::TokenKind> ExpectedTokens;
129 ExpectedTokens.push_back(x: tok::identifier);
130 ExpectedTokens.push_back(x: tok::identifier);
131
132 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
133 "M(M(i) c)",
134 ExpectedTokens);
135
136 EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
137}
138
139TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
140 std::vector<tok::TokenKind> ExpectedTokens;
141 ExpectedTokens.push_back(x: tok::identifier);
142 ExpectedTokens.push_back(x: tok::identifier);
143 ExpectedTokens.push_back(x: tok::identifier);
144
145 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
146 "M(c c M(i))",
147 ExpectedTokens);
148
149 EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
150}
151
152TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
153 std::vector<tok::TokenKind> ExpectedTokens;
154 ExpectedTokens.push_back(x: tok::identifier);
155 ExpectedTokens.push_back(x: tok::identifier);
156 ExpectedTokens.push_back(x: tok::identifier);
157
158 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
159 "M(M(i) c c)",
160 ExpectedTokens);
161
162 EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
163}
164
165TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
166 std::vector<tok::TokenKind> ExpectedTokens;
167 ExpectedTokens.push_back(x: tok::identifier);
168 ExpectedTokens.push_back(x: tok::identifier);
169 ExpectedTokens.push_back(x: tok::identifier);
170 ExpectedTokens.push_back(x: tok::identifier);
171
172 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
173 "M(c M(i)) M(M(i) c)",
174 ExpectedTokens);
175
176 EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
177}
178
179TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
180 std::vector<tok::TokenKind> ExpectedTokens;
181 ExpectedTokens.push_back(x: tok::identifier);
182 ExpectedTokens.push_back(x: tok::l_paren);
183 ExpectedTokens.push_back(x: tok::identifier);
184 ExpectedTokens.push_back(x: tok::r_paren);
185
186 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
187 "#define C(x) M(x##c)\n"
188 "M(f(C(i)))",
189 ExpectedTokens);
190
191 EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
192}
193
194TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
195 std::vector<tok::TokenKind> ExpectedTokens;
196 ExpectedTokens.push_back(x: tok::identifier);
197 ExpectedTokens.push_back(x: tok::l_paren);
198 ExpectedTokens.push_back(x: tok::identifier);
199 ExpectedTokens.push_back(x: tok::r_paren);
200
201 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
202 "f(M(M(i)))",
203 ExpectedTokens);
204 EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
205}
206
207TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
208 std::vector<tok::TokenKind> ExpectedTokens;
209 ExpectedTokens.push_back(x: tok::identifier);
210 ExpectedTokens.push_back(x: tok::l_paren);
211 ExpectedTokens.push_back(x: tok::identifier);
212 ExpectedTokens.push_back(x: tok::r_paren);
213
214 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
215 "M(f(i))",
216 ExpectedTokens);
217 EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
218}
219
220TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
221 std::vector<tok::TokenKind> ExpectedTokens;
222 ExpectedTokens.push_back(x: tok::identifier);
223 ExpectedTokens.push_back(x: tok::l_paren);
224 ExpectedTokens.push_back(x: tok::identifier);
225 ExpectedTokens.push_back(x: tok::r_paren);
226
227 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
228 "#define C(x) x\n"
229 "f(C(M(i)))",
230 ExpectedTokens);
231 EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
232}
233
234TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
235 std::vector<tok::TokenKind> ExpectedTokens;
236 ExpectedTokens.push_back(x: tok::identifier);
237 ExpectedTokens.push_back(x: tok::l_paren);
238 ExpectedTokens.push_back(x: tok::identifier);
239 ExpectedTokens.push_back(x: tok::identifier);
240 ExpectedTokens.push_back(x: tok::r_paren);
241
242 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
243 "#define C(x) c x\n"
244 "f(C(M(i)))",
245 ExpectedTokens);
246 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
247}
248
249TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
250 std::vector<tok::TokenKind> ExpectedTokens;
251 ExpectedTokens.push_back(x: tok::identifier);
252 ExpectedTokens.push_back(x: tok::identifier);
253 ExpectedTokens.push_back(x: tok::l_paren);
254 ExpectedTokens.push_back(x: tok::identifier);
255 ExpectedTokens.push_back(x: tok::r_paren);
256
257 std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n"
258 "#define C(x) c M(x)\n"
259 "C(f(M(i)))",
260 ExpectedTokens);
261 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
262}
263
264TEST_F(LexerTest, LexAPI) {
265 std::vector<tok::TokenKind> ExpectedTokens;
266 // Line 1 (after the #defines)
267 ExpectedTokens.push_back(x: tok::l_square);
268 ExpectedTokens.push_back(x: tok::identifier);
269 ExpectedTokens.push_back(x: tok::r_square);
270 ExpectedTokens.push_back(x: tok::l_square);
271 ExpectedTokens.push_back(x: tok::identifier);
272 ExpectedTokens.push_back(x: tok::r_square);
273 // Line 2
274 ExpectedTokens.push_back(x: tok::identifier);
275 ExpectedTokens.push_back(x: tok::identifier);
276 ExpectedTokens.push_back(x: tok::identifier);
277 ExpectedTokens.push_back(x: tok::identifier);
278
279 std::vector<Token> toks = CheckLex(Source: "#define M(x) [x]\n"
280 "#define N(x) x\n"
281 "#define INN(x) x\n"
282 "#define NOF1 INN(val)\n"
283 "#define NOF2 val\n"
284 "M(foo) N([bar])\n"
285 "N(INN(val)) N(NOF1) N(NOF2) N(val)",
286 ExpectedTokens);
287
288 SourceLocation lsqrLoc = toks[0].getLocation();
289 SourceLocation idLoc = toks[1].getLocation();
290 SourceLocation rsqrLoc = toks[2].getLocation();
291 CharSourceRange macroRange = SourceMgr.getExpansionRange(Loc: lsqrLoc);
292
293 SourceLocation Loc;
294 EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
295 EXPECT_EQ(Loc, macroRange.getBegin());
296 EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
297 EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
298 EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
299 EXPECT_EQ(Loc, macroRange.getEnd());
300 EXPECT_TRUE(macroRange.isTokenRange());
301
302 CharSourceRange range = Lexer::makeFileCharRange(
303 Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: idLoc), SM: SourceMgr, LangOpts);
304 EXPECT_TRUE(range.isInvalid());
305 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: idLoc, E: rsqrLoc),
306 SM: SourceMgr, LangOpts);
307 EXPECT_TRUE(range.isInvalid());
308 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc),
309 SM: SourceMgr, LangOpts);
310 EXPECT_TRUE(!range.isTokenRange());
311 EXPECT_EQ(range.getAsRange(),
312 SourceRange(macroRange.getBegin(),
313 macroRange.getEnd().getLocWithOffset(1)));
314
315 StringRef text = Lexer::getSourceText(
316 Range: CharSourceRange::getTokenRange(B: lsqrLoc, E: rsqrLoc),
317 SM: SourceMgr, LangOpts);
318 EXPECT_EQ(text, "M(foo)");
319
320 SourceLocation macroLsqrLoc = toks[3].getLocation();
321 SourceLocation macroIdLoc = toks[4].getLocation();
322 SourceLocation macroRsqrLoc = toks[5].getLocation();
323 SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(Loc: macroLsqrLoc);
324 SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(Loc: macroIdLoc);
325 SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(Loc: macroRsqrLoc);
326
327 range = Lexer::makeFileCharRange(
328 Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroIdLoc),
329 SM: SourceMgr, LangOpts);
330 EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
331 range.getAsRange());
332
333 range = Lexer::makeFileCharRange(Range: CharSourceRange::getTokenRange(B: macroIdLoc, E: macroRsqrLoc),
334 SM: SourceMgr, LangOpts);
335 EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
336 range.getAsRange());
337
338 macroRange = SourceMgr.getExpansionRange(Loc: macroLsqrLoc);
339 range = Lexer::makeFileCharRange(
340 Range: CharSourceRange::getTokenRange(B: macroLsqrLoc, E: macroRsqrLoc),
341 SM: SourceMgr, LangOpts);
342 EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
343 range.getAsRange());
344
345 text = Lexer::getSourceText(
346 Range: CharSourceRange::getTokenRange(R: SourceRange(macroLsqrLoc, macroIdLoc)),
347 SM: SourceMgr, LangOpts);
348 EXPECT_EQ(text, "[bar");
349
350
351 SourceLocation idLoc1 = toks[6].getLocation();
352 SourceLocation idLoc2 = toks[7].getLocation();
353 SourceLocation idLoc3 = toks[8].getLocation();
354 SourceLocation idLoc4 = toks[9].getLocation();
355 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
356 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
357 EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
358 EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
359}
360
361TEST_F(LexerTest, HandlesSplitTokens) {
362 std::vector<tok::TokenKind> ExpectedTokens;
363 // Line 1 (after the #defines)
364 ExpectedTokens.push_back(x: tok::identifier);
365 ExpectedTokens.push_back(x: tok::less);
366 ExpectedTokens.push_back(x: tok::identifier);
367 ExpectedTokens.push_back(x: tok::less);
368 ExpectedTokens.push_back(x: tok::greatergreater);
369 // Line 2
370 ExpectedTokens.push_back(x: tok::identifier);
371 ExpectedTokens.push_back(x: tok::less);
372 ExpectedTokens.push_back(x: tok::identifier);
373 ExpectedTokens.push_back(x: tok::less);
374 ExpectedTokens.push_back(x: tok::greatergreater);
375
376 std::vector<Token> toks = CheckLex(Source: "#define TY ty\n"
377 "#define RANGLE ty<ty<>>\n"
378 "TY<ty<>>\n"
379 "RANGLE",
380 ExpectedTokens);
381
382 SourceLocation outerTyLoc = toks[0].getLocation();
383 SourceLocation innerTyLoc = toks[2].getLocation();
384 SourceLocation gtgtLoc = toks[4].getLocation();
385 // Split the token to simulate the action of the parser and force creation of
386 // an `ExpansionTokenRange`.
387 SourceLocation rangleLoc = PP->SplitToken(TokLoc: gtgtLoc, Length: 1);
388
389 // Verify that it only captures the first greater-then and not the second one.
390 CharSourceRange range = Lexer::makeFileCharRange(
391 Range: CharSourceRange::getTokenRange(B: innerTyLoc, E: rangleLoc), SM: SourceMgr,
392 LangOpts);
393 EXPECT_TRUE(range.isCharRange());
394 EXPECT_EQ(range.getAsRange(),
395 SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
396
397 // Verify case where range begins in a macro expansion.
398 range = Lexer::makeFileCharRange(
399 Range: CharSourceRange::getTokenRange(B: outerTyLoc, E: rangleLoc), SM: SourceMgr,
400 LangOpts);
401 EXPECT_TRUE(range.isCharRange());
402 EXPECT_EQ(range.getAsRange(),
403 SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
404 gtgtLoc.getLocWithOffset(1)));
405
406 SourceLocation macroInnerTyLoc = toks[7].getLocation();
407 SourceLocation macroGtgtLoc = toks[9].getLocation();
408 // Split the token to simulate the action of the parser and force creation of
409 // an `ExpansionTokenRange`.
410 SourceLocation macroRAngleLoc = PP->SplitToken(TokLoc: macroGtgtLoc, Length: 1);
411
412 // Verify that it fails (because it only captures the first greater-then and
413 // not the second one, so it doesn't span the entire macro expansion).
414 range = Lexer::makeFileCharRange(
415 Range: CharSourceRange::getTokenRange(B: macroInnerTyLoc, E: macroRAngleLoc),
416 SM: SourceMgr, LangOpts);
417 EXPECT_TRUE(range.isInvalid());
418}
419
420TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
421 std::vector<Token> toks =
422 Lex(Source: "#define helper1 0\n"
423 "void helper2(const char *, ...);\n"
424 "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
425 "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
426 "void f1() { M2(\"a\", \"b\"); }");
427
428 // Check the file corresponding to the "helper1" macro arg in M2.
429 //
430 // The lexer used to report its size as 31, meaning that the end of the
431 // expansion would be on the *next line* (just past `M2("a", "b")`). Make
432 // sure that we get the correct end location (the comma after "helper1").
433 SourceLocation helper1ArgLoc = toks[20].getLocation();
434 EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
435}
436
437TEST_F(LexerTest, DontOverallocateStringifyArgs) {
438 TrivialModuleLoader ModLoader;
439 auto PP = CreatePP(Source: "\"StrArg\", 5, 'C'", ModLoader);
440
441 llvm::BumpPtrAllocator Allocator;
442 std::array<IdentifierInfo *, 3> ParamList;
443 MacroInfo *MI = PP->AllocateMacroInfo(L: {});
444 MI->setIsFunctionLike();
445 MI->setParameterList(List: ParamList, PPAllocator&: Allocator);
446 EXPECT_EQ(3u, MI->getNumParams());
447 EXPECT_TRUE(MI->isFunctionLike());
448
449 Token Eof;
450 Eof.setKind(tok::eof);
451 std::vector<Token> ArgTokens;
452 while (1) {
453 Token tok;
454 PP->Lex(Result&: tok);
455 if (tok.is(K: tok::eof)) {
456 ArgTokens.push_back(x: Eof);
457 break;
458 }
459 if (tok.is(K: tok::comma))
460 ArgTokens.push_back(x: Eof);
461 else
462 ArgTokens.push_back(x: tok);
463 }
464
465 auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(PP&: *PP); };
466 std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
467 MacroArgs::create(MI, UnexpArgTokens: ArgTokens, VarargsElided: false, PP&: *PP), MacroArgsDeleter);
468 auto StringifyArg = [&](int ArgNo) {
469 return MA->StringifyArgument(ArgToks: MA->getUnexpArgument(Arg: ArgNo), PP&: *PP,
470 /*Charify=*/false, ExpansionLocStart: {}, ExpansionLocEnd: {});
471 };
472 Token Result = StringifyArg(0);
473 EXPECT_EQ(tok::string_literal, Result.getKind());
474 EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
475 Result = StringifyArg(1);
476 EXPECT_EQ(tok::string_literal, Result.getKind());
477 EXPECT_STREQ("\"5\"", Result.getLiteralData());
478 Result = StringifyArg(2);
479 EXPECT_EQ(tok::string_literal, Result.getKind());
480 EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
481#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
482 EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
483#endif
484}
485
486TEST_F(LexerTest, IsNewLineEscapedValid) {
487 auto hasNewLineEscaped = [](const char *S) {
488 return Lexer::isNewLineEscaped(BufferStart: S, Str: S + strlen(s: S) - 1);
489 };
490
491 EXPECT_TRUE(hasNewLineEscaped("\\\r"));
492 EXPECT_TRUE(hasNewLineEscaped("\\\n"));
493 EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
494 EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
495 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
496 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
497
498 EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
499 EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
500 EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
501 EXPECT_FALSE(hasNewLineEscaped("\r"));
502 EXPECT_FALSE(hasNewLineEscaped("\n"));
503 EXPECT_FALSE(hasNewLineEscaped("\r\n"));
504 EXPECT_FALSE(hasNewLineEscaped("\n\r"));
505 EXPECT_FALSE(hasNewLineEscaped("\r\r"));
506 EXPECT_FALSE(hasNewLineEscaped("\n\n"));
507}
508
509TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
510 // Each line should have the same length for
511 // further offset calculation to be more straightforward.
512 const unsigned IdentifierLength = 8;
513 std::string TextToLex = "rabarbar\n"
514 "foo\\\nbar\n"
515 "foo\\\rbar\n"
516 "fo\\\r\nbar\n"
517 "foo\\\n\rba\n";
518 std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
519 std::vector<Token> LexedTokens = CheckLex(Source: TextToLex, ExpectedTokens);
520
521 for (const Token &Tok : LexedTokens) {
522 std::pair<FileID, unsigned> OriginalLocation =
523 SourceMgr.getDecomposedLoc(Loc: Tok.getLocation());
524 for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
525 SourceLocation LookupLocation =
526 Tok.getLocation().getLocWithOffset(Offset);
527
528 std::pair<FileID, unsigned> FoundLocation =
529 SourceMgr.getDecomposedExpansionLoc(
530 Loc: Lexer::GetBeginningOfToken(Loc: LookupLocation, SM: SourceMgr, LangOpts));
531
532 // Check that location returned by the GetBeginningOfToken
533 // is the same as original token location reported by Lexer.
534 EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
535 }
536 }
537}
538
539TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
540 EXPECT_TRUE(Lex(" // \\\n").empty());
541 EXPECT_TRUE(Lex("#include <\\\\").empty());
542 EXPECT_TRUE(Lex("#include <\\\\\n").empty());
543}
544
545TEST_F(LexerTest, StringizingRasString) {
546 // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
547 std::string String1 = R"(foo
548 {"bar":[]}
549 baz)";
550 // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
551 SmallString<128> String2;
552 String2 += String1.c_str();
553
554 // Corner cases.
555 std::string String3 = R"(\
556 \n
557 \\n
558 \\)";
559 SmallString<128> String4;
560 String4 += String3.c_str();
561 std::string String5 = R"(a\
562
563
564 \\b)";
565 SmallString<128> String6;
566 String6 += String5.c_str();
567
568 String1 = Lexer::Stringify(Str: StringRef(String1));
569 Lexer::Stringify(Str&: String2);
570 String3 = Lexer::Stringify(Str: StringRef(String3));
571 Lexer::Stringify(Str&: String4);
572 String5 = Lexer::Stringify(Str: StringRef(String5));
573 Lexer::Stringify(Str&: String6);
574
575 EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)");
576 EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)");
577 EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)");
578 EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)");
579 EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)");
580 EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)");
581}
582
583TEST_F(LexerTest, CharRangeOffByOne) {
584 std::vector<Token> toks = Lex(Source: R"(#define MOO 1
585 void foo() { MOO; })");
586 const Token &moo = toks[5];
587
588 EXPECT_EQ(getSourceText(moo, moo), "MOO");
589
590 SourceRange R{moo.getLocation(), moo.getLocation()};
591
592 EXPECT_TRUE(
593 Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
594 EXPECT_TRUE(
595 Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));
596
597 CharSourceRange CR = Lexer::getAsCharRange(Range: R, SM: SourceMgr, LangOpts);
598
599 EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
600}
601
602TEST_F(LexerTest, FindNextToken) {
603 Lex(Source: "int abcd = 0;\n"
604 "// A comment.\n"
605 "int xyz = abcd;\n");
606 std::vector<std::string> GeneratedByNextToken;
607 SourceLocation Loc =
608 SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID());
609 while (true) {
610 auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts);
611 ASSERT_TRUE(T);
612 if (T->is(K: tok::eof))
613 break;
614 GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T));
615 Loc = T->getLocation();
616 }
617 EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int",
618 "xyz", "=", "abcd", ";"));
619}
620
621TEST_F(LexerTest, FindNextTokenIncludingComments) {
622 Lex(Source: "int abcd = 0;\n"
623 "// A comment.\n"
624 "int xyz = abcd;\n");
625 std::vector<std::string> GeneratedByNextToken;
626 SourceLocation Loc =
627 SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID());
628 while (true) {
629 auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true);
630 ASSERT_TRUE(T);
631 if (T->is(K: tok::eof))
632 break;
633 GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T));
634 Loc = T->getLocation();
635 }
636 EXPECT_THAT(GeneratedByNextToken,
637 ElementsAre("abcd", "=", "0", ";", "// A comment.", "int", "xyz",
638 "=", "abcd", ";"));
639}
640
641TEST_F(LexerTest, FindPreviousToken) {
642 Lex(Source: "int abcd = 0;\n"
643 "// A comment.\n"
644 "int xyz = abcd;\n");
645 std::vector<std::string> GeneratedByPrevToken;
646 SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID());
647 while (true) {
648 auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: false);
649 if (!T.has_value())
650 break;
651 GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T));
652 Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts);
653 }
654 EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";", "abcd", "=", "xyz", "int",
655 ";", "0", "=", "abcd", "int"));
656}
657
658TEST_F(LexerTest, FindPreviousTokenIncludingComments) {
659 Lex(Source: "int abcd = 0;\n"
660 "// A comment.\n"
661 "int xyz = abcd;\n");
662 std::vector<std::string> GeneratedByPrevToken;
663 SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID());
664 while (true) {
665 auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true);
666 if (!T.has_value())
667 break;
668 GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T));
669 Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts);
670 }
671 EXPECT_THAT(GeneratedByPrevToken,
672 ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";",
673 "0", "=", "abcd", "int"));
674}
675
676TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
677 TrivialModuleLoader ModLoader;
678 auto PP = CreatePP(Source: "", ModLoader);
679 PP->LexTokensUntilEOF();
680 EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()),
681 1U);
682}
683
684TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
685 const llvm::StringLiteral Source = R"cpp(
686 // First line comment.
687 //* Second line comment which is ambigious.
688 ; // Have a non-comment token to make sure something is lexed.
689 )cpp";
690 LangOpts.LineComment = false;
691 auto Toks = Lex(Source);
692 auto &SM = PP->getSourceManager();
693 auto SrcBuffer = SM.getBufferData(FID: SM.getMainFileID());
694 Lexer L(SM.getLocForStartOfFile(FID: SM.getMainFileID()), PP->getLangOpts(),
695 SrcBuffer.data(), SrcBuffer.data(),
696 SrcBuffer.data() + SrcBuffer.size());
697
698 auto ToksView = llvm::ArrayRef(Toks);
699 clang::Token T;
700 EXPECT_FALSE(ToksView.empty());
701 while (!L.LexFromRawLexer(Result&: T)) {
702 ASSERT_TRUE(!ToksView.empty());
703 EXPECT_EQ(T.getKind(), ToksView.front().getKind());
704 ToksView = ToksView.drop_front();
705 }
706 EXPECT_TRUE(ToksView.empty());
707}
708
709TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) {
710 const llvm::StringLiteral Source = R"cc(
711 #define ONE \
712 1
713
714 int i = ONE;
715 )cc";
716 std::vector<Token> Toks =
717 CheckLex(Source, ExpectedTokens: {tok::kw_int, tok::identifier, tok::equal,
718 tok::numeric_constant, tok::semi});
719
720 // Set up by getting the raw token for the `1` in the macro definition.
721 const Token &OneExpanded = Toks[3];
722 Token Tok;
723 ASSERT_FALSE(
724 Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts));
725 // The `ONE`.
726 ASSERT_EQ(Tok.getKind(), tok::raw_identifier);
727 ASSERT_FALSE(
728 Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()),
729 Tok, SourceMgr, LangOpts));
730 // The `1` in the macro definition.
731 ASSERT_EQ(Tok.getKind(), tok::numeric_constant);
732
733 // Go back 4 characters: two spaces, one newline, and the backslash.
734 SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(Offset: -4);
735 // Expect true (=failure) because the whitespace immediately after the
736 // escaped newline is not ignored.
737 EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts,
738 /*IgnoreWhiteSpace=*/false));
739}
740
741TEST(LexerPreambleTest, PreambleBounds) {
742 std::vector<std::string> Cases = {
743 R"cc([[
744 #include <foo>
745 ]]int bar;
746 )cc",
747 R"cc([[
748 #include <foo>
749 ]])cc",
750 R"cc([[
751 // leading comment
752 #include <foo>
753 ]]// trailing comment
754 int bar;
755 )cc",
756 R"cc([[
757 module;
758 #include <foo>
759 ]]module bar;
760 int x;
761 )cc",
762 };
763 for (const auto& Case : Cases) {
764 llvm::Annotations A(Case);
765 clang::LangOptions LangOpts;
766 LangOpts.CPlusPlusModules = true;
767 auto Bounds = Lexer::ComputePreamble(Buffer: A.code(), LangOpts);
768 EXPECT_EQ(Bounds.Size, A.range().End) << Case;
769 }
770}
771
772} // anonymous namespace
773

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of clang/unittests/Lex/LexerTest.cpp