| 1 | //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "clang/Lex/Lexer.h" |
| 10 | #include "clang/Basic/Diagnostic.h" |
| 11 | #include "clang/Basic/DiagnosticOptions.h" |
| 12 | #include "clang/Basic/FileManager.h" |
| 13 | #include "clang/Basic/LangOptions.h" |
| 14 | #include "clang/Basic/SourceLocation.h" |
| 15 | #include "clang/Basic/SourceManager.h" |
| 16 | #include "clang/Basic/TargetInfo.h" |
| 17 | #include "clang/Basic/TargetOptions.h" |
| 18 | #include "clang/Basic/TokenKinds.h" |
| 19 | #include "clang/Lex/HeaderSearch.h" |
| 20 | #include "clang/Lex/HeaderSearchOptions.h" |
| 21 | #include "clang/Lex/LiteralSupport.h" |
| 22 | #include "clang/Lex/MacroArgs.h" |
| 23 | #include "clang/Lex/MacroInfo.h" |
| 24 | #include "clang/Lex/ModuleLoader.h" |
| 25 | #include "clang/Lex/Preprocessor.h" |
| 26 | #include "clang/Lex/PreprocessorOptions.h" |
| 27 | #include "llvm/ADT/ArrayRef.h" |
| 28 | #include "llvm/ADT/StringRef.h" |
| 29 | #include "llvm/Testing/Annotations/Annotations.h" |
| 30 | #include "gmock/gmock.h" |
| 31 | #include "gtest/gtest.h" |
| 32 | #include <memory> |
| 33 | #include <string> |
| 34 | #include <vector> |
| 35 | |
| 36 | namespace { |
| 37 | using namespace clang; |
| 38 | using testing::ElementsAre; |
| 39 | |
| 40 | // The test fixture. |
| 41 | class LexerTest : public ::testing::Test { |
| 42 | protected: |
| 43 | LexerTest() |
| 44 | : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), |
| 45 | Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()), |
| 46 | SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) { |
| 47 | TargetOpts->Triple = "x86_64-apple-darwin11.1.0" ; |
| 48 | Target = TargetInfo::CreateTargetInfo(Diags, Opts&: *TargetOpts); |
| 49 | } |
| 50 | |
| 51 | std::unique_ptr<Preprocessor> CreatePP(StringRef Source, |
| 52 | TrivialModuleLoader &ModLoader) { |
| 53 | std::unique_ptr<llvm::MemoryBuffer> Buf = |
| 54 | llvm::MemoryBuffer::getMemBuffer(InputData: Source); |
| 55 | SourceMgr.setMainFileID(SourceMgr.createFileID(Buffer: std::move(Buf))); |
| 56 | |
| 57 | HeaderSearchOptions HSOpts; |
| 58 | HeaderSearch (HSOpts, SourceMgr, Diags, LangOpts, Target.get()); |
| 59 | PreprocessorOptions PPOpts; |
| 60 | std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( |
| 61 | args&: PPOpts, args&: Diags, args&: LangOpts, args&: SourceMgr, args&: HeaderInfo, args&: ModLoader, |
| 62 | /*IILookup =*/args: nullptr, |
| 63 | /*OwnsHeaderSearch =*/args: false); |
| 64 | PP->Initialize(Target: *Target); |
| 65 | PP->EnterMainSourceFile(); |
| 66 | return PP; |
| 67 | } |
| 68 | |
| 69 | std::vector<Token> Lex(StringRef Source) { |
| 70 | TrivialModuleLoader ModLoader; |
| 71 | PP = CreatePP(Source, ModLoader); |
| 72 | |
| 73 | std::vector<Token> toks; |
| 74 | PP->LexTokensUntilEOF(Tokens: &toks); |
| 75 | |
| 76 | return toks; |
| 77 | } |
| 78 | |
| 79 | std::vector<Token> CheckLex(StringRef Source, |
| 80 | ArrayRef<tok::TokenKind> ExpectedTokens) { |
| 81 | auto toks = Lex(Source); |
| 82 | EXPECT_EQ(ExpectedTokens.size(), toks.size()); |
| 83 | for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { |
| 84 | EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); |
| 85 | } |
| 86 | |
| 87 | return toks; |
| 88 | } |
| 89 | |
| 90 | std::string getSourceText(Token Begin, Token End) { |
| 91 | bool Invalid; |
| 92 | StringRef Str = |
| 93 | Lexer::getSourceText(Range: CharSourceRange::getTokenRange(R: SourceRange( |
| 94 | Begin.getLocation(), End.getLocation())), |
| 95 | SM: SourceMgr, LangOpts, Invalid: &Invalid); |
| 96 | if (Invalid) |
| 97 | return "<INVALID>" ; |
| 98 | return std::string(Str); |
| 99 | } |
| 100 | |
| 101 | FileSystemOptions FileMgrOpts; |
| 102 | FileManager FileMgr; |
| 103 | IntrusiveRefCntPtr<DiagnosticIDs> DiagID; |
| 104 | DiagnosticOptions DiagOpts; |
| 105 | DiagnosticsEngine Diags; |
| 106 | SourceManager SourceMgr; |
| 107 | LangOptions LangOpts; |
| 108 | std::shared_ptr<TargetOptions> TargetOpts; |
| 109 | IntrusiveRefCntPtr<TargetInfo> Target; |
| 110 | std::unique_ptr<Preprocessor> PP; |
| 111 | }; |
| 112 | |
| 113 | TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { |
| 114 | std::vector<tok::TokenKind> ExpectedTokens; |
| 115 | ExpectedTokens.push_back(x: tok::identifier); |
| 116 | ExpectedTokens.push_back(x: tok::l_paren); |
| 117 | ExpectedTokens.push_back(x: tok::identifier); |
| 118 | ExpectedTokens.push_back(x: tok::r_paren); |
| 119 | |
| 120 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 121 | "M(f(M(i)))" , |
| 122 | ExpectedTokens); |
| 123 | |
| 124 | EXPECT_EQ("M(i)" , getSourceText(toks[2], toks[2])); |
| 125 | } |
| 126 | |
| 127 | TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { |
| 128 | std::vector<tok::TokenKind> ExpectedTokens; |
| 129 | ExpectedTokens.push_back(x: tok::identifier); |
| 130 | ExpectedTokens.push_back(x: tok::identifier); |
| 131 | |
| 132 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 133 | "M(M(i) c)" , |
| 134 | ExpectedTokens); |
| 135 | |
| 136 | EXPECT_EQ("M(i)" , getSourceText(toks[0], toks[0])); |
| 137 | } |
| 138 | |
| 139 | TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { |
| 140 | std::vector<tok::TokenKind> ExpectedTokens; |
| 141 | ExpectedTokens.push_back(x: tok::identifier); |
| 142 | ExpectedTokens.push_back(x: tok::identifier); |
| 143 | ExpectedTokens.push_back(x: tok::identifier); |
| 144 | |
| 145 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 146 | "M(c c M(i))" , |
| 147 | ExpectedTokens); |
| 148 | |
| 149 | EXPECT_EQ("c M(i)" , getSourceText(toks[1], toks[2])); |
| 150 | } |
| 151 | |
| 152 | TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { |
| 153 | std::vector<tok::TokenKind> ExpectedTokens; |
| 154 | ExpectedTokens.push_back(x: tok::identifier); |
| 155 | ExpectedTokens.push_back(x: tok::identifier); |
| 156 | ExpectedTokens.push_back(x: tok::identifier); |
| 157 | |
| 158 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 159 | "M(M(i) c c)" , |
| 160 | ExpectedTokens); |
| 161 | |
| 162 | EXPECT_EQ("M(i) c" , getSourceText(toks[0], toks[1])); |
| 163 | } |
| 164 | |
| 165 | TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { |
| 166 | std::vector<tok::TokenKind> ExpectedTokens; |
| 167 | ExpectedTokens.push_back(x: tok::identifier); |
| 168 | ExpectedTokens.push_back(x: tok::identifier); |
| 169 | ExpectedTokens.push_back(x: tok::identifier); |
| 170 | ExpectedTokens.push_back(x: tok::identifier); |
| 171 | |
| 172 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 173 | "M(c M(i)) M(M(i) c)" , |
| 174 | ExpectedTokens); |
| 175 | |
| 176 | EXPECT_EQ("<INVALID>" , getSourceText(toks[1], toks[2])); |
| 177 | } |
| 178 | |
| 179 | TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { |
| 180 | std::vector<tok::TokenKind> ExpectedTokens; |
| 181 | ExpectedTokens.push_back(x: tok::identifier); |
| 182 | ExpectedTokens.push_back(x: tok::l_paren); |
| 183 | ExpectedTokens.push_back(x: tok::identifier); |
| 184 | ExpectedTokens.push_back(x: tok::r_paren); |
| 185 | |
| 186 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 187 | "#define C(x) M(x##c)\n" |
| 188 | "M(f(C(i)))" , |
| 189 | ExpectedTokens); |
| 190 | |
| 191 | EXPECT_EQ("C(i)" , getSourceText(toks[2], toks[2])); |
| 192 | } |
| 193 | |
| 194 | TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { |
| 195 | std::vector<tok::TokenKind> ExpectedTokens; |
| 196 | ExpectedTokens.push_back(x: tok::identifier); |
| 197 | ExpectedTokens.push_back(x: tok::l_paren); |
| 198 | ExpectedTokens.push_back(x: tok::identifier); |
| 199 | ExpectedTokens.push_back(x: tok::r_paren); |
| 200 | |
| 201 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 202 | "f(M(M(i)))" , |
| 203 | ExpectedTokens); |
| 204 | EXPECT_EQ("M(M(i))" , getSourceText(toks[2], toks[2])); |
| 205 | } |
| 206 | |
| 207 | TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { |
| 208 | std::vector<tok::TokenKind> ExpectedTokens; |
| 209 | ExpectedTokens.push_back(x: tok::identifier); |
| 210 | ExpectedTokens.push_back(x: tok::l_paren); |
| 211 | ExpectedTokens.push_back(x: tok::identifier); |
| 212 | ExpectedTokens.push_back(x: tok::r_paren); |
| 213 | |
| 214 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 215 | "M(f(i))" , |
| 216 | ExpectedTokens); |
| 217 | EXPECT_EQ("i" , getSourceText(toks[2], toks[2])); |
| 218 | } |
| 219 | |
| 220 | TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { |
| 221 | std::vector<tok::TokenKind> ExpectedTokens; |
| 222 | ExpectedTokens.push_back(x: tok::identifier); |
| 223 | ExpectedTokens.push_back(x: tok::l_paren); |
| 224 | ExpectedTokens.push_back(x: tok::identifier); |
| 225 | ExpectedTokens.push_back(x: tok::r_paren); |
| 226 | |
| 227 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 228 | "#define C(x) x\n" |
| 229 | "f(C(M(i)))" , |
| 230 | ExpectedTokens); |
| 231 | EXPECT_EQ("C(M(i))" , getSourceText(toks[2], toks[2])); |
| 232 | } |
| 233 | |
| 234 | TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { |
| 235 | std::vector<tok::TokenKind> ExpectedTokens; |
| 236 | ExpectedTokens.push_back(x: tok::identifier); |
| 237 | ExpectedTokens.push_back(x: tok::l_paren); |
| 238 | ExpectedTokens.push_back(x: tok::identifier); |
| 239 | ExpectedTokens.push_back(x: tok::identifier); |
| 240 | ExpectedTokens.push_back(x: tok::r_paren); |
| 241 | |
| 242 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 243 | "#define C(x) c x\n" |
| 244 | "f(C(M(i)))" , |
| 245 | ExpectedTokens); |
| 246 | EXPECT_EQ("M(i)" , getSourceText(toks[3], toks[3])); |
| 247 | } |
| 248 | |
| 249 | TEST_F(LexerTest, GetSourceTextExpandsRecursively) { |
| 250 | std::vector<tok::TokenKind> ExpectedTokens; |
| 251 | ExpectedTokens.push_back(x: tok::identifier); |
| 252 | ExpectedTokens.push_back(x: tok::identifier); |
| 253 | ExpectedTokens.push_back(x: tok::l_paren); |
| 254 | ExpectedTokens.push_back(x: tok::identifier); |
| 255 | ExpectedTokens.push_back(x: tok::r_paren); |
| 256 | |
| 257 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 258 | "#define C(x) c M(x)\n" |
| 259 | "C(f(M(i)))" , |
| 260 | ExpectedTokens); |
| 261 | EXPECT_EQ("M(i)" , getSourceText(toks[3], toks[3])); |
| 262 | } |
| 263 | |
TEST_F(LexerTest, LexAPI) {
  // Exercises several static Lexer queries -- isAtStartOfMacroExpansion,
  // isAtEndOfMacroExpansion, makeFileCharRange, getSourceText, and
  // getImmediateMacroName -- over tokens produced both by a macro body
  // ("[x]") and via macro arguments.
  std::vector<tok::TokenKind> ExpectedTokens;
  // Line 1 (after the #defines)
  ExpectedTokens.push_back(tok::l_square);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::r_square);
  ExpectedTokens.push_back(tok::l_square);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::r_square);
  // Line 2
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::identifier);

  std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
                                     "#define N(x) x\n"
                                     "#define INN(x) x\n"
                                     "#define NOF1 INN(val)\n"
                                     "#define NOF2 val\n"
                                     "M(foo) N([bar])\n"
                                     "N(INN(val)) N(NOF1) N(NOF2) N(val)",
                                     ExpectedTokens);

  // Tokens [0..2] come from the body of M ("[x]"): one shared expansion.
  SourceLocation lsqrLoc = toks[0].getLocation();
  SourceLocation idLoc = toks[1].getLocation();
  SourceLocation rsqrLoc = toks[2].getLocation();
  CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);

  // Only the first/last tokens of the expansion report start/end of it.
  SourceLocation Loc;
  EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getBegin());
  EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getEnd());
  EXPECT_TRUE(macroRange.isTokenRange());

  // Ranges covering only part of a macro expansion cannot be mapped into a
  // file range...
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  // ...but the full expansion maps to a char range spanning "M(foo)".
  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_TRUE(!range.isTokenRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(macroRange.getBegin(),
                        macroRange.getEnd().getLocWithOffset(1)));

  StringRef text = Lexer::getSourceText(
      CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
      SourceMgr, LangOpts);
  EXPECT_EQ(text, "M(foo)");

  // Tokens [3..5] are spelled inside the macro *argument* "[bar]" of N, so
  // partial ranges can be mapped back to their file spelling locations.
  SourceLocation macroLsqrLoc = toks[3].getLocation();
  SourceLocation macroIdLoc = toks[4].getLocation();
  SourceLocation macroRsqrLoc = toks[5].getLocation();
  SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
  SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
  SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);

  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
      SourceMgr, LangOpts);
  // End is one past "bar" (offset 3 == length of "bar").
  EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
            range.getAsRange());

  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
            range.getAsRange());

  // The whole argument maps onto its expansion range in the file.
  macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
      SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
            range.getAsRange());

  text = Lexer::getSourceText(
      CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
      SourceMgr, LangOpts);
  EXPECT_EQ(text, "[bar");


  // getImmediateMacroName: the macro whose expansion most directly produced
  // each `val` token on line 2.
  SourceLocation idLoc1 = toks[6].getLocation();
  SourceLocation idLoc2 = toks[7].getLocation();
  SourceLocation idLoc3 = toks[8].getLocation();
  SourceLocation idLoc4 = toks[9].getLocation();
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
  EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
  EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
}
| 360 | |
TEST_F(LexerTest, HandlesSplitTokens) {
  // Verifies makeFileCharRange when the range's end comes from a token the
  // parser split (a `>>` split into two `>`s, as when closing nested
  // template argument lists).
  std::vector<tok::TokenKind> ExpectedTokens;
  // Line 1 (after the #defines)
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::less);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::less);
  ExpectedTokens.push_back(tok::greatergreater);
  // Line 2
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::less);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::less);
  ExpectedTokens.push_back(tok::greatergreater);

  std::vector<Token> toks = CheckLex("#define TY ty\n"
                                     "#define RANGLE ty<ty<>>\n"
                                     "TY<ty<>>\n"
                                     "RANGLE",
                                     ExpectedTokens);

  SourceLocation outerTyLoc = toks[0].getLocation();
  SourceLocation innerTyLoc = toks[2].getLocation();
  SourceLocation gtgtLoc = toks[4].getLocation();
  // Split the token to simulate the action of the parser and force creation
  // of an `ExpansionTokenRange`.
  SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);

  // Verify that it only captures the first greater-than and not the second
  // one.
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));

  // Verify case where range begins in a macro expansion.
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
                        gtgtLoc.getLocWithOffset(1)));

  SourceLocation macroInnerTyLoc = toks[7].getLocation();
  SourceLocation macroGtgtLoc = toks[9].getLocation();
  // Split the token to simulate the action of the parser and force creation
  // of an `ExpansionTokenRange`.
  SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);

  // Verify that it fails (because it only captures the first greater-than
  // and not the second one, so it doesn't span the entire macro expansion).
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
      SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
}
| 419 | |
| 420 | TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { |
| 421 | std::vector<Token> toks = |
| 422 | Lex(Source: "#define helper1 0\n" |
| 423 | "void helper2(const char *, ...);\n" |
| 424 | "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n" |
| 425 | "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n" |
| 426 | "void f1() { M2(\"a\", \"b\"); }" ); |
| 427 | |
| 428 | // Check the file corresponding to the "helper1" macro arg in M2. |
| 429 | // |
| 430 | // The lexer used to report its size as 31, meaning that the end of the |
| 431 | // expansion would be on the *next line* (just past `M2("a", "b")`). Make |
| 432 | // sure that we get the correct end location (the comma after "helper1"). |
| 433 | SourceLocation helper1ArgLoc = toks[20].getLocation(); |
| 434 | EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); |
| 435 | } |
| 436 | |
TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  // Builds a 3-parameter function-like macro by hand, feeds it the lexed
  // arguments "StrArg", 5, 'C' (commas delimit arguments), and checks that
  // MacroArgs::StringifyArgument produces the expected string literals.
  // In an assertions-enabled build, asking for a 4th argument must die.
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);

  llvm::BumpPtrAllocator Allocator;
  std::array<IdentifierInfo *, 3> ParamList;
  MacroInfo *MI = PP->AllocateMacroInfo({});
  MI->setIsFunctionLike();
  MI->setParameterList(ParamList, Allocator);
  EXPECT_EQ(3u, MI->getNumParams());
  EXPECT_TRUE(MI->isFunctionLike());

  // Re-tokenize the buffer into per-argument token streams: each comma (and
  // the final EOF) terminates one argument with an eof token, matching the
  // representation MacroArgs::create expects.
  Token Eof;
  Eof.setKind(tok::eof);
  std::vector<Token> ArgTokens;
  while (1) {
    Token tok;
    PP->Lex(tok);
    if (tok.is(tok::eof)) {
      ArgTokens.push_back(Eof);
      break;
    }
    if (tok.is(tok::comma))
      ArgTokens.push_back(Eof);
    else
      ArgTokens.push_back(tok);
  }

  // MacroArgs must be released via destroy(&PP); a custom deleter ensures
  // cleanup even if an expectation below fails.
  auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
  std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
      MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
  auto StringifyArg = [&](int ArgNo) {
    return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
                                 /*Charify=*/false,
                                 /*ExpansionLocStart=*/{},
                                 /*ExpansionLocEnd=*/{});
  };
  Token Result = StringifyArg(0);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
  Result = StringifyArg(1);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"5\"", Result.getLiteralData());
  Result = StringifyArg(2);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  // Out-of-range argument index must hit the "Invalid arg #" assertion.
  EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
#endif
}
| 485 | |
| 486 | TEST_F(LexerTest, IsNewLineEscapedValid) { |
| 487 | auto hasNewLineEscaped = [](const char *S) { |
| 488 | return Lexer::isNewLineEscaped(BufferStart: S, Str: S + strlen(s: S) - 1); |
| 489 | }; |
| 490 | |
| 491 | EXPECT_TRUE(hasNewLineEscaped("\\\r" )); |
| 492 | EXPECT_TRUE(hasNewLineEscaped("\\\n" )); |
| 493 | EXPECT_TRUE(hasNewLineEscaped("\\\r\n" )); |
| 494 | EXPECT_TRUE(hasNewLineEscaped("\\\n\r" )); |
| 495 | EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r" )); |
| 496 | EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n" )); |
| 497 | |
| 498 | EXPECT_FALSE(hasNewLineEscaped("\\\r\r" )); |
| 499 | EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n" )); |
| 500 | EXPECT_FALSE(hasNewLineEscaped("\\\n\n" )); |
| 501 | EXPECT_FALSE(hasNewLineEscaped("\r" )); |
| 502 | EXPECT_FALSE(hasNewLineEscaped("\n" )); |
| 503 | EXPECT_FALSE(hasNewLineEscaped("\r\n" )); |
| 504 | EXPECT_FALSE(hasNewLineEscaped("\n\r" )); |
| 505 | EXPECT_FALSE(hasNewLineEscaped("\r\r" )); |
| 506 | EXPECT_FALSE(hasNewLineEscaped("\n\n" )); |
| 507 | } |
| 508 | |
| 509 | TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { |
| 510 | // Each line should have the same length for |
| 511 | // further offset calculation to be more straightforward. |
| 512 | const unsigned IdentifierLength = 8; |
| 513 | std::string TextToLex = "rabarbar\n" |
| 514 | "foo\\\nbar\n" |
| 515 | "foo\\\rbar\n" |
| 516 | "fo\\\r\nbar\n" |
| 517 | "foo\\\n\rba\n" ; |
| 518 | std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; |
| 519 | std::vector<Token> LexedTokens = CheckLex(Source: TextToLex, ExpectedTokens); |
| 520 | |
| 521 | for (const Token &Tok : LexedTokens) { |
| 522 | std::pair<FileID, unsigned> OriginalLocation = |
| 523 | SourceMgr.getDecomposedLoc(Loc: Tok.getLocation()); |
| 524 | for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { |
| 525 | SourceLocation LookupLocation = |
| 526 | Tok.getLocation().getLocWithOffset(Offset); |
| 527 | |
| 528 | std::pair<FileID, unsigned> FoundLocation = |
| 529 | SourceMgr.getDecomposedExpansionLoc( |
| 530 | Loc: Lexer::GetBeginningOfToken(Loc: LookupLocation, SM: SourceMgr, LangOpts)); |
| 531 | |
| 532 | // Check that location returned by the GetBeginningOfToken |
| 533 | // is the same as original token location reported by Lexer. |
| 534 | EXPECT_EQ(FoundLocation.second, OriginalLocation.second); |
| 535 | } |
| 536 | } |
| 537 | } |
| 538 | |
| 539 | TEST_F(LexerTest, AvoidPastEndOfStringDereference) { |
| 540 | EXPECT_TRUE(Lex(" // \\\n" ).empty()); |
| 541 | EXPECT_TRUE(Lex("#include <\\\\" ).empty()); |
| 542 | EXPECT_TRUE(Lex("#include <\\\\\n" ).empty()); |
| 543 | } |
| 544 | |
TEST_F(LexerTest, StringizingRasString) {
  // Checks both Stringify overloads: newlines become "\n", quotes and
  // backslashes get escaped, exactly as the # stringize operator requires.
  // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
  std::string String1 = R"(foo
    {"bar":[]}
    baz)";
  // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
  SmallString<128> String2;
  String2 += String1.c_str();

  // Corner cases.
  std::string String3 = R"(\
    \n
    \\n
    \\)";
  SmallString<128> String4;
  String4 += String3.c_str();
  std::string String5 = R"(a\


    \\b)";
  SmallString<128> String6;
  String6 += String5.c_str();

  String1 = Lexer::Stringify(StringRef(String1));
  Lexer::Stringify(String2);
  String3 = Lexer::Stringify(StringRef(String3));
  Lexer::Stringify(String4);
  String5 = Lexer::Stringify(StringRef(String5));
  Lexer::Stringify(String6);

  EXPECT_EQ(String1, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(String2, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(String3, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(String4, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(String5, R"(a\\\n\n\n    \\\\b)");
  EXPECT_EQ(String6, R"(a\\\n\n\n    \\\\b)");
}
| 582 | |
TEST_F(LexerTest, CharRangeOffByOne) {
  std::vector<Token> toks = Lex(R"(#define MOO 1
    void foo() { MOO; })");
  // Tokens: void foo ( ) { MOO ; } -- toks[5] is the expansion of MOO.
  const Token &moo = toks[5];

  EXPECT_EQ(getSourceText(moo, moo), "MOO");

  SourceRange R{moo.getLocation(), moo.getLocation()};

  // The single token is itself a complete macro expansion...
  EXPECT_TRUE(
      Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
  EXPECT_TRUE(
      Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));

  // ...so converting the token range to a char range must span all three
  // characters of "MOO" and not drop the last one.
  CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);

  EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
}
| 601 | |
| 602 | TEST_F(LexerTest, FindNextToken) { |
| 603 | Lex(Source: "int abcd = 0;\n" |
| 604 | "// A comment.\n" |
| 605 | "int xyz = abcd;\n" ); |
| 606 | std::vector<std::string> GeneratedByNextToken; |
| 607 | SourceLocation Loc = |
| 608 | SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID()); |
| 609 | while (true) { |
| 610 | auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts); |
| 611 | ASSERT_TRUE(T); |
| 612 | if (T->is(K: tok::eof)) |
| 613 | break; |
| 614 | GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
| 615 | Loc = T->getLocation(); |
| 616 | } |
| 617 | EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd" , "=" , "0" , ";" , "int" , |
| 618 | "xyz" , "=" , "abcd" , ";" )); |
| 619 | } |
| 620 | |
| 621 | TEST_F(LexerTest, FindNextTokenIncludingComments) { |
| 622 | Lex(Source: "int abcd = 0;\n" |
| 623 | "// A comment.\n" |
| 624 | "int xyz = abcd;\n" ); |
| 625 | std::vector<std::string> GeneratedByNextToken; |
| 626 | SourceLocation Loc = |
| 627 | SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID()); |
| 628 | while (true) { |
| 629 | auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true); |
| 630 | ASSERT_TRUE(T); |
| 631 | if (T->is(K: tok::eof)) |
| 632 | break; |
| 633 | GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
| 634 | Loc = T->getLocation(); |
| 635 | } |
| 636 | EXPECT_THAT(GeneratedByNextToken, |
| 637 | ElementsAre("abcd" , "=" , "0" , ";" , "// A comment." , "int" , "xyz" , |
| 638 | "=" , "abcd" , ";" )); |
| 639 | } |
| 640 | |
| 641 | TEST_F(LexerTest, FindPreviousToken) { |
| 642 | Lex(Source: "int abcd = 0;\n" |
| 643 | "// A comment.\n" |
| 644 | "int xyz = abcd;\n" ); |
| 645 | std::vector<std::string> GeneratedByPrevToken; |
| 646 | SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID()); |
| 647 | while (true) { |
| 648 | auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: false); |
| 649 | if (!T.has_value()) |
| 650 | break; |
| 651 | GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
| 652 | Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts); |
| 653 | } |
| 654 | EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";" , "abcd" , "=" , "xyz" , "int" , |
| 655 | ";" , "0" , "=" , "abcd" , "int" )); |
| 656 | } |
| 657 | |
| 658 | TEST_F(LexerTest, FindPreviousTokenIncludingComments) { |
| 659 | Lex(Source: "int abcd = 0;\n" |
| 660 | "// A comment.\n" |
| 661 | "int xyz = abcd;\n" ); |
| 662 | std::vector<std::string> GeneratedByPrevToken; |
| 663 | SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID()); |
| 664 | while (true) { |
| 665 | auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true); |
| 666 | if (!T.has_value()) |
| 667 | break; |
| 668 | GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
| 669 | Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts); |
| 670 | } |
| 671 | EXPECT_THAT(GeneratedByPrevToken, |
| 672 | ElementsAre(";" , "abcd" , "=" , "xyz" , "int" , "// A comment." , ";" , |
| 673 | "0" , "=" , "abcd" , "int" )); |
| 674 | } |
| 675 | |
| 676 | TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { |
| 677 | TrivialModuleLoader ModLoader; |
| 678 | auto PP = CreatePP(Source: "" , ModLoader); |
| 679 | PP->LexTokensUntilEOF(); |
| 680 | EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), |
| 681 | 1U); |
| 682 | } |
| 683 | |
TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
  const llvm::StringLiteral Source = R"cpp(
  // First line comment.
  //* Second line comment which is ambigious.
  ; // Have a non-comment token to make sure something is lexed.
  )cpp";
  // With line comments disabled (as in C89), "//" is not comment syntax;
  // the raw lexer and the preprocessor-driven lexer must still agree on the
  // resulting token stream.
  LangOpts.LineComment = false;
  auto Toks = Lex(Source);
  auto &SM = PP->getSourceManager();
  auto SrcBuffer = SM.getBufferData(SM.getMainFileID());
  Lexer L(SM.getLocForStartOfFile(SM.getMainFileID()), PP->getLangOpts(),
          SrcBuffer.data(), SrcBuffer.data(),
          SrcBuffer.data() + SrcBuffer.size());

  auto ToksView = llvm::ArrayRef(Toks);
  clang::Token T;
  EXPECT_FALSE(ToksView.empty());
  // Compare the raw token stream against the normally lexed one, kind by
  // kind; both must run out at the same time.
  while (!L.LexFromRawLexer(T)) {
    ASSERT_TRUE(!ToksView.empty());
    EXPECT_EQ(T.getKind(), ToksView.front().getKind());
    ToksView = ToksView.drop_front();
  }
  EXPECT_TRUE(ToksView.empty());
}
| 708 | |
TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) {
  // The macro body "1" is separated from the macro name by an escaped
  // newline followed by two spaces of indentation.
  const llvm::StringLiteral Source = R"cc(
#define ONE \
  1

int i = ONE;
)cc";
  std::vector<Token> Toks =
      CheckLex(Source, {tok::kw_int, tok::identifier, tok::equal,
                        tok::numeric_constant, tok::semi});

  // Set up by getting the raw token for the `1` in the macro definition.
  const Token &OneExpanded = Toks[3];
  Token Tok;
  ASSERT_FALSE(
      Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts));
  // The `ONE`.
  ASSERT_EQ(Tok.getKind(), tok::raw_identifier);
  ASSERT_FALSE(
      Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()),
                         Tok, SourceMgr, LangOpts));
  // The `1` in the macro definition.
  ASSERT_EQ(Tok.getKind(), tok::numeric_constant);

  // Go back 4 characters: two spaces, one newline, and the backslash.
  SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(-4);
  // Expect true (=failure) because the whitespace immediately after the
  // escaped newline is not ignored.
  EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts,
                                 /*IgnoreWhiteSpace=*/false));
}
| 740 | |
// Plain TEST (no fixture needed): ComputePreamble is a pure function of the
// buffer and the LangOptions. Each case marks the expected preamble with
// [[...]]; the preamble ends right after the last directive (or, with C++
// modules enabled, the global-module-fragment introducer) line.
TEST(LexerPreambleTest, PreambleBounds) {
  std::vector<std::string> Cases = {
      R"cc([[
#include <foo>
]]int bar;
)cc",
      R"cc([[
#include <foo>
]])cc",
      R"cc([[
// leading comment
#include <foo>
]]// trailing comment
int bar;
)cc",
      R"cc([[
module;
#include <foo>
]]module bar;
int x;
)cc",
  };
  for (const auto& Case : Cases) {
    llvm::Annotations A(Case);
    clang::LangOptions LangOpts;
    LangOpts.CPlusPlusModules = true;
    auto Bounds = Lexer::ComputePreamble(A.code(), LangOpts);
    // The computed preamble size must land exactly on the ]] marker.
    EXPECT_EQ(Bounds.Size, A.range().End) << Case;
  }
}
| 771 | |
| 772 | } // anonymous namespace |
| 773 | |