TokensTest.cpp source code [clang/unittests/Tooling/Syntax/TokensTest.cpp]

1	//===- TokensTest.cpp -----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "clang/Tooling/Syntax/Tokens.h"
10	#include "clang/AST/ASTConsumer.h"
11	#include "clang/AST/Expr.h"
12	#include "clang/Basic/Diagnostic.h"
13	#include "clang/Basic/DiagnosticIDs.h"
14	#include "clang/Basic/DiagnosticOptions.h"
15	#include "clang/Basic/FileManager.h"
16	#include "clang/Basic/FileSystemOptions.h"
17	#include "clang/Basic/LLVM.h"
18	#include "clang/Basic/LangOptions.h"
19	#include "clang/Basic/SourceLocation.h"
20	#include "clang/Basic/SourceManager.h"
21	#include "clang/Basic/TokenKinds.def"
22	#include "clang/Basic/TokenKinds.h"
23	#include "clang/Frontend/CompilerInstance.h"
24	#include "clang/Frontend/FrontendAction.h"
25	#include "clang/Frontend/Utils.h"
26	#include "clang/Lex/Lexer.h"
27	#include "clang/Lex/PreprocessorOptions.h"
28	#include "clang/Lex/Token.h"
29	#include "clang/Tooling/Tooling.h"
30	#include "llvm/ADT/ArrayRef.h"
31	#include "llvm/ADT/IntrusiveRefCntPtr.h"
32	#include "llvm/ADT/STLExtras.h"
33	#include "llvm/ADT/StringRef.h"
34	#include "llvm/Support/FormatVariadic.h"
35	#include "llvm/Support/MemoryBuffer.h"
36	#include "llvm/Support/VirtualFileSystem.h"
37	#include "llvm/Support/raw_os_ostream.h"
38	#include "llvm/Support/raw_ostream.h"
39	#include "llvm/Testing/Annotations/Annotations.h"
40	#include "llvm/Testing/Support/SupportHelpers.h"
41	#include <cassert>
42	#include <cstdlib>
43	#include <gmock/gmock.h>
44	#include <gtest/gtest.h>
45	#include <memory>
46	#include <optional>
47	#include <ostream>
48	#include <string>
49
50	using namespace clang;
51	using namespace clang::syntax;
52
53	using llvm::ValueIs;
54	using ::testing::_;
55	using ::testing::AllOf;
56	using ::testing::Contains;
57	using ::testing::ElementsAre;
58	using ::testing::Field;
59	using ::testing::IsEmpty;
60	using ::testing::Matcher;
61	using ::testing::Not;
62	using ::testing::Pointee;
63	using ::testing::StartsWith;
64
65	namespace {
66	// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
67	// argument.
68	MATCHER_P(SameRange, A, "") {
69	return A.begin() == arg.begin() && A.end() == arg.end();
70	}
71
72	Matcher<TokenBuffer::Expansion>
73	IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
74	Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
75	return AllOf(matchers: Field(field: &TokenBuffer::Expansion::Spelled, matcher: Spelled),
76	matchers: Field(field: &TokenBuffer::Expansion::Expanded, matcher: Expanded));
77	}
78	// Matchers for syntax::Token.
79	MATCHER_P(Kind, K, "") { return arg.kind() == K; }
80	MATCHER_P2(HasText, Text, SourceMgr, "") {
81	return arg.text(*SourceMgr) == Text;
82	}
83	/// Checks the start and end location of a token are equal to SourceRng.
84	MATCHER_P(RangeIs, SourceRng, "") {
85	return arg.location() == SourceRng.first &&
86	arg.endLocation() == SourceRng.second;
87	}
88
89	class TokenCollectorTest : public ::testing::Test {
90	public:
91	/// Run the clang frontend, collect the preprocessed tokens from the frontend
92	/// invocation and store them in this->Buffer.
93	/// This also clears SourceManager before running the compiler.
94	void recordTokens(llvm::StringRef Code) {
95	class RecordTokens : public ASTFrontendAction {
96	public:
97	explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
98
99	bool BeginSourceFileAction(CompilerInstance &CI) override {
100	assert(!Collector && "expected only a single call to BeginSourceFile");
101	Collector.emplace(args&: CI.getPreprocessor());
102	return true;
103	}
104	void EndSourceFileAction() override {
105	assert(Collector && "BeginSourceFileAction was never called");
106	Result = std::move(*Collector).consume();
107	Result.indexExpandedTokens();
108	}
109
110	std::unique_ptr<ASTConsumer>
111	CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
112	return std::make_unique<ASTConsumer>();
113	}
114
115	private:
116	TokenBuffer &Result;
117	std::optional<TokenCollector> Collector;
118	};
119
120	constexpr const char *FileName = "./input.cpp";
121	FS ->addFile(Path: FileName, ModificationTime: time_t(), Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: ""));
122	// Prepare to run a compiler.
123	if (!Diags ->getClient())
124	Diags ->setClient(client: new IgnoringDiagConsumer);
125	std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
126	FileName};
127	CreateInvocationOptions CIOpts;
128	CIOpts.Diags = Diags;
129	CIOpts.VFS = FS;
130	auto CI = createInvocation(Args, Opts: std::move(CIOpts));
131	assert(CI);
132	CI ->getFrontendOpts().DisableFree = false;
133	CI ->getPreprocessorOpts().addRemappedFile(
134	From: FileName, To: llvm::MemoryBuffer::getMemBufferCopy(InputData: Code).release());
135	CompilerInstance Compiler(std::move(CI));
136	Compiler.setDiagnostics(Diags.get());
137	Compiler.setFileManager(FileMgr.get());
138	Compiler.setSourceManager(SourceMgr.get());
139
140	this->Buffer = TokenBuffer (*SourceMgr);
141	RecordTokens Recorder(this->Buffer);
142	ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
143	<< "failed to run the frontend";
144	}
145
146	/// Record the tokens and return a test dump of the resulting buffer.
147	std::string collectAndDump(llvm::StringRef Code) {
148	recordTokens(Code);
149	return Buffer.dumpForTests();
150	}
151
152	// Adds a file to the test VFS.
153	void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
154	if (!FS ->addFile(Path, ModificationTime: time_t(),
155	Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: Contents))) {
156	ADD_FAILURE() << "could not add a file to VFS: " << Path;
157	}
158	}
159
160	/// Add a new file, run syntax::tokenize() on the range if any, run it on the
161	/// whole file otherwise and return the results.
162	std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
163	llvm::Annotations Annot(Text);
164	auto FID = SourceMgr ->createFileID(
165	Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: Annot.code()));
166	// FIXME: pass proper LangOptions.
167	if (Annot.ranges().empty())
168	return syntax::tokenize(FID, SM: *SourceMgr, LO: LangOptions ());
169	return syntax::tokenize(
170	FR: syntax::FileRange (FID, Annot.range().Begin, Annot.range().End),
171	SM: *SourceMgr, LO: LangOptions ());
172	}
173
174	// Specialized versions of matchers that hide the SourceManager from clients.
175	Matcher<syntax::Token> HasText(std::string Text) const {
176	return ::HasText(gmock_p0: Text, gmock_p1: SourceMgr.get());
177	}
178	Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
179	std::pair<SourceLocation, SourceLocation> Ls;
180	Ls.first = SourceMgr ->getLocForStartOfFile(FID: SourceMgr ->getMainFileID())
181	.getLocWithOffset(Offset: R.Begin);
182	Ls.second = SourceMgr ->getLocForStartOfFile(FID: SourceMgr ->getMainFileID())
183	.getLocWithOffset(Offset: R.End);
184	return ::RangeIs(gmock_p0: Ls);
185	}
186
187	/// Finds a subrange in O(n m).*
188	template <class T, class U, class Eq>
189	llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
190	llvm::ArrayRef<T> Range, Eq F) {
191	assert(Subrange.size() >= `1`);
192	if (Range.size() < Subrange.size())
193	return llvm::ArrayRef(Range.end(), Range.end());
194	for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
195	Begin <= Last; ++Begin) {
196	auto It = Begin;
197	for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
198	++ItSub, ++It) {
199	if (!F(ItSub, It))
200	goto continue_outer;
201	}
202	return llvm::ArrayRef(Begin, It);
203	continue_outer:;
204	}
205	return llvm::ArrayRef(Range.end(), Range.end());
206	}
207
208	/// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
209	/// The match should be unique. \p Query is a whitespace-separated list of
210	/// tokens to search for.
211	llvm::ArrayRef<syntax::Token>
212	findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
213	llvm::SmallVector<llvm::StringRef, `8`> QueryTokens;
214	Query.split(A&: QueryTokens, Separator: `' '`, /MaxSplit=/-`1`, /KeepEmpty=/false);
215	if (QueryTokens.empty()) {
216	ADD_FAILURE() << "will not look for an empty list of tokens";
217	std::abort();
218	}
219	// An equality test for search.
220	auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
221	return Q == T.text(SM: *SourceMgr);
222	};
223	// Find a match.
224	auto Found = findSubrange(Subrange: llvm::ArrayRef(QueryTokens), Range: Tokens, F: TextMatches);
225	if (Found.begin() == Tokens.end()) {
226	ADD_FAILURE() << "could not find the subrange for " << Query;
227	std::abort();
228	}
229	// Check that the match is unique.
230	if (findSubrange(Subrange: llvm::ArrayRef(QueryTokens),
231	Range: llvm::ArrayRef(Found.end(), Tokens.end()), F: TextMatches)
232	.begin() != Tokens.end()) {
233	ADD_FAILURE() << "match is not unique for " << Query;
234	std::abort();
235	}
236	return Found;
237	};
238
239	// Specialized versions of findTokenRange for expanded and spelled tokens.
240	llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
241	return findTokenRange(Query, Tokens: Buffer.expandedTokens());
242	}
243	llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
244	FileID File = FileID ()) {
245	if (!File.isValid())
246	File = SourceMgr ->getMainFileID();
247	return findTokenRange(Query, Tokens: Buffer.spelledTokens(FID: File));
248	}
249
250	// Data fields.
251	DiagnosticOptions DiagOpts;
252	llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
253	new DiagnosticsEngine (new DiagnosticIDs, DiagOpts);
254	IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
255	new llvm::vfs::InMemoryFileSystem;
256	llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
257	new FileManager (FileSystemOptions (), FS);
258	llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
259	new SourceManager (Diags, FileMgr);
260	/// Contains last result of calling recordTokens().
261	TokenBuffer Buffer = TokenBuffer (*SourceMgr);
262	};
263
264	TEST_F(TokenCollectorTest, RawMode) {
265	EXPECT_THAT(tokenize("int main() {}"),
266	ElementsAre(Kind(tok::kw_int),
267	AllOf(HasText("main"), Kind(tok::identifier)),
268	Kind(tok::l_paren), Kind(tok::r_paren),
269	Kind(tok::l_brace), Kind(tok::r_brace)));
270	// Comments are ignored for now.
271	EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
272	ElementsAre(Kind(tok::kw_int),
273	AllOf(HasText("a"), Kind(tok::identifier)),
274	Kind(tok::semi)));
275	EXPECT_THAT(tokenize("int [[main() {]]}"),
276	ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
277	Kind(tok::l_paren), Kind(tok::r_paren),
278	Kind(tok::l_brace)));
279	EXPECT_THAT(tokenize("int [[main() { ]]}"),
280	ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
281	Kind(tok::l_paren), Kind(tok::r_paren),
282	Kind(tok::l_brace)));
283	// First token is partially parsed, last token is fully included even though
284	// only a part of it is contained in the range.
285	EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
286	ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)),
287	Kind(tok::l_paren), Kind(tok::r_paren),
288	Kind(tok::l_brace), Kind(tok::kw_return)));
289	}
290
291	TEST_F(TokenCollectorTest, Basic) {
292	std::pair</Input/ std::string, /Expected/ std::string> TestCases[] = {
293	{"int main() {}",
294	R"(expanded tokens:
295	int main ( ) { }
296	file './input.cpp'
297	spelled tokens:
298	int main ( ) { }
299	no mappings.
300	)"},
301	// All kinds of whitespace are ignored.
302	{"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
303	R"(expanded tokens:
304	int main ( ) { }
305	file './input.cpp'
306	spelled tokens:
307	int main ( ) { }
308	no mappings.
309	)"},
310	// Annotation tokens are ignored.
311	{R"cpp(
312	#pragma GCC visibility push (public)
313	#pragma GCC visibility pop
314	)cpp",
315	R"(expanded tokens:
316	<empty>
317	file './input.cpp'
318	spelled tokens:
319	# pragma GCC visibility push ( public ) # pragma GCC visibility pop
320	mappings:
321	['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
322	)"},
323	// Empty files should not crash.
324	{R"cpp()cpp", R"(expanded tokens:
325	<empty>
326	file './input.cpp'
327	spelled tokens:
328	<empty>
329	no mappings.
330	)"},
331	// Should not crash on errors inside '#define' directives. Error is that
332	// stringification (#B) does not refer to a macro parameter.
333	{
334	R"cpp(
335	a
336	#define MACRO() A #B
337	)cpp",
338	R"(expanded tokens:
339	a
340	file './input.cpp'
341	spelled tokens:
342	a # define MACRO ( ) A # B
343	mappings:
344	['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
345	)"}};
346	for (auto &Test : TestCases)
347	EXPECT_EQ(collectAndDump(Test.first), Test.second)
348	<< collectAndDump(Code: Test.first);
349	}
350
351	TEST_F(TokenCollectorTest, Locations) {
352	// Check locations of the tokens.
353	llvm::Annotations Code(R"cpp(
354	$r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
355	)cpp");
356	recordTokens(Code: Code.code());
357	// Check expanded tokens.
358	EXPECT_THAT(
359	Buffer.expandedTokens(),
360	ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
361	AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
362	AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
363	AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
364	AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
365	Kind(tok::eof)));
366	// Check spelled tokens.
367	EXPECT_THAT(
368	Buffer.spelledTokens(SourceMgr ->getMainFileID()),
369	ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
370	AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
371	AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
372	AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
373	AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
374
375	auto StartLoc = SourceMgr ->getLocForStartOfFile(FID: SourceMgr ->getMainFileID());
376	for (auto &R : Code.ranges()) {
377	EXPECT_THAT(
378	Buffer.spelledTokenContaining(StartLoc.getLocWithOffset(R.Begin)),
379	Pointee(RangeIs(R)));
380	}
381	}
382
383	TEST_F(TokenCollectorTest, LocationInMiddleOfSpelledToken) {
384	llvm::Annotations Code(R"cpp(
385	int foo = [[baa^aar]];
386	)cpp");
387	recordTokens(Code: Code.code());
388	// Check spelled tokens.
389	auto StartLoc = SourceMgr ->getLocForStartOfFile(FID: SourceMgr ->getMainFileID());
390	EXPECT_THAT(
391	Buffer.spelledTokenContaining(StartLoc.getLocWithOffset(Code.point())),
392	Pointee(RangeIs(Code.range())));
393	}
394
395	TEST_F(TokenCollectorTest, MacroDirectives) {
396	// Macro directives are not stored anywhere at the moment.
397	std::string Code = R"cpp(
398	#define FOO a
399	#include "unresolved_file.h"
400	#undef FOO
401	#ifdef X
402	#else
403	#endif
404	#ifndef Y
405	#endif
406	#if 1
407	#elif 2
408	#else
409	#endif
410	#pragma once
411	#pragma something lalala
412
413	int a;
414	)cpp";
415	std::string Expected =
416	"expanded tokens:\n"
417	" int a ;\n"
418	"file './input.cpp'\n"
419	" spelled tokens:\n"
420	" # define FOO a # include \"unresolved_file.h\" # undef FOO "
421	"# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
422	"# endif # pragma once # pragma something lalala int a ;\n"
423	" mappings:\n"
424	" ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
425	EXPECT_EQ(collectAndDump(Code), Expected);
426	}
427
428	TEST_F(TokenCollectorTest, MacroReplacements) {
429	std::pair</Input/ std::string, /Expected/ std::string> TestCases[] = {
430	// A simple object-like macro.
431	{R"cpp(
432	#define INT int const
433	INT a;
434	)cpp",
435	R"(expanded tokens:
436	int const a ;
437	file './input.cpp'
438	spelled tokens:
439	# define INT int const INT a ;
440	mappings:
441	['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
442	['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
443	)"},
444	// A simple function-like macro.
445	{R"cpp(
446	#define INT(a) const int
447	INT(10+10) a;
448	)cpp",
449	R"(expanded tokens:
450	const int a ;
451	file './input.cpp'
452	spelled tokens:
453	# define INT ( a ) const int INT ( 10 + 10 ) a ;
454	mappings:
455	['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
456	['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
457	)"},
458	// Recursive macro replacements.
459	{R"cpp(
460	#define ID(X) X
461	#define INT int const
462	ID(ID(INT)) a;
463	)cpp",
464	R"(expanded tokens:
465	int const a ;
466	file './input.cpp'
467	spelled tokens:
468	# define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
469	mappings:
470	['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
471	['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
472	)"},
473	// A little more complicated recursive macro replacements.
474	{R"cpp(
475	#define ADD(X, Y) X+Y
476	#define MULT(X, Y) X*Y
477
478	int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
479	)cpp",
480	"expanded tokens:\n"
481	" int a = 1 * 2 + 3 * 4 + 5 ;\n"
482	"file './input.cpp'\n"
483	" spelled tokens:\n"
484	" # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
485	"a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
486	" mappings:\n"
487	" ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
488	" ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
489	// Empty macro replacement.
490	// FIXME: the #define directives should not be glued together.
491	{R"cpp(
492	#define EMPTY
493	#define EMPTY_FUNC(X)
494	EMPTY
495	EMPTY_FUNC(1+2+3)
496	)cpp",
497	R"(expanded tokens:
498	<empty>
499	file './input.cpp'
500	spelled tokens:
501	# define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
502	mappings:
503	['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
504	['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
505	['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
506	)"},
507	// File ends with a macro replacement.
508	{R"cpp(
509	#define FOO 10+10;
510	int a = FOO
511	)cpp",
512	R"(expanded tokens:
513	int a = 10 + 10 ;
514	file './input.cpp'
515	spelled tokens:
516	# define FOO 10 + 10 ; int a = FOO
517	mappings:
518	['#'_0, 'int'_7) => ['int'_0, 'int'_0)
519	['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
520	)"},
521	{R"cpp(
522	#define NUM 42
523	#define ID(a) a
524	#define M 1 + ID
525	M(NUM)
526	)cpp",
527	R"(expanded tokens:
528	1 + 42
529	file './input.cpp'
530	spelled tokens:
531	# define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
532	mappings:
533	['#'_0, 'M'_17) => ['1'_0, '1'_0)
534	['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
535	)"},
536	};
537
538	for (auto &Test : TestCases) {
539	std::string Dump = collectAndDump(Code: Test.first);
540	EXPECT_EQ(Test.second, Dump) << Dump;
541	}
542	}
543
544	TEST_F(TokenCollectorTest, SpecialTokens) {
545	// Tokens coming from concatenations.
546	recordTokens(Code: R"cpp(
547	#define CONCAT(a, b) a ## b
548	int a = CONCAT(1, 2);
549	)cpp");
550	EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
551	Contains(HasText("12")));
552	// Multi-line tokens with slashes at the end.
553	recordTokens(Code: "i\\\nn\\\nt");
554	EXPECT_THAT(Buffer.expandedTokens(),
555	ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
556	Kind(tok::eof)));
557	// FIXME: test tokens with digraphs and UCN identifiers.
558	}
559
560	TEST_F(TokenCollectorTest, LateBoundTokens) {
561	// The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
562	// but we choose to record them as a single token (for now).
563	llvm::Annotations Code(R"cpp(
564	template <class T>
565	struct foo { int a; };
566	int bar = foo<foo<int$br[[>>]]().a;
567	int baz = 10 $op[[>>]] 2;
568	)cpp");
569	recordTokens(Code: Code.code());
570	EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
571	AllOf(Contains(AllOf(Kind(tok::greatergreater),
572	RangeIs(Code.range("br")))),
573	Contains(AllOf(Kind(tok::greatergreater),
574	RangeIs(Code.range("op"))))));
575	}
576
577	TEST_F(TokenCollectorTest, DelayedParsing) {
578	llvm::StringLiteral Code = R"cpp(
579	struct Foo {
580	int method() {
581	// Parser will visit method bodies and initializers multiple times, but
582	// TokenBuffer should only record the first walk over the tokens;
583	return 100;
584	}
585	int a = 10;
586
587	struct Subclass {
588	void foo() {
589	Foo().method();
590	}
591	};
592	};
593	)cpp";
594	std::string ExpectedTokens =
595	"expanded tokens:\n"
596	" struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
597	"Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
598	EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
599	}
600
601	TEST_F(TokenCollectorTest, MultiFile) {
602	addFile(Path: "./foo.h", Contents: R"cpp(
603	#define ADD(X, Y) X+Y
604	int a = 100;
605	#include "bar.h"
606	)cpp");
607	addFile(Path: "./bar.h", Contents: R"cpp(
608	int b = ADD(1, 2);
609	#define MULT(X, Y) X*Y
610	)cpp");
611	llvm::StringLiteral Code = R"cpp(
612	#include "foo.h"
613	int c = ADD(1, MULT(2,3));
614	)cpp";
615
616	std::string Expected = R"(expanded tokens:
617	int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
618	file './input.cpp'
619	spelled tokens:
620	# include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
621	mappings:
622	['#'_0, 'int'_3) => ['int'_12, 'int'_12)
623	['ADD'_6, ';'_17) => ['1'_15, ';'_20)
624	file './foo.h'
625	spelled tokens:
626	# define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
627	mappings:
628	['#'_0, 'int'_11) => ['int'_0, 'int'_0)
629	['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
630	file './bar.h'
631	spelled tokens:
632	int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
633	mappings:
634	['ADD'_3, ';'_9) => ['1'_8, ';'_11)
635	['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
636	)";
637
638	EXPECT_EQ(Expected, collectAndDump(Code))
639	<< "input: " << Code << "\nresults: " << collectAndDump(Code);
640	}
641
642	class TokenBufferTest : public TokenCollectorTest {};
643
644	TEST_F(TokenBufferTest, SpelledByExpanded) {
645	recordTokens(Code: R"cpp(
646	a1 a2 a3 b1 b2
647	)cpp");
648
649	// Expanded and spelled tokens are stored separately.
650	EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
651	// Searching for subranges of expanded tokens should give the corresponding
652	// spelled ones.
653	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
654	ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
655	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
656	ValueIs(SameRange(findSpelled("a1 a2 a3"))));
657	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
658	ValueIs(SameRange(findSpelled("b1 b2"))));
659
660	// Test search on simple macro expansions.
661	recordTokens(Code: R"cpp(
662	#define A a1 a2 a3
663	#define B b1 b2
664
665	A split B
666	)cpp");
667	// Ranges going across expansion boundaries.
668	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
669	ValueIs(SameRange(findSpelled("A split B"))));
670	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
671	ValueIs(SameRange(findSpelled("A split").drop_back())));
672	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
673	ValueIs(SameRange(findSpelled("split B").drop_front())));
674	// Ranges not fully covering macro invocations should fail.
675	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
676	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), std::nullopt);
677	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
678	std::nullopt);
679
680	// Recursive macro invocations.
681	recordTokens(Code: R"cpp(
682	#define ID(x) x
683	#define B b1 b2
684
685	ID(ID(ID(a1) a2 a3)) split ID(B)
686	)cpp");
687
688	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
689	ValueIs(SameRange(findSpelled("( B").drop_front())));
690	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
691	ValueIs(SameRange(findSpelled(
692	"ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
693	// Mixed ranges with expanded and spelled tokens.
694	EXPECT_THAT(
695	Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")),
696	ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
697	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")),
698	ValueIs(SameRange(findSpelled("split ID ( B )"))));
699	// Macro arguments
700	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")),
701	ValueIs(SameRange(findSpelled("a1"))));
702	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")),
703	ValueIs(SameRange(findSpelled("a2"))));
704	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")),
705	ValueIs(SameRange(findSpelled("a3"))));
706	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")),
707	ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
708	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
709	ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));
710
711	// Empty macro expansions.
712	recordTokens(Code: R"cpp(
713	#define EMPTY
714	#define ID(X) X
715
716	EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
717	EMPTY EMPTY ID(4 5 6) split2
718	ID(7 8 9) EMPTY EMPTY
719	)cpp");
720	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
721	ValueIs(SameRange(findSpelled("1 2 3"))));
722	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
723	ValueIs(SameRange(findSpelled("4 5 6"))));
724	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
725	ValueIs(SameRange(findSpelled("7 8 9"))));
726
727	// Empty mappings coming from various directives.
728	recordTokens(Code: R"cpp(
729	#define ID(X) X
730	ID(1)
731	#pragma lalala
732	not_mapped
733	)cpp");
734	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
735	ValueIs(SameRange(findSpelled("not_mapped"))));
736
737	// Multiple macro arguments
738	recordTokens(Code: R"cpp(
739	#define ID(X) X
740	#define ID2(X, Y) X Y
741
742	ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
743	)cpp");
744	// Should fail, spans multiple arguments.
745	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
746	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")),
747	ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
748	EXPECT_THAT(
749	Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
750	ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
751	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")),
752	ValueIs(SameRange(findSpelled("a5 a6"))));
753	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
754	ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
755	// Should fail, spans multiple invocations.
756	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")),
757	std::nullopt);
758
759	// https://github.com/clangd/clangd/issues/1289
760	recordTokens(Code: R"cpp(
761	#define FOO(X) foo(X)
762	#define INDIRECT FOO(y)
763	INDIRECT // expands to foo(y)
764	)cpp");
765	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y")), std::nullopt);
766
767	recordTokens(Code: R"cpp(
768	#define FOO(X) a X b
769	FOO(y)
770	)cpp");
771	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("y")),
772	ValueIs(SameRange(findSpelled("y"))));
773
774	recordTokens(Code: R"cpp(
775	#define ID(X) X
776	#define BAR ID(1)
777	BAR
778	)cpp");
779	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1")),
780	ValueIs(SameRange(findSpelled(") BAR").drop_front())));
781
782	// Critical cases for mapping of Prev/Next in spelledForExpandedSlow.
783	recordTokens(Code: R"cpp(
784	#define ID(X) X
785	ID(prev good)
786	ID(prev ID(good2))
787	#define LARGE ID(prev ID(bad))
788	LARGE
789	)cpp");
790	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
791	ValueIs(SameRange(findSpelled("good"))));
792	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
793	ValueIs(SameRange(findSpelled("good2"))));
794	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
795
796	recordTokens(Code: R"cpp(
797	#define PREV prev
798	#define ID(X) X
799	PREV ID(good)
800	#define LARGE PREV ID(bad)
801	LARGE
802	)cpp");
803	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
804	ValueIs(SameRange(findSpelled("good"))));
805	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
806
807	recordTokens(Code: R"cpp(
808	#define ID(X) X
809	#define ID2(X, Y) X Y
810	ID2(prev, good)
811	ID2(prev, ID(good2))
812	#define LARGE ID2(prev, bad)
813	LARGE
814	)cpp");
815	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
816	ValueIs(SameRange(findSpelled("good"))));
817	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
818	ValueIs(SameRange(findSpelled("good2"))));
819	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
820
821	// Prev from macro body.
822	recordTokens(Code: R"cpp(
823	#define ID(X) X
824	#define ID2(X, Y) X prev ID(Y)
825	ID2(not_prev, good)
826	)cpp");
827	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
828	ValueIs(SameRange(findSpelled("good"))));
829	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good")), std::nullopt);
830	}
831
832	TEST_F(TokenBufferTest, NoCrashForEofToken) {
833	recordTokens(Code: R"cpp(
834	int main() {
835	)cpp");
836	ASSERT_TRUE(!Buffer.expandedTokens().empty());
837	ASSERT_EQ(Buffer.expandedTokens().back().kind(), tok::eof);
838	// Expanded range including `eof` is handled gracefully (`eof` is ignored).
839	EXPECT_THAT(
840	Buffer.spelledForExpanded(Buffer.expandedTokens()),
841	ValueIs(SameRange(Buffer.spelledTokens(SourceMgr ->getMainFileID()))));
842	}
843
844	TEST_F(TokenBufferTest, ExpandedTokensForRange) {
845	recordTokens(Code: R"cpp(
846	#define SIGN(X) X##_washere
847	A SIGN(B) C SIGN(D) E SIGN(F) G
848	)cpp");
849
850	SourceRange R(findExpanded(Query: "C").front().location(),
851	findExpanded(Query: "F_washere").front().location());
852	// Expanded and spelled tokens are stored separately.
853	EXPECT_THAT(Buffer.expandedTokens(R),
854	SameRange(findExpanded("C D_washere E F_washere")));
855	EXPECT_THAT(Buffer.expandedTokens(SourceRange ()), testing::IsEmpty());
856	}
857
858	TEST_F(TokenBufferTest, ExpansionsOverlapping) {
859	// Object-like macro expansions.
860	recordTokens(Code: R"cpp(
861	#define FOO 3+4
862	int a = FOO 1;
863	int b = FOO 2;
864	)cpp");
865
866	llvm::ArrayRef<syntax::Token> Foo1 = findSpelled(Query: "FOO 1");
867	EXPECT_THAT(
868	Buffer.expansionStartingAt(Foo1.data()),
869	ValueIs(IsExpansion(SameRange(Foo1.drop_back()),
870	SameRange(findExpanded("3 + 4 1").drop_back()))));
871	EXPECT_THAT(
872	Buffer.expansionsOverlapping(Foo1),
873	ElementsAre(IsExpansion(SameRange(Foo1.drop_back()),
874	SameRange(findExpanded("3 + 4 1").drop_back()))));
875
876	llvm::ArrayRef<syntax::Token> Foo2 = findSpelled(Query: "FOO 2");
877	EXPECT_THAT(
878	Buffer.expansionStartingAt(Foo2.data()),
879	ValueIs(IsExpansion(SameRange(Foo2.drop_back()),
880	SameRange(findExpanded("3 + 4 2").drop_back()))));
881	EXPECT_THAT(
882	Buffer.expansionsOverlapping(llvm::ArrayRef(Foo1.begin(), Foo2.end())),
883	ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _),
884	IsExpansion(SameRange(Foo2.drop_back()), _)));
885
886	// Function-like macro expansions.
887	recordTokens(Code: R"cpp(
888	#define ID(X) X
889	int a = ID(1+2+3);
890	int b = ID(ID(2+3+4));
891	)cpp");
892
893	llvm::ArrayRef<syntax::Token> ID1 = findSpelled(Query: "ID ( 1 + 2 + 3 )");
894	EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
895	ValueIs(IsExpansion(SameRange(ID1),
896	SameRange(findExpanded("1 + 2 + 3")))));
897	// Only the first spelled token should be found.
898	for (const auto &T : ID1.drop_front())
899	EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
900
901	llvm::ArrayRef<syntax::Token> ID2 = findSpelled(Query: "ID ( ID ( 2 + 3 + 4 ) )");
902	EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
903	ValueIs(IsExpansion(SameRange(ID2),
904	SameRange(findExpanded("2 + 3 + 4")))));
905	// Only the first spelled token should be found.
906	for (const auto &T : ID2.drop_front())
907	EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
908
909	EXPECT_THAT(Buffer.expansionsOverlapping(llvm::ArrayRef(
910	findSpelled("1 + 2").data(), findSpelled("4").data())),
911	ElementsAre(IsExpansion(SameRange(ID1), _),
912	IsExpansion(SameRange(ID2), _)));
913
914	// PP directives.
915	recordTokens(Code: R"cpp(
916	#define FOO 1
917	int a = FOO;
918	#pragma once
919	int b = 1;
920	)cpp");
921
922	llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled(Query: "# define FOO 1");
923	EXPECT_THAT(
924	Buffer.expansionStartingAt(&DefineFoo.front()),
925	ValueIs(IsExpansion(SameRange(DefineFoo),
926	SameRange(findExpanded("int a").take_front(`0`)))));
927	// Only the first spelled token should be found.
928	for (const auto &T : DefineFoo.drop_front())
929	EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
930
931	llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled(Query: "# pragma once");
932	EXPECT_THAT(
933	Buffer.expansionStartingAt(&PragmaOnce.front()),
934	ValueIs(IsExpansion(SameRange(PragmaOnce),
935	SameRange(findExpanded("int b").take_front(`0`)))));
936	// Only the first spelled token should be found.
937	for (const auto &T : PragmaOnce.drop_front())
938	EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
939
940	EXPECT_THAT(
941	Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")),
942	ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _),
943	IsExpansion(SameRange(PragmaOnce), _)));
944	}
945
946	TEST_F(TokenBufferTest, TokensToFileRange) {
947	addFile(Path: "./foo.h", Contents: "token_from_header");
948	llvm::Annotations Code(R"cpp(
949	#define FOO token_from_expansion
950	#include "./foo.h"
951	$all[[$i[[int]] a = FOO;]]
952	)cpp");
953	recordTokens(Code: Code.code());
954
955	auto &SM = *SourceMgr;
956
957	// Two simple examples.
958	auto Int = findExpanded(Query: "int").front();
959	auto Semi = findExpanded(Query: ";").front();
960	EXPECT_EQ(Int.range(SM), FileRange (SM.getMainFileID(), Code.range("i").Begin,
961	Code.range("i").End));
962	EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
963	FileRange (SM.getMainFileID(), Code.range("all").Begin,
964	Code.range("all").End));
965	// We don't test assertion failures because death tests are slow.
966	}
967
968	TEST_F(TokenBufferTest, MacroExpansions) {
969	llvm::Annotations Code(R"cpp(
970	#define FOO B
971	#define FOO2 BA
972	#define CALL(X) int X
973	#define G CALL(FOO2)
974	int B;
975	$macro[[FOO]];
976	$macro[[CALL]](A);
977	$macro[[G]];
978	)cpp");
979	recordTokens(Code: Code.code());
980	auto &SM = *SourceMgr;
981	auto Expansions = Buffer.macroExpansions(FID: SM.getMainFileID());
982	std::vector<FileRange> ExpectedMacroRanges;
983	for (auto Range : Code.ranges(Name: "macro"))
984	ExpectedMacroRanges.push_back(
985	x: FileRange (SM.getMainFileID(), Range.Begin, Range.End));
986	std::vector<FileRange> ActualMacroRanges;
987	for (auto Expansion : Expansions)
988	ActualMacroRanges.push_back(x: Expansion->range(SM));
989	EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
990	}
991
992	TEST_F(TokenBufferTest, Touching) {
993	llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
994	recordTokens(Code: Code.code());
995
996	auto Touching = [&](int Index) {
997	SourceLocation Loc = SourceMgr ->getComposedLoc(FID: SourceMgr ->getMainFileID(),
998	Offset: Code.points()[Index]);
999	return spelledTokensTouching(Loc, Tokens: Buffer);
1000	};
1001	auto Identifier = [&](int Index) {
1002	SourceLocation Loc = SourceMgr ->getComposedLoc(FID: SourceMgr ->getMainFileID(),
1003	Offset: Code.points()[Index]);
1004	const syntax::Token *Tok = spelledIdentifierTouching(Loc, Tokens: Buffer);
1005	return Tok ? Tok->text(SM: *SourceMgr) : "";
1006	};
1007
1008	EXPECT_THAT(Touching(`0`), SameRange(findSpelled("int")));
1009	EXPECT_EQ(Identifier(`0`), "");
1010	EXPECT_THAT(Touching(`1`), SameRange(findSpelled("int")));
1011	EXPECT_EQ(Identifier(`1`), "");
1012	EXPECT_THAT(Touching(`2`), SameRange(findSpelled("int")));
1013	EXPECT_EQ(Identifier(`2`), "");
1014
1015	EXPECT_THAT(Touching(`3`), SameRange(findSpelled("ab")));
1016	EXPECT_EQ(Identifier(`3`), "ab");
1017	EXPECT_THAT(Touching(`4`), SameRange(findSpelled("ab")));
1018	EXPECT_EQ(Identifier(`4`), "ab");
1019
1020	EXPECT_THAT(Touching(`5`), SameRange(findSpelled("ab =")));
1021	EXPECT_EQ(Identifier(`5`), "ab");
1022
1023	EXPECT_THAT(Touching(`6`), SameRange(findSpelled("= 1")));
1024	EXPECT_EQ(Identifier(`6`), "");
1025
1026	EXPECT_THAT(Touching(`7`), SameRange(findSpelled(";")));
1027	EXPECT_EQ(Identifier(`7`), "");
1028
1029	ASSERT_EQ(Code.points().size(), `8u`);
1030	}
1031
1032	TEST_F(TokenBufferTest, ExpandedBySpelled) {
1033	recordTokens(Code: R"cpp(
1034	a1 a2 a3 b1 b2
1035	)cpp");
1036	// Expanded and spelled tokens are stored separately.
1037	EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
1038	// Searching for subranges of expanded tokens should give the corresponding
1039	// spelled ones.
1040	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
1041	ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
1042	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
1043	ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1044	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
1045	ElementsAre(SameRange(findExpanded("b1 b2"))));
1046
1047	// Test search on simple macro expansions.
1048	recordTokens(Code: R"cpp(
1049	#define A a1 a2 a3
1050	#define B b1 b2
1051
1052	A split B
1053	)cpp");
1054	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
1055	ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1056	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
1057	ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1058	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
1059	ElementsAre(SameRange(findExpanded("b1 b2"))));
1060
1061	// Ranges not fully covering macro expansions should fail.
1062	recordTokens(Code: R"cpp(
1063	#define ID(x) x
1064
1065	ID(a)
1066	)cpp");
1067	// Spelled don't cover entire mapping (missing ID token) -> empty result
1068	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
1069	// Spelled don't cover entire mapping (missing ) token) -> empty result
1070	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
1071
1072	// Recursive macro invocations.
1073	recordTokens(Code: R"cpp(
1074	#define ID(x) x
1075	#define B b1 b2
1076
1077	ID(ID(ID(a1) a2 a3)) split ID(B)
1078	)cpp");
1079
1080	EXPECT_THAT(
1081	Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
1082	ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1083	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
1084	ElementsAre(SameRange(findExpanded("b1 b2"))));
1085	EXPECT_THAT(Buffer.expandedForSpelled(
1086	findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
1087	ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1088	// FIXME: these should succeed, but we do not support macro arguments yet.
1089	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
1090	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
1091	IsEmpty());
1092
1093	// Empty macro expansions.
1094	recordTokens(Code: R"cpp(
1095	#define EMPTY
1096	#define ID(X) X
1097
1098	EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
1099	EMPTY EMPTY ID(4 5 6) split2
1100	ID(7 8 9) EMPTY EMPTY
1101	)cpp");
1102	// Covered by empty expansions on one of both of the sides.
1103	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
1104	ElementsAre(SameRange(findExpanded("1 2 3"))));
1105	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
1106	ElementsAre(SameRange(findExpanded("4 5 6"))));
1107	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
1108	ElementsAre(SameRange(findExpanded("7 8 9"))));
1109	// Including the empty macro expansions on the side.
1110	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
1111	ElementsAre(SameRange(findExpanded("1 2 3"))));
1112	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
1113	ElementsAre(SameRange(findExpanded("1 2 3"))));
1114	EXPECT_THAT(
1115	Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
1116	ElementsAre(SameRange(findExpanded("1 2 3"))));
1117
1118	// Empty mappings coming from various directives.
1119	recordTokens(Code: R"cpp(
1120	#define ID(X) X
1121	ID(1)
1122	#pragma lalala
1123	not_mapped
1124	)cpp");
1125	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
1126	IsEmpty());
1127	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
1128	IsEmpty());
1129
1130	// Empty macro expansion.
1131	recordTokens(Code: R"cpp(
1132	#define EMPTY
1133	EMPTY int a = 100;
1134	)cpp");
1135	EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
1136	IsEmpty());
1137	}
1138
1139	TEST_F(TokenCollectorTest, Pragmas) {
1140	// Tokens coming from concatenations.
1141	recordTokens(Code: R"cpp(
1142	void foo() {
1143	#pragma unroll 4
1144	for(int i=0;i<4;++i);
1145	}
1146	)cpp");
1147	}
1148	} // namespace
1149

source code of clang/unittests/Tooling/Syntax/TokensTest.cpp