1 | //===- TokensTest.cpp -----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/Syntax/Tokens.h" |
10 | #include "clang/AST/ASTConsumer.h" |
11 | #include "clang/AST/Expr.h" |
12 | #include "clang/Basic/Diagnostic.h" |
13 | #include "clang/Basic/DiagnosticIDs.h" |
14 | #include "clang/Basic/DiagnosticOptions.h" |
15 | #include "clang/Basic/FileManager.h" |
16 | #include "clang/Basic/FileSystemOptions.h" |
17 | #include "clang/Basic/LLVM.h" |
18 | #include "clang/Basic/LangOptions.h" |
19 | #include "clang/Basic/SourceLocation.h" |
20 | #include "clang/Basic/SourceManager.h" |
21 | #include "clang/Basic/TokenKinds.def" |
22 | #include "clang/Basic/TokenKinds.h" |
23 | #include "clang/Frontend/CompilerInstance.h" |
24 | #include "clang/Frontend/FrontendAction.h" |
25 | #include "clang/Frontend/Utils.h" |
26 | #include "clang/Lex/Lexer.h" |
27 | #include "clang/Lex/PreprocessorOptions.h" |
28 | #include "clang/Lex/Token.h" |
29 | #include "clang/Tooling/Tooling.h" |
30 | #include "llvm/ADT/ArrayRef.h" |
31 | #include "llvm/ADT/IntrusiveRefCntPtr.h" |
32 | #include "llvm/ADT/STLExtras.h" |
33 | #include "llvm/ADT/StringRef.h" |
34 | #include "llvm/Support/FormatVariadic.h" |
35 | #include "llvm/Support/MemoryBuffer.h" |
36 | #include "llvm/Support/VirtualFileSystem.h" |
37 | #include "llvm/Support/raw_os_ostream.h" |
38 | #include "llvm/Support/raw_ostream.h" |
39 | #include "llvm/Testing/Annotations/Annotations.h" |
40 | #include "llvm/Testing/Support/SupportHelpers.h" |
41 | #include <cassert> |
42 | #include <cstdlib> |
43 | #include <gmock/gmock.h> |
44 | #include <gtest/gtest.h> |
45 | #include <memory> |
46 | #include <optional> |
47 | #include <ostream> |
48 | #include <string> |
49 | |
50 | using namespace clang; |
51 | using namespace clang::syntax; |
52 | |
53 | using llvm::ValueIs; |
54 | using ::testing::_; |
55 | using ::testing::AllOf; |
56 | using ::testing::Contains; |
57 | using ::testing::ElementsAre; |
58 | using ::testing::Field; |
59 | using ::testing::IsEmpty; |
60 | using ::testing::Matcher; |
61 | using ::testing::Not; |
62 | using ::testing::Pointee; |
63 | using ::testing::StartsWith; |
64 | |
65 | namespace { |
66 | // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the |
67 | // argument. |
68 | MATCHER_P(SameRange, A, "" ) { |
69 | return A.begin() == arg.begin() && A.end() == arg.end(); |
70 | } |
71 | |
72 | Matcher<TokenBuffer::Expansion> |
73 | IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, |
74 | Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { |
75 | return AllOf(matchers: Field(field: &TokenBuffer::Expansion::Spelled, matcher: Spelled), |
76 | matchers: Field(field: &TokenBuffer::Expansion::Expanded, matcher: Expanded)); |
77 | } |
78 | // Matchers for syntax::Token. |
79 | MATCHER_P(Kind, K, "" ) { return arg.kind() == K; } |
80 | MATCHER_P2(HasText, Text, SourceMgr, "" ) { |
81 | return arg.text(*SourceMgr) == Text; |
82 | } |
83 | /// Checks the start and end location of a token are equal to SourceRng. |
84 | MATCHER_P(RangeIs, SourceRng, "" ) { |
85 | return arg.location() == SourceRng.first && |
86 | arg.endLocation() == SourceRng.second; |
87 | } |
88 | |
89 | class TokenCollectorTest : public ::testing::Test { |
90 | public: |
91 | /// Run the clang frontend, collect the preprocessed tokens from the frontend |
92 | /// invocation and store them in this->Buffer. |
93 | /// This also clears SourceManager before running the compiler. |
94 | void recordTokens(llvm::StringRef Code) { |
95 | class RecordTokens : public ASTFrontendAction { |
96 | public: |
97 | explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} |
98 | |
99 | bool BeginSourceFileAction(CompilerInstance &CI) override { |
100 | assert(!Collector && "expected only a single call to BeginSourceFile" ); |
101 | Collector.emplace(args&: CI.getPreprocessor()); |
102 | return true; |
103 | } |
104 | void EndSourceFileAction() override { |
105 | assert(Collector && "BeginSourceFileAction was never called" ); |
106 | Result = std::move(*Collector).consume(); |
107 | Result.indexExpandedTokens(); |
108 | } |
109 | |
110 | std::unique_ptr<ASTConsumer> |
111 | CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { |
112 | return std::make_unique<ASTConsumer>(); |
113 | } |
114 | |
115 | private: |
116 | TokenBuffer &Result; |
117 | std::optional<TokenCollector> Collector; |
118 | }; |
119 | |
120 | constexpr const char *FileName = "./input.cpp" ; |
121 | FS->addFile(Path: FileName, ModificationTime: time_t(), Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: "" )); |
122 | // Prepare to run a compiler. |
123 | if (!Diags->getClient()) |
124 | Diags->setClient(client: new IgnoringDiagConsumer); |
125 | std::vector<const char *> Args = {"tok-test" , "-std=c++03" , "-fsyntax-only" , |
126 | FileName}; |
127 | CreateInvocationOptions CIOpts; |
128 | CIOpts.Diags = Diags; |
129 | CIOpts.VFS = FS; |
130 | auto CI = createInvocation(Args, Opts: std::move(CIOpts)); |
131 | assert(CI); |
132 | CI->getFrontendOpts().DisableFree = false; |
133 | CI->getPreprocessorOpts().addRemappedFile( |
134 | From: FileName, To: llvm::MemoryBuffer::getMemBufferCopy(InputData: Code).release()); |
135 | CompilerInstance Compiler(std::move(CI)); |
136 | Compiler.setDiagnostics(Diags.get()); |
137 | Compiler.setFileManager(FileMgr.get()); |
138 | Compiler.setSourceManager(SourceMgr.get()); |
139 | |
140 | this->Buffer = TokenBuffer(*SourceMgr); |
141 | RecordTokens Recorder(this->Buffer); |
142 | ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) |
143 | << "failed to run the frontend" ; |
144 | } |
145 | |
146 | /// Record the tokens and return a test dump of the resulting buffer. |
147 | std::string collectAndDump(llvm::StringRef Code) { |
148 | recordTokens(Code); |
149 | return Buffer.dumpForTests(); |
150 | } |
151 | |
152 | // Adds a file to the test VFS. |
153 | void addFile(llvm::StringRef Path, llvm::StringRef Contents) { |
154 | if (!FS->addFile(Path, ModificationTime: time_t(), |
155 | Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: Contents))) { |
156 | ADD_FAILURE() << "could not add a file to VFS: " << Path; |
157 | } |
158 | } |
159 | |
160 | /// Add a new file, run syntax::tokenize() on the range if any, run it on the |
161 | /// whole file otherwise and return the results. |
162 | std::vector<syntax::Token> tokenize(llvm::StringRef Text) { |
163 | llvm::Annotations Annot(Text); |
164 | auto FID = SourceMgr->createFileID( |
165 | Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: Annot.code())); |
166 | // FIXME: pass proper LangOptions. |
167 | if (Annot.ranges().empty()) |
168 | return syntax::tokenize(FID, SM: *SourceMgr, LO: LangOptions()); |
169 | return syntax::tokenize( |
170 | FR: syntax::FileRange(FID, Annot.range().Begin, Annot.range().End), |
171 | SM: *SourceMgr, LO: LangOptions()); |
172 | } |
173 | |
174 | // Specialized versions of matchers that hide the SourceManager from clients. |
175 | Matcher<syntax::Token> HasText(std::string Text) const { |
176 | return ::HasText(gmock_p0: Text, gmock_p1: SourceMgr.get()); |
177 | } |
178 | Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { |
179 | std::pair<SourceLocation, SourceLocation> Ls; |
180 | Ls.first = SourceMgr->getLocForStartOfFile(FID: SourceMgr->getMainFileID()) |
181 | .getLocWithOffset(Offset: R.Begin); |
182 | Ls.second = SourceMgr->getLocForStartOfFile(FID: SourceMgr->getMainFileID()) |
183 | .getLocWithOffset(Offset: R.End); |
184 | return ::RangeIs(gmock_p0: Ls); |
185 | } |
186 | |
187 | /// Finds a subrange in O(n * m). |
188 | template <class T, class U, class Eq> |
189 | llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, |
190 | llvm::ArrayRef<T> Range, Eq F) { |
191 | assert(Subrange.size() >= 1); |
192 | if (Range.size() < Subrange.size()) |
193 | return llvm::ArrayRef(Range.end(), Range.end()); |
194 | for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size(); |
195 | Begin <= Last; ++Begin) { |
196 | auto It = Begin; |
197 | for (auto ItSub = Subrange.begin(); ItSub != Subrange.end(); |
198 | ++ItSub, ++It) { |
199 | if (!F(*ItSub, *It)) |
200 | goto continue_outer; |
201 | } |
202 | return llvm::ArrayRef(Begin, It); |
203 | continue_outer:; |
204 | } |
205 | return llvm::ArrayRef(Range.end(), Range.end()); |
206 | } |
207 | |
208 | /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. |
209 | /// The match should be unique. \p Query is a whitespace-separated list of |
210 | /// tokens to search for. |
211 | llvm::ArrayRef<syntax::Token> |
212 | findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { |
213 | llvm::SmallVector<llvm::StringRef, 8> QueryTokens; |
214 | Query.split(A&: QueryTokens, Separator: ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
215 | if (QueryTokens.empty()) { |
216 | ADD_FAILURE() << "will not look for an empty list of tokens" ; |
217 | std::abort(); |
218 | } |
219 | // An equality test for search. |
220 | auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { |
221 | return Q == T.text(SM: *SourceMgr); |
222 | }; |
223 | // Find a match. |
224 | auto Found = findSubrange(Subrange: llvm::ArrayRef(QueryTokens), Range: Tokens, F: TextMatches); |
225 | if (Found.begin() == Tokens.end()) { |
226 | ADD_FAILURE() << "could not find the subrange for " << Query; |
227 | std::abort(); |
228 | } |
229 | // Check that the match is unique. |
230 | if (findSubrange(Subrange: llvm::ArrayRef(QueryTokens), |
231 | Range: llvm::ArrayRef(Found.end(), Tokens.end()), F: TextMatches) |
232 | .begin() != Tokens.end()) { |
233 | ADD_FAILURE() << "match is not unique for " << Query; |
234 | std::abort(); |
235 | } |
236 | return Found; |
237 | }; |
238 | |
239 | // Specialized versions of findTokenRange for expanded and spelled tokens. |
240 | llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { |
241 | return findTokenRange(Query, Tokens: Buffer.expandedTokens()); |
242 | } |
243 | llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, |
244 | FileID File = FileID()) { |
245 | if (!File.isValid()) |
246 | File = SourceMgr->getMainFileID(); |
247 | return findTokenRange(Query, Tokens: Buffer.spelledTokens(FID: File)); |
248 | } |
249 | |
250 | // Data fields. |
251 | DiagnosticOptions DiagOpts; |
252 | llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = |
253 | new DiagnosticsEngine(new DiagnosticIDs, DiagOpts); |
254 | IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = |
255 | new llvm::vfs::InMemoryFileSystem; |
256 | llvm::IntrusiveRefCntPtr<FileManager> FileMgr = |
257 | new FileManager(FileSystemOptions(), FS); |
258 | llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = |
259 | new SourceManager(*Diags, *FileMgr); |
260 | /// Contains last result of calling recordTokens(). |
261 | TokenBuffer Buffer = TokenBuffer(*SourceMgr); |
262 | }; |
263 | |
264 | TEST_F(TokenCollectorTest, RawMode) { |
265 | EXPECT_THAT(tokenize("int main() {}" ), |
266 | ElementsAre(Kind(tok::kw_int), |
267 | AllOf(HasText("main" ), Kind(tok::identifier)), |
268 | Kind(tok::l_paren), Kind(tok::r_paren), |
269 | Kind(tok::l_brace), Kind(tok::r_brace))); |
270 | // Comments are ignored for now. |
271 | EXPECT_THAT(tokenize("/* foo */int a; // more comments" ), |
272 | ElementsAre(Kind(tok::kw_int), |
273 | AllOf(HasText("a" ), Kind(tok::identifier)), |
274 | Kind(tok::semi))); |
275 | EXPECT_THAT(tokenize("int [[main() {]]}" ), |
276 | ElementsAre(AllOf(HasText("main" ), Kind(tok::identifier)), |
277 | Kind(tok::l_paren), Kind(tok::r_paren), |
278 | Kind(tok::l_brace))); |
279 | EXPECT_THAT(tokenize("int [[main() { ]]}" ), |
280 | ElementsAre(AllOf(HasText("main" ), Kind(tok::identifier)), |
281 | Kind(tok::l_paren), Kind(tok::r_paren), |
282 | Kind(tok::l_brace))); |
283 | // First token is partially parsed, last token is fully included even though |
284 | // only a part of it is contained in the range. |
285 | EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}" ), |
286 | ElementsAre(AllOf(HasText("ain" ), Kind(tok::identifier)), |
287 | Kind(tok::l_paren), Kind(tok::r_paren), |
288 | Kind(tok::l_brace), Kind(tok::kw_return))); |
289 | } |
290 | |
// Table-driven test: each entry pairs an input snippet with the text that
// collectAndDump() is expected to return for it. The cases cover a plain
// function, whitespace-only differences, pragmas, an empty file, and an
// erroneous '#define'.
// NOTE(review): the expected dumps below are whitespace-sensitive string
// literals, and this listing appears to have collapsed their leading
// indentation - confirm the exact spacing against the output of
// TokenBuffer::dumpForTests() before editing them.
291 | TEST_F(TokenCollectorTest, Basic) { |
292 | std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { |
293 | {"int main() {}" , |
294 | R"(expanded tokens: |
295 | int main ( ) { } |
296 | file './input.cpp' |
297 | spelled tokens: |
298 | int main ( ) { } |
299 | no mappings. |
300 | )" }, |
301 | // All kinds of whitespace are ignored. |
302 | {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n" , |
303 | R"(expanded tokens: |
304 | int main ( ) { } |
305 | file './input.cpp' |
306 | spelled tokens: |
307 | int main ( ) { } |
308 | no mappings. |
309 | )" }, |
310 | // Annotation tokens are ignored. |
311 | {R"cpp( |
312 | #pragma GCC visibility push (public) |
313 | #pragma GCC visibility pop |
314 | )cpp" , |
315 | R"(expanded tokens: |
316 | <empty> |
317 | file './input.cpp' |
318 | spelled tokens: |
319 | # pragma GCC visibility push ( public ) # pragma GCC visibility pop |
320 | mappings: |
321 | ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) |
322 | )" }, |
323 | // Empty files should not crash. |
324 | {R"cpp()cpp" , R"(expanded tokens: |
325 | <empty> |
326 | file './input.cpp' |
327 | spelled tokens: |
328 | <empty> |
329 | no mappings. |
330 | )" }, |
331 | // Should not crash on errors inside '#define' directives. Error is that |
332 | // stringification (#B) does not refer to a macro parameter. |
333 | { |
334 | R"cpp( |
335 | a |
336 | #define MACRO() A #B |
337 | )cpp" , |
338 | R"(expanded tokens: |
339 | a |
340 | file './input.cpp' |
341 | spelled tokens: |
342 | a # define MACRO ( ) A # B |
343 | mappings: |
344 | ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) |
345 | )" }}; |
// Compare the dump of every input with its expectation; the streamed message
// calls collectAndDump() on the same input again to show the dump.
346 | for (auto &Test : TestCases) |
347 | EXPECT_EQ(collectAndDump(Test.first), Test.second) |
348 | << collectAndDump(Code: Test.first); |
349 | } |
350 | |
351 | TEST_F(TokenCollectorTest, Locations) { |
352 | // Check locations of the tokens. |
353 | llvm::Annotations Code(R"cpp( |
354 | $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] |
355 | )cpp" ); |
356 | recordTokens(Code: Code.code()); |
357 | // Check expanded tokens. |
358 | EXPECT_THAT( |
359 | Buffer.expandedTokens(), |
360 | ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1" ))), |
361 | AllOf(Kind(tok::identifier), RangeIs(Code.range("r2" ))), |
362 | AllOf(Kind(tok::equal), RangeIs(Code.range("r3" ))), |
363 | AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4" ))), |
364 | AllOf(Kind(tok::semi), RangeIs(Code.range("r5" ))), |
365 | Kind(tok::eof))); |
366 | // Check spelled tokens. |
367 | EXPECT_THAT( |
368 | Buffer.spelledTokens(SourceMgr->getMainFileID()), |
369 | ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1" ))), |
370 | AllOf(Kind(tok::identifier), RangeIs(Code.range("r2" ))), |
371 | AllOf(Kind(tok::equal), RangeIs(Code.range("r3" ))), |
372 | AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4" ))), |
373 | AllOf(Kind(tok::semi), RangeIs(Code.range("r5" ))))); |
374 | |
375 | auto StartLoc = SourceMgr->getLocForStartOfFile(FID: SourceMgr->getMainFileID()); |
376 | for (auto &R : Code.ranges()) { |
377 | EXPECT_THAT( |
378 | Buffer.spelledTokenContaining(StartLoc.getLocWithOffset(R.Begin)), |
379 | Pointee(RangeIs(R))); |
380 | } |
381 | } |
382 | |
383 | TEST_F(TokenCollectorTest, LocationInMiddleOfSpelledToken) { |
384 | llvm::Annotations Code(R"cpp( |
385 | int foo = [[baa^aar]]; |
386 | )cpp" ); |
387 | recordTokens(Code: Code.code()); |
388 | // Check spelled tokens. |
389 | auto StartLoc = SourceMgr->getLocForStartOfFile(FID: SourceMgr->getMainFileID()); |
390 | EXPECT_THAT( |
391 | Buffer.spelledTokenContaining(StartLoc.getLocWithOffset(Code.point())), |
392 | Pointee(RangeIs(Code.range()))); |
393 | } |
394 | |
// Feeds a snippet made of preprocessor directives followed by `int a;` through
// collectAndDump() and compares the result with the expected dump, in which
// only `int a ;` appears among the expanded tokens.
// NOTE(review): the expected dump is whitespace-sensitive and this listing
// appears to have collapsed the leading spaces inside the literals - confirm
// against TokenBuffer::dumpForTests() before editing.
395 | TEST_F(TokenCollectorTest, MacroDirectives) { |
396 | // Macro directives are not stored anywhere at the moment. |
397 | std::string Code = R"cpp( |
398 | #define FOO a |
399 | #include "unresolved_file.h" |
400 | #undef FOO |
401 | #ifdef X |
402 | #else |
403 | #endif |
404 | #ifndef Y |
405 | #endif |
406 | #if 1 |
407 | #elif 2 |
408 | #else |
409 | #endif |
410 | #pragma once |
411 | #pragma something lalala |
412 | |
413 | int a; |
414 | )cpp" ; |
415 | std::string Expected = |
416 | "expanded tokens:\n" |
417 | " int a ;\n" |
418 | "file './input.cpp'\n" |
419 | " spelled tokens:\n" |
420 | " # define FOO a # include \"unresolved_file.h\" # undef FOO " |
421 | "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " |
422 | "# endif # pragma once # pragma something lalala int a ;\n" |
423 | " mappings:\n" |
424 | " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n" ; |
425 | EXPECT_EQ(collectAndDump(Code), Expected); |
426 | } |
427 | |
// Table-driven test of the spelled-to-expanded mappings recorded for macro
// expansions: object-like, function-like, nested and empty macros, a macro at
// the end of the file, and a macro whose body ends in the name of a
// function-like macro. Each entry pairs an input with the expected
// collectAndDump() output.
// NOTE(review): the expected dumps are whitespace-sensitive and this listing
// appears to have collapsed their leading indentation - confirm against
// TokenBuffer::dumpForTests() before editing the literals.
428 | TEST_F(TokenCollectorTest, MacroReplacements) { |
429 | std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { |
430 | // A simple object-like macro. |
431 | {R"cpp( |
432 | #define INT int const |
433 | INT a; |
434 | )cpp" , |
435 | R"(expanded tokens: |
436 | int const a ; |
437 | file './input.cpp' |
438 | spelled tokens: |
439 | # define INT int const INT a ; |
440 | mappings: |
441 | ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) |
442 | ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) |
443 | )" }, |
444 | // A simple function-like macro. |
445 | {R"cpp( |
446 | #define INT(a) const int |
447 | INT(10+10) a; |
448 | )cpp" , |
449 | R"(expanded tokens: |
450 | const int a ; |
451 | file './input.cpp' |
452 | spelled tokens: |
453 | # define INT ( a ) const int INT ( 10 + 10 ) a ; |
454 | mappings: |
455 | ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) |
456 | ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) |
457 | )" }, |
458 | // Recursive macro replacements. |
459 | {R"cpp( |
460 | #define ID(X) X |
461 | #define INT int const |
462 | ID(ID(INT)) a; |
463 | )cpp" , |
464 | R"(expanded tokens: |
465 | int const a ; |
466 | file './input.cpp' |
467 | spelled tokens: |
468 | # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; |
469 | mappings: |
470 | ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) |
471 | ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) |
472 | )" }, |
473 | // A little more complicated recursive macro replacements. |
474 | {R"cpp( |
475 | #define ADD(X, Y) X+Y |
476 | #define MULT(X, Y) X*Y |
477 | |
478 | int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); |
479 | )cpp" , |
480 | "expanded tokens:\n" |
481 | " int a = 1 * 2 + 3 * 4 + 5 ;\n" |
482 | "file './input.cpp'\n" |
483 | " spelled tokens:\n" |
484 | " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " |
485 | "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" |
486 | " mappings:\n" |
487 | " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" |
488 | " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n" }, |
489 | // Empty macro replacement. |
490 | // FIXME: the #define directives should not be glued together. |
491 | {R"cpp( |
492 | #define EMPTY |
493 | #define EMPTY_FUNC(X) |
494 | EMPTY |
495 | EMPTY_FUNC(1+2+3) |
496 | )cpp" , |
497 | R"(expanded tokens: |
498 | <empty> |
499 | file './input.cpp' |
500 | spelled tokens: |
501 | # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) |
502 | mappings: |
503 | ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) |
504 | ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) |
505 | ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) |
506 | )" }, |
507 | // File ends with a macro replacement. |
508 | {R"cpp( |
509 | #define FOO 10+10; |
510 | int a = FOO |
511 | )cpp" , |
512 | R"(expanded tokens: |
513 | int a = 10 + 10 ; |
514 | file './input.cpp' |
515 | spelled tokens: |
516 | # define FOO 10 + 10 ; int a = FOO |
517 | mappings: |
518 | ['#'_0, 'int'_7) => ['int'_0, 'int'_0) |
519 | ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) |
520 | )" }, |
521 | {R"cpp( |
522 | #define NUM 42 |
523 | #define ID(a) a |
524 | #define M 1 + ID |
525 | M(NUM) |
526 | )cpp" , |
527 | R"(expanded tokens: |
528 | 1 + 42 |
529 | file './input.cpp' |
530 | spelled tokens: |
531 | # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM ) |
532 | mappings: |
533 | ['#'_0, 'M'_17) => ['1'_0, '1'_0) |
534 | ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3) |
535 | )" }, |
536 | }; |
537 | |
// Dump each input once and reuse the dump both for the comparison and for the
// failure message.
538 | for (auto &Test : TestCases) { |
539 | std::string Dump = collectAndDump(Code: Test.first); |
540 | EXPECT_EQ(Test.second, Dump) << Dump; |
541 | } |
542 | } |
543 | |
544 | TEST_F(TokenCollectorTest, SpecialTokens) { |
545 | // Tokens coming from concatenations. |
546 | recordTokens(Code: R"cpp( |
547 | #define CONCAT(a, b) a ## b |
548 | int a = CONCAT(1, 2); |
549 | )cpp" ); |
550 | EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), |
551 | Contains(HasText("12" ))); |
552 | // Multi-line tokens with slashes at the end. |
553 | recordTokens(Code: "i\\\nn\\\nt" ); |
554 | EXPECT_THAT(Buffer.expandedTokens(), |
555 | ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt" )), |
556 | Kind(tok::eof))); |
557 | // FIXME: test tokens with digraphs and UCN identifiers. |
558 | } |
559 | |
560 | TEST_F(TokenCollectorTest, LateBoundTokens) { |
561 | // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), |
562 | // but we choose to record them as a single token (for now). |
563 | llvm::Annotations Code(R"cpp( |
564 | template <class T> |
565 | struct foo { int a; }; |
566 | int bar = foo<foo<int$br[[>>]]().a; |
567 | int baz = 10 $op[[>>]] 2; |
568 | )cpp" ); |
569 | recordTokens(Code: Code.code()); |
570 | EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), |
571 | AllOf(Contains(AllOf(Kind(tok::greatergreater), |
572 | RangeIs(Code.range("br" )))), |
573 | Contains(AllOf(Kind(tok::greatergreater), |
574 | RangeIs(Code.range("op" )))))); |
575 | } |
576 | |
// Checks that the dump for a struct with inline method bodies and a member
// initializer starts with the expected expanded-token line, in which each
// token of the snippet appears once.
// NOTE(review): the expected prefix is whitespace-sensitive and this listing
// appears to have collapsed the leading spaces inside the literal - confirm
// against TokenBuffer::dumpForTests() before editing.
577 | TEST_F(TokenCollectorTest, DelayedParsing) { |
578 | llvm::StringLiteral Code = R"cpp( |
579 | struct Foo { |
580 | int method() { |
581 | // Parser will visit method bodies and initializers multiple times, but |
582 | // TokenBuffer should only record the first walk over the tokens; |
583 | return 100; |
584 | } |
585 | int a = 10; |
586 | |
587 | struct Subclass { |
588 | void foo() { |
589 | Foo().method(); |
590 | } |
591 | }; |
592 | }; |
593 | )cpp" ; |
594 | std::string ExpectedTokens = |
595 | "expanded tokens:\n" |
596 | " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " |
597 | "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n" ; |
598 | EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); |
599 | } |
600 | |
// Registers two headers in the test VFS (foo.h includes bar.h), compiles a
// main file that includes foo.h, and compares the dump with an expectation
// that lists spelled tokens and mappings separately for each of the three
// files.
// NOTE(review): the expected dump is whitespace-sensitive and this listing
// appears to have collapsed its leading indentation - confirm against
// TokenBuffer::dumpForTests() before editing the literal.
601 | TEST_F(TokenCollectorTest, MultiFile) { |
602 | addFile(Path: "./foo.h" , Contents: R"cpp( |
603 | #define ADD(X, Y) X+Y |
604 | int a = 100; |
605 | #include "bar.h" |
606 | )cpp" ); |
607 | addFile(Path: "./bar.h" , Contents: R"cpp( |
608 | int b = ADD(1, 2); |
609 | #define MULT(X, Y) X*Y |
610 | )cpp" ); |
611 | llvm::StringLiteral Code = R"cpp( |
612 | #include "foo.h" |
613 | int c = ADD(1, MULT(2,3)); |
614 | )cpp" ; |
615 | |
616 | std::string Expected = R"(expanded tokens: |
617 | int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; |
618 | file './input.cpp' |
619 | spelled tokens: |
620 | # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; |
621 | mappings: |
622 | ['#'_0, 'int'_3) => ['int'_12, 'int'_12) |
623 | ['ADD'_6, ';'_17) => ['1'_15, ';'_20) |
624 | file './foo.h' |
625 | spelled tokens: |
626 | # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" |
627 | mappings: |
628 | ['#'_0, 'int'_11) => ['int'_0, 'int'_0) |
629 | ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) |
630 | file './bar.h' |
631 | spelled tokens: |
632 | int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y |
633 | mappings: |
634 | ['ADD'_3, ';'_9) => ['1'_8, ';'_11) |
635 | ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) |
636 | )" ; |
637 | |
// The streamed message calls collectAndDump() on the same input again to show
// the dump next to the input.
638 | EXPECT_EQ(Expected, collectAndDump(Code)) |
639 | << "input: " << Code << "\nresults: " << collectAndDump(Code); |
640 | } |
641 | |
// Fixture for the TokenBuffer tests. It declares no members of its own and
// inherits everything from TokenCollectorTest.
642 | class TokenBufferTest : public TokenCollectorTest {}; |
643 | |
644 | TEST_F(TokenBufferTest, SpelledByExpanded) { |
645 | recordTokens(Code: R"cpp( |
646 | a1 a2 a3 b1 b2 |
647 | )cpp" ); |
648 | |
649 | // Expanded and spelled tokens are stored separately. |
650 | EXPECT_THAT(findExpanded("a1 a2" ), Not(SameRange(findSpelled("a1 a2" )))); |
651 | // Searching for subranges of expanded tokens should give the corresponding |
652 | // spelled ones. |
653 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2" )), |
654 | ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2" )))); |
655 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3" )), |
656 | ValueIs(SameRange(findSpelled("a1 a2 a3" )))); |
657 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2" )), |
658 | ValueIs(SameRange(findSpelled("b1 b2" )))); |
659 | |
660 | // Test search on simple macro expansions. |
661 | recordTokens(Code: R"cpp( |
662 | #define A a1 a2 a3 |
663 | #define B b1 b2 |
664 | |
665 | A split B |
666 | )cpp" ); |
667 | // Ranges going across expansion boundaries. |
668 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2" )), |
669 | ValueIs(SameRange(findSpelled("A split B" )))); |
670 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3" )), |
671 | ValueIs(SameRange(findSpelled("A split" ).drop_back()))); |
672 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2" )), |
673 | ValueIs(SameRange(findSpelled("split B" ).drop_front()))); |
674 | // Ranges not fully covering macro invocations should fail. |
675 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2" )), std::nullopt); |
676 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2" )), std::nullopt); |
677 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2" )), |
678 | std::nullopt); |
679 | |
680 | // Recursive macro invocations. |
681 | recordTokens(Code: R"cpp( |
682 | #define ID(x) x |
683 | #define B b1 b2 |
684 | |
685 | ID(ID(ID(a1) a2 a3)) split ID(B) |
686 | )cpp" ); |
687 | |
688 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2" )), |
689 | ValueIs(SameRange(findSpelled("( B" ).drop_front()))); |
690 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2" )), |
691 | ValueIs(SameRange(findSpelled( |
692 | "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )" )))); |
693 | // Mixed ranges with expanded and spelled tokens. |
694 | EXPECT_THAT( |
695 | Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split" )), |
696 | ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split" )))); |
697 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2" )), |
698 | ValueIs(SameRange(findSpelled("split ID ( B )" )))); |
699 | // Macro arguments |
700 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1" )), |
701 | ValueIs(SameRange(findSpelled("a1" )))); |
702 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2" )), |
703 | ValueIs(SameRange(findSpelled("a2" )))); |
704 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3" )), |
705 | ValueIs(SameRange(findSpelled("a3" )))); |
706 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2" )), |
707 | ValueIs(SameRange(findSpelled("ID ( a1 ) a2" )))); |
708 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3" )), |
709 | ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3" )))); |
710 | |
711 | // Empty macro expansions. |
712 | recordTokens(Code: R"cpp( |
713 | #define EMPTY |
714 | #define ID(X) X |
715 | |
716 | EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 |
717 | EMPTY EMPTY ID(4 5 6) split2 |
718 | ID(7 8 9) EMPTY EMPTY |
719 | )cpp" ); |
720 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3" )), |
721 | ValueIs(SameRange(findSpelled("1 2 3" )))); |
722 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6" )), |
723 | ValueIs(SameRange(findSpelled("4 5 6" )))); |
724 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9" )), |
725 | ValueIs(SameRange(findSpelled("7 8 9" )))); |
726 | |
727 | // Empty mappings coming from various directives. |
728 | recordTokens(Code: R"cpp( |
729 | #define ID(X) X |
730 | ID(1) |
731 | #pragma lalala |
732 | not_mapped |
733 | )cpp" ); |
734 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped" )), |
735 | ValueIs(SameRange(findSpelled("not_mapped" )))); |
736 | |
737 | // Multiple macro arguments |
738 | recordTokens(Code: R"cpp( |
739 | #define ID(X) X |
740 | #define ID2(X, Y) X Y |
741 | |
742 | ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7) |
743 | )cpp" ); |
744 | // Should fail, spans multiple arguments. |
745 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2" )), std::nullopt); |
746 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3" )), |
747 | ValueIs(SameRange(findSpelled("ID ( a2 ) a3" )))); |
748 | EXPECT_THAT( |
749 | Buffer.spelledForExpanded(findExpanded("a1 a2 a3" )), |
750 | ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )" )))); |
751 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6" )), |
752 | ValueIs(SameRange(findSpelled("a5 a6" )))); |
753 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7" )), |
754 | ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )" )))); |
755 | // Should fail, spans multiple invocations. |
756 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4" )), |
757 | std::nullopt); |
758 | |
759 | // https://github.com/clangd/clangd/issues/1289 |
760 | recordTokens(Code: R"cpp( |
761 | #define FOO(X) foo(X) |
762 | #define INDIRECT FOO(y) |
763 | INDIRECT // expands to foo(y) |
764 | )cpp" ); |
765 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y" )), std::nullopt); |
766 | |
767 | recordTokens(Code: R"cpp( |
768 | #define FOO(X) a X b |
769 | FOO(y) |
770 | )cpp" ); |
771 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("y" )), |
772 | ValueIs(SameRange(findSpelled("y" )))); |
773 | |
774 | recordTokens(Code: R"cpp( |
775 | #define ID(X) X |
776 | #define BAR ID(1) |
777 | BAR |
778 | )cpp" ); |
779 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1" )), |
780 | ValueIs(SameRange(findSpelled(") BAR" ).drop_front()))); |
781 | |
782 | // Critical cases for mapping of Prev/Next in spelledForExpandedSlow. |
783 | recordTokens(Code: R"cpp( |
784 | #define ID(X) X |
785 | ID(prev good) |
786 | ID(prev ID(good2)) |
787 | #define LARGE ID(prev ID(bad)) |
788 | LARGE |
789 | )cpp" ); |
790 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good" )), |
791 | ValueIs(SameRange(findSpelled("good" )))); |
792 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2" )), |
793 | ValueIs(SameRange(findSpelled("good2" )))); |
794 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad" )), std::nullopt); |
795 | |
796 | recordTokens(Code: R"cpp( |
797 | #define PREV prev |
798 | #define ID(X) X |
799 | PREV ID(good) |
800 | #define LARGE PREV ID(bad) |
801 | LARGE |
802 | )cpp" ); |
803 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good" )), |
804 | ValueIs(SameRange(findSpelled("good" )))); |
805 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad" )), std::nullopt); |
806 | |
807 | recordTokens(Code: R"cpp( |
808 | #define ID(X) X |
809 | #define ID2(X, Y) X Y |
810 | ID2(prev, good) |
811 | ID2(prev, ID(good2)) |
812 | #define LARGE ID2(prev, bad) |
813 | LARGE |
814 | )cpp" ); |
815 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good" )), |
816 | ValueIs(SameRange(findSpelled("good" )))); |
817 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2" )), |
818 | ValueIs(SameRange(findSpelled("good2" )))); |
819 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad" )), std::nullopt); |
820 | |
821 | // Prev from macro body. |
822 | recordTokens(Code: R"cpp( |
823 | #define ID(X) X |
824 | #define ID2(X, Y) X prev ID(Y) |
825 | ID2(not_prev, good) |
826 | )cpp" ); |
827 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good" )), |
828 | ValueIs(SameRange(findSpelled("good" )))); |
829 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good" )), std::nullopt); |
830 | } |
831 | |
832 | TEST_F(TokenBufferTest, NoCrashForEofToken) { |
833 | recordTokens(Code: R"cpp( |
834 | int main() { |
835 | )cpp" ); |
836 | ASSERT_TRUE(!Buffer.expandedTokens().empty()); |
837 | ASSERT_EQ(Buffer.expandedTokens().back().kind(), tok::eof); |
838 | // Expanded range including `eof` is handled gracefully (`eof` is ignored). |
839 | EXPECT_THAT( |
840 | Buffer.spelledForExpanded(Buffer.expandedTokens()), |
841 | ValueIs(SameRange(Buffer.spelledTokens(SourceMgr->getMainFileID())))); |
842 | } |
843 | |
844 | TEST_F(TokenBufferTest, ExpandedTokensForRange) { |
845 | recordTokens(Code: R"cpp( |
846 | #define SIGN(X) X##_washere |
847 | A SIGN(B) C SIGN(D) E SIGN(F) G |
848 | )cpp" ); |
849 | |
850 | SourceRange R(findExpanded(Query: "C" ).front().location(), |
851 | findExpanded(Query: "F_washere" ).front().location()); |
852 | // Expanded and spelled tokens are stored separately. |
853 | EXPECT_THAT(Buffer.expandedTokens(R), |
854 | SameRange(findExpanded("C D_washere E F_washere" ))); |
855 | EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); |
856 | } |
857 | |
858 | TEST_F(TokenBufferTest, ExpansionsOverlapping) { |
859 | // Object-like macro expansions. |
860 | recordTokens(Code: R"cpp( |
861 | #define FOO 3+4 |
862 | int a = FOO 1; |
863 | int b = FOO 2; |
864 | )cpp" ); |
865 | |
866 | llvm::ArrayRef<syntax::Token> Foo1 = findSpelled(Query: "FOO 1" ); |
867 | EXPECT_THAT( |
868 | Buffer.expansionStartingAt(Foo1.data()), |
869 | ValueIs(IsExpansion(SameRange(Foo1.drop_back()), |
870 | SameRange(findExpanded("3 + 4 1" ).drop_back())))); |
871 | EXPECT_THAT( |
872 | Buffer.expansionsOverlapping(Foo1), |
873 | ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), |
874 | SameRange(findExpanded("3 + 4 1" ).drop_back())))); |
875 | |
876 | llvm::ArrayRef<syntax::Token> Foo2 = findSpelled(Query: "FOO 2" ); |
877 | EXPECT_THAT( |
878 | Buffer.expansionStartingAt(Foo2.data()), |
879 | ValueIs(IsExpansion(SameRange(Foo2.drop_back()), |
880 | SameRange(findExpanded("3 + 4 2" ).drop_back())))); |
881 | EXPECT_THAT( |
882 | Buffer.expansionsOverlapping(llvm::ArrayRef(Foo1.begin(), Foo2.end())), |
883 | ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _), |
884 | IsExpansion(SameRange(Foo2.drop_back()), _))); |
885 | |
886 | // Function-like macro expansions. |
887 | recordTokens(Code: R"cpp( |
888 | #define ID(X) X |
889 | int a = ID(1+2+3); |
890 | int b = ID(ID(2+3+4)); |
891 | )cpp" ); |
892 | |
893 | llvm::ArrayRef<syntax::Token> ID1 = findSpelled(Query: "ID ( 1 + 2 + 3 )" ); |
894 | EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), |
895 | ValueIs(IsExpansion(SameRange(ID1), |
896 | SameRange(findExpanded("1 + 2 + 3" ))))); |
897 | // Only the first spelled token should be found. |
898 | for (const auto &T : ID1.drop_front()) |
899 | EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); |
900 | |
901 | llvm::ArrayRef<syntax::Token> ID2 = findSpelled(Query: "ID ( ID ( 2 + 3 + 4 ) )" ); |
902 | EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), |
903 | ValueIs(IsExpansion(SameRange(ID2), |
904 | SameRange(findExpanded("2 + 3 + 4" ))))); |
905 | // Only the first spelled token should be found. |
906 | for (const auto &T : ID2.drop_front()) |
907 | EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); |
908 | |
909 | EXPECT_THAT(Buffer.expansionsOverlapping(llvm::ArrayRef( |
910 | findSpelled("1 + 2" ).data(), findSpelled("4" ).data())), |
911 | ElementsAre(IsExpansion(SameRange(ID1), _), |
912 | IsExpansion(SameRange(ID2), _))); |
913 | |
914 | // PP directives. |
915 | recordTokens(Code: R"cpp( |
916 | #define FOO 1 |
917 | int a = FOO; |
918 | #pragma once |
919 | int b = 1; |
920 | )cpp" ); |
921 | |
922 | llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled(Query: "# define FOO 1" ); |
923 | EXPECT_THAT( |
924 | Buffer.expansionStartingAt(&DefineFoo.front()), |
925 | ValueIs(IsExpansion(SameRange(DefineFoo), |
926 | SameRange(findExpanded("int a" ).take_front(0))))); |
927 | // Only the first spelled token should be found. |
928 | for (const auto &T : DefineFoo.drop_front()) |
929 | EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); |
930 | |
931 | llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled(Query: "# pragma once" ); |
932 | EXPECT_THAT( |
933 | Buffer.expansionStartingAt(&PragmaOnce.front()), |
934 | ValueIs(IsExpansion(SameRange(PragmaOnce), |
935 | SameRange(findExpanded("int b" ).take_front(0))))); |
936 | // Only the first spelled token should be found. |
937 | for (const auto &T : PragmaOnce.drop_front()) |
938 | EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); |
939 | |
940 | EXPECT_THAT( |
941 | Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma" )), |
942 | ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;" ).drop_back()), _), |
943 | IsExpansion(SameRange(PragmaOnce), _))); |
944 | } |
945 | |
946 | TEST_F(TokenBufferTest, TokensToFileRange) { |
947 | addFile(Path: "./foo.h" , Contents: "token_from_header" ); |
948 | llvm::Annotations Code(R"cpp( |
949 | #define FOO token_from_expansion |
950 | #include "./foo.h" |
951 | $all[[$i[[int]] a = FOO;]] |
952 | )cpp" ); |
953 | recordTokens(Code: Code.code()); |
954 | |
955 | auto &SM = *SourceMgr; |
956 | |
957 | // Two simple examples. |
958 | auto Int = findExpanded(Query: "int" ).front(); |
959 | auto Semi = findExpanded(Query: ";" ).front(); |
960 | EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i" ).Begin, |
961 | Code.range("i" ).End)); |
962 | EXPECT_EQ(syntax::Token::range(SM, Int, Semi), |
963 | FileRange(SM.getMainFileID(), Code.range("all" ).Begin, |
964 | Code.range("all" ).End)); |
965 | // We don't test assertion failures because death tests are slow. |
966 | } |
967 | |
968 | TEST_F(TokenBufferTest, MacroExpansions) { |
969 | llvm::Annotations Code(R"cpp( |
970 | #define FOO B |
971 | #define FOO2 BA |
972 | #define CALL(X) int X |
973 | #define G CALL(FOO2) |
974 | int B; |
975 | $macro[[FOO]]; |
976 | $macro[[CALL]](A); |
977 | $macro[[G]]; |
978 | )cpp" ); |
979 | recordTokens(Code: Code.code()); |
980 | auto &SM = *SourceMgr; |
981 | auto Expansions = Buffer.macroExpansions(FID: SM.getMainFileID()); |
982 | std::vector<FileRange> ExpectedMacroRanges; |
983 | for (auto Range : Code.ranges(Name: "macro" )) |
984 | ExpectedMacroRanges.push_back( |
985 | x: FileRange(SM.getMainFileID(), Range.Begin, Range.End)); |
986 | std::vector<FileRange> ActualMacroRanges; |
987 | for (auto Expansion : Expansions) |
988 | ActualMacroRanges.push_back(x: Expansion->range(SM)); |
989 | EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); |
990 | } |
991 | |
992 | TEST_F(TokenBufferTest, Touching) { |
993 | llvm::Annotations Code("^i^nt^ ^a^b^=^1;^" ); |
994 | recordTokens(Code: Code.code()); |
995 | |
996 | auto Touching = [&](int Index) { |
997 | SourceLocation Loc = SourceMgr->getComposedLoc(FID: SourceMgr->getMainFileID(), |
998 | Offset: Code.points()[Index]); |
999 | return spelledTokensTouching(Loc, Tokens: Buffer); |
1000 | }; |
1001 | auto Identifier = [&](int Index) { |
1002 | SourceLocation Loc = SourceMgr->getComposedLoc(FID: SourceMgr->getMainFileID(), |
1003 | Offset: Code.points()[Index]); |
1004 | const syntax::Token *Tok = spelledIdentifierTouching(Loc, Tokens: Buffer); |
1005 | return Tok ? Tok->text(SM: *SourceMgr) : "" ; |
1006 | }; |
1007 | |
1008 | EXPECT_THAT(Touching(0), SameRange(findSpelled("int" ))); |
1009 | EXPECT_EQ(Identifier(0), "" ); |
1010 | EXPECT_THAT(Touching(1), SameRange(findSpelled("int" ))); |
1011 | EXPECT_EQ(Identifier(1), "" ); |
1012 | EXPECT_THAT(Touching(2), SameRange(findSpelled("int" ))); |
1013 | EXPECT_EQ(Identifier(2), "" ); |
1014 | |
1015 | EXPECT_THAT(Touching(3), SameRange(findSpelled("ab" ))); |
1016 | EXPECT_EQ(Identifier(3), "ab" ); |
1017 | EXPECT_THAT(Touching(4), SameRange(findSpelled("ab" ))); |
1018 | EXPECT_EQ(Identifier(4), "ab" ); |
1019 | |
1020 | EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =" ))); |
1021 | EXPECT_EQ(Identifier(5), "ab" ); |
1022 | |
1023 | EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1" ))); |
1024 | EXPECT_EQ(Identifier(6), "" ); |
1025 | |
1026 | EXPECT_THAT(Touching(7), SameRange(findSpelled(";" ))); |
1027 | EXPECT_EQ(Identifier(7), "" ); |
1028 | |
1029 | ASSERT_EQ(Code.points().size(), 8u); |
1030 | } |
1031 | |
1032 | TEST_F(TokenBufferTest, ExpandedBySpelled) { |
1033 | recordTokens(Code: R"cpp( |
1034 | a1 a2 a3 b1 b2 |
1035 | )cpp" ); |
1036 | // Expanded and spelled tokens are stored separately. |
1037 | EXPECT_THAT(findExpanded("a1 a2" ), Not(SameRange(findSpelled("a1 a2" )))); |
1038 | // Searching for subranges of expanded tokens should give the corresponding |
1039 | // spelled ones. |
1040 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2" )), |
1041 | ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2" )))); |
1042 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3" )), |
1043 | ElementsAre(SameRange(findExpanded("a1 a2 a3" )))); |
1044 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2" )), |
1045 | ElementsAre(SameRange(findExpanded("b1 b2" )))); |
1046 | |
1047 | // Test search on simple macro expansions. |
1048 | recordTokens(Code: R"cpp( |
1049 | #define A a1 a2 a3 |
1050 | #define B b1 b2 |
1051 | |
1052 | A split B |
1053 | )cpp" ); |
1054 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B" )), |
1055 | ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2" )))); |
1056 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split" ).drop_back()), |
1057 | ElementsAre(SameRange(findExpanded("a1 a2 a3" )))); |
1058 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B" ).drop_front()), |
1059 | ElementsAre(SameRange(findExpanded("b1 b2" )))); |
1060 | |
1061 | // Ranges not fully covering macro expansions should fail. |
1062 | recordTokens(Code: R"cpp( |
1063 | #define ID(x) x |
1064 | |
1065 | ID(a) |
1066 | )cpp" ); |
1067 | // Spelled don't cover entire mapping (missing ID token) -> empty result |
1068 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )" )), IsEmpty()); |
1069 | // Spelled don't cover entire mapping (missing ) token) -> empty result |
1070 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a" )), IsEmpty()); |
1071 | |
1072 | // Recursive macro invocations. |
1073 | recordTokens(Code: R"cpp( |
1074 | #define ID(x) x |
1075 | #define B b1 b2 |
1076 | |
1077 | ID(ID(ID(a1) a2 a3)) split ID(B) |
1078 | )cpp" ); |
1079 | |
1080 | EXPECT_THAT( |
1081 | Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )" )), |
1082 | ElementsAre(SameRange(findExpanded("a1 a2 a3" )))); |
1083 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )" )), |
1084 | ElementsAre(SameRange(findExpanded("b1 b2" )))); |
1085 | EXPECT_THAT(Buffer.expandedForSpelled( |
1086 | findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )" )), |
1087 | ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2" )))); |
1088 | // FIXME: these should succeed, but we do not support macro arguments yet. |
1089 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1" )), IsEmpty()); |
1090 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2" )), |
1091 | IsEmpty()); |
1092 | |
1093 | // Empty macro expansions. |
1094 | recordTokens(Code: R"cpp( |
1095 | #define EMPTY |
1096 | #define ID(X) X |
1097 | |
1098 | EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 |
1099 | EMPTY EMPTY ID(4 5 6) split2 |
1100 | ID(7 8 9) EMPTY EMPTY |
1101 | )cpp" ); |
1102 | // Covered by empty expansions on one of both of the sides. |
1103 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )" )), |
1104 | ElementsAre(SameRange(findExpanded("1 2 3" )))); |
1105 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )" )), |
1106 | ElementsAre(SameRange(findExpanded("4 5 6" )))); |
1107 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )" )), |
1108 | ElementsAre(SameRange(findExpanded("7 8 9" )))); |
1109 | // Including the empty macro expansions on the side. |
1110 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )" )), |
1111 | ElementsAre(SameRange(findExpanded("1 2 3" )))); |
1112 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY" )), |
1113 | ElementsAre(SameRange(findExpanded("1 2 3" )))); |
1114 | EXPECT_THAT( |
1115 | Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY" )), |
1116 | ElementsAre(SameRange(findExpanded("1 2 3" )))); |
1117 | |
1118 | // Empty mappings coming from various directives. |
1119 | recordTokens(Code: R"cpp( |
1120 | #define ID(X) X |
1121 | ID(1) |
1122 | #pragma lalala |
1123 | not_mapped |
1124 | )cpp" ); |
1125 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X" )), |
1126 | IsEmpty()); |
1127 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala" )), |
1128 | IsEmpty()); |
1129 | |
1130 | // Empty macro expansion. |
1131 | recordTokens(Code: R"cpp( |
1132 | #define EMPTY |
1133 | EMPTY int a = 100; |
1134 | )cpp" ); |
1135 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int" ).drop_back()), |
1136 | IsEmpty()); |
1137 | } |
1138 | |
1139 | TEST_F(TokenCollectorTest, Pragmas) { |
1140 | // Tokens coming from concatenations. |
1141 | recordTokens(Code: R"cpp( |
1142 | void foo() { |
1143 | #pragma unroll 4 |
1144 | for(int i=0;i<4;++i); |
1145 | } |
1146 | )cpp" ); |
1147 | } |
1148 | } // namespace |
1149 | |