1 | //===- TokensTest.cpp -----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.def"
#include "clang/Basic/TokenKinds.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Annotations/Annotations.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <memory>
#include <optional>
#include <ostream>
#include <string>
49 | |
50 | using namespace clang; |
51 | using namespace clang::syntax; |
52 | |
53 | using llvm::ValueIs; |
54 | using ::testing::_; |
55 | using ::testing::AllOf; |
56 | using ::testing::Contains; |
57 | using ::testing::ElementsAre; |
58 | using ::testing::Field; |
59 | using ::testing::IsEmpty; |
60 | using ::testing::Matcher; |
61 | using ::testing::Not; |
62 | using ::testing::Pointee; |
63 | using ::testing::StartsWith; |
64 | |
65 | namespace { |
66 | // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the |
67 | // argument. |
68 | MATCHER_P(SameRange, A, "" ) { |
69 | return A.begin() == arg.begin() && A.end() == arg.end(); |
70 | } |
71 | |
72 | Matcher<TokenBuffer::Expansion> |
73 | IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, |
74 | Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { |
75 | return AllOf(matchers: Field(field: &TokenBuffer::Expansion::Spelled, matcher: Spelled), |
76 | matchers: Field(field: &TokenBuffer::Expansion::Expanded, matcher: Expanded)); |
77 | } |
78 | // Matchers for syntax::Token. |
79 | MATCHER_P(Kind, K, "" ) { return arg.kind() == K; } |
80 | MATCHER_P2(HasText, Text, SourceMgr, "" ) { |
81 | return arg.text(*SourceMgr) == Text; |
82 | } |
83 | /// Checks the start and end location of a token are equal to SourceRng. |
84 | MATCHER_P(RangeIs, SourceRng, "" ) { |
85 | return arg.location() == SourceRng.first && |
86 | arg.endLocation() == SourceRng.second; |
87 | } |
88 | |
89 | class TokenCollectorTest : public ::testing::Test { |
90 | public: |
91 | /// Run the clang frontend, collect the preprocessed tokens from the frontend |
92 | /// invocation and store them in this->Buffer. |
93 | /// This also clears SourceManager before running the compiler. |
94 | void recordTokens(llvm::StringRef Code) { |
95 | class RecordTokens : public ASTFrontendAction { |
96 | public: |
97 | explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} |
98 | |
99 | bool BeginSourceFileAction(CompilerInstance &CI) override { |
100 | assert(!Collector && "expected only a single call to BeginSourceFile" ); |
101 | Collector.emplace(args&: CI.getPreprocessor()); |
102 | return true; |
103 | } |
104 | void EndSourceFileAction() override { |
105 | assert(Collector && "BeginSourceFileAction was never called" ); |
106 | Result = std::move(*Collector).consume(); |
107 | Result.indexExpandedTokens(); |
108 | } |
109 | |
110 | std::unique_ptr<ASTConsumer> |
111 | CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { |
112 | return std::make_unique<ASTConsumer>(); |
113 | } |
114 | |
115 | private: |
116 | TokenBuffer &Result; |
117 | std::optional<TokenCollector> Collector; |
118 | }; |
119 | |
120 | constexpr const char *FileName = "./input.cpp" ; |
121 | FS->addFile(Path: FileName, ModificationTime: time_t(), Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: "" )); |
122 | // Prepare to run a compiler. |
123 | if (!Diags->getClient()) |
124 | Diags->setClient(client: new IgnoringDiagConsumer); |
125 | std::vector<const char *> Args = {"tok-test" , "-std=c++03" , "-fsyntax-only" , |
126 | FileName}; |
127 | CreateInvocationOptions CIOpts; |
128 | CIOpts.Diags = Diags; |
129 | CIOpts.VFS = FS; |
130 | auto CI = createInvocation(Args, Opts: std::move(CIOpts)); |
131 | assert(CI); |
132 | CI->getFrontendOpts().DisableFree = false; |
133 | CI->getPreprocessorOpts().addRemappedFile( |
134 | From: FileName, To: llvm::MemoryBuffer::getMemBufferCopy(InputData: Code).release()); |
135 | CompilerInstance Compiler; |
136 | Compiler.setInvocation(std::move(CI)); |
137 | Compiler.setDiagnostics(Diags.get()); |
138 | Compiler.setFileManager(FileMgr.get()); |
139 | Compiler.setSourceManager(SourceMgr.get()); |
140 | |
141 | this->Buffer = TokenBuffer(*SourceMgr); |
142 | RecordTokens Recorder(this->Buffer); |
143 | ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) |
144 | << "failed to run the frontend" ; |
145 | } |
146 | |
147 | /// Record the tokens and return a test dump of the resulting buffer. |
148 | std::string collectAndDump(llvm::StringRef Code) { |
149 | recordTokens(Code); |
150 | return Buffer.dumpForTests(); |
151 | } |
152 | |
153 | // Adds a file to the test VFS. |
154 | void addFile(llvm::StringRef Path, llvm::StringRef Contents) { |
155 | if (!FS->addFile(Path, ModificationTime: time_t(), |
156 | Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: Contents))) { |
157 | ADD_FAILURE() << "could not add a file to VFS: " << Path; |
158 | } |
159 | } |
160 | |
161 | /// Add a new file, run syntax::tokenize() on the range if any, run it on the |
162 | /// whole file otherwise and return the results. |
163 | std::vector<syntax::Token> tokenize(llvm::StringRef Text) { |
164 | llvm::Annotations Annot(Text); |
165 | auto FID = SourceMgr->createFileID( |
166 | Buffer: llvm::MemoryBuffer::getMemBufferCopy(InputData: Annot.code())); |
167 | // FIXME: pass proper LangOptions. |
168 | if (Annot.ranges().empty()) |
169 | return syntax::tokenize(FID, SM: *SourceMgr, LO: LangOptions()); |
170 | return syntax::tokenize( |
171 | FR: syntax::FileRange(FID, Annot.range().Begin, Annot.range().End), |
172 | SM: *SourceMgr, LO: LangOptions()); |
173 | } |
174 | |
175 | // Specialized versions of matchers that hide the SourceManager from clients. |
176 | Matcher<syntax::Token> HasText(std::string Text) const { |
177 | return ::HasText(gmock_p0: Text, gmock_p1: SourceMgr.get()); |
178 | } |
179 | Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { |
180 | std::pair<SourceLocation, SourceLocation> Ls; |
181 | Ls.first = SourceMgr->getLocForStartOfFile(FID: SourceMgr->getMainFileID()) |
182 | .getLocWithOffset(Offset: R.Begin); |
183 | Ls.second = SourceMgr->getLocForStartOfFile(FID: SourceMgr->getMainFileID()) |
184 | .getLocWithOffset(Offset: R.End); |
185 | return ::RangeIs(gmock_p0: Ls); |
186 | } |
187 | |
188 | /// Finds a subrange in O(n * m). |
189 | template <class T, class U, class Eq> |
190 | llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, |
191 | llvm::ArrayRef<T> Range, Eq F) { |
192 | assert(Subrange.size() >= 1); |
193 | if (Range.size() < Subrange.size()) |
194 | return llvm::ArrayRef(Range.end(), Range.end()); |
195 | for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size(); |
196 | Begin <= Last; ++Begin) { |
197 | auto It = Begin; |
198 | for (auto ItSub = Subrange.begin(); ItSub != Subrange.end(); |
199 | ++ItSub, ++It) { |
200 | if (!F(*ItSub, *It)) |
201 | goto continue_outer; |
202 | } |
203 | return llvm::ArrayRef(Begin, It); |
204 | continue_outer:; |
205 | } |
206 | return llvm::ArrayRef(Range.end(), Range.end()); |
207 | } |
208 | |
209 | /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. |
210 | /// The match should be unique. \p Query is a whitespace-separated list of |
211 | /// tokens to search for. |
212 | llvm::ArrayRef<syntax::Token> |
213 | findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { |
214 | llvm::SmallVector<llvm::StringRef, 8> QueryTokens; |
215 | Query.split(A&: QueryTokens, Separator: ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
216 | if (QueryTokens.empty()) { |
217 | ADD_FAILURE() << "will not look for an empty list of tokens" ; |
218 | std::abort(); |
219 | } |
220 | // An equality test for search. |
221 | auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { |
222 | return Q == T.text(SM: *SourceMgr); |
223 | }; |
224 | // Find a match. |
225 | auto Found = findSubrange(Subrange: llvm::ArrayRef(QueryTokens), Range: Tokens, F: TextMatches); |
226 | if (Found.begin() == Tokens.end()) { |
227 | ADD_FAILURE() << "could not find the subrange for " << Query; |
228 | std::abort(); |
229 | } |
230 | // Check that the match is unique. |
231 | if (findSubrange(Subrange: llvm::ArrayRef(QueryTokens), |
232 | Range: llvm::ArrayRef(Found.end(), Tokens.end()), F: TextMatches) |
233 | .begin() != Tokens.end()) { |
234 | ADD_FAILURE() << "match is not unique for " << Query; |
235 | std::abort(); |
236 | } |
237 | return Found; |
238 | }; |
239 | |
240 | // Specialized versions of findTokenRange for expanded and spelled tokens. |
241 | llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { |
242 | return findTokenRange(Query, Tokens: Buffer.expandedTokens()); |
243 | } |
244 | llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, |
245 | FileID File = FileID()) { |
246 | if (!File.isValid()) |
247 | File = SourceMgr->getMainFileID(); |
248 | return findTokenRange(Query, Tokens: Buffer.spelledTokens(FID: File)); |
249 | } |
250 | |
251 | // Data fields. |
252 | llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = |
253 | new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); |
254 | IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = |
255 | new llvm::vfs::InMemoryFileSystem; |
256 | llvm::IntrusiveRefCntPtr<FileManager> FileMgr = |
257 | new FileManager(FileSystemOptions(), FS); |
258 | llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = |
259 | new SourceManager(*Diags, *FileMgr); |
260 | /// Contains last result of calling recordTokens(). |
261 | TokenBuffer Buffer = TokenBuffer(*SourceMgr); |
262 | }; |
263 | |
264 | TEST_F(TokenCollectorTest, RawMode) { |
265 | EXPECT_THAT(tokenize("int main() {}" ), |
266 | ElementsAre(Kind(tok::kw_int), |
267 | AllOf(HasText("main" ), Kind(tok::identifier)), |
268 | Kind(tok::l_paren), Kind(tok::r_paren), |
269 | Kind(tok::l_brace), Kind(tok::r_brace))); |
270 | // Comments are ignored for now. |
271 | EXPECT_THAT(tokenize("/* foo */int a; // more comments" ), |
272 | ElementsAre(Kind(tok::kw_int), |
273 | AllOf(HasText("a" ), Kind(tok::identifier)), |
274 | Kind(tok::semi))); |
275 | EXPECT_THAT(tokenize("int [[main() {]]}" ), |
276 | ElementsAre(AllOf(HasText("main" ), Kind(tok::identifier)), |
277 | Kind(tok::l_paren), Kind(tok::r_paren), |
278 | Kind(tok::l_brace))); |
279 | EXPECT_THAT(tokenize("int [[main() { ]]}" ), |
280 | ElementsAre(AllOf(HasText("main" ), Kind(tok::identifier)), |
281 | Kind(tok::l_paren), Kind(tok::r_paren), |
282 | Kind(tok::l_brace))); |
283 | // First token is partially parsed, last token is fully included even though |
284 | // only a part of it is contained in the range. |
285 | EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}" ), |
286 | ElementsAre(AllOf(HasText("ain" ), Kind(tok::identifier)), |
287 | Kind(tok::l_paren), Kind(tok::r_paren), |
288 | Kind(tok::l_brace), Kind(tok::kw_return))); |
289 | } |
290 | |
291 | TEST_F(TokenCollectorTest, Basic) { |
292 | std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { |
293 | {"int main() {}" , |
294 | R"(expanded tokens: |
295 | int main ( ) { } |
296 | file './input.cpp' |
297 | spelled tokens: |
298 | int main ( ) { } |
299 | no mappings. |
300 | )" }, |
301 | // All kinds of whitespace are ignored. |
302 | {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n" , |
303 | R"(expanded tokens: |
304 | int main ( ) { } |
305 | file './input.cpp' |
306 | spelled tokens: |
307 | int main ( ) { } |
308 | no mappings. |
309 | )" }, |
310 | // Annotation tokens are ignored. |
311 | {R"cpp( |
312 | #pragma GCC visibility push (public) |
313 | #pragma GCC visibility pop |
314 | )cpp" , |
315 | R"(expanded tokens: |
316 | <empty> |
317 | file './input.cpp' |
318 | spelled tokens: |
319 | # pragma GCC visibility push ( public ) # pragma GCC visibility pop |
320 | mappings: |
321 | ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) |
322 | )" }, |
323 | // Empty files should not crash. |
324 | {R"cpp()cpp" , R"(expanded tokens: |
325 | <empty> |
326 | file './input.cpp' |
327 | spelled tokens: |
328 | <empty> |
329 | no mappings. |
330 | )" }, |
331 | // Should not crash on errors inside '#define' directives. Error is that |
332 | // stringification (#B) does not refer to a macro parameter. |
333 | { |
334 | R"cpp( |
335 | a |
336 | #define MACRO() A #B |
337 | )cpp" , |
338 | R"(expanded tokens: |
339 | a |
340 | file './input.cpp' |
341 | spelled tokens: |
342 | a # define MACRO ( ) A # B |
343 | mappings: |
344 | ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) |
345 | )" }}; |
346 | for (auto &Test : TestCases) |
347 | EXPECT_EQ(collectAndDump(Test.first), Test.second) |
348 | << collectAndDump(Code: Test.first); |
349 | } |
350 | |
351 | TEST_F(TokenCollectorTest, Locations) { |
352 | // Check locations of the tokens. |
353 | llvm::Annotations Code(R"cpp( |
354 | $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] |
355 | )cpp" ); |
356 | recordTokens(Code: Code.code()); |
357 | // Check expanded tokens. |
358 | EXPECT_THAT( |
359 | Buffer.expandedTokens(), |
360 | ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1" ))), |
361 | AllOf(Kind(tok::identifier), RangeIs(Code.range("r2" ))), |
362 | AllOf(Kind(tok::equal), RangeIs(Code.range("r3" ))), |
363 | AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4" ))), |
364 | AllOf(Kind(tok::semi), RangeIs(Code.range("r5" ))), |
365 | Kind(tok::eof))); |
366 | // Check spelled tokens. |
367 | EXPECT_THAT( |
368 | Buffer.spelledTokens(SourceMgr->getMainFileID()), |
369 | ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1" ))), |
370 | AllOf(Kind(tok::identifier), RangeIs(Code.range("r2" ))), |
371 | AllOf(Kind(tok::equal), RangeIs(Code.range("r3" ))), |
372 | AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4" ))), |
373 | AllOf(Kind(tok::semi), RangeIs(Code.range("r5" ))))); |
374 | |
375 | auto StartLoc = SourceMgr->getLocForStartOfFile(FID: SourceMgr->getMainFileID()); |
376 | for (auto &R : Code.ranges()) { |
377 | EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)), |
378 | Pointee(RangeIs(R))); |
379 | } |
380 | } |
381 | |
382 | TEST_F(TokenCollectorTest, MacroDirectives) { |
383 | // Macro directives are not stored anywhere at the moment. |
384 | std::string Code = R"cpp( |
385 | #define FOO a |
386 | #include "unresolved_file.h" |
387 | #undef FOO |
388 | #ifdef X |
389 | #else |
390 | #endif |
391 | #ifndef Y |
392 | #endif |
393 | #if 1 |
394 | #elif 2 |
395 | #else |
396 | #endif |
397 | #pragma once |
398 | #pragma something lalala |
399 | |
400 | int a; |
401 | )cpp" ; |
402 | std::string Expected = |
403 | "expanded tokens:\n" |
404 | " int a ;\n" |
405 | "file './input.cpp'\n" |
406 | " spelled tokens:\n" |
407 | " # define FOO a # include \"unresolved_file.h\" # undef FOO " |
408 | "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " |
409 | "# endif # pragma once # pragma something lalala int a ;\n" |
410 | " mappings:\n" |
411 | " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n" ; |
412 | EXPECT_EQ(collectAndDump(Code), Expected); |
413 | } |
414 | |
415 | TEST_F(TokenCollectorTest, MacroReplacements) { |
416 | std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { |
417 | // A simple object-like macro. |
418 | {R"cpp( |
419 | #define INT int const |
420 | INT a; |
421 | )cpp" , |
422 | R"(expanded tokens: |
423 | int const a ; |
424 | file './input.cpp' |
425 | spelled tokens: |
426 | # define INT int const INT a ; |
427 | mappings: |
428 | ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) |
429 | ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) |
430 | )" }, |
431 | // A simple function-like macro. |
432 | {R"cpp( |
433 | #define INT(a) const int |
434 | INT(10+10) a; |
435 | )cpp" , |
436 | R"(expanded tokens: |
437 | const int a ; |
438 | file './input.cpp' |
439 | spelled tokens: |
440 | # define INT ( a ) const int INT ( 10 + 10 ) a ; |
441 | mappings: |
442 | ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) |
443 | ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) |
444 | )" }, |
445 | // Recursive macro replacements. |
446 | {R"cpp( |
447 | #define ID(X) X |
448 | #define INT int const |
449 | ID(ID(INT)) a; |
450 | )cpp" , |
451 | R"(expanded tokens: |
452 | int const a ; |
453 | file './input.cpp' |
454 | spelled tokens: |
455 | # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; |
456 | mappings: |
457 | ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) |
458 | ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) |
459 | )" }, |
460 | // A little more complicated recursive macro replacements. |
461 | {R"cpp( |
462 | #define ADD(X, Y) X+Y |
463 | #define MULT(X, Y) X*Y |
464 | |
465 | int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); |
466 | )cpp" , |
467 | "expanded tokens:\n" |
468 | " int a = 1 * 2 + 3 * 4 + 5 ;\n" |
469 | "file './input.cpp'\n" |
470 | " spelled tokens:\n" |
471 | " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " |
472 | "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" |
473 | " mappings:\n" |
474 | " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" |
475 | " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n" }, |
476 | // Empty macro replacement. |
477 | // FIXME: the #define directives should not be glued together. |
478 | {R"cpp( |
479 | #define EMPTY |
480 | #define EMPTY_FUNC(X) |
481 | EMPTY |
482 | EMPTY_FUNC(1+2+3) |
483 | )cpp" , |
484 | R"(expanded tokens: |
485 | <empty> |
486 | file './input.cpp' |
487 | spelled tokens: |
488 | # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) |
489 | mappings: |
490 | ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) |
491 | ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) |
492 | ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) |
493 | )" }, |
494 | // File ends with a macro replacement. |
495 | {R"cpp( |
496 | #define FOO 10+10; |
497 | int a = FOO |
498 | )cpp" , |
499 | R"(expanded tokens: |
500 | int a = 10 + 10 ; |
501 | file './input.cpp' |
502 | spelled tokens: |
503 | # define FOO 10 + 10 ; int a = FOO |
504 | mappings: |
505 | ['#'_0, 'int'_7) => ['int'_0, 'int'_0) |
506 | ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) |
507 | )" }, |
508 | {R"cpp( |
509 | #define NUM 42 |
510 | #define ID(a) a |
511 | #define M 1 + ID |
512 | M(NUM) |
513 | )cpp" , |
514 | R"(expanded tokens: |
515 | 1 + 42 |
516 | file './input.cpp' |
517 | spelled tokens: |
518 | # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM ) |
519 | mappings: |
520 | ['#'_0, 'M'_17) => ['1'_0, '1'_0) |
521 | ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3) |
522 | )" }, |
523 | }; |
524 | |
525 | for (auto &Test : TestCases) { |
526 | std::string Dump = collectAndDump(Code: Test.first); |
527 | EXPECT_EQ(Test.second, Dump) << Dump; |
528 | } |
529 | } |
530 | |
531 | TEST_F(TokenCollectorTest, SpecialTokens) { |
532 | // Tokens coming from concatenations. |
533 | recordTokens(Code: R"cpp( |
534 | #define CONCAT(a, b) a ## b |
535 | int a = CONCAT(1, 2); |
536 | )cpp" ); |
537 | EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), |
538 | Contains(HasText("12" ))); |
539 | // Multi-line tokens with slashes at the end. |
540 | recordTokens(Code: "i\\\nn\\\nt" ); |
541 | EXPECT_THAT(Buffer.expandedTokens(), |
542 | ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt" )), |
543 | Kind(tok::eof))); |
544 | // FIXME: test tokens with digraphs and UCN identifiers. |
545 | } |
546 | |
547 | TEST_F(TokenCollectorTest, LateBoundTokens) { |
548 | // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), |
549 | // but we choose to record them as a single token (for now). |
550 | llvm::Annotations Code(R"cpp( |
551 | template <class T> |
552 | struct foo { int a; }; |
553 | int bar = foo<foo<int$br[[>>]]().a; |
554 | int baz = 10 $op[[>>]] 2; |
555 | )cpp" ); |
556 | recordTokens(Code: Code.code()); |
557 | EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), |
558 | AllOf(Contains(AllOf(Kind(tok::greatergreater), |
559 | RangeIs(Code.range("br" )))), |
560 | Contains(AllOf(Kind(tok::greatergreater), |
561 | RangeIs(Code.range("op" )))))); |
562 | } |
563 | |
564 | TEST_F(TokenCollectorTest, DelayedParsing) { |
565 | llvm::StringLiteral Code = R"cpp( |
566 | struct Foo { |
567 | int method() { |
568 | // Parser will visit method bodies and initializers multiple times, but |
569 | // TokenBuffer should only record the first walk over the tokens; |
570 | return 100; |
571 | } |
572 | int a = 10; |
573 | |
574 | struct Subclass { |
575 | void foo() { |
576 | Foo().method(); |
577 | } |
578 | }; |
579 | }; |
580 | )cpp" ; |
581 | std::string ExpectedTokens = |
582 | "expanded tokens:\n" |
583 | " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " |
584 | "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n" ; |
585 | EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); |
586 | } |
587 | |
588 | TEST_F(TokenCollectorTest, MultiFile) { |
589 | addFile(Path: "./foo.h" , Contents: R"cpp( |
590 | #define ADD(X, Y) X+Y |
591 | int a = 100; |
592 | #include "bar.h" |
593 | )cpp" ); |
594 | addFile(Path: "./bar.h" , Contents: R"cpp( |
595 | int b = ADD(1, 2); |
596 | #define MULT(X, Y) X*Y |
597 | )cpp" ); |
598 | llvm::StringLiteral Code = R"cpp( |
599 | #include "foo.h" |
600 | int c = ADD(1, MULT(2,3)); |
601 | )cpp" ; |
602 | |
603 | std::string Expected = R"(expanded tokens: |
604 | int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; |
605 | file './input.cpp' |
606 | spelled tokens: |
607 | # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; |
608 | mappings: |
609 | ['#'_0, 'int'_3) => ['int'_12, 'int'_12) |
610 | ['ADD'_6, ';'_17) => ['1'_15, ';'_20) |
611 | file './foo.h' |
612 | spelled tokens: |
613 | # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" |
614 | mappings: |
615 | ['#'_0, 'int'_11) => ['int'_0, 'int'_0) |
616 | ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) |
617 | file './bar.h' |
618 | spelled tokens: |
619 | int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y |
620 | mappings: |
621 | ['ADD'_3, ';'_9) => ['1'_8, ';'_11) |
622 | ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) |
623 | )" ; |
624 | |
625 | EXPECT_EQ(Expected, collectAndDump(Code)) |
626 | << "input: " << Code << "\nresults: " << collectAndDump(Code); |
627 | } |
628 | |
629 | class TokenBufferTest : public TokenCollectorTest {}; |
630 | |
631 | TEST_F(TokenBufferTest, SpelledByExpanded) { |
632 | recordTokens(Code: R"cpp( |
633 | a1 a2 a3 b1 b2 |
634 | )cpp" ); |
635 | |
636 | // Expanded and spelled tokens are stored separately. |
637 | EXPECT_THAT(findExpanded("a1 a2" ), Not(SameRange(findSpelled("a1 a2" )))); |
638 | // Searching for subranges of expanded tokens should give the corresponding |
639 | // spelled ones. |
640 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2" )), |
641 | ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2" )))); |
642 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3" )), |
643 | ValueIs(SameRange(findSpelled("a1 a2 a3" )))); |
644 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2" )), |
645 | ValueIs(SameRange(findSpelled("b1 b2" )))); |
646 | |
647 | // Test search on simple macro expansions. |
648 | recordTokens(Code: R"cpp( |
649 | #define A a1 a2 a3 |
650 | #define B b1 b2 |
651 | |
652 | A split B |
653 | )cpp" ); |
654 | // Ranges going across expansion boundaries. |
655 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2" )), |
656 | ValueIs(SameRange(findSpelled("A split B" )))); |
657 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3" )), |
658 | ValueIs(SameRange(findSpelled("A split" ).drop_back()))); |
659 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2" )), |
660 | ValueIs(SameRange(findSpelled("split B" ).drop_front()))); |
661 | // Ranges not fully covering macro invocations should fail. |
662 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2" )), std::nullopt); |
663 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2" )), std::nullopt); |
664 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2" )), |
665 | std::nullopt); |
666 | |
667 | // Recursive macro invocations. |
668 | recordTokens(Code: R"cpp( |
669 | #define ID(x) x |
670 | #define B b1 b2 |
671 | |
672 | ID(ID(ID(a1) a2 a3)) split ID(B) |
673 | )cpp" ); |
674 | |
675 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2" )), |
676 | ValueIs(SameRange(findSpelled("( B" ).drop_front()))); |
677 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2" )), |
678 | ValueIs(SameRange(findSpelled( |
679 | "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )" )))); |
680 | // Mixed ranges with expanded and spelled tokens. |
681 | EXPECT_THAT( |
682 | Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split" )), |
683 | ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split" )))); |
684 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2" )), |
685 | ValueIs(SameRange(findSpelled("split ID ( B )" )))); |
686 | // Macro arguments |
687 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1" )), |
688 | ValueIs(SameRange(findSpelled("a1" )))); |
689 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2" )), |
690 | ValueIs(SameRange(findSpelled("a2" )))); |
691 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3" )), |
692 | ValueIs(SameRange(findSpelled("a3" )))); |
693 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2" )), |
694 | ValueIs(SameRange(findSpelled("ID ( a1 ) a2" )))); |
695 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3" )), |
696 | ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3" )))); |
697 | |
698 | // Empty macro expansions. |
699 | recordTokens(Code: R"cpp( |
700 | #define EMPTY |
701 | #define ID(X) X |
702 | |
703 | EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 |
704 | EMPTY EMPTY ID(4 5 6) split2 |
705 | ID(7 8 9) EMPTY EMPTY |
706 | )cpp" ); |
707 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3" )), |
708 | ValueIs(SameRange(findSpelled("1 2 3" )))); |
709 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6" )), |
710 | ValueIs(SameRange(findSpelled("4 5 6" )))); |
711 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9" )), |
712 | ValueIs(SameRange(findSpelled("7 8 9" )))); |
713 | |
714 | // Empty mappings coming from various directives. |
715 | recordTokens(Code: R"cpp( |
716 | #define ID(X) X |
717 | ID(1) |
718 | #pragma lalala |
719 | not_mapped |
720 | )cpp" ); |
721 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped" )), |
722 | ValueIs(SameRange(findSpelled("not_mapped" )))); |
723 | |
724 | // Multiple macro arguments |
725 | recordTokens(Code: R"cpp( |
726 | #define ID(X) X |
727 | #define ID2(X, Y) X Y |
728 | |
729 | ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7) |
730 | )cpp" ); |
731 | // Should fail, spans multiple arguments. |
732 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2" )), std::nullopt); |
733 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3" )), |
734 | ValueIs(SameRange(findSpelled("ID ( a2 ) a3" )))); |
735 | EXPECT_THAT( |
736 | Buffer.spelledForExpanded(findExpanded("a1 a2 a3" )), |
737 | ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )" )))); |
738 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6" )), |
739 | ValueIs(SameRange(findSpelled("a5 a6" )))); |
740 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7" )), |
741 | ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )" )))); |
742 | // Should fail, spans multiple invocations. |
743 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4" )), |
744 | std::nullopt); |
745 | |
746 | // https://github.com/clangd/clangd/issues/1289 |
747 | recordTokens(Code: R"cpp( |
748 | #define FOO(X) foo(X) |
749 | #define INDIRECT FOO(y) |
750 | INDIRECT // expands to foo(y) |
751 | )cpp" ); |
752 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y" )), std::nullopt); |
753 | |
754 | recordTokens(Code: R"cpp( |
755 | #define FOO(X) a X b |
756 | FOO(y) |
757 | )cpp" ); |
758 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("y" )), |
759 | ValueIs(SameRange(findSpelled("y" )))); |
760 | |
761 | recordTokens(Code: R"cpp( |
762 | #define ID(X) X |
763 | #define BAR ID(1) |
764 | BAR |
765 | )cpp" ); |
766 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1" )), |
767 | ValueIs(SameRange(findSpelled(") BAR" ).drop_front()))); |
768 | |
769 | // Critical cases for mapping of Prev/Next in spelledForExpandedSlow. |
770 | recordTokens(Code: R"cpp( |
771 | #define ID(X) X |
772 | ID(prev good) |
773 | ID(prev ID(good2)) |
774 | #define LARGE ID(prev ID(bad)) |
775 | LARGE |
776 | )cpp" ); |
777 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good" )), |
778 | ValueIs(SameRange(findSpelled("good" )))); |
779 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2" )), |
780 | ValueIs(SameRange(findSpelled("good2" )))); |
781 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad" )), std::nullopt); |
782 | |
783 | recordTokens(Code: R"cpp( |
784 | #define PREV prev |
785 | #define ID(X) X |
786 | PREV ID(good) |
787 | #define LARGE PREV ID(bad) |
788 | LARGE |
789 | )cpp" ); |
790 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good" )), |
791 | ValueIs(SameRange(findSpelled("good" )))); |
792 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad" )), std::nullopt); |
793 | |
794 | recordTokens(Code: R"cpp( |
795 | #define ID(X) X |
796 | #define ID2(X, Y) X Y |
797 | ID2(prev, good) |
798 | ID2(prev, ID(good2)) |
799 | #define LARGE ID2(prev, bad) |
800 | LARGE |
801 | )cpp" ); |
802 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good" )), |
803 | ValueIs(SameRange(findSpelled("good" )))); |
804 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2" )), |
805 | ValueIs(SameRange(findSpelled("good2" )))); |
806 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad" )), std::nullopt); |
807 | |
808 | // Prev from macro body. |
809 | recordTokens(Code: R"cpp( |
810 | #define ID(X) X |
811 | #define ID2(X, Y) X prev ID(Y) |
812 | ID2(not_prev, good) |
813 | )cpp" ); |
814 | EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good" )), |
815 | ValueIs(SameRange(findSpelled("good" )))); |
816 | EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good" )), std::nullopt); |
817 | } |
818 | |
819 | TEST_F(TokenBufferTest, NoCrashForEofToken) { |
820 | recordTokens(Code: R"cpp( |
821 | int main() { |
822 | )cpp" ); |
823 | ASSERT_TRUE(!Buffer.expandedTokens().empty()); |
824 | ASSERT_EQ(Buffer.expandedTokens().back().kind(), tok::eof); |
825 | // Expanded range including `eof` is handled gracefully (`eof` is ignored). |
826 | EXPECT_THAT( |
827 | Buffer.spelledForExpanded(Buffer.expandedTokens()), |
828 | ValueIs(SameRange(Buffer.spelledTokens(SourceMgr->getMainFileID())))); |
829 | } |
830 | |
831 | TEST_F(TokenBufferTest, ExpandedTokensForRange) { |
832 | recordTokens(Code: R"cpp( |
833 | #define SIGN(X) X##_washere |
834 | A SIGN(B) C SIGN(D) E SIGN(F) G |
835 | )cpp" ); |
836 | |
837 | SourceRange R(findExpanded(Query: "C" ).front().location(), |
838 | findExpanded(Query: "F_washere" ).front().location()); |
839 | // Expanded and spelled tokens are stored separately. |
840 | EXPECT_THAT(Buffer.expandedTokens(R), |
841 | SameRange(findExpanded("C D_washere E F_washere" ))); |
842 | EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); |
843 | } |
844 | |
845 | TEST_F(TokenBufferTest, ExpansionsOverlapping) { |
846 | // Object-like macro expansions. |
847 | recordTokens(Code: R"cpp( |
848 | #define FOO 3+4 |
849 | int a = FOO 1; |
850 | int b = FOO 2; |
851 | )cpp" ); |
852 | |
853 | llvm::ArrayRef<syntax::Token> Foo1 = findSpelled(Query: "FOO 1" ); |
854 | EXPECT_THAT( |
855 | Buffer.expansionStartingAt(Foo1.data()), |
856 | ValueIs(IsExpansion(SameRange(Foo1.drop_back()), |
857 | SameRange(findExpanded("3 + 4 1" ).drop_back())))); |
858 | EXPECT_THAT( |
859 | Buffer.expansionsOverlapping(Foo1), |
860 | ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), |
861 | SameRange(findExpanded("3 + 4 1" ).drop_back())))); |
862 | |
863 | llvm::ArrayRef<syntax::Token> Foo2 = findSpelled(Query: "FOO 2" ); |
864 | EXPECT_THAT( |
865 | Buffer.expansionStartingAt(Foo2.data()), |
866 | ValueIs(IsExpansion(SameRange(Foo2.drop_back()), |
867 | SameRange(findExpanded("3 + 4 2" ).drop_back())))); |
868 | EXPECT_THAT( |
869 | Buffer.expansionsOverlapping(llvm::ArrayRef(Foo1.begin(), Foo2.end())), |
870 | ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _), |
871 | IsExpansion(SameRange(Foo2.drop_back()), _))); |
872 | |
873 | // Function-like macro expansions. |
874 | recordTokens(Code: R"cpp( |
875 | #define ID(X) X |
876 | int a = ID(1+2+3); |
877 | int b = ID(ID(2+3+4)); |
878 | )cpp" ); |
879 | |
880 | llvm::ArrayRef<syntax::Token> ID1 = findSpelled(Query: "ID ( 1 + 2 + 3 )" ); |
881 | EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), |
882 | ValueIs(IsExpansion(SameRange(ID1), |
883 | SameRange(findExpanded("1 + 2 + 3" ))))); |
884 | // Only the first spelled token should be found. |
885 | for (const auto &T : ID1.drop_front()) |
886 | EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); |
887 | |
888 | llvm::ArrayRef<syntax::Token> ID2 = findSpelled(Query: "ID ( ID ( 2 + 3 + 4 ) )" ); |
889 | EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), |
890 | ValueIs(IsExpansion(SameRange(ID2), |
891 | SameRange(findExpanded("2 + 3 + 4" ))))); |
892 | // Only the first spelled token should be found. |
893 | for (const auto &T : ID2.drop_front()) |
894 | EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); |
895 | |
896 | EXPECT_THAT(Buffer.expansionsOverlapping(llvm::ArrayRef( |
897 | findSpelled("1 + 2" ).data(), findSpelled("4" ).data())), |
898 | ElementsAre(IsExpansion(SameRange(ID1), _), |
899 | IsExpansion(SameRange(ID2), _))); |
900 | |
901 | // PP directives. |
902 | recordTokens(Code: R"cpp( |
903 | #define FOO 1 |
904 | int a = FOO; |
905 | #pragma once |
906 | int b = 1; |
907 | )cpp" ); |
908 | |
909 | llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled(Query: "# define FOO 1" ); |
910 | EXPECT_THAT( |
911 | Buffer.expansionStartingAt(&DefineFoo.front()), |
912 | ValueIs(IsExpansion(SameRange(DefineFoo), |
913 | SameRange(findExpanded("int a" ).take_front(0))))); |
914 | // Only the first spelled token should be found. |
915 | for (const auto &T : DefineFoo.drop_front()) |
916 | EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); |
917 | |
918 | llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled(Query: "# pragma once" ); |
919 | EXPECT_THAT( |
920 | Buffer.expansionStartingAt(&PragmaOnce.front()), |
921 | ValueIs(IsExpansion(SameRange(PragmaOnce), |
922 | SameRange(findExpanded("int b" ).take_front(0))))); |
923 | // Only the first spelled token should be found. |
924 | for (const auto &T : PragmaOnce.drop_front()) |
925 | EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); |
926 | |
927 | EXPECT_THAT( |
928 | Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma" )), |
929 | ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;" ).drop_back()), _), |
930 | IsExpansion(SameRange(PragmaOnce), _))); |
931 | } |
932 | |
933 | TEST_F(TokenBufferTest, TokensToFileRange) { |
934 | addFile(Path: "./foo.h" , Contents: "token_from_header" ); |
935 | llvm::Annotations Code(R"cpp( |
936 | #define FOO token_from_expansion |
937 | #include "./foo.h" |
938 | $all[[$i[[int]] a = FOO;]] |
939 | )cpp" ); |
940 | recordTokens(Code: Code.code()); |
941 | |
942 | auto &SM = *SourceMgr; |
943 | |
944 | // Two simple examples. |
945 | auto Int = findExpanded(Query: "int" ).front(); |
946 | auto Semi = findExpanded(Query: ";" ).front(); |
947 | EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i" ).Begin, |
948 | Code.range("i" ).End)); |
949 | EXPECT_EQ(syntax::Token::range(SM, Int, Semi), |
950 | FileRange(SM.getMainFileID(), Code.range("all" ).Begin, |
951 | Code.range("all" ).End)); |
952 | // We don't test assertion failures because death tests are slow. |
953 | } |
954 | |
955 | TEST_F(TokenBufferTest, MacroExpansions) { |
956 | llvm::Annotations Code(R"cpp( |
957 | #define FOO B |
958 | #define FOO2 BA |
959 | #define CALL(X) int X |
960 | #define G CALL(FOO2) |
961 | int B; |
962 | $macro[[FOO]]; |
963 | $macro[[CALL]](A); |
964 | $macro[[G]]; |
965 | )cpp" ); |
966 | recordTokens(Code: Code.code()); |
967 | auto &SM = *SourceMgr; |
968 | auto Expansions = Buffer.macroExpansions(FID: SM.getMainFileID()); |
969 | std::vector<FileRange> ExpectedMacroRanges; |
970 | for (auto Range : Code.ranges(Name: "macro" )) |
971 | ExpectedMacroRanges.push_back( |
972 | x: FileRange(SM.getMainFileID(), Range.Begin, Range.End)); |
973 | std::vector<FileRange> ActualMacroRanges; |
974 | for (auto Expansion : Expansions) |
975 | ActualMacroRanges.push_back(x: Expansion->range(SM)); |
976 | EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); |
977 | } |
978 | |
979 | TEST_F(TokenBufferTest, Touching) { |
980 | llvm::Annotations Code("^i^nt^ ^a^b^=^1;^" ); |
981 | recordTokens(Code: Code.code()); |
982 | |
983 | auto Touching = [&](int Index) { |
984 | SourceLocation Loc = SourceMgr->getComposedLoc(FID: SourceMgr->getMainFileID(), |
985 | Offset: Code.points()[Index]); |
986 | return spelledTokensTouching(Loc, Tokens: Buffer); |
987 | }; |
988 | auto Identifier = [&](int Index) { |
989 | SourceLocation Loc = SourceMgr->getComposedLoc(FID: SourceMgr->getMainFileID(), |
990 | Offset: Code.points()[Index]); |
991 | const syntax::Token *Tok = spelledIdentifierTouching(Loc, Tokens: Buffer); |
992 | return Tok ? Tok->text(SM: *SourceMgr) : "" ; |
993 | }; |
994 | |
995 | EXPECT_THAT(Touching(0), SameRange(findSpelled("int" ))); |
996 | EXPECT_EQ(Identifier(0), "" ); |
997 | EXPECT_THAT(Touching(1), SameRange(findSpelled("int" ))); |
998 | EXPECT_EQ(Identifier(1), "" ); |
999 | EXPECT_THAT(Touching(2), SameRange(findSpelled("int" ))); |
1000 | EXPECT_EQ(Identifier(2), "" ); |
1001 | |
1002 | EXPECT_THAT(Touching(3), SameRange(findSpelled("ab" ))); |
1003 | EXPECT_EQ(Identifier(3), "ab" ); |
1004 | EXPECT_THAT(Touching(4), SameRange(findSpelled("ab" ))); |
1005 | EXPECT_EQ(Identifier(4), "ab" ); |
1006 | |
1007 | EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =" ))); |
1008 | EXPECT_EQ(Identifier(5), "ab" ); |
1009 | |
1010 | EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1" ))); |
1011 | EXPECT_EQ(Identifier(6), "" ); |
1012 | |
1013 | EXPECT_THAT(Touching(7), SameRange(findSpelled(";" ))); |
1014 | EXPECT_EQ(Identifier(7), "" ); |
1015 | |
1016 | ASSERT_EQ(Code.points().size(), 8u); |
1017 | } |
1018 | |
1019 | TEST_F(TokenBufferTest, ExpandedBySpelled) { |
1020 | recordTokens(Code: R"cpp( |
1021 | a1 a2 a3 b1 b2 |
1022 | )cpp" ); |
1023 | // Expanded and spelled tokens are stored separately. |
1024 | EXPECT_THAT(findExpanded("a1 a2" ), Not(SameRange(findSpelled("a1 a2" )))); |
1025 | // Searching for subranges of expanded tokens should give the corresponding |
1026 | // spelled ones. |
1027 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2" )), |
1028 | ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2" )))); |
1029 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3" )), |
1030 | ElementsAre(SameRange(findExpanded("a1 a2 a3" )))); |
1031 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2" )), |
1032 | ElementsAre(SameRange(findExpanded("b1 b2" )))); |
1033 | |
1034 | // Test search on simple macro expansions. |
1035 | recordTokens(Code: R"cpp( |
1036 | #define A a1 a2 a3 |
1037 | #define B b1 b2 |
1038 | |
1039 | A split B |
1040 | )cpp" ); |
1041 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B" )), |
1042 | ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2" )))); |
1043 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split" ).drop_back()), |
1044 | ElementsAre(SameRange(findExpanded("a1 a2 a3" )))); |
1045 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B" ).drop_front()), |
1046 | ElementsAre(SameRange(findExpanded("b1 b2" )))); |
1047 | |
1048 | // Ranges not fully covering macro expansions should fail. |
1049 | recordTokens(Code: R"cpp( |
1050 | #define ID(x) x |
1051 | |
1052 | ID(a) |
1053 | )cpp" ); |
1054 | // Spelled don't cover entire mapping (missing ID token) -> empty result |
1055 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )" )), IsEmpty()); |
1056 | // Spelled don't cover entire mapping (missing ) token) -> empty result |
1057 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a" )), IsEmpty()); |
1058 | |
1059 | // Recursive macro invocations. |
1060 | recordTokens(Code: R"cpp( |
1061 | #define ID(x) x |
1062 | #define B b1 b2 |
1063 | |
1064 | ID(ID(ID(a1) a2 a3)) split ID(B) |
1065 | )cpp" ); |
1066 | |
1067 | EXPECT_THAT( |
1068 | Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )" )), |
1069 | ElementsAre(SameRange(findExpanded("a1 a2 a3" )))); |
1070 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )" )), |
1071 | ElementsAre(SameRange(findExpanded("b1 b2" )))); |
1072 | EXPECT_THAT(Buffer.expandedForSpelled( |
1073 | findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )" )), |
1074 | ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2" )))); |
1075 | // FIXME: these should succeed, but we do not support macro arguments yet. |
1076 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1" )), IsEmpty()); |
1077 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2" )), |
1078 | IsEmpty()); |
1079 | |
1080 | // Empty macro expansions. |
1081 | recordTokens(Code: R"cpp( |
1082 | #define EMPTY |
1083 | #define ID(X) X |
1084 | |
1085 | EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 |
1086 | EMPTY EMPTY ID(4 5 6) split2 |
1087 | ID(7 8 9) EMPTY EMPTY |
1088 | )cpp" ); |
1089 | // Covered by empty expansions on one of both of the sides. |
1090 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )" )), |
1091 | ElementsAre(SameRange(findExpanded("1 2 3" )))); |
1092 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )" )), |
1093 | ElementsAre(SameRange(findExpanded("4 5 6" )))); |
1094 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )" )), |
1095 | ElementsAre(SameRange(findExpanded("7 8 9" )))); |
1096 | // Including the empty macro expansions on the side. |
1097 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )" )), |
1098 | ElementsAre(SameRange(findExpanded("1 2 3" )))); |
1099 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY" )), |
1100 | ElementsAre(SameRange(findExpanded("1 2 3" )))); |
1101 | EXPECT_THAT( |
1102 | Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY" )), |
1103 | ElementsAre(SameRange(findExpanded("1 2 3" )))); |
1104 | |
1105 | // Empty mappings coming from various directives. |
1106 | recordTokens(Code: R"cpp( |
1107 | #define ID(X) X |
1108 | ID(1) |
1109 | #pragma lalala |
1110 | not_mapped |
1111 | )cpp" ); |
1112 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X" )), |
1113 | IsEmpty()); |
1114 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala" )), |
1115 | IsEmpty()); |
1116 | |
1117 | // Empty macro expansion. |
1118 | recordTokens(Code: R"cpp( |
1119 | #define EMPTY |
1120 | EMPTY int a = 100; |
1121 | )cpp" ); |
1122 | EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int" ).drop_back()), |
1123 | IsEmpty()); |
1124 | } |
1125 | |
1126 | TEST_F(TokenCollectorTest, Pragmas) { |
1127 | // Tokens coming from concatenations. |
1128 | recordTokens(Code: R"cpp( |
1129 | void foo() { |
1130 | #pragma unroll 4 |
1131 | for(int i=0;i<4;++i); |
1132 | } |
1133 | )cpp" ); |
1134 | } |
1135 | } // namespace |
1136 | |