| 1 | //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "clang/Lex/Lexer.h" |
| 10 | #include "clang/Basic/Diagnostic.h" |
| 11 | #include "clang/Basic/DiagnosticOptions.h" |
| 12 | #include "clang/Basic/FileManager.h" |
| 13 | #include "clang/Basic/LangOptions.h" |
| 14 | #include "clang/Basic/SourceLocation.h" |
| 15 | #include "clang/Basic/SourceManager.h" |
| 16 | #include "clang/Basic/TargetInfo.h" |
| 17 | #include "clang/Basic/TargetOptions.h" |
| 18 | #include "clang/Basic/TokenKinds.h" |
| 19 | #include "clang/Lex/HeaderSearch.h" |
| 20 | #include "clang/Lex/HeaderSearchOptions.h" |
| 21 | #include "clang/Lex/LiteralSupport.h" |
| 22 | #include "clang/Lex/MacroArgs.h" |
| 23 | #include "clang/Lex/MacroInfo.h" |
| 24 | #include "clang/Lex/ModuleLoader.h" |
| 25 | #include "clang/Lex/Preprocessor.h" |
| 26 | #include "clang/Lex/PreprocessorOptions.h" |
| 27 | #include "llvm/ADT/ArrayRef.h" |
| 28 | #include "llvm/ADT/StringRef.h" |
| 29 | #include "llvm/Testing/Annotations/Annotations.h" |
| 30 | #include "gmock/gmock.h" |
| 31 | #include "gtest/gtest.h" |
| 32 | #include <memory> |
| 33 | #include <string> |
| 34 | #include <vector> |
| 35 | |
| 36 | namespace { |
| 37 | using namespace clang; |
| 38 | using testing::ElementsAre; |
| 39 | |
| 40 | // The test fixture. |
| 41 | class LexerTest : public ::testing::Test { |
| 42 | protected: |
| 43 | LexerTest() |
| 44 | : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), |
| 45 | Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()), |
| 46 | SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) { |
| 47 | TargetOpts->Triple = "x86_64-apple-darwin11.1.0" ; |
| 48 | Target = TargetInfo::CreateTargetInfo(Diags, Opts&: *TargetOpts); |
| 49 | } |
| 50 | |
| 51 | std::unique_ptr<Preprocessor> CreatePP(StringRef Source, |
| 52 | TrivialModuleLoader &ModLoader) { |
| 53 | std::unique_ptr<llvm::MemoryBuffer> Buf = |
| 54 | llvm::MemoryBuffer::getMemBuffer(InputData: Source); |
| 55 | SourceMgr.setMainFileID(SourceMgr.createFileID(Buffer: std::move(Buf))); |
| 56 | |
| 57 | HeaderSearchOptions HSOpts; |
| 58 | HeaderSearch (HSOpts, SourceMgr, Diags, LangOpts, Target.get()); |
| 59 | PreprocessorOptions PPOpts; |
| 60 | std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( |
| 61 | args&: PPOpts, args&: Diags, args&: LangOpts, args&: SourceMgr, args&: HeaderInfo, args&: ModLoader, |
| 62 | /*IILookup =*/args: nullptr, |
| 63 | /*OwnsHeaderSearch =*/args: false); |
| 64 | PP->Initialize(Target: *Target); |
| 65 | PP->EnterMainSourceFile(); |
| 66 | return PP; |
| 67 | } |
| 68 | |
| 69 | std::vector<Token> Lex(StringRef Source) { |
| 70 | TrivialModuleLoader ModLoader; |
| 71 | PP = CreatePP(Source, ModLoader); |
| 72 | |
| 73 | std::vector<Token> toks; |
| 74 | PP->LexTokensUntilEOF(Tokens: &toks); |
| 75 | |
| 76 | return toks; |
| 77 | } |
| 78 | |
| 79 | std::vector<Token> CheckLex(StringRef Source, |
| 80 | ArrayRef<tok::TokenKind> ExpectedTokens) { |
| 81 | auto toks = Lex(Source); |
| 82 | EXPECT_EQ(ExpectedTokens.size(), toks.size()); |
| 83 | for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { |
| 84 | EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); |
| 85 | } |
| 86 | |
| 87 | return toks; |
| 88 | } |
| 89 | |
| 90 | std::string getSourceText(Token Begin, Token End) { |
| 91 | bool Invalid; |
| 92 | StringRef Str = |
| 93 | Lexer::getSourceText(Range: CharSourceRange::getTokenRange(R: SourceRange( |
| 94 | Begin.getLocation(), End.getLocation())), |
| 95 | SM: SourceMgr, LangOpts, Invalid: &Invalid); |
| 96 | if (Invalid) |
| 97 | return "<INVALID>" ; |
| 98 | return std::string(Str); |
| 99 | } |
| 100 | |
| 101 | FileSystemOptions FileMgrOpts; |
| 102 | FileManager FileMgr; |
| 103 | IntrusiveRefCntPtr<DiagnosticIDs> DiagID; |
| 104 | DiagnosticOptions DiagOpts; |
| 105 | DiagnosticsEngine Diags; |
| 106 | SourceManager SourceMgr; |
| 107 | LangOptions LangOpts; |
| 108 | std::shared_ptr<TargetOptions> TargetOpts; |
| 109 | IntrusiveRefCntPtr<TargetInfo> Target; |
| 110 | std::unique_ptr<Preprocessor> PP; |
| 111 | }; |
| 112 | |
| 113 | TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { |
| 114 | std::vector<tok::TokenKind> ExpectedTokens; |
| 115 | ExpectedTokens.push_back(x: tok::identifier); |
| 116 | ExpectedTokens.push_back(x: tok::l_paren); |
| 117 | ExpectedTokens.push_back(x: tok::identifier); |
| 118 | ExpectedTokens.push_back(x: tok::r_paren); |
| 119 | |
| 120 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 121 | "M(f(M(i)))" , |
| 122 | ExpectedTokens); |
| 123 | |
| 124 | EXPECT_EQ("M(i)" , getSourceText(toks[2], toks[2])); |
| 125 | } |
| 126 | |
| 127 | TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { |
| 128 | std::vector<tok::TokenKind> ExpectedTokens; |
| 129 | ExpectedTokens.push_back(x: tok::identifier); |
| 130 | ExpectedTokens.push_back(x: tok::identifier); |
| 131 | |
| 132 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 133 | "M(M(i) c)" , |
| 134 | ExpectedTokens); |
| 135 | |
| 136 | EXPECT_EQ("M(i)" , getSourceText(toks[0], toks[0])); |
| 137 | } |
| 138 | |
| 139 | TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { |
| 140 | std::vector<tok::TokenKind> ExpectedTokens; |
| 141 | ExpectedTokens.push_back(x: tok::identifier); |
| 142 | ExpectedTokens.push_back(x: tok::identifier); |
| 143 | ExpectedTokens.push_back(x: tok::identifier); |
| 144 | |
| 145 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 146 | "M(c c M(i))" , |
| 147 | ExpectedTokens); |
| 148 | |
| 149 | EXPECT_EQ("c M(i)" , getSourceText(toks[1], toks[2])); |
| 150 | } |
| 151 | |
| 152 | TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { |
| 153 | std::vector<tok::TokenKind> ExpectedTokens; |
| 154 | ExpectedTokens.push_back(x: tok::identifier); |
| 155 | ExpectedTokens.push_back(x: tok::identifier); |
| 156 | ExpectedTokens.push_back(x: tok::identifier); |
| 157 | |
| 158 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 159 | "M(M(i) c c)" , |
| 160 | ExpectedTokens); |
| 161 | |
| 162 | EXPECT_EQ("M(i) c" , getSourceText(toks[0], toks[1])); |
| 163 | } |
| 164 | |
| 165 | TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { |
| 166 | std::vector<tok::TokenKind> ExpectedTokens; |
| 167 | ExpectedTokens.push_back(x: tok::identifier); |
| 168 | ExpectedTokens.push_back(x: tok::identifier); |
| 169 | ExpectedTokens.push_back(x: tok::identifier); |
| 170 | ExpectedTokens.push_back(x: tok::identifier); |
| 171 | |
| 172 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 173 | "M(c M(i)) M(M(i) c)" , |
| 174 | ExpectedTokens); |
| 175 | |
| 176 | EXPECT_EQ("<INVALID>" , getSourceText(toks[1], toks[2])); |
| 177 | } |
| 178 | |
| 179 | TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { |
| 180 | std::vector<tok::TokenKind> ExpectedTokens; |
| 181 | ExpectedTokens.push_back(x: tok::identifier); |
| 182 | ExpectedTokens.push_back(x: tok::l_paren); |
| 183 | ExpectedTokens.push_back(x: tok::identifier); |
| 184 | ExpectedTokens.push_back(x: tok::r_paren); |
| 185 | |
| 186 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 187 | "#define C(x) M(x##c)\n" |
| 188 | "M(f(C(i)))" , |
| 189 | ExpectedTokens); |
| 190 | |
| 191 | EXPECT_EQ("C(i)" , getSourceText(toks[2], toks[2])); |
| 192 | } |
| 193 | |
| 194 | TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { |
| 195 | std::vector<tok::TokenKind> ExpectedTokens; |
| 196 | ExpectedTokens.push_back(x: tok::identifier); |
| 197 | ExpectedTokens.push_back(x: tok::l_paren); |
| 198 | ExpectedTokens.push_back(x: tok::identifier); |
| 199 | ExpectedTokens.push_back(x: tok::r_paren); |
| 200 | |
| 201 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 202 | "f(M(M(i)))" , |
| 203 | ExpectedTokens); |
| 204 | EXPECT_EQ("M(M(i))" , getSourceText(toks[2], toks[2])); |
| 205 | } |
| 206 | |
| 207 | TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { |
| 208 | std::vector<tok::TokenKind> ExpectedTokens; |
| 209 | ExpectedTokens.push_back(x: tok::identifier); |
| 210 | ExpectedTokens.push_back(x: tok::l_paren); |
| 211 | ExpectedTokens.push_back(x: tok::identifier); |
| 212 | ExpectedTokens.push_back(x: tok::r_paren); |
| 213 | |
| 214 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 215 | "M(f(i))" , |
| 216 | ExpectedTokens); |
| 217 | EXPECT_EQ("i" , getSourceText(toks[2], toks[2])); |
| 218 | } |
| 219 | |
| 220 | TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { |
| 221 | std::vector<tok::TokenKind> ExpectedTokens; |
| 222 | ExpectedTokens.push_back(x: tok::identifier); |
| 223 | ExpectedTokens.push_back(x: tok::l_paren); |
| 224 | ExpectedTokens.push_back(x: tok::identifier); |
| 225 | ExpectedTokens.push_back(x: tok::r_paren); |
| 226 | |
| 227 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 228 | "#define C(x) x\n" |
| 229 | "f(C(M(i)))" , |
| 230 | ExpectedTokens); |
| 231 | EXPECT_EQ("C(M(i))" , getSourceText(toks[2], toks[2])); |
| 232 | } |
| 233 | |
| 234 | TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { |
| 235 | std::vector<tok::TokenKind> ExpectedTokens; |
| 236 | ExpectedTokens.push_back(x: tok::identifier); |
| 237 | ExpectedTokens.push_back(x: tok::l_paren); |
| 238 | ExpectedTokens.push_back(x: tok::identifier); |
| 239 | ExpectedTokens.push_back(x: tok::identifier); |
| 240 | ExpectedTokens.push_back(x: tok::r_paren); |
| 241 | |
| 242 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 243 | "#define C(x) c x\n" |
| 244 | "f(C(M(i)))" , |
| 245 | ExpectedTokens); |
| 246 | EXPECT_EQ("M(i)" , getSourceText(toks[3], toks[3])); |
| 247 | } |
| 248 | |
| 249 | TEST_F(LexerTest, GetSourceTextExpandsRecursively) { |
| 250 | std::vector<tok::TokenKind> ExpectedTokens; |
| 251 | ExpectedTokens.push_back(x: tok::identifier); |
| 252 | ExpectedTokens.push_back(x: tok::identifier); |
| 253 | ExpectedTokens.push_back(x: tok::l_paren); |
| 254 | ExpectedTokens.push_back(x: tok::identifier); |
| 255 | ExpectedTokens.push_back(x: tok::r_paren); |
| 256 | |
| 257 | std::vector<Token> toks = CheckLex(Source: "#define M(x) x\n" |
| 258 | "#define C(x) c M(x)\n" |
| 259 | "C(f(M(i)))" , |
| 260 | ExpectedTokens); |
| 261 | EXPECT_EQ("M(i)" , getSourceText(toks[3], toks[3])); |
| 262 | } |
| 263 | |
TEST_F(LexerTest, LexAPI) {
  // Exercises several static Lexer queries -- isAtStartOfMacroExpansion,
  // isAtEndOfMacroExpansion, makeFileCharRange, getSourceText, and
  // getImmediateMacroName -- over tokens produced both by a macro body
  // ("[x]") and via macro arguments.
  std::vector<tok::TokenKind> ExpectedTokens;
  // Line 1 (after the #defines)
  ExpectedTokens.push_back(tok::l_square);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::r_square);
  ExpectedTokens.push_back(tok::l_square);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::r_square);
  // Line 2
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::identifier);

  std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
                                     "#define N(x) x\n"
                                     "#define INN(x) x\n"
                                     "#define NOF1 INN(val)\n"
                                     "#define NOF2 val\n"
                                     "M(foo) N([bar])\n"
                                     "N(INN(val)) N(NOF1) N(NOF2) N(val)",
                                     ExpectedTokens);

  // Tokens [0..2] come from the body of M ("[x]"): one shared expansion.
  SourceLocation lsqrLoc = toks[0].getLocation();
  SourceLocation idLoc = toks[1].getLocation();
  SourceLocation rsqrLoc = toks[2].getLocation();
  CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);

  // Only the first/last tokens of the expansion report start/end of it.
  SourceLocation Loc;
  EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getBegin());
  EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getEnd());
  EXPECT_TRUE(macroRange.isTokenRange());

  // Ranges covering only part of a macro expansion cannot be mapped into a
  // file range...
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  // ...but the full expansion maps to a char range spanning "M(foo)".
  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_TRUE(!range.isTokenRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(macroRange.getBegin(),
                        macroRange.getEnd().getLocWithOffset(1)));

  StringRef text = Lexer::getSourceText(
      CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
      SourceMgr, LangOpts);
  EXPECT_EQ(text, "M(foo)");

  // Tokens [3..5] are spelled inside the macro *argument* "[bar]" of N, so
  // partial ranges can be mapped back to their file spelling locations.
  SourceLocation macroLsqrLoc = toks[3].getLocation();
  SourceLocation macroIdLoc = toks[4].getLocation();
  SourceLocation macroRsqrLoc = toks[5].getLocation();
  SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
  SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
  SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);

  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
      SourceMgr, LangOpts);
  // End is one past "bar" (offset 3 == length of "bar").
  EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
            range.getAsRange());

  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
            range.getAsRange());

  // The whole argument maps onto its expansion range in the file.
  macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
      SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
            range.getAsRange());

  text = Lexer::getSourceText(
      CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
      SourceMgr, LangOpts);
  EXPECT_EQ(text, "[bar");


  // getImmediateMacroName: the macro whose expansion most directly produced
  // each `val` token on line 2.
  SourceLocation idLoc1 = toks[6].getLocation();
  SourceLocation idLoc2 = toks[7].getLocation();
  SourceLocation idLoc3 = toks[8].getLocation();
  SourceLocation idLoc4 = toks[9].getLocation();
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
  EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
  EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
}
| 360 | |
TEST_F(LexerTest, HandlesSplitTokens) {
  // Verifies makeFileCharRange when the range's end comes from a token the
  // parser split (a `>>` split into two `>`s, as when closing nested
  // template argument lists).
  std::vector<tok::TokenKind> ExpectedTokens;
  // Line 1 (after the #defines)
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::less);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::less);
  ExpectedTokens.push_back(tok::greatergreater);
  // Line 2
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::less);
  ExpectedTokens.push_back(tok::identifier);
  ExpectedTokens.push_back(tok::less);
  ExpectedTokens.push_back(tok::greatergreater);

  std::vector<Token> toks = CheckLex("#define TY ty\n"
                                     "#define RANGLE ty<ty<>>\n"
                                     "TY<ty<>>\n"
                                     "RANGLE",
                                     ExpectedTokens);

  SourceLocation outerTyLoc = toks[0].getLocation();
  SourceLocation innerTyLoc = toks[2].getLocation();
  SourceLocation gtgtLoc = toks[4].getLocation();
  // Split the token to simulate the action of the parser and force creation
  // of an `ExpansionTokenRange`.
  SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);

  // Verify that it only captures the first greater-than and not the second
  // one.
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));

  // Verify case where range begins in a macro expansion.
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
                        gtgtLoc.getLocWithOffset(1)));

  SourceLocation macroInnerTyLoc = toks[7].getLocation();
  SourceLocation macroGtgtLoc = toks[9].getLocation();
  // Split the token to simulate the action of the parser and force creation
  // of an `ExpansionTokenRange`.
  SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);

  // Verify that it fails (because it only captures the first greater-than
  // and not the second one, so it doesn't span the entire macro expansion).
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
      SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
}
| 419 | |
| 420 | TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { |
| 421 | std::vector<Token> toks = |
| 422 | Lex(Source: "#define helper1 0\n" |
| 423 | "void helper2(const char *, ...);\n" |
| 424 | "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n" |
| 425 | "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n" |
| 426 | "void f1() { M2(\"a\", \"b\"); }" ); |
| 427 | |
| 428 | // Check the file corresponding to the "helper1" macro arg in M2. |
| 429 | // |
| 430 | // The lexer used to report its size as 31, meaning that the end of the |
| 431 | // expansion would be on the *next line* (just past `M2("a", "b")`). Make |
| 432 | // sure that we get the correct end location (the comma after "helper1"). |
| 433 | SourceLocation helper1ArgLoc = toks[20].getLocation(); |
| 434 | EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); |
| 435 | } |
| 436 | |
TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  // Builds a 3-parameter function-like macro by hand, feeds it the lexed
  // arguments "StrArg", 5, 'C' (commas delimit arguments), and checks that
  // MacroArgs::StringifyArgument produces the expected string literals.
  // In an assertions-enabled build, asking for a 4th argument must die.
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);

  llvm::BumpPtrAllocator Allocator;
  std::array<IdentifierInfo *, 3> ParamList;
  MacroInfo *MI = PP->AllocateMacroInfo({});
  MI->setIsFunctionLike();
  MI->setParameterList(ParamList, Allocator);
  EXPECT_EQ(3u, MI->getNumParams());
  EXPECT_TRUE(MI->isFunctionLike());

  // Re-tokenize the buffer into per-argument token streams: each comma (and
  // the final EOF) terminates one argument with an eof token, matching the
  // representation MacroArgs::create expects.
  Token Eof;
  Eof.setKind(tok::eof);
  std::vector<Token> ArgTokens;
  while (1) {
    Token tok;
    PP->Lex(tok);
    if (tok.is(tok::eof)) {
      ArgTokens.push_back(Eof);
      break;
    }
    if (tok.is(tok::comma))
      ArgTokens.push_back(Eof);
    else
      ArgTokens.push_back(tok);
  }

  // MacroArgs must be released via destroy(&PP); a custom deleter ensures
  // cleanup even if an expectation below fails.
  auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
  std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
      MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
  auto StringifyArg = [&](int ArgNo) {
    return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
                                 /*Charify=*/false,
                                 /*ExpansionLocStart=*/{},
                                 /*ExpansionLocEnd=*/{});
  };
  Token Result = StringifyArg(0);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
  Result = StringifyArg(1);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"5\"", Result.getLiteralData());
  Result = StringifyArg(2);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  // Out-of-range argument index must hit the "Invalid arg #" assertion.
  EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
#endif
}
| 485 | |
| 486 | TEST_F(LexerTest, IsNewLineEscapedValid) { |
| 487 | auto hasNewLineEscaped = [](const char *S) { |
| 488 | return Lexer::isNewLineEscaped(BufferStart: S, Str: S + strlen(s: S) - 1); |
| 489 | }; |
| 490 | |
| 491 | EXPECT_TRUE(hasNewLineEscaped("\\\r" )); |
| 492 | EXPECT_TRUE(hasNewLineEscaped("\\\n" )); |
| 493 | EXPECT_TRUE(hasNewLineEscaped("\\\r\n" )); |
| 494 | EXPECT_TRUE(hasNewLineEscaped("\\\n\r" )); |
| 495 | EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r" )); |
| 496 | EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n" )); |
| 497 | |
| 498 | EXPECT_FALSE(hasNewLineEscaped("\\\r\r" )); |
| 499 | EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n" )); |
| 500 | EXPECT_FALSE(hasNewLineEscaped("\\\n\n" )); |
| 501 | EXPECT_FALSE(hasNewLineEscaped("\r" )); |
| 502 | EXPECT_FALSE(hasNewLineEscaped("\n" )); |
| 503 | EXPECT_FALSE(hasNewLineEscaped("\r\n" )); |
| 504 | EXPECT_FALSE(hasNewLineEscaped("\n\r" )); |
| 505 | EXPECT_FALSE(hasNewLineEscaped("\r\r" )); |
| 506 | EXPECT_FALSE(hasNewLineEscaped("\n\n" )); |
| 507 | } |
| 508 | |
| 509 | TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { |
| 510 | // Each line should have the same length for |
| 511 | // further offset calculation to be more straightforward. |
| 512 | const unsigned IdentifierLength = 8; |
| 513 | std::string TextToLex = "rabarbar\n" |
| 514 | "foo\\\nbar\n" |
| 515 | "foo\\\rbar\n" |
| 516 | "fo\\\r\nbar\n" |
| 517 | "foo\\\n\rba\n" ; |
| 518 | std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; |
| 519 | std::vector<Token> LexedTokens = CheckLex(Source: TextToLex, ExpectedTokens); |
| 520 | |
| 521 | for (const Token &Tok : LexedTokens) { |
| 522 | std::pair<FileID, unsigned> OriginalLocation = |
| 523 | SourceMgr.getDecomposedLoc(Loc: Tok.getLocation()); |
| 524 | for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { |
| 525 | SourceLocation LookupLocation = |
| 526 | Tok.getLocation().getLocWithOffset(Offset); |
| 527 | |
| 528 | std::pair<FileID, unsigned> FoundLocation = |
| 529 | SourceMgr.getDecomposedExpansionLoc( |
| 530 | Loc: Lexer::GetBeginningOfToken(Loc: LookupLocation, SM: SourceMgr, LangOpts)); |
| 531 | |
| 532 | // Check that location returned by the GetBeginningOfToken |
| 533 | // is the same as original token location reported by Lexer. |
| 534 | EXPECT_EQ(FoundLocation.second, OriginalLocation.second); |
| 535 | } |
| 536 | } |
| 537 | } |
| 538 | |
| 539 | TEST_F(LexerTest, AvoidPastEndOfStringDereference) { |
| 540 | EXPECT_TRUE(Lex(" // \\\n" ).empty()); |
| 541 | EXPECT_TRUE(Lex("#include <\\\\" ).empty()); |
| 542 | EXPECT_TRUE(Lex("#include <\\\\\n" ).empty()); |
| 543 | } |
| 544 | |
TEST_F(LexerTest, StringizingRasString) {
  // Checks both Stringify overloads: newlines become "\n", quotes and
  // backslashes get escaped, exactly as the # stringize operator requires.
  // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
  std::string String1 = R"(foo
    {"bar":[]}
    baz)";
  // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
  SmallString<128> String2;
  String2 += String1.c_str();

  // Corner cases.
  std::string String3 = R"(\
    \n
    \\n
    \\)";
  SmallString<128> String4;
  String4 += String3.c_str();
  std::string String5 = R"(a\


    \\b)";
  SmallString<128> String6;
  String6 += String5.c_str();

  String1 = Lexer::Stringify(StringRef(String1));
  Lexer::Stringify(String2);
  String3 = Lexer::Stringify(StringRef(String3));
  Lexer::Stringify(String4);
  String5 = Lexer::Stringify(StringRef(String5));
  Lexer::Stringify(String6);

  EXPECT_EQ(String1, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(String2, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(String3, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(String4, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(String5, R"(a\\\n\n\n    \\\\b)");
  EXPECT_EQ(String6, R"(a\\\n\n\n    \\\\b)");
}
| 582 | |
TEST_F(LexerTest, CharRangeOffByOne) {
  std::vector<Token> toks = Lex(R"(#define MOO 1
    void foo() { MOO; })");
  // Tokens: void foo ( ) { MOO ; } -- toks[5] is the expansion of MOO.
  const Token &moo = toks[5];

  EXPECT_EQ(getSourceText(moo, moo), "MOO");

  SourceRange R{moo.getLocation(), moo.getLocation()};

  // The single token is itself a complete macro expansion...
  EXPECT_TRUE(
      Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
  EXPECT_TRUE(
      Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));

  // ...so converting the token range to a char range must span all three
  // characters of "MOO" and not drop the last one.
  CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);

  EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
}
| 601 | |
| 602 | TEST_F(LexerTest, FindNextToken) { |
| 603 | Lex(Source: "int abcd = 0;\n" |
| 604 | "// A comment.\n" |
| 605 | "int xyz = abcd;\n" ); |
| 606 | std::vector<std::string> GeneratedByNextToken; |
| 607 | SourceLocation Loc = |
| 608 | SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID()); |
| 609 | while (true) { |
| 610 | auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts); |
| 611 | ASSERT_TRUE(T); |
| 612 | if (T->is(K: tok::eof)) |
| 613 | break; |
| 614 | GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
| 615 | Loc = T->getLocation(); |
| 616 | } |
| 617 | EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd" , "=" , "0" , ";" , "int" , |
| 618 | "xyz" , "=" , "abcd" , ";" )); |
| 619 | } |
| 620 | |
| 621 | TEST_F(LexerTest, FindNextTokenIncludingComments) { |
| 622 | Lex(Source: "int abcd = 0;\n" |
| 623 | "// A comment.\n" |
| 624 | "int xyz = abcd;\n" ); |
| 625 | std::vector<std::string> GeneratedByNextToken; |
| 626 | SourceLocation Loc = |
| 627 | SourceMgr.getLocForStartOfFile(FID: SourceMgr.getMainFileID()); |
| 628 | while (true) { |
| 629 | auto T = Lexer::findNextToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true); |
| 630 | ASSERT_TRUE(T); |
| 631 | if (T->is(K: tok::eof)) |
| 632 | break; |
| 633 | GeneratedByNextToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
| 634 | Loc = T->getLocation(); |
| 635 | } |
| 636 | EXPECT_THAT(GeneratedByNextToken, |
| 637 | ElementsAre("abcd" , "=" , "0" , ";" , "// A comment." , "int" , "xyz" , |
| 638 | "=" , "abcd" , ";" )); |
| 639 | } |
| 640 | |
| 641 | TEST_F(LexerTest, FindPreviousToken) { |
| 642 | Lex(Source: "int abcd = 0;\n" |
| 643 | "// A comment.\n" |
| 644 | "int xyz = abcd;\n" ); |
| 645 | std::vector<std::string> GeneratedByPrevToken; |
| 646 | SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID()); |
| 647 | while (true) { |
| 648 | auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: false); |
| 649 | if (!T.has_value()) |
| 650 | break; |
| 651 | GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
| 652 | Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts); |
| 653 | } |
| 654 | EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";" , "abcd" , "=" , "xyz" , "int" , |
| 655 | ";" , "0" , "=" , "abcd" , "int" )); |
| 656 | } |
| 657 | |
| 658 | TEST_F(LexerTest, FindPreviousTokenIncludingComments) { |
| 659 | Lex(Source: "int abcd = 0;\n" |
| 660 | "// A comment.\n" |
| 661 | "int xyz = abcd;\n" ); |
| 662 | std::vector<std::string> GeneratedByPrevToken; |
| 663 | SourceLocation Loc = SourceMgr.getLocForEndOfFile(FID: SourceMgr.getMainFileID()); |
| 664 | while (true) { |
| 665 | auto T = Lexer::findPreviousToken(Loc, SM: SourceMgr, LangOpts, IncludeComments: true); |
| 666 | if (!T.has_value()) |
| 667 | break; |
| 668 | GeneratedByPrevToken.push_back(x: getSourceText(Begin: *T, End: *T)); |
| 669 | Loc = Lexer::GetBeginningOfToken(Loc: T->getLocation(), SM: SourceMgr, LangOpts); |
| 670 | } |
| 671 | EXPECT_THAT(GeneratedByPrevToken, |
| 672 | ElementsAre(";" , "abcd" , "=" , "xyz" , "int" , "// A comment." , ";" , |
| 673 | "0" , "=" , "abcd" , "int" )); |
| 674 | } |
| 675 | |
| 676 | TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { |
| 677 | TrivialModuleLoader ModLoader; |
| 678 | auto PP = CreatePP(Source: "" , ModLoader); |
| 679 | PP->LexTokensUntilEOF(); |
| 680 | EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), |
| 681 | 1U); |
| 682 | } |
| 683 | |
TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
  const llvm::StringLiteral Source = R"cpp(
  // First line comment.
  //* Second line comment which is ambigious.
  ; // Have a non-comment token to make sure something is lexed.
  )cpp";
  // With line comments disabled (as in C89), "//" is not comment syntax;
  // the raw lexer and the preprocessor-driven lexer must still agree on the
  // resulting token stream.
  LangOpts.LineComment = false;
  auto Toks = Lex(Source);
  auto &SM = PP->getSourceManager();
  auto SrcBuffer = SM.getBufferData(SM.getMainFileID());
  Lexer L(SM.getLocForStartOfFile(SM.getMainFileID()), PP->getLangOpts(),
          SrcBuffer.data(), SrcBuffer.data(),
          SrcBuffer.data() + SrcBuffer.size());

  auto ToksView = llvm::ArrayRef(Toks);
  clang::Token T;
  EXPECT_FALSE(ToksView.empty());
  // Compare the raw token stream against the normally lexed one, kind by
  // kind; both must run out at the same time.
  while (!L.LexFromRawLexer(T)) {
    ASSERT_TRUE(!ToksView.empty());
    EXPECT_EQ(T.getKind(), ToksView.front().getKind());
    ToksView = ToksView.drop_front();
  }
  EXPECT_TRUE(ToksView.empty());
}
| 708 | |
TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) {
  // The macro body "1" is separated from the macro name by an escaped
  // newline followed by two spaces of indentation.
  const llvm::StringLiteral Source = R"cc(
#define ONE \
  1

int i = ONE;
)cc";
  std::vector<Token> Toks =
      CheckLex(Source, {tok::kw_int, tok::identifier, tok::equal,
                        tok::numeric_constant, tok::semi});

  // Set up by getting the raw token for the `1` in the macro definition.
  const Token &OneExpanded = Toks[3];
  Token Tok;
  ASSERT_FALSE(
      Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts));
  // The `ONE`.
  ASSERT_EQ(Tok.getKind(), tok::raw_identifier);
  ASSERT_FALSE(
      Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()),
                         Tok, SourceMgr, LangOpts));
  // The `1` in the macro definition.
  ASSERT_EQ(Tok.getKind(), tok::numeric_constant);

  // Go back 4 characters: two spaces, one newline, and the backslash.
  SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(-4);
  // Expect true (=failure) because the whitespace immediately after the
  // escaped newline is not ignored.
  EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts,
                                 /*IgnoreWhiteSpace=*/false));
}
| 740 | |
// Plain TEST (no fixture needed): ComputePreamble is a pure function of the
// buffer and the LangOptions. Each case marks the expected preamble with
// [[...]]; the preamble ends right after the last directive (or, with C++
// modules enabled, the global-module-fragment introducer) line.
TEST(LexerPreambleTest, PreambleBounds) {
  std::vector<std::string> Cases = {
      R"cc([[
#include <foo>
]]int bar;
)cc",
      R"cc([[
#include <foo>
]])cc",
      R"cc([[
// leading comment
#include <foo>
]]// trailing comment
int bar;
)cc",
      R"cc([[
module;
#include <foo>
]]module bar;
int x;
)cc",
  };
  for (const auto& Case : Cases) {
    llvm::Annotations A(Case);
    clang::LangOptions LangOpts;
    LangOpts.CPlusPlusModules = true;
    auto Bounds = Lexer::ComputePreamble(A.code(), LangOpts);
    // The computed preamble size must land exactly on the ]] marker.
    EXPECT_EQ(Bounds.Size, A.range().End) << Case;
  }
}
| 771 | |
| 772 | } // anonymous namespace |
| 773 | |