1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Basic/Builtins.h"
29#include "clang/Basic/FileManager.h"
30#include "clang/Basic/FileSystemStatCache.h"
31#include "clang/Basic/IdentifierTable.h"
32#include "clang/Basic/LLVM.h"
33#include "clang/Basic/LangOptions.h"
34#include "clang/Basic/Module.h"
35#include "clang/Basic/SourceLocation.h"
36#include "clang/Basic/SourceManager.h"
37#include "clang/Basic/TargetInfo.h"
38#include "clang/Lex/CodeCompletionHandler.h"
39#include "clang/Lex/ExternalPreprocessorSource.h"
40#include "clang/Lex/HeaderSearch.h"
41#include "clang/Lex/LexDiagnostic.h"
42#include "clang/Lex/Lexer.h"
43#include "clang/Lex/LiteralSupport.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
46#include "clang/Lex/ModuleLoader.h"
47#include "clang/Lex/Pragma.h"
48#include "clang/Lex/PreprocessingRecord.h"
49#include "clang/Lex/PreprocessorLexer.h"
50#include "clang/Lex/PreprocessorOptions.h"
51#include "clang/Lex/ScratchBuffer.h"
52#include "clang/Lex/Token.h"
53#include "clang/Lex/TokenLexer.h"
54#include "llvm/ADT/APInt.h"
55#include "llvm/ADT/ArrayRef.h"
56#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/STLExtras.h"
58#include "llvm/ADT/SmallString.h"
59#include "llvm/ADT/SmallVector.h"
60#include "llvm/ADT/StringRef.h"
61#include "llvm/Support/Capacity.h"
62#include "llvm/Support/ErrorHandling.h"
63#include "llvm/Support/MemoryBuffer.h"
64#include "llvm/Support/raw_ostream.h"
65#include <algorithm>
66#include <cassert>
67#include <memory>
68#include <optional>
69#include <string>
70#include <utility>
71#include <vector>
72
73using namespace clang;
74
/// Minimum distance between two check points, in tokens.
static constexpr unsigned CheckPointStepSize = 1024;

// Expands the registry-instantiation macro for PragmaHandlerRegistry in this
// translation unit.
LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)

// Out-of-line, defaulted definition of the destructor.
ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
81
82Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
83 DiagnosticsEngine &diags, const LangOptions &opts,
84 SourceManager &SM, HeaderSearch &Headers,
85 ModuleLoader &TheModuleLoader,
86 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
87 TranslationUnitKind TUKind)
88 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
89 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
90 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
91 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
92 // As the language options may have not been loaded yet (when
93 // deserializing an ASTUnit), adding keywords to the identifier table is
94 // deferred to Preprocessor::Initialize().
95 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
96 TUKind(TUKind), SkipMainFilePreamble(0, true),
97 CurSubmoduleState(&NullSubmoduleState) {
98 OwnsHeaderSearch = OwnsHeaders;
99
100 // Default to discarding comments.
101 KeepComments = false;
102 KeepMacroComments = false;
103 SuppressIncludeNotFoundError = false;
104
105 // Macro expansion is enabled.
106 DisableMacroExpansion = false;
107 MacroExpansionInDirectivesOverride = false;
108 InMacroArgs = false;
109 ArgMacro = nullptr;
110 InMacroArgPreExpansion = false;
111 NumCachedTokenLexers = 0;
112 PragmasEnabled = true;
113 ParsingIfOrElifDirective = false;
114 PreprocessedOutput = false;
115
116 // We haven't read anything from the external source.
117 ReadMacrosFromExternalSource = false;
118
119 BuiltinInfo = std::make_unique<Builtin::Context>();
120
121 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
122 // a macro. They get unpoisoned where it is allowed.
123 (Ident__VA_ARGS__ = getIdentifierInfo(Name: "__VA_ARGS__"))->setIsPoisoned();
124 SetPoisonReason(II: Ident__VA_ARGS__,diag::DiagID: ext_pp_bad_vaargs_use);
125 (Ident__VA_OPT__ = getIdentifierInfo(Name: "__VA_OPT__"))->setIsPoisoned();
126 SetPoisonReason(II: Ident__VA_OPT__,diag::DiagID: ext_pp_bad_vaopt_use);
127
128 // Initialize the pragma handlers.
129 RegisterBuiltinPragmas();
130
131 // Initialize builtin macros like __LINE__ and friends.
132 RegisterBuiltinMacros();
133
134 if(LangOpts.Borland) {
135 Ident__exception_info = getIdentifierInfo(Name: "_exception_info");
136 Ident___exception_info = getIdentifierInfo(Name: "__exception_info");
137 Ident_GetExceptionInfo = getIdentifierInfo(Name: "GetExceptionInformation");
138 Ident__exception_code = getIdentifierInfo(Name: "_exception_code");
139 Ident___exception_code = getIdentifierInfo(Name: "__exception_code");
140 Ident_GetExceptionCode = getIdentifierInfo(Name: "GetExceptionCode");
141 Ident__abnormal_termination = getIdentifierInfo(Name: "_abnormal_termination");
142 Ident___abnormal_termination = getIdentifierInfo(Name: "__abnormal_termination");
143 Ident_AbnormalTermination = getIdentifierInfo(Name: "AbnormalTermination");
144 } else {
145 Ident__exception_info = Ident__exception_code = nullptr;
146 Ident__abnormal_termination = Ident___exception_info = nullptr;
147 Ident___exception_code = Ident___abnormal_termination = nullptr;
148 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
149 Ident_AbnormalTermination = nullptr;
150 }
151
152 // Default incremental processing to -fincremental-extensions, clients can
153 // override with `enableIncrementalProcessing` if desired.
154 IncrementalProcessing = LangOpts.IncrementalExtensions;
155
156 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
157 if (usingPCHWithPragmaHdrStop())
158 SkippingUntilPragmaHdrStop = true;
159
160 // If using a PCH with a through header, start skipping tokens.
161 if (!this->PPOpts->PCHThroughHeader.empty() &&
162 !this->PPOpts->ImplicitPCHInclude.empty())
163 SkippingUntilPCHThroughHeader = true;
164
165 if (this->PPOpts->GeneratePreamble)
166 PreambleConditionalStack.startRecording();
167
168 MaxTokens = LangOpts.MaxTokens;
169}
170
171Preprocessor::~Preprocessor() {
172 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
173
174 IncludeMacroStack.clear();
175
176 // Free any cached macro expanders.
177 // This populates MacroArgCache, so all TokenLexers need to be destroyed
178 // before the code below that frees up the MacroArgCache list.
179 std::fill(first: TokenLexerCache, last: TokenLexerCache + NumCachedTokenLexers, value: nullptr);
180 CurTokenLexer.reset();
181
182 // Free any cached MacroArgs.
183 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
184 ArgList = ArgList->deallocate();
185
186 // Delete the header search info, if we own it.
187 if (OwnsHeaderSearch)
188 delete &HeaderInfo;
189}
190
191void Preprocessor::Initialize(const TargetInfo &Target,
192 const TargetInfo *AuxTarget) {
193 assert((!this->Target || this->Target == &Target) &&
194 "Invalid override of target information");
195 this->Target = &Target;
196
197 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
198 "Invalid override of aux target information.");
199 this->AuxTarget = AuxTarget;
200
201 // Initialize information about built-ins.
202 BuiltinInfo->InitializeTarget(Target, AuxTarget);
203 HeaderInfo.setTarget(Target);
204
205 // Populate the identifier table with info about keywords for the current language.
206 Identifiers.AddKeywords(LangOpts);
207
208 // Initialize the __FTL_EVAL_METHOD__ macro to the TargetInfo.
209 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
210
211 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
212 // Use setting from TargetInfo.
213 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: Target.getFPEvalMethod());
214 else
215 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
216 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: getLangOpts().getFPEvalMethod());
217}
218
219void Preprocessor::InitializeForModelFile() {
220 NumEnteredSourceFiles = 0;
221
222 // Reset pragmas
223 PragmaHandlersBackup = std::move(PragmaHandlers);
224 PragmaHandlers = std::make_unique<PragmaNamespace>(args: StringRef());
225 RegisterBuiltinPragmas();
226
227 // Reset PredefinesFileID
228 PredefinesFileID = FileID();
229}
230
231void Preprocessor::FinalizeForModelFile() {
232 NumEnteredSourceFiles = 1;
233
234 PragmaHandlers = std::move(PragmaHandlersBackup);
235}
236
237void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
238 llvm::errs() << tok::getTokenName(Kind: Tok.getKind());
239
240 if (!Tok.isAnnotation())
241 llvm::errs() << " '" << getSpelling(Tok) << "'";
242
243 if (!DumpFlags) return;
244
245 llvm::errs() << "\t";
246 if (Tok.isAtStartOfLine())
247 llvm::errs() << " [StartOfLine]";
248 if (Tok.hasLeadingSpace())
249 llvm::errs() << " [LeadingSpace]";
250 if (Tok.isExpandDisabled())
251 llvm::errs() << " [ExpandDisabled]";
252 if (Tok.needsCleaning()) {
253 const char *Start = SourceMgr.getCharacterData(SL: Tok.getLocation());
254 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
255 << "']";
256 }
257
258 llvm::errs() << "\tLoc=<";
259 DumpLocation(Loc: Tok.getLocation());
260 llvm::errs() << ">";
261}
262
263void Preprocessor::DumpLocation(SourceLocation Loc) const {
264 Loc.print(OS&: llvm::errs(), SM: SourceMgr);
265}
266
267void Preprocessor::DumpMacro(const MacroInfo &MI) const {
268 llvm::errs() << "MACRO: ";
269 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
270 DumpToken(Tok: MI.getReplacementToken(Tok: i));
271 llvm::errs() << " ";
272 }
273 llvm::errs() << "\n";
274}
275
276void Preprocessor::PrintStats() {
277 llvm::errs() << "\n*** Preprocessor Stats:\n";
278 llvm::errs() << NumDirectives << " directives found:\n";
279 llvm::errs() << " " << NumDefined << " #define.\n";
280 llvm::errs() << " " << NumUndefined << " #undef.\n";
281 llvm::errs() << " #include/#include_next/#import:\n";
282 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
283 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
284 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
285 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
286 llvm::errs() << " " << NumEndif << " #endif.\n";
287 llvm::errs() << " " << NumPragma << " #pragma.\n";
288 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
289
290 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
291 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
292 << NumFastMacroExpanded << " on the fast path.\n";
293 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
294 << " token paste (##) operations performed, "
295 << NumFastTokenPaste << " on the fast path.\n";
296
297 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
298
299 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
300 llvm::errs() << "\n Macro Expanded Tokens: "
301 << llvm::capacity_in_bytes(X: MacroExpandedTokens);
302 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
303 // FIXME: List information for all submodules.
304 llvm::errs() << "\n Macros: "
305 << llvm::capacity_in_bytes(X: CurSubmoduleState->Macros);
306 llvm::errs() << "\n #pragma push_macro Info: "
307 << llvm::capacity_in_bytes(X: PragmaPushMacroInfo);
308 llvm::errs() << "\n Poison Reasons: "
309 << llvm::capacity_in_bytes(X: PoisonReasons);
310 llvm::errs() << "\n Comment Handlers: "
311 << llvm::capacity_in_bytes(x: CommentHandlers) << "\n";
312}
313
314Preprocessor::macro_iterator
315Preprocessor::macro_begin(bool IncludeExternalMacros) const {
316 if (IncludeExternalMacros && ExternalSource &&
317 !ReadMacrosFromExternalSource) {
318 ReadMacrosFromExternalSource = true;
319 ExternalSource->ReadDefinedMacros();
320 }
321
322 // Make sure we cover all macros in visible modules.
323 for (const ModuleMacro &Macro : ModuleMacros)
324 CurSubmoduleState->Macros.insert(KV: std::make_pair(x: Macro.II, y: MacroState()));
325
326 return CurSubmoduleState->Macros.begin();
327}
328
329size_t Preprocessor::getTotalMemory() const {
330 return BP.getTotalMemory()
331 + llvm::capacity_in_bytes(X: MacroExpandedTokens)
332 + Predefines.capacity() /* Predefines buffer. */
333 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
334 // and ModuleMacros.
335 + llvm::capacity_in_bytes(X: CurSubmoduleState->Macros)
336 + llvm::capacity_in_bytes(X: PragmaPushMacroInfo)
337 + llvm::capacity_in_bytes(X: PoisonReasons)
338 + llvm::capacity_in_bytes(x: CommentHandlers);
339}
340
341Preprocessor::macro_iterator
342Preprocessor::macro_end(bool IncludeExternalMacros) const {
343 if (IncludeExternalMacros && ExternalSource &&
344 !ReadMacrosFromExternalSource) {
345 ReadMacrosFromExternalSource = true;
346 ExternalSource->ReadDefinedMacros();
347 }
348
349 return CurSubmoduleState->Macros.end();
350}
351
352/// Compares macro tokens with a specified token value sequence.
353static bool MacroDefinitionEquals(const MacroInfo *MI,
354 ArrayRef<TokenValue> Tokens) {
355 return Tokens.size() == MI->getNumTokens() &&
356 std::equal(first1: Tokens.begin(), last1: Tokens.end(), first2: MI->tokens_begin());
357}
358
359StringRef Preprocessor::getLastMacroWithSpelling(
360 SourceLocation Loc,
361 ArrayRef<TokenValue> Tokens) const {
362 SourceLocation BestLocation;
363 StringRef BestSpelling;
364 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
365 I != E; ++I) {
366 const MacroDirective::DefInfo
367 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
368 if (!Def || !Def.getMacroInfo())
369 continue;
370 if (!Def.getMacroInfo()->isObjectLike())
371 continue;
372 if (!MacroDefinitionEquals(MI: Def.getMacroInfo(), Tokens))
373 continue;
374 SourceLocation Location = Def.getLocation();
375 // Choose the macro defined latest.
376 if (BestLocation.isInvalid() ||
377 (Location.isValid() &&
378 SourceMgr.isBeforeInTranslationUnit(LHS: BestLocation, RHS: Location))) {
379 BestLocation = Location;
380 BestSpelling = I->first->getName();
381 }
382 }
383 return BestSpelling;
384}
385
386void Preprocessor::recomputeCurLexerKind() {
387 if (CurLexer)
388 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
389 ? CLK_DependencyDirectivesLexer
390 : CLK_Lexer;
391 else if (CurTokenLexer)
392 CurLexerCallback = CLK_TokenLexer;
393 else
394 CurLexerCallback = CLK_CachingLexer;
395}
396
397bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File,
398 unsigned CompleteLine,
399 unsigned CompleteColumn) {
400 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
401 assert(!CodeCompletionFile && "Already set");
402
403 // Load the actual file's contents.
404 std::optional<llvm::MemoryBufferRef> Buffer =
405 SourceMgr.getMemoryBufferForFileOrNone(File);
406 if (!Buffer)
407 return true;
408
409 // Find the byte position of the truncation point.
410 const char *Position = Buffer->getBufferStart();
411 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
412 for (; *Position; ++Position) {
413 if (*Position != '\r' && *Position != '\n')
414 continue;
415
416 // Eat \r\n or \n\r as a single line.
417 if ((Position[1] == '\r' || Position[1] == '\n') &&
418 Position[0] != Position[1])
419 ++Position;
420 ++Position;
421 break;
422 }
423 }
424
425 Position += CompleteColumn - 1;
426
427 // If pointing inside the preamble, adjust the position at the beginning of
428 // the file after the preamble.
429 if (SkipMainFilePreamble.first &&
430 SourceMgr.getFileEntryForID(FID: SourceMgr.getMainFileID()) == File) {
431 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
432 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
433 }
434
435 if (Position > Buffer->getBufferEnd())
436 Position = Buffer->getBufferEnd();
437
438 CodeCompletionFile = File;
439 CodeCompletionOffset = Position - Buffer->getBufferStart();
440
441 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
442 Size: Buffer->getBufferSize() + 1, BufferName: Buffer->getBufferIdentifier());
443 char *NewBuf = NewBuffer->getBufferStart();
444 char *NewPos = std::copy(first: Buffer->getBufferStart(), last: Position, result: NewBuf);
445 *NewPos = '\0';
446 std::copy(first: Position, last: Buffer->getBufferEnd(), result: NewPos+1);
447 SourceMgr.overrideFileContents(SourceFile: File, Buffer: std::move(NewBuffer));
448
449 return false;
450}
451
452void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
453 bool IsAngled) {
454 setCodeCompletionReached();
455 if (CodeComplete)
456 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
457}
458
459void Preprocessor::CodeCompleteNaturalLanguage() {
460 setCodeCompletionReached();
461 if (CodeComplete)
462 CodeComplete->CodeCompleteNaturalLanguage();
463}
464
465/// getSpelling - This method is used to get the spelling of a token into a
466/// SmallVector. Note that the returned StringRef may not point to the
467/// supplied buffer if a copy can be avoided.
468StringRef Preprocessor::getSpelling(const Token &Tok,
469 SmallVectorImpl<char> &Buffer,
470 bool *Invalid) const {
471 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
472 if (Tok.isNot(K: tok::raw_identifier) && !Tok.hasUCN()) {
473 // Try the fast path.
474 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
475 return II->getName();
476 }
477
478 // Resize the buffer if we need to copy into it.
479 if (Tok.needsCleaning())
480 Buffer.resize(N: Tok.getLength());
481
482 const char *Ptr = Buffer.data();
483 unsigned Len = getSpelling(Tok, Buffer&: Ptr, Invalid);
484 return StringRef(Ptr, Len);
485}
486
487/// CreateString - Plop the specified string into a scratch buffer and return a
488/// location for it. If specified, the source location provides a source
489/// location for the token.
490void Preprocessor::CreateString(StringRef Str, Token &Tok,
491 SourceLocation ExpansionLocStart,
492 SourceLocation ExpansionLocEnd) {
493 Tok.setLength(Str.size());
494
495 const char *DestPtr;
496 SourceLocation Loc = ScratchBuf->getToken(Buf: Str.data(), Len: Str.size(), DestPtr);
497
498 if (ExpansionLocStart.isValid())
499 Loc = SourceMgr.createExpansionLoc(SpellingLoc: Loc, ExpansionLocStart,
500 ExpansionLocEnd, Length: Str.size());
501 Tok.setLocation(Loc);
502
503 // If this is a raw identifier or a literal token, set the pointer data.
504 if (Tok.is(K: tok::raw_identifier))
505 Tok.setRawIdentifierData(DestPtr);
506 else if (Tok.isLiteral())
507 Tok.setLiteralData(DestPtr);
508}
509
510SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
511 auto &SM = getSourceManager();
512 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
513 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc: SpellingLoc);
514 bool Invalid = false;
515 StringRef Buffer = SM.getBufferData(FID: LocInfo.first, Invalid: &Invalid);
516 if (Invalid)
517 return SourceLocation();
518
519 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
520 const char *DestPtr;
521 SourceLocation Spelling =
522 ScratchBuf->getToken(Buf: Buffer.data() + LocInfo.second, Len: Length, DestPtr);
523 return SM.createTokenSplitLoc(SpellingLoc: Spelling, TokenStart: Loc, TokenEnd: Loc.getLocWithOffset(Offset: Length));
524}
525
526Module *Preprocessor::getCurrentModule() {
527 if (!getLangOpts().isCompilingModule())
528 return nullptr;
529
530 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().CurrentModule);
531}
532
533Module *Preprocessor::getCurrentModuleImplementation() {
534 if (!getLangOpts().isCompilingModuleImplementation())
535 return nullptr;
536
537 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().ModuleName);
538}
539
540//===----------------------------------------------------------------------===//
541// Preprocessor Initialization Methods
542//===----------------------------------------------------------------------===//
543
544/// EnterMainSourceFile - Enter the specified FileID as the main source file,
545/// which implicitly adds the builtin defines etc.
546void Preprocessor::EnterMainSourceFile() {
547 // We do not allow the preprocessor to reenter the main file. Doing so will
548 // cause FileID's to accumulate information from both runs (e.g. #line
549 // information) and predefined macros aren't guaranteed to be set properly.
550 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
551 FileID MainFileID = SourceMgr.getMainFileID();
552
553 // If MainFileID is loaded it means we loaded an AST file, no need to enter
554 // a main file.
555 if (!SourceMgr.isLoadedFileID(FID: MainFileID)) {
556 // Enter the main file source buffer.
557 EnterSourceFile(FID: MainFileID, Dir: nullptr, Loc: SourceLocation());
558
559 // If we've been asked to skip bytes in the main file (e.g., as part of a
560 // precompiled preamble), do so now.
561 if (SkipMainFilePreamble.first > 0)
562 CurLexer->SetByteOffset(Offset: SkipMainFilePreamble.first,
563 StartOfLine: SkipMainFilePreamble.second);
564
565 // Tell the header info that the main file was entered. If the file is later
566 // #imported, it won't be re-entered.
567 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(FID: MainFileID))
568 markIncluded(File: *FE);
569 }
570
571 // Preprocess Predefines to populate the initial preprocessor state.
572 std::unique_ptr<llvm::MemoryBuffer> SB =
573 llvm::MemoryBuffer::getMemBufferCopy(InputData: Predefines, BufferName: "<built-in>");
574 assert(SB && "Cannot create predefined source buffer");
575 FileID FID = SourceMgr.createFileID(Buffer: std::move(SB));
576 assert(FID.isValid() && "Could not create FileID for predefines?");
577 setPredefinesFileID(FID);
578
579 // Start parsing the predefines.
580 EnterSourceFile(FID, Dir: nullptr, Loc: SourceLocation());
581
582 if (!PPOpts->PCHThroughHeader.empty()) {
583 // Lookup and save the FileID for the through header. If it isn't found
584 // in the search path, it's a fatal error.
585 OptionalFileEntryRef File = LookupFile(
586 FilenameLoc: SourceLocation(), Filename: PPOpts->PCHThroughHeader,
587 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
588 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
589 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
590 /*IsFrameworkFound=*/nullptr);
591 if (!File) {
592 Diag(SourceLocation(), diag::err_pp_through_header_not_found)
593 << PPOpts->PCHThroughHeader;
594 return;
595 }
596 setPCHThroughHeaderFileID(
597 SourceMgr.createFileID(SourceFile: *File, IncludePos: SourceLocation(), FileCharacter: SrcMgr::C_User));
598 }
599
600 // Skip tokens from the Predefines and if needed the main file.
601 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
602 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
603 SkipTokensWhileUsingPCH();
604}
605
606void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
607 assert(PCHThroughHeaderFileID.isInvalid() &&
608 "PCHThroughHeaderFileID already set!");
609 PCHThroughHeaderFileID = FID;
610}
611
612bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
613 assert(PCHThroughHeaderFileID.isValid() &&
614 "Invalid PCH through header FileID");
615 return FE == SourceMgr.getFileEntryForID(FID: PCHThroughHeaderFileID);
616}
617
618bool Preprocessor::creatingPCHWithThroughHeader() {
619 return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
620 PCHThroughHeaderFileID.isValid();
621}
622
623bool Preprocessor::usingPCHWithThroughHeader() {
624 return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
625 PCHThroughHeaderFileID.isValid();
626}
627
628bool Preprocessor::creatingPCHWithPragmaHdrStop() {
629 return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop;
630}
631
632bool Preprocessor::usingPCHWithPragmaHdrStop() {
633 return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop;
634}
635
636/// Skip tokens until after the #include of the through header or
637/// until after a #pragma hdrstop is seen. Tokens in the predefines file
638/// and the main file may be skipped. If the end of the predefines file
639/// is reached, skipping continues into the main file. If the end of the
640/// main file is reached, it's a fatal error.
641void Preprocessor::SkipTokensWhileUsingPCH() {
642 bool ReachedMainFileEOF = false;
643 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
644 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
645 Token Tok;
646 while (true) {
647 bool InPredefines =
648 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
649 CurLexerCallback(*this, Tok);
650 if (Tok.is(K: tok::eof) && !InPredefines) {
651 ReachedMainFileEOF = true;
652 break;
653 }
654 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
655 break;
656 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
657 break;
658 }
659 if (ReachedMainFileEOF) {
660 if (UsingPCHThroughHeader)
661 Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
662 << PPOpts->PCHThroughHeader << 1;
663 else if (!PPOpts->PCHWithHdrStopCreate)
664 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
665 }
666}
667
668void Preprocessor::replayPreambleConditionalStack() {
669 // Restore the conditional stack from the preamble, if there is one.
670 if (PreambleConditionalStack.isReplaying()) {
671 assert(CurPPLexer &&
672 "CurPPLexer is null when calling replayPreambleConditionalStack.");
673 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
674 PreambleConditionalStack.doneReplaying();
675 if (PreambleConditionalStack.reachedEOFWhileSkipping())
676 SkipExcludedConditionalBlock(
677 HashTokenLoc: PreambleConditionalStack.SkipInfo->HashTokenLoc,
678 IfTokenLoc: PreambleConditionalStack.SkipInfo->IfTokenLoc,
679 FoundNonSkipPortion: PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
680 FoundElse: PreambleConditionalStack.SkipInfo->FoundElse,
681 ElseLoc: PreambleConditionalStack.SkipInfo->ElseLoc);
682 }
683}
684
685void Preprocessor::EndSourceFile() {
686 // Notify the client that we reached the end of the source file.
687 if (Callbacks)
688 Callbacks->EndOfMainFile();
689}
690
691//===----------------------------------------------------------------------===//
692// Lexer Event Handling.
693//===----------------------------------------------------------------------===//
694
695/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
696/// identifier information for the token and install it into the token,
697/// updating the token kind accordingly.
698IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
699 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
700
701 // Look up this token, see if it is a macro, or if it is a language keyword.
702 IdentifierInfo *II;
703 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
704 // No cleaning needed, just use the characters from the lexed buffer.
705 II = getIdentifierInfo(Name: Identifier.getRawIdentifier());
706 } else {
707 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
708 SmallString<64> IdentifierBuffer;
709 StringRef CleanedStr = getSpelling(Tok: Identifier, Buffer&: IdentifierBuffer);
710
711 if (Identifier.hasUCN()) {
712 SmallString<64> UCNIdentifierBuffer;
713 expandUCNs(Buf&: UCNIdentifierBuffer, Input: CleanedStr);
714 II = getIdentifierInfo(Name: UCNIdentifierBuffer);
715 } else {
716 II = getIdentifierInfo(Name: CleanedStr);
717 }
718 }
719
720 // Update the token info (identifier info and appropriate token kind).
721 // FIXME: the raw_identifier may contain leading whitespace which is removed
722 // from the cleaned identifier token. The SourceLocation should be updated to
723 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
724 // line continuation before 'B') is parsed as a single tok::raw_identifier and
725 // is cleaned to tok::identifier "B". After cleaning the token's length is
726 // still 3 and the SourceLocation refers to the location of the backslash.
727 Identifier.setIdentifierInfo(II);
728 Identifier.setKind(II->getTokenID());
729
730 return II;
731}
732
733void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
734 PoisonReasons[II] = DiagID;
735}
736
737void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
738 assert(Ident__exception_code && Ident__exception_info);
739 assert(Ident___exception_code && Ident___exception_info);
740 Ident__exception_code->setIsPoisoned(Poison);
741 Ident___exception_code->setIsPoisoned(Poison);
742 Ident_GetExceptionCode->setIsPoisoned(Poison);
743 Ident__exception_info->setIsPoisoned(Poison);
744 Ident___exception_info->setIsPoisoned(Poison);
745 Ident_GetExceptionInfo->setIsPoisoned(Poison);
746 Ident__abnormal_termination->setIsPoisoned(Poison);
747 Ident___abnormal_termination->setIsPoisoned(Poison);
748 Ident_AbnormalTermination->setIsPoisoned(Poison);
749}
750
751void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
752 assert(Identifier.getIdentifierInfo() &&
753 "Can't handle identifiers without identifier info!");
754 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
755 PoisonReasons.find(Val: Identifier.getIdentifierInfo());
756 if(it == PoisonReasons.end())
757 Diag(Identifier, diag::err_pp_used_poisoned_id);
758 else
759 Diag(Tok: Identifier,DiagID: it->second) << Identifier.getIdentifierInfo();
760}
761
762void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
763 assert(II.isOutOfDate() && "not out of date");
764 getExternalSource()->updateOutOfDateIdentifier(II);
765}
766
767/// HandleIdentifier - This callback is invoked when the lexer reads an
768/// identifier. This callback looks up the identifier in the map and/or
769/// potentially macro expands it or turns it into a named token (like 'for').
770///
771/// Note that callers of this method are guarded by checking the
772/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
773/// IdentifierInfo methods that compute these properties will need to change to
774/// match.
775bool Preprocessor::HandleIdentifier(Token &Identifier) {
776 assert(Identifier.getIdentifierInfo() &&
777 "Can't handle identifiers without identifier info!");
778
779 IdentifierInfo &II = *Identifier.getIdentifierInfo();
780
781 // If the information about this identifier is out of date, update it from
782 // the external source.
783 // We have to treat __VA_ARGS__ in a special way, since it gets
784 // serialized with isPoisoned = true, but our preprocessor may have
785 // unpoisoned it if we're defining a C99 macro.
786 if (II.isOutOfDate()) {
787 bool CurrentIsPoisoned = false;
788 const bool IsSpecialVariadicMacro =
789 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
790 if (IsSpecialVariadicMacro)
791 CurrentIsPoisoned = II.isPoisoned();
792
793 updateOutOfDateIdentifier(II);
794 Identifier.setKind(II.getTokenID());
795
796 if (IsSpecialVariadicMacro)
797 II.setIsPoisoned(CurrentIsPoisoned);
798 }
799
800 // If this identifier was poisoned, and if it was not produced from a macro
801 // expansion, emit an error.
802 if (II.isPoisoned() && CurPPLexer) {
803 HandlePoisonedIdentifier(Identifier);
804 }
805
806 // If this is a macro to be expanded, do it.
807 if (const MacroDefinition MD = getMacroDefinition(II: &II)) {
808 const auto *MI = MD.getMacroInfo();
809 assert(MI && "macro definition with no macro info?");
810 if (!DisableMacroExpansion) {
811 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
812 // C99 6.10.3p10: If the preprocessing token immediately after the
813 // macro name isn't a '(', this macro should not be expanded.
814 if (!MI->isFunctionLike() || isNextPPTokenLParen())
815 return HandleMacroExpandedIdentifier(Identifier, MD);
816 } else {
817 // C99 6.10.3.4p2 says that a disabled macro may never again be
818 // expanded, even if it's in a context where it could be expanded in the
819 // future.
820 Identifier.setFlag(Token::DisableExpand);
821 if (MI->isObjectLike() || isNextPPTokenLParen())
822 Diag(Tok: Identifier, diag::DiagID: pp_disabled_macro_expansion);
823 }
824 }
825 }
826
827 // If this identifier is a keyword in a newer Standard or proposed Standard,
828 // produce a warning. Don't warn if we're not considering macro expansion,
829 // since this identifier might be the name of a macro.
830 // FIXME: This warning is disabled in cases where it shouldn't be, like
831 // "#define constexpr constexpr", "int constexpr;"
832 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
833 Diag(Tok: Identifier, DiagID: getIdentifierTable().getFutureCompatDiagKind(II, LangOpts: getLangOpts()))
834 << II.getName();
835 // Don't diagnose this keyword again in this translation unit.
836 II.setIsFutureCompatKeyword(false);
837 }
838
839 // If this is an extension token, diagnose its use.
840 // We avoid diagnosing tokens that originate from macro definitions.
841 // FIXME: This warning is disabled in cases where it shouldn't be,
842 // like "#define TY typeof", "TY(1) x".
843 if (II.isExtensionToken() && !DisableMacroExpansion)
844 Diag(Identifier, diag::ext_token_used);
845
846 // If this is the 'import' contextual keyword following an '@', note
847 // that the next token indicates a module name.
848 //
849 // Note that we do not treat 'import' as a contextual
850 // keyword when we're in a caching lexer, because caching lexers only get
851 // used in contexts where import declarations are disallowed.
852 //
853 // Likewise if this is the standard C++ import keyword.
854 if (((LastTokenWasAt && II.isModulesImport()) ||
855 Identifier.is(K: tok::kw_import)) &&
856 !InMacroArgs && !DisableMacroExpansion &&
857 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
858 CurLexerCallback != CLK_CachingLexer) {
859 ModuleImportLoc = Identifier.getLocation();
860 NamedModuleImportPath.clear();
861 IsAtImport = true;
862 ModuleImportExpectsIdentifier = true;
863 CurLexerCallback = CLK_LexAfterModuleImport;
864 }
865 return true;
866}
867
868void Preprocessor::Lex(Token &Result) {
869 ++LexLevel;
870
871 // We loop here until a lex function returns a token; this avoids recursion.
872 while (!CurLexerCallback(*this, Result))
873 ;
874
875 if (Result.is(K: tok::unknown) && TheModuleLoader.HadFatalFailure)
876 return;
877
878 if (Result.is(K: tok::code_completion) && Result.getIdentifierInfo()) {
879 // Remember the identifier before code completion token.
880 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
881 setCodeCompletionTokenRange(Start: Result.getLocation(), End: Result.getEndLoc());
882 // Set IdenfitierInfo to null to avoid confusing code that handles both
883 // identifiers and completion tokens.
884 Result.setIdentifierInfo(nullptr);
885 }
886
887 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
888 // if this token is being produced as a result of phase 4 of translation.
889 // Update TrackGMFState to decide if we are currently in a Global Module
890 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
891 // depends on the prevailing StdCXXImportSeq state in two cases.
892 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
893 !Result.getFlag(Flag: Token::IsReinjected)) {
894 switch (Result.getKind()) {
895 case tok::l_paren: case tok::l_square: case tok::l_brace:
896 StdCXXImportSeqState.handleOpenBracket();
897 break;
898 case tok::r_paren: case tok::r_square:
899 StdCXXImportSeqState.handleCloseBracket();
900 break;
901 case tok::r_brace:
902 StdCXXImportSeqState.handleCloseBrace();
903 break;
904 // This token is injected to represent the translation of '#include "a.h"'
905 // into "import a.h;". Mimic the notional ';'.
906 case tok::annot_module_include:
907 case tok::semi:
908 TrackGMFState.handleSemi();
909 StdCXXImportSeqState.handleSemi();
910 ModuleDeclState.handleSemi();
911 break;
912 case tok::header_name:
913 case tok::annot_header_unit:
914 StdCXXImportSeqState.handleHeaderName();
915 break;
916 case tok::kw_export:
917 TrackGMFState.handleExport();
918 StdCXXImportSeqState.handleExport();
919 ModuleDeclState.handleExport();
920 break;
921 case tok::colon:
922 ModuleDeclState.handleColon();
923 break;
924 case tok::period:
925 ModuleDeclState.handlePeriod();
926 break;
927 case tok::identifier:
928 // Check "import" and "module" when there is no open bracket. The two
929 // identifiers are not meaningful with open brackets.
930 if (StdCXXImportSeqState.atTopLevel()) {
931 if (Result.getIdentifierInfo()->isModulesImport()) {
932 TrackGMFState.handleImport(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
933 StdCXXImportSeqState.handleImport();
934 if (StdCXXImportSeqState.afterImportSeq()) {
935 ModuleImportLoc = Result.getLocation();
936 NamedModuleImportPath.clear();
937 IsAtImport = false;
938 ModuleImportExpectsIdentifier = true;
939 CurLexerCallback = CLK_LexAfterModuleImport;
940 }
941 break;
942 } else if (Result.getIdentifierInfo() == getIdentifierInfo(Name: "module")) {
943 TrackGMFState.handleModule(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
944 ModuleDeclState.handleModule();
945 break;
946 }
947 }
948 ModuleDeclState.handleIdentifier(Identifier: Result.getIdentifierInfo());
949 if (ModuleDeclState.isModuleCandidate())
950 break;
951 [[fallthrough]];
952 default:
953 TrackGMFState.handleMisc();
954 StdCXXImportSeqState.handleMisc();
955 ModuleDeclState.handleMisc();
956 break;
957 }
958 }
959
960 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
961 CheckPoints[CurLexer->getFileID()].push_back(Elt: CurLexer->BufferPtr);
962 CheckPointCounter = 0;
963 }
964
965 LastTokenWasAt = Result.is(K: tok::at);
966 --LexLevel;
967
968 if ((LexLevel == 0 || PreprocessToken) &&
969 !Result.getFlag(Flag: Token::IsReinjected)) {
970 if (LexLevel == 0)
971 ++TokenCount;
972 if (OnToken)
973 OnToken(Result);
974 }
975}
976
977void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
978 while (1) {
979 Token Tok;
980 Lex(Result&: Tok);
981 if (Tok.isOneOf(K1: tok::unknown, Ks: tok::eof, Ks: tok::eod,
982 Ks: tok::annot_repl_input_end))
983 break;
984 if (Tokens != nullptr)
985 Tokens->push_back(x: Tok);
986 }
987}
988
989/// Lex a header-name token (including one formed from header-name-tokens if
990/// \p AllowConcatenation is \c true).
991///
992/// \param FilenameTok Filled in with the next token. On success, this will
993/// be either a header_name token. On failure, it will be whatever other
994/// token was found instead.
995/// \param AllowMacroExpansion If \c true, allow the header name to be formed
996/// by macro expansion (concatenating tokens as necessary if the first
997/// token is a '<').
998/// \return \c true if we reached EOD or EOF while looking for a > token in
999/// a concatenated header name and diagnosed it. \c false otherwise.
1000bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1001 // Lex using header-name tokenization rules if tokens are being lexed from
1002 // a file. Just grab a token normally if we're in a macro expansion.
1003 if (CurPPLexer)
1004 CurPPLexer->LexIncludeFilename(FilenameTok);
1005 else
1006 Lex(Result&: FilenameTok);
1007
1008 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1009 // case, glue the tokens together into an angle_string_literal token.
1010 SmallString<128> FilenameBuffer;
1011 if (FilenameTok.is(K: tok::less) && AllowMacroExpansion) {
1012 bool StartOfLine = FilenameTok.isAtStartOfLine();
1013 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1014 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1015
1016 SourceLocation Start = FilenameTok.getLocation();
1017 SourceLocation End;
1018 FilenameBuffer.push_back(Elt: '<');
1019
1020 // Consume tokens until we find a '>'.
1021 // FIXME: A header-name could be formed starting or ending with an
1022 // alternative token. It's not clear whether that's ill-formed in all
1023 // cases.
1024 while (FilenameTok.isNot(K: tok::greater)) {
1025 Lex(Result&: FilenameTok);
1026 if (FilenameTok.isOneOf(K1: tok::eod, K2: tok::eof)) {
1027 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1028 Diag(Start, diag::note_matching) << tok::less;
1029 return true;
1030 }
1031
1032 End = FilenameTok.getLocation();
1033
1034 // FIXME: Provide code completion for #includes.
1035 if (FilenameTok.is(K: tok::code_completion)) {
1036 setCodeCompletionReached();
1037 Lex(Result&: FilenameTok);
1038 continue;
1039 }
1040
1041 // Append the spelling of this token to the buffer. If there was a space
1042 // before it, add it now.
1043 if (FilenameTok.hasLeadingSpace())
1044 FilenameBuffer.push_back(Elt: ' ');
1045
1046 // Get the spelling of the token, directly into FilenameBuffer if
1047 // possible.
1048 size_t PreAppendSize = FilenameBuffer.size();
1049 FilenameBuffer.resize(N: PreAppendSize + FilenameTok.getLength());
1050
1051 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1052 unsigned ActualLen = getSpelling(Tok: FilenameTok, Buffer&: BufPtr);
1053
1054 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1055 if (BufPtr != &FilenameBuffer[PreAppendSize])
1056 memcpy(dest: &FilenameBuffer[PreAppendSize], src: BufPtr, n: ActualLen);
1057
1058 // Resize FilenameBuffer to the correct size.
1059 if (FilenameTok.getLength() != ActualLen)
1060 FilenameBuffer.resize(N: PreAppendSize + ActualLen);
1061 }
1062
1063 FilenameTok.startToken();
1064 FilenameTok.setKind(tok::header_name);
1065 FilenameTok.setFlagValue(Flag: Token::StartOfLine, Val: StartOfLine);
1066 FilenameTok.setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
1067 FilenameTok.setFlagValue(Flag: Token::LeadingEmptyMacro, Val: LeadingEmptyMacro);
1068 CreateString(Str: FilenameBuffer, Tok&: FilenameTok, ExpansionLocStart: Start, ExpansionLocEnd: End);
1069 } else if (FilenameTok.is(K: tok::string_literal) && AllowMacroExpansion) {
1070 // Convert a string-literal token of the form " h-char-sequence "
1071 // (produced by macro expansion) into a header-name token.
1072 //
1073 // The rules for header-names don't quite match the rules for
1074 // string-literals, but all the places where they differ result in
1075 // undefined behavior, so we can and do treat them the same.
1076 //
1077 // A string-literal with a prefix or suffix is not translated into a
1078 // header-name. This could theoretically be observable via the C++20
1079 // context-sensitive header-name formation rules.
1080 StringRef Str = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
1081 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1082 FilenameTok.setKind(tok::header_name);
1083 }
1084
1085 return false;
1086}
1087
1088/// Collect the tokens of a C++20 pp-import-suffix.
1089void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
1090 // FIXME: For error recovery, consider recognizing attribute syntax here
1091 // and terminating / diagnosing a missing semicolon if we find anything
1092 // else? (Can we leave that to the parser?)
1093 unsigned BracketDepth = 0;
1094 while (true) {
1095 Toks.emplace_back();
1096 Lex(Result&: Toks.back());
1097
1098 switch (Toks.back().getKind()) {
1099 case tok::l_paren: case tok::l_square: case tok::l_brace:
1100 ++BracketDepth;
1101 break;
1102
1103 case tok::r_paren: case tok::r_square: case tok::r_brace:
1104 if (BracketDepth == 0)
1105 return;
1106 --BracketDepth;
1107 break;
1108
1109 case tok::semi:
1110 if (BracketDepth == 0)
1111 return;
1112 break;
1113
1114 case tok::eof:
1115 return;
1116
1117 default:
1118 break;
1119 }
1120 }
1121}
1122
1123
1124/// Lex a token following the 'import' contextual keyword.
1125///
1126/// pp-import: [C++20]
1127/// import header-name pp-import-suffix[opt] ;
1128/// import header-name-tokens pp-import-suffix[opt] ;
1129/// [ObjC] @ import module-name ;
1130/// [Clang] import module-name ;
1131///
1132/// header-name-tokens:
1133/// string-literal
1134/// < [any sequence of preprocessing-tokens other than >] >
1135///
1136/// module-name:
1137/// module-name-qualifier[opt] identifier
1138///
1139/// module-name-qualifier
1140/// module-name-qualifier[opt] identifier .
1141///
1142/// We respond to a pp-import by importing macros from the named module.
1143bool Preprocessor::LexAfterModuleImport(Token &Result) {
1144 // Figure out what kind of lexer we actually have.
1145 recomputeCurLexerKind();
1146
1147 // Lex the next token. The header-name lexing rules are used at the start of
1148 // a pp-import.
1149 //
1150 // For now, we only support header-name imports in C++20 mode.
1151 // FIXME: Should we allow this in all language modes that support an import
1152 // declaration as an extension?
1153 if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
1154 if (LexHeaderName(FilenameTok&: Result))
1155 return true;
1156
1157 if (Result.is(K: tok::colon) && ModuleDeclState.isNamedModule()) {
1158 std::string Name = ModuleDeclState.getPrimaryName().str();
1159 Name += ":";
1160 NamedModuleImportPath.push_back(
1161 Elt: {getIdentifierInfo(Name), Result.getLocation()});
1162 CurLexerCallback = CLK_LexAfterModuleImport;
1163 return true;
1164 }
1165 } else {
1166 Lex(Result);
1167 }
1168
1169 // Allocate a holding buffer for a sequence of tokens and introduce it into
1170 // the token stream.
1171 auto EnterTokens = [this](ArrayRef<Token> Toks) {
1172 auto ToksCopy = std::make_unique<Token[]>(num: Toks.size());
1173 std::copy(first: Toks.begin(), last: Toks.end(), result: ToksCopy.get());
1174 EnterTokenStream(Toks: std::move(ToksCopy), NumToks: Toks.size(),
1175 /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
1176 };
1177
1178 bool ImportingHeader = Result.is(K: tok::header_name);
1179 // Check for a header-name.
1180 SmallVector<Token, 32> Suffix;
1181 if (ImportingHeader) {
1182 // Enter the header-name token into the token stream; a Lex action cannot
1183 // both return a token and cache tokens (doing so would corrupt the token
1184 // cache if the call to Lex comes from CachingLex / PeekAhead).
1185 Suffix.push_back(Elt: Result);
1186
1187 // Consume the pp-import-suffix and expand any macros in it now. We'll add
1188 // it back into the token stream later.
1189 CollectPpImportSuffix(Toks&: Suffix);
1190 if (Suffix.back().isNot(K: tok::semi)) {
1191 // This is not a pp-import after all.
1192 EnterTokens(Suffix);
1193 return false;
1194 }
1195
1196 // C++2a [cpp.module]p1:
1197 // The ';' preprocessing-token terminating a pp-import shall not have
1198 // been produced by macro replacement.
1199 SourceLocation SemiLoc = Suffix.back().getLocation();
1200 if (SemiLoc.isMacroID())
1201 Diag(SemiLoc, diag::err_header_import_semi_in_macro);
1202
1203 // Reconstitute the import token.
1204 Token ImportTok;
1205 ImportTok.startToken();
1206 ImportTok.setKind(tok::kw_import);
1207 ImportTok.setLocation(ModuleImportLoc);
1208 ImportTok.setIdentifierInfo(getIdentifierInfo(Name: "import"));
1209 ImportTok.setLength(6);
1210
1211 auto Action = HandleHeaderIncludeOrImport(
1212 /*HashLoc*/ SourceLocation(), IncludeTok&: ImportTok, FilenameTok&: Suffix.front(), EndLoc: SemiLoc);
1213 switch (Action.Kind) {
1214 case ImportAction::None:
1215 break;
1216
1217 case ImportAction::ModuleBegin:
1218 // Let the parser know we're textually entering the module.
1219 Suffix.emplace_back();
1220 Suffix.back().startToken();
1221 Suffix.back().setKind(tok::annot_module_begin);
1222 Suffix.back().setLocation(SemiLoc);
1223 Suffix.back().setAnnotationEndLoc(SemiLoc);
1224 Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1225 [[fallthrough]];
1226
1227 case ImportAction::ModuleImport:
1228 case ImportAction::HeaderUnitImport:
1229 case ImportAction::SkippedModuleImport:
1230 // We chose to import (or textually enter) the file. Convert the
1231 // header-name token into a header unit annotation token.
1232 Suffix[0].setKind(tok::annot_header_unit);
1233 Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
1234 Suffix[0].setAnnotationValue(Action.ModuleForHeader);
1235 // FIXME: Call the moduleImport callback?
1236 break;
1237 case ImportAction::Failure:
1238 assert(TheModuleLoader.HadFatalFailure &&
1239 "This should be an early exit only to a fatal error");
1240 Result.setKind(tok::eof);
1241 CurLexer->cutOffLexing();
1242 EnterTokens(Suffix);
1243 return true;
1244 }
1245
1246 EnterTokens(Suffix);
1247 return false;
1248 }
1249
1250 // The token sequence
1251 //
1252 // import identifier (. identifier)*
1253 //
1254 // indicates a module import directive. We already saw the 'import'
1255 // contextual keyword, so now we're looking for the identifiers.
1256 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
1257 // We expected to see an identifier here, and we did; continue handling
1258 // identifiers.
1259 NamedModuleImportPath.push_back(
1260 Elt: std::make_pair(x: Result.getIdentifierInfo(), y: Result.getLocation()));
1261 ModuleImportExpectsIdentifier = false;
1262 CurLexerCallback = CLK_LexAfterModuleImport;
1263 return true;
1264 }
1265
1266 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
1267 // see the next identifier. (We can also see a '[[' that begins an
1268 // attribute-specifier-seq here under the Standard C++ Modules.)
1269 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
1270 ModuleImportExpectsIdentifier = true;
1271 CurLexerCallback = CLK_LexAfterModuleImport;
1272 return true;
1273 }
1274
1275 // If we didn't recognize a module name at all, this is not a (valid) import.
1276 if (NamedModuleImportPath.empty() || Result.is(K: tok::eof))
1277 return true;
1278
1279 // Consume the pp-import-suffix and expand any macros in it now, if we're not
1280 // at the semicolon already.
1281 SourceLocation SemiLoc = Result.getLocation();
1282 if (Result.isNot(K: tok::semi)) {
1283 Suffix.push_back(Elt: Result);
1284 CollectPpImportSuffix(Toks&: Suffix);
1285 if (Suffix.back().isNot(K: tok::semi)) {
1286 // This is not an import after all.
1287 EnterTokens(Suffix);
1288 return false;
1289 }
1290 SemiLoc = Suffix.back().getLocation();
1291 }
1292
1293 // Under the standard C++ Modules, the dot is just part of the module name,
1294 // and not a real hierarchy separator. Flatten such module names now.
1295 //
1296 // FIXME: Is this the right level to be performing this transformation?
1297 std::string FlatModuleName;
1298 if (getLangOpts().CPlusPlusModules) {
1299 for (auto &Piece : NamedModuleImportPath) {
1300 // If the FlatModuleName ends with colon, it implies it is a partition.
1301 if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
1302 FlatModuleName += ".";
1303 FlatModuleName += Piece.first->getName();
1304 }
1305 SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;
1306 NamedModuleImportPath.clear();
1307 NamedModuleImportPath.push_back(
1308 Elt: std::make_pair(x: getIdentifierInfo(Name: FlatModuleName), y&: FirstPathLoc));
1309 }
1310
1311 Module *Imported = nullptr;
1312 // We don't/shouldn't load the standard c++20 modules when preprocessing.
1313 if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
1314 Imported = TheModuleLoader.loadModule(ImportLoc: ModuleImportLoc,
1315 Path: NamedModuleImportPath,
1316 Visibility: Module::Hidden,
1317 /*IsInclusionDirective=*/false);
1318 if (Imported)
1319 makeModuleVisible(M: Imported, Loc: SemiLoc);
1320 }
1321
1322 if (Callbacks)
1323 Callbacks->moduleImport(ImportLoc: ModuleImportLoc, Path: NamedModuleImportPath, Imported);
1324
1325 if (!Suffix.empty()) {
1326 EnterTokens(Suffix);
1327 return false;
1328 }
1329 return true;
1330}
1331
1332void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
1333 CurSubmoduleState->VisibleModules.setVisible(
1334 M, Loc, Vis: [](Module *) {},
1335 Cb: [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1336 // FIXME: Include the path in the diagnostic.
1337 // FIXME: Include the import location for the conflicting module.
1338 Diag(ModuleImportLoc, diag::warn_module_conflict)
1339 << Path[0]->getFullModuleName()
1340 << Conflict->getFullModuleName()
1341 << Message;
1342 });
1343
1344 // Add this module to the imports list of the currently-built submodule.
1345 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1346 BuildingSubmoduleStack.back().M->Imports.insert(X: M);
1347}
1348
1349bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1350 const char *DiagnosticTag,
1351 bool AllowMacroExpansion) {
1352 // We need at least one string literal.
1353 if (Result.isNot(K: tok::string_literal)) {
1354 Diag(Result, diag::err_expected_string_literal)
1355 << /*Source='in...'*/0 << DiagnosticTag;
1356 return false;
1357 }
1358
1359 // Lex string literal tokens, optionally with macro expansion.
1360 SmallVector<Token, 4> StrToks;
1361 do {
1362 StrToks.push_back(Elt: Result);
1363
1364 if (Result.hasUDSuffix())
1365 Diag(Result, diag::err_invalid_string_udl);
1366
1367 if (AllowMacroExpansion)
1368 Lex(Result);
1369 else
1370 LexUnexpandedToken(Result);
1371 } while (Result.is(K: tok::string_literal));
1372
1373 // Concatenate and parse the strings.
1374 StringLiteralParser Literal(StrToks, *this);
1375 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1376
1377 if (Literal.hadError)
1378 return false;
1379
1380 if (Literal.Pascal) {
1381 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1382 << /*Source='in...'*/0 << DiagnosticTag;
1383 return false;
1384 }
1385
1386 String = std::string(Literal.GetString());
1387 return true;
1388}
1389
1390bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1391 assert(Tok.is(tok::numeric_constant));
1392 SmallString<8> IntegerBuffer;
1393 bool NumberInvalid = false;
1394 StringRef Spelling = getSpelling(Tok, Buffer&: IntegerBuffer, Invalid: &NumberInvalid);
1395 if (NumberInvalid)
1396 return false;
1397 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1398 getLangOpts(), getTargetInfo(),
1399 getDiagnostics());
1400 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1401 return false;
1402 llvm::APInt APVal(64, 0);
1403 if (Literal.GetIntegerValue(Val&: APVal))
1404 return false;
1405 Lex(Result&: Tok);
1406 Value = APVal.getLimitedValue();
1407 return true;
1408}
1409
1410void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1411 assert(Handler && "NULL comment handler");
1412 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1413 "Comment handler already registered");
1414 CommentHandlers.push_back(x: Handler);
1415}
1416
1417void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1418 std::vector<CommentHandler *>::iterator Pos =
1419 llvm::find(Range&: CommentHandlers, Val: Handler);
1420 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1421 CommentHandlers.erase(position: Pos);
1422}
1423
1424bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1425 bool AnyPendingTokens = false;
1426 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
1427 HEnd = CommentHandlers.end();
1428 H != HEnd; ++H) {
1429 if ((*H)->HandleComment(PP&: *this, Comment))
1430 AnyPendingTokens = true;
1431 }
1432 if (!AnyPendingTokens || getCommentRetentionState())
1433 return false;
1434 Lex(Result&: result);
1435 return true;
1436}
1437
1438void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1439 const MacroAnnotations &A =
1440 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1441 assert(A.DeprecationInfo &&
1442 "Macro deprecation warning without recorded annotation!");
1443 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1444 if (Info.Message.empty())
1445 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1446 << Identifier.getIdentifierInfo() << 0;
1447 else
1448 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1449 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1450 Diag(Info.Location, diag::note_pp_macro_annotation) << 0;
1451}
1452
1453void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1454 const MacroAnnotations &A =
1455 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1456 assert(A.RestrictExpansionInfo &&
1457 "Macro restricted expansion warning without recorded annotation!");
1458 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1459 if (Info.Message.empty())
1460 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1461 << Identifier.getIdentifierInfo() << 0;
1462 else
1463 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1464 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1465 Diag(Info.Location, diag::note_pp_macro_annotation) << 1;
1466}
1467
/// Emit warn_fp_nan_inf_when_disabled at \p Identifier.
///
/// \param Identifier The token the warning is attached to.
/// \param DiagSelection Streamed as the first diagnostic argument; the
/// constant 1 is streamed as the second.
void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
                                             unsigned DiagSelection) const {
  Diag(Identifier, diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
}
1472
1473void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1474 bool IsUndef) const {
1475 const MacroAnnotations &A =
1476 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1477 assert(A.FinalAnnotationLoc &&
1478 "Final macro warning without recorded annotation!");
1479
1480 Diag(Identifier, diag::warn_pragma_final_macro)
1481 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1482 Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << 2;
1483}
1484
1485bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
1486 const SourceLocation &Loc) const {
1487 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1488 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1489 Range: SafeBufferOptOutMap,
1490 P: [&SourceMgr,
1491 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1492 return SourceMgr.isBeforeInTranslationUnit(LHS: Region.second, RHS: Loc);
1493 });
1494
1495 if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) {
1496 // To test if the start location of the found region precedes `Loc`:
1497 return SourceMgr.isBeforeInTranslationUnit(LHS: FirstRegionEndingAfterLoc->first,
1498 RHS: Loc);
1499 }
1500 // If we do not find a region whose end location passes `Loc`, we want to
1501 // check if the current region is still open:
1502 if (!SafeBufferOptOutMap.empty() &&
1503 SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second)
1504 return SourceMgr.isBeforeInTranslationUnit(LHS: SafeBufferOptOutMap.back().first,
1505 RHS: Loc);
1506 return false;
1507}
1508
1509bool Preprocessor::enterOrExitSafeBufferOptOutRegion(
1510 bool isEnter, const SourceLocation &Loc) {
1511 if (isEnter) {
1512 if (isPPInSafeBufferOptOutRegion())
1513 return true; // invalid enter action
1514 InSafeBufferOptOutRegion = true;
1515 CurrentSafeBufferOptOutStart = Loc;
1516
1517 // To set the start location of a new region:
1518
1519 if (!SafeBufferOptOutMap.empty()) {
1520 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1521 assert(PrevRegion->first != PrevRegion->second &&
1522 "Shall not begin a safe buffer opt-out region before closing the "
1523 "previous one.");
1524 }
1525 // If the start location equals to the end location, we call the region a
1526 // open region or a unclosed region (i.e., end location has not been set
1527 // yet).
1528 SafeBufferOptOutMap.emplace_back(Args: Loc, Args: Loc);
1529 } else {
1530 if (!isPPInSafeBufferOptOutRegion())
1531 return true; // invalid enter action
1532 InSafeBufferOptOutRegion = false;
1533
1534 // To set the end location of the current open region:
1535
1536 assert(!SafeBufferOptOutMap.empty() &&
1537 "Misordered safe buffer opt-out regions");
1538 auto *CurrRegion = &SafeBufferOptOutMap.back();
1539 assert(CurrRegion->first == CurrRegion->second &&
1540 "Set end location to a closed safe buffer opt-out region");
1541 CurrRegion->second = Loc;
1542 }
1543 return false;
1544}
1545
/// \returns the current value of the InSafeBufferOptOutRegion flag, which
/// enterOrExitSafeBufferOptOutRegion() sets on enter and clears on exit.
bool Preprocessor::isPPInSafeBufferOptOutRegion() {
  return InSafeBufferOptOutRegion;
}
/// \param StartLoc Always overwritten with CurrentSafeBufferOptOutStart, the
/// location stored by the most recent successful enter action, regardless of
/// the return value.
/// \returns the current value of the InSafeBufferOptOutRegion flag.
bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
  StartLoc = CurrentSafeBufferOptOutStart;
  return InSafeBufferOptOutRegion;
}
1553
// Out-of-line defaulted destructors for interface classes declared in the
// Lex headers. NOTE(review): presumably defined here so each class has a
// single home translation unit for its vtable -- confirm.
ModuleLoader::~ModuleLoader() = default;

CommentHandler::~CommentHandler() = default;

EmptylineHandler::~EmptylineHandler() = default;

CodeCompletionHandler::~CodeCompletionHandler() = default;
1561
1562void Preprocessor::createPreprocessingRecord() {
1563 if (Record)
1564 return;
1565
1566 Record = new PreprocessingRecord(getSourceManager());
1567 addPPCallbacks(C: std::unique_ptr<PPCallbacks>(Record));
1568}
1569
1570const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1571 if (auto It = CheckPoints.find(Val: FID); It != CheckPoints.end()) {
1572 const SmallVector<const char *> &FileCheckPoints = It->second;
1573 const char *Last = nullptr;
1574 // FIXME: Do better than a linear search.
1575 for (const char *P : FileCheckPoints) {
1576 if (P > Start)
1577 break;
1578 Last = P;
1579 }
1580 return Last;
1581 }
1582
1583 return nullptr;
1584}
1585

// source code of clang/lib/Lex/Preprocessor.cpp