1//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implements # directive processing for the Preprocessor.
11///
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/AttributeCommonInfo.h"
15#include "clang/Basic/Attributes.h"
16#include "clang/Basic/CharInfo.h"
17#include "clang/Basic/DirectoryEntry.h"
18#include "clang/Basic/FileManager.h"
19#include "clang/Basic/IdentifierTable.h"
20#include "clang/Basic/LangOptions.h"
21#include "clang/Basic/Module.h"
22#include "clang/Basic/SourceLocation.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Basic/TargetInfo.h"
25#include "clang/Basic/TokenKinds.h"
26#include "clang/Lex/CodeCompletionHandler.h"
27#include "clang/Lex/HeaderSearch.h"
28#include "clang/Lex/LexDiagnostic.h"
29#include "clang/Lex/LiteralSupport.h"
30#include "clang/Lex/MacroInfo.h"
31#include "clang/Lex/ModuleLoader.h"
32#include "clang/Lex/ModuleMap.h"
33#include "clang/Lex/PPCallbacks.h"
34#include "clang/Lex/Pragma.h"
35#include "clang/Lex/Preprocessor.h"
36#include "clang/Lex/PreprocessorOptions.h"
37#include "clang/Lex/Token.h"
38#include "clang/Lex/VariadicMacroSupport.h"
39#include "llvm/ADT/ArrayRef.h"
40#include "llvm/ADT/STLExtras.h"
41#include "llvm/ADT/ScopeExit.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/StringExtras.h"
44#include "llvm/ADT/StringRef.h"
45#include "llvm/ADT/StringSwitch.h"
46#include "llvm/Support/ErrorHandling.h"
47#include "llvm/Support/Path.h"
48#include "llvm/Support/SaveAndRestore.h"
49#include <algorithm>
50#include <cassert>
51#include <cstring>
52#include <optional>
53#include <string>
54#include <utility>
55
56using namespace clang;
57
58//===----------------------------------------------------------------------===//
59// Utility Methods for Preprocessor Directive Handling.
60//===----------------------------------------------------------------------===//
61
62MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
63 static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
64 return new (BP) MacroInfo(L);
65}
66
67DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
68 SourceLocation Loc) {
69 return new (BP) DefMacroDirective(MI, Loc);
70}
71
72UndefMacroDirective *
73Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
74 return new (BP) UndefMacroDirective(UndefLoc);
75}
76
77VisibilityMacroDirective *
78Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
79 bool isPublic) {
80 return new (BP) VisibilityMacroDirective(Loc, isPublic);
81}
82
83/// Read and discard all tokens remaining on the current line until
84/// the tok::eod token is found.
85SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
86 SourceRange Res;
87
88 LexUnexpandedToken(Result&: Tmp);
89 Res.setBegin(Tmp.getLocation());
90 while (Tmp.isNot(K: tok::eod)) {
91 assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
92 LexUnexpandedToken(Result&: Tmp);
93 }
94 Res.setEnd(Tmp.getLocation());
95 return Res;
96}
97
98/// Enumerates possible cases of #define/#undef a reserved identifier.
99enum MacroDiag {
100 MD_NoWarn, //> Not a reserved identifier
101 MD_KeywordDef, //> Macro hides keyword, enabled by default
102 MD_ReservedMacro, //> #define or #undef reserved id, disabled by default
103 MD_ReservedAttributeIdentifier //> Macro name is a reserved C++ attribute name
104};
105
106/// Enumerates possible %select values for the pp_err_elif_after_else and
107/// pp_err_elif_without_if diagnostics.
108enum PPElifDiag {
109 PED_Elif, //> Selects the #elif wording
110 PED_Elifdef, //> Selects the #elifdef wording
111 PED_Elifndef //> Selects the #elifndef wording
112};
113
114static bool isFeatureTestMacro(StringRef MacroName) {
115 // list from:
116 // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
117 // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
118 // * man 7 feature_test_macros
119 // The list must be sorted for correct binary search.
120 static constexpr StringRef ReservedMacro[] = {
121 "_ATFILE_SOURCE",
122 "_BSD_SOURCE",
123 "_CRT_NONSTDC_NO_WARNINGS",
124 "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
125 "_CRT_SECURE_NO_WARNINGS",
126 "_FILE_OFFSET_BITS",
127 "_FORTIFY_SOURCE",
128 "_GLIBCXX_ASSERTIONS",
129 "_GLIBCXX_CONCEPT_CHECKS",
130 "_GLIBCXX_DEBUG",
131 "_GLIBCXX_DEBUG_PEDANTIC",
132 "_GLIBCXX_PARALLEL",
133 "_GLIBCXX_PARALLEL_ASSERTIONS",
134 "_GLIBCXX_SANITIZE_VECTOR",
135 "_GLIBCXX_USE_CXX11_ABI",
136 "_GLIBCXX_USE_DEPRECATED",
137 "_GNU_SOURCE",
138 "_ISOC11_SOURCE",
139 "_ISOC95_SOURCE",
140 "_ISOC99_SOURCE",
141 "_LARGEFILE64_SOURCE",
142 "_POSIX_C_SOURCE",
143 "_REENTRANT",
144 "_SVID_SOURCE",
145 "_THREAD_SAFE",
146 "_XOPEN_SOURCE",
147 "_XOPEN_SOURCE_EXTENDED",
148 "__STDCPP_WANT_MATH_SPEC_FUNCS__",
149 "__STDC_FORMAT_MACROS",
150 };
151 return llvm::binary_search(Range: ReservedMacro, Value&: MacroName);
152}
153
154static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
155 const MacroInfo *MI,
156 const StringRef MacroName) {
157 // If this is a macro with special handling (like __LINE__) then it's language
158 // defined.
159 if (MI->isBuiltinMacro())
160 return true;
161 // Builtin macros are defined in the builtin file
162 if (!SourceMgr.isWrittenInBuiltinFile(Loc: MI->getDefinitionLoc()))
163 return false;
164 // C defines macros starting with __STDC, and C++ defines macros starting with
165 // __STDCPP
166 if (MacroName.starts_with(Prefix: "__STDC"))
167 return true;
168 // C++ defines the __cplusplus macro
169 if (MacroName == "__cplusplus")
170 return true;
171 // C++ defines various feature-test macros starting with __cpp
172 if (MacroName.starts_with(Prefix: "__cpp"))
173 return true;
174 // Anything else isn't language-defined
175 return false;
176}
177
178static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) {
179 const LangOptions &Lang = PP.getLangOpts();
180 if (Lang.CPlusPlus &&
181 hasAttribute(Syntax: AttributeCommonInfo::AS_CXX11, /* Scope*/ nullptr, Attr: II,
182 Target: PP.getTargetInfo(), LangOpts: Lang, /*CheckPlugins*/ false) > 0) {
183 AttributeCommonInfo::AttrArgsInfo AttrArgsInfo =
184 AttributeCommonInfo::getCXX11AttrArgsInfo(Name: II);
185 if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required)
186 return PP.isNextPPTokenLParen();
187
188 return !PP.isNextPPTokenLParen() ||
189 AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional;
190 }
191 return false;
192}
193
194static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
195 const LangOptions &Lang = PP.getLangOpts();
196 StringRef Text = II->getName();
197 if (isReservedInAllContexts(Status: II->isReserved(LangOpts: Lang)))
198 return isFeatureTestMacro(MacroName: Text) ? MD_NoWarn : MD_ReservedMacro;
199 if (II->isKeyword(LangOpts: Lang))
200 return MD_KeywordDef;
201 if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
202 return MD_KeywordDef;
203 if (isReservedCXXAttributeName(PP, II))
204 return MD_ReservedAttributeIdentifier;
205 return MD_NoWarn;
206}
207
208static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
209 const LangOptions &Lang = PP.getLangOpts();
210 // Do not warn on keyword undef. It is generally harmless and widely used.
211 if (isReservedInAllContexts(Status: II->isReserved(LangOpts: Lang)))
212 return MD_ReservedMacro;
213 if (isReservedCXXAttributeName(PP, II))
214 return MD_ReservedAttributeIdentifier;
215 return MD_NoWarn;
216}
217
218// Return true if we want to issue a diagnostic by default if we
219// encounter this name in a #include with the wrong case. For now,
220// this includes the standard C and C++ headers, Posix headers,
221// and Boost headers. Improper case for these #includes is a
222// potential portability issue.
223static bool warnByDefaultOnWrongCase(StringRef Include) {
224 // If the first component of the path is "boost", treat this like a standard header
225 // for the purposes of diagnostics.
226 if (::llvm::sys::path::begin(path: Include)->equals_insensitive(RHS: "boost"))
227 return true;
228
229 // "condition_variable" is the longest standard header name at 18 characters.
230 // If the include file name is longer than that, it can't be a standard header.
231 static const size_t MaxStdHeaderNameLen = 18u;
232 if (Include.size() > MaxStdHeaderNameLen)
233 return false;
234
235 // Lowercase and normalize the search string.
236 SmallString<32> LowerInclude{Include};
237 for (char &Ch : LowerInclude) {
238 // In the ASCII range?
239 if (static_cast<unsigned char>(Ch) > 0x7f)
240 return false; // Can't be a standard header
241 // ASCII lowercase:
242 if (Ch >= 'A' && Ch <= 'Z')
243 Ch += 'a' - 'A';
244 // Normalize path separators for comparison purposes.
245 else if (::llvm::sys::path::is_separator(value: Ch))
246 Ch = '/';
247 }
248
249 // The standard C/C++ and Posix headers
250 return llvm::StringSwitch<bool>(LowerInclude)
251 // C library headers
252 .Cases(S0: "assert.h", S1: "complex.h", S2: "ctype.h", S3: "errno.h", S4: "fenv.h", Value: true)
253 .Cases(S0: "float.h", S1: "inttypes.h", S2: "iso646.h", S3: "limits.h", S4: "locale.h", Value: true)
254 .Cases(S0: "math.h", S1: "setjmp.h", S2: "signal.h", S3: "stdalign.h", S4: "stdarg.h", Value: true)
255 .Cases(S0: "stdatomic.h", S1: "stdbool.h", S2: "stdckdint.h", S3: "stdcountof.h", Value: true)
256 .Cases(S0: "stddef.h", S1: "stdint.h", S2: "stdio.h", S3: "stdlib.h", S4: "stdnoreturn.h", Value: true)
257 .Cases(S0: "string.h", S1: "tgmath.h", S2: "threads.h", S3: "time.h", S4: "uchar.h", Value: true)
258 .Cases(S0: "wchar.h", S1: "wctype.h", Value: true)
259
260 // C++ headers for C library facilities
261 .Cases(S0: "cassert", S1: "ccomplex", S2: "cctype", S3: "cerrno", S4: "cfenv", Value: true)
262 .Cases(S0: "cfloat", S1: "cinttypes", S2: "ciso646", S3: "climits", S4: "clocale", Value: true)
263 .Cases(S0: "cmath", S1: "csetjmp", S2: "csignal", S3: "cstdalign", S4: "cstdarg", Value: true)
264 .Cases(S0: "cstdbool", S1: "cstddef", S2: "cstdint", S3: "cstdio", S4: "cstdlib", Value: true)
265 .Cases(S0: "cstring", S1: "ctgmath", S2: "ctime", S3: "cuchar", S4: "cwchar", Value: true)
266 .Case(S: "cwctype", Value: true)
267
268 // C++ library headers
269 .Cases(S0: "algorithm", S1: "fstream", S2: "list", S3: "regex", S4: "thread", Value: true)
270 .Cases(S0: "array", S1: "functional", S2: "locale", S3: "scoped_allocator", S4: "tuple", Value: true)
271 .Cases(S0: "atomic", S1: "future", S2: "map", S3: "set", S4: "type_traits", Value: true)
272 .Cases(S0: "bitset", S1: "initializer_list", S2: "memory", S3: "shared_mutex", S4: "typeindex", Value: true)
273 .Cases(S0: "chrono", S1: "iomanip", S2: "mutex", S3: "sstream", S4: "typeinfo", Value: true)
274 .Cases(S0: "codecvt", S1: "ios", S2: "new", S3: "stack", S4: "unordered_map", Value: true)
275 .Cases(S0: "complex", S1: "iosfwd", S2: "numeric", S3: "stdexcept", S4: "unordered_set", Value: true)
276 .Cases(S0: "condition_variable", S1: "iostream", S2: "ostream", S3: "streambuf", S4: "utility", Value: true)
277 .Cases(S0: "deque", S1: "istream", S2: "queue", S3: "string", S4: "valarray", Value: true)
278 .Cases(S0: "exception", S1: "iterator", S2: "random", S3: "strstream", S4: "vector", Value: true)
279 .Cases(S0: "forward_list", S1: "limits", S2: "ratio", S3: "system_error", Value: true)
280
281 // POSIX headers (which aren't also C headers)
282 .Cases(S0: "aio.h", S1: "arpa/inet.h", S2: "cpio.h", S3: "dirent.h", S4: "dlfcn.h", Value: true)
283 .Cases(S0: "fcntl.h", S1: "fmtmsg.h", S2: "fnmatch.h", S3: "ftw.h", S4: "glob.h", Value: true)
284 .Cases(S0: "grp.h", S1: "iconv.h", S2: "langinfo.h", S3: "libgen.h", S4: "monetary.h", Value: true)
285 .Cases(S0: "mqueue.h", S1: "ndbm.h", S2: "net/if.h", S3: "netdb.h", S4: "netinet/in.h", Value: true)
286 .Cases(S0: "netinet/tcp.h", S1: "nl_types.h", S2: "poll.h", S3: "pthread.h", S4: "pwd.h", Value: true)
287 .Cases(S0: "regex.h", S1: "sched.h", S2: "search.h", S3: "semaphore.h", S4: "spawn.h", Value: true)
288 .Cases(S0: "strings.h", S1: "stropts.h", S2: "sys/ipc.h", S3: "sys/mman.h", S4: "sys/msg.h", Value: true)
289 .Cases(S0: "sys/resource.h", S1: "sys/select.h", S2: "sys/sem.h", S3: "sys/shm.h", S4: "sys/socket.h", Value: true)
290 .Cases(S0: "sys/stat.h", S1: "sys/statvfs.h", S2: "sys/time.h", S3: "sys/times.h", S4: "sys/types.h", Value: true)
291 .Cases(S0: "sys/uio.h", S1: "sys/un.h", S2: "sys/utsname.h", S3: "sys/wait.h", S4: "syslog.h", Value: true)
292 .Cases(S0: "tar.h", S1: "termios.h", S2: "trace.h", S3: "ulimit.h", Value: true)
293 .Cases(S0: "unistd.h", S1: "utime.h", S2: "utmpx.h", S3: "wordexp.h", Value: true)
294 .Default(Value: false);
295}
296
297/// Find a similar string in `Candidates`.
298///
299/// \param LHS a string for a similar string in `Candidates`
300///
301/// \param Candidates the candidates to find a similar string.
302///
303/// \returns a similar string if exists. If no similar string exists,
304/// returns std::nullopt.
305static std::optional<StringRef>
306findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
307 // We need to check if `Candidates` has the exact case-insensitive string
308 // because the Levenshtein distance match does not care about it.
309 for (StringRef C : Candidates) {
310 if (LHS.equals_insensitive(RHS: C)) {
311 return C;
312 }
313 }
314
315 // Keep going with the Levenshtein distance match.
316 // If the LHS size is less than 3, use the LHS size minus 1 and if not,
317 // use the LHS size divided by 3.
318 size_t Length = LHS.size();
319 size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
320
321 std::optional<std::pair<StringRef, size_t>> SimilarStr;
322 for (StringRef C : Candidates) {
323 size_t CurDist = LHS.edit_distance(Other: C, AllowReplacements: true);
324 if (CurDist <= MaxDist) {
325 if (!SimilarStr) {
326 // The first similar string found.
327 SimilarStr = {C, CurDist};
328 } else if (CurDist < SimilarStr->second) {
329 // More similar string found.
330 SimilarStr = {C, CurDist};
331 }
332 }
333 }
334
335 if (SimilarStr) {
336 return SimilarStr->first;
337 } else {
338 return std::nullopt;
339 }
340}
341
342bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
343 bool *ShadowFlag) {
344 // Missing macro name?
345 if (MacroNameTok.is(K: tok::eod))
346 return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
347
348 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
349 if (!II)
350 return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
351
352 if (II->isCPlusPlusOperatorKeyword()) {
353 // C++ 2.5p2: Alternative tokens behave the same as its primary token
354 // except for their spellings.
355 Diag(MacroNameTok, getLangOpts().MicrosoftExt
356 ? diag::ext_pp_operator_used_as_macro_name
357 : diag::err_pp_operator_used_as_macro_name)
358 << II << MacroNameTok.getKind();
359 // Allow #defining |and| and friends for Microsoft compatibility or
360 // recovery when legacy C headers are included in C++.
361 }
362
363 if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
364 // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
365 return Diag(MacroNameTok, diag::err_defined_macro_name);
366 }
367
368 // If defining/undefining reserved identifier or a keyword, we need to issue
369 // a warning.
370 SourceLocation MacroNameLoc = MacroNameTok.getLocation();
371 if (ShadowFlag)
372 *ShadowFlag = false;
373 // Macro names with reserved identifiers are accepted if built-in or passed
374 // through the command line (the later may be present if -dD was used to
375 // generate the preprocessed file).
376 // NB: isInPredefinedFile() is relatively expensive, so keep it at the end
377 // of the condition.
378 if (!SourceMgr.isInSystemHeader(Loc: MacroNameLoc) &&
379 !SourceMgr.isInPredefinedFile(Loc: MacroNameLoc)) {
380 MacroDiag D = MD_NoWarn;
381 if (isDefineUndef == MU_Define) {
382 D = shouldWarnOnMacroDef(PP&: *this, II);
383 }
384 else if (isDefineUndef == MU_Undef)
385 D = shouldWarnOnMacroUndef(PP&: *this, II);
386 if (D == MD_KeywordDef) {
387 // We do not want to warn on some patterns widely used in configuration
388 // scripts. This requires analyzing next tokens, so do not issue warnings
389 // now, only inform caller.
390 if (ShadowFlag)
391 *ShadowFlag = true;
392 }
393 if (D == MD_ReservedMacro)
394 Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
395 if (D == MD_ReservedAttributeIdentifier)
396 Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_attribute_id)
397 << II->getName();
398 }
399
400 // Okay, we got a good identifier.
401 return false;
402}
403
404/// Lex and validate a macro name, which occurs after a
405/// \#define or \#undef.
406///
407/// This sets the token kind to eod and discards the rest of the macro line if
408/// the macro name is invalid.
409///
410/// \param MacroNameTok Token that is expected to be a macro name.
411/// \param isDefineUndef Context in which macro is used.
412/// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
413void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
414 bool *ShadowFlag) {
415 // Read the token, don't allow macro expansion on it.
416 LexUnexpandedToken(Result&: MacroNameTok);
417
418 if (MacroNameTok.is(K: tok::code_completion)) {
419 if (CodeComplete)
420 CodeComplete->CodeCompleteMacroName(IsDefinition: isDefineUndef == MU_Define);
421 setCodeCompletionReached();
422 LexUnexpandedToken(Result&: MacroNameTok);
423 }
424
425 if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
426 return;
427
428 // Invalid macro name, read and discard the rest of the line and set the
429 // token kind to tok::eod if necessary.
430 if (MacroNameTok.isNot(K: tok::eod)) {
431 MacroNameTok.setKind(tok::eod);
432 DiscardUntilEndOfDirective();
433 }
434}
435
436/// Ensure that the next token is a tok::eod token.
437///
438/// If not, emit a diagnostic and consume up until the eod. If EnableMacros is
439/// true, then we consider macros that expand to zero tokens as being ok.
440///
441/// Returns the location of the end of the directive.
442SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
443 bool EnableMacros) {
444 Token Tmp;
445 // Lex unexpanded tokens for most directives: macros might expand to zero
446 // tokens, causing us to miss diagnosing invalid lines. Some directives (like
447 // #line) allow empty macros.
448 if (EnableMacros)
449 Lex(Result&: Tmp);
450 else
451 LexUnexpandedToken(Result&: Tmp);
452
453 // There should be no tokens after the directive, but we allow them as an
454 // extension.
455 while (Tmp.is(K: tok::comment)) // Skip comments in -C mode.
456 LexUnexpandedToken(Result&: Tmp);
457
458 if (Tmp.is(K: tok::eod))
459 return Tmp.getLocation();
460
461 // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,
462 // or if this is a macro-style preprocessing directive, because it is more
463 // trouble than it is worth to insert /**/ and check that there is no /**/
464 // in the range also.
465 FixItHint Hint;
466 if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
467 !CurTokenLexer)
468 Hint = FixItHint::CreateInsertion(InsertionLoc: Tmp.getLocation(),Code: "//");
469 Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
470 return DiscardUntilEndOfDirective().getEnd();
471}
472
473void Preprocessor::SuggestTypoedDirective(const Token &Tok,
474 StringRef Directive) const {
475 // If this is a `.S` file, treat unknown # directives as non-preprocessor
476 // directives.
477 if (getLangOpts().AsmPreprocessor) return;
478
479 std::vector<StringRef> Candidates = {
480 "if", "ifdef", "ifndef", "elif", "else", "endif"
481 };
482 if (LangOpts.C23 || LangOpts.CPlusPlus23)
483 Candidates.insert(position: Candidates.end(), l: {"elifdef", "elifndef"});
484
485 if (std::optional<StringRef> Sugg = findSimilarStr(LHS: Directive, Candidates)) {
486 // Directive cannot be coming from macro.
487 assert(Tok.getLocation().isFileID());
488 CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
489 B: Tok.getLocation(),
490 E: Tok.getLocation().getLocWithOffset(Offset: Directive.size()));
491 StringRef SuggValue = *Sugg;
492
493 auto Hint = FixItHint::CreateReplacement(RemoveRange: DirectiveRange, Code: SuggValue);
494 Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
495 }
496}
497
498/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
499/// decided that the subsequent tokens are in the \#if'd out portion of the
500/// file. Lex the rest of the file, until we see an \#endif. If
501/// FoundNonSkipPortion is true, then we have already emitted code for part of
502/// this \#if directive, so \#else/\#elif blocks should never be entered.
503/// If FoundElse is true, then a \#else has already been seen for this
504/// conditional, so a further \#else directive is a duplicate. When this
505/// returns, the caller can lex the first valid token.
506void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
507 SourceLocation IfTokenLoc,
508 bool FoundNonSkipPortion,
509 bool FoundElse,
510 SourceLocation ElseLoc) {
511 // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
512 // not getting called recursively by storing the RecordedSkippedRanges
513 // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
514 // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
515 // invalidated. If this changes and there is a need to call
516 // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
517 // change to do a second lookup in endLexPass function instead of reusing the
518 // lookup pointer.
519 assert(!SkippingExcludedConditionalBlock &&
520 "calling SkipExcludedConditionalBlock recursively");
521 llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
522
523 ++NumSkipped;
524 assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
525 assert(CurPPLexer && "Conditional PP block must be in a file!");
526 assert(CurLexer && "Conditional PP block but no current lexer set!");
527
528 if (PreambleConditionalStack.reachedEOFWhileSkipping())
529 PreambleConditionalStack.clearSkipInfo();
530 else
531 CurPPLexer->pushConditionalLevel(DirectiveStart: IfTokenLoc, /*isSkipping*/ WasSkipping: false,
532 FoundNonSkip: FoundNonSkipPortion, FoundElse);
533
534 // Enter raw mode to disable identifier lookup (and thus macro expansion),
535 // disabling warnings, etc.
536 CurPPLexer->LexingRawMode = true;
537 Token Tok;
538 SourceLocation endLoc;
539
540 /// Keeps track and caches skipped ranges and also retrieves a prior skipped
541 /// range if the same block is re-visited.
542 struct SkippingRangeStateTy {
543 Preprocessor &PP;
544
545 const char *BeginPtr = nullptr;
546 unsigned *SkipRangePtr = nullptr;
547
548 SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
549
550 void beginLexPass() {
551 if (BeginPtr)
552 return; // continue skipping a block.
553
554 // Initiate a skipping block and adjust the lexer if we already skipped it
555 // before.
556 BeginPtr = PP.CurLexer->getBufferLocation();
557 SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
558 if (*SkipRangePtr) {
559 PP.CurLexer->seek(Offset: PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
560 /*IsAtStartOfLine*/ true);
561 }
562 }
563
564 void endLexPass(const char *Hashptr) {
565 if (!BeginPtr) {
566 // Not doing normal lexing.
567 assert(PP.CurLexer->isDependencyDirectivesLexer());
568 return;
569 }
570
571 // Finished skipping a block, record the range if it's first time visited.
572 if (!*SkipRangePtr) {
573 *SkipRangePtr = Hashptr - BeginPtr;
574 }
575 assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
576 BeginPtr = nullptr;
577 SkipRangePtr = nullptr;
578 }
579 } SkippingRangeState(*this);
580
581 while (true) {
582 if (CurLexer->isDependencyDirectivesLexer()) {
583 CurLexer->LexDependencyDirectiveTokenWhileSkipping(Result&: Tok);
584 } else {
585 SkippingRangeState.beginLexPass();
586 while (true) {
587 CurLexer->Lex(Result&: Tok);
588
589 if (Tok.is(K: tok::code_completion)) {
590 setCodeCompletionReached();
591 if (CodeComplete)
592 CodeComplete->CodeCompleteInConditionalExclusion();
593 continue;
594 }
595
596 // If this is the end of the buffer, we have an error.
597 if (Tok.is(K: tok::eof)) {
598 // We don't emit errors for unterminated conditionals here,
599 // Lexer::LexEndOfFile can do that properly.
600 // Just return and let the caller lex after this #include.
601 if (PreambleConditionalStack.isRecording())
602 PreambleConditionalStack.SkipInfo.emplace(args&: HashTokenLoc, args&: IfTokenLoc,
603 args&: FoundNonSkipPortion,
604 args&: FoundElse, args&: ElseLoc);
605 break;
606 }
607
608 // If this token is not a preprocessor directive, just skip it.
609 if (Tok.isNot(K: tok::hash) || !Tok.isAtStartOfLine())
610 continue;
611
612 break;
613 }
614 }
615 if (Tok.is(K: tok::eof))
616 break;
617
618 // We just parsed a # character at the start of a line, so we're in
619 // directive mode. Tell the lexer this so any newlines we see will be
620 // converted into an EOD token (this terminates the macro).
621 CurPPLexer->ParsingPreprocessorDirective = true;
622 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
623
624 assert(Tok.is(tok::hash));
625 const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
626 assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
627
628 // Read the next token, the directive flavor.
629 LexUnexpandedToken(Result&: Tok);
630
631 // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
632 // something bogus), skip it.
633 if (Tok.isNot(K: tok::raw_identifier)) {
634 CurPPLexer->ParsingPreprocessorDirective = false;
635 // Restore comment saving mode.
636 if (CurLexer) CurLexer->resetExtendedTokenMode();
637 continue;
638 }
639
640 // If the first letter isn't i or e, it isn't interesting to us. We know that
641 // this is safe in the face of spelling differences, because there is no way
642 // to spell an i/e in a strange way that is another letter. Skipping this
643 // allows us to avoid looking up the identifier info for #define/#undef and
644 // other common directives.
645 StringRef RI = Tok.getRawIdentifier();
646
647 char FirstChar = RI[0];
648 if (FirstChar >= 'a' && FirstChar <= 'z' &&
649 FirstChar != 'i' && FirstChar != 'e') {
650 CurPPLexer->ParsingPreprocessorDirective = false;
651 // Restore comment saving mode.
652 if (CurLexer) CurLexer->resetExtendedTokenMode();
653 continue;
654 }
655
656 // Get the identifier name without trigraphs or embedded newlines. Note
657 // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
658 // when skipping.
659 char DirectiveBuf[20];
660 StringRef Directive;
661 if (!Tok.needsCleaning() && RI.size() < 20) {
662 Directive = RI;
663 } else {
664 std::string DirectiveStr = getSpelling(Tok);
665 size_t IdLen = DirectiveStr.size();
666 if (IdLen >= 20) {
667 CurPPLexer->ParsingPreprocessorDirective = false;
668 // Restore comment saving mode.
669 if (CurLexer) CurLexer->resetExtendedTokenMode();
670 continue;
671 }
672 memcpy(dest: DirectiveBuf, src: &DirectiveStr[0], n: IdLen);
673 Directive = StringRef(DirectiveBuf, IdLen);
674 }
675
676 if (Directive.starts_with(Prefix: "if")) {
677 StringRef Sub = Directive.substr(Start: 2);
678 if (Sub.empty() || // "if"
679 Sub == "def" || // "ifdef"
680 Sub == "ndef") { // "ifndef"
681 // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
682 // bother parsing the condition.
683 DiscardUntilEndOfDirective();
684 CurPPLexer->pushConditionalLevel(DirectiveStart: Tok.getLocation(), /*wasskipping*/WasSkipping: true,
685 /*foundnonskip*/FoundNonSkip: false,
686 /*foundelse*/FoundElse: false);
687 } else {
688 SuggestTypoedDirective(Tok, Directive);
689 }
690 } else if (Directive[0] == 'e') {
691 StringRef Sub = Directive.substr(Start: 1);
692 if (Sub == "ndif") { // "endif"
693 PPConditionalInfo CondInfo;
694 CondInfo.WasSkipping = true; // Silence bogus warning.
695 bool InCond = CurPPLexer->popConditionalLevel(CI&: CondInfo);
696 (void)InCond; // Silence warning in no-asserts mode.
697 assert(!InCond && "Can't be skipping if not in a conditional!");
698
699 // If we popped the outermost skipping block, we're done skipping!
700 if (!CondInfo.WasSkipping) {
701 SkippingRangeState.endLexPass(Hashptr);
702 // Restore the value of LexingRawMode so that trailing comments
703 // are handled correctly, if we've reached the outermost block.
704 CurPPLexer->LexingRawMode = false;
705 endLoc = CheckEndOfDirective(DirType: "endif");
706 CurPPLexer->LexingRawMode = true;
707 if (Callbacks)
708 Callbacks->Endif(Loc: Tok.getLocation(), IfLoc: CondInfo.IfLoc);
709 break;
710 } else {
711 DiscardUntilEndOfDirective();
712 }
713 } else if (Sub == "lse") { // "else".
714 // #else directive in a skipping conditional. If not in some other
715 // skipping conditional, and if #else hasn't already been seen, enter it
716 // as a non-skipping conditional.
717 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
718
719 if (!CondInfo.WasSkipping)
720 SkippingRangeState.endLexPass(Hashptr);
721
722 // If this is a #else with a #else before it, report the error.
723 if (CondInfo.FoundElse)
724 Diag(Tok, diag::pp_err_else_after_else);
725
726 // Note that we've seen a #else in this conditional.
727 CondInfo.FoundElse = true;
728
729 // If the conditional is at the top level, and the #if block wasn't
730 // entered, enter the #else block now.
731 if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
732 CondInfo.FoundNonSkip = true;
733 // Restore the value of LexingRawMode so that trailing comments
734 // are handled correctly.
735 CurPPLexer->LexingRawMode = false;
736 endLoc = CheckEndOfDirective(DirType: "else");
737 CurPPLexer->LexingRawMode = true;
738 if (Callbacks)
739 Callbacks->Else(Loc: Tok.getLocation(), IfLoc: CondInfo.IfLoc);
740 break;
741 } else {
742 DiscardUntilEndOfDirective(); // C99 6.10p4.
743 }
744 } else if (Sub == "lif") { // "elif".
745 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
746
747 if (!CondInfo.WasSkipping)
748 SkippingRangeState.endLexPass(Hashptr);
749
750 // If this is a #elif with a #else before it, report the error.
751 if (CondInfo.FoundElse)
752 Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
753
754 // If this is in a skipping block or if we're already handled this #if
755 // block, don't bother parsing the condition.
756 if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
757 // FIXME: We should probably do at least some minimal parsing of the
758 // condition to verify that it is well-formed. The current state
759 // allows #elif* directives with completely malformed (or missing)
760 // conditions.
761 DiscardUntilEndOfDirective();
762 } else {
763 // Restore the value of LexingRawMode so that identifiers are
764 // looked up, etc, inside the #elif expression.
765 assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
766 CurPPLexer->LexingRawMode = false;
767 IdentifierInfo *IfNDefMacro = nullptr;
768 DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
769 // Stop if Lexer became invalid after hitting code completion token.
770 if (!CurPPLexer)
771 return;
772 const bool CondValue = DER.Conditional;
773 CurPPLexer->LexingRawMode = true;
774 if (Callbacks) {
775 Callbacks->Elif(
776 Loc: Tok.getLocation(), ConditionRange: DER.ExprRange,
777 ConditionValue: (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
778 IfLoc: CondInfo.IfLoc);
779 }
780 // If this condition is true, enter it!
781 if (CondValue) {
782 CondInfo.FoundNonSkip = true;
783 break;
784 }
785 }
786 } else if (Sub == "lifdef" || // "elifdef"
787 Sub == "lifndef") { // "elifndef"
788 bool IsElifDef = Sub == "lifdef";
789 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
790 Token DirectiveToken = Tok;
791
792 if (!CondInfo.WasSkipping)
793 SkippingRangeState.endLexPass(Hashptr);
794
795 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
796 // if this branch is in a skipping block.
797 unsigned DiagID;
798 if (LangOpts.CPlusPlus)
799 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
800 : diag::ext_cxx23_pp_directive;
801 else
802 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
803 : diag::ext_c23_pp_directive;
804 Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
805
806 // If this is a #elif with a #else before it, report the error.
807 if (CondInfo.FoundElse)
808 Diag(Tok, diag::pp_err_elif_after_else)
809 << (IsElifDef ? PED_Elifdef : PED_Elifndef);
810
811 // If this is in a skipping block or if we're already handled this #if
812 // block, don't bother parsing the condition.
813 if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
814 // FIXME: We should probably do at least some minimal parsing of the
815 // condition to verify that it is well-formed. The current state
816 // allows #elif* directives with completely malformed (or missing)
817 // conditions.
818 DiscardUntilEndOfDirective();
819 } else {
820 // Restore the value of LexingRawMode so that identifiers are
821 // looked up, etc, inside the #elif[n]def expression.
822 assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
823 CurPPLexer->LexingRawMode = false;
824 Token MacroNameTok;
825 ReadMacroName(MacroNameTok);
826 CurPPLexer->LexingRawMode = true;
827
828 // If the macro name token is tok::eod, there was an error that was
829 // already reported.
830 if (MacroNameTok.is(K: tok::eod)) {
831 // Skip code until we get to #endif. This helps with recovery by
832 // not emitting an error when the #endif is reached.
833 continue;
834 }
835
836 emitMacroExpansionWarnings(Identifier: MacroNameTok);
837
838 CheckEndOfDirective(DirType: IsElifDef ? "elifdef" : "elifndef");
839
840 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
841 auto MD = getMacroDefinition(II: MII);
842 MacroInfo *MI = MD.getMacroInfo();
843
844 if (Callbacks) {
845 if (IsElifDef) {
846 Callbacks->Elifdef(Loc: DirectiveToken.getLocation(), MacroNameTok,
847 MD);
848 } else {
849 Callbacks->Elifndef(Loc: DirectiveToken.getLocation(), MacroNameTok,
850 MD);
851 }
852 }
853 // If this condition is true, enter it!
854 if (static_cast<bool>(MI) == IsElifDef) {
855 CondInfo.FoundNonSkip = true;
856 break;
857 }
858 }
859 } else {
860 SuggestTypoedDirective(Tok, Directive);
861 }
862 } else {
863 SuggestTypoedDirective(Tok, Directive);
864 }
865
866 CurPPLexer->ParsingPreprocessorDirective = false;
867 // Restore comment saving mode.
868 if (CurLexer) CurLexer->resetExtendedTokenMode();
869 }
870
871 // Finally, if we are out of the conditional (saw an #endif or ran off the end
872 // of the file, just stop skipping and return to lexing whatever came after
873 // the #if block.
874 CurPPLexer->LexingRawMode = false;
875
876 // The last skipped range isn't actually skipped yet if it's truncated
877 // by the end of the preamble; we'll resume parsing after the preamble.
878 if (Callbacks && (Tok.isNot(K: tok::eof) || !isRecordingPreamble()))
879 Callbacks->SourceRangeSkipped(
880 Range: SourceRange(HashTokenLoc, endLoc.isValid()
881 ? endLoc
882 : CurPPLexer->getSourceLocation()),
883 EndifLoc: Tok.getLocation());
884}
885
886Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
887 bool AllowTextual) {
888 if (!SourceMgr.isInMainFile(Loc)) {
889 // Try to determine the module of the include directive.
890 // FIXME: Look into directly passing the FileEntry from LookupFile instead.
891 FileID IDOfIncl = SourceMgr.getFileID(SpellingLoc: SourceMgr.getExpansionLoc(Loc));
892 if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(FID: IDOfIncl)) {
893 // The include comes from an included file.
894 return HeaderInfo.getModuleMap()
895 .findModuleForHeader(File: *EntryOfIncl, AllowTextual)
896 .getModule();
897 }
898 }
899
900 // This is either in the main file or not in a file at all. It belongs
901 // to the current module, if there is one.
902 return getLangOpts().CurrentModule.empty()
903 ? nullptr
904 : HeaderInfo.lookupModule(ModuleName: getLangOpts().CurrentModule, ImportLoc: Loc);
905}
906
907OptionalFileEntryRef
908Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
909 SourceLocation Loc) {
910 Module *IncM = getModuleForLocation(
911 Loc: IncLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
912
913 // Walk up through the include stack, looking through textual headers of M
914 // until we hit a non-textual header that we can #include. (We assume textual
915 // headers of a module with non-textual headers aren't meant to be used to
916 // import entities from the module.)
917 auto &SM = getSourceManager();
918 while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
919 auto ID = SM.getFileID(SpellingLoc: SM.getExpansionLoc(Loc));
920 auto FE = SM.getFileEntryRefForID(FID: ID);
921 if (!FE)
922 break;
923
924 // We want to find all possible modules that might contain this header, so
925 // search all enclosing directories for module maps and load them.
926 HeaderInfo.hasModuleMap(Filename: FE->getName(), /*Root*/ nullptr,
927 IsSystem: SourceMgr.isInSystemHeader(Loc));
928
929 bool InPrivateHeader = false;
930 for (auto Header : HeaderInfo.findAllModulesForHeader(File: *FE)) {
931 if (!Header.isAccessibleFrom(M: IncM)) {
932 // It's in a private header; we can't #include it.
933 // FIXME: If there's a public header in some module that re-exports it,
934 // then we could suggest including that, but it's not clear that's the
935 // expected way to make this entity visible.
936 InPrivateHeader = true;
937 continue;
938 }
939
940 // Don't suggest explicitly excluded headers.
941 if (Header.getRole() == ModuleMap::ExcludedHeader)
942 continue;
943
944 // We'll suggest including textual headers below if they're
945 // include-guarded.
946 if (Header.getRole() & ModuleMap::TextualHeader)
947 continue;
948
949 // If we have a module import syntax, we shouldn't include a header to
950 // make a particular module visible. Let the caller know they should
951 // suggest an import instead.
952 if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
953 return std::nullopt;
954
955 // If this is an accessible, non-textual header of M's top-level module
956 // that transitively includes the given location and makes the
957 // corresponding module visible, this is the thing to #include.
958 return *FE;
959 }
960
961 // FIXME: If we're bailing out due to a private header, we shouldn't suggest
962 // an import either.
963 if (InPrivateHeader)
964 return std::nullopt;
965
966 // If the header is includable and has an include guard, assume the
967 // intended way to expose its contents is by #include, not by importing a
968 // module that transitively includes it.
969 if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(File: *FE))
970 return *FE;
971
972 Loc = SM.getIncludeLoc(FID: ID);
973 }
974
975 return std::nullopt;
976}
977
978OptionalFileEntryRef Preprocessor::LookupFile(
979 SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
980 ConstSearchDirIterator FromDir, const FileEntry *FromFile,
981 ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
982 SmallVectorImpl<char> *RelativePath,
983 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
984 bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
985 ConstSearchDirIterator CurDirLocal = nullptr;
986 ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
987
988 Module *RequestingModule = getModuleForLocation(
989 Loc: FilenameLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
990
991 // If the header lookup mechanism may be relative to the current inclusion
992 // stack, record the parent #includes.
993 SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
994 bool BuildSystemModule = false;
995 if (!FromDir && !FromFile) {
996 FileID FID = getCurrentFileLexer()->getFileID();
997 OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
998
999 // If there is no file entry associated with this file, it must be the
1000 // predefines buffer or the module includes buffer. Any other file is not
1001 // lexed with a normal lexer, so it won't be scanned for preprocessor
1002 // directives.
1003 //
1004 // If we have the predefines buffer, resolve #include references (which come
1005 // from the -include command line argument) from the current working
1006 // directory instead of relative to the main file.
1007 //
1008 // If we have the module includes buffer, resolve #include references (which
1009 // come from header declarations in the module map) relative to the module
1010 // map file.
1011 if (!FileEnt) {
1012 if (FID == SourceMgr.getMainFileID() && MainFileDir) {
1013 auto IncludeDir =
1014 HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
1015 FileName: Filename, Module: getCurrentModule())
1016 ? HeaderInfo.getModuleMap().getBuiltinDir()
1017 : MainFileDir;
1018 Includers.push_back(Elt: std::make_pair(x: std::nullopt, y&: *IncludeDir));
1019 BuildSystemModule = getCurrentModule()->IsSystem;
1020 } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
1021 FID: SourceMgr.getMainFileID()))) {
1022 auto CWD = FileMgr.getOptionalDirectoryRef(DirName: ".");
1023 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y&: *CWD));
1024 }
1025 } else {
1026 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y: FileEnt->getDir()));
1027 }
1028
1029 // MSVC searches the current include stack from top to bottom for
1030 // headers included by quoted include directives.
1031 // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1032 if (LangOpts.MSVCCompat && !isAngled) {
1033 for (IncludeStackInfo &ISEntry : llvm::reverse(C&: IncludeMacroStack)) {
1034 if (IsFileLexer(I: ISEntry))
1035 if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1036 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y: FileEnt->getDir()));
1037 }
1038 }
1039 }
1040
1041 CurDir = CurDirLookup;
1042
1043 if (FromFile) {
1044 // We're supposed to start looking from after a particular file. Search
1045 // the include path until we find that file or run out of files.
1046 ConstSearchDirIterator TmpCurDir = CurDir;
1047 ConstSearchDirIterator TmpFromDir = nullptr;
1048 while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1049 Filename, IncludeLoc: FilenameLoc, isAngled, FromDir: TmpFromDir, CurDir: &TmpCurDir,
1050 Includers, SearchPath, RelativePath, RequestingModule,
1051 SuggestedModule, /*IsMapped=*/nullptr,
1052 /*IsFrameworkFound=*/nullptr, SkipCache)) {
1053 // Keep looking as if this file did a #include_next.
1054 TmpFromDir = TmpCurDir;
1055 ++TmpFromDir;
1056 if (&FE->getFileEntry() == FromFile) {
1057 // Found it.
1058 FromDir = TmpFromDir;
1059 CurDir = TmpCurDir;
1060 break;
1061 }
1062 }
1063 }
1064
1065 // Do a standard file entry lookup.
1066 OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1067 Filename, IncludeLoc: FilenameLoc, isAngled, FromDir, CurDir: &CurDir, Includers, SearchPath,
1068 RelativePath, RequestingModule, SuggestedModule, IsMapped,
1069 IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1070 if (FE)
1071 return FE;
1072
1073 OptionalFileEntryRef CurFileEnt;
1074 // Otherwise, see if this is a subframework header. If so, this is relative
1075 // to one of the headers on the #include stack. Walk the list of the current
1076 // headers on the #include stack and pass them to HeaderInfo.
1077 if (IsFileLexer()) {
1078 if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1079 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1080 Filename, ContextFileEnt: *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1081 SuggestedModule)) {
1082 return FE;
1083 }
1084 }
1085 }
1086
1087 for (IncludeStackInfo &ISEntry : llvm::reverse(C&: IncludeMacroStack)) {
1088 if (IsFileLexer(I: ISEntry)) {
1089 if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1090 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1091 Filename, ContextFileEnt: *CurFileEnt, SearchPath, RelativePath,
1092 RequestingModule, SuggestedModule)) {
1093 return FE;
1094 }
1095 }
1096 }
1097 }
1098
1099 // Otherwise, we really couldn't find the file.
1100 return std::nullopt;
1101}
1102
1103OptionalFileEntryRef
1104Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
1105 const FileEntry *LookupFromFile) {
1106 FileManager &FM = this->getFileManager();
1107 if (llvm::sys::path::is_absolute(path: Filename)) {
1108 // lookup path or immediately fail
1109 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1110 Filename, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1111 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1112 }
1113
1114 auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
1115 StringRef StartingFrom, StringRef FileName,
1116 bool RemoveInitialFileComponentFromLookupPath) {
1117 llvm::sys::path::native(path: StartingFrom, result&: LookupPath);
1118 if (RemoveInitialFileComponentFromLookupPath)
1119 llvm::sys::path::remove_filename(path&: LookupPath);
1120 if (!LookupPath.empty() &&
1121 !llvm::sys::path::is_separator(value: LookupPath.back())) {
1122 LookupPath.push_back(Elt: llvm::sys::path::get_separator().front());
1123 }
1124 LookupPath.append(in_start: FileName.begin(), in_end: FileName.end());
1125 };
1126
1127 // Otherwise, it's search time!
1128 SmallString<512> LookupPath;
1129 // Non-angled lookup
1130 if (!isAngled) {
1131 if (LookupFromFile) {
1132 // Use file-based lookup.
1133 StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
1134 if (!FullFileDir.empty()) {
1135 SeparateComponents(LookupPath, FullFileDir, Filename, true);
1136 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1137 Filename: LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1138 if (ShouldBeEntry)
1139 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1140 llvm::consumeError(Err: ShouldBeEntry.takeError());
1141 }
1142 }
1143
1144 // Otherwise, do working directory lookup.
1145 LookupPath.clear();
1146 auto MaybeWorkingDirEntry = FM.getDirectoryRef(DirName: ".");
1147 if (MaybeWorkingDirEntry) {
1148 DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
1149 StringRef WorkingDir = WorkingDirEntry.getName();
1150 if (!WorkingDir.empty()) {
1151 SeparateComponents(LookupPath, WorkingDir, Filename, false);
1152 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1153 Filename: LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1154 if (ShouldBeEntry)
1155 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1156 llvm::consumeError(Err: ShouldBeEntry.takeError());
1157 }
1158 }
1159 }
1160
1161 for (const auto &Entry : PPOpts.EmbedEntries) {
1162 LookupPath.clear();
1163 SeparateComponents(LookupPath, Entry, Filename, false);
1164 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1165 Filename: LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1166 if (ShouldBeEntry)
1167 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1168 llvm::consumeError(Err: ShouldBeEntry.takeError());
1169 }
1170 return std::nullopt;
1171}
1172
1173//===----------------------------------------------------------------------===//
1174// Preprocessor Directive Handling.
1175//===----------------------------------------------------------------------===//
1176
1177class Preprocessor::ResetMacroExpansionHelper {
1178public:
1179 ResetMacroExpansionHelper(Preprocessor *pp)
1180 : PP(pp), save(pp->DisableMacroExpansion) {
1181 if (pp->MacroExpansionInDirectivesOverride)
1182 pp->DisableMacroExpansion = false;
1183 }
1184
1185 ~ResetMacroExpansionHelper() {
1186 PP->DisableMacroExpansion = save;
1187 }
1188
1189private:
1190 Preprocessor *PP;
1191 bool save;
1192};
1193
1194/// Process a directive while looking for the through header or a #pragma
1195/// hdrstop. The following directives are handled:
1196/// #include (to check if it is the through header)
1197/// #define (to warn about macros that don't match the PCH)
1198/// #pragma (to check for pragma hdrstop).
1199/// All other directives are completely discarded.
1200void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1201 SourceLocation HashLoc) {
1202 if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1203 if (II->getPPKeywordID() == tok::pp_define) {
1204 return HandleDefineDirective(Tok&: Result,
1205 /*ImmediatelyAfterHeaderGuard=*/false);
1206 }
1207 if (SkippingUntilPCHThroughHeader &&
1208 II->getPPKeywordID() == tok::pp_include) {
1209 return HandleIncludeDirective(HashLoc, Tok&: Result);
1210 }
1211 if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1212 Lex(Result);
1213 auto *II = Result.getIdentifierInfo();
1214 if (II && II->getName() == "hdrstop")
1215 return HandlePragmaHdrstop(Tok&: Result);
1216 }
1217 }
1218 DiscardUntilEndOfDirective();
1219}
1220
1221/// HandleDirective - This callback is invoked when the lexer sees a # token
1222/// at the start of a line. This consumes the directive, modifies the
1223/// lexer/preprocessor state, and advances the lexer(s) so that the next token
1224/// read is the correct one.
1225void Preprocessor::HandleDirective(Token &Result) {
1226 // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1227
1228 // We just parsed a # character at the start of a line, so we're in directive
1229 // mode. Tell the lexer this so any newlines we see will be converted into an
1230 // EOD token (which terminates the directive).
1231 CurPPLexer->ParsingPreprocessorDirective = true;
1232 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1233
1234 bool ImmediatelyAfterTopLevelIfndef =
1235 CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1236 CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1237
1238 ++NumDirectives;
1239
1240 // We are about to read a token. For the multiple-include optimization FA to
1241 // work, we have to remember if we had read any tokens *before* this
1242 // pp-directive.
1243 bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1244
1245 // Save the '#' token in case we need to return it later.
1246 Token SavedHash = Result;
1247
1248 // Read the next token, the directive flavor. This isn't expanded due to
1249 // C99 6.10.3p8.
1250 LexUnexpandedToken(Result);
1251
1252 // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:
1253 // #define A(x) #x
1254 // A(abc
1255 // #warning blah
1256 // def)
1257 // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1258 // not support this for #include-like directives, since that can result in
1259 // terrible diagnostics, and does not work in GCC.
1260 if (InMacroArgs) {
1261 if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1262 switch (II->getPPKeywordID()) {
1263 case tok::pp_include:
1264 case tok::pp_import:
1265 case tok::pp_include_next:
1266 case tok::pp___include_macros:
1267 case tok::pp_pragma:
1268 case tok::pp_embed:
1269 Diag(Result, diag::err_embedded_directive) << II->getName();
1270 Diag(*ArgMacro, diag::note_macro_expansion_here)
1271 << ArgMacro->getIdentifierInfo();
1272 DiscardUntilEndOfDirective();
1273 return;
1274 default:
1275 break;
1276 }
1277 }
1278 Diag(Result, diag::ext_embedded_directive);
1279 }
1280
1281 // Temporarily enable macro expansion if set so
1282 // and reset to previous state when returning from this function.
1283 ResetMacroExpansionHelper helper(this);
1284
1285 if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1286 return HandleSkippedDirectiveWhileUsingPCH(Result, HashLoc: SavedHash.getLocation());
1287
1288 switch (Result.getKind()) {
1289 case tok::eod:
1290 // Ignore the null directive with regards to the multiple-include
1291 // optimization, i.e. allow the null directive to appear outside of the
1292 // include guard and still enable the multiple-include optimization.
1293 CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1294 return; // null directive.
1295 case tok::code_completion:
1296 setCodeCompletionReached();
1297 if (CodeComplete)
1298 CodeComplete->CodeCompleteDirective(
1299 InConditional: CurPPLexer->getConditionalStackDepth() > 0);
1300 return;
1301 case tok::numeric_constant: // # 7 GNU line marker directive.
1302 // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1303 // directive. However do permit it in the predefines file, as we use line
1304 // markers to mark the builtin macros as being in a system header.
1305 if (getLangOpts().AsmPreprocessor &&
1306 SourceMgr.getFileID(SpellingLoc: SavedHash.getLocation()) != getPredefinesFileID())
1307 break;
1308 return HandleDigitDirective(Tok&: Result);
1309 default:
1310 IdentifierInfo *II = Result.getIdentifierInfo();
1311 if (!II) break; // Not an identifier.
1312
1313 // Ask what the preprocessor keyword ID is.
1314 switch (II->getPPKeywordID()) {
1315 default: break;
1316 // C99 6.10.1 - Conditional Inclusion.
1317 case tok::pp_if:
1318 return HandleIfDirective(IfToken&: Result, HashToken: SavedHash, ReadAnyTokensBeforeDirective);
1319 case tok::pp_ifdef:
1320 return HandleIfdefDirective(Result, HashToken: SavedHash, isIfndef: false,
1321 ReadAnyTokensBeforeDirective: true /*not valid for miopt*/);
1322 case tok::pp_ifndef:
1323 return HandleIfdefDirective(Result, HashToken: SavedHash, isIfndef: true,
1324 ReadAnyTokensBeforeDirective);
1325 case tok::pp_elif:
1326 case tok::pp_elifdef:
1327 case tok::pp_elifndef:
1328 return HandleElifFamilyDirective(ElifToken&: Result, HashToken: SavedHash, Kind: II->getPPKeywordID());
1329
1330 case tok::pp_else:
1331 return HandleElseDirective(Result, HashToken: SavedHash);
1332 case tok::pp_endif:
1333 return HandleEndifDirective(EndifToken&: Result);
1334
1335 // C99 6.10.2 - Source File Inclusion.
1336 case tok::pp_include:
1337 // Handle #include.
1338 return HandleIncludeDirective(HashLoc: SavedHash.getLocation(), Tok&: Result);
1339 case tok::pp___include_macros:
1340 // Handle -imacros.
1341 return HandleIncludeMacrosDirective(HashLoc: SavedHash.getLocation(), Tok&: Result);
1342
1343 // C99 6.10.3 - Macro Replacement.
1344 case tok::pp_define:
1345 return HandleDefineDirective(Tok&: Result, ImmediatelyAfterHeaderGuard: ImmediatelyAfterTopLevelIfndef);
1346 case tok::pp_undef:
1347 return HandleUndefDirective();
1348
1349 // C99 6.10.4 - Line Control.
1350 case tok::pp_line:
1351 return HandleLineDirective();
1352
1353 // C99 6.10.5 - Error Directive.
1354 case tok::pp_error:
1355 return HandleUserDiagnosticDirective(Tok&: Result, isWarning: false);
1356
1357 // C99 6.10.6 - Pragma Directive.
1358 case tok::pp_pragma:
1359 return HandlePragmaDirective(Introducer: {.Kind: PIK_HashPragma, .Loc: SavedHash.getLocation()});
1360
1361 // GNU Extensions.
1362 case tok::pp_import:
1363 return HandleImportDirective(HashLoc: SavedHash.getLocation(), Tok&: Result);
1364 case tok::pp_include_next:
1365 return HandleIncludeNextDirective(HashLoc: SavedHash.getLocation(), Tok&: Result);
1366
1367 case tok::pp_warning:
1368 if (LangOpts.CPlusPlus)
1369 Diag(Result, LangOpts.CPlusPlus23
1370 ? diag::warn_cxx23_compat_warning_directive
1371 : diag::ext_pp_warning_directive)
1372 << /*C++23*/ 1;
1373 else
1374 Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1375 : diag::ext_pp_warning_directive)
1376 << /*C23*/ 0;
1377
1378 return HandleUserDiagnosticDirective(Tok&: Result, isWarning: true);
1379 case tok::pp_ident:
1380 return HandleIdentSCCSDirective(Tok&: Result);
1381 case tok::pp_sccs:
1382 return HandleIdentSCCSDirective(Tok&: Result);
1383 case tok::pp_embed:
1384 return HandleEmbedDirective(HashLoc: SavedHash.getLocation(), Tok&: Result,
1385 LookupFromFile: getCurrentFileLexer()
1386 ? *getCurrentFileLexer()->getFileEntry()
1387 : static_cast<FileEntry *>(nullptr));
1388 case tok::pp_assert:
1389 //isExtension = true; // FIXME: implement #assert
1390 break;
1391 case tok::pp_unassert:
1392 //isExtension = true; // FIXME: implement #unassert
1393 break;
1394
1395 case tok::pp___public_macro:
1396 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1397 return HandleMacroPublicDirective(Tok&: Result);
1398 break;
1399
1400 case tok::pp___private_macro:
1401 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1402 return HandleMacroPrivateDirective();
1403 break;
1404 }
1405 break;
1406 }
1407
1408 // If this is a .S file, treat unknown # directives as non-preprocessor
1409 // directives. This is important because # may be a comment or introduce
1410 // various pseudo-ops. Just return the # token and push back the following
1411 // token to be lexed next time.
1412 if (getLangOpts().AsmPreprocessor) {
1413 auto Toks = std::make_unique<Token[]>(num: 2);
1414 // Return the # and the token after it.
1415 Toks[0] = SavedHash;
1416 Toks[1] = Result;
1417
1418 // If the second token is a hashhash token, then we need to translate it to
1419 // unknown so the token lexer doesn't try to perform token pasting.
1420 if (Result.is(K: tok::hashhash))
1421 Toks[1].setKind(tok::unknown);
1422
1423 // Enter this token stream so that we re-lex the tokens. Make sure to
1424 // enable macro expansion, in case the token after the # is an identifier
1425 // that is expanded.
1426 EnterTokenStream(Toks: std::move(Toks), NumToks: 2, DisableMacroExpansion: false, /*IsReinject*/false);
1427 return;
1428 }
1429
1430 // If we reached here, the preprocessing token is not valid!
1431 // Start suggesting if a similar directive found.
1432 Diag(Result, diag::err_pp_invalid_directive) << 0;
1433
1434 // Read the rest of the PP line.
1435 DiscardUntilEndOfDirective();
1436
1437 // Okay, we're done parsing the directive.
1438}
1439
1440/// GetLineValue - Convert a numeric token into an unsigned value, emitting
1441/// Diagnostic DiagID if it is invalid, and returning the value in Val.
1442static bool GetLineValue(Token &DigitTok, unsigned &Val,
1443 unsigned DiagID, Preprocessor &PP,
1444 bool IsGNULineDirective=false) {
1445 if (DigitTok.isNot(K: tok::numeric_constant)) {
1446 PP.Diag(Tok: DigitTok, DiagID);
1447
1448 if (DigitTok.isNot(K: tok::eod))
1449 PP.DiscardUntilEndOfDirective();
1450 return true;
1451 }
1452
1453 SmallString<64> IntegerBuffer;
1454 IntegerBuffer.resize(N: DigitTok.getLength());
1455 const char *DigitTokBegin = &IntegerBuffer[0];
1456 bool Invalid = false;
1457 unsigned ActualLength = PP.getSpelling(Tok: DigitTok, Buffer&: DigitTokBegin, Invalid: &Invalid);
1458 if (Invalid)
1459 return true;
1460
1461 // Verify that we have a simple digit-sequence, and compute the value. This
1462 // is always a simple digit string computed in decimal, so we do this manually
1463 // here.
1464 Val = 0;
1465 for (unsigned i = 0; i != ActualLength; ++i) {
1466 // C++1y [lex.fcon]p1:
1467 // Optional separating single quotes in a digit-sequence are ignored
1468 if (DigitTokBegin[i] == '\'')
1469 continue;
1470
1471 if (!isDigit(c: DigitTokBegin[i])) {
1472 PP.Diag(PP.AdvanceToTokenCharacter(TokStart: DigitTok.getLocation(), Char: i),
1473 diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1474 PP.DiscardUntilEndOfDirective();
1475 return true;
1476 }
1477
1478 unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1479 if (NextVal < Val) { // overflow.
1480 PP.Diag(Tok: DigitTok, DiagID);
1481 PP.DiscardUntilEndOfDirective();
1482 return true;
1483 }
1484 Val = NextVal;
1485 }
1486
1487 if (DigitTokBegin[0] == '0' && Val)
1488 PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1489 << IsGNULineDirective;
1490
1491 return false;
1492}
1493
1494/// Handle a \#line directive: C99 6.10.4.
1495///
1496/// The two acceptable forms are:
1497/// \verbatim
1498/// # line digit-sequence
1499/// # line digit-sequence "s-char-sequence"
1500/// \endverbatim
1501void Preprocessor::HandleLineDirective() {
1502 // Read the line # and string argument. Per C99 6.10.4p5, these tokens are
1503 // expanded.
1504 Token DigitTok;
1505 Lex(Result&: DigitTok);
1506
1507 // Validate the number and convert it to an unsigned.
1508 unsigned LineNo;
1509 if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1510 return;
1511
1512 if (LineNo == 0)
1513 Diag(DigitTok, diag::ext_pp_line_zero);
1514
1515 // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1516 // number greater than 2147483647". C90 requires that the line # be <= 32767.
1517 unsigned LineLimit = 32768U;
1518 if (LangOpts.C99 || LangOpts.CPlusPlus11)
1519 LineLimit = 2147483648U;
1520 if (LineNo >= LineLimit)
1521 Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1522 else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1523 Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1524
1525 int FilenameID = -1;
1526 Token StrTok;
1527 Lex(Result&: StrTok);
1528
1529 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1530 // string followed by eod.
1531 if (StrTok.is(K: tok::eod))
1532 ; // ok
1533 else if (StrTok.isNot(K: tok::string_literal)) {
1534 Diag(StrTok, diag::err_pp_line_invalid_filename);
1535 DiscardUntilEndOfDirective();
1536 return;
1537 } else if (StrTok.hasUDSuffix()) {
1538 Diag(StrTok, diag::err_invalid_string_udl);
1539 DiscardUntilEndOfDirective();
1540 return;
1541 } else {
1542 // Parse and validate the string, converting it into a unique ID.
1543 StringLiteralParser Literal(StrTok, *this);
1544 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1545 if (Literal.hadError) {
1546 DiscardUntilEndOfDirective();
1547 return;
1548 }
1549 if (Literal.Pascal) {
1550 Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1551 DiscardUntilEndOfDirective();
1552 return;
1553 }
1554 FilenameID = SourceMgr.getLineTableFilenameID(Str: Literal.GetString());
1555
1556 // Verify that there is nothing after the string, other than EOD. Because
1557 // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1558 CheckEndOfDirective(DirType: "line", EnableMacros: true);
1559 }
1560
1561 // Take the file kind of the file containing the #line directive. #line
1562 // directives are often used for generated sources from the same codebase, so
1563 // the new file should generally be classified the same way as the current
1564 // file. This is visible in GCC's pre-processed output, which rewrites #line
1565 // to GNU line markers.
1566 SrcMgr::CharacteristicKind FileKind =
1567 SourceMgr.getFileCharacteristic(Loc: DigitTok.getLocation());
1568
1569 SourceMgr.AddLineNote(Loc: DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry: false,
1570 IsFileExit: false, FileKind);
1571
1572 if (Callbacks)
1573 Callbacks->FileChanged(Loc: CurPPLexer->getSourceLocation(),
1574 Reason: PPCallbacks::RenameFile, FileType: FileKind);
1575}
1576
1577/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1578/// marker directive.
1579static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1580 SrcMgr::CharacteristicKind &FileKind,
1581 Preprocessor &PP) {
1582 unsigned FlagVal;
1583 Token FlagTok;
1584 PP.Lex(Result&: FlagTok);
1585 if (FlagTok.is(K: tok::eod)) return false;
1586 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1587 return true;
1588
1589 if (FlagVal == 1) {
1590 IsFileEntry = true;
1591
1592 PP.Lex(Result&: FlagTok);
1593 if (FlagTok.is(K: tok::eod)) return false;
1594 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1595 return true;
1596 } else if (FlagVal == 2) {
1597 IsFileExit = true;
1598
1599 SourceManager &SM = PP.getSourceManager();
1600 // If we are leaving the current presumed file, check to make sure the
1601 // presumed include stack isn't empty!
1602 FileID CurFileID =
1603 SM.getDecomposedExpansionLoc(Loc: FlagTok.getLocation()).first;
1604 PresumedLoc PLoc = SM.getPresumedLoc(Loc: FlagTok.getLocation());
1605 if (PLoc.isInvalid())
1606 return true;
1607
1608 // If there is no include loc (main file) or if the include loc is in a
1609 // different physical file, then we aren't in a "1" line marker flag region.
1610 SourceLocation IncLoc = PLoc.getIncludeLoc();
1611 if (IncLoc.isInvalid() ||
1612 SM.getDecomposedExpansionLoc(Loc: IncLoc).first != CurFileID) {
1613 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1614 PP.DiscardUntilEndOfDirective();
1615 return true;
1616 }
1617
1618 PP.Lex(Result&: FlagTok);
1619 if (FlagTok.is(K: tok::eod)) return false;
1620 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1621 return true;
1622 }
1623
1624 // We must have 3 if there are still flags.
1625 if (FlagVal != 3) {
1626 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1627 PP.DiscardUntilEndOfDirective();
1628 return true;
1629 }
1630
1631 FileKind = SrcMgr::C_System;
1632
1633 PP.Lex(Result&: FlagTok);
1634 if (FlagTok.is(K: tok::eod)) return false;
1635 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1636 return true;
1637
1638 // We must have 4 if there is yet another flag.
1639 if (FlagVal != 4) {
1640 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1641 PP.DiscardUntilEndOfDirective();
1642 return true;
1643 }
1644
1645 FileKind = SrcMgr::C_ExternCSystem;
1646
1647 PP.Lex(Result&: FlagTok);
1648 if (FlagTok.is(K: tok::eod)) return false;
1649
1650 // There are no more valid flags here.
1651 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1652 PP.DiscardUntilEndOfDirective();
1653 return true;
1654}
1655
1656/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1657/// one of the following forms:
1658///
1659/// # 42
1660/// # 42 "file" ('1' | '2')?
1661/// # 42 "file" ('1' | '2')? '3' '4'?
1662///
1663void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1664 // Validate the number and convert it to an unsigned. GNU does not have a
1665 // line # limit other than it fit in 32-bits.
1666 unsigned LineNo;
1667 if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1668 *this, true))
1669 return;
1670
1671 Token StrTok;
1672 Lex(Result&: StrTok);
1673
1674 bool IsFileEntry = false, IsFileExit = false;
1675 int FilenameID = -1;
1676 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1677
1678 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1679 // string followed by eod.
1680 if (StrTok.is(K: tok::eod)) {
1681 Diag(StrTok, diag::ext_pp_gnu_line_directive);
1682 // Treat this like "#line NN", which doesn't change file characteristics.
1683 FileKind = SourceMgr.getFileCharacteristic(Loc: DigitTok.getLocation());
1684 } else if (StrTok.isNot(K: tok::string_literal)) {
1685 Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1686 DiscardUntilEndOfDirective();
1687 return;
1688 } else if (StrTok.hasUDSuffix()) {
1689 Diag(StrTok, diag::err_invalid_string_udl);
1690 DiscardUntilEndOfDirective();
1691 return;
1692 } else {
1693 // Parse and validate the string, converting it into a unique ID.
1694 StringLiteralParser Literal(StrTok, *this);
1695 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1696 if (Literal.hadError) {
1697 DiscardUntilEndOfDirective();
1698 return;
1699 }
1700 if (Literal.Pascal) {
1701 Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1702 DiscardUntilEndOfDirective();
1703 return;
1704 }
1705
1706 // If a filename was present, read any flags that are present.
1707 if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, PP&: *this))
1708 return;
1709 if (!SourceMgr.isInPredefinedFile(Loc: DigitTok.getLocation()))
1710 Diag(StrTok, diag::ext_pp_gnu_line_directive);
1711
1712 // Exiting to an empty string means pop to the including file, so leave
1713 // FilenameID as -1 in that case.
1714 if (!(IsFileExit && Literal.GetString().empty()))
1715 FilenameID = SourceMgr.getLineTableFilenameID(Str: Literal.GetString());
1716 }
1717
1718 // Create a line note with this information.
1719 SourceMgr.AddLineNote(Loc: DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1720 IsFileExit, FileKind);
1721
1722 // If the preprocessor has callbacks installed, notify them of the #line
1723 // change. This is used so that the line marker comes out in -E mode for
1724 // example.
1725 if (Callbacks) {
1726 PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1727 if (IsFileEntry)
1728 Reason = PPCallbacks::EnterFile;
1729 else if (IsFileExit)
1730 Reason = PPCallbacks::ExitFile;
1731
1732 Callbacks->FileChanged(Loc: CurPPLexer->getSourceLocation(), Reason, FileType: FileKind);
1733 }
1734}
1735
1736/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1737///
1738void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1739 bool isWarning) {
1740 // Read the rest of the line raw. We do this because we don't want macros
1741 // to be expanded and we don't require that the tokens be valid preprocessing
1742 // tokens. For example, this is allowed: "#warning ` 'foo". GCC does
1743 // collapse multiple consecutive white space between tokens, but this isn't
1744 // specified by the standard.
1745 SmallString<128> Message;
1746 CurLexer->ReadToEndOfLine(Result: &Message);
1747
1748 // Find the first non-whitespace character, so that we can make the
1749 // diagnostic more succinct.
1750 StringRef Msg = Message.str().ltrim(Char: ' ');
1751
1752 if (isWarning)
1753 Diag(Tok, diag::pp_hash_warning) << Msg;
1754 else
1755 Diag(Tok, diag::err_pp_hash_error) << Msg;
1756}
1757
1758/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1759///
1760void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1761 // Yes, this directive is an extension.
1762 Diag(Tok, diag::ext_pp_ident_directive);
1763
1764 // Read the string argument.
1765 Token StrTok;
1766 Lex(Result&: StrTok);
1767
1768 // If the token kind isn't a string, it's a malformed directive.
1769 if (StrTok.isNot(K: tok::string_literal) &&
1770 StrTok.isNot(K: tok::wide_string_literal)) {
1771 Diag(StrTok, diag::err_pp_malformed_ident);
1772 if (StrTok.isNot(K: tok::eod))
1773 DiscardUntilEndOfDirective();
1774 return;
1775 }
1776
1777 if (StrTok.hasUDSuffix()) {
1778 Diag(StrTok, diag::err_invalid_string_udl);
1779 DiscardUntilEndOfDirective();
1780 return;
1781 }
1782
1783 // Verify that there is nothing after the string, other than EOD.
1784 CheckEndOfDirective(DirType: "ident");
1785
1786 if (Callbacks) {
1787 bool Invalid = false;
1788 std::string Str = getSpelling(Tok: StrTok, Invalid: &Invalid);
1789 if (!Invalid)
1790 Callbacks->Ident(Loc: Tok.getLocation(), str: Str);
1791 }
1792}
1793
1794/// Handle a #public directive.
1795void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1796 Token MacroNameTok;
1797 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
1798
1799 // Error reading macro name? If so, diagnostic already issued.
1800 if (MacroNameTok.is(K: tok::eod))
1801 return;
1802
1803 // Check to see if this is the last token on the #__public_macro line.
1804 CheckEndOfDirective(DirType: "__public_macro");
1805
1806 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1807 // Okay, we finally have a valid identifier to undef.
1808 MacroDirective *MD = getLocalMacroDirective(II);
1809
1810 // If the macro is not defined, this is an error.
1811 if (!MD) {
1812 Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1813 return;
1814 }
1815
1816 // Note that this macro has now been exported.
1817 appendMacroDirective(II, MD: AllocateVisibilityMacroDirective(
1818 Loc: MacroNameTok.getLocation(), /*isPublic=*/true));
1819}
1820
1821/// Handle a #private directive.
1822void Preprocessor::HandleMacroPrivateDirective() {
1823 Token MacroNameTok;
1824 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
1825
1826 // Error reading macro name? If so, diagnostic already issued.
1827 if (MacroNameTok.is(K: tok::eod))
1828 return;
1829
1830 // Check to see if this is the last token on the #__private_macro line.
1831 CheckEndOfDirective(DirType: "__private_macro");
1832
1833 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1834 // Okay, we finally have a valid identifier to undef.
1835 MacroDirective *MD = getLocalMacroDirective(II);
1836
1837 // If the macro is not defined, this is an error.
1838 if (!MD) {
1839 Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1840 return;
1841 }
1842
1843 // Note that this macro has now been marked private.
1844 appendMacroDirective(II, MD: AllocateVisibilityMacroDirective(
1845 Loc: MacroNameTok.getLocation(), /*isPublic=*/false));
1846}
1847
1848//===----------------------------------------------------------------------===//
1849// Preprocessor Include Directive Handling.
1850//===----------------------------------------------------------------------===//
1851
1852/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1853/// checked and spelled filename, e.g. as an operand of \#include. This returns
1854/// true if the input filename was in <>'s or false if it were in ""'s. The
1855/// caller is expected to provide a buffer that is large enough to hold the
1856/// spelling of the filename, but is also expected to handle the case when
1857/// this method decides to use a different buffer.
1858bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1859 StringRef &Buffer) {
1860 // Get the text form of the filename.
1861 assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1862
1863 // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1864 // C++20 [lex.header]/2:
1865 //
1866 // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1867 // in C: behavior is undefined
1868 // in C++: program is conditionally-supported with implementation-defined
1869 // semantics
1870
1871 // Make sure the filename is <x> or "x".
1872 bool isAngled;
1873 if (Buffer[0] == '<') {
1874 if (Buffer.back() != '>') {
1875 Diag(Loc, diag::err_pp_expects_filename);
1876 Buffer = StringRef();
1877 return true;
1878 }
1879 isAngled = true;
1880 } else if (Buffer[0] == '"') {
1881 if (Buffer.back() != '"') {
1882 Diag(Loc, diag::err_pp_expects_filename);
1883 Buffer = StringRef();
1884 return true;
1885 }
1886 isAngled = false;
1887 } else {
1888 Diag(Loc, diag::err_pp_expects_filename);
1889 Buffer = StringRef();
1890 return true;
1891 }
1892
1893 // Diagnose #include "" as invalid.
1894 if (Buffer.size() <= 2) {
1895 Diag(Loc, diag::err_pp_empty_filename);
1896 Buffer = StringRef();
1897 return true;
1898 }
1899
1900 // Skip the brackets.
1901 Buffer = Buffer.substr(Start: 1, N: Buffer.size()-2);
1902 return isAngled;
1903}
1904
1905/// Push a token onto the token stream containing an annotation.
1906void Preprocessor::EnterAnnotationToken(SourceRange Range,
1907 tok::TokenKind Kind,
1908 void *AnnotationVal) {
1909 // FIXME: Produce this as the current token directly, rather than
1910 // allocating a new token for it.
1911 auto Tok = std::make_unique<Token[]>(num: 1);
1912 Tok[0].startToken();
1913 Tok[0].setKind(Kind);
1914 Tok[0].setLocation(Range.getBegin());
1915 Tok[0].setAnnotationEndLoc(Range.getEnd());
1916 Tok[0].setAnnotationValue(AnnotationVal);
1917 EnterTokenStream(Toks: std::move(Tok), NumToks: 1, DisableMacroExpansion: true, /*IsReinject*/ false);
1918}
1919
1920/// Produce a diagnostic informing the user that a #include or similar
1921/// was implicitly treated as a module import.
1922static void diagnoseAutoModuleImport(Preprocessor &PP, SourceLocation HashLoc,
1923 Token &IncludeTok,
1924 ArrayRef<IdentifierLoc> Path,
1925 SourceLocation PathEnd) {
1926 SmallString<128> PathString;
1927 for (size_t I = 0, N = Path.size(); I != N; ++I) {
1928 if (I)
1929 PathString += '.';
1930 PathString += Path[I].getIdentifierInfo()->getName();
1931 }
1932
1933 int IncludeKind = 0;
1934 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1935 case tok::pp_include:
1936 IncludeKind = 0;
1937 break;
1938
1939 case tok::pp_import:
1940 IncludeKind = 1;
1941 break;
1942
1943 case tok::pp_include_next:
1944 IncludeKind = 2;
1945 break;
1946
1947 case tok::pp___include_macros:
1948 IncludeKind = 3;
1949 break;
1950
1951 default:
1952 llvm_unreachable("unknown include directive kind");
1953 }
1954
1955 PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1956 << IncludeKind << PathString;
1957}
1958
1959// Given a vector of path components and a string containing the real
1960// path to the file, build a properly-cased replacement in the vector,
1961// and return true if the replacement should be suggested.
1962static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1963 StringRef RealPathName,
1964 llvm::sys::path::Style Separator) {
1965 auto RealPathComponentIter = llvm::sys::path::rbegin(path: RealPathName);
1966 auto RealPathComponentEnd = llvm::sys::path::rend(path: RealPathName);
1967 int Cnt = 0;
1968 bool SuggestReplacement = false;
1969
1970 auto IsSep = [Separator](StringRef Component) {
1971 return Component.size() == 1 &&
1972 llvm::sys::path::is_separator(value: Component[0], style: Separator);
1973 };
1974
1975 // Below is a best-effort to handle ".." in paths. It is admittedly
1976 // not 100% correct in the presence of symlinks.
1977 for (auto &Component : llvm::reverse(C&: Components)) {
1978 if ("." == Component) {
1979 } else if (".." == Component) {
1980 ++Cnt;
1981 } else if (Cnt) {
1982 --Cnt;
1983 } else if (RealPathComponentIter != RealPathComponentEnd) {
1984 if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1985 Component != *RealPathComponentIter) {
1986 // If these non-separator path components differ by more than just case,
1987 // then we may be looking at symlinked paths. Bail on this diagnostic to
1988 // avoid noisy false positives.
1989 SuggestReplacement =
1990 RealPathComponentIter->equals_insensitive(RHS: Component);
1991 if (!SuggestReplacement)
1992 break;
1993 Component = *RealPathComponentIter;
1994 }
1995 ++RealPathComponentIter;
1996 }
1997 }
1998 return SuggestReplacement;
1999}
2000
2001bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
2002 const TargetInfo &TargetInfo,
2003 const Module &M,
2004 DiagnosticsEngine &Diags) {
2005 Module::Requirement Requirement;
2006 Module::UnresolvedHeaderDirective MissingHeader;
2007 Module *ShadowingModule = nullptr;
2008 if (M.isAvailable(LangOpts, Target: TargetInfo, Req&: Requirement, MissingHeader,
2009 ShadowingModule))
2010 return false;
2011
2012 if (MissingHeader.FileNameLoc.isValid()) {
2013 Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
2014 << MissingHeader.IsUmbrella << MissingHeader.FileName;
2015 } else if (ShadowingModule) {
2016 Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;
2017 Diags.Report(ShadowingModule->DefinitionLoc,
2018 diag::note_previous_definition);
2019 } else {
2020 // FIXME: Track the location at which the requirement was specified, and
2021 // use it here.
2022 Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)
2023 << M.getFullModuleName() << Requirement.RequiredState
2024 << Requirement.FeatureName;
2025 }
2026 return true;
2027}
2028
2029std::pair<ConstSearchDirIterator, const FileEntry *>
2030Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2031 // #include_next is like #include, except that we start searching after
2032 // the current found directory. If we can't do this, issue a
2033 // diagnostic.
2034 ConstSearchDirIterator Lookup = CurDirLookup;
2035 const FileEntry *LookupFromFile = nullptr;
2036
2037 if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2038 // If the main file is a header, then it's either for PCH/AST generation,
2039 // or libclang opened it. Either way, handle it as a normal include below
2040 // and do not complain about include_next.
2041 } else if (isInPrimaryFile()) {
2042 Lookup = nullptr;
2043 Diag(IncludeNextTok, diag::pp_include_next_in_primary);
2044 } else if (CurLexerSubmodule) {
2045 // Start looking up in the directory *after* the one in which the current
2046 // file would be found, if any.
2047 assert(CurPPLexer && "#include_next directive in macro?");
2048 if (auto FE = CurPPLexer->getFileEntry())
2049 LookupFromFile = *FE;
2050 Lookup = nullptr;
2051 } else if (!Lookup) {
2052 // The current file was not found by walking the include path. Either it
2053 // is the primary file (handled above), or it was found by absolute path,
2054 // or it was found relative to such a file.
2055 // FIXME: Track enough information so we know which case we're in.
2056 Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
2057 } else {
2058 // Start looking up in the next directory.
2059 ++Lookup;
2060 }
2061
2062 return {Lookup, LookupFromFile};
2063}
2064
2065/// HandleIncludeDirective - The "\#include" tokens have just been read, read
2066/// the file to be included from the lexer, then include it! This is a common
2067/// routine with functionality shared between \#include, \#include_next and
2068/// \#import. LookupFrom is set when this is a \#include_next directive, it
2069/// specifies the file to start searching from.
2070void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2071 Token &IncludeTok,
2072 ConstSearchDirIterator LookupFrom,
2073 const FileEntry *LookupFromFile) {
2074 Token FilenameTok;
2075 if (LexHeaderName(Result&: FilenameTok))
2076 return;
2077
2078 if (FilenameTok.isNot(K: tok::header_name)) {
2079 if (FilenameTok.is(K: tok::identifier) && PPOpts.SingleFileParseMode) {
2080 // If we saw #include IDENTIFIER and lexing didn't turn in into a header
2081 // name, it was undefined. In 'single-file-parse' mode, just skip the
2082 // directive without emitting diagnostics - the identifier might be
2083 // normally defined in previously-skipped include directive.
2084 DiscardUntilEndOfDirective();
2085 return;
2086 }
2087
2088 Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
2089 if (FilenameTok.isNot(K: tok::eod))
2090 DiscardUntilEndOfDirective();
2091 return;
2092 }
2093
2094 // Verify that there is nothing after the filename, other than EOD. Note
2095 // that we allow macros that expand to nothing after the filename, because
2096 // this falls into the category of "#include pp-tokens new-line" specified
2097 // in C99 6.10.2p4.
2098 SourceLocation EndLoc =
2099 CheckEndOfDirective(DirType: IncludeTok.getIdentifierInfo()->getNameStart(), EnableMacros: true);
2100
2101 auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2102 EndLoc, LookupFrom, LookupFromFile);
2103 switch (Action.Kind) {
2104 case ImportAction::None:
2105 case ImportAction::SkippedModuleImport:
2106 break;
2107 case ImportAction::ModuleBegin:
2108 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc),
2109 Kind: tok::annot_module_begin, AnnotationVal: Action.ModuleForHeader);
2110 break;
2111 case ImportAction::HeaderUnitImport:
2112 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc), Kind: tok::annot_header_unit,
2113 AnnotationVal: Action.ModuleForHeader);
2114 break;
2115 case ImportAction::ModuleImport:
2116 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc),
2117 Kind: tok::annot_module_include, AnnotationVal: Action.ModuleForHeader);
2118 break;
2119 case ImportAction::Failure:
2120 assert(TheModuleLoader.HadFatalFailure &&
2121 "This should be an early exit only to a fatal error");
2122 TheModuleLoader.HadFatalFailure = true;
2123 IncludeTok.setKind(tok::eof);
2124 CurLexer->cutOffLexing();
2125 return;
2126 }
2127}
2128
2129OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2130 ConstSearchDirIterator *CurDir, StringRef &Filename,
2131 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2132 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2133 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2134 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2135 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2136 ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2137 auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2138 if (LangOpts.AsmPreprocessor)
2139 return;
2140
2141 Module *RequestingModule = getModuleForLocation(
2142 Loc: FilenameLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
2143 bool RequestingModuleIsModuleInterface =
2144 !SourceMgr.isInMainFile(Loc: FilenameLoc);
2145
2146 HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2147 RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2148 Filename, File: FE);
2149 };
2150
2151 OptionalFileEntryRef File = LookupFile(
2152 FilenameLoc, Filename: LookupFilename, isAngled, FromDir: LookupFrom, FromFile: LookupFromFile, CurDirArg: CurDir,
2153 SearchPath: Callbacks ? &SearchPath : nullptr, RelativePath: Callbacks ? &RelativePath : nullptr,
2154 SuggestedModule: &SuggestedModule, IsMapped: &IsMapped, IsFrameworkFound: &IsFrameworkFound);
2155 if (File) {
2156 DiagnoseHeaderInclusion(*File);
2157 return File;
2158 }
2159
2160 // Give the clients a chance to silently skip this include.
2161 if (Callbacks && Callbacks->FileNotFound(FileName: Filename))
2162 return std::nullopt;
2163
2164 if (SuppressIncludeNotFoundError)
2165 return std::nullopt;
2166
2167 // If the file could not be located and it was included via angle
2168 // brackets, we can attempt a lookup as though it were a quoted path to
2169 // provide the user with a possible fixit.
2170 if (isAngled) {
2171 OptionalFileEntryRef File = LookupFile(
2172 FilenameLoc, Filename: LookupFilename, isAngled: false, FromDir: LookupFrom, FromFile: LookupFromFile, CurDirArg: CurDir,
2173 SearchPath: Callbacks ? &SearchPath : nullptr, RelativePath: Callbacks ? &RelativePath : nullptr,
2174 SuggestedModule: &SuggestedModule, IsMapped: &IsMapped,
2175 /*IsFrameworkFound=*/nullptr);
2176 if (File) {
2177 DiagnoseHeaderInclusion(*File);
2178 Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
2179 << Filename << IsImportDecl
2180 << FixItHint::CreateReplacement(FilenameRange,
2181 "\"" + Filename.str() + "\"");
2182 return File;
2183 }
2184 }
2185
2186 // Check for likely typos due to leading or trailing non-isAlphanumeric
2187 // characters
2188 StringRef OriginalFilename = Filename;
2189 if (LangOpts.SpellChecking) {
2190 // A heuristic to correct a typo file name by removing leading and
2191 // trailing non-isAlphanumeric characters.
2192 auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2193 Filename = Filename.drop_until(F: isAlphanumeric);
2194 while (!Filename.empty() && !isAlphanumeric(c: Filename.back())) {
2195 Filename = Filename.drop_back();
2196 }
2197 return Filename;
2198 };
2199 StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2200 StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2201
2202 OptionalFileEntryRef File = LookupFile(
2203 FilenameLoc, Filename: TypoCorrectionLookupName, isAngled, FromDir: LookupFrom,
2204 FromFile: LookupFromFile, CurDirArg: CurDir, SearchPath: Callbacks ? &SearchPath : nullptr,
2205 RelativePath: Callbacks ? &RelativePath : nullptr, SuggestedModule: &SuggestedModule, IsMapped: &IsMapped,
2206 /*IsFrameworkFound=*/nullptr);
2207 if (File) {
2208 DiagnoseHeaderInclusion(*File);
2209 auto Hint =
2210 isAngled ? FixItHint::CreateReplacement(
2211 RemoveRange: FilenameRange, Code: "<" + TypoCorrectionName.str() + ">")
2212 : FixItHint::CreateReplacement(
2213 RemoveRange: FilenameRange, Code: "\"" + TypoCorrectionName.str() + "\"");
2214 Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
2215 << OriginalFilename << TypoCorrectionName << Hint;
2216 // We found the file, so set the Filename to the name after typo
2217 // correction.
2218 Filename = TypoCorrectionName;
2219 LookupFilename = TypoCorrectionLookupName;
2220 return File;
2221 }
2222 }
2223
2224 // If the file is still not found, just go with the vanilla diagnostic
2225 assert(!File && "expected missing file");
2226 Diag(FilenameTok, diag::err_pp_file_not_found)
2227 << OriginalFilename << FilenameRange;
2228 if (IsFrameworkFound) {
2229 size_t SlashPos = OriginalFilename.find(C: '/');
2230 assert(SlashPos != StringRef::npos &&
2231 "Include with framework name should have '/' in the filename");
2232 StringRef FrameworkName = OriginalFilename.substr(Start: 0, N: SlashPos);
2233 FrameworkCacheEntry &CacheEntry =
2234 HeaderInfo.LookupFrameworkCache(FWName: FrameworkName);
2235 assert(CacheEntry.Directory && "Found framework should be in cache");
2236 Diag(FilenameTok, diag::note_pp_framework_without_header)
2237 << OriginalFilename.substr(SlashPos + 1) << FrameworkName
2238 << CacheEntry.Directory->getName();
2239 }
2240
2241 return std::nullopt;
2242}
2243
2244/// Handle either a #include-like directive or an import declaration that names
2245/// a header file.
2246///
2247/// \param HashLoc The location of the '#' token for an include, or
2248/// SourceLocation() for an import declaration.
2249/// \param IncludeTok The include / include_next / import token.
2250/// \param FilenameTok The header-name token.
2251/// \param EndLoc The location at which any imported macros become visible.
2252/// \param LookupFrom For #include_next, the starting directory for the
2253/// directory lookup.
2254/// \param LookupFromFile For #include_next, the starting file for the directory
2255/// lookup.
2256Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2257 SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2258 SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2259 const FileEntry *LookupFromFile) {
2260 SmallString<128> FilenameBuffer;
2261 StringRef Filename = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
2262 SourceLocation CharEnd = FilenameTok.getEndLoc();
2263
2264 CharSourceRange FilenameRange
2265 = CharSourceRange::getCharRange(B: FilenameTok.getLocation(), E: CharEnd);
2266 StringRef OriginalFilename = Filename;
2267 bool isAngled =
2268 GetIncludeFilenameSpelling(Loc: FilenameTok.getLocation(), Buffer&: Filename);
2269
2270 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2271 // error.
2272 if (Filename.empty())
2273 return {ImportAction::None};
2274
2275 bool IsImportDecl = HashLoc.isInvalid();
2276 SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2277
2278 // Complain about attempts to #include files in an audit pragma.
2279 if (PragmaARCCFCodeAuditedInfo.getLoc().isValid()) {
2280 Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2281 Diag(PragmaARCCFCodeAuditedInfo.getLoc(), diag::note_pragma_entered_here);
2282
2283 // Immediately leave the pragma.
2284 PragmaARCCFCodeAuditedInfo = IdentifierLoc();
2285 }
2286
2287 // Complain about attempts to #include files in an assume-nonnull pragma.
2288 if (PragmaAssumeNonNullLoc.isValid()) {
2289 Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2290 Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
2291
2292 // Immediately leave the pragma.
2293 PragmaAssumeNonNullLoc = SourceLocation();
2294 }
2295
2296 if (HeaderInfo.HasIncludeAliasMap()) {
2297 // Map the filename with the brackets still attached. If the name doesn't
2298 // map to anything, fall back on the filename we've already gotten the
2299 // spelling for.
2300 StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(Source: OriginalFilename);
2301 if (!NewName.empty())
2302 Filename = NewName;
2303 }
2304
2305 // Search include directories.
2306 bool IsMapped = false;
2307 bool IsFrameworkFound = false;
2308 ConstSearchDirIterator CurDir = nullptr;
2309 SmallString<1024> SearchPath;
2310 SmallString<1024> RelativePath;
2311 // We get the raw path only if we have 'Callbacks' to which we later pass
2312 // the path.
2313 ModuleMap::KnownHeader SuggestedModule;
2314 SourceLocation FilenameLoc = FilenameTok.getLocation();
2315 StringRef LookupFilename = Filename;
2316
2317 // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2318 // is unnecessary on Windows since the filesystem there handles backslashes.
2319 SmallString<128> NormalizedPath;
2320 llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2321 if (is_style_posix(S: BackslashStyle) && LangOpts.MicrosoftExt) {
2322 NormalizedPath = Filename.str();
2323 llvm::sys::path::native(path&: NormalizedPath);
2324 LookupFilename = NormalizedPath;
2325 BackslashStyle = llvm::sys::path::Style::windows;
2326 }
2327
2328 OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2329 CurDir: &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2330 IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2331 LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2332
2333 if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2334 if (File && isPCHThroughHeader(FE: &File->getFileEntry()))
2335 SkippingUntilPCHThroughHeader = false;
2336 return {ImportAction::None};
2337 }
2338
2339 // Should we enter the source file? Set to Skip if either the source file is
2340 // known to have no effect beyond its effect on module visibility -- that is,
2341 // if it's got an include guard that is already defined, set to Import if it
2342 // is a modular header we've already built and should import.
2343
2344 // For C++20 Modules
2345 // [cpp.include]/7 If the header identified by the header-name denotes an
2346 // importable header, it is implementation-defined whether the #include
2347 // preprocessing directive is instead replaced by an import directive.
2348 // For this implementation, the translation is permitted when we are parsing
2349 // the Global Module Fragment, and not otherwise (the cases where it would be
2350 // valid to replace an include with an import are highly constrained once in
2351 // named module purview; this choice avoids considerable complexity in
2352 // determining valid cases).
2353
2354 enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2355
2356 if (PPOpts.SingleFileParseMode)
2357 Action = IncludeLimitReached;
2358
2359 // If we've reached the max allowed include depth, it is usually due to an
2360 // include cycle. Don't enter already processed files again as it can lead to
2361 // reaching the max allowed include depth again.
2362 if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2363 alreadyIncluded(File: *File))
2364 Action = IncludeLimitReached;
2365
2366 // FIXME: We do not have a good way to disambiguate C++ clang modules from
2367 // C++ standard modules (other than use/non-use of Header Units).
2368
2369 Module *ModuleToImport = SuggestedModule.getModule();
2370
2371 bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2372 !ModuleToImport->isForBuilding(LangOpts: getLangOpts());
2373
2374 // Maybe a usable Header Unit
2375 bool UsableHeaderUnit = false;
2376 if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2377 ModuleToImport->isHeaderUnit()) {
2378 if (TrackGMFState.inGMF() || IsImportDecl)
2379 UsableHeaderUnit = true;
2380 else if (!IsImportDecl) {
2381 // This is a Header Unit that we do not include-translate
2382 ModuleToImport = nullptr;
2383 }
2384 }
2385 // Maybe a usable clang header module.
2386 bool UsableClangHeaderModule =
2387 (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2388 ModuleToImport && !ModuleToImport->isHeaderUnit();
2389
2390 // Determine whether we should try to import the module for this #include, if
2391 // there is one. Don't do so if precompiled module support is disabled or we
2392 // are processing this module textually (because we're building the module).
2393 if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2394 // If this include corresponds to a module but that module is
2395 // unavailable, diagnose the situation and bail out.
2396 // FIXME: Remove this; loadModule does the same check (but produces
2397 // slightly worse diagnostics).
2398 if (checkModuleIsAvailable(LangOpts: getLangOpts(), TargetInfo: getTargetInfo(), M: *ModuleToImport,
2399 Diags&: getDiagnostics())) {
2400 Diag(FilenameTok.getLocation(),
2401 diag::note_implicit_top_level_module_import_here)
2402 << ModuleToImport->getTopLevelModuleName();
2403 return {ImportAction::None};
2404 }
2405
2406 // Compute the module access path corresponding to this module.
2407 // FIXME: Should we have a second loadModule() overload to avoid this
2408 // extra lookup step?
2409 SmallVector<IdentifierLoc, 2> Path;
2410 for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2411 Path.emplace_back(Args: FilenameTok.getLocation(),
2412 Args: getIdentifierInfo(Name: Mod->Name));
2413 std::reverse(first: Path.begin(), last: Path.end());
2414
2415 // Warn that we're replacing the include/import with a module import.
2416 if (!IsImportDecl)
2417 diagnoseAutoModuleImport(PP&: *this, HashLoc: StartLoc, IncludeTok, Path, PathEnd: CharEnd);
2418
2419 // Load the module to import its macros. We'll make the declarations
2420 // visible when the parser gets here.
2421 // FIXME: Pass ModuleToImport in here rather than converting it to a path
2422 // and making the module loader convert it back again.
2423 ModuleLoadResult Imported = TheModuleLoader.loadModule(
2424 ImportLoc: IncludeTok.getLocation(), Path, Visibility: Module::Hidden,
2425 /*IsInclusionDirective=*/true);
2426 assert((Imported == nullptr || Imported == ModuleToImport) &&
2427 "the imported module is different than the suggested one");
2428
2429 if (Imported) {
2430 Action = Import;
2431 } else if (Imported.isMissingExpected()) {
2432 markClangModuleAsAffecting(
2433 M: static_cast<Module *>(Imported)->getTopLevelModule());
2434 // We failed to find a submodule that we assumed would exist (because it
2435 // was in the directory of an umbrella header, for instance), but no
2436 // actual module containing it exists (because the umbrella header is
2437 // incomplete). Treat this as a textual inclusion.
2438 ModuleToImport = nullptr;
2439 } else if (Imported.isConfigMismatch()) {
2440 // On a configuration mismatch, enter the header textually. We still know
2441 // that it's part of the corresponding module.
2442 } else {
2443 // We hit an error processing the import. Bail out.
2444 if (hadModuleLoaderFatalFailure()) {
2445 // With a fatal failure in the module loader, we abort parsing.
2446 Token &Result = IncludeTok;
2447 assert(CurLexer && "#include but no current lexer set!");
2448 Result.startToken();
2449 CurLexer->FormTokenWithChars(Result, TokEnd: CurLexer->BufferEnd, Kind: tok::eof);
2450 CurLexer->cutOffLexing();
2451 }
2452 return {ImportAction::None};
2453 }
2454 }
2455
2456 // The #included file will be considered to be a system header if either it is
2457 // in a system include directory, or if the #includer is a system include
2458 // header.
2459 SrcMgr::CharacteristicKind FileCharacter =
2460 SourceMgr.getFileCharacteristic(Loc: FilenameTok.getLocation());
2461 if (File)
2462 FileCharacter = std::max(a: HeaderInfo.getFileDirFlavor(File: *File), b: FileCharacter);
2463
2464 // If this is a '#import' or an import-declaration, don't re-enter the file.
2465 //
2466 // FIXME: If we have a suggested module for a '#include', and we've already
2467 // visited this file, don't bother entering it again. We know it has no
2468 // further effect.
2469 bool EnterOnce =
2470 IsImportDecl ||
2471 IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2472
2473 bool IsFirstIncludeOfFile = false;
2474
2475 // Ask HeaderInfo if we should enter this #include file. If not, #including
2476 // this file will have no effect.
2477 if (Action == Enter && File &&
2478 !HeaderInfo.ShouldEnterIncludeFile(PP&: *this, File: *File, isImport: EnterOnce,
2479 ModulesEnabled: getLangOpts().Modules, M: ModuleToImport,
2480 IsFirstIncludeOfFile)) {
2481 // C++ standard modules:
2482 // If we are not in the GMF, then we textually include only
2483 // clang modules:
2484 // Even if we've already preprocessed this header once and know that we
2485 // don't need to see its contents again, we still need to import it if it's
2486 // modular because we might not have imported it from this submodule before.
2487 //
2488 // FIXME: We don't do this when compiling a PCH because the AST
2489 // serialization layer can't cope with it. This means we get local
2490 // submodule visibility semantics wrong in that case.
2491 if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2492 Action = TrackGMFState.inGMF() ? Import : Skip;
2493 else
2494 Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2495 }
2496
2497 // Check for circular inclusion of the main file.
2498 // We can't generate a consistent preamble with regard to the conditional
2499 // stack if the main file is included again as due to the preamble bounds
2500 // some directives (e.g. #endif of a header guard) will never be seen.
2501 // Since this will lead to confusing errors, avoid the inclusion.
2502 if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2503 SourceMgr.isMainFile(SourceFile: File->getFileEntry())) {
2504 Diag(FilenameTok.getLocation(),
2505 diag::err_pp_including_mainfile_in_preamble);
2506 return {ImportAction::None};
2507 }
2508
2509 if (Callbacks && !IsImportDecl) {
2510 // Notify the callback object that we've seen an inclusion directive.
2511 // FIXME: Use a different callback for a pp-import?
2512 Callbacks->InclusionDirective(HashLoc, IncludeTok, FileName: LookupFilename, IsAngled: isAngled,
2513 FilenameRange, File, SearchPath, RelativePath,
2514 SuggestedModule: SuggestedModule.getModule(), ModuleImported: Action == Import,
2515 FileType: FileCharacter);
2516 if (Action == Skip && File)
2517 Callbacks->FileSkipped(SkippedFile: *File, FilenameTok, FileType: FileCharacter);
2518 }
2519
2520 if (!File)
2521 return {ImportAction::None};
2522
2523 // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2524 // module corresponding to the named header.
2525 if (IsImportDecl && !ModuleToImport) {
2526 Diag(FilenameTok, diag::err_header_import_not_header_unit)
2527 << OriginalFilename << File->getName();
2528 return {ImportAction::None};
2529 }
2530
2531 // Issue a diagnostic if the name of the file on disk has a different case
2532 // than the one we're about to open.
2533 const bool CheckIncludePathPortability =
2534 !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2535
2536 if (CheckIncludePathPortability) {
2537 StringRef Name = LookupFilename;
2538 StringRef NameWithoriginalSlashes = Filename;
2539#if defined(_WIN32)
2540 // Skip UNC prefix if present. (tryGetRealPathName() always
2541 // returns a path with the prefix skipped.)
2542 bool NameWasUNC = Name.consume_front("\\\\?\\");
2543 NameWithoriginalSlashes.consume_front("\\\\?\\");
2544#endif
2545 StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2546 SmallVector<StringRef, 16> Components(llvm::sys::path::begin(path: Name),
2547 llvm::sys::path::end(path: Name));
2548#if defined(_WIN32)
2549 // -Wnonportable-include-path is designed to diagnose includes using
2550 // case even on systems with a case-insensitive file system.
2551 // On Windows, RealPathName always starts with an upper-case drive
2552 // letter for absolute paths, but Name might start with either
2553 // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2554 // ("foo" will always have on-disk case, no matter which case was
2555 // used in the cd command). To not emit this warning solely for
2556 // the drive letter, whose case is dependent on if `cd` is used
2557 // with upper- or lower-case drive letters, always consider the
2558 // given drive letter case as correct for the purpose of this warning.
2559 SmallString<128> FixedDriveRealPath;
2560 if (llvm::sys::path::is_absolute(Name) &&
2561 llvm::sys::path::is_absolute(RealPathName) &&
2562 toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2563 isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2564 assert(Components.size() >= 3 && "should have drive, backslash, name");
2565 assert(Components[0].size() == 2 && "should start with drive");
2566 assert(Components[0][1] == ':' && "should have colon");
2567 FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2568 RealPathName = FixedDriveRealPath;
2569 }
2570#endif
2571
2572 if (trySimplifyPath(Components, RealPathName, Separator: BackslashStyle)) {
2573 SmallString<128> Path;
2574 Path.reserve(N: Name.size()+2);
2575 Path.push_back(Elt: isAngled ? '<' : '"');
2576
2577 const auto IsSep = [BackslashStyle](char c) {
2578 return llvm::sys::path::is_separator(value: c, style: BackslashStyle);
2579 };
2580
2581 for (auto Component : Components) {
2582 // On POSIX, Components will contain a single '/' as first element
2583 // exactly if Name is an absolute path.
2584 // On Windows, it will contain "C:" followed by '\' for absolute paths.
2585 // The drive letter is optional for absolute paths on Windows, but
2586 // clang currently cannot process absolute paths in #include lines that
2587 // don't have a drive.
2588 // If the first entry in Components is a directory separator,
2589 // then the code at the bottom of this loop that keeps the original
2590 // directory separator style copies it. If the second entry is
2591 // a directory separator (the C:\ case), then that separator already
2592 // got copied when the C: was processed and we want to skip that entry.
2593 if (!(Component.size() == 1 && IsSep(Component[0])))
2594 Path.append(RHS: Component);
2595 else if (Path.size() != 1)
2596 continue;
2597
2598 // Append the separator(s) the user used, or the close quote
2599 if (Path.size() > NameWithoriginalSlashes.size()) {
2600 Path.push_back(Elt: isAngled ? '>' : '"');
2601 continue;
2602 }
2603 assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2604 do
2605 Path.push_back(Elt: NameWithoriginalSlashes[Path.size()-1]);
2606 while (Path.size() <= NameWithoriginalSlashes.size() &&
2607 IsSep(NameWithoriginalSlashes[Path.size()-1]));
2608 }
2609
2610#if defined(_WIN32)
2611 // Restore UNC prefix if it was there.
2612 if (NameWasUNC)
2613 Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2614#endif
2615
2616 // For user files and known standard headers, issue a diagnostic.
2617 // For other system headers, don't. They can be controlled separately.
2618 auto DiagId =
2619 (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2620 ? diag::pp_nonportable_path
2621 : diag::pp_nonportable_system_path;
2622 Diag(FilenameTok, DiagId) << Path <<
2623 FixItHint::CreateReplacement(RemoveRange: FilenameRange, Code: Path);
2624 }
2625 }
2626
2627 switch (Action) {
2628 case Skip:
2629 // If we don't need to enter the file, stop now.
2630 if (ModuleToImport)
2631 return {ImportAction::SkippedModuleImport, ModuleToImport};
2632 return {ImportAction::None};
2633
2634 case IncludeLimitReached:
2635 // If we reached our include limit and don't want to enter any more files,
2636 // don't go any further.
2637 return {ImportAction::None};
2638
2639 case Import: {
2640 // If this is a module import, make it visible if needed.
2641 assert(ModuleToImport && "no module to import");
2642
2643 makeModuleVisible(M: ModuleToImport, Loc: EndLoc);
2644
2645 if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2646 tok::pp___include_macros)
2647 return {ImportAction::None};
2648
2649 return {ImportAction::ModuleImport, ModuleToImport};
2650 }
2651
2652 case Enter:
2653 break;
2654 }
2655
2656 // Check that we don't have infinite #include recursion.
2657 if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2658 Diag(FilenameTok, diag::err_pp_include_too_deep);
2659 HasReachedMaxIncludeDepth = true;
2660 return {ImportAction::None};
2661 }
2662
2663 if (isAngled && isInNamedModule())
2664 Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
2665 << getNamedModuleName();
2666
2667 // Look up the file, create a File ID for it.
2668 SourceLocation IncludePos = FilenameTok.getLocation();
2669 // If the filename string was the result of macro expansions, set the include
2670 // position on the file where it will be included and after the expansions.
2671 if (IncludePos.isMacroID())
2672 IncludePos = SourceMgr.getExpansionRange(Loc: IncludePos).getEnd();
2673 FileID FID = SourceMgr.createFileID(SourceFile: *File, IncludePos, FileCharacter);
2674 if (!FID.isValid()) {
2675 TheModuleLoader.HadFatalFailure = true;
2676 return ImportAction::Failure;
2677 }
2678
2679 // If all is good, enter the new file!
2680 if (EnterSourceFile(FID, Dir: CurDir, Loc: FilenameTok.getLocation(),
2681 IsFirstIncludeOfFile))
2682 return {ImportAction::None};
2683
2684 // Determine if we're switching to building a new submodule, and which one.
2685 // This does not apply for C++20 modules header units.
2686 if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2687 if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2688 // We are building a submodule that belongs to a shadowed module. This
2689 // means we find header files in the shadowed module.
2690 Diag(ModuleToImport->DefinitionLoc,
2691 diag::err_module_build_shadowed_submodule)
2692 << ModuleToImport->getFullModuleName();
2693 Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2694 diag::note_previous_definition);
2695 return {ImportAction::None};
2696 }
2697 // When building a pch, -fmodule-name tells the compiler to textually
2698 // include headers in the specified module. We are not building the
2699 // specified module.
2700 //
2701 // FIXME: This is the wrong way to handle this. We should produce a PCH
2702 // that behaves the same as the header would behave in a compilation using
2703 // that PCH, which means we should enter the submodule. We need to teach
2704 // the AST serialization layer to deal with the resulting AST.
2705 if (getLangOpts().CompilingPCH &&
2706 ModuleToImport->isForBuilding(LangOpts: getLangOpts()))
2707 return {ImportAction::None};
2708
2709 assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2710 CurLexerSubmodule = ModuleToImport;
2711
2712 // Let the macro handling code know that any future macros are within
2713 // the new submodule.
2714 EnterSubmodule(M: ModuleToImport, ImportLoc: EndLoc, /*ForPragma*/ false);
2715
2716 // Let the parser know that any future declarations are within the new
2717 // submodule.
2718 // FIXME: There's no point doing this if we're handling a #__include_macros
2719 // directive.
2720 return {ImportAction::ModuleBegin, ModuleToImport};
2721 }
2722
2723 assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2724 return {ImportAction::None};
2725}
2726
2727/// HandleIncludeNextDirective - Implements \#include_next.
2728///
2729void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2730 Token &IncludeNextTok) {
2731 Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2732
2733 ConstSearchDirIterator Lookup = nullptr;
2734 const FileEntry *LookupFromFile;
2735 std::tie(args&: Lookup, args&: LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2736
2737 return HandleIncludeDirective(HashLoc, IncludeTok&: IncludeNextTok, LookupFrom: Lookup,
2738 LookupFromFile);
2739}
2740
2741/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
2742void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2743 // The Microsoft #import directive takes a type library and generates header
2744 // files from it, and includes those. This is beyond the scope of what clang
2745 // does, so we ignore it and error out. However, #import can optionally have
2746 // trailing attributes that span multiple lines. We're going to eat those
2747 // so we can continue processing from there.
2748 Diag(Tok, diag::err_pp_import_directive_ms );
2749
2750 // Read tokens until we get to the end of the directive. Note that the
2751 // directive can be split over multiple lines using the backslash character.
2752 DiscardUntilEndOfDirective();
2753}
2754
2755/// HandleImportDirective - Implements \#import.
2756///
2757void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2758 Token &ImportTok) {
2759 if (!LangOpts.ObjC) { // #import is standard for ObjC.
2760 if (LangOpts.MSVCCompat)
2761 return HandleMicrosoftImportDirective(Tok&: ImportTok);
2762 Diag(ImportTok, diag::ext_pp_import_directive);
2763 }
2764 return HandleIncludeDirective(HashLoc, IncludeTok&: ImportTok);
2765}
2766
2767/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2768/// pseudo directive in the predefines buffer. This handles it by sucking all
2769/// tokens through the preprocessor and discarding them (only keeping the side
2770/// effects on the preprocessor).
2771void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2772 Token &IncludeMacrosTok) {
2773 // This directive should only occur in the predefines buffer. If not, emit an
2774 // error and reject it.
2775 SourceLocation Loc = IncludeMacrosTok.getLocation();
2776 if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2777 Diag(IncludeMacrosTok.getLocation(),
2778 diag::pp_include_macros_out_of_predefines);
2779 DiscardUntilEndOfDirective();
2780 return;
2781 }
2782
2783 // Treat this as a normal #include for checking purposes. If this is
2784 // successful, it will push a new lexer onto the include stack.
2785 HandleIncludeDirective(HashLoc, IncludeTok&: IncludeMacrosTok);
2786
2787 Token TmpTok;
2788 do {
2789 Lex(Result&: TmpTok);
2790 assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2791 } while (TmpTok.isNot(K: tok::hashhash));
2792}
2793
2794//===----------------------------------------------------------------------===//
2795// Preprocessor Macro Directive Handling.
2796//===----------------------------------------------------------------------===//
2797
2798/// ReadMacroParameterList - The ( starting a parameter list of a macro
2799/// definition has just been read. Lex the rest of the parameters and the
2800/// closing ), updating MI with what we learn. Return true if an error occurs
2801/// parsing the param list.
2802bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2803 SmallVector<IdentifierInfo*, 32> Parameters;
2804
2805 while (true) {
2806 LexUnexpandedNonComment(Result&: Tok);
2807 switch (Tok.getKind()) {
2808 case tok::r_paren:
2809 // Found the end of the parameter list.
2810 if (Parameters.empty()) // #define FOO()
2811 return false;
2812 // Otherwise we have #define FOO(A,)
2813 Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2814 return true;
2815 case tok::ellipsis: // #define X(... -> C99 varargs
2816 if (!LangOpts.C99)
2817 Diag(Tok, LangOpts.CPlusPlus11 ?
2818 diag::warn_cxx98_compat_variadic_macro :
2819 diag::ext_variadic_macro);
2820
2821 // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2822 if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2823 Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2824 }
2825
2826 // Lex the token after the identifier.
2827 LexUnexpandedNonComment(Result&: Tok);
2828 if (Tok.isNot(K: tok::r_paren)) {
2829 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2830 return true;
2831 }
2832 // Add the __VA_ARGS__ identifier as a parameter.
2833 Parameters.push_back(Elt: Ident__VA_ARGS__);
2834 MI->setIsC99Varargs();
2835 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2836 return false;
2837 case tok::eod: // #define X(
2838 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2839 return true;
2840 default:
2841 // Handle keywords and identifiers here to accept things like
2842 // #define Foo(for) for.
2843 IdentifierInfo *II = Tok.getIdentifierInfo();
2844 if (!II) {
2845 // #define X(1
2846 Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2847 return true;
2848 }
2849
2850 // If this is already used as a parameter, it is used multiple times (e.g.
2851 // #define X(A,A.
2852 if (llvm::is_contained(Range&: Parameters, Element: II)) { // C99 6.10.3p6
2853 Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2854 return true;
2855 }
2856
2857 // Add the parameter to the macro info.
2858 Parameters.push_back(Elt: II);
2859
2860 // Lex the token after the identifier.
2861 LexUnexpandedNonComment(Result&: Tok);
2862
2863 switch (Tok.getKind()) {
2864 default: // #define X(A B
2865 Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2866 return true;
2867 case tok::r_paren: // #define X(A)
2868 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2869 return false;
2870 case tok::comma: // #define X(A,
2871 break;
2872 case tok::ellipsis: // #define X(A... -> GCC extension
2873 // Diagnose extension.
2874 Diag(Tok, diag::ext_named_variadic_macro);
2875
2876 // Lex the token after the identifier.
2877 LexUnexpandedNonComment(Result&: Tok);
2878 if (Tok.isNot(K: tok::r_paren)) {
2879 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2880 return true;
2881 }
2882
2883 MI->setIsGNUVarargs();
2884 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2885 return false;
2886 }
2887 }
2888 }
2889}
2890
2891static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2892 const LangOptions &LOptions) {
2893 if (MI->getNumTokens() == 1) {
2894 const Token &Value = MI->getReplacementToken(Tok: 0);
2895
2896 // Macro that is identity, like '#define inline inline' is a valid pattern.
2897 if (MacroName.getKind() == Value.getKind())
2898 return true;
2899
2900 // Macro that maps a keyword to the same keyword decorated with leading/
2901 // trailing underscores is a valid pattern:
2902 // #define inline __inline
2903 // #define inline __inline__
2904 // #define inline _inline (in MS compatibility mode)
2905 StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2906 if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2907 if (!II->isKeyword(LangOpts: LOptions))
2908 return false;
2909 StringRef ValueText = II->getName();
2910 StringRef TrimmedValue = ValueText;
2911 if (!ValueText.starts_with(Prefix: "__")) {
2912 if (ValueText.starts_with(Prefix: "_"))
2913 TrimmedValue = TrimmedValue.drop_front(N: 1);
2914 else
2915 return false;
2916 } else {
2917 TrimmedValue = TrimmedValue.drop_front(N: 2);
2918 if (TrimmedValue.ends_with(Suffix: "__"))
2919 TrimmedValue = TrimmedValue.drop_back(N: 2);
2920 }
2921 return TrimmedValue == MacroText;
2922 } else {
2923 return false;
2924 }
2925 }
2926
2927 // #define inline
2928 return MacroName.isOneOf(K1: tok::kw_extern, Ks: tok::kw_inline, Ks: tok::kw_static,
2929 Ks: tok::kw_const) &&
2930 MI->getNumTokens() == 0;
2931}
2932
2933// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2934// entire line) of the macro's tokens and adds them to MacroInfo, and while
2935// doing so performs certain validity checks including (but not limited to):
2936// - # (stringization) is followed by a macro parameter
2937//
2938// Returns a nullptr if an invalid sequence of tokens is encountered or returns
2939// a pointer to a MacroInfo object.
//
// MacroNameTok is the token naming the macro: its location is used to
// allocate the MacroInfo, and its identifier/location are what gets recorded
// for the header-guard optimization.
// ImmediatelyAfterHeaderGuard: when true and the macro has no body (the first
// token after the name is 'eod'), the macro is reported to
// CurPPLexer->MIOpt.SetDefinedMacro().
//
// Every 'return nullptr' below leaves the rest of the directive unread; the
// scope-exit handler installed near the top discards it.
2940
2941MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2942 const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2943
 // LastTok trails Tok: it holds the most recently consumed token. It supplies
 // the definition end location at the bottom of this function, and inside the
 // function-like loop it keeps the '#', '#@' or '##' operator token while Tok
 // moves on to the operand.
2944 Token LastTok = MacroNameTok;
2945 // Create the new macro.
2946 MacroInfo *const MI = AllocateMacroInfo(L: MacroNameTok.getLocation());
2947
2948 Token Tok;
2949 LexUnexpandedToken(Result&: Tok);
2950
2951 // Ensure we consume the rest of the macro body if errors occur.
2952 auto _ = llvm::make_scope_exit(F: [&]() {
2953 // The flag indicates if we are still waiting for 'eod'.
2954 if (CurLexer->ParsingPreprocessorDirective)
2955 DiscardUntilEndOfDirective();
2956 });
2957
2958 // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2959 // within their appropriate context.
2960 VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2961
2962 // If this is a function-like macro definition, parse the argument list,
2963 // marking each of the identifiers as being used as macro arguments. Also,
2964 // check other constraints on the first token of the macro body.
2965 if (Tok.is(K: tok::eod)) {
2966 if (ImmediatelyAfterHeaderGuard) {
2967 // Save this macro information since it may be part of a header guard.
2968 CurPPLexer->MIOpt.SetDefinedMacro(M: MacroNameTok.getIdentifierInfo(),
2969 Loc: MacroNameTok.getLocation());
2970 }
2971 // If there is no body to this macro, we have no special handling here.
2972 } else if (Tok.hasLeadingSpace()) {
2973 // This is a normal token with leading space. Clear the leading space
2974 // marker on the first token to get proper expansion.
2975 Tok.clearFlag(Flag: Token::LeadingSpace);
2976 } else if (Tok.is(K: tok::l_paren)) {
2977 // This is a function-like macro definition. Read the argument list.
2978 MI->setIsFunctionLike();
 // Note: the parameter list is lexed into LastTok, not Tok; Tok is refreshed
 // by the LexUnexpandedToken call at the end of this branch.
2979 if (ReadMacroParameterList(MI, Tok&: LastTok))
2980 return nullptr;
2981
2982 // If this is a definition of an ISO C/C++ variadic function-like macro (not
2983 // using the GNU named varargs extension) inform our variadic scope guard
2984 // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2985 // allowed only within the definition of a variadic macro.
2986
2987 if (MI->isC99Varargs()) {
2988 VariadicMacroScopeGuard.enterScope();
2989 }
2990
2991 // Read the first token after the arg list for down below.
2992 LexUnexpandedToken(Result&: Tok);
2993 } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2994 // C99 requires whitespace between the macro definition and the body. Emit
2995 // a diagnostic for something like "#define X+".
2996 Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2997 } else {
2998 // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2999 // first character of a replacement list is not a character required by
3000 // subclause 5.2.1, then there shall be white-space separation between the
3001 // identifier and the replacement list.". 5.2.1 lists this set:
3002 // "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
3003 // is irrelevant here.
3004 bool isInvalid = false;
3005 if (Tok.is(K: tok::at)) // @ is not in the list above.
3006 isInvalid = true;
3007 else if (Tok.is(K: tok::unknown)) {
3008 // If we have an unknown token, it is something strange like "`". Since
3009 // all of valid characters would have lexed into a single character
3010 // token of some sort, we know this is not a valid case.
3011 isInvalid = true;
3012 }
3013 if (isInvalid)
3014 Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
3015 else
3016 Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
3017 }
3018
 // With an empty body LastTok keeps its previous value (the macro name, or
 // the last token of the parameter list), which then becomes the definition
 // end location.
3019 if (!Tok.is(K: tok::eod))
3020 LastTok = Tok;
3021
 // Replacement-list tokens are collected here and handed to MI at the end.
3022 SmallVector<Token, 16> Tokens;
3023
3024 // Read the rest of the macro body.
3025 if (MI->isObjectLike()) {
3026 // Object-like macros are very simple, just read their body.
3027 while (Tok.isNot(K: tok::eod)) {
3028 LastTok = Tok;
3029 Tokens.push_back(Elt: Tok);
3030 // Get the next token of the macro.
3031 LexUnexpandedToken(Result&: Tok);
3032 }
3033 } else {
3034 // Otherwise, read the body of a function-like macro. While we are at it,
3035 // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3036 // parameters in function-like macro expansions.
3037
3038 VAOptDefinitionContext VAOCtx(*this);
3039
3040 while (Tok.isNot(K: tok::eod)) {
3041 LastTok = Tok;
3042
 // Ordinary token (anything that is not '#', '#@' or '##').
3043 if (!Tok.isOneOf(K1: tok::hash, Ks: tok::hashat, Ks: tok::hashhash)) {
3044 Tokens.push_back(Elt: Tok);
3045
3046 if (VAOCtx.isVAOptToken(T: Tok)) {
3047 // If we're already within a VAOPT, emit an error.
3048 if (VAOCtx.isInVAOpt()) {
3049 Diag(Tok, diag::err_pp_vaopt_nested_use);
3050 return nullptr;
3051 }
3052 // Ensure VAOPT is followed by a '(' .
3053 LexUnexpandedToken(Result&: Tok);
3054 if (Tok.isNot(K: tok::l_paren)) {
3055 Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
3056 return nullptr;
3057 }
3058 Tokens.push_back(Elt: Tok);
3059 VAOCtx.sawVAOptFollowedByOpeningParens(LParenLoc: Tok.getLocation());
 // Peek at the first token inside the parentheses: a leading '##' is
 // rejected; anything else is processed by the next loop iteration.
3060 LexUnexpandedToken(Result&: Tok);
3061 if (Tok.is(K: tok::hashhash)) {
3062 Diag(Tok, diag::err_vaopt_paste_at_start);
3063 return nullptr;
3064 }
3065 continue;
3066 } else if (VAOCtx.isInVAOpt()) {
3067 if (Tok.is(K: tok::r_paren)) {
3068 if (VAOCtx.sawClosingParen()) {
3069 assert(Tokens.size() >= 3 &&
3070 "Must have seen at least __VA_OPT__( "
3071 "and a subsequent tok::r_paren");
 // Tokens.back() is the ')' pushed above, so size()-2 is the token
 // immediately before it.
3072 if (Tokens[Tokens.size() - 2].is(K: tok::hashhash)) {
3073 Diag(Tok, diag::err_vaopt_paste_at_end);
3074 return nullptr;
3075 }
3076 }
3077 } else if (Tok.is(K: tok::l_paren)) {
3078 VAOCtx.sawOpeningParen(LParenLoc: Tok.getLocation());
3079 }
3080 }
3081 // Get the next token of the macro.
3082 LexUnexpandedToken(Result&: Tok);
3083 continue;
3084 }
3085
3086 // If we're in -traditional mode, then we should ignore stringification
3087 // and token pasting. Mark the tokens as unknown so as not to confuse
3088 // things.
3089 if (getLangOpts().TraditionalCPP) {
3090 Tok.setKind(tok::unknown);
3091 Tokens.push_back(Elt: Tok);
3092
3093 // Get the next token of the macro.
3094 LexUnexpandedToken(Result&: Tok);
3095 continue;
3096 }
3097
3098 if (Tok.is(K: tok::hashhash)) {
3099 // If we see token pasting, check if it looks like the gcc comma
3100 // pasting extension. We'll use this information to suppress
3101 // diagnostics later on.
3102
 // From here on LastTok is the '##' token and Tok is whatever follows it.
3103 // Get the next token of the macro.
3104 LexUnexpandedToken(Result&: Tok);
3105
 // '##' is the last token of the body: it is still recorded and no
 // diagnostic is issued here.
3106 if (Tok.is(K: tok::eod)) {
3107 Tokens.push_back(Elt: LastTok);
3108 break;
3109 }
3110
 // ", ## __VA_ARGS__": the token already recorded before '##' is a comma
 // and the token after it is __VA_ARGS__.
3111 if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3112 Tokens[Tokens.size() - 1].is(K: tok::comma))
3113 MI->setHasCommaPasting();
3114
3115 // Things look ok, add the '##' token to the macro.
3116 Tokens.push_back(Elt: LastTok);
 // Tok (the right-hand operand) is left for the next iteration.
3117 continue;
3118 }
3119
3120 // Our Token is a stringization operator.
3121 // Get the next token of the macro.
3122 LexUnexpandedToken(Result&: Tok);
3123
3124 // Check for a valid macro arg identifier or __VA_OPT__.
3125 if (!VAOCtx.isVAOptToken(T: Tok) &&
3126 (Tok.getIdentifierInfo() == nullptr ||
3127 MI->getParameterNum(Arg: Tok.getIdentifierInfo()) == -1)) {
3128
3129 // If this is assembler-with-cpp mode, we accept random gibberish after
3130 // the '#' because '#' is often a comment character. However, change
3131 // the kind of the token to tok::unknown so that the preprocessor isn't
3132 // confused.
3133 if (getLangOpts().AsmPreprocessor && Tok.isNot(K: tok::eod)) {
3134 LastTok.setKind(tok::unknown);
3135 Tokens.push_back(Elt: LastTok);
 // No lex here: Tok (the token after '#') is handled by the next
 // iteration.
3136 continue;
3137 } else {
3138 Diag(Tok, diag::err_pp_stringize_not_parameter)
3139 << LastTok.is(tok::hashat);
3140 return nullptr;
3141 }
3142 }
3143
3144 // Things look ok, add the '#' and param name tokens to the macro.
3145 Tokens.push_back(Elt: LastTok);
3146
3147 // If the token following '#' is VAOPT, let the next iteration handle it
3148 // and check it for correctness, otherwise add the token and prime the
3149 // loop with the next one.
3150 if (!VAOCtx.isVAOptToken(T: Tok)) {
3151 Tokens.push_back(Elt: Tok);
3152 LastTok = Tok;
3153
3154 // Get the next token of the macro.
3155 LexUnexpandedToken(Result&: Tok);
3156 }
3157 }
 // The body ended while still inside __VA_OPT__( ... : report the missing
 // ')' and point at the unmatched '('.
3158 if (VAOCtx.isInVAOpt()) {
3159 assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3160 Diag(Tok, diag::err_pp_expected_after)
3161 << LastTok.getKind() << tok::r_paren;
3162 Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
3163 return nullptr;
3164 }
3165 }
 // The definition ends at the last token consumed (see LastTok above).
3166 MI->setDefinitionEndLoc(LastTok.getLocation());
3167
3168 MI->setTokens(Tokens, PPAllocator&: BP);
3169 return MI;
3170}
3171
3172static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3173 return II->isStr(Str: "__strong") || II->isStr(Str: "__weak") ||
3174 II->isStr(Str: "__unsafe_unretained") || II->isStr(Str: "__autoreleasing");
3175}
3176
3177/// HandleDefineDirective - Implements \#define. This consumes the entire macro
3178/// line then lets the caller lex the next real token.
3179void Preprocessor::HandleDefineDirective(
3180 Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3181 ++NumDefined;
3182
3183 Token MacroNameTok;
3184 bool MacroShadowsKeyword;
3185 ReadMacroName(MacroNameTok, isDefineUndef: MU_Define, ShadowFlag: &MacroShadowsKeyword);
3186
3187 // Error reading macro name? If so, diagnostic already issued.
3188 if (MacroNameTok.is(K: tok::eod))
3189 return;
3190
3191 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3192 // Issue a final pragma warning if we're defining a macro that was has been
3193 // undefined and is being redefined.
3194 if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3195 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/false);
3196
3197 // If we are supposed to keep comments in #defines, reenable comment saving
3198 // mode.
3199 if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3200
3201 MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3202 MacroNameTok, ImmediatelyAfterHeaderGuard);
3203
3204 if (!MI) return;
3205
3206 if (MacroShadowsKeyword &&
3207 !isConfigurationPattern(MacroName&: MacroNameTok, MI, LOptions: getLangOpts())) {
3208 Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3209 }
3210 // Check that there is no paste (##) operator at the beginning or end of the
3211 // replacement list.
3212 unsigned NumTokens = MI->getNumTokens();
3213 if (NumTokens != 0) {
3214 if (MI->getReplacementToken(Tok: 0).is(K: tok::hashhash)) {
3215 Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3216 return;
3217 }
3218 if (MI->getReplacementToken(Tok: NumTokens-1).is(K: tok::hashhash)) {
3219 Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3220 return;
3221 }
3222 }
3223
3224 // When skipping just warn about macros that do not match.
3225 if (SkippingUntilPCHThroughHeader) {
3226 const MacroInfo *OtherMI = getMacroInfo(II: MacroNameTok.getIdentifierInfo());
3227 if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3228 /*Syntactic=*/LangOpts.MicrosoftExt))
3229 Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3230 << MacroNameTok.getIdentifierInfo();
3231 // Issue the diagnostic but allow the change if msvc extensions are enabled
3232 if (!LangOpts.MicrosoftExt)
3233 return;
3234 }
3235
3236 // Finally, if this identifier already had a macro defined for it, verify that
3237 // the macro bodies are identical, and issue diagnostics if they are not.
3238 if (const MacroInfo *OtherMI=getMacroInfo(II: MacroNameTok.getIdentifierInfo())) {
3239 // Final macros are hard-mode: they always warn. Even if the bodies are
3240 // identical. Even if they are in system headers. Even if they are things we
3241 // would silently allow in the past.
3242 if (MacroNameTok.getIdentifierInfo()->isFinal())
3243 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/false);
3244
3245 // In Objective-C, ignore attempts to directly redefine the builtin
3246 // definitions of the ownership qualifiers. It's still possible to
3247 // #undef them.
3248 if (getLangOpts().ObjC &&
3249 SourceMgr.getFileID(SpellingLoc: OtherMI->getDefinitionLoc()) ==
3250 getPredefinesFileID() &&
3251 isObjCProtectedMacro(II: MacroNameTok.getIdentifierInfo())) {
3252 // Warn if it changes the tokens.
3253 if ((!getDiagnostics().getSuppressSystemWarnings() ||
3254 !SourceMgr.isInSystemHeader(Loc: DefineTok.getLocation())) &&
3255 !MI->isIdenticalTo(Other: *OtherMI, PP&: *this,
3256 /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt)) {
3257 Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3258 }
3259 assert(!OtherMI->isWarnIfUnused());
3260 return;
3261 }
3262
3263 // It is very common for system headers to have tons of macro redefinitions
3264 // and for warnings to be disabled in system headers. If this is the case,
3265 // then don't bother calling MacroInfo::isIdenticalTo.
3266 if (!getDiagnostics().getSuppressSystemWarnings() ||
3267 !SourceMgr.isInSystemHeader(Loc: DefineTok.getLocation())) {
3268
3269 if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3270 Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3271
3272 // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3273 // C++ [cpp.predefined]p4, but allow it as an extension.
3274 if (isLanguageDefinedBuiltin(SourceMgr, MI: OtherMI, MacroName: II->getName()))
3275 Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3276 // Macros must be identical. This means all tokens and whitespace
3277 // separation must be the same. C99 6.10.3p2.
3278 else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3279 !MI->isIdenticalTo(Other: *OtherMI, PP&: *this, /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt)) {
3280 Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3281 << MacroNameTok.getIdentifierInfo();
3282 Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3283 }
3284 }
3285 if (OtherMI->isWarnIfUnused())
3286 WarnUnusedMacroLocs.erase(V: OtherMI->getDefinitionLoc());
3287 }
3288
3289 DefMacroDirective *MD =
3290 appendDefMacroDirective(II: MacroNameTok.getIdentifierInfo(), MI);
3291
3292 assert(!MI->isUsed());
3293 // If we need warning for not using the macro, add its location in the
3294 // warn-because-unused-macro set. If it gets used it will be removed from set.
3295 if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3296 !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3297 !MacroExpansionInDirectivesOverride &&
3298 getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3299 getPredefinesFileID()) {
3300 MI->setIsWarnIfUnused(true);
3301 WarnUnusedMacroLocs.insert(V: MI->getDefinitionLoc());
3302 }
3303
3304 // If the callbacks want to know, tell them about the macro definition.
3305 if (Callbacks)
3306 Callbacks->MacroDefined(MacroNameTok, MD);
3307
3308 // If we're in MS compatibility mode and the macro being defined is the
3309 // assert macro, implicitly add a macro definition for static_assert to work
3310 // around their broken assert.h header file in C. Only do so if there isn't
3311 // already a static_assert macro defined.
3312 if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3313 MacroNameTok.getIdentifierInfo()->isStr(Str: "assert") &&
3314 !isMacroDefined(Id: "static_assert")) {
3315 MacroInfo *MI = AllocateMacroInfo(L: SourceLocation());
3316
3317 Token Tok;
3318 Tok.startToken();
3319 Tok.setKind(tok::kw__Static_assert);
3320 Tok.setIdentifierInfo(getIdentifierInfo(Name: "_Static_assert"));
3321 MI->setTokens(Tokens: {Tok}, PPAllocator&: BP);
3322 (void)appendDefMacroDirective(II: getIdentifierInfo(Name: "static_assert"), MI);
3323 }
3324}
3325
3326/// HandleUndefDirective - Implements \#undef.
3327///
3328void Preprocessor::HandleUndefDirective() {
3329 ++NumUndefined;
3330
3331 Token MacroNameTok;
3332 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
3333
3334 // Error reading macro name? If so, diagnostic already issued.
3335 if (MacroNameTok.is(K: tok::eod))
3336 return;
3337
3338 // Check to see if this is the last token on the #undef line.
3339 CheckEndOfDirective(DirType: "undef");
3340
3341 // Okay, we have a valid identifier to undef.
3342 auto *II = MacroNameTok.getIdentifierInfo();
3343 auto MD = getMacroDefinition(II);
3344 UndefMacroDirective *Undef = nullptr;
3345
3346 if (II->isFinal())
3347 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/true);
3348
3349 // If the macro is not defined, this is a noop undef.
3350 if (const MacroInfo *MI = MD.getMacroInfo()) {
3351 if (!MI->isUsed() && MI->isWarnIfUnused())
3352 Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3353
3354 // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3355 // C++ [cpp.predefined]p4, but allow it as an extension.
3356 if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
3357 Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
3358
3359 if (MI->isWarnIfUnused())
3360 WarnUnusedMacroLocs.erase(V: MI->getDefinitionLoc());
3361
3362 Undef = AllocateUndefMacroDirective(UndefLoc: MacroNameTok.getLocation());
3363 }
3364
3365 // If the callbacks want to know, tell them about the macro #undef.
3366 // Note: no matter if the macro was defined or not.
3367 if (Callbacks)
3368 Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3369
3370 if (Undef)
3371 appendMacroDirective(II, MD: Undef);
3372}
3373
3374//===----------------------------------------------------------------------===//
3375// Preprocessor Conditional Directive Handling.
3376//===----------------------------------------------------------------------===//
3377
3378/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef
3379/// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is
3380/// true if any tokens have been returned or pp-directives activated before this
3381/// \#ifndef has been lexed.
3382///
3383void Preprocessor::HandleIfdefDirective(Token &Result,
3384 const Token &HashToken,
3385 bool isIfndef,
3386 bool ReadAnyTokensBeforeDirective) {
3387 ++NumIf;
3388 Token DirectiveTok = Result;
3389
3390 Token MacroNameTok;
3391 ReadMacroName(MacroNameTok);
3392
3393 // Error reading macro name? If so, diagnostic already issued.
3394 if (MacroNameTok.is(K: tok::eod)) {
3395 // Skip code until we get to #endif. This helps with recovery by not
3396 // emitting an error when the #endif is reached.
3397 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(),
3398 IfTokenLoc: DirectiveTok.getLocation(),
3399 /*Foundnonskip*/ FoundNonSkipPortion: false, /*FoundElse*/ false);
3400 return;
3401 }
3402
3403 emitMacroExpansionWarnings(Identifier: MacroNameTok, /*IsIfnDef=*/true);
3404
3405 // Check to see if this is the last token on the #if[n]def line.
3406 CheckEndOfDirective(DirType: isIfndef ? "ifndef" : "ifdef");
3407
3408 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3409 auto MD = getMacroDefinition(II: MII);
3410 MacroInfo *MI = MD.getMacroInfo();
3411
3412 if (CurPPLexer->getConditionalStackDepth() == 0) {
3413 // If the start of a top-level #ifdef and if the macro is not defined,
3414 // inform MIOpt that this might be the start of a proper include guard.
3415 // Otherwise it is some other form of unknown conditional which we can't
3416 // handle.
3417 if (!ReadAnyTokensBeforeDirective && !MI) {
3418 assert(isIfndef && "#ifdef shouldn't reach here");
3419 CurPPLexer->MIOpt.EnterTopLevelIfndef(M: MII, Loc: MacroNameTok.getLocation());
3420 } else
3421 CurPPLexer->MIOpt.EnterTopLevelConditional();
3422 }
3423
3424 // If there is a macro, process it.
3425 if (MI) // Mark it used.
3426 markMacroAsUsed(MI);
3427
3428 if (Callbacks) {
3429 if (isIfndef)
3430 Callbacks->Ifndef(Loc: DirectiveTok.getLocation(), MacroNameTok, MD);
3431 else
3432 Callbacks->Ifdef(Loc: DirectiveTok.getLocation(), MacroNameTok, MD);
3433 }
3434
3435 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3436 getSourceManager().isInMainFile(Loc: DirectiveTok.getLocation());
3437
3438 // Should we include the stuff contained by this directive?
3439 if (PPOpts.SingleFileParseMode && !MI) {
3440 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3441 // the directive blocks.
3442 CurPPLexer->pushConditionalLevel(DirectiveStart: DirectiveTok.getLocation(),
3443 /*wasskip*/WasSkipping: false, /*foundnonskip*/FoundNonSkip: false,
3444 /*foundelse*/FoundElse: false);
3445 } else if (!MI == isIfndef || RetainExcludedCB) {
3446 // Yes, remember that we are inside a conditional, then lex the next token.
3447 CurPPLexer->pushConditionalLevel(DirectiveStart: DirectiveTok.getLocation(),
3448 /*wasskip*/WasSkipping: false, /*foundnonskip*/FoundNonSkip: true,
3449 /*foundelse*/FoundElse: false);
3450 } else {
3451 // No, skip the contents of this block.
3452 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(),
3453 IfTokenLoc: DirectiveTok.getLocation(),
3454 /*Foundnonskip*/ FoundNonSkipPortion: false,
3455 /*FoundElse*/ false);
3456 }
3457}
3458
3459/// HandleIfDirective - Implements the \#if directive.
3460///
3461void Preprocessor::HandleIfDirective(Token &IfToken,
3462 const Token &HashToken,
3463 bool ReadAnyTokensBeforeDirective) {
3464 ++NumIf;
3465
3466 // Parse and evaluate the conditional expression.
3467 IdentifierInfo *IfNDefMacro = nullptr;
3468 const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3469 const bool ConditionalTrue = DER.Conditional;
3470 // Lexer might become invalid if we hit code completion point while evaluating
3471 // expression.
3472 if (!CurPPLexer)
3473 return;
3474
3475 // If this condition is equivalent to #ifndef X, and if this is the first
3476 // directive seen, handle it for the multiple-include optimization.
3477 if (CurPPLexer->getConditionalStackDepth() == 0) {
3478 if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3479 // FIXME: Pass in the location of the macro name, not the 'if' token.
3480 CurPPLexer->MIOpt.EnterTopLevelIfndef(M: IfNDefMacro, Loc: IfToken.getLocation());
3481 else
3482 CurPPLexer->MIOpt.EnterTopLevelConditional();
3483 }
3484
3485 if (Callbacks)
3486 Callbacks->If(
3487 Loc: IfToken.getLocation(), ConditionRange: DER.ExprRange,
3488 ConditionValue: (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3489
3490 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3491 getSourceManager().isInMainFile(Loc: IfToken.getLocation());
3492
3493 // Should we include the stuff contained by this directive?
3494 if (PPOpts.SingleFileParseMode && DER.IncludedUndefinedIds) {
3495 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3496 // the directive blocks.
3497 CurPPLexer->pushConditionalLevel(DirectiveStart: IfToken.getLocation(), /*wasskip*/WasSkipping: false,
3498 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: false);
3499 } else if (ConditionalTrue || RetainExcludedCB) {
3500 // Yes, remember that we are inside a conditional, then lex the next token.
3501 CurPPLexer->pushConditionalLevel(DirectiveStart: IfToken.getLocation(), /*wasskip*/WasSkipping: false,
3502 /*foundnonskip*/FoundNonSkip: true, /*foundelse*/FoundElse: false);
3503 } else {
3504 // No, skip the contents of this block.
3505 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: IfToken.getLocation(),
3506 /*Foundnonskip*/ FoundNonSkipPortion: false,
3507 /*FoundElse*/ false);
3508 }
3509}
3510
3511/// HandleEndifDirective - Implements the \#endif directive.
3512///
3513void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3514 ++NumEndif;
3515
3516 // Check that this is the whole directive.
3517 CheckEndOfDirective(DirType: "endif");
3518
3519 PPConditionalInfo CondInfo;
3520 if (CurPPLexer->popConditionalLevel(CI&: CondInfo)) {
3521 // No conditionals on the stack: this is an #endif without an #if.
3522 Diag(EndifToken, diag::err_pp_endif_without_if);
3523 return;
3524 }
3525
3526 // If this the end of a top-level #endif, inform MIOpt.
3527 if (CurPPLexer->getConditionalStackDepth() == 0)
3528 CurPPLexer->MIOpt.ExitTopLevelConditional();
3529
3530 assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3531 "This code should only be reachable in the non-skipping case!");
3532
3533 if (Callbacks)
3534 Callbacks->Endif(Loc: EndifToken.getLocation(), IfLoc: CondInfo.IfLoc);
3535}
3536
3537/// HandleElseDirective - Implements the \#else directive.
3538///
3539void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3540 ++NumElse;
3541
3542 // #else directive in a non-skipping conditional... start skipping.
3543 CheckEndOfDirective(DirType: "else");
3544
3545 PPConditionalInfo CI;
3546 if (CurPPLexer->popConditionalLevel(CI)) {
3547 Diag(Result, diag::pp_err_else_without_if);
3548 return;
3549 }
3550
3551 // If this is a top-level #else, inform the MIOpt.
3552 if (CurPPLexer->getConditionalStackDepth() == 0)
3553 CurPPLexer->MIOpt.EnterTopLevelConditional();
3554
3555 // If this is a #else with a #else before it, report the error.
3556 if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3557
3558 if (Callbacks)
3559 Callbacks->Else(Loc: Result.getLocation(), IfLoc: CI.IfLoc);
3560
3561 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3562 getSourceManager().isInMainFile(Loc: Result.getLocation());
3563
3564 if ((PPOpts.SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3565 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3566 // the directive blocks.
3567 CurPPLexer->pushConditionalLevel(DirectiveStart: CI.IfLoc, /*wasskip*/WasSkipping: false,
3568 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: true);
3569 return;
3570 }
3571
3572 // Finally, skip the rest of the contents of this block.
3573 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: CI.IfLoc,
3574 /*Foundnonskip*/ FoundNonSkipPortion: true,
3575 /*FoundElse*/ true, ElseLoc: Result.getLocation());
3576}
3577
3578/// Implements the \#elif, \#elifdef, and \#elifndef directives.
3579void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3580 const Token &HashToken,
3581 tok::PPKeywordKind Kind) {
3582 PPElifDiag DirKind = Kind == tok::pp_elif ? PED_Elif
3583 : Kind == tok::pp_elifdef ? PED_Elifdef
3584 : PED_Elifndef;
3585 ++NumElse;
3586
3587 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3588 switch (DirKind) {
3589 case PED_Elifdef:
3590 case PED_Elifndef:
3591 unsigned DiagID;
3592 if (LangOpts.CPlusPlus)
3593 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3594 : diag::ext_cxx23_pp_directive;
3595 else
3596 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3597 : diag::ext_c23_pp_directive;
3598 Diag(Tok: ElifToken, DiagID) << DirKind;
3599 break;
3600 default:
3601 break;
3602 }
3603
3604 // #elif directive in a non-skipping conditional... start skipping.
3605 // We don't care what the condition is, because we will always skip it (since
3606 // the block immediately before it was included).
3607 SourceRange ConditionRange = DiscardUntilEndOfDirective();
3608
3609 PPConditionalInfo CI;
3610 if (CurPPLexer->popConditionalLevel(CI)) {
3611 Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3612 return;
3613 }
3614
3615 // If this is a top-level #elif, inform the MIOpt.
3616 if (CurPPLexer->getConditionalStackDepth() == 0)
3617 CurPPLexer->MIOpt.EnterTopLevelConditional();
3618
3619 // If this is a #elif with a #else before it, report the error.
3620 if (CI.FoundElse)
3621 Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3622
3623 if (Callbacks) {
3624 switch (Kind) {
3625 case tok::pp_elif:
3626 Callbacks->Elif(Loc: ElifToken.getLocation(), ConditionRange,
3627 ConditionValue: PPCallbacks::CVK_NotEvaluated, IfLoc: CI.IfLoc);
3628 break;
3629 case tok::pp_elifdef:
3630 Callbacks->Elifdef(Loc: ElifToken.getLocation(), ConditionRange, IfLoc: CI.IfLoc);
3631 break;
3632 case tok::pp_elifndef:
3633 Callbacks->Elifndef(Loc: ElifToken.getLocation(), ConditionRange, IfLoc: CI.IfLoc);
3634 break;
3635 default:
3636 assert(false && "unexpected directive kind");
3637 break;
3638 }
3639 }
3640
3641 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3642 getSourceManager().isInMainFile(Loc: ElifToken.getLocation());
3643
3644 if ((PPOpts.SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3645 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3646 // the directive blocks.
3647 CurPPLexer->pushConditionalLevel(DirectiveStart: ElifToken.getLocation(), /*wasskip*/WasSkipping: false,
3648 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: false);
3649 return;
3650 }
3651
3652 // Finally, skip the rest of the contents of this block.
3653 SkipExcludedConditionalBlock(
3654 HashTokenLoc: HashToken.getLocation(), IfTokenLoc: CI.IfLoc, /*Foundnonskip*/ FoundNonSkipPortion: true,
3655 /*FoundElse*/ CI.FoundElse, ElseLoc: ElifToken.getLocation());
3656}
3657
3658std::optional<LexEmbedParametersResult>
3659Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3660 LexEmbedParametersResult Result{};
3661 tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3662
3663 auto DiagMismatchedBracesAndSkipToEOD =
3664 [&](tok::TokenKind Expected,
3665 std::pair<tok::TokenKind, SourceLocation> Matches) {
3666 Diag(CurTok, diag::err_expected) << Expected;
3667 Diag(Matches.second, diag::note_matching) << Matches.first;
3668 if (CurTok.isNot(K: tok::eod))
3669 DiscardUntilEndOfDirective(Tmp&: CurTok);
3670 };
3671
3672 auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3673 if (CurTok.isNot(K: Kind)) {
3674 Diag(CurTok, diag::err_expected) << Kind;
3675 if (CurTok.isNot(K: tok::eod))
3676 DiscardUntilEndOfDirective(Tmp&: CurTok);
3677 return false;
3678 }
3679 return true;
3680 };
3681
3682 // C23 6.10:
3683 // pp-parameter-name:
3684 // pp-standard-parameter
3685 // pp-prefixed-parameter
3686 //
3687 // pp-standard-parameter:
3688 // identifier
3689 //
3690 // pp-prefixed-parameter:
3691 // identifier :: identifier
3692 auto LexPPParameterName = [&]() -> std::optional<std::string> {
3693 // We expect the current token to be an identifier; if it's not, things
3694 // have gone wrong.
3695 if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3696 return std::nullopt;
3697
3698 const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3699
3700 // Lex another token; it is either a :: or we're done with the parameter
3701 // name.
3702 LexNonComment(Result&: CurTok);
3703 if (CurTok.is(K: tok::coloncolon)) {
3704 // We found a ::, so lex another identifier token.
3705 LexNonComment(Result&: CurTok);
3706 if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3707 return std::nullopt;
3708
3709 const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3710
3711 // Lex another token so we're past the name.
3712 LexNonComment(Result&: CurTok);
3713 return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3714 }
3715 return Prefix->getName().str();
3716 };
3717
3718 // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3719 // this document as an identifier pp_param and an identifier of the form
3720 // __pp_param__ shall behave the same when used as a preprocessor parameter,
3721 // except for the spelling.
3722 auto NormalizeParameterName = [](StringRef Name) {
3723 if (Name.size() > 4 && Name.starts_with(Prefix: "__") && Name.ends_with(Suffix: "__"))
3724 return Name.substr(Start: 2, N: Name.size() - 4);
3725 return Name;
3726 };
3727
3728 auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3729 // we have a limit parameter and its internals are processed using
3730 // evaluation rules from #if.
3731 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3732 return std::nullopt;
3733
3734 // We do not consume the ( because EvaluateDirectiveExpression will lex
3735 // the next token for us.
3736 IdentifierInfo *ParameterIfNDef = nullptr;
3737 bool EvaluatedDefined;
3738 DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3739 IfNDefMacro&: ParameterIfNDef, Tok&: CurTok, EvaluatedDefined, /*CheckForEOD=*/CheckForEoD: false);
3740
3741 if (!LimitEvalResult.Value) {
3742 // If there was an error evaluating the directive expression, we expect
3743 // to be at the end of directive token.
3744 assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3745 return std::nullopt;
3746 }
3747
3748 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3749 return std::nullopt;
3750
3751 // Eat the ).
3752 LexNonComment(Result&: CurTok);
3753
3754 // C23 6.10.3.2p2: The token defined shall not appear within the constant
3755 // expression.
3756 if (EvaluatedDefined) {
3757 Diag(CurTok, diag::err_defined_in_pp_embed);
3758 return std::nullopt;
3759 }
3760
3761 if (LimitEvalResult.Value) {
3762 const llvm::APSInt &Result = *LimitEvalResult.Value;
3763 if (Result.isNegative()) {
3764 Diag(CurTok, diag::err_requires_positive_value)
3765 << toString(Result, 10) << /*positive*/ 0;
3766 return std::nullopt;
3767 }
3768 return Result.getLimitedValue();
3769 }
3770 return std::nullopt;
3771 };
3772
3773 auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3774 switch (Kind) {
3775 case tok::l_paren:
3776 return tok::r_paren;
3777 case tok::l_brace:
3778 return tok::r_brace;
3779 case tok::l_square:
3780 return tok::r_square;
3781 default:
3782 llvm_unreachable("should not get here");
3783 }
3784 };
3785
3786 auto LexParenthesizedBalancedTokenSoup =
3787 [&](llvm::SmallVectorImpl<Token> &Tokens) {
3788 std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3789
3790 // We expect the current token to be a left paren.
3791 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3792 return false;
3793 LexNonComment(Result&: CurTok); // Eat the (
3794
3795 bool WaitingForInnerCloseParen = false;
3796 while (CurTok.isNot(K: tok::eod) &&
3797 (WaitingForInnerCloseParen || CurTok.isNot(K: tok::r_paren))) {
3798 switch (CurTok.getKind()) {
3799 default: // Shutting up diagnostics about not fully-covered switch.
3800 break;
3801 case tok::l_paren:
3802 WaitingForInnerCloseParen = true;
3803 [[fallthrough]];
3804 case tok::l_brace:
3805 case tok::l_square:
3806 BracketStack.push_back(x: {CurTok.getKind(), CurTok.getLocation()});
3807 break;
3808 case tok::r_paren:
3809 WaitingForInnerCloseParen = false;
3810 [[fallthrough]];
3811 case tok::r_brace:
3812 case tok::r_square: {
3813 tok::TokenKind Matching =
3814 GetMatchingCloseBracket(BracketStack.back().first);
3815 if (BracketStack.empty() || CurTok.getKind() != Matching) {
3816 DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3817 return false;
3818 }
3819 BracketStack.pop_back();
3820 } break;
3821 }
3822 Tokens.push_back(Elt: CurTok);
3823 LexNonComment(Result&: CurTok);
3824 }
3825
3826 // When we're done, we want to eat the closing paren.
3827 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3828 return false;
3829
3830 LexNonComment(Result&: CurTok); // Eat the )
3831 return true;
3832 };
3833
3834 LexNonComment(Result&: CurTok); // Prime the pump.
3835 while (!CurTok.isOneOf(K1: EndTokenKind, K2: tok::eod)) {
3836 SourceLocation ParamStartLoc = CurTok.getLocation();
3837 std::optional<std::string> ParamName = LexPPParameterName();
3838 if (!ParamName)
3839 return std::nullopt;
3840 StringRef Parameter = NormalizeParameterName(*ParamName);
3841
3842 // Lex the parameters (dependent on the parameter type we want!).
3843 //
3844 // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3845 // one time in the embed parameter sequence.
3846 if (Parameter == "limit") {
3847 if (Result.MaybeLimitParam)
3848 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3849
3850 std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3851 if (!Limit)
3852 return std::nullopt;
3853 Result.MaybeLimitParam =
3854 PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3855 } else if (Parameter == "clang::offset") {
3856 if (Result.MaybeOffsetParam)
3857 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3858
3859 std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3860 if (!Offset)
3861 return std::nullopt;
3862 Result.MaybeOffsetParam = PPEmbedParameterOffset{
3863 *Offset, {ParamStartLoc, CurTok.getLocation()}};
3864 } else if (Parameter == "prefix") {
3865 if (Result.MaybePrefixParam)
3866 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3867
3868 SmallVector<Token, 4> Soup;
3869 if (!LexParenthesizedBalancedTokenSoup(Soup))
3870 return std::nullopt;
3871 Result.MaybePrefixParam = PPEmbedParameterPrefix{
3872 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3873 } else if (Parameter == "suffix") {
3874 if (Result.MaybeSuffixParam)
3875 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3876
3877 SmallVector<Token, 4> Soup;
3878 if (!LexParenthesizedBalancedTokenSoup(Soup))
3879 return std::nullopt;
3880 Result.MaybeSuffixParam = PPEmbedParameterSuffix{
3881 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3882 } else if (Parameter == "if_empty") {
3883 if (Result.MaybeIfEmptyParam)
3884 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3885
3886 SmallVector<Token, 4> Soup;
3887 if (!LexParenthesizedBalancedTokenSoup(Soup))
3888 return std::nullopt;
3889 Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
3890 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3891 } else {
3892 ++Result.UnrecognizedParams;
3893
3894 // If there's a left paren, we need to parse a balanced token sequence
3895 // and just eat those tokens.
3896 if (CurTok.is(K: tok::l_paren)) {
3897 SmallVector<Token, 4> Soup;
3898 if (!LexParenthesizedBalancedTokenSoup(Soup))
3899 return std::nullopt;
3900 }
3901 if (!ForHasEmbed) {
3902 Diag(ParamStartLoc, diag::err_pp_unknown_parameter) << 1 << Parameter;
3903 if (CurTok.isNot(K: tok::eod))
3904 DiscardUntilEndOfDirective(Tmp&: CurTok);
3905 return std::nullopt;
3906 }
3907 }
3908 }
3909 return Result;
3910}
3911
3912void Preprocessor::HandleEmbedDirectiveImpl(
3913 SourceLocation HashLoc, const LexEmbedParametersResult &Params,
3914 StringRef BinaryContents, StringRef FileName) {
3915 if (BinaryContents.empty()) {
3916 // If we have no binary contents, the only thing we need to emit are the
3917 // if_empty tokens, if any.
3918 // FIXME: this loses AST fidelity; nothing in the compiler will see that
3919 // these tokens came from #embed. We have to hack around this when printing
3920 // preprocessed output. The same is true for prefix and suffix tokens.
3921 if (Params.MaybeIfEmptyParam) {
3922 ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
3923 size_t TokCount = Toks.size();
3924 auto NewToks = std::make_unique<Token[]>(num: TokCount);
3925 llvm::copy(Range&: Toks, Out: NewToks.get());
3926 EnterTokenStream(Toks: std::move(NewToks), NumToks: TokCount, DisableMacroExpansion: true, IsReinject: true);
3927 }
3928 return;
3929 }
3930
3931 size_t NumPrefixToks = Params.PrefixTokenCount(),
3932 NumSuffixToks = Params.SuffixTokenCount();
3933 size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
3934 size_t CurIdx = 0;
3935 auto Toks = std::make_unique<Token[]>(num: TotalNumToks);
3936
3937 // Add the prefix tokens, if any.
3938 if (Params.MaybePrefixParam) {
3939 llvm::copy(Range: Params.MaybePrefixParam->Tokens, Out: &Toks[CurIdx]);
3940 CurIdx += NumPrefixToks;
3941 }
3942
3943 EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
3944 Data->BinaryData = BinaryContents;
3945 Data->FileName = FileName;
3946
3947 Toks[CurIdx].startToken();
3948 Toks[CurIdx].setKind(tok::annot_embed);
3949 Toks[CurIdx].setAnnotationRange(HashLoc);
3950 Toks[CurIdx++].setAnnotationValue(Data);
3951
3952 // Now add the suffix tokens, if any.
3953 if (Params.MaybeSuffixParam) {
3954 llvm::copy(Range: Params.MaybeSuffixParam->Tokens, Out: &Toks[CurIdx]);
3955 CurIdx += NumSuffixToks;
3956 }
3957
3958 assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
3959 EnterTokenStream(Toks: std::move(Toks), NumToks: TotalNumToks, DisableMacroExpansion: true, IsReinject: true);
3960}
3961
3962void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
3963 const FileEntry *LookupFromFile) {
3964 // Give the usual extension/compatibility warnings.
3965 if (LangOpts.C23)
3966 Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
3967 else
3968 Diag(EmbedTok, diag::ext_pp_embed_directive)
3969 << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
3970
3971 // Parse the filename header
3972 Token FilenameTok;
3973 if (LexHeaderName(Result&: FilenameTok))
3974 return;
3975
3976 if (FilenameTok.isNot(K: tok::header_name)) {
3977 Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
3978 if (FilenameTok.isNot(K: tok::eod))
3979 DiscardUntilEndOfDirective();
3980 return;
3981 }
3982
3983 // Parse the optional sequence of
3984 // directive-parameters:
3985 // identifier parameter-name-list[opt] directive-argument-list[opt]
3986 // directive-argument-list:
3987 // '(' balanced-token-sequence ')'
3988 // parameter-name-list:
3989 // '::' identifier parameter-name-list[opt]
3990 Token CurTok;
3991 std::optional<LexEmbedParametersResult> Params =
3992 LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
3993
3994 assert((Params || CurTok.is(tok::eod)) &&
3995 "expected success or to be at the end of the directive");
3996 if (!Params)
3997 return;
3998
3999 // Now, splat the data out!
4000 SmallString<128> FilenameBuffer;
4001 StringRef Filename = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
4002 StringRef OriginalFilename = Filename;
4003 bool isAngled =
4004 GetIncludeFilenameSpelling(Loc: FilenameTok.getLocation(), Buffer&: Filename);
4005 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
4006 // error.
4007 assert(!Filename.empty());
4008 OptionalFileEntryRef MaybeFileRef =
4009 this->LookupEmbedFile(Filename, isAngled, OpenFile: true, LookupFromFile);
4010 if (!MaybeFileRef) {
4011 // could not find file
4012 if (Callbacks && Callbacks->EmbedFileNotFound(FileName: OriginalFilename)) {
4013 return;
4014 }
4015 Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
4016 return;
4017 }
4018
4019 if (MaybeFileRef->isDeviceFile()) {
4020 Diag(FilenameTok, diag::err_pp_embed_device_file) << Filename;
4021 return;
4022 }
4023
4024 std::optional<llvm::MemoryBufferRef> MaybeFile =
4025 getSourceManager().getMemoryBufferForFileOrNone(File: *MaybeFileRef);
4026 if (!MaybeFile) {
4027 // could not find file
4028 Diag(FilenameTok, diag::err_cannot_open_file)
4029 << Filename << "a buffer to the contents could not be created";
4030 return;
4031 }
4032 StringRef BinaryContents = MaybeFile->getBuffer();
4033
4034 // The order is important between 'offset' and 'limit'; we want to offset
4035 // first and then limit second; otherwise we may reduce the notional resource
4036 // size to something too small to offset into.
4037 if (Params->MaybeOffsetParam) {
4038 // FIXME: just like with the limit() and if_empty() parameters, this loses
4039 // source fidelity in the AST; it has no idea that there was an offset
4040 // involved.
4041 // offsets all the way to the end of the file make for an empty file.
4042 BinaryContents = BinaryContents.substr(Start: Params->MaybeOffsetParam->Offset);
4043 }
4044
4045 if (Params->MaybeLimitParam) {
4046 // FIXME: just like with the clang::offset() and if_empty() parameters,
4047 // this loses source fidelity in the AST; it has no idea there was a limit
4048 // involved.
4049 BinaryContents = BinaryContents.substr(Start: 0, N: Params->MaybeLimitParam->Limit);
4050 }
4051
4052 if (Callbacks)
4053 Callbacks->EmbedDirective(HashLoc, FileName: Filename, IsAngled: isAngled, File: MaybeFileRef,
4054 Params: *Params);
4055 // getSpelling() may return a buffer from the token itself or it may use the
4056 // SmallString buffer we provided. getSpelling() may also return a string that
4057 // is actually longer than FilenameTok.getLength(), so we first pass a
4058 // locally created buffer to getSpelling() to get the string of real length
4059 // and then we allocate a long living buffer because the buffer we used
4060 // previously will only live till the end of this function and we need
4061 // filename info to live longer.
4062 void *Mem = BP.Allocate(Size: OriginalFilename.size(), Alignment: alignof(char *));
4063 memcpy(dest: Mem, src: OriginalFilename.data(), n: OriginalFilename.size());
4064 StringRef FilenameToGo =
4065 StringRef(static_cast<char *>(Mem), OriginalFilename.size());
4066 HandleEmbedDirectiveImpl(HashLoc, Params: *Params, BinaryContents, FileName: FilenameToGo);
4067}
4068

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of clang/lib/Lex/PPDirectives.cpp