1//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This code simply runs the preprocessor on the input file and prints out the
10// result. This is the traditional behavior of the -E option.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Frontend/Utils.h"
15#include "clang/Basic/CharInfo.h"
16#include "clang/Basic/Diagnostic.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Frontend/PreprocessorOutputOptions.h"
19#include "clang/Lex/MacroInfo.h"
20#include "clang/Lex/PPCallbacks.h"
21#include "clang/Lex/Pragma.h"
22#include "clang/Lex/Preprocessor.h"
23#include "clang/Lex/TokenConcatenation.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallString.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/Support/ErrorHandling.h"
28#include "llvm/Support/raw_ostream.h"
29#include <cstdio>
30using namespace clang;
31
32/// PrintMacroDefinition - Print a macro definition in a form that will be
33/// properly accepted back as a definition.
34static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35 Preprocessor &PP, raw_ostream *OS) {
36 *OS << "#define " << II.getName();
37
38 if (MI.isFunctionLike()) {
39 *OS << '(';
40 if (!MI.param_empty()) {
41 MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
42 for (; AI+1 != E; ++AI) {
43 *OS << (*AI)->getName();
44 *OS << ',';
45 }
46
47 // Last argument.
48 if ((*AI)->getName() == "__VA_ARGS__")
49 *OS << "...";
50 else
51 *OS << (*AI)->getName();
52 }
53
54 if (MI.isGNUVarargs())
55 *OS << "..."; // #define foo(x...)
56
57 *OS << ')';
58 }
59
60 // GCC always emits a space, even if the macro body is empty. However, do not
61 // want to emit two spaces if the first token has a leading space.
62 if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
63 *OS << ' ';
64
65 SmallString<128> SpellingBuffer;
66 for (const auto &T : MI.tokens()) {
67 if (T.hasLeadingSpace())
68 *OS << ' ';
69
70 *OS << PP.getSpelling(Tok: T, Buffer&: SpellingBuffer);
71 }
72}
73
74//===----------------------------------------------------------------------===//
75// Preprocessed token printer
76//===----------------------------------------------------------------------===//
77
78namespace {
79class PrintPPOutputPPCallbacks : public PPCallbacks {
80 Preprocessor &PP;
81 SourceManager &SM;
82 TokenConcatenation ConcatInfo;
83public:
84 raw_ostream *OS;
85private:
86 unsigned CurLine;
87
88 bool EmittedTokensOnThisLine;
89 bool EmittedDirectiveOnThisLine;
90 SrcMgr::CharacteristicKind FileType;
91 SmallString<512> CurFilename;
92 bool Initialized;
93 bool DisableLineMarkers;
94 bool DumpDefines;
95 bool DumpIncludeDirectives;
96 bool UseLineDirectives;
97 bool IsFirstFileEntered;
98 bool MinimizeWhitespace;
99 bool DirectivesOnly;
100 bool KeepSystemIncludes;
101 raw_ostream *OrigOS;
102 std::unique_ptr<llvm::raw_null_ostream> NullOS;
103
104 Token PrevTok;
105 Token PrevPrevTok;
106
107public:
108 PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
109 bool defines, bool DumpIncludeDirectives,
110 bool UseLineDirectives, bool MinimizeWhitespace,
111 bool DirectivesOnly, bool KeepSystemIncludes)
112 : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
113 DisableLineMarkers(lineMarkers), DumpDefines(defines),
114 DumpIncludeDirectives(DumpIncludeDirectives),
115 UseLineDirectives(UseLineDirectives),
116 MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
117 KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
118 CurLine = 0;
119 CurFilename += "<uninit>";
120 EmittedTokensOnThisLine = false;
121 EmittedDirectiveOnThisLine = false;
122 FileType = SrcMgr::C_User;
123 Initialized = false;
124 IsFirstFileEntered = false;
125 if (KeepSystemIncludes)
126 NullOS = std::make_unique<llvm::raw_null_ostream>();
127
128 PrevTok.startToken();
129 PrevPrevTok.startToken();
130 }
131
132 bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
133
134 void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
135 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
136
137 void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
138 bool hasEmittedDirectiveOnThisLine() const {
139 return EmittedDirectiveOnThisLine;
140 }
141
142 /// Ensure that the output stream position is at the beginning of a new line
143 /// and inserts one if it does not. It is intended to ensure that directives
144 /// inserted by the directives not from the input source (such as #line) are
145 /// in the first column. To insert newlines that represent the input, use
146 /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true).
147 void startNewLineIfNeeded();
148
149 void FileChanged(SourceLocation Loc, FileChangeReason Reason,
150 SrcMgr::CharacteristicKind FileType,
151 FileID PrevFID) override;
152 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
153 StringRef FileName, bool IsAngled,
154 CharSourceRange FilenameRange,
155 OptionalFileEntryRef File, StringRef SearchPath,
156 StringRef RelativePath, const Module *SuggestedModule,
157 bool ModuleImported,
158 SrcMgr::CharacteristicKind FileType) override;
159 void Ident(SourceLocation Loc, StringRef str) override;
160 void PragmaMessage(SourceLocation Loc, StringRef Namespace,
161 PragmaMessageKind Kind, StringRef Str) override;
162 void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
163 void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
164 void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
165 void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
166 diag::Severity Map, StringRef Str) override;
167 void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec,
168 ArrayRef<int> Ids) override;
169 void PragmaWarningPush(SourceLocation Loc, int Level) override;
170 void PragmaWarningPop(SourceLocation Loc) override;
171 void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override;
172 void PragmaExecCharsetPop(SourceLocation Loc) override;
173 void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
174 void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
175
176 /// Insert whitespace before emitting the next token.
177 ///
178 /// @param Tok Next token to be emitted.
179 /// @param RequireSpace Ensure at least one whitespace is emitted. Useful
180 /// if non-tokens have been emitted to the stream.
181 /// @param RequireSameLine Never emit newlines. Useful when semantics depend
182 /// on being on the same line, such as directives.
183 void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace,
184 bool RequireSameLine);
185
186 /// Move to the line of the provided source location. This will
187 /// return true if a newline was inserted or if
188 /// the requested location is the first token on the first line.
189 /// In these cases the next output will be the first column on the line and
190 /// make it possible to insert indention. The newline was inserted
191 /// implicitly when at the beginning of the file.
192 ///
193 /// @param Tok Token where to move to.
194 /// @param RequireStartOfLine Whether the next line depends on being in the
195 /// first column, such as a directive.
196 ///
197 /// @return Whether column adjustments are necessary.
198 bool MoveToLine(const Token &Tok, bool RequireStartOfLine) {
199 PresumedLoc PLoc = SM.getPresumedLoc(Loc: Tok.getLocation());
200 unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
201 bool IsFirstInFile =
202 Tok.isAtStartOfLine() && PLoc.isValid() && PLoc.getLine() == 1;
203 return MoveToLine(LineNo: TargetLine, RequireStartOfLine) || IsFirstInFile;
204 }
205
206 /// Move to the line of the provided source location. Returns true if a new
207 /// line was inserted.
208 bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) {
209 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
210 unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
211 return MoveToLine(LineNo: TargetLine, RequireStartOfLine);
212 }
213 bool MoveToLine(unsigned LineNo, bool RequireStartOfLine);
214
215 bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
216 const Token &Tok) {
217 return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
218 }
219 void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
220 unsigned ExtraLen=0);
221 bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
222 void HandleNewlinesInToken(const char *TokStr, unsigned Len);
223
224 /// MacroDefined - This hook is called whenever a macro definition is seen.
225 void MacroDefined(const Token &MacroNameTok,
226 const MacroDirective *MD) override;
227
228 /// MacroUndefined - This hook is called whenever a macro #undef is seen.
229 void MacroUndefined(const Token &MacroNameTok,
230 const MacroDefinition &MD,
231 const MacroDirective *Undef) override;
232
233 void BeginModule(const Module *M);
234 void EndModule(const Module *M);
235};
236} // end anonymous namespace
237
238void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
239 const char *Extra,
240 unsigned ExtraLen) {
241 startNewLineIfNeeded();
242
243 // Emit #line directives or GNU line markers depending on what mode we're in.
244 if (UseLineDirectives) {
245 *OS << "#line" << ' ' << LineNo << ' ' << '"';
246 OS->write_escaped(Str: CurFilename);
247 *OS << '"';
248 } else {
249 *OS << '#' << ' ' << LineNo << ' ' << '"';
250 OS->write_escaped(Str: CurFilename);
251 *OS << '"';
252
253 if (ExtraLen)
254 OS->write(Ptr: Extra, Size: ExtraLen);
255
256 if (FileType == SrcMgr::C_System)
257 OS->write(Ptr: " 3", Size: 2);
258 else if (FileType == SrcMgr::C_ExternCSystem)
259 OS->write(Ptr: " 3 4", Size: 4);
260 }
261 *OS << '\n';
262}
263
264/// MoveToLine - Move the output to the source line specified by the location
265/// object. We can do this by emitting some number of \n's, or be emitting a
266/// #line directive. This returns false if already at the specified line, true
267/// if some newlines were emitted.
268bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
269 bool RequireStartOfLine) {
270 // If it is required to start a new line or finish the current, insert
271 // vertical whitespace now and take it into account when moving to the
272 // expected line.
273 bool StartedNewLine = false;
274 if ((RequireStartOfLine && EmittedTokensOnThisLine) ||
275 EmittedDirectiveOnThisLine) {
276 *OS << '\n';
277 StartedNewLine = true;
278 CurLine += 1;
279 EmittedTokensOnThisLine = false;
280 EmittedDirectiveOnThisLine = false;
281 }
282
283 // If this line is "close enough" to the original line, just print newlines,
284 // otherwise print a #line directive.
285 if (CurLine == LineNo) {
286 // Nothing to do if we are already on the correct line.
287 } else if (MinimizeWhitespace && DisableLineMarkers) {
288 // With -E -P -fminimize-whitespace, don't emit anything if not necessary.
289 } else if (!StartedNewLine && LineNo - CurLine == 1) {
290 // Printing a single line has priority over printing a #line directive, even
291 // when minimizing whitespace which otherwise would print #line directives
292 // for every single line.
293 *OS << '\n';
294 StartedNewLine = true;
295 } else if (!DisableLineMarkers) {
296 if (LineNo - CurLine <= 8) {
297 const char *NewLines = "\n\n\n\n\n\n\n\n";
298 OS->write(Ptr: NewLines, Size: LineNo - CurLine);
299 } else {
300 // Emit a #line or line marker.
301 WriteLineInfo(LineNo, Extra: nullptr, ExtraLen: 0);
302 }
303 StartedNewLine = true;
304 } else if (EmittedTokensOnThisLine) {
305 // If we are not on the correct line and don't need to be line-correct,
306 // at least ensure we start on a new line.
307 *OS << '\n';
308 StartedNewLine = true;
309 }
310
311 if (StartedNewLine) {
312 EmittedTokensOnThisLine = false;
313 EmittedDirectiveOnThisLine = false;
314 }
315
316 CurLine = LineNo;
317 return StartedNewLine;
318}
319
320void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
321 if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
322 *OS << '\n';
323 EmittedTokensOnThisLine = false;
324 EmittedDirectiveOnThisLine = false;
325 }
326}
327
328/// FileChanged - Whenever the preprocessor enters or exits a #include file
329/// it invokes this handler. Update our conception of the current source
330/// position.
331void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
332 FileChangeReason Reason,
333 SrcMgr::CharacteristicKind NewFileType,
334 FileID PrevFID) {
335 // Unless we are exiting a #include, make sure to skip ahead to the line the
336 // #include directive was at.
337 SourceManager &SourceMgr = SM;
338
339 PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
340 if (UserLoc.isInvalid())
341 return;
342
343 unsigned NewLine = UserLoc.getLine();
344
345 if (Reason == PPCallbacks::EnterFile) {
346 SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
347 if (IncludeLoc.isValid())
348 MoveToLine(Loc: IncludeLoc, /*RequireStartOfLine=*/false);
349 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
350 // GCC emits the # directive for this directive on the line AFTER the
351 // directive and emits a bunch of spaces that aren't needed. This is because
352 // otherwise we will emit a line marker for THIS line, which requires an
353 // extra blank line after the directive to avoid making all following lines
354 // off by one. We can do better by simply incrementing NewLine here.
355 NewLine += 1;
356 }
357
358 CurLine = NewLine;
359
360 // In KeepSystemIncludes mode, redirect OS as needed.
361 if (KeepSystemIncludes && (isSystem(CK: FileType) != isSystem(CK: NewFileType)))
362 OS = isSystem(CK: FileType) ? OrigOS : NullOS.get();
363
364 CurFilename.clear();
365 CurFilename += UserLoc.getFilename();
366 FileType = NewFileType;
367
368 if (DisableLineMarkers) {
369 if (!MinimizeWhitespace)
370 startNewLineIfNeeded();
371 return;
372 }
373
374 if (!Initialized) {
375 WriteLineInfo(LineNo: CurLine);
376 Initialized = true;
377 }
378
379 // Do not emit an enter marker for the main file (which we expect is the first
380 // entered file). This matches gcc, and improves compatibility with some tools
381 // which track the # line markers as a way to determine when the preprocessed
382 // output is in the context of the main file.
383 if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
384 IsFirstFileEntered = true;
385 return;
386 }
387
388 switch (Reason) {
389 case PPCallbacks::EnterFile:
390 WriteLineInfo(LineNo: CurLine, Extra: " 1", ExtraLen: 2);
391 break;
392 case PPCallbacks::ExitFile:
393 WriteLineInfo(LineNo: CurLine, Extra: " 2", ExtraLen: 2);
394 break;
395 case PPCallbacks::SystemHeaderPragma:
396 case PPCallbacks::RenameFile:
397 WriteLineInfo(LineNo: CurLine);
398 break;
399 }
400}
401
402void PrintPPOutputPPCallbacks::InclusionDirective(
403 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
404 bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
405 StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule,
406 bool ModuleImported, SrcMgr::CharacteristicKind FileType) {
407 // In -dI mode, dump #include directives prior to dumping their content or
408 // interpretation. Similar for -fkeep-system-includes.
409 if (DumpIncludeDirectives || (KeepSystemIncludes && isSystem(CK: FileType))) {
410 MoveToLine(Loc: HashLoc, /*RequireStartOfLine=*/true);
411 const std::string TokenText = PP.getSpelling(Tok: IncludeTok);
412 assert(!TokenText.empty());
413 *OS << "#" << TokenText << " "
414 << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
415 << " /* clang -E "
416 << (DumpIncludeDirectives ? "-dI" : "-fkeep-system-includes")
417 << " */";
418 setEmittedDirectiveOnThisLine();
419 }
420
421 // When preprocessing, turn implicit imports into module import pragmas.
422 if (ModuleImported) {
423 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
424 case tok::pp_include:
425 case tok::pp_import:
426 case tok::pp_include_next:
427 MoveToLine(Loc: HashLoc, /*RequireStartOfLine=*/true);
428 *OS << "#pragma clang module import "
429 << SuggestedModule->getFullModuleName(AllowStringLiterals: true)
430 << " /* clang -E: implicit import for "
431 << "#" << PP.getSpelling(Tok: IncludeTok) << " "
432 << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
433 << " */";
434 setEmittedDirectiveOnThisLine();
435 break;
436
437 case tok::pp___include_macros:
438 // #__include_macros has no effect on a user of a preprocessed source
439 // file; the only effect is on preprocessing.
440 //
441 // FIXME: That's not *quite* true: it causes the module in question to
442 // be loaded, which can affect downstream diagnostics.
443 break;
444
445 default:
446 llvm_unreachable("unknown include directive kind");
447 break;
448 }
449 }
450}
451
452/// Handle entering the scope of a module during a module compilation.
453void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
454 startNewLineIfNeeded();
455 *OS << "#pragma clang module begin " << M->getFullModuleName(AllowStringLiterals: true);
456 setEmittedDirectiveOnThisLine();
457}
458
459/// Handle leaving the scope of a module during a module compilation.
460void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
461 startNewLineIfNeeded();
462 *OS << "#pragma clang module end /*" << M->getFullModuleName(AllowStringLiterals: true) << "*/";
463 setEmittedDirectiveOnThisLine();
464}
465
466/// Ident - Handle #ident directives when read by the preprocessor.
467///
468void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
469 MoveToLine(Loc, /*RequireStartOfLine=*/true);
470
471 OS->write(Ptr: "#ident ", Size: strlen(s: "#ident "));
472 OS->write(Ptr: S.begin(), Size: S.size());
473 setEmittedTokensOnThisLine();
474}
475
476/// MacroDefined - This hook is called whenever a macro definition is seen.
477void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
478 const MacroDirective *MD) {
479 const MacroInfo *MI = MD->getMacroInfo();
480 // Print out macro definitions in -dD mode and when we have -fdirectives-only
481 // for C++20 header units.
482 if ((!DumpDefines && !DirectivesOnly) ||
483 // Ignore __FILE__ etc.
484 MI->isBuiltinMacro())
485 return;
486
487 SourceLocation DefLoc = MI->getDefinitionLoc();
488 if (DirectivesOnly && !MI->isUsed()) {
489 SourceManager &SM = PP.getSourceManager();
490 if (SM.isWrittenInBuiltinFile(Loc: DefLoc) ||
491 SM.isWrittenInCommandLineFile(Loc: DefLoc))
492 return;
493 }
494 MoveToLine(Loc: DefLoc, /*RequireStartOfLine=*/true);
495 PrintMacroDefinition(II: *MacroNameTok.getIdentifierInfo(), MI: *MI, PP, OS);
496 setEmittedDirectiveOnThisLine();
497}
498
499void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
500 const MacroDefinition &MD,
501 const MacroDirective *Undef) {
502 // Print out macro definitions in -dD mode and when we have -fdirectives-only
503 // for C++20 header units.
504 if (!DumpDefines && !DirectivesOnly)
505 return;
506
507 MoveToLine(Loc: MacroNameTok.getLocation(), /*RequireStartOfLine=*/true);
508 *OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
509 setEmittedDirectiveOnThisLine();
510}
511
512static void outputPrintable(raw_ostream *OS, StringRef Str) {
513 for (unsigned char Char : Str) {
514 if (isPrintable(c: Char) && Char != '\\' && Char != '"')
515 *OS << (char)Char;
516 else // Output anything hard as an octal escape.
517 *OS << '\\'
518 << (char)('0' + ((Char >> 6) & 7))
519 << (char)('0' + ((Char >> 3) & 7))
520 << (char)('0' + ((Char >> 0) & 7));
521 }
522}
523
524void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
525 StringRef Namespace,
526 PragmaMessageKind Kind,
527 StringRef Str) {
528 MoveToLine(Loc, /*RequireStartOfLine=*/true);
529 *OS << "#pragma ";
530 if (!Namespace.empty())
531 *OS << Namespace << ' ';
532 switch (Kind) {
533 case PMK_Message:
534 *OS << "message(\"";
535 break;
536 case PMK_Warning:
537 *OS << "warning \"";
538 break;
539 case PMK_Error:
540 *OS << "error \"";
541 break;
542 }
543
544 outputPrintable(OS, Str);
545 *OS << '"';
546 if (Kind == PMK_Message)
547 *OS << ')';
548 setEmittedDirectiveOnThisLine();
549}
550
551void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
552 StringRef DebugType) {
553 MoveToLine(Loc, /*RequireStartOfLine=*/true);
554
555 *OS << "#pragma clang __debug ";
556 *OS << DebugType;
557
558 setEmittedDirectiveOnThisLine();
559}
560
561void PrintPPOutputPPCallbacks::
562PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
563 MoveToLine(Loc, /*RequireStartOfLine=*/true);
564 *OS << "#pragma " << Namespace << " diagnostic push";
565 setEmittedDirectiveOnThisLine();
566}
567
568void PrintPPOutputPPCallbacks::
569PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
570 MoveToLine(Loc, /*RequireStartOfLine=*/true);
571 *OS << "#pragma " << Namespace << " diagnostic pop";
572 setEmittedDirectiveOnThisLine();
573}
574
575void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
576 StringRef Namespace,
577 diag::Severity Map,
578 StringRef Str) {
579 MoveToLine(Loc, /*RequireStartOfLine=*/true);
580 *OS << "#pragma " << Namespace << " diagnostic ";
581 switch (Map) {
582 case diag::Severity::Remark:
583 *OS << "remark";
584 break;
585 case diag::Severity::Warning:
586 *OS << "warning";
587 break;
588 case diag::Severity::Error:
589 *OS << "error";
590 break;
591 case diag::Severity::Ignored:
592 *OS << "ignored";
593 break;
594 case diag::Severity::Fatal:
595 *OS << "fatal";
596 break;
597 }
598 *OS << " \"" << Str << '"';
599 setEmittedDirectiveOnThisLine();
600}
601
602void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
603 PragmaWarningSpecifier WarningSpec,
604 ArrayRef<int> Ids) {
605 MoveToLine(Loc, /*RequireStartOfLine=*/true);
606
607 *OS << "#pragma warning(";
608 switch(WarningSpec) {
609 case PWS_Default: *OS << "default"; break;
610 case PWS_Disable: *OS << "disable"; break;
611 case PWS_Error: *OS << "error"; break;
612 case PWS_Once: *OS << "once"; break;
613 case PWS_Suppress: *OS << "suppress"; break;
614 case PWS_Level1: *OS << '1'; break;
615 case PWS_Level2: *OS << '2'; break;
616 case PWS_Level3: *OS << '3'; break;
617 case PWS_Level4: *OS << '4'; break;
618 }
619 *OS << ':';
620
621 for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
622 *OS << ' ' << *I;
623 *OS << ')';
624 setEmittedDirectiveOnThisLine();
625}
626
627void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
628 int Level) {
629 MoveToLine(Loc, /*RequireStartOfLine=*/true);
630 *OS << "#pragma warning(push";
631 if (Level >= 0)
632 *OS << ", " << Level;
633 *OS << ')';
634 setEmittedDirectiveOnThisLine();
635}
636
637void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
638 MoveToLine(Loc, /*RequireStartOfLine=*/true);
639 *OS << "#pragma warning(pop)";
640 setEmittedDirectiveOnThisLine();
641}
642
643void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
644 StringRef Str) {
645 MoveToLine(Loc, /*RequireStartOfLine=*/true);
646 *OS << "#pragma character_execution_set(push";
647 if (!Str.empty())
648 *OS << ", " << Str;
649 *OS << ')';
650 setEmittedDirectiveOnThisLine();
651}
652
653void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
654 MoveToLine(Loc, /*RequireStartOfLine=*/true);
655 *OS << "#pragma character_execution_set(pop)";
656 setEmittedDirectiveOnThisLine();
657}
658
659void PrintPPOutputPPCallbacks::
660PragmaAssumeNonNullBegin(SourceLocation Loc) {
661 MoveToLine(Loc, /*RequireStartOfLine=*/true);
662 *OS << "#pragma clang assume_nonnull begin";
663 setEmittedDirectiveOnThisLine();
664}
665
666void PrintPPOutputPPCallbacks::
667PragmaAssumeNonNullEnd(SourceLocation Loc) {
668 MoveToLine(Loc, /*RequireStartOfLine=*/true);
669 *OS << "#pragma clang assume_nonnull end";
670 setEmittedDirectiveOnThisLine();
671}
672
673void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
674 bool RequireSpace,
675 bool RequireSameLine) {
676 // These tokens are not expanded to anything and don't need whitespace before
677 // them.
678 if (Tok.is(K: tok::eof) ||
679 (Tok.isAnnotation() && !Tok.is(K: tok::annot_header_unit) &&
680 !Tok.is(K: tok::annot_module_begin) && !Tok.is(K: tok::annot_module_end) &&
681 !Tok.is(K: tok::annot_repl_input_end)))
682 return;
683
684 // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
685 if ((!RequireSameLine || EmittedDirectiveOnThisLine) &&
686 MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) {
687 if (MinimizeWhitespace) {
688 // Avoid interpreting hash as a directive under -fpreprocessed.
689 if (Tok.is(K: tok::hash))
690 *OS << ' ';
691 } else {
692 // Print out space characters so that the first token on a line is
693 // indented for easy reading.
694 unsigned ColNo = SM.getExpansionColumnNumber(Loc: Tok.getLocation());
695
696 // The first token on a line can have a column number of 1, yet still
697 // expect leading white space, if a macro expansion in column 1 starts
698 // with an empty macro argument, or an empty nested macro expansion. In
699 // this case, move the token to column 2.
700 if (ColNo == 1 && Tok.hasLeadingSpace())
701 ColNo = 2;
702
703 // This hack prevents stuff like:
704 // #define HASH #
705 // HASH define foo bar
706 // From having the # character end up at column 1, which makes it so it
707 // is not handled as a #define next time through the preprocessor if in
708 // -fpreprocessed mode.
709 if (ColNo <= 1 && Tok.is(K: tok::hash))
710 *OS << ' ';
711
712 // Otherwise, indent the appropriate number of spaces.
713 for (; ColNo > 1; --ColNo)
714 *OS << ' ';
715 }
716 } else {
717 // Insert whitespace between the previous and next token if either
718 // - The caller requires it
719 // - The input had whitespace between them and we are not in
720 // whitespace-minimization mode
721 // - The whitespace is necessary to keep the tokens apart and there is not
722 // already a newline between them
723 if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) ||
724 ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) &&
725 AvoidConcat(PrevPrevTok, PrevTok, Tok)))
726 *OS << ' ';
727 }
728
729 PrevPrevTok = PrevTok;
730 PrevTok = Tok;
731}
732
733void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
734 unsigned Len) {
735 unsigned NumNewlines = 0;
736 for (; Len; --Len, ++TokStr) {
737 if (*TokStr != '\n' &&
738 *TokStr != '\r')
739 continue;
740
741 ++NumNewlines;
742
743 // If we have \n\r or \r\n, skip both and count as one line.
744 if (Len != 1 &&
745 (TokStr[1] == '\n' || TokStr[1] == '\r') &&
746 TokStr[0] != TokStr[1]) {
747 ++TokStr;
748 --Len;
749 }
750 }
751
752 if (NumNewlines == 0) return;
753
754 CurLine += NumNewlines;
755}
756
757
758namespace {
759struct UnknownPragmaHandler : public PragmaHandler {
760 const char *Prefix;
761 PrintPPOutputPPCallbacks *Callbacks;
762
763 // Set to true if tokens should be expanded
764 bool ShouldExpandTokens;
765
766 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
767 bool RequireTokenExpansion)
768 : Prefix(prefix), Callbacks(callbacks),
769 ShouldExpandTokens(RequireTokenExpansion) {}
770 void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
771 Token &PragmaTok) override {
772 // Figure out what line we went to and insert the appropriate number of
773 // newline characters.
774 Callbacks->MoveToLine(Loc: PragmaTok.getLocation(), /*RequireStartOfLine=*/true);
775 Callbacks->OS->write(Ptr: Prefix, Size: strlen(s: Prefix));
776 Callbacks->setEmittedTokensOnThisLine();
777
778 if (ShouldExpandTokens) {
779 // The first token does not have expanded macros. Expand them, if
780 // required.
781 auto Toks = std::make_unique<Token[]>(num: 1);
782 Toks[0] = PragmaTok;
783 PP.EnterTokenStream(Toks: std::move(Toks), /*NumToks=*/1,
784 /*DisableMacroExpansion=*/false,
785 /*IsReinject=*/false);
786 PP.Lex(Result&: PragmaTok);
787 }
788
789 // Read and print all of the pragma tokens.
790 bool IsFirst = true;
791 while (PragmaTok.isNot(K: tok::eod)) {
792 Callbacks->HandleWhitespaceBeforeTok(Tok: PragmaTok, /*RequireSpace=*/IsFirst,
793 /*RequireSameLine=*/true);
794 IsFirst = false;
795 std::string TokSpell = PP.getSpelling(Tok: PragmaTok);
796 Callbacks->OS->write(Ptr: &TokSpell[0], Size: TokSpell.size());
797 Callbacks->setEmittedTokensOnThisLine();
798
799 if (ShouldExpandTokens)
800 PP.Lex(Result&: PragmaTok);
801 else
802 PP.LexUnexpandedToken(Result&: PragmaTok);
803 }
804 Callbacks->setEmittedDirectiveOnThisLine();
805 }
806};
807} // end anonymous namespace
808
809
810static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
811 PrintPPOutputPPCallbacks *Callbacks) {
812 bool DropComments = PP.getLangOpts().TraditionalCPP &&
813 !PP.getCommentRetentionState();
814
815 bool IsStartOfLine = false;
816 char Buffer[256];
817 while (true) {
818 // Two lines joined with line continuation ('\' as last character on the
819 // line) must be emitted as one line even though Tok.getLine() returns two
820 // different values. In this situation Tok.isAtStartOfLine() is false even
821 // though it may be the first token on the lexical line. When
822 // dropping/skipping a token that is at the start of a line, propagate the
823 // start-of-line-ness to the next token to not append it to the previous
824 // line.
825 IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine();
826
827 Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false,
828 /*RequireSameLine=*/!IsStartOfLine);
829
830 if (DropComments && Tok.is(K: tok::comment)) {
831 // Skip comments. Normally the preprocessor does not generate
832 // tok::comment nodes at all when not keeping comments, but under
833 // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
834 PP.Lex(Result&: Tok);
835 continue;
836 } else if (Tok.is(K: tok::annot_repl_input_end)) {
837 PP.Lex(Result&: Tok);
838 continue;
839 } else if (Tok.is(K: tok::eod)) {
840 // Don't print end of directive tokens, since they are typically newlines
841 // that mess up our line tracking. These come from unknown pre-processor
842 // directives or hash-prefixed comments in standalone assembly files.
843 PP.Lex(Result&: Tok);
844 // FIXME: The token on the next line after #include should have
845 // Tok.isAtStartOfLine() set.
846 IsStartOfLine = true;
847 continue;
848 } else if (Tok.is(K: tok::annot_module_include)) {
849 // PrintPPOutputPPCallbacks::InclusionDirective handles producing
850 // appropriate output here. Ignore this token entirely.
851 PP.Lex(Result&: Tok);
852 IsStartOfLine = true;
853 continue;
854 } else if (Tok.is(K: tok::annot_module_begin)) {
855 // FIXME: We retrieve this token after the FileChanged callback, and
856 // retrieve the module_end token before the FileChanged callback, so
857 // we render this within the file and render the module end outside the
858 // file, but this is backwards from the token locations: the module_begin
859 // token is at the include location (outside the file) and the module_end
860 // token is at the EOF location (within the file).
861 Callbacks->BeginModule(
862 M: reinterpret_cast<Module *>(Tok.getAnnotationValue()));
863 PP.Lex(Result&: Tok);
864 IsStartOfLine = true;
865 continue;
866 } else if (Tok.is(K: tok::annot_module_end)) {
867 Callbacks->EndModule(
868 M: reinterpret_cast<Module *>(Tok.getAnnotationValue()));
869 PP.Lex(Result&: Tok);
870 IsStartOfLine = true;
871 continue;
872 } else if (Tok.is(K: tok::annot_header_unit)) {
873 // This is a header-name that has been (effectively) converted into a
874 // module-name.
875 // FIXME: The module name could contain non-identifier module name
876 // components. We don't have a good way to round-trip those.
877 Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
878 std::string Name = M->getFullModuleName();
879 Callbacks->OS->write(Ptr: Name.data(), Size: Name.size());
880 Callbacks->HandleNewlinesInToken(TokStr: Name.data(), Len: Name.size());
881 } else if (Tok.isAnnotation()) {
882 // Ignore annotation tokens created by pragmas - the pragmas themselves
883 // will be reproduced in the preprocessed output.
884 PP.Lex(Result&: Tok);
885 continue;
886 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
887 *Callbacks->OS << II->getName();
888 } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
889 Tok.getLiteralData()) {
890 Callbacks->OS->write(Ptr: Tok.getLiteralData(), Size: Tok.getLength());
891 } else if (Tok.getLength() < std::size(Buffer)) {
892 const char *TokPtr = Buffer;
893 unsigned Len = PP.getSpelling(Tok, Buffer&: TokPtr);
894 Callbacks->OS->write(Ptr: TokPtr, Size: Len);
895
896 // Tokens that can contain embedded newlines need to adjust our current
897 // line number.
898 // FIXME: The token may end with a newline in which case
899 // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is
900 // wrong.
901 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
902 Callbacks->HandleNewlinesInToken(TokStr: TokPtr, Len);
903 if (Tok.is(K: tok::comment) && Len >= 2 && TokPtr[0] == '/' &&
904 TokPtr[1] == '/') {
905 // It's a line comment;
906 // Ensure that we don't concatenate anything behind it.
907 Callbacks->setEmittedDirectiveOnThisLine();
908 }
909 } else {
910 std::string S = PP.getSpelling(Tok);
911 Callbacks->OS->write(Ptr: S.data(), Size: S.size());
912
913 // Tokens that can contain embedded newlines need to adjust our current
914 // line number.
915 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
916 Callbacks->HandleNewlinesInToken(TokStr: S.data(), Len: S.size());
917 if (Tok.is(K: tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') {
918 // It's a line comment;
919 // Ensure that we don't concatenate anything behind it.
920 Callbacks->setEmittedDirectiveOnThisLine();
921 }
922 }
923 Callbacks->setEmittedTokensOnThisLine();
924 IsStartOfLine = false;
925
926 if (Tok.is(K: tok::eof)) break;
927
928 PP.Lex(Result&: Tok);
929 }
930}
931
932typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
933static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
934 return LHS->first->getName().compare(RHS: RHS->first->getName());
935}
936
937static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
938 // Ignore unknown pragmas.
939 PP.IgnorePragmas();
940
941 // -dM mode just scans and ignores all tokens in the files, then dumps out
942 // the macro table at the end.
943 PP.EnterMainSourceFile();
944
945 Token Tok;
946 do PP.Lex(Result&: Tok);
947 while (Tok.isNot(K: tok::eof));
948
949 SmallVector<id_macro_pair, 128> MacrosByID;
950 for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
951 I != E; ++I) {
952 auto *MD = I->second.getLatest();
953 if (MD && MD->isDefined())
954 MacrosByID.push_back(Elt: id_macro_pair(I->first, MD->getMacroInfo()));
955 }
956 llvm::array_pod_sort(Start: MacrosByID.begin(), End: MacrosByID.end(), Compare: MacroIDCompare);
957
958 for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
959 MacroInfo &MI = *MacrosByID[i].second;
960 // Ignore computed macros like __LINE__ and friends.
961 if (MI.isBuiltinMacro()) continue;
962
963 PrintMacroDefinition(II: *MacrosByID[i].first, MI, PP, OS);
964 *OS << '\n';
965 }
966}
967
968/// DoPrintPreprocessedInput - This implements -E mode.
969///
970void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
971 const PreprocessorOutputOptions &Opts) {
972 // Show macros with no output is handled specially.
973 if (!Opts.ShowCPP) {
974 assert(Opts.ShowMacros && "Not yet implemented!");
975 DoPrintMacros(PP, OS);
976 return;
977 }
978
979 // Inform the preprocessor whether we want it to retain comments or not, due
980 // to -C or -CC.
981 PP.SetCommentRetentionState(KeepComments: Opts.ShowComments, KeepMacroComments: Opts.ShowMacroComments);
982
983 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
984 PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
985 Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
986 Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
987
988 // Expand macros in pragmas with -fms-extensions. The assumption is that
989 // the majority of pragmas in such a file will be Microsoft pragmas.
990 // Remember the handlers we will add so that we can remove them later.
991 std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
992 new UnknownPragmaHandler(
993 "#pragma", Callbacks,
994 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
995
996 std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
997 "#pragma GCC", Callbacks,
998 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
999
1000 std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
1001 "#pragma clang", Callbacks,
1002 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
1003
1004 PP.AddPragmaHandler(Handler: MicrosoftExtHandler.get());
1005 PP.AddPragmaHandler(Namespace: "GCC", Handler: GCCHandler.get());
1006 PP.AddPragmaHandler(Namespace: "clang", Handler: ClangHandler.get());
1007
1008 // The tokens after pragma omp need to be expanded.
1009 //
1010 // OpenMP [2.1, Directive format]
1011 // Preprocessing tokens following the #pragma omp are subject to macro
1012 // replacement.
1013 std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
1014 new UnknownPragmaHandler("#pragma omp", Callbacks,
1015 /*RequireTokenExpansion=*/true));
1016 PP.AddPragmaHandler(Namespace: "omp", Handler: OpenMPHandler.get());
1017
1018 PP.addPPCallbacks(C: std::unique_ptr<PPCallbacks>(Callbacks));
1019
1020 // After we have configured the preprocessor, enter the main file.
1021 PP.EnterMainSourceFile();
1022 if (Opts.DirectivesOnly)
1023 PP.SetMacroExpansionOnlyInDirectives();
1024
1025 // Consume all of the tokens that come from the predefines buffer. Those
1026 // should not be emitted into the output and are guaranteed to be at the
1027 // start.
1028 const SourceManager &SourceMgr = PP.getSourceManager();
1029 Token Tok;
1030 do {
1031 PP.Lex(Result&: Tok);
1032 if (Tok.is(K: tok::eof) || !Tok.getLocation().isFileID())
1033 break;
1034
1035 PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc: Tok.getLocation());
1036 if (PLoc.isInvalid())
1037 break;
1038
1039 if (strcmp(s1: PLoc.getFilename(), s2: "<built-in>"))
1040 break;
1041 } while (true);
1042
1043 // Read all the preprocessed tokens, printing them out to the stream.
1044 PrintPreprocessedTokens(PP, Tok, Callbacks);
1045 *OS << '\n';
1046
1047 // Remove the handlers we just added to leave the preprocessor in a sane state
1048 // so that it can be reused (for example by a clang::Parser instance).
1049 PP.RemovePragmaHandler(Handler: MicrosoftExtHandler.get());
1050 PP.RemovePragmaHandler(Namespace: "GCC", Handler: GCCHandler.get());
1051 PP.RemovePragmaHandler(Namespace: "clang", Handler: ClangHandler.get());
1052 PP.RemovePragmaHandler(Namespace: "omp", Handler: OpenMPHandler.get());
1053}
1054

// Source: clang/lib/Frontend/PrintPreprocessedOutput.cpp