1 | //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Analysis/MacroExpansionContext.h" |
10 | #include "llvm/Support/Debug.h" |
11 | #include <optional> |
12 | |
13 | #define DEBUG_TYPE "macro-expansion-context" |
14 | |
15 | static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS, |
16 | clang::Token Tok); |
17 | |
18 | namespace clang { |
19 | namespace detail { |
20 | class MacroExpansionRangeRecorder : public PPCallbacks { |
21 | const Preprocessor &PP; |
22 | SourceManager &SM; |
23 | MacroExpansionContext::ExpansionRangeMap &ExpansionRanges; |
24 | |
25 | public: |
26 | explicit MacroExpansionRangeRecorder( |
27 | const Preprocessor &PP, SourceManager &SM, |
28 | MacroExpansionContext::ExpansionRangeMap &ExpansionRanges) |
29 | : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {} |
30 | |
31 | void MacroExpands(const Token &MacroName, const MacroDefinition &MD, |
32 | SourceRange Range, const MacroArgs *Args) override { |
33 | // Ignore annotation tokens like: _Pragma("pack(push, 1)") |
34 | if (MacroName.getIdentifierInfo()->getName() == "_Pragma" ) |
35 | return; |
36 | |
37 | SourceLocation MacroNameBegin = SM.getExpansionLoc(Loc: MacroName.getLocation()); |
38 | assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin())); |
39 | |
40 | const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] { |
41 | // If the range is empty, use the length of the macro. |
42 | if (Range.getBegin() == Range.getEnd()) |
43 | return SM.getExpansionLoc( |
44 | Loc: MacroName.getLocation().getLocWithOffset(Offset: MacroName.getLength())); |
45 | |
46 | // Include the last character. |
47 | return SM.getExpansionLoc(Loc: Range.getEnd()).getLocWithOffset(Offset: 1); |
48 | }(); |
49 | |
50 | (void)PP; |
51 | LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '" ; |
52 | dumpTokenInto(PP, llvm::dbgs(), MacroName); |
53 | llvm::dbgs() |
54 | << "' with length " << MacroName.getLength() << " at " ; |
55 | MacroNameBegin.print(llvm::dbgs(), SM); |
56 | llvm::dbgs() << ", expansion end at " ; |
57 | ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';); |
58 | |
59 | // If the expansion range is empty, use the identifier of the macro as a |
60 | // range. |
61 | MacroExpansionContext::ExpansionRangeMap::iterator It; |
62 | bool Inserted; |
63 | std::tie(args&: It, args&: Inserted) = |
64 | ExpansionRanges.try_emplace(Key: MacroNameBegin, Args: ExpansionEnd); |
65 | if (Inserted) { |
66 | LLVM_DEBUG(llvm::dbgs() << "maps " ; |
67 | It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to " ; |
68 | It->getSecond().print(llvm::dbgs(), SM); |
69 | llvm::dbgs() << '\n';); |
70 | } else { |
71 | if (SM.isBeforeInTranslationUnit(LHS: It->getSecond(), RHS: ExpansionEnd)) { |
72 | It->getSecond() = ExpansionEnd; |
73 | LLVM_DEBUG( |
74 | llvm::dbgs() << "remaps " ; It->getFirst().print(llvm::dbgs(), SM); |
75 | llvm::dbgs() << " to " ; It->getSecond().print(llvm::dbgs(), SM); |
76 | llvm::dbgs() << '\n';); |
77 | } |
78 | } |
79 | } |
80 | }; |
81 | } // namespace detail |
82 | } // namespace clang |
83 | |
84 | using namespace clang; |
85 | |
86 | MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts) |
87 | : LangOpts(LangOpts) {} |
88 | |
89 | void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) { |
90 | PP = &NewPP; |
91 | SM = &NewPP.getSourceManager(); |
92 | |
93 | // Make sure that the Preprocessor does not outlive the MacroExpansionContext. |
94 | PP->addPPCallbacks(C: std::make_unique<detail::MacroExpansionRangeRecorder>( |
95 | args&: *PP, args&: *SM, args&: ExpansionRanges)); |
96 | // Same applies here. |
97 | PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); }); |
98 | } |
99 | |
100 | std::optional<StringRef> |
101 | MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { |
102 | if (MacroExpansionLoc.isMacroID()) |
103 | return std::nullopt; |
104 | |
105 | // If there was no macro expansion at that location, return std::nullopt. |
106 | if (ExpansionRanges.find_as(Val: MacroExpansionLoc) == ExpansionRanges.end()) |
107 | return std::nullopt; |
108 | |
109 | // There was macro expansion, but resulted in no tokens, return empty string. |
110 | const auto It = ExpandedTokens.find_as(Val: MacroExpansionLoc); |
111 | if (It == ExpandedTokens.end()) |
112 | return StringRef{"" }; |
113 | |
114 | // Otherwise we have the actual token sequence as string. |
115 | return It->getSecond().str(); |
116 | } |
117 | |
118 | std::optional<StringRef> |
119 | MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const { |
120 | if (MacroExpansionLoc.isMacroID()) |
121 | return std::nullopt; |
122 | |
123 | const auto It = ExpansionRanges.find_as(Val: MacroExpansionLoc); |
124 | if (It == ExpansionRanges.end()) |
125 | return std::nullopt; |
126 | |
127 | assert(It->getFirst() != It->getSecond() && |
128 | "Every macro expansion must cover a non-empty range." ); |
129 | |
130 | return Lexer::getSourceText( |
131 | Range: CharSourceRange::getCharRange(B: It->getFirst(), E: It->getSecond()), SM: *SM, |
132 | LangOpts); |
133 | } |
134 | |
135 | void MacroExpansionContext::dumpExpansionRanges() const { |
136 | dumpExpansionRangesToStream(OS&: llvm::dbgs()); |
137 | } |
138 | void MacroExpansionContext::dumpExpandedTexts() const { |
139 | dumpExpandedTextsToStream(OS&: llvm::dbgs()); |
140 | } |
141 | |
142 | void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const { |
143 | std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges; |
144 | LocalExpansionRanges.reserve(n: ExpansionRanges.size()); |
145 | for (const auto &Record : ExpansionRanges) |
146 | LocalExpansionRanges.emplace_back( |
147 | args: std::make_pair(x: Record.getFirst(), y: Record.getSecond())); |
148 | llvm::sort(C&: LocalExpansionRanges); |
149 | |
150 | OS << "\n=============== ExpansionRanges ===============\n" ; |
151 | for (const auto &Record : LocalExpansionRanges) { |
152 | OS << "> " ; |
153 | Record.first.print(OS, SM: *SM); |
154 | OS << ", " ; |
155 | Record.second.print(OS, SM: *SM); |
156 | OS << '\n'; |
157 | } |
158 | } |
159 | |
160 | void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const { |
161 | std::vector<std::pair<SourceLocation, MacroExpansionText>> |
162 | LocalExpandedTokens; |
163 | LocalExpandedTokens.reserve(n: ExpandedTokens.size()); |
164 | for (const auto &Record : ExpandedTokens) |
165 | LocalExpandedTokens.emplace_back( |
166 | args: std::make_pair(x: Record.getFirst(), y: Record.getSecond())); |
167 | llvm::sort(C&: LocalExpandedTokens); |
168 | |
169 | OS << "\n=============== ExpandedTokens ===============\n" ; |
170 | for (const auto &Record : LocalExpandedTokens) { |
171 | OS << "> " ; |
172 | Record.first.print(OS, SM: *SM); |
173 | OS << " -> '" << Record.second << "'\n" ; |
174 | } |
175 | } |
176 | |
177 | static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) { |
178 | assert(Tok.isNot(tok::raw_identifier)); |
179 | |
180 | // Ignore annotation tokens like: _Pragma("pack(push, 1)") |
181 | if (Tok.isAnnotation()) |
182 | return; |
183 | |
184 | if (IdentifierInfo *II = Tok.getIdentifierInfo()) { |
185 | // FIXME: For now, we don't respect whitespaces between macro expanded |
186 | // tokens. We just emit a space after every identifier to produce a valid |
187 | // code for `int a ;` like expansions. |
188 | // ^-^-- Space after the 'int' and 'a' identifiers. |
189 | OS << II->getName() << ' '; |
190 | } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) { |
191 | OS << StringRef(Tok.getLiteralData(), Tok.getLength()); |
192 | } else { |
193 | char Tmp[256]; |
194 | if (Tok.getLength() < sizeof(Tmp)) { |
195 | const char *TokPtr = Tmp; |
196 | // FIXME: Might use a different overload for cleaner callsite. |
197 | unsigned Len = PP.getSpelling(Tok, Buffer&: TokPtr); |
198 | OS.write(Ptr: TokPtr, Size: Len); |
199 | } else { |
200 | OS << "<too long token>" ; |
201 | } |
202 | } |
203 | } |
204 | |
205 | void MacroExpansionContext::onTokenLexed(const Token &Tok) { |
206 | SourceLocation SLoc = Tok.getLocation(); |
207 | if (SLoc.isFileID()) |
208 | return; |
209 | |
210 | LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '" ; |
211 | dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at " ; |
212 | SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';); |
213 | |
214 | // Remove spelling location. |
215 | SourceLocation CurrExpansionLoc = SM->getExpansionLoc(Loc: SLoc); |
216 | |
217 | MacroExpansionText TokenAsString; |
218 | llvm::raw_svector_ostream OS(TokenAsString); |
219 | |
220 | // FIXME: Prepend newlines and space to produce the exact same output as the |
221 | // preprocessor would for this token. |
222 | |
223 | dumpTokenInto(PP: *PP, OS, Tok); |
224 | |
225 | ExpansionMap::iterator It; |
226 | bool Inserted; |
227 | std::tie(args&: It, args&: Inserted) = |
228 | ExpandedTokens.try_emplace(Key: CurrExpansionLoc, Args: std::move(TokenAsString)); |
229 | if (!Inserted) |
230 | It->getSecond().append(RHS: TokenAsString); |
231 | } |
232 | |
233 | |