1 | //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file contains the implementation of MacroExpander, which handles macro |
11 | /// configuration and expansion while formatting. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "Macros.h" |
16 | |
17 | #include "Encoding.h" |
18 | #include "FormatToken.h" |
19 | #include "FormatTokenLexer.h" |
20 | #include "clang/Basic/TokenKinds.h" |
21 | #include "clang/Format/Format.h" |
22 | #include "clang/Lex/HeaderSearch.h" |
23 | #include "clang/Lex/HeaderSearchOptions.h" |
24 | #include "clang/Lex/Lexer.h" |
25 | #include "clang/Lex/ModuleLoader.h" |
26 | #include "clang/Lex/Preprocessor.h" |
27 | #include "clang/Lex/PreprocessorOptions.h" |
28 | #include "llvm/ADT/StringSet.h" |
29 | #include "llvm/Support/ErrorHandling.h" |
30 | |
31 | namespace clang { |
32 | namespace format { |
33 | |
34 | struct MacroExpander::Definition { |
35 | StringRef Name; |
36 | SmallVector<FormatToken *, 8> Params; |
37 | SmallVector<FormatToken *, 8> Body; |
38 | |
39 | // Map from each argument's name to its position in the argument list. |
40 | // With "M(x, y) x + y": |
41 | // x -> 0 |
42 | // y -> 1 |
43 | llvm::StringMap<size_t> ArgMap; |
44 | |
45 | bool ObjectLike = true; |
46 | }; |
47 | |
48 | class MacroExpander::DefinitionParser { |
49 | public: |
50 | DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) { |
51 | assert(!Tokens.empty()); |
52 | Current = Tokens[0]; |
53 | } |
54 | |
55 | // Parse the token stream and return the corresponding Definition object. |
56 | // Returns an empty definition object with a null-Name on error. |
57 | MacroExpander::Definition parse() { |
58 | if (Current->isNot(Kind: tok::identifier)) |
59 | return {}; |
60 | Def.Name = Current->TokenText; |
61 | nextToken(); |
62 | if (Current->is(Kind: tok::l_paren)) { |
63 | Def.ObjectLike = false; |
64 | if (!parseParams()) |
65 | return {}; |
66 | } |
67 | if (!parseExpansion()) |
68 | return {}; |
69 | |
70 | return Def; |
71 | } |
72 | |
73 | private: |
74 | bool parseParams() { |
75 | assert(Current->is(tok::l_paren)); |
76 | nextToken(); |
77 | while (Current->is(Kind: tok::identifier)) { |
78 | Def.Params.push_back(Elt: Current); |
79 | Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1; |
80 | nextToken(); |
81 | if (Current->isNot(Kind: tok::comma)) |
82 | break; |
83 | nextToken(); |
84 | } |
85 | if (Current->isNot(Kind: tok::r_paren)) |
86 | return false; |
87 | nextToken(); |
88 | return true; |
89 | } |
90 | |
91 | bool parseExpansion() { |
92 | if (!Current->isOneOf(K1: tok::equal, K2: tok::eof)) |
93 | return false; |
94 | if (Current->is(Kind: tok::equal)) |
95 | nextToken(); |
96 | parseTail(); |
97 | return true; |
98 | } |
99 | |
100 | void parseTail() { |
101 | while (Current->isNot(Kind: tok::eof)) { |
102 | Def.Body.push_back(Elt: Current); |
103 | nextToken(); |
104 | } |
105 | Def.Body.push_back(Elt: Current); |
106 | } |
107 | |
108 | void nextToken() { |
109 | if (Pos + 1 < Tokens.size()) |
110 | ++Pos; |
111 | Current = Tokens[Pos]; |
112 | Current->Finalized = true; |
113 | } |
114 | |
115 | size_t Pos = 0; |
116 | FormatToken *Current = nullptr; |
117 | Definition Def; |
118 | ArrayRef<FormatToken *> Tokens; |
119 | }; |
120 | |
121 | MacroExpander::MacroExpander( |
122 | const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr, |
123 | const FormatStyle &Style, |
124 | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, |
125 | IdentifierTable &IdentTable) |
126 | : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator), |
127 | IdentTable(IdentTable) { |
128 | for (const std::string &Macro : Macros) |
129 | parseDefinition(Macro); |
130 | } |
131 | |
// Defaulted here, in the file that also defines MacroExpander::Definition.
// NOTE(review): presumably kept out of line because the members that store
// Definition objects need the complete type to be destroyed - confirm against
// Macros.h.
MacroExpander::~MacroExpander() = default;
133 | |
134 | void MacroExpander::parseDefinition(const std::string &Macro) { |
135 | Buffers.push_back( |
136 | Elt: llvm::MemoryBuffer::getMemBufferCopy(InputData: Macro, BufferName: "<scratch space>" )); |
137 | clang::FileID FID = SourceMgr.createFileID(Buffer: Buffers.back()->getMemBufferRef()); |
138 | FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8, |
139 | Allocator, IdentTable); |
140 | const auto Tokens = Lex.lex(); |
141 | if (!Tokens.empty()) { |
142 | DefinitionParser Parser(Tokens); |
143 | auto Definition = Parser.parse(); |
144 | if (Definition.ObjectLike) { |
145 | ObjectLike[Definition.Name] = std::move(Definition); |
146 | } else { |
147 | FunctionLike[Definition.Name][Definition.Params.size()] = |
148 | std::move(Definition); |
149 | } |
150 | } |
151 | } |
152 | |
153 | bool MacroExpander::defined(llvm::StringRef Name) const { |
154 | return FunctionLike.contains(Key: Name) || ObjectLike.contains(Key: Name); |
155 | } |
156 | |
157 | bool MacroExpander::objectLike(llvm::StringRef Name) const { |
158 | return ObjectLike.contains(Key: Name); |
159 | } |
160 | |
161 | bool MacroExpander::hasArity(llvm::StringRef Name, unsigned Arity) const { |
162 | auto it = FunctionLike.find(Key: Name); |
163 | return it != FunctionLike.end() && it->second.contains(Val: Arity); |
164 | } |
165 | |
166 | llvm::SmallVector<FormatToken *, 8> |
167 | MacroExpander::expand(FormatToken *ID, |
168 | std::optional<ArgsList> OptionalArgs) const { |
169 | if (OptionalArgs) |
170 | assert(hasArity(ID->TokenText, OptionalArgs->size())); |
171 | else |
172 | assert(objectLike(ID->TokenText)); |
173 | const Definition &Def = OptionalArgs |
174 | ? FunctionLike.find(Key: ID->TokenText) |
175 | ->second.find(Val: OptionalArgs.value().size()) |
176 | ->second |
177 | : ObjectLike.find(Key: ID->TokenText)->second; |
178 | ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); |
179 | SmallVector<FormatToken *, 8> Result; |
180 | // Expand each argument at most once. |
181 | llvm::StringSet<> ExpandedArgs; |
182 | |
183 | // Adds the given token to Result. |
184 | auto pushToken = [&](FormatToken *Tok) { |
185 | Tok->MacroCtx->ExpandedFrom.push_back(Elt: ID); |
186 | Result.push_back(Elt: Tok); |
187 | }; |
188 | |
189 | // If Tok references a parameter, adds the corresponding argument to Result. |
190 | // Returns false if Tok does not reference a parameter. |
191 | auto expandArgument = [&](FormatToken *Tok) -> bool { |
192 | // If the current token references a parameter, expand the corresponding |
193 | // argument. |
194 | if (Tok->isNot(Kind: tok::identifier) || ExpandedArgs.contains(key: Tok->TokenText)) |
195 | return false; |
196 | ExpandedArgs.insert(key: Tok->TokenText); |
197 | auto I = Def.ArgMap.find(Key: Tok->TokenText); |
198 | if (I == Def.ArgMap.end()) |
199 | return false; |
200 | // If there are fewer arguments than referenced parameters, treat the |
201 | // parameter as empty. |
202 | // FIXME: Potentially fully abort the expansion instead. |
203 | if (I->getValue() >= Args.size()) |
204 | return true; |
205 | for (FormatToken *Arg : Args[I->getValue()]) { |
206 | // A token can be part of a macro argument at multiple levels. |
207 | // For example, with "ID(x) x": |
208 | // in ID(ID(x)), 'x' is expanded first as argument to the inner |
209 | // ID, then again as argument to the outer ID. We keep the macro |
210 | // role the token had from the inner expansion. |
211 | if (!Arg->MacroCtx) |
212 | Arg->MacroCtx = MacroExpansion(MR_ExpandedArg); |
213 | pushToken(Arg); |
214 | } |
215 | return true; |
216 | }; |
217 | |
218 | // Expand the definition into Result. |
219 | for (FormatToken *Tok : Def.Body) { |
220 | if (expandArgument(Tok)) |
221 | continue; |
222 | // Create a copy of the tokens from the macro body, i.e. were not provided |
223 | // by user code. |
224 | FormatToken *New = new (Allocator.Allocate()) FormatToken; |
225 | New->copyFrom(Tok: *Tok); |
226 | assert(!New->MacroCtx); |
227 | // Tokens that are not part of the user code are not formatted. |
228 | New->MacroCtx = MacroExpansion(MR_Hidden); |
229 | pushToken(New); |
230 | } |
231 | assert(Result.size() >= 1 && Result.back()->is(tok::eof)); |
232 | if (Result.size() > 1) { |
233 | ++Result[0]->MacroCtx->StartOfExpansion; |
234 | ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion; |
235 | } |
236 | return Result; |
237 | } |
238 | |
239 | } // namespace format |
240 | } // namespace clang |
241 | |