1 | //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file contains the implementation of MacroExpander, which handles macro |
11 | /// configuration and expansion while formatting. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "Macros.h" |
16 | |
17 | #include "Encoding.h" |
18 | #include "FormatToken.h" |
19 | #include "FormatTokenLexer.h" |
20 | #include "clang/Basic/TokenKinds.h" |
21 | #include "clang/Format/Format.h" |
22 | #include "clang/Lex/HeaderSearch.h" |
23 | #include "clang/Lex/Lexer.h" |
24 | #include "clang/Lex/PreprocessorOptions.h" |
25 | #include "llvm/ADT/StringSet.h" |
26 | #include "llvm/Support/ErrorHandling.h" |
27 | |
28 | namespace clang { |
29 | namespace format { |
30 | |
31 | struct MacroExpander::Definition { |
32 | StringRef Name; |
33 | SmallVector<FormatToken *, 8> Params; |
34 | SmallVector<FormatToken *, 8> Body; |
35 | |
36 | // Map from each argument's name to its position in the argument list. |
37 | // With "M(x, y) x + y": |
38 | // x -> 0 |
39 | // y -> 1 |
40 | llvm::StringMap<size_t> ArgMap; |
41 | |
42 | bool ObjectLike = true; |
43 | }; |
44 | |
45 | class MacroExpander::DefinitionParser { |
46 | public: |
47 | DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) { |
48 | assert(!Tokens.empty()); |
49 | Current = Tokens[0]; |
50 | } |
51 | |
52 | // Parse the token stream and return the corresponding Definition object. |
53 | // Returns an empty definition object with a null-Name on error. |
54 | MacroExpander::Definition parse() { |
55 | if (Current->isNot(Kind: tok::identifier)) |
56 | return {}; |
57 | Def.Name = Current->TokenText; |
58 | nextToken(); |
59 | if (Current->is(Kind: tok::l_paren)) { |
60 | Def.ObjectLike = false; |
61 | if (!parseParams()) |
62 | return {}; |
63 | } |
64 | if (!parseExpansion()) |
65 | return {}; |
66 | |
67 | return Def; |
68 | } |
69 | |
70 | private: |
71 | bool parseParams() { |
72 | assert(Current->is(tok::l_paren)); |
73 | nextToken(); |
74 | while (Current->is(Kind: tok::identifier)) { |
75 | Def.Params.push_back(Elt: Current); |
76 | Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1; |
77 | nextToken(); |
78 | if (Current->isNot(Kind: tok::comma)) |
79 | break; |
80 | nextToken(); |
81 | } |
82 | if (Current->isNot(Kind: tok::r_paren)) |
83 | return false; |
84 | nextToken(); |
85 | return true; |
86 | } |
87 | |
88 | bool parseExpansion() { |
89 | if (!Current->isOneOf(K1: tok::equal, K2: tok::eof)) |
90 | return false; |
91 | if (Current->is(Kind: tok::equal)) |
92 | nextToken(); |
93 | parseTail(); |
94 | return true; |
95 | } |
96 | |
97 | void parseTail() { |
98 | while (Current->isNot(Kind: tok::eof)) { |
99 | Def.Body.push_back(Elt: Current); |
100 | nextToken(); |
101 | } |
102 | Def.Body.push_back(Elt: Current); |
103 | } |
104 | |
105 | void nextToken() { |
106 | if (Pos + 1 < Tokens.size()) |
107 | ++Pos; |
108 | Current = Tokens[Pos]; |
109 | Current->Finalized = true; |
110 | } |
111 | |
112 | size_t Pos = 0; |
113 | FormatToken *Current = nullptr; |
114 | Definition Def; |
115 | ArrayRef<FormatToken *> Tokens; |
116 | }; |
117 | |
118 | MacroExpander::MacroExpander( |
119 | const std::vector<std::string> &Macros, SourceManager &SourceMgr, |
120 | const FormatStyle &Style, |
121 | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, |
122 | IdentifierTable &IdentTable) |
123 | : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator), |
124 | IdentTable(IdentTable) { |
125 | for (const std::string &Macro : Macros) |
126 | parseDefinition(Macro); |
127 | } |
128 | |
129 | MacroExpander::~MacroExpander() = default; |
130 | |
131 | void MacroExpander::parseDefinition(const std::string &Macro) { |
132 | Buffers.push_back( |
133 | Elt: llvm::MemoryBuffer::getMemBufferCopy(InputData: Macro, BufferName: "<scratch space>" )); |
134 | FileID FID = SourceMgr.createFileID(Buffer: Buffers.back()->getMemBufferRef()); |
135 | FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8, |
136 | Allocator, IdentTable); |
137 | const auto Tokens = Lex.lex(); |
138 | if (!Tokens.empty()) { |
139 | DefinitionParser Parser(Tokens); |
140 | auto Definition = Parser.parse(); |
141 | if (Definition.ObjectLike) { |
142 | ObjectLike[Definition.Name] = std::move(Definition); |
143 | } else { |
144 | FunctionLike[Definition.Name][Definition.Params.size()] = |
145 | std::move(Definition); |
146 | } |
147 | } |
148 | } |
149 | |
150 | bool MacroExpander::defined(StringRef Name) const { |
151 | return FunctionLike.contains(Key: Name) || ObjectLike.contains(Key: Name); |
152 | } |
153 | |
154 | bool MacroExpander::objectLike(StringRef Name) const { |
155 | return ObjectLike.contains(Key: Name); |
156 | } |
157 | |
158 | bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const { |
159 | auto it = FunctionLike.find(Key: Name); |
160 | return it != FunctionLike.end() && it->second.contains(Val: Arity); |
161 | } |
162 | |
163 | SmallVector<FormatToken *, 8> |
164 | MacroExpander::expand(FormatToken *ID, |
165 | std::optional<ArgsList> OptionalArgs) const { |
166 | if (OptionalArgs) |
167 | assert(hasArity(ID->TokenText, OptionalArgs->size())); |
168 | else |
169 | assert(objectLike(ID->TokenText)); |
170 | const Definition &Def = OptionalArgs |
171 | ? FunctionLike.find(Key: ID->TokenText) |
172 | ->second.find(Val: OptionalArgs.value().size()) |
173 | ->second |
174 | : ObjectLike.find(Key: ID->TokenText)->second; |
175 | ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); |
176 | SmallVector<FormatToken *, 8> Result; |
177 | // Expand each argument at most once. |
178 | llvm::StringSet<> ExpandedArgs; |
179 | |
180 | // Adds the given token to Result. |
181 | auto pushToken = [&](FormatToken *Tok) { |
182 | Tok->MacroCtx->ExpandedFrom.push_back(Elt: ID); |
183 | Result.push_back(Elt: Tok); |
184 | }; |
185 | |
186 | // If Tok references a parameter, adds the corresponding argument to Result. |
187 | // Returns false if Tok does not reference a parameter. |
188 | auto expandArgument = [&](FormatToken *Tok) -> bool { |
189 | // If the current token references a parameter, expand the corresponding |
190 | // argument. |
191 | if (Tok->isNot(Kind: tok::identifier)) |
192 | return false; |
193 | if (!ExpandedArgs.insert(key: Tok->TokenText).second) |
194 | return false; |
195 | auto I = Def.ArgMap.find(Key: Tok->TokenText); |
196 | if (I == Def.ArgMap.end()) |
197 | return false; |
198 | // If there are fewer arguments than referenced parameters, treat the |
199 | // parameter as empty. |
200 | // FIXME: Potentially fully abort the expansion instead. |
201 | if (I->getValue() >= Args.size()) |
202 | return true; |
203 | for (FormatToken *Arg : Args[I->getValue()]) { |
204 | // A token can be part of a macro argument at multiple levels. |
205 | // For example, with "ID(x) x": |
206 | // in ID(ID(x)), 'x' is expanded first as argument to the inner |
207 | // ID, then again as argument to the outer ID. We keep the macro |
208 | // role the token had from the inner expansion. |
209 | if (!Arg->MacroCtx) |
210 | Arg->MacroCtx = MacroExpansion(MR_ExpandedArg); |
211 | pushToken(Arg); |
212 | } |
213 | return true; |
214 | }; |
215 | |
216 | // Expand the definition into Result. |
217 | for (FormatToken *Tok : Def.Body) { |
218 | if (expandArgument(Tok)) |
219 | continue; |
220 | // Create a copy of the tokens from the macro body, i.e. were not provided |
221 | // by user code. |
222 | FormatToken *New = new (Allocator.Allocate()) FormatToken; |
223 | New->copyFrom(Tok: *Tok); |
224 | assert(!New->MacroCtx); |
225 | // Tokens that are not part of the user code are not formatted. |
226 | New->MacroCtx = MacroExpansion(MR_Hidden); |
227 | pushToken(New); |
228 | } |
229 | assert(Result.size() >= 1 && Result.back()->is(tok::eof)); |
230 | if (Result.size() > 1) { |
231 | ++Result[0]->MacroCtx->StartOfExpansion; |
232 | ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion; |
233 | } else { |
234 | // If the macro expansion is empty, mark the start and end. |
235 | Result[0]->MacroCtx->StartOfExpansion = 1; |
236 | Result[0]->MacroCtx->EndOfExpansion = 1; |
237 | } |
238 | return Result; |
239 | } |
240 | |
241 | } // namespace format |
242 | } // namespace clang |
243 | |