1 | //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/Transformer/RangeSelector.h" |
10 | #include "clang/AST/Expr.h" |
11 | #include "clang/AST/TypeLoc.h" |
12 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
13 | #include "clang/Basic/SourceLocation.h" |
14 | #include "clang/Lex/Lexer.h" |
15 | #include "clang/Tooling/Transformer/SourceCode.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/Support/Errc.h" |
18 | #include "llvm/Support/Error.h" |
19 | #include <string> |
20 | #include <utility> |
21 | #include <vector> |
22 | |
23 | using namespace clang; |
24 | using namespace transformer; |
25 | |
26 | using ast_matchers::MatchFinder; |
27 | using llvm::Error; |
28 | using llvm::StringError; |
29 | |
30 | using MatchResult = MatchFinder::MatchResult; |
31 | |
32 | static Error invalidArgumentError(Twine Message) { |
33 | return llvm::make_error<StringError>(Args: llvm::errc::invalid_argument, Args&: Message); |
34 | } |
35 | |
36 | static Error typeError(StringRef ID, const ASTNodeKind &Kind) { |
37 | return invalidArgumentError(Message: "mismatched type (node id=" + ID + |
38 | " kind=" + Kind.asStringRef() + ")" ); |
39 | } |
40 | |
41 | static Error typeError(StringRef ID, const ASTNodeKind &Kind, |
42 | Twine ExpectedType) { |
43 | return invalidArgumentError(Message: "mismatched type: expected one of " + |
44 | ExpectedType + " (node id=" + ID + |
45 | " kind=" + Kind.asStringRef() + ")" ); |
46 | } |
47 | |
48 | static Error missingPropertyError(StringRef ID, Twine Description, |
49 | StringRef Property) { |
50 | return invalidArgumentError(Message: Description + " requires property '" + Property + |
51 | "' (node id=" + ID + ")" ); |
52 | } |
53 | |
54 | static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes, |
55 | StringRef ID) { |
56 | auto &NodesMap = Nodes.getMap(); |
57 | auto It = NodesMap.find(x: ID); |
58 | if (It == NodesMap.end()) |
59 | return invalidArgumentError(Message: "ID not bound: " + ID); |
60 | return It->second; |
61 | } |
62 | |
63 | // FIXME: handling of macros should be configurable. |
64 | static SourceLocation findPreviousTokenStart(SourceLocation Start, |
65 | const SourceManager &SM, |
66 | const LangOptions &LangOpts) { |
67 | if (Start.isInvalid() || Start.isMacroID()) |
68 | return SourceLocation(); |
69 | |
70 | SourceLocation BeforeStart = Start.getLocWithOffset(Offset: -1); |
71 | if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) |
72 | return SourceLocation(); |
73 | |
74 | return Lexer::GetBeginningOfToken(Loc: BeforeStart, SM, LangOpts); |
75 | } |
76 | |
77 | // Finds the start location of the previous token of kind \p TK. |
78 | // FIXME: handling of macros should be configurable. |
79 | static SourceLocation findPreviousTokenKind(SourceLocation Start, |
80 | const SourceManager &SM, |
81 | const LangOptions &LangOpts, |
82 | tok::TokenKind TK) { |
83 | while (true) { |
84 | SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); |
85 | if (L.isInvalid() || L.isMacroID()) |
86 | return SourceLocation(); |
87 | |
88 | Token T; |
89 | if (Lexer::getRawToken(Loc: L, Result&: T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) |
90 | return SourceLocation(); |
91 | |
92 | if (T.is(K: TK)) |
93 | return T.getLocation(); |
94 | |
95 | Start = L; |
96 | } |
97 | } |
98 | |
99 | static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM, |
100 | const LangOptions &LangOpts) { |
101 | SourceLocation EndLoc = |
102 | E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(Arg: 0)->getBeginLoc(); |
103 | return findPreviousTokenKind(Start: EndLoc, SM, LangOpts, TK: tok::TokenKind::l_paren); |
104 | } |
105 | |
106 | RangeSelector transformer::before(RangeSelector Selector) { |
107 | return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { |
108 | Expected<CharSourceRange> SelectedRange = Selector(Result); |
109 | if (!SelectedRange) |
110 | return SelectedRange.takeError(); |
111 | return CharSourceRange::getCharRange(R: SelectedRange->getBegin()); |
112 | }; |
113 | } |
114 | |
115 | RangeSelector transformer::after(RangeSelector Selector) { |
116 | return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { |
117 | Expected<CharSourceRange> SelectedRange = Selector(Result); |
118 | if (!SelectedRange) |
119 | return SelectedRange.takeError(); |
120 | SourceLocation End = SelectedRange->getEnd(); |
121 | if (SelectedRange->isTokenRange()) { |
122 | // We need to find the actual (exclusive) end location from which to |
123 | // create a new source range. However, that's not guaranteed to be valid, |
124 | // even if the token location itself is valid. So, we create a token range |
125 | // consisting only of the last token, then map that range back to the |
126 | // source file. If that succeeds, we have a valid location for the end of |
127 | // the generated range. |
128 | CharSourceRange Range = Lexer::makeFileCharRange( |
129 | Range: CharSourceRange::getTokenRange(R: SelectedRange->getEnd()), |
130 | SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts()); |
131 | if (Range.isInvalid()) |
132 | return invalidArgumentError( |
133 | Message: "after: can't resolve sub-range to valid source range" ); |
134 | End = Range.getEnd(); |
135 | } |
136 | |
137 | return CharSourceRange::getCharRange(R: End); |
138 | }; |
139 | } |
140 | |
141 | RangeSelector transformer::node(std::string ID) { |
142 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
143 | Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID); |
144 | if (!Node) |
145 | return Node.takeError(); |
146 | return (Node->get<Decl>() != nullptr || |
147 | (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr)) |
148 | ? tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi, |
149 | Context&: *Result.Context) |
150 | : CharSourceRange::getTokenRange(R: Node->getSourceRange()); |
151 | }; |
152 | } |
153 | |
154 | RangeSelector transformer::statement(std::string ID) { |
155 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
156 | Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID); |
157 | if (!Node) |
158 | return Node.takeError(); |
159 | return tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi, |
160 | Context&: *Result.Context); |
161 | }; |
162 | } |
163 | |
164 | RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) { |
165 | return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> { |
166 | Expected<CharSourceRange> BeginRange = Begin(Result); |
167 | if (!BeginRange) |
168 | return BeginRange.takeError(); |
169 | Expected<CharSourceRange> EndRange = End(Result); |
170 | if (!EndRange) |
171 | return EndRange.takeError(); |
172 | SourceLocation B = BeginRange->getBegin(); |
173 | SourceLocation E = EndRange->getEnd(); |
174 | // Note: we are precluding the possibility of sub-token ranges in the case |
175 | // that EndRange is a token range. |
176 | if (Result.SourceManager->isBeforeInTranslationUnit(LHS: E, RHS: B)) { |
177 | return invalidArgumentError(Message: "Bad range: out of order" ); |
178 | } |
179 | return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange()); |
180 | }; |
181 | } |
182 | |
183 | RangeSelector transformer::encloseNodes(std::string BeginID, |
184 | std::string EndID) { |
185 | return transformer::enclose(Begin: node(ID: std::move(BeginID)), End: node(ID: std::move(EndID))); |
186 | } |
187 | |
188 | RangeSelector transformer::member(std::string ID) { |
189 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
190 | Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID); |
191 | if (!Node) |
192 | return Node.takeError(); |
193 | if (auto *M = Node->get<clang::MemberExpr>()) |
194 | return CharSourceRange::getTokenRange( |
195 | R: M->getMemberNameInfo().getSourceRange()); |
196 | return typeError(ID, Kind: Node->getNodeKind(), ExpectedType: "MemberExpr" ); |
197 | }; |
198 | } |
199 | |
200 | RangeSelector transformer::name(std::string ID) { |
201 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
202 | Expected<DynTypedNode> N = getNode(Nodes: Result.Nodes, ID); |
203 | if (!N) |
204 | return N.takeError(); |
205 | auto &Node = *N; |
206 | if (const auto *D = Node.get<NamedDecl>()) { |
207 | if (!D->getDeclName().isIdentifier()) |
208 | return missingPropertyError(ID, Description: "name" , Property: "identifier" ); |
209 | SourceLocation L = D->getLocation(); |
210 | auto R = CharSourceRange::getTokenRange(B: L, E: L); |
211 | // Verify that the range covers exactly the name. |
212 | // FIXME: extend this code to support cases like `operator +` or |
213 | // `foo<int>` for which this range will be too short. Doing so will |
214 | // require subcasing `NamedDecl`, because it doesn't provide virtual |
215 | // access to the \c DeclarationNameInfo. |
216 | if (tooling::getText(R, *Result.Context) != D->getName()) |
217 | return CharSourceRange(); |
218 | return R; |
219 | } |
220 | if (const auto *E = Node.get<DeclRefExpr>()) { |
221 | if (!E->getNameInfo().getName().isIdentifier()) |
222 | return missingPropertyError(ID, Description: "name" , Property: "identifier" ); |
223 | SourceLocation L = E->getLocation(); |
224 | return CharSourceRange::getTokenRange(B: L, E: L); |
225 | } |
226 | if (const auto *I = Node.get<CXXCtorInitializer>()) { |
227 | if (!I->isMemberInitializer() && I->isWritten()) |
228 | return missingPropertyError(ID, Description: "name" , Property: "explicit member initializer" ); |
229 | SourceLocation L = I->getMemberLocation(); |
230 | return CharSourceRange::getTokenRange(B: L, E: L); |
231 | } |
232 | if (const auto *T = Node.get<TypeLoc>()) { |
233 | TypeLoc Loc = *T; |
234 | auto ET = Loc.getAs<ElaboratedTypeLoc>(); |
235 | if (!ET.isNull()) |
236 | Loc = ET.getNamedTypeLoc(); |
237 | if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>(); |
238 | !SpecLoc.isNull()) |
239 | return CharSourceRange::getTokenRange(R: SpecLoc.getTemplateNameLoc()); |
240 | return CharSourceRange::getTokenRange(R: Loc.getSourceRange()); |
241 | } |
242 | return typeError(ID, Kind: Node.getNodeKind(), |
243 | ExpectedType: "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc" ); |
244 | }; |
245 | } |
246 | |
247 | namespace { |
248 | // FIXME: make this available in the public API for users to easily create their |
249 | // own selectors. |
250 | |
251 | // Creates a selector from a range-selection function \p Func, which selects a |
252 | // range that is relative to a bound node id. \c T is the node type expected by |
253 | // \p Func. |
254 | template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)> |
255 | class RelativeSelector { |
256 | std::string ID; |
257 | |
258 | public: |
259 | RelativeSelector(std::string ID) : ID(std::move(ID)) {} |
260 | |
261 | Expected<CharSourceRange> operator()(const MatchResult &Result) { |
262 | Expected<DynTypedNode> N = getNode(Nodes: Result.Nodes, ID); |
263 | if (!N) |
264 | return N.takeError(); |
265 | if (const auto *Arg = N->get<T>()) |
266 | return Func(Result, *Arg); |
267 | return typeError(ID, Kind: N->getNodeKind()); |
268 | } |
269 | }; |
270 | } // namespace |
271 | |
272 | // FIXME: Change the following functions from being in an anonymous namespace |
273 | // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915 |
274 | // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous |
275 | // namespace works around a bug in earlier versions. |
276 | namespace { |
277 | // Returns the range of the statements (all source between the braces). |
278 | CharSourceRange getStatementsRange(const MatchResult &, |
279 | const CompoundStmt &CS) { |
280 | return CharSourceRange::getCharRange(B: CS.getLBracLoc().getLocWithOffset(Offset: 1), |
281 | E: CS.getRBracLoc()); |
282 | } |
283 | } // namespace |
284 | |
285 | RangeSelector transformer::statements(std::string ID) { |
286 | return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID)); |
287 | } |
288 | |
289 | namespace { |
290 | // Returns the range of the source between the call's parentheses. |
291 | CharSourceRange getCallArgumentsRange(const MatchResult &Result, |
292 | const CallExpr &CE) { |
293 | return CharSourceRange::getCharRange( |
294 | B: findOpenParen(E: CE, SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts()) |
295 | .getLocWithOffset(Offset: 1), |
296 | E: CE.getRParenLoc()); |
297 | } |
298 | } // namespace |
299 | |
300 | RangeSelector transformer::callArgs(std::string ID) { |
301 | return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID)); |
302 | } |
303 | |
304 | namespace { |
305 | // Returns the range of the elements of the initializer list. Includes all |
306 | // source between the braces. |
307 | CharSourceRange getElementsRange(const MatchResult &, |
308 | const InitListExpr &E) { |
309 | return CharSourceRange::getCharRange(B: E.getLBraceLoc().getLocWithOffset(Offset: 1), |
310 | E: E.getRBraceLoc()); |
311 | } |
312 | } // namespace |
313 | |
314 | RangeSelector transformer::initListElements(std::string ID) { |
315 | return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID)); |
316 | } |
317 | |
318 | namespace { |
319 | // Returns the range of the else branch, including the `else` keyword. |
320 | CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) { |
321 | return tooling::maybeExtendRange( |
322 | Range: CharSourceRange::getTokenRange(B: S.getElseLoc(), E: S.getEndLoc()), |
323 | Terminator: tok::TokenKind::semi, Context&: *Result.Context); |
324 | } |
325 | } // namespace |
326 | |
327 | RangeSelector transformer::elseBranch(std::string ID) { |
328 | return RelativeSelector<IfStmt, getElseRange>(std::move(ID)); |
329 | } |
330 | |
331 | RangeSelector transformer::expansion(RangeSelector S) { |
332 | return [S](const MatchResult &Result) -> Expected<CharSourceRange> { |
333 | Expected<CharSourceRange> SRange = S(Result); |
334 | if (!SRange) |
335 | return SRange.takeError(); |
336 | return Result.SourceManager->getExpansionRange(Range: *SRange); |
337 | }; |
338 | } |
339 | |