1 | //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/Transformer/RangeSelector.h" |
10 | #include "clang/AST/Expr.h" |
11 | #include "clang/AST/TypeLoc.h" |
12 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
13 | #include "clang/Basic/SourceLocation.h" |
14 | #include "clang/Lex/Lexer.h" |
15 | #include "clang/Tooling/Transformer/SourceCode.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/Support/Errc.h" |
18 | #include "llvm/Support/Error.h" |
19 | #include <string> |
20 | #include <utility> |
21 | |
22 | using namespace clang; |
23 | using namespace transformer; |
24 | |
25 | using ast_matchers::MatchFinder; |
26 | using llvm::Error; |
27 | using llvm::StringError; |
28 | |
29 | using MatchResult = MatchFinder::MatchResult; |
30 | |
31 | static Error invalidArgumentError(Twine Message) { |
32 | return llvm::make_error<StringError>(Args: llvm::errc::invalid_argument, Args&: Message); |
33 | } |
34 | |
35 | static Error typeError(StringRef ID, const ASTNodeKind &Kind) { |
36 | return invalidArgumentError(Message: "mismatched type (node id="+ ID + |
37 | " kind="+ Kind.asStringRef() + ")"); |
38 | } |
39 | |
40 | static Error typeError(StringRef ID, const ASTNodeKind &Kind, |
41 | Twine ExpectedType) { |
42 | return invalidArgumentError(Message: "mismatched type: expected one of "+ |
43 | ExpectedType + " (node id="+ ID + |
44 | " kind="+ Kind.asStringRef() + ")"); |
45 | } |
46 | |
47 | static Error missingPropertyError(StringRef ID, Twine Description, |
48 | StringRef Property) { |
49 | return invalidArgumentError(Message: Description + " requires property '"+ Property + |
50 | "' (node id="+ ID + ")"); |
51 | } |
52 | |
53 | static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes, |
54 | StringRef ID) { |
55 | auto &NodesMap = Nodes.getMap(); |
56 | auto It = NodesMap.find(x: ID); |
57 | if (It == NodesMap.end()) |
58 | return invalidArgumentError(Message: "ID not bound: "+ ID); |
59 | return It->second; |
60 | } |
61 | |
62 | // FIXME: handling of macros should be configurable. |
63 | static SourceLocation findPreviousTokenStart(SourceLocation Start, |
64 | const SourceManager &SM, |
65 | const LangOptions &LangOpts) { |
66 | if (Start.isInvalid() || Start.isMacroID()) |
67 | return SourceLocation(); |
68 | |
69 | SourceLocation BeforeStart = Start.getLocWithOffset(Offset: -1); |
70 | if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) |
71 | return SourceLocation(); |
72 | |
73 | return Lexer::GetBeginningOfToken(Loc: BeforeStart, SM, LangOpts); |
74 | } |
75 | |
76 | // Finds the start location of the previous token of kind \p TK. |
77 | // FIXME: handling of macros should be configurable. |
78 | static SourceLocation findPreviousTokenKind(SourceLocation Start, |
79 | const SourceManager &SM, |
80 | const LangOptions &LangOpts, |
81 | tok::TokenKind TK) { |
82 | while (true) { |
83 | SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); |
84 | if (L.isInvalid() || L.isMacroID()) |
85 | return SourceLocation(); |
86 | |
87 | Token T; |
88 | if (Lexer::getRawToken(Loc: L, Result&: T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) |
89 | return SourceLocation(); |
90 | |
91 | if (T.is(K: TK)) |
92 | return T.getLocation(); |
93 | |
94 | Start = L; |
95 | } |
96 | } |
97 | |
98 | RangeSelector transformer::before(RangeSelector Selector) { |
99 | return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { |
100 | Expected<CharSourceRange> SelectedRange = Selector(Result); |
101 | if (!SelectedRange) |
102 | return SelectedRange.takeError(); |
103 | return CharSourceRange::getCharRange(R: SelectedRange->getBegin()); |
104 | }; |
105 | } |
106 | |
107 | RangeSelector transformer::after(RangeSelector Selector) { |
108 | return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { |
109 | Expected<CharSourceRange> SelectedRange = Selector(Result); |
110 | if (!SelectedRange) |
111 | return SelectedRange.takeError(); |
112 | SourceLocation End = SelectedRange->getEnd(); |
113 | if (SelectedRange->isTokenRange()) { |
114 | // We need to find the actual (exclusive) end location from which to |
115 | // create a new source range. However, that's not guaranteed to be valid, |
116 | // even if the token location itself is valid. So, we create a token range |
117 | // consisting only of the last token, then map that range back to the |
118 | // source file. If that succeeds, we have a valid location for the end of |
119 | // the generated range. |
120 | CharSourceRange Range = Lexer::makeFileCharRange( |
121 | Range: CharSourceRange::getTokenRange(R: SelectedRange->getEnd()), |
122 | SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts()); |
123 | if (Range.isInvalid()) |
124 | return invalidArgumentError( |
125 | Message: "after: can't resolve sub-range to valid source range"); |
126 | End = Range.getEnd(); |
127 | } |
128 | |
129 | return CharSourceRange::getCharRange(R: End); |
130 | }; |
131 | } |
132 | |
133 | RangeSelector transformer::node(std::string ID) { |
134 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
135 | Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); |
136 | if (!Node) |
137 | return Node.takeError(); |
138 | return (Node->get<Decl>() != nullptr || |
139 | (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr)) |
140 | ? tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi, |
141 | Context&: *Result.Context) |
142 | : CharSourceRange::getTokenRange(R: Node->getSourceRange()); |
143 | }; |
144 | } |
145 | |
146 | RangeSelector transformer::statement(std::string ID) { |
147 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
148 | Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); |
149 | if (!Node) |
150 | return Node.takeError(); |
151 | return tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi, |
152 | Context&: *Result.Context); |
153 | }; |
154 | } |
155 | |
156 | RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) { |
157 | return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> { |
158 | Expected<CharSourceRange> BeginRange = Begin(Result); |
159 | if (!BeginRange) |
160 | return BeginRange.takeError(); |
161 | Expected<CharSourceRange> EndRange = End(Result); |
162 | if (!EndRange) |
163 | return EndRange.takeError(); |
164 | SourceLocation B = BeginRange->getBegin(); |
165 | SourceLocation E = EndRange->getEnd(); |
166 | // Note: we are precluding the possibility of sub-token ranges in the case |
167 | // that EndRange is a token range. |
168 | if (Result.SourceManager->isBeforeInTranslationUnit(LHS: E, RHS: B)) { |
169 | return invalidArgumentError(Message: "Bad range: out of order"); |
170 | } |
171 | return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange()); |
172 | }; |
173 | } |
174 | |
175 | RangeSelector transformer::encloseNodes(std::string BeginID, |
176 | std::string EndID) { |
177 | return transformer::enclose(Begin: node(ID: std::move(BeginID)), End: node(ID: std::move(EndID))); |
178 | } |
179 | |
180 | RangeSelector transformer::member(std::string ID) { |
181 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
182 | Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); |
183 | if (!Node) |
184 | return Node.takeError(); |
185 | if (auto *M = Node->get<clang::MemberExpr>()) |
186 | return CharSourceRange::getTokenRange( |
187 | R: M->getMemberNameInfo().getSourceRange()); |
188 | return typeError(ID, Kind: Node->getNodeKind(), ExpectedType: "MemberExpr"); |
189 | }; |
190 | } |
191 | |
192 | RangeSelector transformer::name(std::string ID) { |
193 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
194 | Expected<DynTypedNode> N = getNode(Result.Nodes, ID); |
195 | if (!N) |
196 | return N.takeError(); |
197 | auto &Node = *N; |
198 | if (const auto *D = Node.get<NamedDecl>()) { |
199 | if (!D->getDeclName().isIdentifier()) |
200 | return missingPropertyError(ID, Description: "name", Property: "identifier"); |
201 | SourceLocation L = D->getLocation(); |
202 | auto R = CharSourceRange::getTokenRange(B: L, E: L); |
203 | // Verify that the range covers exactly the name. |
204 | // FIXME: extend this code to support cases like `operator +` or |
205 | // `foo<int>` for which this range will be too short. Doing so will |
206 | // require subcasing `NamedDecl`, because it doesn't provide virtual |
207 | // access to the \c DeclarationNameInfo. |
208 | if (tooling::getText(R, *Result.Context) != D->getName()) |
209 | return CharSourceRange(); |
210 | return R; |
211 | } |
212 | if (const auto *E = Node.get<DeclRefExpr>()) { |
213 | if (!E->getNameInfo().getName().isIdentifier()) |
214 | return missingPropertyError(ID, Description: "name", Property: "identifier"); |
215 | SourceLocation L = E->getLocation(); |
216 | return CharSourceRange::getTokenRange(B: L, E: L); |
217 | } |
218 | if (const auto *I = Node.get<CXXCtorInitializer>()) { |
219 | if (!I->isMemberInitializer() && I->isWritten()) |
220 | return missingPropertyError(ID, Description: "name", Property: "explicit member initializer"); |
221 | SourceLocation L = I->getMemberLocation(); |
222 | return CharSourceRange::getTokenRange(B: L, E: L); |
223 | } |
224 | if (const auto *T = Node.get<TypeLoc>()) { |
225 | TypeLoc Loc = *T; |
226 | auto ET = Loc.getAs<ElaboratedTypeLoc>(); |
227 | if (!ET.isNull()) |
228 | Loc = ET.getNamedTypeLoc(); |
229 | if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>(); |
230 | !SpecLoc.isNull()) |
231 | return CharSourceRange::getTokenRange(R: SpecLoc.getTemplateNameLoc()); |
232 | return CharSourceRange::getTokenRange(R: Loc.getSourceRange()); |
233 | } |
234 | return typeError(ID, Kind: Node.getNodeKind(), |
235 | ExpectedType: "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc"); |
236 | }; |
237 | } |
238 | |
239 | namespace { |
240 | // FIXME: make this available in the public API for users to easily create their |
241 | // own selectors. |
242 | |
243 | // Creates a selector from a range-selection function \p Func, which selects a |
244 | // range that is relative to a bound node id. \c T is the node type expected by |
245 | // \p Func. |
246 | template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)> |
247 | class RelativeSelector { |
248 | std::string ID; |
249 | |
250 | public: |
251 | RelativeSelector(std::string ID) : ID(std::move(ID)) {} |
252 | |
253 | Expected<CharSourceRange> operator()(const MatchResult &Result) { |
254 | Expected<DynTypedNode> N = getNode(Result.Nodes, ID); |
255 | if (!N) |
256 | return N.takeError(); |
257 | if (const auto *Arg = N->get<T>()) |
258 | return Func(Result, *Arg); |
259 | return typeError(ID, Kind: N->getNodeKind()); |
260 | } |
261 | }; |
262 | } // namespace |
263 | |
264 | // FIXME: Change the following functions from being in an anonymous namespace |
265 | // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915 |
266 | // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous |
267 | // namespace works around a bug in earlier versions. |
268 | namespace { |
269 | // Returns the range of the statements (all source between the braces). |
270 | CharSourceRange getStatementsRange(const MatchResult &, |
271 | const CompoundStmt &CS) { |
272 | return CharSourceRange::getCharRange(B: CS.getLBracLoc().getLocWithOffset(Offset: 1), |
273 | E: CS.getRBracLoc()); |
274 | } |
275 | } // namespace |
276 | |
277 | RangeSelector transformer::statements(std::string ID) { |
278 | return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID)); |
279 | } |
280 | |
281 | namespace { |
282 | |
283 | SourceLocation findArgStartDelimiter(const CallExpr &E, SourceLocation RLoc, |
284 | const SourceManager &SM, |
285 | const LangOptions &LangOpts) { |
286 | SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(Arg: 0)->getBeginLoc(); |
287 | return findPreviousTokenKind(Start: Loc, SM, LangOpts, TK: tok::TokenKind::l_paren); |
288 | } |
289 | |
290 | // Returns the location after the last argument of the construct expr. Returns |
291 | // an invalid location if there are no arguments. |
292 | SourceLocation findLastArgEnd(const CXXConstructExpr &CE, |
293 | const SourceManager &SM, |
294 | const LangOptions &LangOpts) { |
295 | for (int i = CE.getNumArgs() - 1; i >= 0; --i) { |
296 | const Expr *Arg = CE.getArg(Arg: i); |
297 | if (isa<CXXDefaultArgExpr>(Val: Arg)) |
298 | continue; |
299 | return Lexer::getLocForEndOfToken(Loc: Arg->getEndLoc(), Offset: 0, SM, LangOpts); |
300 | } |
301 | return {}; |
302 | } |
303 | |
304 | // Returns the range of the source between the call's parentheses/braces. |
305 | CharSourceRange getCallArgumentsRange(const MatchResult &Result, |
306 | const CallExpr &CE) { |
307 | const SourceLocation RLoc = CE.getRParenLoc(); |
308 | return CharSourceRange::getCharRange( |
309 | B: findArgStartDelimiter(E: CE, RLoc, SM: *Result.SourceManager, |
310 | LangOpts: Result.Context->getLangOpts()) |
311 | .getLocWithOffset(Offset: 1), |
312 | E: RLoc); |
313 | } |
314 | |
315 | // Returns the range of the source between the construct expr's |
316 | // parentheses/braces. |
317 | CharSourceRange getConstructArgumentsRange(const MatchResult &Result, |
318 | const CXXConstructExpr &CE) { |
319 | if (SourceRange R = CE.getParenOrBraceRange(); R.isValid()) { |
320 | return CharSourceRange::getCharRange( |
321 | B: Lexer::getLocForEndOfToken(Loc: R.getBegin(), Offset: 0, SM: *Result.SourceManager, |
322 | LangOpts: Result.Context->getLangOpts()), |
323 | E: R.getEnd()); |
324 | } |
325 | |
326 | if (CE.getNumArgs() > 0) { |
327 | return CharSourceRange::getCharRange( |
328 | CE.getArg(Arg: 0)->getBeginLoc(), |
329 | findLastArgEnd(CE, SM: *Result.SourceManager, |
330 | LangOpts: Result.Context->getLangOpts())); |
331 | } |
332 | |
333 | return {}; |
334 | } |
335 | |
336 | } // namespace |
337 | |
338 | RangeSelector transformer::callArgs(std::string ID) { |
339 | return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID)); |
340 | } |
341 | |
342 | RangeSelector transformer::constructExprArgs(std::string ID) { |
343 | return RelativeSelector<CXXConstructExpr, getConstructArgumentsRange>( |
344 | std::move(ID)); |
345 | } |
346 | |
347 | namespace { |
348 | // Returns the range of the elements of the initializer list. Includes all |
349 | // source between the braces. |
350 | CharSourceRange getElementsRange(const MatchResult &, |
351 | const InitListExpr &E) { |
352 | return CharSourceRange::getCharRange(B: E.getLBraceLoc().getLocWithOffset(Offset: 1), |
353 | E: E.getRBraceLoc()); |
354 | } |
355 | } // namespace |
356 | |
357 | RangeSelector transformer::initListElements(std::string ID) { |
358 | return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID)); |
359 | } |
360 | |
361 | namespace { |
362 | // Returns the range of the else branch, including the `else` keyword. |
363 | CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) { |
364 | return tooling::maybeExtendRange( |
365 | Range: CharSourceRange::getTokenRange(B: S.getElseLoc(), E: S.getEndLoc()), |
366 | Terminator: tok::TokenKind::semi, Context&: *Result.Context); |
367 | } |
368 | } // namespace |
369 | |
370 | RangeSelector transformer::elseBranch(std::string ID) { |
371 | return RelativeSelector<IfStmt, getElseRange>(std::move(ID)); |
372 | } |
373 | |
374 | RangeSelector transformer::expansion(RangeSelector S) { |
375 | return [S](const MatchResult &Result) -> Expected<CharSourceRange> { |
376 | Expected<CharSourceRange> SRange = S(Result); |
377 | if (!SRange) |
378 | return SRange.takeError(); |
379 | return Result.SourceManager->getExpansionRange(Range: *SRange); |
380 | }; |
381 | } |
382 |
Definitions
- invalidArgumentError
- typeError
- typeError
- missingPropertyError
- getNode
- findPreviousTokenStart
- findPreviousTokenKind
- before
- after
- node
- statement
- enclose
- encloseNodes
- member
- name
- RelativeSelector
- RelativeSelector
- operator()
- getStatementsRange
- statements
- findArgStartDelimiter
- findLastArgEnd
- getCallArgumentsRange
- getConstructArgumentsRange
- callArgs
- constructExprArgs
- getElementsRange
- initListElements
- getElseRange
- elseBranch
Learn to use CMake with our Intro Training
Find out more