1 | //===--- Selection.cpp ----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Selection.h" |
10 | #include "AST.h" |
11 | #include "support/Logger.h" |
12 | #include "support/Trace.h" |
13 | #include "clang/AST/ASTConcept.h" |
14 | #include "clang/AST/ASTTypeTraits.h" |
15 | #include "clang/AST/Decl.h" |
16 | #include "clang/AST/DeclCXX.h" |
17 | #include "clang/AST/Expr.h" |
18 | #include "clang/AST/ExprCXX.h" |
19 | #include "clang/AST/PrettyPrinter.h" |
20 | #include "clang/AST/RecursiveASTVisitor.h" |
21 | #include "clang/AST/TypeLoc.h" |
22 | #include "clang/Basic/OperatorKinds.h" |
23 | #include "clang/Basic/SourceLocation.h" |
24 | #include "clang/Basic/SourceManager.h" |
25 | #include "clang/Basic/TokenKinds.h" |
26 | #include "clang/Lex/Lexer.h" |
27 | #include "clang/Tooling/Syntax/Tokens.h" |
28 | #include "llvm/ADT/BitVector.h" |
29 | #include "llvm/ADT/STLExtras.h" |
30 | #include "llvm/ADT/StringExtras.h" |
31 | #include "llvm/Support/Casting.h" |
32 | #include "llvm/Support/raw_ostream.h" |
33 | #include <algorithm> |
34 | #include <optional> |
35 | #include <set> |
36 | #include <string> |
37 | |
38 | namespace clang { |
39 | namespace clangd { |
40 | namespace { |
41 | using Node = SelectionTree::Node; |
42 | |
43 | // Measure the fraction of selections that were enabled by recovery AST. |
44 | void recordMetrics(const SelectionTree &S, const LangOptions &Lang) { |
45 | if (!trace::enabled()) |
46 | return; |
47 | const char *LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? "ObjC" : "C" ; |
48 | static constexpr trace::Metric SelectionUsedRecovery( |
49 | "selection_recovery" , trace::Metric::Distribution, "language" ); |
50 | static constexpr trace::Metric RecoveryType( |
51 | "selection_recovery_type" , trace::Metric::Distribution, "language" ); |
52 | const auto *Common = S.commonAncestor(); |
53 | for (const auto *N = Common; N; N = N->Parent) { |
54 | if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) { |
55 | SelectionUsedRecovery.record(Value: 1, Label: LanguageLabel); // used recovery ast. |
56 | RecoveryType.record(Value: RE->isTypeDependent() ? 0 : 1, Label: LanguageLabel); |
57 | return; |
58 | } |
59 | } |
60 | if (Common) |
61 | SelectionUsedRecovery.record(Value: 0, Label: LanguageLabel); // unused. |
62 | } |
63 | |
64 | // Return the range covering a node and all its children. |
65 | SourceRange getSourceRange(const DynTypedNode &N) { |
66 | // MemberExprs to implicitly access anonymous fields should not claim any |
67 | // tokens for themselves. Given: |
68 | // struct A { struct { int b; }; }; |
69 | // The clang AST reports the following nodes for an access to b: |
70 | // A().b; |
71 | // [----] MemberExpr, base = A().<anonymous>, member = b |
72 | // [----] MemberExpr: base = A(), member = <anonymous> |
73 | // [-] CXXConstructExpr |
74 | // For our purposes, we don't want the second MemberExpr to own any tokens, |
75 | // so we reduce its range to match the CXXConstructExpr. |
76 | // (It's not clear that changing the clang AST would be correct in general). |
77 | if (const auto *ME = N.get<MemberExpr>()) { |
78 | if (!ME->getMemberDecl()->getDeclName()) |
79 | return ME->getBase() |
80 | ? getSourceRange(N: DynTypedNode::create(Node: *ME->getBase())) |
81 | : SourceRange(); |
82 | } |
83 | return N.getSourceRange(); |
84 | } |
85 | |
86 | // An IntervalSet maintains a set of disjoint subranges of an array. |
87 | // |
88 | // Initially, it contains the entire array. |
89 | // [-----------------------------------------------------------] |
90 | // |
91 | // When a range is erased(), it will typically split the array in two. |
92 | // Claim: [--------------------] |
93 | // after: [----------------] [-------------------] |
94 | // |
95 | // erase() returns the segments actually erased. Given the state above: |
96 | // Claim: [---------------------------------------] |
97 | // Out: [---------] [------] |
98 | // After: [-----] [-----------] |
99 | // |
100 | // It is used to track (expanded) tokens not yet associated with an AST node. |
101 | // On traversing an AST node, its token range is erased from the unclaimed set. |
102 | // The tokens actually removed are associated with that node, and hit-tested |
103 | // against the selection to determine whether the node is selected. |
104 | template <typename T> class IntervalSet { |
105 | public: |
106 | IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); } |
107 | |
108 | // Removes the elements of Claim from the set, modifying or removing ranges |
109 | // that overlap it. |
110 | // Returns the continuous subranges of Claim that were actually removed. |
111 | llvm::SmallVector<llvm::ArrayRef<T>> erase(llvm::ArrayRef<T> Claim) { |
112 | llvm::SmallVector<llvm::ArrayRef<T>> Out; |
113 | if (Claim.empty()) |
114 | return Out; |
115 | |
116 | // General case: |
117 | // Claim: [-----------------] |
118 | // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-] |
119 | // Overlap: ^first ^second |
120 | // Ranges C and D are fully included. Ranges B and E must be trimmed. |
121 | auto Overlap = std::make_pair( |
122 | UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C |
123 | UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F |
124 | // Rewind to cover B. |
125 | if (Overlap.first != UnclaimedRanges.begin()) { |
126 | --Overlap.first; |
127 | // ...unless B isn't selected at all. |
128 | if (Overlap.first->end() <= Claim.begin()) |
129 | ++Overlap.first; |
130 | } |
131 | if (Overlap.first == Overlap.second) |
132 | return Out; |
133 | |
134 | // First, copy all overlapping ranges into the output. |
135 | auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second); |
136 | // If any of the overlapping ranges were sliced by the claim, split them: |
137 | // - restrict the returned range to the claimed part |
138 | // - save the unclaimed part so it can be reinserted |
139 | llvm::ArrayRef<T> RemainingHead, RemainingTail; |
140 | if (Claim.begin() > OutFirst->begin()) { |
141 | RemainingHead = {OutFirst->begin(), Claim.begin()}; |
142 | *OutFirst = {Claim.begin(), OutFirst->end()}; |
143 | } |
144 | if (Claim.end() < Out.back().end()) { |
145 | RemainingTail = {Claim.end(), Out.back().end()}; |
146 | Out.back() = {Out.back().begin(), Claim.end()}; |
147 | } |
148 | |
149 | // Erase all the overlapping ranges (invalidating all iterators). |
150 | UnclaimedRanges.erase(Overlap.first, Overlap.second); |
151 | // Reinsert ranges that were merely trimmed. |
152 | if (!RemainingHead.empty()) |
153 | UnclaimedRanges.insert(RemainingHead); |
154 | if (!RemainingTail.empty()) |
155 | UnclaimedRanges.insert(RemainingTail); |
156 | |
157 | return Out; |
158 | } |
159 | |
160 | private: |
161 | using TokenRange = llvm::ArrayRef<T>; |
162 | struct RangeLess { |
163 | bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const { |
164 | return L.begin() < R.begin(); |
165 | } |
166 | }; |
167 | |
168 | // Disjoint sorted unclaimed ranges of expanded tokens. |
169 | std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges; |
170 | }; |
171 | |
172 | // Sentinel value for the selectedness of a node where we've seen no tokens yet. |
173 | // This resolves to Unselected if no tokens are ever seen. |
174 | // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete. |
175 | // This value is never exposed publicly. |
176 | constexpr SelectionTree::Selection NoTokens = |
177 | static_cast<SelectionTree::Selection>( |
178 | static_cast<unsigned char>(SelectionTree::Complete + 1)); |
179 | |
180 | // Nodes start with NoTokens, and then use this function to aggregate the |
181 | // selectedness as more tokens are found. |
182 | void update(SelectionTree::Selection &Result, SelectionTree::Selection New) { |
183 | if (New == NoTokens) |
184 | return; |
185 | if (Result == NoTokens) |
186 | Result = New; |
187 | else if (Result != New) |
188 | // Can only be completely selected (or unselected) if all tokens are. |
189 | Result = SelectionTree::Partial; |
190 | } |
191 | |
192 | // As well as comments, don't count semicolons as real tokens. |
193 | // They're not properly claimed as expr-statement is missing from the AST. |
194 | bool shouldIgnore(const syntax::Token &Tok) { |
195 | switch (Tok.kind()) { |
196 | // Even "attached" comments are not considered part of a node's range. |
197 | case tok::comment: |
198 | // The AST doesn't directly store locations for terminating semicolons. |
199 | case tok::semi: |
200 | // We don't have locations for cvr-qualifiers: see QualifiedTypeLoc. |
201 | case tok::kw_const: |
202 | case tok::kw_volatile: |
203 | case tok::kw_restrict: |
204 | return true; |
205 | default: |
206 | return false; |
207 | } |
208 | } |
209 | |
210 | // Determine whether 'Target' is the first expansion of the macro |
211 | // argument whose top-level spelling location is 'SpellingLoc'. |
212 | bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc, |
213 | const SourceManager &SM) { |
214 | SourceLocation Prev = SpellingLoc; |
215 | while (true) { |
216 | // If the arg is expanded multiple times, getMacroArgExpandedLocation() |
217 | // returns the first expansion. |
218 | SourceLocation Next = SM.getMacroArgExpandedLocation(Loc: Prev); |
219 | // So if we reach the target, target is the first-expansion of the |
220 | // first-expansion ... |
221 | if (SM.getFileID(SpellingLoc: Next) == Target) |
222 | return true; |
223 | |
224 | // Otherwise, if the FileID stops changing, we've reached the innermost |
225 | // macro expansion, and Target was on a different branch. |
226 | if (SM.getFileID(SpellingLoc: Next) == SM.getFileID(SpellingLoc: Prev)) |
227 | return false; |
228 | |
229 | Prev = Next; |
230 | } |
231 | return false; |
232 | } |
233 | |
234 | // SelectionTester can determine whether a range of tokens from the PP-expanded |
235 | // stream (corresponding to an AST node) is considered selected. |
236 | // |
237 | // When the tokens result from macro expansions, the appropriate tokens in the |
238 | // main file are examined (macro invocation or args). Similarly for #includes. |
239 | // However, only the first expansion of a given spelled token is considered |
240 | // selected. |
241 | // |
242 | // It tests each token in the range (not just the endpoints) as contiguous |
243 | // expanded tokens may not have contiguous spellings (with macros). |
244 | // |
245 | // Non-token text, and tokens not modeled in the AST (comments, semicolons) |
246 | // are ignored when determining selectedness. |
247 | class SelectionTester { |
248 | public: |
249 | // The selection is offsets [SelBegin, SelEnd) in SelFile. |
250 | SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile, |
251 | unsigned SelBegin, unsigned SelEnd, const SourceManager &SM) |
252 | : SelFile(SelFile), SelFileBounds(SM.getLocForStartOfFile(FID: SelFile), |
253 | SM.getLocForEndOfFile(FID: SelFile)), |
254 | SM(SM) { |
255 | // Find all tokens (partially) selected in the file. |
256 | auto AllSpelledTokens = Buf.spelledTokens(FID: SelFile); |
257 | const syntax::Token *SelFirst = |
258 | llvm::partition_point(Range&: AllSpelledTokens, P: [&](const syntax::Token &Tok) { |
259 | return SM.getFileOffset(SpellingLoc: Tok.endLocation()) <= SelBegin; |
260 | }); |
261 | const syntax::Token *SelLimit = std::partition_point( |
262 | first: SelFirst, last: AllSpelledTokens.end(), pred: [&](const syntax::Token &Tok) { |
263 | return SM.getFileOffset(SpellingLoc: Tok.location()) < SelEnd; |
264 | }); |
265 | auto Sel = llvm::ArrayRef(SelFirst, SelLimit); |
266 | // Find which of these are preprocessed to nothing and should be ignored. |
267 | llvm::BitVector PPIgnored(Sel.size(), false); |
268 | for (const syntax::TokenBuffer::Expansion &X : |
269 | Buf.expansionsOverlapping(Spelled: Sel)) { |
270 | if (X.Expanded.empty()) { |
271 | for (const syntax::Token &Tok : X.Spelled) { |
272 | if (&Tok >= SelFirst && &Tok < SelLimit) |
273 | PPIgnored[&Tok - SelFirst] = true; |
274 | } |
275 | } |
276 | } |
277 | // Precompute selectedness and offset for selected spelled tokens. |
278 | for (unsigned I = 0; I < Sel.size(); ++I) { |
279 | if (shouldIgnore(Tok: Sel[I]) || PPIgnored[I]) |
280 | continue; |
281 | SelectedSpelled.emplace_back(); |
282 | Tok &S = SelectedSpelled.back(); |
283 | S.Offset = SM.getFileOffset(SpellingLoc: Sel[I].location()); |
284 | if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd) |
285 | S.Selected = SelectionTree::Complete; |
286 | else |
287 | S.Selected = SelectionTree::Partial; |
288 | } |
289 | MaybeSelectedExpanded = computeMaybeSelectedExpandedTokens(Toks: Buf); |
290 | } |
291 | |
292 | // Test whether a consecutive range of tokens is selected. |
293 | // The tokens are taken from the expanded token stream. |
294 | SelectionTree::Selection |
295 | test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const { |
296 | if (ExpandedTokens.empty()) |
297 | return NoTokens; |
298 | if (SelectedSpelled.empty()) |
299 | return SelectionTree::Unselected; |
300 | // Cheap (pointer) check whether any of the tokens could touch selection. |
301 | // In most cases, the node's overall source range touches ExpandedTokens, |
302 | // or we would have failed mayHit(). However now we're only considering |
303 | // the *unclaimed* spans of expanded tokens. |
304 | // This is a significant performance improvement when a lot of nodes |
305 | // surround the selection, including when generated by macros. |
306 | if (MaybeSelectedExpanded.empty() || |
307 | &ExpandedTokens.front() > &MaybeSelectedExpanded.back() || |
308 | &ExpandedTokens.back() < &MaybeSelectedExpanded.front()) { |
309 | return SelectionTree::Unselected; |
310 | } |
311 | |
312 | // The eof token is used as a sentinel. |
313 | // In general, source range from an AST node should not claim the eof token, |
314 | // but it could occur for unmatched-bracket cases. |
315 | // FIXME: fix it in TokenBuffer, expandedTokens(SourceRange) should not |
316 | // return the eof token. |
317 | if (ExpandedTokens.back().kind() == tok::eof) |
318 | ExpandedTokens = ExpandedTokens.drop_back(); |
319 | |
320 | SelectionTree::Selection Result = NoTokens; |
321 | while (!ExpandedTokens.empty()) { |
322 | // Take consecutive tokens from the same context together for efficiency. |
323 | SourceLocation Start = ExpandedTokens.front().location(); |
324 | FileID FID = SM.getFileID(SpellingLoc: Start); |
325 | // Comparing SourceLocations against bounds is cheaper than getFileID(). |
326 | SourceLocation Limit = SM.getComposedLoc(FID, Offset: SM.getFileIDSize(FID)); |
327 | auto Batch = ExpandedTokens.take_while(Pred: [&](const syntax::Token &T) { |
328 | return T.location() >= Start && T.location() < Limit; |
329 | }); |
330 | assert(!Batch.empty()); |
331 | ExpandedTokens = ExpandedTokens.drop_front(N: Batch.size()); |
332 | |
333 | update(Result, New: testChunk(FID, Batch)); |
334 | } |
335 | return Result; |
336 | } |
337 | |
338 | // Cheap check whether any of the tokens in R might be selected. |
339 | // If it returns false, test() will return NoTokens or Unselected. |
340 | // If it returns true, test() may return any value. |
341 | bool mayHit(SourceRange R) const { |
342 | if (SelectedSpelled.empty() || MaybeSelectedExpanded.empty()) |
343 | return false; |
344 | // If the node starts after the selection ends, it is not selected. |
345 | // Tokens a macro location might claim are >= its expansion start. |
346 | // So if the expansion start > last selected token, we can prune it. |
347 | // (This is particularly helpful for GTest's TEST macro). |
348 | if (auto B = offsetInSelFile(Loc: getExpansionStart(Loc: R.getBegin()))) |
349 | if (*B > SelectedSpelled.back().Offset) |
350 | return false; |
351 | // If the node ends before the selection begins, it is not selected. |
352 | SourceLocation EndLoc = R.getEnd(); |
353 | while (EndLoc.isMacroID()) |
354 | EndLoc = SM.getImmediateExpansionRange(Loc: EndLoc).getEnd(); |
355 | // In the rare case that the expansion range is a char range, EndLoc is |
356 | // ~one token too far to the right. We may fail to prune, that's OK. |
357 | if (auto E = offsetInSelFile(Loc: EndLoc)) |
358 | if (*E < SelectedSpelled.front().Offset) |
359 | return false; |
360 | return true; |
361 | } |
362 | |
363 | private: |
364 | // Plausible expanded tokens that might be affected by the selection. |
365 | // This is an overestimate, it may contain tokens that are not selected. |
366 | // The point is to allow cheap pruning in test() |
367 | llvm::ArrayRef<syntax::Token> |
368 | computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer &Toks) { |
369 | if (SelectedSpelled.empty()) |
370 | return {}; |
371 | |
372 | auto LastAffectedToken = [&](SourceLocation Loc) { |
373 | auto Offset = offsetInSelFile(Loc); |
374 | while (Loc.isValid() && !Offset) { |
375 | Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getEnd() |
376 | : SM.getIncludeLoc(FID: SM.getFileID(SpellingLoc: Loc)); |
377 | Offset = offsetInSelFile(Loc); |
378 | } |
379 | return Offset; |
380 | }; |
381 | auto FirstAffectedToken = [&](SourceLocation Loc) { |
382 | auto Offset = offsetInSelFile(Loc); |
383 | while (Loc.isValid() && !Offset) { |
384 | Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getBegin() |
385 | : SM.getIncludeLoc(FID: SM.getFileID(SpellingLoc: Loc)); |
386 | Offset = offsetInSelFile(Loc); |
387 | } |
388 | return Offset; |
389 | }; |
390 | |
391 | const syntax::Token *Start = llvm::partition_point( |
392 | Range: Toks.expandedTokens(), |
393 | P: [&, First = SelectedSpelled.front().Offset](const syntax::Token &Tok) { |
394 | if (Tok.kind() == tok::eof) |
395 | return false; |
396 | // Implausible if upperbound(Tok) < First. |
397 | if (auto Offset = LastAffectedToken(Tok.location())) |
398 | return *Offset < First; |
399 | // A prefix of the expanded tokens may be from an implicit |
400 | // inclusion (e.g. preamble patch, or command-line -include). |
401 | return true; |
402 | }); |
403 | |
404 | bool EndInvalid = false; |
405 | const syntax::Token *End = std::partition_point( |
406 | first: Start, last: Toks.expandedTokens().end(), |
407 | pred: [&, Last = SelectedSpelled.back().Offset](const syntax::Token &Tok) { |
408 | if (Tok.kind() == tok::eof) |
409 | return false; |
410 | // Plausible if lowerbound(Tok) <= Last. |
411 | if (auto Offset = FirstAffectedToken(Tok.location())) |
412 | return *Offset <= Last; |
413 | // Shouldn't happen: once we've seen tokens traceable to the main |
414 | // file, there shouldn't be any more implicit inclusions. |
415 | assert(false && "Expanded token could not be resolved to main file!" ); |
416 | EndInvalid = true; |
417 | return true; // conservatively assume this token can overlap |
418 | }); |
419 | if (EndInvalid) |
420 | End = Toks.expandedTokens().end(); |
421 | |
422 | return llvm::ArrayRef(Start, End); |
423 | } |
424 | |
425 | // Hit-test a consecutive range of tokens from a single file ID. |
426 | SelectionTree::Selection |
427 | testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const { |
428 | assert(!Batch.empty()); |
429 | SourceLocation StartLoc = Batch.front().location(); |
430 | // There are several possible categories of FileID depending on how the |
431 | // preprocessor was used to generate these tokens: |
432 | // main file, #included file, macro args, macro bodies. |
433 | // We need to identify the main-file tokens that represent Batch, and |
434 | // determine whether we want to exclusively claim them. Regular tokens |
435 | // represent one AST construct, but a macro invocation can represent many. |
436 | |
437 | // Handle tokens written directly in the main file. |
438 | if (FID == SelFile) { |
439 | return testTokenRange(Begin: *offsetInSelFile(Loc: Batch.front().location()), |
440 | End: *offsetInSelFile(Loc: Batch.back().location())); |
441 | } |
442 | |
443 | // Handle tokens in another file #included into the main file. |
444 | // Check if the #include is selected, but don't claim it exclusively. |
445 | if (StartLoc.isFileID()) { |
446 | for (SourceLocation Loc = Batch.front().location(); Loc.isValid(); |
447 | Loc = SM.getIncludeLoc(FID: SM.getFileID(SpellingLoc: Loc))) { |
448 | if (auto Offset = offsetInSelFile(Loc)) |
449 | // FIXME: use whole #include directive, not just the filename string. |
450 | return testToken(Offset: *Offset); |
451 | } |
452 | return NoTokens; |
453 | } |
454 | |
455 | assert(StartLoc.isMacroID()); |
456 | // Handle tokens that were passed as a macro argument. |
457 | SourceLocation ArgStart = SM.getTopMacroCallerLoc(Loc: StartLoc); |
458 | if (auto ArgOffset = offsetInSelFile(Loc: ArgStart)) { |
459 | if (isFirstExpansion(Target: FID, SpellingLoc: ArgStart, SM)) { |
460 | SourceLocation ArgEnd = |
461 | SM.getTopMacroCallerLoc(Loc: Batch.back().location()); |
462 | return testTokenRange(Begin: *ArgOffset, End: *offsetInSelFile(Loc: ArgEnd)); |
463 | } else { // NOLINT(llvm-else-after-return) |
464 | /* fall through and treat as part of the macro body */ |
465 | } |
466 | } |
467 | |
468 | // Handle tokens produced by non-argument macro expansion. |
469 | // Check if the macro name is selected, don't claim it exclusively. |
470 | if (auto ExpansionOffset = offsetInSelFile(Loc: getExpansionStart(Loc: StartLoc))) |
471 | // FIXME: also check ( and ) for function-like macros? |
472 | return testToken(Offset: *ExpansionOffset); |
473 | return NoTokens; |
474 | } |
475 | |
476 | // Is the closed token range [Begin, End] selected? |
477 | SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const { |
478 | assert(Begin <= End); |
479 | // Outside the selection entirely? |
480 | if (End < SelectedSpelled.front().Offset || |
481 | Begin > SelectedSpelled.back().Offset) |
482 | return SelectionTree::Unselected; |
483 | |
484 | // Compute range of tokens. |
485 | auto B = llvm::partition_point( |
486 | Range: SelectedSpelled, P: [&](const Tok &T) { return T.Offset < Begin; }); |
487 | auto E = std::partition_point(first: B, last: SelectedSpelled.end(), pred: [&](const Tok &T) { |
488 | return T.Offset <= End; |
489 | }); |
490 | |
491 | // Aggregate selectedness of tokens in range. |
492 | bool ExtendsOutsideSelection = Begin < SelectedSpelled.front().Offset || |
493 | End > SelectedSpelled.back().Offset; |
494 | SelectionTree::Selection Result = |
495 | ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens; |
496 | for (auto It = B; It != E; ++It) |
497 | update(Result, New: It->Selected); |
498 | return Result; |
499 | } |
500 | |
501 | // Is the token at `Offset` selected? |
502 | SelectionTree::Selection testToken(unsigned Offset) const { |
503 | // Outside the selection entirely? |
504 | if (Offset < SelectedSpelled.front().Offset || |
505 | Offset > SelectedSpelled.back().Offset) |
506 | return SelectionTree::Unselected; |
507 | // Find the token, if it exists. |
508 | auto It = llvm::partition_point( |
509 | Range: SelectedSpelled, P: [&](const Tok &T) { return T.Offset < Offset; }); |
510 | if (It != SelectedSpelled.end() && It->Offset == Offset) |
511 | return It->Selected; |
512 | return NoTokens; |
513 | } |
514 | |
515 | // Decomposes Loc and returns the offset if the file ID is SelFile. |
516 | std::optional<unsigned> offsetInSelFile(SourceLocation Loc) const { |
517 | // Decoding Loc with SM.getDecomposedLoc is relatively expensive. |
518 | // But SourceLocations for a file are numerically contiguous, so we |
519 | // can use cheap integer operations instead. |
520 | if (Loc < SelFileBounds.getBegin() || Loc >= SelFileBounds.getEnd()) |
521 | return std::nullopt; |
522 | // FIXME: subtracting getRawEncoding() is dubious, move this logic into SM. |
523 | return Loc.getRawEncoding() - SelFileBounds.getBegin().getRawEncoding(); |
524 | } |
525 | |
526 | SourceLocation getExpansionStart(SourceLocation Loc) const { |
527 | while (Loc.isMacroID()) |
528 | Loc = SM.getImmediateExpansionRange(Loc).getBegin(); |
529 | return Loc; |
530 | } |
531 | |
532 | struct Tok { |
533 | unsigned Offset; |
534 | SelectionTree::Selection Selected; |
535 | }; |
536 | std::vector<Tok> SelectedSpelled; |
537 | llvm::ArrayRef<syntax::Token> MaybeSelectedExpanded; |
538 | FileID SelFile; |
539 | SourceRange SelFileBounds; |
540 | const SourceManager &SM; |
541 | }; |
542 | |
543 | // Show the type of a node for debugging. |
544 | void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) { |
545 | if (const TypeLoc *TL = N.get<TypeLoc>()) { |
546 | // TypeLoc is a hierarchy, but has only a single ASTNodeKind. |
547 | // Synthesize the name from the Type subclass (except for QualifiedTypeLoc). |
548 | if (TL->getTypeLocClass() == TypeLoc::Qualified) |
549 | OS << "QualifiedTypeLoc" ; |
550 | else |
551 | OS << TL->getType()->getTypeClassName() << "TypeLoc" ; |
552 | } else { |
553 | OS << N.getNodeKind().asStringRef(); |
554 | } |
555 | } |
556 | |
557 | #ifndef NDEBUG |
558 | std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) { |
559 | std::string S; |
560 | llvm::raw_string_ostream OS(S); |
561 | printNodeKind(OS, N); |
562 | return std::move(OS.str()); |
563 | } |
564 | #endif |
565 | |
566 | bool isImplicit(const Stmt *S) { |
567 | // Some Stmts are implicit and shouldn't be traversed, but there's no |
568 | // "implicit" attribute on Stmt/Expr. |
569 | // Unwrap implicit casts first if present (other nodes too?). |
570 | if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(Val: S)) |
571 | S = ICE->getSubExprAsWritten(); |
572 | // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor. |
573 | // It would be nice if RAV handled this (!shouldTraverseImplicitCode()). |
574 | if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(Val: S)) |
575 | if (CTI->isImplicit()) |
576 | return true; |
577 | // Make sure implicit access of anonymous structs don't end up owning tokens. |
578 | if (auto *ME = llvm::dyn_cast<MemberExpr>(Val: S)) { |
579 | if (auto *FD = llvm::dyn_cast<FieldDecl>(Val: ME->getMemberDecl())) |
580 | if (FD->isAnonymousStructOrUnion()) |
581 | // If Base is an implicit CXXThis, then the whole MemberExpr has no |
582 | // tokens. If it's a normal e.g. DeclRef, we treat the MemberExpr like |
583 | // an implicit cast. |
584 | return isImplicit(ME->getBase()); |
585 | } |
586 | // Refs to operator() and [] are (almost?) always implicit as part of calls. |
587 | if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(Val: S)) { |
588 | if (auto *FD = llvm::dyn_cast<FunctionDecl>(Val: DRE->getDecl())) { |
589 | switch (FD->getOverloadedOperator()) { |
590 | case OO_Call: |
591 | case OO_Subscript: |
592 | return true; |
593 | default: |
594 | break; |
595 | } |
596 | } |
597 | } |
598 | return false; |
599 | } |
600 | |
601 | // We find the selection by visiting written nodes in the AST, looking for nodes |
602 | // that intersect with the selected character range. |
603 | // |
604 | // While traversing, we maintain a parent stack. As nodes pop off the stack, |
605 | // we decide whether to keep them or not. To be kept, they must either be |
606 | // selected or contain some nodes that are. |
607 | // |
608 | // For simple cases (not inside macros) we prune subtrees that don't intersect. |
609 | class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> { |
610 | public: |
611 | // Runs the visitor to gather selected nodes and their ancestors. |
612 | // If there is any selection, the root (TUDecl) is the first node. |
613 | static std::deque<Node> collect(ASTContext &AST, |
614 | const syntax::TokenBuffer &Tokens, |
615 | const PrintingPolicy &PP, unsigned Begin, |
616 | unsigned End, FileID File) { |
617 | SelectionVisitor V(AST, Tokens, PP, Begin, End, File); |
618 | V.TraverseAST(AST); |
619 | assert(V.Stack.size() == 1 && "Unpaired push/pop?" ); |
620 | assert(V.Stack.top() == &V.Nodes.front()); |
621 | return std::move(V.Nodes); |
622 | } |
623 | |
624 | // We traverse all "well-behaved" nodes the same way: |
625 | // - push the node onto the stack |
626 | // - traverse its children recursively |
627 | // - pop it from the stack |
628 | // - hit testing: is intersection(node, selection) - union(children) empty? |
629 | // - attach it to the tree if it or any children hit the selection |
630 | // |
631 | // Two categories of nodes are not "well-behaved": |
632 | // - those without source range information, we don't record those |
633 | // - those that can't be stored in DynTypedNode. |
634 | bool TraverseDecl(Decl *X) { |
635 | if (llvm::isa_and_nonnull<TranslationUnitDecl>(Val: X)) |
636 | return Base::TraverseDecl(D: X); // Already pushed by constructor. |
637 | // Base::TraverseDecl will suppress children, but not this node itself. |
638 | if (X && X->isImplicit()) { |
639 | // Most implicit nodes have only implicit children and can be skipped. |
640 | // However there are exceptions (`void foo(Concept auto x)`), and |
641 | // the base implementation knows how to find them. |
642 | return Base::TraverseDecl(D: X); |
643 | } |
644 | return traverseNode(Node: X, Body: [&] { return Base::TraverseDecl(D: X); }); |
645 | } |
646 | bool TraverseTypeLoc(TypeLoc X) { |
647 | return traverseNode(Node: &X, Body: [&] { return Base::TraverseTypeLoc(TL: X); }); |
648 | } |
649 | bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &X) { |
650 | return traverseNode(Node: &X, |
651 | Body: [&] { return Base::TraverseTemplateArgumentLoc(ArgLoc: X); }); |
652 | } |
653 | bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) { |
654 | return traverseNode( |
655 | Node: &X, Body: [&] { return Base::TraverseNestedNameSpecifierLoc(NNS: X); }); |
656 | } |
657 | bool TraverseConstructorInitializer(CXXCtorInitializer *X) { |
658 | return traverseNode( |
659 | Node: X, Body: [&] { return Base::TraverseConstructorInitializer(Init: X); }); |
660 | } |
661 | bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &X) { |
662 | return traverseNode(Node: &X, Body: [&] { return Base::TraverseCXXBaseSpecifier(Base: X); }); |
663 | } |
664 | bool TraverseAttr(Attr *X) { |
665 | return traverseNode(Node: X, Body: [&] { return Base::TraverseAttr(At: X); }); |
666 | } |
667 | bool TraverseConceptReference(ConceptReference *X) { |
668 | return traverseNode(Node: X, Body: [&] { return Base::TraverseConceptReference(CR: X); }); |
669 | } |
670 | // Stmt is the same, but this form allows the data recursion optimization. |
671 | bool dataTraverseStmtPre(Stmt *X) { |
672 | if (!X || isImplicit(S: X)) |
673 | return false; |
674 | auto N = DynTypedNode::create(Node: *X); |
675 | if (canSafelySkipNode(N)) |
676 | return false; |
677 | push(Node: std::move(N)); |
678 | if (shouldSkipChildren(X)) { |
679 | pop(); |
680 | return false; |
681 | } |
682 | return true; |
683 | } |
684 | bool dataTraverseStmtPost(Stmt *X) { |
685 | pop(); |
686 | return true; |
687 | } |
688 | // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived |
689 | // TraverseTypeLoc is not called for the inner UnqualTypeLoc. |
690 | // This means we'd never see 'int' in 'const int'! Work around that here. |
691 | // (The reason for the behavior is to avoid traversing the nested Type twice, |
692 | // but we ignore TraverseType anyway). |
693 | bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) { |
694 | return traverseNode<TypeLoc>( |
695 | Node: &QX, Body: [&] { return TraverseTypeLoc(X: QX.getUnqualifiedLoc()); }); |
696 | } |
697 | bool TraverseObjCProtocolLoc(ObjCProtocolLoc PL) { |
698 | return traverseNode(Node: &PL, Body: [&] { return Base::TraverseObjCProtocolLoc(ProtocolLoc: PL); }); |
699 | } |
700 | // Uninteresting parts of the AST that don't have locations within them. |
701 | bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; } |
702 | bool TraverseType(QualType) { return true; } |
703 | |
704 | // The DeclStmt for the loop variable claims to cover the whole range |
705 | // inside the parens, this causes the range-init expression to not be hit. |
706 | // Traverse the loop VarDecl instead, which has the right source range. |
707 | bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { |
708 | return traverseNode(Node: S, Body: [&] { |
709 | return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) && |
710 | TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody()); |
711 | }); |
712 | } |
713 | // OpaqueValueExpr blocks traversal, we must explicitly traverse it. |
714 | bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) { |
715 | return traverseNode(Node: E, Body: [&] { return TraverseStmt(E->getSourceExpr()); }); |
716 | } |
717 | // We only want to traverse the *syntactic form* to understand the selection. |
718 | bool TraversePseudoObjectExpr(PseudoObjectExpr *E) { |
719 | return traverseNode(Node: E, Body: [&] { return TraverseStmt(E->getSyntacticForm()); }); |
720 | } |
721 | bool TraverseTypeConstraint(const TypeConstraint *C) { |
722 | if (auto *E = C->getImmediatelyDeclaredConstraint()) { |
723 | // Technically this expression is 'implicit' and not traversed by the RAV. |
724 | // However, the range is correct, so we visit expression to avoid adding |
725 | // an extra kind to 'DynTypeNode' that hold 'TypeConstraint'. |
726 | return TraverseStmt(E); |
727 | } |
728 | return Base::TraverseTypeConstraint(C); |
729 | } |
730 | |
731 | // Override child traversal for certain node types. |
732 | using RecursiveASTVisitor::getStmtChildren; |
733 | // PredefinedExpr like __func__ has a StringLiteral child for its value. |
734 | // It's not written, so don't traverse it. |
735 | Stmt::child_range getStmtChildren(PredefinedExpr *) { |
736 | return {StmtIterator{}, StmtIterator{}}; |
737 | } |
738 | |
739 | private: |
740 | using Base = RecursiveASTVisitor<SelectionVisitor>; |
741 | |
742 | SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens, |
743 | const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd, |
744 | FileID SelFile) |
745 | : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()), |
746 | #ifndef NDEBUG |
747 | PrintPolicy(PP), |
748 | #endif |
749 | TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM), |
750 | UnclaimedExpandedTokens(Tokens.expandedTokens()) { |
751 | // Ensure we have a node for the TU decl, regardless of traversal scope. |
752 | Nodes.emplace_back(); |
753 | Nodes.back().ASTNode = DynTypedNode::create(Node: *AST.getTranslationUnitDecl()); |
754 | Nodes.back().Parent = nullptr; |
755 | Nodes.back().Selected = SelectionTree::Unselected; |
756 | Stack.push(x: &Nodes.back()); |
757 | } |
758 | |
759 | // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo. |
760 | // Node is always a pointer so the generic code can handle any null checks. |
761 | template <typename T, typename Func> |
762 | bool traverseNode(T *Node, const Func &Body) { |
763 | if (Node == nullptr) |
764 | return true; |
765 | auto N = DynTypedNode::create(*Node); |
766 | if (canSafelySkipNode(N)) |
767 | return true; |
768 | push(Node: DynTypedNode::create(*Node)); |
769 | bool Ret = Body(); |
770 | pop(); |
771 | return Ret; |
772 | } |
773 | |
774 | // HIT TESTING |
775 | // |
776 | // We do rough hit testing on the way down the tree to avoid traversing |
777 | // subtrees that don't touch the selection (canSafelySkipNode), but |
778 | // fine-grained hit-testing is mostly done on the way back up (in pop()). |
779 | // This means children get to claim parts of the selection first, and parents |
780 | // are only selected if they own tokens that no child owned. |
781 | // |
782 | // Nodes *usually* nest nicely: a child's getSourceRange() lies within the |
783 | // parent's, and a node (transitively) owns all tokens in its range. |
784 | // |
785 | // Exception 1: when declarators nest, *inner* declarator is the *outer* type. |
786 | // e.g. void foo[5](int) is an array of functions. |
787 | // To handle this case, declarators are careful to only claim the tokens they |
788 | // own, rather than claim a range and rely on claim ordering. |
789 | // |
790 | // Exception 2: siblings both claim the same node. |
791 | // e.g. `int x, y;` produces two sibling VarDecls. |
792 | // ~~~~~ x |
793 | // ~~~~~~~~ y |
794 | // Here the first ("leftmost") sibling claims the tokens it wants, and the |
795 | // other sibling gets what's left. So selecting "int" only includes the left |
796 | // VarDecl in the selection tree. |
797 | |
798 | // An optimization for a common case: nodes outside macro expansions that |
799 | // don't intersect the selection may be recursively skipped. |
800 | bool canSafelySkipNode(const DynTypedNode &N) { |
801 | SourceRange S = getSourceRange(N); |
802 | if (auto *TL = N.get<TypeLoc>()) { |
803 | // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile |
804 | // heuristics. We should consider only pruning critical TypeLoc nodes, to |
805 | // be more robust. |
806 | |
807 | // AttributedTypeLoc may point to the attribute's range, NOT the modified |
808 | // type's range. |
809 | if (auto AT = TL->getAs<AttributedTypeLoc>()) |
810 | S = AT.getModifiedLoc().getSourceRange(); |
811 | } |
812 | // SourceRange often doesn't manage to accurately cover attributes. |
813 | // Fortunately, attributes are rare. |
814 | if (llvm::any_of(Range: getAttributes(N), |
815 | P: [](const Attr *A) { return !A->isImplicit(); })) |
816 | return false; |
817 | if (!SelChecker.mayHit(R: S)) { |
818 | dlog("{2}skip: {0} {1}" , printNodeToString(N, PrintPolicy), |
819 | S.printToString(SM), indent()); |
820 | return true; |
821 | } |
822 | return false; |
823 | } |
824 | |
825 | // There are certain nodes we want to treat as leaves in the SelectionTree, |
826 | // although they do have children. |
827 | bool shouldSkipChildren(const Stmt *X) const { |
828 | // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i). |
829 | // Unfortunately TokenBuffer sees 12_i as one token and can't split it. |
830 | // So we treat UserDefinedLiteral as a leaf node, owning the token. |
831 | return llvm::isa<UserDefinedLiteral>(Val: X); |
832 | } |
833 | |
834 | // Pushes a node onto the ancestor stack. Pairs with pop(). |
835 | // Performs early hit detection for some nodes (on the earlySourceRange). |
836 | void push(DynTypedNode Node) { |
837 | SourceRange Early = earlySourceRange(N: Node); |
838 | dlog("{2}push: {0} {1}" , printNodeToString(Node, PrintPolicy), |
839 | Node.getSourceRange().printToString(SM), indent()); |
840 | Nodes.emplace_back(); |
841 | Nodes.back().ASTNode = std::move(Node); |
842 | Nodes.back().Parent = Stack.top(); |
843 | Nodes.back().Selected = NoTokens; |
844 | Stack.push(x: &Nodes.back()); |
845 | claimRange(S: Early, Result&: Nodes.back().Selected); |
846 | } |
847 | |
848 | // Pops a node off the ancestor stack, and finalizes it. Pairs with push(). |
849 | // Performs primary hit detection. |
850 | void pop() { |
851 | Node &N = *Stack.top(); |
852 | dlog("{1}pop: {0}" , printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); |
853 | claimTokensFor(N: N.ASTNode, Result&: N.Selected); |
854 | if (N.Selected == NoTokens) |
855 | N.Selected = SelectionTree::Unselected; |
856 | if (N.Selected || !N.Children.empty()) { |
857 | // Attach to the tree. |
858 | N.Parent->Children.push_back(Elt: &N); |
859 | } else { |
860 | // Neither N any children are selected, it doesn't belong in the tree. |
861 | assert(&N == &Nodes.back()); |
862 | Nodes.pop_back(); |
863 | } |
864 | Stack.pop(); |
865 | } |
866 | |
867 | // Returns the range of tokens that this node will claim directly, and |
868 | // is not available to the node's children. |
869 | // Usually empty, but sometimes children cover tokens but shouldn't own them. |
870 | SourceRange earlySourceRange(const DynTypedNode &N) { |
871 | if (const Decl *VD = N.get<VarDecl>()) { |
872 | // We want the name in the var-decl to be claimed by the decl itself and |
873 | // not by any children. Ususally, we don't need this, because source |
874 | // ranges of children are not overlapped with their parent's. |
875 | // An exception is lambda captured var decl, where AutoTypeLoc is |
876 | // overlapped with the name loc. |
877 | // auto fun = [bar = foo]() { ... } |
878 | // ~~~~~~~~~ VarDecl |
879 | // ~~~ |- AutoTypeLoc |
880 | return VD->getLocation(); |
881 | } |
882 | |
883 | // When referring to a destructor ~Foo(), attribute Foo to the destructor |
884 | // rather than the TypeLoc nested inside it. |
885 | // We still traverse the TypeLoc, because it may contain other targeted |
886 | // things like the T in ~Foo<T>(). |
887 | if (const auto *CDD = N.get<CXXDestructorDecl>()) |
888 | return CDD->getNameInfo().getNamedTypeInfo()->getTypeLoc().getBeginLoc(); |
889 | if (const auto *ME = N.get<MemberExpr>()) { |
890 | auto NameInfo = ME->getMemberNameInfo(); |
891 | if (NameInfo.getName().getNameKind() == |
892 | DeclarationName::CXXDestructorName) |
893 | return NameInfo.getNamedTypeInfo()->getTypeLoc().getBeginLoc(); |
894 | } |
895 | |
896 | return SourceRange(); |
897 | } |
898 | |
899 | // Claim tokens for N, after processing its children. |
900 | // By default this claims all unclaimed tokens in getSourceRange(). |
901 | // We override this if we want to claim fewer tokens (e.g. there are gaps). |
902 | void claimTokensFor(const DynTypedNode &N, SelectionTree::Selection &Result) { |
903 | // CXXConstructExpr often shows implicit construction, like `string s;`. |
904 | // Don't associate any tokens with it unless there's some syntax like {}. |
905 | // This prevents it from claiming 's', its primary location. |
906 | if (const auto *CCE = N.get<CXXConstructExpr>()) { |
907 | claimRange(S: CCE->getParenOrBraceRange(), Result); |
908 | return; |
909 | } |
910 | // ExprWithCleanups is always implicit. It often wraps CXXConstructExpr. |
911 | // Prevent it claiming 's' in the case above. |
912 | if (N.get<ExprWithCleanups>()) |
913 | return; |
914 | |
915 | // Declarators nest "inside out", with parent types inside child ones. |
916 | // Instead of claiming the whole range (clobbering parent tokens), carefully |
917 | // claim the tokens owned by this node and non-declarator children. |
918 | // (We could manipulate traversal order instead, but this is easier). |
919 | // |
920 | // Non-declarator types nest normally, and are handled like other nodes. |
921 | // |
922 | // Example: |
923 | // Vec<R<int>(*[2])(A<char>)> is a Vec of arrays of pointers to functions, |
924 | // which accept A<char> and return R<int>. |
925 | // The TypeLoc hierarchy: |
926 | // Vec<R<int>(*[2])(A<char>)> m; |
927 | // Vec<#####################> TemplateSpecialization Vec |
928 | // --------[2]---------- `-Array |
929 | // -------*------------- `-Pointer |
930 | // ------(----)--------- `-Paren |
931 | // ------------(#######) `-Function |
932 | // R<###> |-TemplateSpecialization R |
933 | // int | `-Builtin int |
934 | // A<####> `-TemplateSpecialization A |
935 | // char `-Builtin char |
936 | // |
937 | // In each row |
938 | // --- represents unclaimed parts of the SourceRange. |
939 | // ### represents parts that children already claimed. |
940 | if (const auto *TL = N.get<TypeLoc>()) { |
941 | if (auto PTL = TL->getAs<ParenTypeLoc>()) { |
942 | claimRange(S: PTL.getLParenLoc(), Result); |
943 | claimRange(S: PTL.getRParenLoc(), Result); |
944 | return; |
945 | } |
946 | if (auto ATL = TL->getAs<ArrayTypeLoc>()) { |
947 | claimRange(S: ATL.getBracketsRange(), Result); |
948 | return; |
949 | } |
950 | if (auto PTL = TL->getAs<PointerTypeLoc>()) { |
951 | claimRange(S: PTL.getStarLoc(), Result); |
952 | return; |
953 | } |
954 | if (auto FTL = TL->getAs<FunctionTypeLoc>()) { |
955 | claimRange(S: SourceRange(FTL.getLParenLoc(), FTL.getEndLoc()), Result); |
956 | return; |
957 | } |
958 | } |
959 | claimRange(S: getSourceRange(N), Result); |
960 | } |
961 | |
962 | // Perform hit-testing of a complete Node against the selection. |
963 | // This runs for every node in the AST, and must be fast in common cases. |
964 | // This is usually called from pop(), so we can take children into account. |
965 | // The existing state of Result is relevant. |
966 | void claimRange(SourceRange S, SelectionTree::Selection &Result) { |
967 | for (const auto &ClaimedRange : |
968 | UnclaimedExpandedTokens.erase(Claim: TokenBuf.expandedTokens(R: S))) |
969 | update(Result, New: SelChecker.test(ExpandedTokens: ClaimedRange)); |
970 | |
971 | if (Result && Result != NoTokens) |
972 | dlog("{1}hit selection: {0}" , S.printToString(SM), indent()); |
973 | } |
974 | |
975 | std::string indent(int Offset = 0) { |
976 | // Cast for signed arithmetic. |
977 | int Amount = int(Stack.size()) + Offset; |
978 | assert(Amount >= 0); |
979 | return std::string(Amount, ' '); |
980 | } |
981 | |
982 | SourceManager &SM; |
983 | const LangOptions &LangOpts; |
984 | #ifndef NDEBUG |
985 | const PrintingPolicy &PrintPolicy; |
986 | #endif |
987 | const syntax::TokenBuffer &TokenBuf; |
988 | std::stack<Node *> Stack; |
989 | SelectionTester SelChecker; |
990 | IntervalSet<syntax::Token> UnclaimedExpandedTokens; |
991 | std::deque<Node> Nodes; // Stable pointers as we add more nodes. |
992 | }; |
993 | |
994 | } // namespace |
995 | |
996 | llvm::SmallString<256> abbreviatedString(DynTypedNode N, |
997 | const PrintingPolicy &PP) { |
998 | llvm::SmallString<256> Result; |
999 | { |
1000 | llvm::raw_svector_ostream OS(Result); |
1001 | N.print(OS, PP); |
1002 | } |
1003 | auto Pos = Result.find(C: '\n'); |
1004 | if (Pos != llvm::StringRef::npos) { |
1005 | bool MoreText = !llvm::all_of(Range: Result.str().drop_front(N: Pos), P: llvm::isSpace); |
1006 | Result.resize(N: Pos); |
1007 | if (MoreText) |
1008 | Result.append(RHS: " …" ); |
1009 | } |
1010 | return Result; |
1011 | } |
1012 | |
1013 | void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N, |
1014 | int Indent) const { |
1015 | if (N.Selected) |
1016 | OS.indent(NumSpaces: Indent - 1) << (N.Selected == SelectionTree::Complete ? '*' |
1017 | : '.'); |
1018 | else |
1019 | OS.indent(NumSpaces: Indent); |
1020 | printNodeKind(OS, N.ASTNode); |
1021 | OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n" ; |
1022 | for (const Node *Child : N.Children) |
1023 | print(OS, N: *Child, Indent: Indent + 2); |
1024 | } |
1025 | |
1026 | std::string SelectionTree::Node::kind() const { |
1027 | std::string S; |
1028 | llvm::raw_string_ostream OS(S); |
1029 | printNodeKind(OS, ASTNode); |
1030 | return std::move(OS.str()); |
1031 | } |
1032 | |
1033 | // Decide which selections emulate a "point" query in between characters. |
1034 | // If it's ambiguous (the neighboring characters are selectable tokens), returns |
1035 | // both possibilities in preference order. |
1036 | // Always returns at least one range - if no tokens touched, and empty range. |
1037 | static llvm::SmallVector<std::pair<unsigned, unsigned>, 2> |
1038 | pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) { |
1039 | const auto &SM = Tokens.sourceManager(); |
1040 | SourceLocation Loc = SM.getComposedLoc(FID: SM.getMainFileID(), Offset); |
1041 | llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result; |
1042 | // Prefer right token over left. |
1043 | for (const syntax::Token &Tok : |
1044 | llvm::reverse(C: spelledTokensTouching(Loc, Tokens))) { |
1045 | if (shouldIgnore(Tok)) |
1046 | continue; |
1047 | unsigned Offset = Tokens.sourceManager().getFileOffset(SpellingLoc: Tok.location()); |
1048 | Result.emplace_back(Args&: Offset, Args: Offset + Tok.length()); |
1049 | } |
1050 | if (Result.empty()) |
1051 | Result.emplace_back(Args&: Offset, Args&: Offset); |
1052 | return Result; |
1053 | } |
1054 | |
1055 | bool SelectionTree::createEach(ASTContext &AST, |
1056 | const syntax::TokenBuffer &Tokens, |
1057 | unsigned Begin, unsigned End, |
1058 | llvm::function_ref<bool(SelectionTree)> Func) { |
1059 | if (Begin != End) |
1060 | return Func(SelectionTree(AST, Tokens, Begin, End)); |
1061 | for (std::pair<unsigned, unsigned> Bounds : pointBounds(Offset: Begin, Tokens)) |
1062 | if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second))) |
1063 | return true; |
1064 | return false; |
1065 | } |
1066 | |
1067 | SelectionTree SelectionTree::createRight(ASTContext &AST, |
1068 | const syntax::TokenBuffer &Tokens, |
1069 | unsigned int Begin, unsigned int End) { |
1070 | std::optional<SelectionTree> Result; |
1071 | createEach(AST, Tokens, Begin, End, Func: [&](SelectionTree T) { |
1072 | Result = std::move(T); |
1073 | return true; |
1074 | }); |
1075 | return std::move(*Result); |
1076 | } |
1077 | |
1078 | SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens, |
1079 | unsigned Begin, unsigned End) |
1080 | : PrintPolicy(AST.getLangOpts()) { |
1081 | // No fundamental reason the selection needs to be in the main file, |
1082 | // but that's all clangd has needed so far. |
1083 | const SourceManager &SM = AST.getSourceManager(); |
1084 | FileID FID = SM.getMainFileID(); |
1085 | PrintPolicy.TerseOutput = true; |
1086 | PrintPolicy.IncludeNewlines = false; |
1087 | |
1088 | dlog("Computing selection for {0}" , |
1089 | SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End)) |
1090 | .printToString(SM)); |
1091 | Nodes = SelectionVisitor::collect(AST, Tokens, PP: PrintPolicy, Begin, End, File: FID); |
1092 | Root = Nodes.empty() ? nullptr : &Nodes.front(); |
1093 | recordMetrics(S: *this, Lang: AST.getLangOpts()); |
1094 | dlog("Built selection tree\n{0}" , *this); |
1095 | } |
1096 | |
1097 | const Node *SelectionTree::commonAncestor() const { |
1098 | const Node *Ancestor = Root; |
1099 | while (Ancestor->Children.size() == 1 && !Ancestor->Selected) |
1100 | Ancestor = Ancestor->Children.front(); |
1101 | // Returning nullptr here is a bit unprincipled, but it makes the API safer: |
1102 | // the TranslationUnitDecl contains all of the preamble, so traversing it is a |
1103 | // performance cliff. Callers can check for null and use root() if they want. |
1104 | return Ancestor != Root ? Ancestor : nullptr; |
1105 | } |
1106 | |
1107 | const DeclContext &SelectionTree::Node::getDeclContext() const { |
1108 | for (const Node *CurrentNode = this; CurrentNode != nullptr; |
1109 | CurrentNode = CurrentNode->Parent) { |
1110 | if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) { |
1111 | if (CurrentNode != this) |
1112 | if (auto *DC = dyn_cast<DeclContext>(Current)) |
1113 | return *DC; |
1114 | return *Current->getLexicalDeclContext(); |
1115 | } |
1116 | if (const auto *LE = CurrentNode->ASTNode.get<LambdaExpr>()) |
1117 | if (CurrentNode != this) |
1118 | return *LE->getCallOperator(); |
1119 | } |
1120 | llvm_unreachable("A tree must always be rooted at TranslationUnitDecl." ); |
1121 | } |
1122 | |
1123 | const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const { |
1124 | if (Children.size() == 1 && |
1125 | getSourceRange(Children.front()->ASTNode) == getSourceRange(ASTNode)) |
1126 | return Children.front()->ignoreImplicit(); |
1127 | return *this; |
1128 | } |
1129 | |
1130 | const SelectionTree::Node &SelectionTree::Node::outerImplicit() const { |
1131 | if (Parent && getSourceRange(Parent->ASTNode) == getSourceRange(ASTNode)) |
1132 | return Parent->outerImplicit(); |
1133 | return *this; |
1134 | } |
1135 | |
1136 | } // namespace clangd |
1137 | } // namespace clang |
1138 | |