1 | //===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "SymbolIndexManager.h" |
10 | |
11 | #include <cmath> |
12 | |
13 | #include "find-all-symbols/SymbolInfo.h" |
14 | #include "llvm/ADT/STLExtras.h" |
15 | #include "llvm/ADT/SmallVector.h" |
16 | #include "llvm/ADT/StringMap.h" |
17 | #include "llvm/Support/Debug.h" |
18 | #include "llvm/Support/Path.h" |
19 | |
20 | #define DEBUG_TYPE "clang-include-fixer" |
21 | |
22 | namespace clang { |
23 | namespace include_fixer { |
24 | |
25 | using find_all_symbols::SymbolInfo; |
26 | using find_all_symbols::SymbolAndSignals; |
27 | |
28 | // Calculate a score based on whether we think the given header is closely |
29 | // related to the given source file. |
30 | static double similarityScore(llvm::StringRef FileName, |
31 | llvm::StringRef ) { |
32 | // Compute the maximum number of common path segments between Header and |
33 | // a suffix of FileName. |
34 | // We do not do a full longest common substring computation, as Header |
35 | // specifies the path we would directly #include, so we assume it is rooted |
36 | // relatively to a subproject of the repository. |
37 | int MaxSegments = 1; |
38 | for (auto FileI = llvm::sys::path::begin(path: FileName), |
39 | FileE = llvm::sys::path::end(path: FileName); |
40 | FileI != FileE; ++FileI) { |
41 | int Segments = 0; |
42 | for (auto = llvm::sys::path::begin(path: Header), |
43 | = llvm::sys::path::end(path: Header), I = FileI; |
44 | HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) { |
45 | ++Segments; |
46 | } |
47 | MaxSegments = std::max(a: Segments, b: MaxSegments); |
48 | } |
49 | return MaxSegments; |
50 | } |
51 | |
52 | static void rank(std::vector<SymbolAndSignals> &Symbols, |
53 | llvm::StringRef FileName) { |
54 | llvm::StringMap<double> Score; |
55 | for (const auto &Symbol : Symbols) { |
56 | // Calculate a score from the similarity of the header the symbol is in |
57 | // with the current file and the popularity of the symbol. |
58 | double NewScore = similarityScore(FileName, Header: Symbol.Symbol.getFilePath()) * |
59 | (1.0 + std::log2(x: 1 + Symbol.Signals.Seen)); |
60 | double &S = Score[Symbol.Symbol.getFilePath()]; |
61 | S = std::max(a: S, b: NewScore); |
62 | } |
63 | // Sort by the gathered scores. Use file name as a tie breaker so we can |
64 | // deduplicate. |
65 | llvm::sort(C&: Symbols, |
66 | Comp: [&](const SymbolAndSignals &A, const SymbolAndSignals &B) { |
67 | auto AS = Score[A.Symbol.getFilePath()]; |
68 | auto BS = Score[B.Symbol.getFilePath()]; |
69 | if (AS != BS) |
70 | return AS > BS; |
71 | return A.Symbol.getFilePath() < B.Symbol.getFilePath(); |
72 | }); |
73 | } |
74 | |
75 | std::vector<find_all_symbols::SymbolInfo> |
76 | SymbolIndexManager::search(llvm::StringRef Identifier, |
77 | bool IsNestedSearch, |
78 | llvm::StringRef FileName) const { |
79 | // The identifier may be fully qualified, so split it and get all the context |
80 | // names. |
81 | llvm::SmallVector<llvm::StringRef, 8> Names; |
82 | Identifier.split(A&: Names, Separator: "::" ); |
83 | |
84 | bool IsFullyQualified = false; |
85 | if (Identifier.starts_with(Prefix: "::" )) { |
86 | Names.erase(CI: Names.begin()); // Drop first (empty) element. |
87 | IsFullyQualified = true; |
88 | } |
89 | |
90 | // As long as we don't find a result keep stripping name parts from the end. |
91 | // This is to support nested classes which aren't recorded in the database. |
92 | // Eventually we will either hit a class (namespaces aren't in the database |
93 | // either) and can report that result. |
94 | bool TookPrefix = false; |
95 | std::vector<SymbolAndSignals> MatchedSymbols; |
96 | do { |
97 | std::vector<SymbolAndSignals> Symbols; |
98 | for (const auto &DB : SymbolIndices) { |
99 | auto Res = DB.get()->search(Identifier: Names.back()); |
100 | Symbols.insert(position: Symbols.end(), first: Res.begin(), last: Res.end()); |
101 | } |
102 | |
103 | LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got " |
104 | << Symbols.size() << " results...\n" ); |
105 | |
106 | for (auto &SymAndSig : Symbols) { |
107 | const SymbolInfo &Symbol = SymAndSig.Symbol; |
108 | // Match the identifier name without qualifier. |
109 | bool IsMatched = true; |
110 | auto SymbolContext = Symbol.getContexts().begin(); |
111 | auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name. |
112 | // Match the remaining context names. |
113 | while (IdentiferContext != Names.rend() && |
114 | SymbolContext != Symbol.getContexts().end()) { |
115 | if (SymbolContext->second == *IdentiferContext) { |
116 | ++IdentiferContext; |
117 | ++SymbolContext; |
118 | } else if (SymbolContext->first == |
119 | find_all_symbols::SymbolInfo::ContextType::EnumDecl) { |
120 | // Skip non-scoped enum context. |
121 | ++SymbolContext; |
122 | } else { |
123 | IsMatched = false; |
124 | break; |
125 | } |
126 | } |
127 | |
128 | // If the name was qualified we only want to add results if we evaluated |
129 | // all contexts. |
130 | if (IsFullyQualified) |
131 | IsMatched &= (SymbolContext == Symbol.getContexts().end()); |
132 | |
133 | // FIXME: Support full match. At this point, we only find symbols in |
134 | // database which end with the same contexts with the identifier. |
135 | if (IsMatched && IdentiferContext == Names.rend()) { |
136 | // If we're in a situation where we took a prefix but the thing we |
137 | // found couldn't possibly have a nested member ignore it. |
138 | if (TookPrefix && |
139 | (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function || |
140 | Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable || |
141 | Symbol.getSymbolKind() == |
142 | SymbolInfo::SymbolKind::EnumConstantDecl || |
143 | Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro)) |
144 | continue; |
145 | |
146 | MatchedSymbols.push_back(x: std::move(SymAndSig)); |
147 | } |
148 | } |
149 | Names.pop_back(); |
150 | TookPrefix = true; |
151 | } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch); |
152 | |
153 | rank(Symbols&: MatchedSymbols, FileName); |
154 | // Strip signals, they are no longer needed. |
155 | std::vector<SymbolInfo> Res; |
156 | Res.reserve(n: MatchedSymbols.size()); |
157 | for (auto &SymAndSig : MatchedSymbols) |
158 | Res.push_back(x: std::move(SymAndSig.Symbol)); |
159 | return Res; |
160 | } |
161 | |
162 | } // namespace include_fixer |
163 | } // namespace clang |
164 | |