1 | //===--- Merge.cpp -----------------------------------------------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Merge.h" |
10 | #include "index/Symbol.h" |
11 | #include "index/SymbolLocation.h" |
12 | #include "index/SymbolOrigin.h" |
13 | #include "support/Trace.h" |
14 | #include "llvm/ADT/StringRef.h" |
15 | #include <algorithm> |
16 | #include <iterator> |
17 | |
18 | namespace clang { |
19 | namespace clangd { |
20 | |
21 | namespace { |
22 | |
23 | // Returns true if file defining/declaring \p S is covered by \p Index. |
24 | bool isIndexAuthoritative(const SymbolIndex::IndexedFiles &Index, |
25 | const Symbol &S) { |
26 | // We expect the definition to see the canonical declaration, so it seems to |
27 | // be enough to check only the definition if it exists. |
28 | const char *OwningFile = |
29 | S.Definition ? S.Definition.FileURI : S.CanonicalDeclaration.FileURI; |
30 | return (Index(OwningFile) & IndexContents::Symbols) != IndexContents::None; |
31 | } |
32 | } // namespace |
33 | |
34 | bool MergedIndex::fuzzyFind( |
35 | const FuzzyFindRequest &Req, |
36 | llvm::function_ref<void(const Symbol &)> Callback) const { |
37 | // We can't step through both sources in parallel. So: |
38 | // 1) query all dynamic symbols, slurping results into a slab |
39 | // 2) query the static symbols, for each one: |
40 | // a) if it's not in the dynamic slab, yield it directly |
41 | // b) if it's in the dynamic slab, merge it and yield the result |
42 | // 3) now yield all the dynamic symbols we haven't processed. |
43 | trace::Span Tracer("MergedIndex fuzzyFind" ); |
44 | bool More = false; // We'll be incomplete if either source was. |
45 | SymbolSlab::Builder DynB; |
46 | unsigned DynamicCount = 0; |
47 | unsigned StaticCount = 0; |
48 | unsigned MergedCount = 0; |
49 | // Number of results ignored due to staleness. |
50 | unsigned StaticDropped = 0; |
51 | More |= Dynamic->fuzzyFind(Req, Callback: [&](const Symbol &S) { |
52 | ++DynamicCount; |
53 | DynB.insert(S); |
54 | }); |
55 | SymbolSlab Dyn = std::move(DynB).build(); |
56 | |
57 | llvm::DenseSet<SymbolID> ReportedDynSymbols; |
58 | { |
59 | auto DynamicContainsFile = Dynamic->indexedFiles(); |
60 | More |= Static->fuzzyFind(Req, Callback: [&](const Symbol &S) { |
61 | ++StaticCount; |
62 | auto DynS = Dyn.find(SymID: S.ID); |
63 | // If symbol also exist in the dynamic index, just merge and report. |
64 | if (DynS != Dyn.end()) { |
65 | ++MergedCount; |
66 | ReportedDynSymbols.insert(V: S.ID); |
67 | return Callback(mergeSymbol(L: *DynS, R: S)); |
68 | } |
69 | |
70 | // Otherwise, if the dynamic index owns the symbol's file, it means static |
71 | // index is stale just drop the symbol. |
72 | if (isIndexAuthoritative(Index: DynamicContainsFile, S)) { |
73 | ++StaticDropped; |
74 | return; |
75 | } |
76 | |
77 | // If not just report the symbol from static index as is. |
78 | return Callback(S); |
79 | }); |
80 | } |
81 | SPAN_ATTACH(Tracer, "dynamic" , DynamicCount); |
82 | SPAN_ATTACH(Tracer, "static" , StaticCount); |
83 | SPAN_ATTACH(Tracer, "static_dropped" , StaticDropped); |
84 | SPAN_ATTACH(Tracer, "merged" , MergedCount); |
85 | for (const Symbol &S : Dyn) |
86 | if (!ReportedDynSymbols.count(V: S.ID)) |
87 | Callback(S); |
88 | return More; |
89 | } |
90 | |
91 | void MergedIndex::lookup( |
92 | const LookupRequest &Req, |
93 | llvm::function_ref<void(const Symbol &)> Callback) const { |
94 | trace::Span Tracer("MergedIndex lookup" ); |
95 | SymbolSlab::Builder B; |
96 | |
97 | Dynamic->lookup(Req, Callback: [&](const Symbol &S) { B.insert(S); }); |
98 | |
99 | auto RemainingIDs = Req.IDs; |
100 | { |
101 | auto DynamicContainsFile = Dynamic->indexedFiles(); |
102 | Static->lookup(Req, Callback: [&](const Symbol &S) { |
103 | // If we've seen the symbol before, just merge. |
104 | if (const Symbol *Sym = B.find(ID: S.ID)) { |
105 | RemainingIDs.erase(V: S.ID); |
106 | return Callback(mergeSymbol(L: *Sym, R: S)); |
107 | } |
108 | |
109 | // If symbol is missing in dynamic index, and dynamic index owns the |
110 | // symbol's file. Static index is stale, just drop the symbol. |
111 | if (isIndexAuthoritative(Index: DynamicContainsFile, S)) |
112 | return; |
113 | |
114 | // Dynamic index doesn't know about this file, just use the symbol from |
115 | // static index. |
116 | RemainingIDs.erase(V: S.ID); |
117 | Callback(S); |
118 | }); |
119 | } |
120 | for (const auto &ID : RemainingIDs) |
121 | if (const Symbol *Sym = B.find(ID)) |
122 | Callback(*Sym); |
123 | } |
124 | |
125 | bool MergedIndex::refs(const RefsRequest &Req, |
126 | llvm::function_ref<void(const Ref &)> Callback) const { |
127 | trace::Span Tracer("MergedIndex refs" ); |
128 | bool More = false; |
129 | uint32_t Remaining = Req.Limit.value_or(u: std::numeric_limits<uint32_t>::max()); |
130 | // We don't want duplicated refs from the static/dynamic indexes, |
131 | // and we can't reliably deduplicate them because offsets may differ slightly. |
132 | // We consider the dynamic index authoritative and report all its refs, |
133 | // and only report static index refs from other files. |
134 | More |= Dynamic->refs(Req, Callback: [&](const Ref &O) { |
135 | Callback(O); |
136 | assert(Remaining != 0); |
137 | --Remaining; |
138 | }); |
139 | if (Remaining == 0 && More) |
140 | return More; |
141 | auto DynamicContainsFile = Dynamic->indexedFiles(); |
142 | // We return less than Req.Limit if static index returns more refs for dirty |
143 | // files. |
144 | bool StaticHadMore = Static->refs(Req, Callback: [&](const Ref &O) { |
145 | if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) != |
146 | IndexContents::None) |
147 | return; // ignore refs that have been seen from dynamic index. |
148 | if (Remaining == 0) { |
149 | More = true; |
150 | return; |
151 | } |
152 | --Remaining; |
153 | Callback(O); |
154 | }); |
155 | return More || StaticHadMore; |
156 | } |
157 | |
158 | llvm::unique_function<IndexContents(llvm::StringRef) const> |
159 | MergedIndex::indexedFiles() const { |
160 | return [DynamicContainsFile{Dynamic->indexedFiles()}, |
161 | StaticContainsFile{Static->indexedFiles()}](llvm::StringRef FileURI) { |
162 | return DynamicContainsFile(FileURI) | StaticContainsFile(FileURI); |
163 | }; |
164 | } |
165 | |
166 | void MergedIndex::relations( |
167 | const RelationsRequest &Req, |
168 | llvm::function_ref<void(const SymbolID &, const Symbol &)> Callback) const { |
169 | uint32_t Remaining = Req.Limit.value_or(u: std::numeric_limits<uint32_t>::max()); |
170 | // Return results from both indexes but avoid duplicates. |
171 | // We might return stale relations from the static index; |
172 | // we don't currently have a good way of identifying them. |
173 | llvm::DenseSet<std::pair<SymbolID, SymbolID>> SeenRelations; |
174 | Dynamic->relations(Req, Callback: [&](const SymbolID &Subject, const Symbol &Object) { |
175 | Callback(Subject, Object); |
176 | SeenRelations.insert(V: std::make_pair(x: Subject, y: Object.ID)); |
177 | --Remaining; |
178 | }); |
179 | if (Remaining == 0) |
180 | return; |
181 | Static->relations(Req, Callback: [&](const SymbolID &Subject, const Symbol &Object) { |
182 | if (Remaining > 0 && |
183 | !SeenRelations.count(V: std::make_pair(x: Subject, y: Object.ID))) { |
184 | --Remaining; |
185 | Callback(Subject, Object); |
186 | } |
187 | }); |
188 | } |
189 | |
190 | // Returns true if \p L is (strictly) preferred to \p R (e.g. by file paths). If |
191 | // neither is preferred, this returns false. |
192 | static bool prefer(const SymbolLocation &L, const SymbolLocation &R) { |
193 | if (!L) |
194 | return false; |
195 | if (!R) |
196 | return true; |
197 | auto HasCodeGenSuffix = [](const SymbolLocation &Loc) { |
198 | constexpr static const char *CodegenSuffixes[] = {".proto" }; |
199 | return llvm::any_of(Range: CodegenSuffixes, P: [&](llvm::StringRef Suffix) { |
200 | return llvm::StringRef(Loc.FileURI).ends_with(Suffix); |
201 | }); |
202 | }; |
203 | return HasCodeGenSuffix(L) && !HasCodeGenSuffix(R); |
204 | } |
205 | |
206 | Symbol mergeSymbol(const Symbol &L, const Symbol &R) { |
207 | assert(L.ID == R.ID); |
208 | // We prefer information from TUs that saw the definition. |
209 | // Classes: this is the def itself. Functions: hopefully the header decl. |
210 | // If both did (or both didn't), continue to prefer L over R. |
211 | bool PreferR = R.Definition && !L.Definition; |
212 | // Merge include headers only if both have definitions or both have no |
213 | // definition; otherwise, only accumulate references of common includes. |
214 | assert(L.Definition.FileURI && R.Definition.FileURI); |
215 | bool MergeIncludes = |
216 | bool(*L.Definition.FileURI) == bool(*R.Definition.FileURI); |
217 | Symbol S = PreferR ? R : L; // The target symbol we're merging into. |
218 | const Symbol &O = PreferR ? L : R; // The "other" less-preferred symbol. |
219 | |
220 | // Only use locations in \p O if it's (strictly) preferred. |
221 | if (prefer(L: O.CanonicalDeclaration, R: S.CanonicalDeclaration)) |
222 | S.CanonicalDeclaration = O.CanonicalDeclaration; |
223 | if (prefer(L: O.Definition, R: S.Definition)) |
224 | S.Definition = O.Definition; |
225 | S.References += O.References; |
226 | if (S.Signature == "" ) |
227 | S.Signature = O.Signature; |
228 | if (S.CompletionSnippetSuffix == "" ) |
229 | S.CompletionSnippetSuffix = O.CompletionSnippetSuffix; |
230 | if (S.Documentation == "" ) { |
231 | // Don't accept documentation from bare forward class declarations, if there |
232 | // is a definition and it didn't provide one. S is often an undocumented |
233 | // class, and O is a non-canonical forward decl preceded by an irrelevant |
234 | // comment. |
235 | bool IsClass = S.SymInfo.Kind == index::SymbolKind::Class || |
236 | S.SymInfo.Kind == index::SymbolKind::Struct || |
237 | S.SymInfo.Kind == index::SymbolKind::Union; |
238 | if (!IsClass || !S.Definition) |
239 | S.Documentation = O.Documentation; |
240 | } |
241 | if (S.ReturnType == "" ) |
242 | S.ReturnType = O.ReturnType; |
243 | if (S.Type == "" ) |
244 | S.Type = O.Type; |
245 | for (const auto &OI : O.IncludeHeaders) { |
246 | bool Found = false; |
247 | for (auto &SI : S.IncludeHeaders) { |
248 | if (SI.IncludeHeader == OI.IncludeHeader) { |
249 | Found = true; |
250 | SI.References += OI.References; |
251 | SI.SupportedDirectives |= OI.SupportedDirectives; |
252 | break; |
253 | } |
254 | } |
255 | if (!Found && MergeIncludes) |
256 | S.IncludeHeaders.emplace_back(Args: OI.IncludeHeader, Args: OI.References, |
257 | Args: OI.supportedDirectives()); |
258 | } |
259 | |
260 | S.Origin |= O.Origin | SymbolOrigin::Merge; |
261 | S.Flags |= O.Flags; |
262 | return S; |
263 | } |
264 | |
265 | } // namespace clangd |
266 | } // namespace clang |
267 | |