| 1 | //===--- Merge.cpp -----------------------------------------------*- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "Merge.h" |
| 10 | #include "index/Symbol.h" |
| 11 | #include "index/SymbolLocation.h" |
| 12 | #include "index/SymbolOrigin.h" |
| 13 | #include "support/Trace.h" |
| 14 | #include "llvm/ADT/StringRef.h" |
| 15 | #include <algorithm> |
| 16 | #include <iterator> |
| 17 | |
| 18 | namespace clang { |
| 19 | namespace clangd { |
| 20 | |
| 21 | namespace { |
| 22 | |
| 23 | // Returns true if file defining/declaring \p S is covered by \p Index. |
| 24 | bool isIndexAuthoritative(const SymbolIndex::IndexedFiles &Index, |
| 25 | const Symbol &S) { |
| 26 | // We expect the definition to see the canonical declaration, so it seems to |
| 27 | // be enough to check only the definition if it exists. |
| 28 | const char *OwningFile = |
| 29 | S.Definition ? S.Definition.FileURI : S.CanonicalDeclaration.FileURI; |
| 30 | return (Index(OwningFile) & IndexContents::Symbols) != IndexContents::None; |
| 31 | } |
| 32 | } // namespace |
| 33 | |
| 34 | bool MergedIndex::fuzzyFind( |
| 35 | const FuzzyFindRequest &Req, |
| 36 | llvm::function_ref<void(const Symbol &)> Callback) const { |
| 37 | // We can't step through both sources in parallel. So: |
| 38 | // 1) query all dynamic symbols, slurping results into a slab |
| 39 | // 2) query the static symbols, for each one: |
| 40 | // a) if it's not in the dynamic slab, yield it directly |
| 41 | // b) if it's in the dynamic slab, merge it and yield the result |
| 42 | // 3) now yield all the dynamic symbols we haven't processed. |
| 43 | trace::Span Tracer("MergedIndex fuzzyFind" ); |
| 44 | bool More = false; // We'll be incomplete if either source was. |
| 45 | SymbolSlab::Builder DynB; |
| 46 | unsigned DynamicCount = 0; |
| 47 | unsigned StaticCount = 0; |
| 48 | unsigned MergedCount = 0; |
| 49 | // Number of results ignored due to staleness. |
| 50 | unsigned StaticDropped = 0; |
| 51 | More |= Dynamic->fuzzyFind(Req, Callback: [&](const Symbol &S) { |
| 52 | ++DynamicCount; |
| 53 | DynB.insert(S); |
| 54 | }); |
| 55 | SymbolSlab Dyn = std::move(DynB).build(); |
| 56 | |
| 57 | llvm::DenseSet<SymbolID> ReportedDynSymbols; |
| 58 | { |
| 59 | auto DynamicContainsFile = Dynamic->indexedFiles(); |
| 60 | More |= Static->fuzzyFind(Req, Callback: [&](const Symbol &S) { |
| 61 | ++StaticCount; |
| 62 | auto DynS = Dyn.find(SymID: S.ID); |
| 63 | // If symbol also exist in the dynamic index, just merge and report. |
| 64 | if (DynS != Dyn.end()) { |
| 65 | ++MergedCount; |
| 66 | ReportedDynSymbols.insert(V: S.ID); |
| 67 | return Callback(mergeSymbol(L: *DynS, R: S)); |
| 68 | } |
| 69 | |
| 70 | // Otherwise, if the dynamic index owns the symbol's file, it means static |
| 71 | // index is stale just drop the symbol. |
| 72 | if (isIndexAuthoritative(Index: DynamicContainsFile, S)) { |
| 73 | ++StaticDropped; |
| 74 | return; |
| 75 | } |
| 76 | |
| 77 | // If not just report the symbol from static index as is. |
| 78 | return Callback(S); |
| 79 | }); |
| 80 | } |
| 81 | SPAN_ATTACH(Tracer, "dynamic" , DynamicCount); |
| 82 | SPAN_ATTACH(Tracer, "static" , StaticCount); |
| 83 | SPAN_ATTACH(Tracer, "static_dropped" , StaticDropped); |
| 84 | SPAN_ATTACH(Tracer, "merged" , MergedCount); |
| 85 | for (const Symbol &S : Dyn) |
| 86 | if (!ReportedDynSymbols.count(V: S.ID)) |
| 87 | Callback(S); |
| 88 | return More; |
| 89 | } |
| 90 | |
| 91 | void MergedIndex::lookup( |
| 92 | const LookupRequest &Req, |
| 93 | llvm::function_ref<void(const Symbol &)> Callback) const { |
| 94 | trace::Span Tracer("MergedIndex lookup" ); |
| 95 | SymbolSlab::Builder B; |
| 96 | |
| 97 | Dynamic->lookup(Req, Callback: [&](const Symbol &S) { B.insert(S); }); |
| 98 | |
| 99 | auto RemainingIDs = Req.IDs; |
| 100 | { |
| 101 | auto DynamicContainsFile = Dynamic->indexedFiles(); |
| 102 | Static->lookup(Req, Callback: [&](const Symbol &S) { |
| 103 | // If we've seen the symbol before, just merge. |
| 104 | if (const Symbol *Sym = B.find(ID: S.ID)) { |
| 105 | RemainingIDs.erase(V: S.ID); |
| 106 | return Callback(mergeSymbol(L: *Sym, R: S)); |
| 107 | } |
| 108 | |
| 109 | // If symbol is missing in dynamic index, and dynamic index owns the |
| 110 | // symbol's file. Static index is stale, just drop the symbol. |
| 111 | if (isIndexAuthoritative(Index: DynamicContainsFile, S)) |
| 112 | return; |
| 113 | |
| 114 | // Dynamic index doesn't know about this file, just use the symbol from |
| 115 | // static index. |
| 116 | RemainingIDs.erase(V: S.ID); |
| 117 | Callback(S); |
| 118 | }); |
| 119 | } |
| 120 | for (const auto &ID : RemainingIDs) |
| 121 | if (const Symbol *Sym = B.find(ID)) |
| 122 | Callback(*Sym); |
| 123 | } |
| 124 | |
| 125 | bool MergedIndex::refs(const RefsRequest &Req, |
| 126 | llvm::function_ref<void(const Ref &)> Callback) const { |
| 127 | trace::Span Tracer("MergedIndex refs" ); |
| 128 | bool More = false; |
| 129 | uint32_t Remaining = Req.Limit.value_or(u: std::numeric_limits<uint32_t>::max()); |
| 130 | // We don't want duplicated refs from the static/dynamic indexes, |
| 131 | // and we can't reliably deduplicate them because offsets may differ slightly. |
| 132 | // We consider the dynamic index authoritative and report all its refs, |
| 133 | // and only report static index refs from other files. |
| 134 | More |= Dynamic->refs(Req, Callback: [&](const Ref &O) { |
| 135 | Callback(O); |
| 136 | assert(Remaining != 0); |
| 137 | --Remaining; |
| 138 | }); |
| 139 | if (Remaining == 0 && More) |
| 140 | return More; |
| 141 | auto DynamicContainsFile = Dynamic->indexedFiles(); |
| 142 | // We return less than Req.Limit if static index returns more refs for dirty |
| 143 | // files. |
| 144 | bool StaticHadMore = Static->refs(Req, Callback: [&](const Ref &O) { |
| 145 | if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) != |
| 146 | IndexContents::None) |
| 147 | return; // ignore refs that have been seen from dynamic index. |
| 148 | if (Remaining == 0) { |
| 149 | More = true; |
| 150 | return; |
| 151 | } |
| 152 | --Remaining; |
| 153 | Callback(O); |
| 154 | }); |
| 155 | return More || StaticHadMore; |
| 156 | } |
| 157 | |
| 158 | bool MergedIndex::containedRefs( |
| 159 | const ContainedRefsRequest &Req, |
| 160 | llvm::function_ref<void(const ContainedRefsResult &)> Callback) const { |
| 161 | trace::Span Tracer("MergedIndex refersTo" ); |
| 162 | bool More = false; |
| 163 | uint32_t Remaining = Req.Limit.value_or(u: std::numeric_limits<uint32_t>::max()); |
| 164 | // We don't want duplicated refs from the static/dynamic indexes, |
| 165 | // and we can't reliably deduplicate them because offsets may differ slightly. |
| 166 | // We consider the dynamic index authoritative and report all its refs, |
| 167 | // and only report static index refs from other files. |
| 168 | More |= Dynamic->containedRefs(Req, Callback: [&](const auto &O) { |
| 169 | Callback(O); |
| 170 | assert(Remaining != 0); |
| 171 | --Remaining; |
| 172 | }); |
| 173 | if (Remaining == 0 && More) |
| 174 | return More; |
| 175 | auto DynamicContainsFile = Dynamic->indexedFiles(); |
| 176 | // We return less than Req.Limit if static index returns more refs for dirty |
| 177 | // files. |
| 178 | bool StaticHadMore = Static->containedRefs(Req, Callback: [&](const auto &O) { |
| 179 | if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) != |
| 180 | IndexContents::None) |
| 181 | return; // ignore refs that have been seen from dynamic index. |
| 182 | if (Remaining == 0) { |
| 183 | More = true; |
| 184 | return; |
| 185 | } |
| 186 | --Remaining; |
| 187 | Callback(O); |
| 188 | }); |
| 189 | return More || StaticHadMore; |
| 190 | } |
| 191 | |
| 192 | llvm::unique_function<IndexContents(llvm::StringRef) const> |
| 193 | MergedIndex::indexedFiles() const { |
| 194 | return [DynamicContainsFile{Dynamic->indexedFiles()}, |
| 195 | StaticContainsFile{Static->indexedFiles()}](llvm::StringRef FileURI) { |
| 196 | return DynamicContainsFile(FileURI) | StaticContainsFile(FileURI); |
| 197 | }; |
| 198 | } |
| 199 | |
| 200 | void MergedIndex::relations( |
| 201 | const RelationsRequest &Req, |
| 202 | llvm::function_ref<void(const SymbolID &, const Symbol &)> Callback) const { |
| 203 | uint32_t Remaining = Req.Limit.value_or(u: std::numeric_limits<uint32_t>::max()); |
| 204 | // Return results from both indexes but avoid duplicates. |
| 205 | // We might return stale relations from the static index; |
| 206 | // we don't currently have a good way of identifying them. |
| 207 | llvm::DenseSet<std::pair<SymbolID, SymbolID>> SeenRelations; |
| 208 | Dynamic->relations(Req, Callback: [&](const SymbolID &Subject, const Symbol &Object) { |
| 209 | Callback(Subject, Object); |
| 210 | SeenRelations.insert(V: std::make_pair(x: Subject, y: Object.ID)); |
| 211 | --Remaining; |
| 212 | }); |
| 213 | if (Remaining == 0) |
| 214 | return; |
| 215 | Static->relations(Req, Callback: [&](const SymbolID &Subject, const Symbol &Object) { |
| 216 | if (Remaining > 0 && |
| 217 | !SeenRelations.count(V: std::make_pair(x: Subject, y: Object.ID))) { |
| 218 | --Remaining; |
| 219 | Callback(Subject, Object); |
| 220 | } |
| 221 | }); |
| 222 | } |
| 223 | |
| 224 | // Returns true if \p L is (strictly) preferred to \p R (e.g. by file paths). If |
| 225 | // neither is preferred, this returns false. |
| 226 | static bool prefer(const SymbolLocation &L, const SymbolLocation &R) { |
| 227 | if (!L) |
| 228 | return false; |
| 229 | if (!R) |
| 230 | return true; |
| 231 | auto HasCodeGenSuffix = [](const SymbolLocation &Loc) { |
| 232 | constexpr static const char *CodegenSuffixes[] = {".proto" }; |
| 233 | return llvm::any_of(Range: CodegenSuffixes, P: [&](llvm::StringRef Suffix) { |
| 234 | return llvm::StringRef(Loc.FileURI).ends_with(Suffix); |
| 235 | }); |
| 236 | }; |
| 237 | return HasCodeGenSuffix(L) && !HasCodeGenSuffix(R); |
| 238 | } |
| 239 | |
| 240 | Symbol mergeSymbol(const Symbol &L, const Symbol &R) { |
| 241 | assert(L.ID == R.ID); |
| 242 | // We prefer information from TUs that saw the definition. |
| 243 | // Classes: this is the def itself. Functions: hopefully the header decl. |
| 244 | // If both did (or both didn't), continue to prefer L over R. |
| 245 | bool PreferR = R.Definition && !L.Definition; |
| 246 | // Merge include headers only if both have definitions or both have no |
| 247 | // definition; otherwise, only accumulate references of common includes. |
| 248 | assert(L.Definition.FileURI && R.Definition.FileURI); |
| 249 | bool MergeIncludes = |
| 250 | bool(*L.Definition.FileURI) == bool(*R.Definition.FileURI); |
| 251 | Symbol S = PreferR ? R : L; // The target symbol we're merging into. |
| 252 | const Symbol &O = PreferR ? L : R; // The "other" less-preferred symbol. |
| 253 | |
| 254 | // Only use locations in \p O if it's (strictly) preferred. |
| 255 | if (prefer(L: O.CanonicalDeclaration, R: S.CanonicalDeclaration)) |
| 256 | S.CanonicalDeclaration = O.CanonicalDeclaration; |
| 257 | if (prefer(L: O.Definition, R: S.Definition)) |
| 258 | S.Definition = O.Definition; |
| 259 | S.References += O.References; |
| 260 | if (S.Signature == "" ) |
| 261 | S.Signature = O.Signature; |
| 262 | if (S.CompletionSnippetSuffix == "" ) |
| 263 | S.CompletionSnippetSuffix = O.CompletionSnippetSuffix; |
| 264 | if (S.Documentation == "" ) { |
| 265 | // Don't accept documentation from bare forward class declarations, if there |
| 266 | // is a definition and it didn't provide one. S is often an undocumented |
| 267 | // class, and O is a non-canonical forward decl preceded by an irrelevant |
| 268 | // comment. |
| 269 | bool IsClass = S.SymInfo.Kind == index::SymbolKind::Class || |
| 270 | S.SymInfo.Kind == index::SymbolKind::Struct || |
| 271 | S.SymInfo.Kind == index::SymbolKind::Union; |
| 272 | if (!IsClass || !S.Definition) |
| 273 | S.Documentation = O.Documentation; |
| 274 | } |
| 275 | if (S.ReturnType == "" ) |
| 276 | S.ReturnType = O.ReturnType; |
| 277 | if (S.Type == "" ) |
| 278 | S.Type = O.Type; |
| 279 | for (const auto &OI : O.IncludeHeaders) { |
| 280 | bool Found = false; |
| 281 | for (auto &SI : S.IncludeHeaders) { |
| 282 | if (SI.IncludeHeader == OI.IncludeHeader) { |
| 283 | Found = true; |
| 284 | SI.References += OI.References; |
| 285 | SI.SupportedDirectives |= OI.SupportedDirectives; |
| 286 | break; |
| 287 | } |
| 288 | } |
| 289 | if (!Found && MergeIncludes) |
| 290 | S.IncludeHeaders.emplace_back(Args: OI.IncludeHeader, Args: OI.References, |
| 291 | Args: OI.supportedDirectives()); |
| 292 | } |
| 293 | |
| 294 | S.Origin |= O.Origin | SymbolOrigin::Merge; |
| 295 | S.Flags |= O.Flags; |
| 296 | return S; |
| 297 | } |
| 298 | |
| 299 | } // namespace clangd |
| 300 | } // namespace clang |
| 301 | |