| 1 | //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This defines Dex - a symbol index implementation based on query iterators |
| 11 | /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc. |
| 12 | /// While consuming more memory and having longer build stage due to |
| 13 | /// preprocessing, Dex will have substantially lower latency. It will also allow |
| 14 | /// efficient symbol searching which is crucial for operations like code |
| 15 | /// completion, and can be very important for a number of different code |
| 16 | /// transformations which will be eventually supported by Clangd. |
| 17 | /// |
| 18 | //===----------------------------------------------------------------------===// |
| 19 | |
| 20 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H |
| 21 | #define |
| 22 | |
| 23 | #include "index/dex/Iterator.h" |
| 24 | #include "index/Index.h" |
| 25 | #include "index/Relation.h" |
| 26 | #include "index/dex/PostingList.h" |
| 27 | #include "index/dex/Token.h" |
| 28 | #include "llvm/ADT/StringSet.h" |
| 29 | |
| 30 | namespace clang { |
| 31 | namespace clangd { |
| 32 | namespace dex { |
| 33 | |
| 34 | /// In-memory Dex trigram-based index implementation. |
| 35 | class Dex : public SymbolIndex { |
| 36 | public: |
| 37 | // All data must outlive this index. |
| 38 | template <typename SymbolRange, typename RefsRange, typename RelationsRange> |
| 39 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, |
| 40 | bool SupportContainedRefs) |
| 41 | : Corpus(0) { |
| 42 | for (auto &&Sym : Symbols) |
| 43 | this->Symbols.push_back(&Sym); |
| 44 | for (auto &&Ref : Refs) |
| 45 | this->Refs.try_emplace(Ref.first, Ref.second); |
| 46 | for (auto &&Rel : Relations) |
| 47 | this->Relations[std::make_pair(Rel.Subject, |
| 48 | static_cast<uint8_t>(Rel.Predicate))] |
| 49 | .push_back(Rel.Object); |
| 50 | buildIndex(EnableOutgoingCalls: SupportContainedRefs); |
| 51 | } |
| 52 | // Symbols and Refs are owned by BackingData, Index takes ownership. |
| 53 | template <typename SymbolRange, typename RefsRange, typename RelationsRange, |
| 54 | typename Payload> |
| 55 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, |
| 56 | Payload &&BackingData, size_t BackingDataSize, bool SupportContainedRefs) |
| 57 | : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), |
| 58 | std::forward<RelationsRange>(Relations), SupportContainedRefs) { |
| 59 | KeepAlive = std::shared_ptr<void>( |
| 60 | std::make_shared<Payload>(std::move(BackingData)), nullptr); |
| 61 | this->BackingDataSize = BackingDataSize; |
| 62 | } |
| 63 | |
| 64 | template <typename SymbolRange, typename RefsRange, typename RelationsRange, |
| 65 | typename FileRange, typename Payload> |
| 66 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, |
| 67 | FileRange &&Files, IndexContents IdxContents, Payload &&BackingData, |
| 68 | size_t BackingDataSize, bool SupportContainedRefs) |
| 69 | : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), |
| 70 | std::forward<RelationsRange>(Relations), |
| 71 | std::forward<Payload>(BackingData), BackingDataSize, |
| 72 | SupportContainedRefs) { |
| 73 | this->Files = std::forward<FileRange>(Files); |
| 74 | this->IdxContents = IdxContents; |
| 75 | } |
| 76 | |
| 77 | /// Builds an index from slabs. The index takes ownership of the slab. |
| 78 | static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab, |
| 79 | bool SupportContainedRefs); |
| 80 | |
| 81 | bool |
| 82 | fuzzyFind(const FuzzyFindRequest &Req, |
| 83 | llvm::function_ref<void(const Symbol &)> Callback) const override; |
| 84 | |
| 85 | void lookup(const LookupRequest &Req, |
| 86 | llvm::function_ref<void(const Symbol &)> Callback) const override; |
| 87 | |
| 88 | bool refs(const RefsRequest &Req, |
| 89 | llvm::function_ref<void(const Ref &)> Callback) const override; |
| 90 | |
| 91 | bool containedRefs(const ContainedRefsRequest &Req, |
| 92 | llvm::function_ref<void(const ContainedRefsResult &)> |
| 93 | Callback) const override; |
| 94 | |
| 95 | void relations(const RelationsRequest &Req, |
| 96 | llvm::function_ref<void(const SymbolID &, const Symbol &)> |
| 97 | Callback) const override; |
| 98 | |
| 99 | llvm::unique_function<IndexContents(llvm::StringRef) const> |
| 100 | indexedFiles() const override; |
| 101 | |
| 102 | size_t estimateMemoryUsage() const override; |
| 103 | |
| 104 | private: |
| 105 | class RevRef { |
| 106 | const Ref *Reference; |
| 107 | SymbolID Target; |
| 108 | |
| 109 | public: |
| 110 | RevRef(const Ref &Reference, SymbolID Target) |
| 111 | : Reference(&Reference), Target(Target) {} |
| 112 | const Ref &ref() const { return *Reference; } |
| 113 | ContainedRefsResult containedRefsResult() const { |
| 114 | return {.Location: ref().Location, .Kind: ref().Kind, .Symbol: Target}; |
| 115 | } |
| 116 | }; |
| 117 | |
| 118 | void buildIndex(bool EnableOutgoingCalls); |
| 119 | llvm::iterator_range<std::vector<RevRef>::const_iterator> |
| 120 | lookupRevRefs(const SymbolID &Container) const; |
| 121 | std::unique_ptr<Iterator> iterator(const Token &Tok) const; |
| 122 | std::unique_ptr<Iterator> |
| 123 | createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const; |
| 124 | std::unique_ptr<Iterator> |
| 125 | createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const; |
| 126 | |
| 127 | /// Stores symbols sorted in the descending order of symbol quality.. |
| 128 | std::vector<const Symbol *> Symbols; |
| 129 | /// SymbolQuality[I] is the quality of Symbols[I]. |
| 130 | std::vector<float> SymbolQuality; |
| 131 | llvm::DenseMap<SymbolID, const Symbol *> LookupTable; |
| 132 | /// Inverted index is a mapping from the search token to the posting list, |
| 133 | /// which contains all items which can be characterized by such search token. |
| 134 | /// For example, if the search token is scope "std::", the corresponding |
| 135 | /// posting list would contain all indices of symbols defined in namespace |
| 136 | /// std. Inverted index is used to retrieve posting lists which are processed |
| 137 | /// during the fuzzyFind process. |
| 138 | llvm::DenseMap<Token, PostingList> InvertedIndex; |
| 139 | dex::Corpus Corpus; |
| 140 | llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs; |
| 141 | std::vector<RevRef> RevRefs; // sorted by container ID |
| 142 | static_assert(sizeof(RelationKind) == sizeof(uint8_t), |
| 143 | "RelationKind should be of same size as a uint8_t" ); |
| 144 | llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations; |
| 145 | std::shared_ptr<void> KeepAlive; // poor man's move-only std::any |
| 146 | // Set of files which were used during this index build. |
| 147 | llvm::StringSet<> Files; |
| 148 | // Contents of the index (symbols, references, etc.) |
| 149 | // This is only populated if `Files` is, which applies to some but not all |
| 150 | // consumers of this class. |
| 151 | IndexContents IdxContents = IndexContents::None; |
| 152 | // Size of memory retained by KeepAlive. |
| 153 | size_t BackingDataSize = 0; |
| 154 | }; |
| 155 | |
| 156 | /// Returns Search Token for a number of parent directories of given Path. |
| 157 | /// Should be used within the index build process. |
| 158 | /// |
| 159 | /// This function is exposed for testing only. |
| 160 | llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef); |
| 161 | |
| 162 | } // namespace dex |
| 163 | } // namespace clangd |
| 164 | } // namespace clang |
| 165 | |
| 166 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H |
| 167 | |