1 | //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This defines Dex - a symbol index implementation based on query iterators |
11 | /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc. |
12 | /// While consuming more memory and having longer build stage due to |
13 | /// preprocessing, Dex will have substantially lower latency. It will also allow |
14 | /// efficient symbol searching which is crucial for operations like code |
15 | /// completion, and can be very important for a number of different code |
16 | /// transformations which will be eventually supported by Clangd. |
17 | /// |
18 | //===----------------------------------------------------------------------===// |
19 | |
20 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H |
21 | #define |
22 | |
23 | #include "index/dex/Iterator.h" |
24 | #include "index/Index.h" |
25 | #include "index/Relation.h" |
26 | #include "index/dex/PostingList.h" |
27 | #include "index/dex/Token.h" |
28 | #include "llvm/ADT/StringSet.h" |
29 | |
30 | namespace clang { |
31 | namespace clangd { |
32 | namespace dex { |
33 | |
34 | /// In-memory Dex trigram-based index implementation. |
35 | class Dex : public SymbolIndex { |
36 | public: |
37 | // All data must outlive this index. |
38 | template <typename SymbolRange, typename RefsRange, typename RelationsRange> |
39 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, |
40 | bool SupportContainedRefs) |
41 | : Corpus(0) { |
42 | for (auto &&Sym : Symbols) |
43 | this->Symbols.push_back(&Sym); |
44 | for (auto &&Ref : Refs) |
45 | this->Refs.try_emplace(Ref.first, Ref.second); |
46 | for (auto &&Rel : Relations) |
47 | this->Relations[std::make_pair(Rel.Subject, |
48 | static_cast<uint8_t>(Rel.Predicate))] |
49 | .push_back(Rel.Object); |
50 | buildIndex(EnableOutgoingCalls: SupportContainedRefs); |
51 | } |
52 | // Symbols and Refs are owned by BackingData, Index takes ownership. |
53 | template <typename SymbolRange, typename RefsRange, typename RelationsRange, |
54 | typename Payload> |
55 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, |
56 | Payload &&BackingData, size_t BackingDataSize, bool SupportContainedRefs) |
57 | : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), |
58 | std::forward<RelationsRange>(Relations), SupportContainedRefs) { |
59 | KeepAlive = std::shared_ptr<void>( |
60 | std::make_shared<Payload>(std::move(BackingData)), nullptr); |
61 | this->BackingDataSize = BackingDataSize; |
62 | } |
63 | |
64 | template <typename SymbolRange, typename RefsRange, typename RelationsRange, |
65 | typename FileRange, typename Payload> |
66 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, |
67 | FileRange &&Files, IndexContents IdxContents, Payload &&BackingData, |
68 | size_t BackingDataSize, bool SupportContainedRefs) |
69 | : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), |
70 | std::forward<RelationsRange>(Relations), |
71 | std::forward<Payload>(BackingData), BackingDataSize, |
72 | SupportContainedRefs) { |
73 | this->Files = std::forward<FileRange>(Files); |
74 | this->IdxContents = IdxContents; |
75 | } |
76 | |
77 | /// Builds an index from slabs. The index takes ownership of the slab. |
78 | static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab, |
79 | bool SupportContainedRefs); |
80 | |
81 | bool |
82 | fuzzyFind(const FuzzyFindRequest &Req, |
83 | llvm::function_ref<void(const Symbol &)> Callback) const override; |
84 | |
85 | void lookup(const LookupRequest &Req, |
86 | llvm::function_ref<void(const Symbol &)> Callback) const override; |
87 | |
88 | bool refs(const RefsRequest &Req, |
89 | llvm::function_ref<void(const Ref &)> Callback) const override; |
90 | |
91 | bool containedRefs(const ContainedRefsRequest &Req, |
92 | llvm::function_ref<void(const ContainedRefsResult &)> |
93 | Callback) const override; |
94 | |
95 | void relations(const RelationsRequest &Req, |
96 | llvm::function_ref<void(const SymbolID &, const Symbol &)> |
97 | Callback) const override; |
98 | |
99 | llvm::unique_function<IndexContents(llvm::StringRef) const> |
100 | indexedFiles() const override; |
101 | |
102 | size_t estimateMemoryUsage() const override; |
103 | |
104 | private: |
105 | class RevRef { |
106 | const Ref *Reference; |
107 | SymbolID Target; |
108 | |
109 | public: |
110 | RevRef(const Ref &Reference, SymbolID Target) |
111 | : Reference(&Reference), Target(Target) {} |
112 | const Ref &ref() const { return *Reference; } |
113 | ContainedRefsResult containedRefsResult() const { |
114 | return {.Location: ref().Location, .Kind: ref().Kind, .Symbol: Target}; |
115 | } |
116 | }; |
117 | |
118 | void buildIndex(bool EnableOutgoingCalls); |
119 | llvm::iterator_range<std::vector<RevRef>::const_iterator> |
120 | lookupRevRefs(const SymbolID &Container) const; |
121 | std::unique_ptr<Iterator> iterator(const Token &Tok) const; |
122 | std::unique_ptr<Iterator> |
123 | createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const; |
124 | std::unique_ptr<Iterator> |
125 | createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const; |
126 | |
127 | /// Stores symbols sorted in the descending order of symbol quality.. |
128 | std::vector<const Symbol *> Symbols; |
129 | /// SymbolQuality[I] is the quality of Symbols[I]. |
130 | std::vector<float> SymbolQuality; |
131 | llvm::DenseMap<SymbolID, const Symbol *> LookupTable; |
132 | /// Inverted index is a mapping from the search token to the posting list, |
133 | /// which contains all items which can be characterized by such search token. |
134 | /// For example, if the search token is scope "std::", the corresponding |
135 | /// posting list would contain all indices of symbols defined in namespace |
136 | /// std. Inverted index is used to retrieve posting lists which are processed |
137 | /// during the fuzzyFind process. |
138 | llvm::DenseMap<Token, PostingList> InvertedIndex; |
139 | dex::Corpus Corpus; |
140 | llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs; |
141 | std::vector<RevRef> RevRefs; // sorted by container ID |
142 | static_assert(sizeof(RelationKind) == sizeof(uint8_t), |
143 | "RelationKind should be of same size as a uint8_t" ); |
144 | llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations; |
145 | std::shared_ptr<void> KeepAlive; // poor man's move-only std::any |
146 | // Set of files which were used during this index build. |
147 | llvm::StringSet<> Files; |
148 | // Contents of the index (symbols, references, etc.) |
149 | // This is only populated if `Files` is, which applies to some but not all |
150 | // consumers of this class. |
151 | IndexContents IdxContents = IndexContents::None; |
152 | // Size of memory retained by KeepAlive. |
153 | size_t BackingDataSize = 0; |
154 | }; |
155 | |
156 | /// Returns Search Token for a number of parent directories of given Path. |
157 | /// Should be used within the index build process. |
158 | /// |
159 | /// This function is exposed for testing only. |
160 | llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef); |
161 | |
162 | } // namespace dex |
163 | } // namespace clangd |
164 | } // namespace clang |
165 | |
166 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H |
167 | |