1 | //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This defines Dex - a symbol index implementation based on query iterators |
11 | /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc. |
12 | /// While consuming more memory and having longer build stage due to |
13 | /// preprocessing, Dex will have substantially lower latency. It will also allow |
14 | /// efficient symbol searching which is crucial for operations like code |
15 | /// completion, and can be very important for a number of different code |
16 | /// transformations which will be eventually supported by Clangd. |
17 | /// |
18 | //===----------------------------------------------------------------------===// |
19 | |
20 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H |
21 | #define |
22 | |
23 | #include "index/dex/Iterator.h" |
24 | #include "index/Index.h" |
25 | #include "index/Relation.h" |
26 | #include "index/dex/PostingList.h" |
27 | #include "index/dex/Token.h" |
28 | #include "llvm/ADT/StringSet.h" |
29 | |
30 | namespace clang { |
31 | namespace clangd { |
32 | namespace dex { |
33 | |
34 | /// In-memory Dex trigram-based index implementation. |
35 | class Dex : public SymbolIndex { |
36 | public: |
37 | // All data must outlive this index. |
38 | template <typename SymbolRange, typename RefsRange, typename RelationsRange> |
39 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations) |
40 | : Corpus(0) { |
41 | for (auto &&Sym : Symbols) |
42 | this->Symbols.push_back(&Sym); |
43 | for (auto &&Ref : Refs) |
44 | this->Refs.try_emplace(Ref.first, Ref.second); |
45 | for (auto &&Rel : Relations) |
46 | this->Relations[std::make_pair(Rel.Subject, |
47 | static_cast<uint8_t>(Rel.Predicate))] |
48 | .push_back(Rel.Object); |
49 | buildIndex(); |
50 | } |
51 | // Symbols and Refs are owned by BackingData, Index takes ownership. |
52 | template <typename SymbolRange, typename RefsRange, typename RelationsRange, |
53 | typename Payload> |
54 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, |
55 | Payload &&BackingData, size_t BackingDataSize) |
56 | : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), |
57 | std::forward<RelationsRange>(Relations)) { |
58 | KeepAlive = std::shared_ptr<void>( |
59 | std::make_shared<Payload>(std::move(BackingData)), nullptr); |
60 | this->BackingDataSize = BackingDataSize; |
61 | } |
62 | |
63 | template <typename SymbolRange, typename RefsRange, typename RelationsRange, |
64 | typename FileRange, typename Payload> |
65 | Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, |
66 | FileRange &&Files, IndexContents IdxContents, Payload &&BackingData, |
67 | size_t BackingDataSize) |
68 | : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), |
69 | std::forward<RelationsRange>(Relations), |
70 | std::forward<Payload>(BackingData), BackingDataSize) { |
71 | this->Files = std::forward<FileRange>(Files); |
72 | this->IdxContents = IdxContents; |
73 | } |
74 | |
75 | /// Builds an index from slabs. The index takes ownership of the slab. |
76 | static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab); |
77 | |
78 | bool |
79 | fuzzyFind(const FuzzyFindRequest &Req, |
80 | llvm::function_ref<void(const Symbol &)> Callback) const override; |
81 | |
82 | void lookup(const LookupRequest &Req, |
83 | llvm::function_ref<void(const Symbol &)> Callback) const override; |
84 | |
85 | bool refs(const RefsRequest &Req, |
86 | llvm::function_ref<void(const Ref &)> Callback) const override; |
87 | |
88 | void relations(const RelationsRequest &Req, |
89 | llvm::function_ref<void(const SymbolID &, const Symbol &)> |
90 | Callback) const override; |
91 | |
92 | llvm::unique_function<IndexContents(llvm::StringRef) const> |
93 | indexedFiles() const override; |
94 | |
95 | size_t estimateMemoryUsage() const override; |
96 | |
97 | private: |
98 | void buildIndex(); |
99 | std::unique_ptr<Iterator> iterator(const Token &Tok) const; |
100 | std::unique_ptr<Iterator> |
101 | createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const; |
102 | std::unique_ptr<Iterator> |
103 | createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const; |
104 | |
105 | /// Stores symbols sorted in the descending order of symbol quality.. |
106 | std::vector<const Symbol *> Symbols; |
107 | /// SymbolQuality[I] is the quality of Symbols[I]. |
108 | std::vector<float> SymbolQuality; |
109 | llvm::DenseMap<SymbolID, const Symbol *> LookupTable; |
110 | /// Inverted index is a mapping from the search token to the posting list, |
111 | /// which contains all items which can be characterized by such search token. |
112 | /// For example, if the search token is scope "std::", the corresponding |
113 | /// posting list would contain all indices of symbols defined in namespace |
114 | /// std. Inverted index is used to retrieve posting lists which are processed |
115 | /// during the fuzzyFind process. |
116 | llvm::DenseMap<Token, PostingList> InvertedIndex; |
117 | dex::Corpus Corpus; |
118 | llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs; |
119 | static_assert(sizeof(RelationKind) == sizeof(uint8_t), |
120 | "RelationKind should be of same size as a uint8_t" ); |
121 | llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations; |
122 | std::shared_ptr<void> KeepAlive; // poor man's move-only std::any |
123 | // Set of files which were used during this index build. |
124 | llvm::StringSet<> Files; |
125 | // Contents of the index (symbols, references, etc.) |
126 | IndexContents IdxContents; |
127 | // Size of memory retained by KeepAlive. |
128 | size_t BackingDataSize = 0; |
129 | }; |
130 | |
131 | /// Returns Search Token for a number of parent directories of given Path. |
132 | /// Should be used within the index build process. |
133 | /// |
134 | /// This function is exposed for testing only. |
135 | llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef); |
136 | |
137 | } // namespace dex |
138 | } // namespace clangd |
139 | } // namespace clang |
140 | |
141 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H |
142 | |