| 1 | //===--- FileIndex.h - Index for files. ---------------------------- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // FileIndex implements SymbolIndex for symbols from a set of files. Symbols are |
| 10 | // maintained at source-file granularity (e.g. with ASTs), and files can be |
| 11 | // updated dynamically. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H |
| 16 | #define |
| 17 | |
| 18 | #include "Headers.h" |
| 19 | #include "clang-include-cleaner/Record.h" |
| 20 | #include "index/Index.h" |
| 21 | #include "index/Merge.h" |
| 22 | #include "index/Ref.h" |
| 23 | #include "index/Relation.h" |
| 24 | #include "index/Serialization.h" |
| 25 | #include "index/Symbol.h" |
| 26 | #include "support/MemoryTree.h" |
| 27 | #include "support/Path.h" |
| 28 | #include "clang/Lex/Preprocessor.h" |
| 29 | #include "llvm/ADT/DenseSet.h" |
| 30 | #include "llvm/ADT/StringMap.h" |
| 31 | #include "llvm/ADT/StringRef.h" |
| 32 | #include <memory> |
| 33 | #include <optional> |
| 34 | #include <vector> |
| 35 | |
| 36 | namespace clang { |
| 37 | class ASTContext; |
| 38 | namespace clangd { |
| 39 | class ParsedAST; |
| 40 | |
| 41 | /// Select between in-memory index implementations, which have tradeoffs. |
| 42 | enum class IndexType { |
| 43 | // MemIndex is trivially cheap to build, but has poor query performance. |
| 44 | Light, |
| 45 | // Dex is relatively expensive to build and uses more memory, but is fast. |
| 46 | Heavy, |
| 47 | }; |
| 48 | |
| 49 | /// How to handle duplicated symbols across multiple files. |
| 50 | enum class DuplicateHandling { |
| 51 | // Pick a random symbol. Less accurate but faster. |
| 52 | PickOne, |
| 53 | // Merge symbols. More accurate but slower. |
| 54 | Merge, |
| 55 | }; |
| 56 | |
| 57 | /// A container of slabs associated with a key. It can be updated at key |
| 58 | /// granularity, replacing all slabs belonging to a key with a new set. Keys are |
| 59 | /// usually file paths/uris. |
| 60 | /// |
| 61 | /// This implements snapshot semantics. Each update will create a new snapshot |
| 62 | /// for all slabs of the Key. Snapshots are managed with shared pointers that |
| 63 | /// are shared between this class and the users. For each key, this class only |
| 64 | /// stores a pointer pointing to the newest snapshot, and an outdated snapshot |
| 65 | /// is deleted by the last owner of the snapshot, either this class or the |
| 66 | /// symbol index. |
| 67 | /// |
| 68 | /// The snapshot semantics keeps critical sections minimal since we only need |
| 69 | /// locking when we swap or obtain references to snapshots. |
| 70 | class FileSymbols { |
| 71 | public: |
| 72 | FileSymbols(IndexContents IdxContents, bool SupportContainedRefs); |
| 73 | /// Updates all slabs associated with the \p Key. |
| 74 | /// If either is nullptr, corresponding data for \p Key will be removed. |
| 75 | /// If CountReferences is true, \p Refs will be used for counting references |
| 76 | /// during merging. |
| 77 | void update(llvm::StringRef Key, std::unique_ptr<SymbolSlab> Symbols, |
| 78 | std::unique_ptr<RefSlab> Refs, |
| 79 | std::unique_ptr<RelationSlab> Relations, bool CountReferences); |
| 80 | |
| 81 | /// The index keeps the slabs alive. |
| 82 | /// Will count Symbol::References based on number of references in the main |
| 83 | /// files, while building the index with DuplicateHandling::Merge option. |
| 84 | /// Version is populated with an increasing sequence counter. |
| 85 | std::unique_ptr<SymbolIndex> |
| 86 | buildIndex(IndexType, |
| 87 | DuplicateHandling DuplicateHandle = DuplicateHandling::PickOne, |
| 88 | size_t *Version = nullptr); |
| 89 | |
| 90 | void profile(MemoryTree &MT) const; |
| 91 | |
| 92 | private: |
| 93 | IndexContents IdxContents; |
| 94 | bool SupportContainedRefs; |
| 95 | |
| 96 | struct RefSlabAndCountReferences { |
| 97 | std::shared_ptr<RefSlab> Slab; |
| 98 | bool CountReferences = false; |
| 99 | }; |
| 100 | mutable std::mutex Mutex; |
| 101 | |
| 102 | size_t Version = 0; |
| 103 | llvm::StringMap<std::shared_ptr<SymbolSlab>> SymbolsSnapshot; |
| 104 | llvm::StringMap<RefSlabAndCountReferences> RefsSnapshot; |
| 105 | llvm::StringMap<std::shared_ptr<RelationSlab>> RelationsSnapshot; |
| 106 | }; |
| 107 | |
| 108 | /// This manages symbols from files and an in-memory index on all symbols. |
| 109 | /// FIXME: Expose an interface to remove files that are closed. |
| 110 | class FileIndex : public MergedIndex { |
| 111 | public: |
| 112 | FileIndex(bool SupportContainedRefs); |
| 113 | |
| 114 | /// Update preamble symbols of file \p Path with all declarations in \p AST |
| 115 | /// and macros in \p PP. |
| 116 | void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST, |
| 117 | Preprocessor &PP, |
| 118 | const include_cleaner::PragmaIncludes &PI); |
| 119 | void updatePreamble(IndexFileIn); |
| 120 | |
| 121 | /// Update symbols and references from main file \p Path with |
| 122 | /// `indexMainDecls`. |
| 123 | void updateMain(PathRef Path, ParsedAST &AST); |
| 124 | |
| 125 | void profile(MemoryTree &MT) const; |
| 126 | |
| 127 | private: |
| 128 | // Contains information from each file's preamble only. Symbols and relations |
| 129 | // are sharded per declaration file to deduplicate multiple symbols and reduce |
| 130 | // memory usage. |
| 131 | // Missing information: |
| 132 | // - symbol refs (these are always "from the main file") |
| 133 | // - definition locations in the main file |
| 134 | // |
| 135 | // Note that we store only one version of a header, hence symbols appearing in |
| 136 | // different PP states will be missing. |
| 137 | FileSymbols PreambleSymbols; |
| 138 | SwapIndex PreambleIndex; |
| 139 | |
| 140 | // Contains information from each file's main AST. |
| 141 | // These are updated frequently (on file change), but are relatively small. |
| 142 | // Mostly contains: |
| 143 | // - refs to symbols declared in the preamble and referenced from main |
| 144 | // - symbols declared both in the main file and the preamble |
| 145 | // (Note that symbols *only* in the main file are not indexed). |
| 146 | FileSymbols MainFileSymbols; |
| 147 | SwapIndex MainFileIndex; |
| 148 | |
| 149 | // While both the FileIndex and SwapIndex are threadsafe, we need to track |
| 150 | // versions to ensure that we don't overwrite newer indexes with older ones. |
| 151 | std::mutex UpdateIndexMu; |
| 152 | unsigned MainIndexVersion = 0; |
| 153 | unsigned PreambleIndexVersion = 0; |
| 154 | }; |
| 155 | |
| 156 | using SlabTuple = std::tuple<SymbolSlab, RefSlab, RelationSlab>; |
| 157 | |
| 158 | /// Retrieves symbols and refs of local top level decls in \p AST (i.e. |
| 159 | /// `AST.getLocalTopLevelDecls()`). |
| 160 | /// Exposed to assist in unit tests. |
| 161 | SlabTuple indexMainDecls(ParsedAST &AST); |
| 162 | |
| 163 | /// Index declarations from \p AST and macros from \p PP that are declared in |
| 164 | /// included headers. |
| 165 | SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, |
| 166 | Preprocessor &PP, |
| 167 | const include_cleaner::PragmaIncludes &PI, |
| 168 | SymbolOrigin Origin); |
| 169 | |
| 170 | /// Takes slabs coming from a TU (multiple files) and shards them per |
| 171 | /// declaration location. |
| 172 | struct FileShardedIndex { |
| 173 | /// \p HintPath is used to convert file URIs stored in symbols into absolute |
| 174 | /// paths. |
| 175 | explicit FileShardedIndex(IndexFileIn Input); |
| 176 | |
| 177 | /// Returns uris for all files that has a shard. |
| 178 | std::vector<llvm::StringRef> getAllSources() const; |
| 179 | |
| 180 | /// Generates index shard for the \p Uri. Note that this function results in |
| 181 | /// a copy of all the relevant data. |
| 182 | /// Returned index will always have Symbol/Refs/Relation Slabs set, even if |
| 183 | /// they are empty. |
| 184 | std::optional<IndexFileIn> getShard(llvm::StringRef Uri) const; |
| 185 | |
| 186 | private: |
| 187 | // Contains all the information that belongs to a single file. |
| 188 | struct FileShard { |
| 189 | // Either declared or defined in the file. |
| 190 | llvm::DenseSet<const Symbol *> Symbols; |
| 191 | // Reference occurs in the file. |
| 192 | llvm::DenseSet<const Ref *> Refs; |
| 193 | // Subject is declared in the file. |
| 194 | llvm::DenseSet<const Relation *> Relations; |
| 195 | // Contains edges for only the direct includes. |
| 196 | IncludeGraph IG; |
| 197 | }; |
| 198 | |
| 199 | // Keeps all the information alive. |
| 200 | const IndexFileIn Index; |
| 201 | // Mapping from URIs to slab information. |
| 202 | llvm::StringMap<FileShard> Shards; |
| 203 | // Used to build RefSlabs. |
| 204 | llvm::DenseMap<const Ref *, SymbolID> RefToSymID; |
| 205 | }; |
| 206 | |
| 207 | } // namespace clangd |
| 208 | } // namespace clang |
| 209 | |
| 210 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H |
| 211 | |