| 1 | //===--- SymbolCollector.h ---------------------------------------*- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H |
| 9 | #define |
| 10 | |
| 11 | #include "CollectMacros.h" |
| 12 | #include "clang-include-cleaner/Record.h" |
| 13 | #include "clang-include-cleaner/Types.h" |
| 14 | #include "index/Ref.h" |
| 15 | #include "index/Relation.h" |
| 16 | #include "index/Symbol.h" |
| 17 | #include "index/SymbolID.h" |
| 18 | #include "index/SymbolLocation.h" |
| 19 | #include "index/SymbolOrigin.h" |
| 20 | #include "clang/AST/ASTContext.h" |
| 21 | #include "clang/AST/Decl.h" |
| 22 | #include "clang/Basic/LLVM.h" |
| 23 | #include "clang/Basic/SourceLocation.h" |
| 24 | #include "clang/Basic/SourceManager.h" |
| 25 | #include "clang/Index/IndexDataConsumer.h" |
| 26 | #include "clang/Index/IndexSymbol.h" |
| 27 | #include "clang/Sema/CodeCompleteConsumer.h" |
| 28 | #include "llvm/ADT/DenseMap.h" |
| 29 | #include "llvm/ADT/DenseSet.h" |
| 30 | #include "llvm/ADT/SmallVector.h" |
| 31 | #include "llvm/ADT/StringRef.h" |
| 32 | #include <functional> |
| 33 | #include <memory> |
| 34 | #include <optional> |
| 35 | #include <string> |
| 36 | #include <utility> |
| 37 | |
| 38 | namespace clang { |
| 39 | namespace clangd { |
| 40 | |
| 41 | /// Collect declarations (symbols) from an AST. |
| 42 | /// It collects most declarations except: |
| 43 | /// - Implicit declarations |
| 44 | /// - Anonymous declarations (anonymous enum/class/struct, etc) |
| 45 | /// - Declarations in anonymous namespaces in headers |
| 46 | /// - Local declarations (in function bodies, blocks, etc) |
| 47 | /// - Template specializations |
| 48 | /// - Library-specific private declarations (e.g. private declaration generated |
| 49 | /// by protobuf compiler) |
| 50 | /// |
| 51 | /// References to main-file symbols are not collected. |
| 52 | /// |
| 53 | /// See also shouldCollectSymbol(...). |
| 54 | /// |
| 55 | /// Clients (e.g. clangd) can use SymbolCollector together with |
| 56 | /// index::indexTopLevelDecls to retrieve all symbols when the source file is |
| 57 | /// changed. |
| 58 | class SymbolCollector : public index::IndexDataConsumer { |
| 59 | public: |
| 60 | struct Options { |
| 61 | /// When symbol paths cannot be resolved to absolute paths (e.g. files in |
| 62 | /// VFS that does not have absolute path), combine the fallback directory |
| 63 | /// with symbols' paths to get absolute paths. This must be an absolute |
| 64 | /// path. |
| 65 | std::string FallbackDir; |
| 66 | bool CollectIncludePath = false; |
| 67 | /// If set, this is used to map symbol #include path to a potentially |
| 68 | /// different #include path specified by IWYU pragmas. |
| 69 | const include_cleaner::PragmaIncludes *PragmaIncludes = nullptr; |
| 70 | // Populate the Symbol.References field. |
| 71 | bool CountReferences = false; |
| 72 | /// The symbol ref kinds that will be collected. |
| 73 | /// If not set, SymbolCollector will not collect refs. |
| 74 | /// Note that references of namespace decls are not collected, as they |
| 75 | /// contribute large part of the index, and they are less useful compared |
| 76 | /// with other decls. |
| 77 | RefKind RefFilter = RefKind::Unknown; |
| 78 | /// If set to true, SymbolCollector will collect all refs (from main file |
| 79 | /// and included headers); otherwise, only refs from main file will be |
| 80 | /// collected. |
| 81 | /// This flag is only meaningful when RefFilter is set. |
| 82 | bool = false; |
| 83 | // Every symbol collected will be stamped with this origin. |
| 84 | SymbolOrigin Origin = SymbolOrigin::Unknown; |
| 85 | /// Collect macros. |
| 86 | /// Note that SymbolCollector must be run with preprocessor in order to |
| 87 | /// collect macros. For example, `indexTopLevelDecls` will not index any |
| 88 | /// macro even if this is true. |
| 89 | bool CollectMacro = false; |
| 90 | /// Collect symbols local to main-files, such as static functions, symbols |
| 91 | /// inside an anonymous namespace, function-local classes and its member |
| 92 | /// functions. |
| 93 | bool CollectMainFileSymbols = true; |
| 94 | /// Collect references to main-file symbols. |
| 95 | bool CollectMainFileRefs = false; |
| 96 | /// Collect symbols with reserved names, like __Vector_base. |
| 97 | /// This does not currently affect macros (many like _WIN32 are important!) |
| 98 | /// This only affects system headers. |
| 99 | bool CollectReserved = false; |
| 100 | /// If set to true, SymbolCollector will collect doc for all symbols. |
| 101 | /// Note that documents of symbols being indexed for completion will always |
| 102 | /// be collected regardless of this option. |
| 103 | bool StoreAllDocumentation = false; |
| 104 | /// If this is set, only collect symbols/references from a file if |
| 105 | /// `FileFilter(SM, FID)` is true. If not set, all files are indexed. |
| 106 | std::function<bool(const SourceManager &, FileID)> FileFilter = nullptr; |
| 107 | }; |
| 108 | |
| 109 | SymbolCollector(Options Opts); |
| 110 | ~SymbolCollector(); |
| 111 | |
| 112 | /// Returns true is \p ND should be collected. |
| 113 | static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx, |
| 114 | const Options &Opts, bool IsMainFileSymbol); |
| 115 | |
| 116 | // Given a ref contained in enclosing decl `Enclosing`, return |
| 117 | // the decl that should be used as that ref's Ref::Container. This is |
| 118 | // usually `Enclosing` itself, but in cases where `Enclosing` is not |
| 119 | // indexed, we walk further up because Ref::Container should always be |
| 120 | // an indexed symbol. |
| 121 | // Note: we don't use DeclContext as the container as in some cases |
| 122 | // it's useful to use a Decl which is not a DeclContext. For example, |
| 123 | // for a ref occurring in the initializer of a namespace-scope variable, |
| 124 | // it's useful to use that variable as the container, as otherwise the |
| 125 | // next enclosing DeclContext would be a NamespaceDecl or TranslationUnitDecl, |
| 126 | // which are both not indexed and less granular than we'd like for use cases |
| 127 | // like call hierarchy. |
| 128 | static const Decl *getRefContainer(const Decl *Enclosing, |
| 129 | const SymbolCollector::Options &Opts); |
| 130 | |
| 131 | void initialize(ASTContext &Ctx) override; |
| 132 | |
| 133 | void setPreprocessor(std::shared_ptr<Preprocessor> PP) override { |
| 134 | this->PP = PP.get(); |
| 135 | } |
| 136 | void setPreprocessor(Preprocessor &PP) { this->PP = &PP; } |
| 137 | |
| 138 | bool |
| 139 | handleDeclOccurrence(const Decl *D, index::SymbolRoleSet Roles, |
| 140 | ArrayRef<index::SymbolRelation> Relations, |
| 141 | SourceLocation Loc, |
| 142 | index::IndexDataConsumer::ASTNodeInfo ASTNode) override; |
| 143 | |
| 144 | bool handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI, |
| 145 | index::SymbolRoleSet Roles, |
| 146 | SourceLocation Loc) override; |
| 147 | |
| 148 | void handleMacros(const MainFileMacros &MacroRefsToIndex); |
| 149 | |
| 150 | SymbolSlab takeSymbols() { return std::move(Symbols).build(); } |
| 151 | RefSlab takeRefs() { return std::move(Refs).build(); } |
| 152 | RelationSlab takeRelations() { return std::move(Relations).build(); } |
| 153 | |
| 154 | /// Returns true if we are interested in references and declarations from \p |
| 155 | /// FID. If this function return false, bodies of functions inside those files |
| 156 | /// will be skipped to decrease indexing time. |
| 157 | bool shouldIndexFile(FileID FID); |
| 158 | |
| 159 | void finish() override; |
| 160 | |
| 161 | private: |
| 162 | const Symbol *addDeclaration(const NamedDecl &, SymbolID, |
| 163 | bool IsMainFileSymbol); |
| 164 | void addDefinition(const NamedDecl &, const Symbol &DeclSymbol, |
| 165 | bool SkipDocCheck); |
| 166 | void processRelations(const NamedDecl &ND, const SymbolID &ID, |
| 167 | ArrayRef<index::SymbolRelation> Relations); |
| 168 | |
| 169 | std::optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc); |
| 170 | |
| 171 | std::optional<std::string> (const Symbol &S, FileID); |
| 172 | |
| 173 | SymbolID getSymbolIDCached(const Decl *D); |
| 174 | SymbolID getSymbolIDCached(const llvm::StringRef MacroName, |
| 175 | const MacroInfo *MI, const SourceManager &SM); |
| 176 | |
| 177 | // All Symbols collected from the AST. |
| 178 | SymbolSlab::Builder Symbols; |
| 179 | // File IDs used to determine if the code contains Obj-C constructs. |
| 180 | // For Obj-C symbols, these File IDs are used to compute the include |
| 181 | // headers. |
| 182 | llvm::DenseMap<SymbolID, FileID> IncludeFiles; |
| 183 | void setIncludeLocation(const Symbol &S, SourceLocation, |
| 184 | const include_cleaner::Symbol &Sym); |
| 185 | |
| 186 | // Providers for Symbol.IncludeHeaders. |
| 187 | // The final spelling is calculated in finish(). |
| 188 | llvm::DenseMap<SymbolID, llvm::SmallVector<include_cleaner::Header>> |
| 189 | SymbolProviders; |
| 190 | // Files which contain ObjC symbols. |
| 191 | // This is finalized and used in finish(). |
| 192 | llvm::DenseSet<FileID> FilesWithObjCConstructs; |
| 193 | |
| 194 | // Indexed macros, to be erased if they turned out to be include guards. |
| 195 | llvm::DenseSet<const IdentifierInfo *> IndexedMacros; |
| 196 | // All refs collected from the AST. It includes: |
| 197 | // 1) symbols declared in the preamble and referenced from the main file ( |
| 198 | // which is not a header), or |
| 199 | // 2) symbols declared and referenced from the main file (which is a header) |
| 200 | RefSlab::Builder Refs; |
| 201 | // All relations collected from the AST. |
| 202 | RelationSlab::Builder Relations; |
| 203 | ASTContext *ASTCtx; |
| 204 | Preprocessor *PP = nullptr; |
| 205 | std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator; |
| 206 | std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo; |
| 207 | Options Opts; |
| 208 | struct SymbolRef { |
| 209 | SourceLocation Loc; |
| 210 | FileID FID; |
| 211 | index::SymbolRoleSet Roles; |
| 212 | index::SymbolKind Kind; |
| 213 | const Decl *Container; |
| 214 | bool Spelled; |
| 215 | }; |
| 216 | void addRef(SymbolID ID, const SymbolRef &SR); |
| 217 | // Symbols referenced from the current TU, flushed on finish(). |
| 218 | llvm::DenseSet<SymbolID> ReferencedSymbols; |
| 219 | // Maps canonical declaration provided by clang to canonical declaration for |
| 220 | // an index symbol, if clangd prefers a different declaration than that |
| 221 | // provided by clang. For example, friend declaration might be considered |
| 222 | // canonical by clang but should not be considered canonical in the index |
| 223 | // unless it's a definition. |
| 224 | llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls; |
| 225 | // Cache whether to index a file or not. |
| 226 | llvm::DenseMap<FileID, bool> FilesToIndexCache; |
| 227 | // Encapsulates calculations and caches around header paths, which headers |
| 228 | // to insert for which symbol, etc. |
| 229 | class ; |
| 230 | std::unique_ptr<HeaderFileURICache> ; |
| 231 | llvm::DenseMap<const Decl *, SymbolID> DeclToIDCache; |
| 232 | llvm::DenseMap<const MacroInfo *, SymbolID> MacroToIDCache; |
| 233 | }; |
| 234 | |
| 235 | } // namespace clangd |
| 236 | } // namespace clang |
| 237 | |
| 238 | #endif |
| 239 | |