1 | //===--- SymbolCollector.h ---------------------------------------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H |
9 | #define |
10 | |
11 | #include "CollectMacros.h" |
12 | #include "clang-include-cleaner/Record.h" |
13 | #include "clang-include-cleaner/Types.h" |
14 | #include "index/Ref.h" |
15 | #include "index/Relation.h" |
16 | #include "index/Symbol.h" |
17 | #include "index/SymbolID.h" |
18 | #include "index/SymbolLocation.h" |
19 | #include "index/SymbolOrigin.h" |
20 | #include "clang/AST/ASTContext.h" |
21 | #include "clang/AST/Decl.h" |
22 | #include "clang/Basic/LLVM.h" |
23 | #include "clang/Basic/SourceLocation.h" |
24 | #include "clang/Basic/SourceManager.h" |
25 | #include "clang/Index/IndexDataConsumer.h" |
26 | #include "clang/Index/IndexSymbol.h" |
27 | #include "clang/Sema/CodeCompleteConsumer.h" |
28 | #include "llvm/ADT/DenseMap.h" |
29 | #include "llvm/ADT/DenseSet.h" |
30 | #include "llvm/ADT/SmallVector.h" |
31 | #include "llvm/ADT/StringRef.h" |
32 | #include <functional> |
33 | #include <memory> |
34 | #include <optional> |
35 | #include <string> |
36 | #include <utility> |
37 | |
38 | namespace clang { |
39 | namespace clangd { |
40 | |
41 | /// Collect declarations (symbols) from an AST. |
42 | /// It collects most declarations except: |
43 | /// - Implicit declarations |
44 | /// - Anonymous declarations (anonymous enum/class/struct, etc) |
45 | /// - Declarations in anonymous namespaces in headers |
46 | /// - Local declarations (in function bodies, blocks, etc) |
47 | /// - Template specializations |
48 | /// - Library-specific private declarations (e.g. private declaration generated |
49 | /// by protobuf compiler) |
50 | /// |
51 | /// References to main-file symbols are not collected. |
52 | /// |
53 | /// See also shouldCollectSymbol(...). |
54 | /// |
55 | /// Clients (e.g. clangd) can use SymbolCollector together with |
56 | /// index::indexTopLevelDecls to retrieve all symbols when the source file is |
57 | /// changed. |
58 | class SymbolCollector : public index::IndexDataConsumer { |
59 | public: |
60 | struct Options { |
61 | /// When symbol paths cannot be resolved to absolute paths (e.g. files in |
62 | /// VFS that does not have absolute path), combine the fallback directory |
63 | /// with symbols' paths to get absolute paths. This must be an absolute |
64 | /// path. |
65 | std::string FallbackDir; |
66 | bool CollectIncludePath = false; |
67 | /// If set, this is used to map symbol #include path to a potentially |
68 | /// different #include path specified by IWYU pragmas. |
69 | const include_cleaner::PragmaIncludes *PragmaIncludes = nullptr; |
70 | // Populate the Symbol.References field. |
71 | bool CountReferences = false; |
72 | /// The symbol ref kinds that will be collected. |
73 | /// If not set, SymbolCollector will not collect refs. |
74 | /// Note that references of namespace decls are not collected, as they |
75 | /// contribute large part of the index, and they are less useful compared |
76 | /// with other decls. |
77 | RefKind RefFilter = RefKind::Unknown; |
78 | /// If set to true, SymbolCollector will collect all refs (from main file |
79 | /// and included headers); otherwise, only refs from main file will be |
80 | /// collected. |
81 | /// This flag is only meaningful when RefFilter is set. |
82 | bool = false; |
83 | // Every symbol collected will be stamped with this origin. |
84 | SymbolOrigin Origin = SymbolOrigin::Unknown; |
85 | /// Collect macros. |
86 | /// Note that SymbolCollector must be run with preprocessor in order to |
87 | /// collect macros. For example, `indexTopLevelDecls` will not index any |
88 | /// macro even if this is true. |
89 | bool CollectMacro = false; |
90 | /// Collect symbols local to main-files, such as static functions, symbols |
91 | /// inside an anonymous namespace, function-local classes and its member |
92 | /// functions. |
93 | bool CollectMainFileSymbols = true; |
94 | /// Collect references to main-file symbols. |
95 | bool CollectMainFileRefs = false; |
96 | /// Collect symbols with reserved names, like __Vector_base. |
97 | /// This does not currently affect macros (many like _WIN32 are important!) |
98 | /// This only affects system headers. |
99 | bool CollectReserved = false; |
100 | /// If set to true, SymbolCollector will collect doc for all symbols. |
101 | /// Note that documents of symbols being indexed for completion will always |
102 | /// be collected regardless of this option. |
103 | bool StoreAllDocumentation = false; |
104 | /// If this is set, only collect symbols/references from a file if |
105 | /// `FileFilter(SM, FID)` is true. If not set, all files are indexed. |
106 | std::function<bool(const SourceManager &, FileID)> FileFilter = nullptr; |
107 | }; |
108 | |
109 | SymbolCollector(Options Opts); |
110 | ~SymbolCollector(); |
111 | |
112 | /// Returns true is \p ND should be collected. |
113 | static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx, |
114 | const Options &Opts, bool IsMainFileSymbol); |
115 | |
116 | // Given a ref contained in enclosing decl `Enclosing`, return |
117 | // the decl that should be used as that ref's Ref::Container. This is |
118 | // usually `Enclosing` itself, but in cases where `Enclosing` is not |
119 | // indexed, we walk further up because Ref::Container should always be |
120 | // an indexed symbol. |
121 | // Note: we don't use DeclContext as the container as in some cases |
122 | // it's useful to use a Decl which is not a DeclContext. For example, |
123 | // for a ref occurring in the initializer of a namespace-scope variable, |
124 | // it's useful to use that variable as the container, as otherwise the |
125 | // next enclosing DeclContext would be a NamespaceDecl or TranslationUnitDecl, |
126 | // which are both not indexed and less granular than we'd like for use cases |
127 | // like call hierarchy. |
128 | static const Decl *getRefContainer(const Decl *Enclosing, |
129 | const SymbolCollector::Options &Opts); |
130 | |
131 | void initialize(ASTContext &Ctx) override; |
132 | |
133 | void setPreprocessor(std::shared_ptr<Preprocessor> PP) override { |
134 | this->PP = PP.get(); |
135 | } |
136 | void setPreprocessor(Preprocessor &PP) { this->PP = &PP; } |
137 | |
138 | bool |
139 | handleDeclOccurrence(const Decl *D, index::SymbolRoleSet Roles, |
140 | ArrayRef<index::SymbolRelation> Relations, |
141 | SourceLocation Loc, |
142 | index::IndexDataConsumer::ASTNodeInfo ASTNode) override; |
143 | |
144 | bool handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI, |
145 | index::SymbolRoleSet Roles, |
146 | SourceLocation Loc) override; |
147 | |
148 | void handleMacros(const MainFileMacros &MacroRefsToIndex); |
149 | |
150 | SymbolSlab takeSymbols() { return std::move(Symbols).build(); } |
151 | RefSlab takeRefs() { return std::move(Refs).build(); } |
152 | RelationSlab takeRelations() { return std::move(Relations).build(); } |
153 | |
154 | /// Returns true if we are interested in references and declarations from \p |
155 | /// FID. If this function return false, bodies of functions inside those files |
156 | /// will be skipped to decrease indexing time. |
157 | bool shouldIndexFile(FileID FID); |
158 | |
159 | void finish() override; |
160 | |
161 | private: |
162 | const Symbol *addDeclaration(const NamedDecl &, SymbolID, |
163 | bool IsMainFileSymbol); |
164 | void addDefinition(const NamedDecl &, const Symbol &DeclSymbol); |
165 | void processRelations(const NamedDecl &ND, const SymbolID &ID, |
166 | ArrayRef<index::SymbolRelation> Relations); |
167 | |
168 | std::optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc); |
169 | |
170 | std::optional<std::string> (const Symbol &S, FileID); |
171 | |
172 | SymbolID getSymbolIDCached(const Decl *D); |
173 | SymbolID getSymbolIDCached(const llvm::StringRef MacroName, |
174 | const MacroInfo *MI, const SourceManager &SM); |
175 | |
176 | // All Symbols collected from the AST. |
177 | SymbolSlab::Builder Symbols; |
178 | // File IDs used to determine if the code contains Obj-C constructs. |
179 | // For Obj-C symbols, these File IDs are used to compute the include |
180 | // headers. |
181 | llvm::DenseMap<SymbolID, FileID> IncludeFiles; |
182 | void setIncludeLocation(const Symbol &S, SourceLocation, |
183 | const include_cleaner::Symbol &Sym); |
184 | |
185 | // Providers for Symbol.IncludeHeaders. |
186 | // The final spelling is calculated in finish(). |
187 | llvm::DenseMap<SymbolID, llvm::SmallVector<include_cleaner::Header>> |
188 | SymbolProviders; |
189 | // Files which contain ObjC symbols. |
190 | // This is finalized and used in finish(). |
191 | llvm::DenseSet<FileID> FilesWithObjCConstructs; |
192 | |
193 | // Indexed macros, to be erased if they turned out to be include guards. |
194 | llvm::DenseSet<const IdentifierInfo *> IndexedMacros; |
195 | // All refs collected from the AST. It includes: |
196 | // 1) symbols declared in the preamble and referenced from the main file ( |
197 | // which is not a header), or |
198 | // 2) symbols declared and referenced from the main file (which is a header) |
199 | RefSlab::Builder Refs; |
200 | // All relations collected from the AST. |
201 | RelationSlab::Builder Relations; |
202 | ASTContext *ASTCtx; |
203 | Preprocessor *PP = nullptr; |
204 | std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator; |
205 | std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo; |
206 | Options Opts; |
207 | struct SymbolRef { |
208 | SourceLocation Loc; |
209 | FileID FID; |
210 | index::SymbolRoleSet Roles; |
211 | const Decl *Container; |
212 | bool Spelled; |
213 | }; |
214 | void addRef(SymbolID ID, const SymbolRef &SR); |
215 | // Symbols referenced from the current TU, flushed on finish(). |
216 | llvm::DenseSet<SymbolID> ReferencedSymbols; |
217 | // Maps canonical declaration provided by clang to canonical declaration for |
218 | // an index symbol, if clangd prefers a different declaration than that |
219 | // provided by clang. For example, friend declaration might be considered |
220 | // canonical by clang but should not be considered canonical in the index |
221 | // unless it's a definition. |
222 | llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls; |
223 | // Cache whether to index a file or not. |
224 | llvm::DenseMap<FileID, bool> FilesToIndexCache; |
225 | // Encapsulates calculations and caches around header paths, which headers |
226 | // to insert for which symbol, etc. |
227 | class ; |
228 | std::unique_ptr<HeaderFileURICache> ; |
229 | llvm::DenseMap<const Decl *, SymbolID> DeclToIDCache; |
230 | llvm::DenseMap<const MacroInfo *, SymbolID> MacroToIDCache; |
231 | }; |
232 | |
233 | } // namespace clangd |
234 | } // namespace clang |
235 | |
236 | #endif |
237 | |