1//===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "FileIndex.h"
10#include "CollectMacros.h"
11#include "ParsedAST.h"
12#include "clang-include-cleaner/Record.h"
13#include "index/Index.h"
14#include "index/MemIndex.h"
15#include "index/Merge.h"
16#include "index/Ref.h"
17#include "index/Relation.h"
18#include "index/Serialization.h"
19#include "index/Symbol.h"
20#include "index/SymbolCollector.h"
21#include "index/SymbolID.h"
22#include "index/SymbolOrigin.h"
23#include "index/dex/Dex.h"
24#include "support/Logger.h"
25#include "support/MemoryTree.h"
26#include "support/Path.h"
27#include "clang/AST/ASTContext.h"
28#include "clang/Index/IndexingAction.h"
29#include "clang/Index/IndexingOptions.h"
30#include "clang/Lex/Preprocessor.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/StringMap.h"
34#include "llvm/ADT/StringRef.h"
35#include <algorithm>
36#include <memory>
37#include <optional>
38#include <tuple>
39#include <utility>
40#include <vector>
41
42namespace clang {
43namespace clangd {
44namespace {
45
46SlabTuple indexSymbols(ASTContext &AST, Preprocessor &PP,
47 llvm::ArrayRef<Decl *> DeclsToIndex,
48 const MainFileMacros *MacroRefsToIndex,
49 const include_cleaner::PragmaIncludes &PI,
50 bool IsIndexMainAST, llvm::StringRef Version,
51 bool CollectMainFileRefs, SymbolOrigin Origin) {
52 SymbolCollector::Options CollectorOpts;
53 CollectorOpts.CollectIncludePath = true;
54 CollectorOpts.PragmaIncludes = &PI;
55 CollectorOpts.CountReferences = false;
56 CollectorOpts.Origin = Origin;
57 CollectorOpts.CollectMainFileRefs = CollectMainFileRefs;
58 // We want stdlib implementation details in the index only if we've opened the
59 // file in question. This does means xrefs won't work, though.
60 CollectorOpts.CollectReserved = IsIndexMainAST;
61
62 index::IndexingOptions IndexOpts;
63 // We only need declarations, because we don't count references.
64 IndexOpts.SystemSymbolFilter =
65 index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
66 // We index function-local classes and its member functions only.
67 IndexOpts.IndexFunctionLocals = true;
68 if (IsIndexMainAST) {
69 // We only collect refs when indexing main AST.
70 CollectorOpts.RefFilter = RefKind::All;
71 // Comments for main file can always be obtained from sema, do not store
72 // them in the index.
73 CollectorOpts.StoreAllDocumentation = false;
74 } else {
75 IndexOpts.IndexMacrosInPreprocessor = true;
76 CollectorOpts.CollectMacro = true;
77 CollectorOpts.StoreAllDocumentation = true;
78 }
79
80 SymbolCollector Collector(std::move(CollectorOpts));
81 Collector.setPreprocessor(PP);
82 index::indexTopLevelDecls(Ctx&: AST, PP, Decls: DeclsToIndex, DataConsumer&: Collector,
83 Opts: std::move(IndexOpts));
84 if (MacroRefsToIndex)
85 Collector.handleMacros(MacroRefsToIndex: *MacroRefsToIndex);
86
87 const auto &SM = AST.getSourceManager();
88 const auto MainFileEntry = SM.getFileEntryRefForID(FID: SM.getMainFileID());
89 std::string FileName =
90 std::string(MainFileEntry ? MainFileEntry->getName() : "");
91
92 auto Syms = Collector.takeSymbols();
93 auto Refs = Collector.takeRefs();
94 auto Relations = Collector.takeRelations();
95
96 vlog(Fmt: "indexed {0} AST for {1} version {2}:\n"
97 " symbol slab: {3} symbols, {4} bytes\n"
98 " ref slab: {5} symbols, {6} refs, {7} bytes\n"
99 " relations slab: {8} relations, {9} bytes",
100 Vals: IsIndexMainAST ? "file" : "preamble", Vals&: FileName, Vals&: Version, Vals: Syms.size(),
101 Vals: Syms.bytes(), Vals: Refs.size(), Vals: Refs.numRefs(), Vals: Refs.bytes(),
102 Vals: Relations.size(), Vals: Relations.bytes());
103 return std::make_tuple(args: std::move(Syms), args: std::move(Refs),
104 args: std::move(Relations));
105}
106
107// We keep only the node "U" and its edges. Any node other than "U" will be
108// empty in the resultant graph.
109IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
110 IncludeGraph IG;
111
112 auto Entry = IG.try_emplace(Key: URI).first;
113 auto &Node = Entry->getValue();
114 Node = FullGraph.lookup(Key: Entry->getKey());
115 Node.URI = Entry->getKey();
116
117 // URIs inside nodes must point into the keys of the same IncludeGraph.
118 for (auto &Include : Node.DirectIncludes) {
119 auto I = IG.try_emplace(Key: Include).first;
120 I->getValue().URI = I->getKey();
121 Include = I->getKey();
122 }
123 return IG;
124}
125} // namespace
126
127FileShardedIndex::FileShardedIndex(IndexFileIn Input)
128 : Index(std::move(Input)) {
129 // Used to build RelationSlabs.
130 llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;
131
132 // Attribute each Symbol to both their declaration and definition locations.
133 if (Index.Symbols) {
134 for (const auto &S : *Index.Symbols) {
135 auto It = Shards.try_emplace(Key: S.CanonicalDeclaration.FileURI);
136 It.first->getValue().Symbols.insert(V: &S);
137 SymbolIDToFile[S.ID] = &It.first->getValue();
138 // Only bother if definition file is different than declaration file.
139 if (S.Definition &&
140 S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
141 auto It = Shards.try_emplace(Key: S.Definition.FileURI);
142 It.first->getValue().Symbols.insert(V: &S);
143 }
144 }
145 }
146 // Attribute references into each file they occured in.
147 if (Index.Refs) {
148 for (const auto &SymRefs : *Index.Refs) {
149 for (const auto &R : SymRefs.second) {
150 const auto It = Shards.try_emplace(Key: R.Location.FileURI);
151 It.first->getValue().Refs.insert(V: &R);
152 RefToSymID[&R] = SymRefs.first;
153 }
154 }
155 }
156 // The Subject and/or Object shards might be part of multiple TUs. In
157 // such cases there will be a race and the last TU to write the shard
158 // will win and all the other relations will be lost. To avoid this,
159 // we store relations in both shards. A race might still happen if the
160 // same translation unit produces different relations under different
161 // configurations, but that's something clangd doesn't handle in general.
162 if (Index.Relations) {
163 for (const auto &R : *Index.Relations) {
164 // FIXME: RelationSlab shouldn't contain dangling relations.
165 FileShard *SubjectFile = SymbolIDToFile.lookup(Val: R.Subject);
166 FileShard *ObjectFile = SymbolIDToFile.lookup(Val: R.Object);
167 if (SubjectFile)
168 SubjectFile->Relations.insert(V: &R);
169 if (ObjectFile && ObjectFile != SubjectFile)
170 ObjectFile->Relations.insert(V: &R);
171 }
172 }
173 // Store only the direct includes of a file in a shard.
174 if (Index.Sources) {
175 const auto &FullGraph = *Index.Sources;
176 for (const auto &It : FullGraph) {
177 auto ShardIt = Shards.try_emplace(Key: It.first());
178 ShardIt.first->getValue().IG = getSubGraph(URI: It.first(), FullGraph);
179 }
180 }
181}
182std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {
183 // It should be enough to construct a vector with {Shards.keys().begin(),
184 // Shards.keys().end()} but MSVC fails to compile that.
185 std::vector<PathRef> Result;
186 Result.reserve(n: Shards.size());
187 for (auto Key : Shards.keys())
188 Result.push_back(x: Key);
189 return Result;
190}
191
192std::optional<IndexFileIn>
193FileShardedIndex::getShard(llvm::StringRef Uri) const {
194 auto It = Shards.find(Key: Uri);
195 if (It == Shards.end())
196 return std::nullopt;
197
198 IndexFileIn IF;
199 IF.Sources = It->getValue().IG;
200 IF.Cmd = Index.Cmd;
201
202 SymbolSlab::Builder SymB;
203 for (const auto *S : It->getValue().Symbols)
204 SymB.insert(S: *S);
205 IF.Symbols = std::move(SymB).build();
206
207 RefSlab::Builder RefB;
208 for (const auto *Ref : It->getValue().Refs) {
209 auto SID = RefToSymID.lookup(Val: Ref);
210 RefB.insert(ID: SID, S: *Ref);
211 }
212 IF.Refs = std::move(RefB).build();
213
214 RelationSlab::Builder RelB;
215 for (const auto *Rel : It->getValue().Relations) {
216 RelB.insert(R: *Rel);
217 }
218 IF.Relations = std::move(RelB).build();
219 // Explicit move here is needed by some compilers.
220 return std::move(IF);
221}
222
223SlabTuple indexMainDecls(ParsedAST &AST) {
224 return indexSymbols(AST&: AST.getASTContext(), PP&: AST.getPreprocessor(),
225 DeclsToIndex: AST.getLocalTopLevelDecls(), MacroRefsToIndex: &AST.getMacros(),
226 PI: AST.getPragmaIncludes(),
227 /*IsIndexMainAST=*/true, Version: AST.version(),
228 /*CollectMainFileRefs=*/true, Origin: SymbolOrigin::Open);
229}
230
231SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
232 Preprocessor &PP,
233 const include_cleaner::PragmaIncludes &PI,
234 SymbolOrigin Origin) {
235 std::vector<Decl *> DeclsToIndex(
236 AST.getTranslationUnitDecl()->decls().begin(),
237 AST.getTranslationUnitDecl()->decls().end());
238 return indexSymbols(AST, PP, DeclsToIndex,
239 /*MainFileMacros=*/MacroRefsToIndex: nullptr, PI,
240 /*IsIndexMainAST=*/false, Version,
241 /*CollectMainFileRefs=*/false, Origin);
242}
243
244FileSymbols::FileSymbols(IndexContents IdxContents, bool SupportContainedRefs)
245 : IdxContents(IdxContents), SupportContainedRefs(SupportContainedRefs) {}
246
247void FileSymbols::update(llvm::StringRef Key,
248 std::unique_ptr<SymbolSlab> Symbols,
249 std::unique_ptr<RefSlab> Refs,
250 std::unique_ptr<RelationSlab> Relations,
251 bool CountReferences) {
252 std::lock_guard<std::mutex> Lock(Mutex);
253 ++Version;
254 if (!Symbols)
255 SymbolsSnapshot.erase(Key);
256 else
257 SymbolsSnapshot[Key] = std::move(Symbols);
258 if (!Refs) {
259 RefsSnapshot.erase(Key);
260 } else {
261 RefSlabAndCountReferences Item;
262 Item.CountReferences = CountReferences;
263 Item.Slab = std::move(Refs);
264 RefsSnapshot[Key] = std::move(Item);
265 }
266 if (!Relations)
267 RelationsSnapshot.erase(Key);
268 else
269 RelationsSnapshot[Key] = std::move(Relations);
270}
271
272std::unique_ptr<SymbolIndex>
273FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
274 size_t *Version) {
275 std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
276 std::vector<std::shared_ptr<RefSlab>> RefSlabs;
277 std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
278 llvm::StringSet<> Files;
279 std::vector<RefSlab *> MainFileRefs;
280 {
281 std::lock_guard<std::mutex> Lock(Mutex);
282 for (const auto &FileAndSymbols : SymbolsSnapshot) {
283 SymbolSlabs.push_back(x: FileAndSymbols.second);
284 Files.insert(key: FileAndSymbols.first());
285 }
286 for (const auto &FileAndRefs : RefsSnapshot) {
287 RefSlabs.push_back(x: FileAndRefs.second.Slab);
288 Files.insert(key: FileAndRefs.first());
289 if (FileAndRefs.second.CountReferences)
290 MainFileRefs.push_back(x: RefSlabs.back().get());
291 }
292 for (const auto &FileAndRelations : RelationsSnapshot) {
293 Files.insert(key: FileAndRelations.first());
294 RelationSlabs.push_back(x: FileAndRelations.second);
295 }
296
297 if (Version)
298 *Version = this->Version;
299 }
300 std::vector<const Symbol *> AllSymbols;
301 std::vector<Symbol> SymsStorage;
302 switch (DuplicateHandle) {
303 case DuplicateHandling::Merge: {
304 llvm::DenseMap<SymbolID, Symbol> Merged;
305 for (const auto &Slab : SymbolSlabs) {
306 for (const auto &Sym : *Slab) {
307 assert(Sym.References == 0 &&
308 "Symbol with non-zero references sent to FileSymbols");
309 auto I = Merged.try_emplace(Key: Sym.ID, Args: Sym);
310 if (!I.second)
311 I.first->second = mergeSymbol(L: I.first->second, R: Sym);
312 }
313 }
314 for (const RefSlab *Refs : MainFileRefs)
315 for (const auto &Sym : *Refs) {
316 auto It = Merged.find(Val: Sym.first);
317 // This might happen while background-index is still running.
318 if (It == Merged.end())
319 continue;
320 It->getSecond().References += Sym.second.size();
321 }
322 SymsStorage.reserve(n: Merged.size());
323 for (auto &Sym : Merged) {
324 SymsStorage.push_back(x: std::move(Sym.second));
325 AllSymbols.push_back(x: &SymsStorage.back());
326 }
327 break;
328 }
329 case DuplicateHandling::PickOne: {
330 llvm::DenseSet<SymbolID> AddedSymbols;
331 for (const auto &Slab : SymbolSlabs)
332 for (const auto &Sym : *Slab) {
333 assert(Sym.References == 0 &&
334 "Symbol with non-zero references sent to FileSymbols");
335 if (AddedSymbols.insert(V: Sym.ID).second)
336 AllSymbols.push_back(x: &Sym);
337 }
338 break;
339 }
340 }
341
342 std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
343 llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
344 {
345 llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
346 size_t Count = 0;
347 for (const auto &RefSlab : RefSlabs)
348 for (const auto &Sym : *RefSlab) {
349 MergedRefs[Sym.first].append(in_start: Sym.second.begin(), in_end: Sym.second.end());
350 Count += Sym.second.size();
351 }
352 RefsStorage.reserve(n: Count);
353 AllRefs.reserve(NumEntries: MergedRefs.size());
354 for (auto &Sym : MergedRefs) {
355 auto &SymRefs = Sym.second;
356 // Sorting isn't required, but yields more stable results over rebuilds.
357 llvm::sort(C&: SymRefs);
358 llvm::copy(Range&: SymRefs, Out: back_inserter(x&: RefsStorage));
359 AllRefs.try_emplace(
360 Key: Sym.first,
361 Args: llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
362 SymRefs.size()));
363 }
364 }
365
366 std::vector<Relation> AllRelations;
367 for (const auto &RelationSlab : RelationSlabs) {
368 for (const auto &R : *RelationSlab)
369 AllRelations.push_back(x: R);
370 }
371 // Sort relations and remove duplicates that could arise due to
372 // relations being stored in both the shards containing their
373 // subject and object.
374 llvm::sort(C&: AllRelations);
375 AllRelations.erase(first: llvm::unique(R&: AllRelations), last: AllRelations.end());
376
377 size_t StorageSize =
378 RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
379 for (const auto &Slab : SymbolSlabs)
380 StorageSize += Slab->bytes();
381 for (const auto &RefSlab : RefSlabs)
382 StorageSize += RefSlab->bytes();
383
384 // Index must keep the slabs and contiguous ranges alive.
385 switch (Type) {
386 case IndexType::Light:
387 return std::make_unique<MemIndex>(
388 args: llvm::make_pointee_range(Range&: AllSymbols), args: std::move(AllRefs),
389 args: std::move(AllRelations), args: std::move(Files), args&: IdxContents,
390 args: std::make_tuple(args: std::move(SymbolSlabs), args: std::move(RefSlabs),
391 args: std::move(RefsStorage), args: std::move(SymsStorage)),
392 args&: StorageSize);
393 case IndexType::Heavy:
394 return std::make_unique<dex::Dex>(
395 args: llvm::make_pointee_range(Range&: AllSymbols), args: std::move(AllRefs),
396 args: std::move(AllRelations), args: std::move(Files), args&: IdxContents,
397 args: std::make_tuple(args: std::move(SymbolSlabs), args: std::move(RefSlabs),
398 args: std::move(RefsStorage), args: std::move(SymsStorage)),
399 args&: StorageSize, args&: SupportContainedRefs);
400 }
401 llvm_unreachable("Unknown clangd::IndexType");
402}
403
404void FileSymbols::profile(MemoryTree &MT) const {
405 std::lock_guard<std::mutex> Lock(Mutex);
406 for (const auto &SymSlab : SymbolsSnapshot) {
407 MT.detail(Name: SymSlab.first())
408 .child(Name: "symbols")
409 .addUsage(Increment: SymSlab.second->bytes());
410 }
411 for (const auto &RefSlab : RefsSnapshot) {
412 MT.detail(Name: RefSlab.first())
413 .child(Name: "references")
414 .addUsage(Increment: RefSlab.second.Slab->bytes());
415 }
416 for (const auto &RelSlab : RelationsSnapshot) {
417 MT.detail(Name: RelSlab.first())
418 .child(Name: "relations")
419 .addUsage(Increment: RelSlab.second->bytes());
420 }
421}
422
423FileIndex::FileIndex(bool SupportContainedRefs)
424 : MergedIndex(&MainFileIndex, &PreambleIndex),
425 PreambleSymbols(IndexContents::Symbols | IndexContents::Relations,
426 SupportContainedRefs),
427 PreambleIndex(std::make_unique<MemIndex>()),
428 MainFileSymbols(IndexContents::All, SupportContainedRefs),
429 MainFileIndex(std::make_unique<MemIndex>()) {}
430
431void FileIndex::updatePreamble(IndexFileIn IF) {
432 FileShardedIndex ShardedIndex(std::move(IF));
433 for (auto Uri : ShardedIndex.getAllSources()) {
434 auto IF = ShardedIndex.getShard(Uri);
435 // We are using the key received from ShardedIndex, so it should always
436 // exist.
437 assert(IF);
438 PreambleSymbols.update(
439 Key: Uri, Symbols: std::make_unique<SymbolSlab>(args: std::move(*IF->Symbols)),
440 Refs: std::make_unique<RefSlab>(),
441 Relations: std::make_unique<RelationSlab>(args: std::move(*IF->Relations)),
442 /*CountReferences=*/false);
443 }
444 size_t IndexVersion = 0;
445 auto NewIndex = PreambleSymbols.buildIndex(
446 Type: IndexType::Heavy, DuplicateHandle: DuplicateHandling::PickOne, Version: &IndexVersion);
447 {
448 std::lock_guard<std::mutex> Lock(UpdateIndexMu);
449 if (IndexVersion <= PreambleIndexVersion) {
450 // We lost the race, some other thread built a later version.
451 return;
452 }
453 PreambleIndexVersion = IndexVersion;
454 PreambleIndex.reset(std::move(NewIndex));
455 vlog(
456 Fmt: "Build dynamic index for header symbols with estimated memory usage of "
457 "{0} bytes",
458 Vals: PreambleIndex.estimateMemoryUsage());
459 }
460}
461
462void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
463 ASTContext &AST, Preprocessor &PP,
464 const include_cleaner::PragmaIncludes &PI) {
465 IndexFileIn IF;
466 std::tie(args&: IF.Symbols, args: std::ignore, args&: IF.Relations) =
467 indexHeaderSymbols(Version, AST, PP, PI, Origin: SymbolOrigin::Preamble);
468 updatePreamble(IF: std::move(IF));
469}
470
471void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
472 auto Contents = indexMainDecls(AST);
473 MainFileSymbols.update(
474 Key: URI::create(AbsolutePath: Path).toString(),
475 Symbols: std::make_unique<SymbolSlab>(args: std::move(std::get<0>(t&: Contents))),
476 Refs: std::make_unique<RefSlab>(args: std::move(std::get<1>(t&: Contents))),
477 Relations: std::make_unique<RelationSlab>(args: std::move(std::get<2>(t&: Contents))),
478 /*CountReferences=*/true);
479 size_t IndexVersion = 0;
480 auto NewIndex = MainFileSymbols.buildIndex(
481 Type: IndexType::Light, DuplicateHandle: DuplicateHandling::Merge, Version: &IndexVersion);
482 {
483 std::lock_guard<std::mutex> Lock(UpdateIndexMu);
484 if (IndexVersion <= MainIndexVersion) {
485 // We lost the race, some other thread built a later version.
486 return;
487 }
488 MainIndexVersion = IndexVersion;
489 MainFileIndex.reset(std::move(NewIndex));
490 vlog(
491 Fmt: "Build dynamic index for main-file symbols with estimated memory usage "
492 "of {0} bytes",
493 Vals: MainFileIndex.estimateMemoryUsage());
494 }
495}
496
497void FileIndex::profile(MemoryTree &MT) const {
498 PreambleSymbols.profile(MT&: MT.child(Name: "preamble").child(Name: "slabs"));
499 MT.child(Name: "preamble")
500 .child(Name: "index")
501 .addUsage(Increment: PreambleIndex.estimateMemoryUsage());
502 MainFileSymbols.profile(MT&: MT.child(Name: "main_file").child(Name: "slabs"));
503 MT.child(Name: "main_file")
504 .child(Name: "index")
505 .addUsage(Increment: MainFileIndex.estimateMemoryUsage());
506}
507} // namespace clangd
508} // namespace clang
509

source code of clang-tools-extra/clangd/index/FileIndex.cpp