| 1 | //===-- Background.cpp - Build an index in a background thread ------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "index/Background.h" |
| 10 | #include "Compiler.h" |
| 11 | #include "Config.h" |
| 12 | #include "Headers.h" |
| 13 | #include "SourceCode.h" |
| 14 | #include "URI.h" |
| 15 | #include "index/BackgroundIndexLoader.h" |
| 16 | #include "index/FileIndex.h" |
| 17 | #include "index/Index.h" |
| 18 | #include "index/IndexAction.h" |
| 19 | #include "index/MemIndex.h" |
| 20 | #include "index/Ref.h" |
| 21 | #include "index/Relation.h" |
| 22 | #include "index/Serialization.h" |
| 23 | #include "index/Symbol.h" |
| 24 | #include "index/SymbolCollector.h" |
| 25 | #include "support/Context.h" |
| 26 | #include "support/Logger.h" |
| 27 | #include "support/Path.h" |
| 28 | #include "support/Threading.h" |
| 29 | #include "support/ThreadsafeFS.h" |
| 30 | #include "support/Trace.h" |
| 31 | #include "clang/Basic/SourceLocation.h" |
| 32 | #include "clang/Basic/SourceManager.h" |
| 33 | #include "clang/Basic/Stack.h" |
| 34 | #include "clang/Frontend/FrontendAction.h" |
| 35 | #include "llvm/ADT/ArrayRef.h" |
| 36 | #include "llvm/ADT/DenseSet.h" |
| 37 | #include "llvm/ADT/STLExtras.h" |
| 38 | #include "llvm/ADT/StringMap.h" |
| 39 | #include "llvm/ADT/StringRef.h" |
| 40 | #include "llvm/Support/Error.h" |
| 41 | #include "llvm/Support/Path.h" |
| 42 | #include "llvm/Support/Threading.h" |
| 43 | #include "llvm/Support/xxhash.h" |
| 44 | |
| 45 | #include <algorithm> |
| 46 | #include <atomic> |
| 47 | #include <chrono> |
| 48 | #include <condition_variable> |
| 49 | #include <cstddef> |
| 50 | #include <memory> |
| 51 | #include <mutex> |
| 52 | #include <numeric> |
| 53 | #include <optional> |
| 54 | #include <queue> |
| 55 | #include <random> |
| 56 | #include <string> |
| 57 | #include <thread> |
| 58 | #include <utility> |
| 59 | #include <vector> |
| 60 | |
| 61 | namespace clang { |
| 62 | namespace clangd { |
| 63 | namespace { |
| 64 | |
| 65 | // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or |
| 66 | // relative to Cmd.Directory, which might not be the same as current working |
| 67 | // directory. |
| 68 | llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { |
| 69 | llvm::SmallString<128> AbsolutePath; |
| 70 | if (llvm::sys::path::is_absolute(path: Cmd.Filename)) { |
| 71 | AbsolutePath = Cmd.Filename; |
| 72 | } else { |
| 73 | AbsolutePath = Cmd.Directory; |
| 74 | llvm::sys::path::append(path&: AbsolutePath, a: Cmd.Filename); |
| 75 | llvm::sys::path::remove_dots(path&: AbsolutePath, remove_dot_dot: true); |
| 76 | } |
| 77 | return AbsolutePath; |
| 78 | } |
| 79 | |
| 80 | bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) { |
| 81 | auto Buf = FS->getBufferForFile(Name: LS.AbsolutePath); |
| 82 | if (!Buf) { |
| 83 | vlog(Fmt: "Background-index: Couldn't read {0} to validate stored index: {1}" , |
| 84 | Vals: LS.AbsolutePath, Vals: Buf.getError().message()); |
| 85 | // There is no point in indexing an unreadable file. |
| 86 | return false; |
| 87 | } |
| 88 | return digest(Content: Buf->get()->getBuffer()) != LS.Digest; |
| 89 | } |
| 90 | |
| 91 | } // namespace |
| 92 | |
| 93 | BackgroundIndex::BackgroundIndex( |
| 94 | const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB, |
| 95 | BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts) |
| 96 | : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB), |
| 97 | IndexingPriority(Opts.IndexingPriority), |
| 98 | ContextProvider(std::move(Opts.ContextProvider)), |
| 99 | IndexedSymbols(IndexContents::All, Opts.SupportContainedRefs), |
| 100 | Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize), |
| 101 | IndexStorageFactory(std::move(IndexStorageFactory)), |
| 102 | Queue(std::move(Opts.OnProgress)), |
| 103 | CommandsChanged( |
| 104 | CDB.watch(L: [&](const std::vector<std::string> &ChangedFiles) { |
| 105 | enqueue(ChangedFiles); |
| 106 | })) { |
| 107 | assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero." ); |
| 108 | assert(this->IndexStorageFactory && "Storage factory can not be null!" ); |
| 109 | for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) { |
| 110 | ThreadPool.runAsync(Name: "background-worker-" + llvm::Twine(I + 1), |
| 111 | Action: [this, Ctx(Context::current().clone())]() mutable { |
| 112 | clang::noteBottomOfStack(); |
| 113 | WithContext BGContext(std::move(Ctx)); |
| 114 | Queue.work(OnIdle: [&] { Rebuilder.idle(); }); |
| 115 | }); |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | BackgroundIndex::~BackgroundIndex() { |
| 120 | stop(); |
| 121 | ThreadPool.wait(); |
| 122 | } |
| 123 | |
| 124 | BackgroundQueue::Task BackgroundIndex::changedFilesTask( |
| 125 | const std::vector<std::string> &ChangedFiles) { |
| 126 | BackgroundQueue::Task T([this, ChangedFiles] { |
| 127 | trace::Span Tracer("BackgroundIndexEnqueue" ); |
| 128 | |
| 129 | std::optional<WithContext> WithProvidedContext; |
| 130 | if (ContextProvider) |
| 131 | WithProvidedContext.emplace(args: ContextProvider(/*Path=*/"" )); |
| 132 | |
| 133 | // We're doing this asynchronously, because we'll read shards here too. |
| 134 | log(Fmt: "Enqueueing {0} commands for indexing" , Vals: ChangedFiles.size()); |
| 135 | SPAN_ATTACH(Tracer, "files" , int64_t(ChangedFiles.size())); |
| 136 | |
| 137 | auto NeedsReIndexing = loadProject(MainFiles: std::move(ChangedFiles)); |
| 138 | // Run indexing for files that need to be updated. |
| 139 | std::shuffle(first: NeedsReIndexing.begin(), last: NeedsReIndexing.end(), |
| 140 | g: std::mt19937(std::random_device{}())); |
| 141 | std::vector<BackgroundQueue::Task> Tasks; |
| 142 | Tasks.reserve(n: NeedsReIndexing.size()); |
| 143 | for (const auto &File : NeedsReIndexing) |
| 144 | Tasks.push_back(x: indexFileTask(Path: std::move(File))); |
| 145 | Queue.append(std::move(Tasks)); |
| 146 | }); |
| 147 | |
| 148 | T.QueuePri = LoadShards; |
| 149 | T.ThreadPri = llvm::ThreadPriority::Default; |
| 150 | return T; |
| 151 | } |
| 152 | |
| 153 | static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) { |
| 154 | Path = llvm::sys::path::filename(path: Path); |
| 155 | return Path.drop_back(N: llvm::sys::path::extension(path: Path).size()); |
| 156 | } |
| 157 | |
| 158 | BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) { |
| 159 | std::string Tag = filenameWithoutExtension(Path).str(); |
| 160 | uint64_t Key = llvm::xxh3_64bits(data: Path); |
| 161 | BackgroundQueue::Task T([this, Path(std::move(Path))] { |
| 162 | std::optional<WithContext> WithProvidedContext; |
| 163 | if (ContextProvider) |
| 164 | WithProvidedContext.emplace(args: ContextProvider(Path)); |
| 165 | auto Cmd = CDB.getCompileCommand(File: Path); |
| 166 | if (!Cmd) |
| 167 | return; |
| 168 | if (auto Error = index(std::move(*Cmd))) |
| 169 | elog(Fmt: "Indexing {0} failed: {1}" , Vals: Path, Vals: std::move(Error)); |
| 170 | }); |
| 171 | T.QueuePri = IndexFile; |
| 172 | T.ThreadPri = IndexingPriority; |
| 173 | T.Tag = std::move(Tag); |
| 174 | T.Key = Key; |
| 175 | return T; |
| 176 | } |
| 177 | |
| 178 | void BackgroundIndex::boostRelated(llvm::StringRef Path) { |
| 179 | if (isHeaderFile(FileName: Path)) |
| 180 | Queue.boost(Tag: filenameWithoutExtension(Path), NewPriority: IndexBoostedFile); |
| 181 | } |
| 182 | |
| 183 | /// Given index results from a TU, only update symbols coming from files that |
| 184 | /// are different or missing from than \p ShardVersionsSnapshot. Also stores new |
| 185 | /// index information on IndexStorage. |
| 186 | void BackgroundIndex::update( |
| 187 | llvm::StringRef MainFile, IndexFileIn Index, |
| 188 | const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot, |
| 189 | bool HadErrors) { |
| 190 | // Keys are URIs. |
| 191 | llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate; |
| 192 | // Note that sources do not contain any information regarding missing headers, |
| 193 | // since we don't even know what absolute path they should fall in. |
| 194 | for (const auto &IndexIt : *Index.Sources) { |
| 195 | const auto &IGN = IndexIt.getValue(); |
| 196 | auto AbsPath = URI::resolve(FileURI: IGN.URI, HintPath: MainFile); |
| 197 | if (!AbsPath) { |
| 198 | elog(Fmt: "Failed to resolve URI: {0}" , Vals: AbsPath.takeError()); |
| 199 | continue; |
| 200 | } |
| 201 | const auto DigestIt = ShardVersionsSnapshot.find(Key: *AbsPath); |
| 202 | // File has different contents, or indexing was successful this time. |
| 203 | if (DigestIt == ShardVersionsSnapshot.end() || |
| 204 | DigestIt->getValue().Digest != IGN.Digest || |
| 205 | (DigestIt->getValue().HadErrors && !HadErrors)) |
| 206 | FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest}; |
| 207 | } |
| 208 | |
| 209 | // Shard slabs into files. |
| 210 | FileShardedIndex ShardedIndex(std::move(Index)); |
| 211 | |
| 212 | // Build and store new slabs for each updated file. |
| 213 | for (const auto &FileIt : FilesToUpdate) { |
| 214 | auto Uri = FileIt.first(); |
| 215 | auto IF = ShardedIndex.getShard(Uri); |
| 216 | assert(IF && "no shard for file in Index.Sources?" ); |
| 217 | PathRef Path = FileIt.getValue().first; |
| 218 | |
| 219 | // Only store command line hash for main files of the TU, since our |
| 220 | // current model keeps only one version of a header file. |
| 221 | if (Path != MainFile) |
| 222 | IF->Cmd.reset(); |
| 223 | |
| 224 | // We need to store shards before updating the index, since the latter |
| 225 | // consumes slabs. |
| 226 | // FIXME: Also skip serializing the shard if it is already up-to-date. |
| 227 | if (auto Error = IndexStorageFactory(Path)->storeShard(ShardIdentifier: Path, Shard: *IF)) |
| 228 | elog(Fmt: "Failed to write background-index shard for file {0}: {1}" , Vals&: Path, |
| 229 | Vals: std::move(Error)); |
| 230 | |
| 231 | { |
| 232 | std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
| 233 | const auto &Hash = FileIt.getValue().second; |
| 234 | auto DigestIt = ShardVersions.try_emplace(Key: Path); |
| 235 | ShardVersion &SV = DigestIt.first->second; |
| 236 | // Skip if file is already up to date, unless previous index was broken |
| 237 | // and this one is not. |
| 238 | if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors) |
| 239 | continue; |
| 240 | SV.Digest = Hash; |
| 241 | SV.HadErrors = HadErrors; |
| 242 | |
| 243 | // This can override a newer version that is added in another thread, if |
| 244 | // this thread sees the older version but finishes later. This should be |
| 245 | // rare in practice. |
| 246 | IndexedSymbols.update( |
| 247 | Key: Uri, Symbols: std::make_unique<SymbolSlab>(args: std::move(*IF->Symbols)), |
| 248 | Refs: std::make_unique<RefSlab>(args: std::move(*IF->Refs)), |
| 249 | Relations: std::make_unique<RelationSlab>(args: std::move(*IF->Relations)), |
| 250 | CountReferences: Path == MainFile); |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { |
| 256 | trace::Span Tracer("BackgroundIndex" ); |
| 257 | SPAN_ATTACH(Tracer, "file" , Cmd.Filename); |
| 258 | auto AbsolutePath = getAbsolutePath(Cmd); |
| 259 | |
| 260 | auto FS = TFS.view(CWD: Cmd.Directory); |
| 261 | auto Buf = FS->getBufferForFile(Name: AbsolutePath); |
| 262 | if (!Buf) |
| 263 | return llvm::errorCodeToError(EC: Buf.getError()); |
| 264 | auto Hash = digest(Content: Buf->get()->getBuffer()); |
| 265 | |
| 266 | // Take a snapshot of the versions to avoid locking for each file in the TU. |
| 267 | llvm::StringMap<ShardVersion> ShardVersionsSnapshot; |
| 268 | { |
| 269 | std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
| 270 | ShardVersionsSnapshot = ShardVersions; |
| 271 | } |
| 272 | |
| 273 | vlog(Fmt: "Indexing {0} (digest:={1})" , Vals&: Cmd.Filename, Vals: llvm::toHex(Input: Hash)); |
| 274 | ParseInputs Inputs; |
| 275 | Inputs.TFS = &TFS; |
| 276 | Inputs.CompileCommand = std::move(Cmd); |
| 277 | IgnoreDiagnostics IgnoreDiags; |
| 278 | auto CI = buildCompilerInvocation(Inputs, D&: IgnoreDiags); |
| 279 | if (!CI) |
| 280 | return error(Fmt: "Couldn't build compiler invocation" ); |
| 281 | |
| 282 | auto Clang = |
| 283 | prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr, |
| 284 | MainFile: std::move(*Buf), std::move(FS), IgnoreDiags); |
| 285 | if (!Clang) |
| 286 | return error(Fmt: "Couldn't build compiler instance" ); |
| 287 | |
| 288 | SymbolCollector::Options IndexOpts; |
| 289 | // Creates a filter to not collect index results from files with unchanged |
| 290 | // digests. |
| 291 | IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM, |
| 292 | FileID FID) { |
| 293 | const auto F = SM.getFileEntryRefForID(FID); |
| 294 | if (!F) |
| 295 | return false; // Skip invalid files. |
| 296 | auto AbsPath = getCanonicalPath(F: *F, FileMgr&: SM.getFileManager()); |
| 297 | if (!AbsPath) |
| 298 | return false; // Skip files without absolute path. |
| 299 | auto Digest = digestFile(SM, FID); |
| 300 | if (!Digest) |
| 301 | return false; |
| 302 | auto D = ShardVersionsSnapshot.find(Key: *AbsPath); |
| 303 | if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest && |
| 304 | !D->second.HadErrors) |
| 305 | return false; // Skip files that haven't changed, without errors. |
| 306 | return true; |
| 307 | }; |
| 308 | IndexOpts.CollectMainFileRefs = true; |
| 309 | |
| 310 | IndexFileIn Index; |
| 311 | auto Action = createStaticIndexingAction( |
| 312 | Opts: IndexOpts, SymbolsCallback: [&](SymbolSlab S) { Index.Symbols = std::move(S); }, |
| 313 | RefsCallback: [&](RefSlab R) { Index.Refs = std::move(R); }, |
| 314 | RelationsCallback: [&](RelationSlab R) { Index.Relations = std::move(R); }, |
| 315 | IncludeGraphCallback: [&](IncludeGraph IG) { Index.Sources = std::move(IG); }); |
| 316 | |
| 317 | // We're going to run clang here, and it could potentially crash. |
| 318 | // We could use CrashRecoveryContext to try to make indexing crashes nonfatal, |
| 319 | // but the leaky "recovery" is pretty scary too in a long-running process. |
| 320 | // If crashes are a real problem, maybe we should fork a child process. |
| 321 | |
| 322 | const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front(); |
| 323 | if (!Action->BeginSourceFile(CI&: *Clang, Input)) |
| 324 | return error(Fmt: "BeginSourceFile() failed" ); |
| 325 | if (llvm::Error Err = Action->Execute()) |
| 326 | return Err; |
| 327 | |
| 328 | Action->EndSourceFile(); |
| 329 | |
| 330 | Index.Cmd = Inputs.CompileCommand; |
| 331 | assert(Index.Symbols && Index.Refs && Index.Sources && |
| 332 | "Symbols, Refs and Sources must be set." ); |
| 333 | |
| 334 | log(Fmt: "Indexed {0} ({1} symbols, {2} refs, {3} files)" , |
| 335 | Vals&: Inputs.CompileCommand.Filename, Vals: Index.Symbols->size(), |
| 336 | Vals: Index.Refs->numRefs(), Vals: Index.Sources->size()); |
| 337 | SPAN_ATTACH(Tracer, "symbols" , int(Index.Symbols->size())); |
| 338 | SPAN_ATTACH(Tracer, "refs" , int(Index.Refs->numRefs())); |
| 339 | SPAN_ATTACH(Tracer, "sources" , int(Index.Sources->size())); |
| 340 | |
| 341 | bool HadErrors = Clang->hasDiagnostics() && |
| 342 | Clang->getDiagnostics().hasUncompilableErrorOccurred(); |
| 343 | if (HadErrors) { |
| 344 | log(Fmt: "Failed to compile {0}, index may be incomplete" , Vals&: AbsolutePath); |
| 345 | for (auto &It : *Index.Sources) |
| 346 | It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors; |
| 347 | } |
| 348 | update(MainFile: AbsolutePath, Index: std::move(Index), ShardVersionsSnapshot, HadErrors); |
| 349 | |
| 350 | Rebuilder.indexedTU(); |
| 351 | return llvm::Error::success(); |
| 352 | } |
| 353 | |
| 354 | // Restores shards for \p MainFiles from index storage. Then checks staleness of |
| 355 | // those shards and returns a list of TUs that needs to be indexed to update |
| 356 | // staleness. |
| 357 | std::vector<std::string> |
| 358 | BackgroundIndex::loadProject(std::vector<std::string> MainFiles) { |
| 359 | // Drop files where background indexing is disabled in config. |
| 360 | if (ContextProvider) |
| 361 | llvm::erase_if(C&: MainFiles, P: [&](const std::string &TU) { |
| 362 | // Load the config for each TU, as indexing may be selectively enabled. |
| 363 | WithContext WithProvidedContext(ContextProvider(TU)); |
| 364 | return Config::current().Index.Background == |
| 365 | Config::BackgroundPolicy::Skip; |
| 366 | }); |
| 367 | Rebuilder.startLoading(); |
| 368 | // Load shards for all of the mainfiles. |
| 369 | const std::vector<LoadedShard> Result = |
| 370 | loadIndexShards(MainFiles, IndexStorageFactory, CDB); |
| 371 | size_t LoadedShards = 0; |
| 372 | { |
| 373 | // Update in-memory state. |
| 374 | std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
| 375 | for (auto &LS : Result) { |
| 376 | if (!LS.Shard) |
| 377 | continue; |
| 378 | auto SS = |
| 379 | LS.Shard->Symbols |
| 380 | ? std::make_unique<SymbolSlab>(args: std::move(*LS.Shard->Symbols)) |
| 381 | : nullptr; |
| 382 | auto RS = LS.Shard->Refs |
| 383 | ? std::make_unique<RefSlab>(args: std::move(*LS.Shard->Refs)) |
| 384 | : nullptr; |
| 385 | auto RelS = |
| 386 | LS.Shard->Relations |
| 387 | ? std::make_unique<RelationSlab>(args: std::move(*LS.Shard->Relations)) |
| 388 | : nullptr; |
| 389 | ShardVersion &SV = ShardVersions[LS.AbsolutePath]; |
| 390 | SV.Digest = LS.Digest; |
| 391 | SV.HadErrors = LS.HadErrors; |
| 392 | ++LoadedShards; |
| 393 | |
| 394 | IndexedSymbols.update(Key: URI::create(AbsolutePath: LS.AbsolutePath).toString(), |
| 395 | Symbols: std::move(SS), Refs: std::move(RS), Relations: std::move(RelS), |
| 396 | CountReferences: LS.CountReferences); |
| 397 | } |
| 398 | } |
| 399 | Rebuilder.loadedShard(ShardCount: LoadedShards); |
| 400 | Rebuilder.doneLoading(); |
| 401 | |
| 402 | auto FS = TFS.view(/*CWD=*/std::nullopt); |
| 403 | llvm::DenseSet<PathRef> TUsToIndex; |
| 404 | // We'll accept data from stale shards, but ensure the files get reindexed |
| 405 | // soon. |
| 406 | for (auto &LS : Result) { |
| 407 | if (!shardIsStale(LS, FS: FS.get())) |
| 408 | continue; |
| 409 | PathRef TUForFile = LS.DependentTU; |
| 410 | assert(!TUForFile.empty() && "File without a TU!" ); |
| 411 | |
| 412 | // FIXME: Currently, we simply schedule indexing on a TU whenever any of |
| 413 | // its dependencies needs re-indexing. We might do it smarter by figuring |
| 414 | // out a minimal set of TUs that will cover all the stale dependencies. |
| 415 | // FIXME: Try looking at other TUs if no compile commands are available |
| 416 | // for this TU, i.e TU was deleted after we performed indexing. |
| 417 | TUsToIndex.insert(V: TUForFile); |
| 418 | } |
| 419 | |
| 420 | return {TUsToIndex.begin(), TUsToIndex.end()}; |
| 421 | } |
| 422 | |
| 423 | void BackgroundIndex::profile(MemoryTree &MT) const { |
| 424 | IndexedSymbols.profile(MT&: MT.child(Name: "slabs" )); |
| 425 | // We don't want to mix memory used by index and symbols, so call base class. |
| 426 | MT.child(Name: "index" ).addUsage(Increment: SwapIndex::estimateMemoryUsage()); |
| 427 | } |
| 428 | } // namespace clangd |
| 429 | } // namespace clang |
| 430 | |