1 | //===-- Background.cpp - Build an index in a background thread ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "index/Background.h" |
10 | #include "Compiler.h" |
11 | #include "Config.h" |
12 | #include "Headers.h" |
13 | #include "SourceCode.h" |
14 | #include "URI.h" |
15 | #include "index/BackgroundIndexLoader.h" |
16 | #include "index/FileIndex.h" |
17 | #include "index/Index.h" |
18 | #include "index/IndexAction.h" |
19 | #include "index/MemIndex.h" |
20 | #include "index/Ref.h" |
21 | #include "index/Relation.h" |
22 | #include "index/Serialization.h" |
23 | #include "index/Symbol.h" |
24 | #include "index/SymbolCollector.h" |
25 | #include "support/Context.h" |
26 | #include "support/Logger.h" |
27 | #include "support/Path.h" |
28 | #include "support/Threading.h" |
29 | #include "support/ThreadsafeFS.h" |
30 | #include "support/Trace.h" |
31 | #include "clang/Basic/SourceLocation.h" |
32 | #include "clang/Basic/SourceManager.h" |
33 | #include "clang/Basic/Stack.h" |
34 | #include "clang/Frontend/FrontendAction.h" |
35 | #include "llvm/ADT/ArrayRef.h" |
36 | #include "llvm/ADT/DenseSet.h" |
37 | #include "llvm/ADT/STLExtras.h" |
38 | #include "llvm/ADT/StringMap.h" |
39 | #include "llvm/ADT/StringRef.h" |
40 | #include "llvm/Support/Error.h" |
41 | #include "llvm/Support/Path.h" |
42 | #include "llvm/Support/Threading.h" |
43 | #include "llvm/Support/xxhash.h" |
44 | |
45 | #include <algorithm> |
46 | #include <atomic> |
47 | #include <chrono> |
48 | #include <condition_variable> |
49 | #include <cstddef> |
50 | #include <memory> |
51 | #include <mutex> |
52 | #include <numeric> |
53 | #include <optional> |
54 | #include <queue> |
55 | #include <random> |
56 | #include <string> |
57 | #include <thread> |
58 | #include <utility> |
59 | #include <vector> |
60 | |
61 | namespace clang { |
62 | namespace clangd { |
63 | namespace { |
64 | |
65 | // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or |
66 | // relative to Cmd.Directory, which might not be the same as current working |
67 | // directory. |
68 | llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { |
69 | llvm::SmallString<128> AbsolutePath; |
70 | if (llvm::sys::path::is_absolute(path: Cmd.Filename)) { |
71 | AbsolutePath = Cmd.Filename; |
72 | } else { |
73 | AbsolutePath = Cmd.Directory; |
74 | llvm::sys::path::append(path&: AbsolutePath, a: Cmd.Filename); |
75 | llvm::sys::path::remove_dots(path&: AbsolutePath, remove_dot_dot: true); |
76 | } |
77 | return AbsolutePath; |
78 | } |
79 | |
80 | bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) { |
81 | auto Buf = FS->getBufferForFile(Name: LS.AbsolutePath); |
82 | if (!Buf) { |
83 | vlog(Fmt: "Background-index: Couldn't read {0} to validate stored index: {1}" , |
84 | Vals: LS.AbsolutePath, Vals: Buf.getError().message()); |
85 | // There is no point in indexing an unreadable file. |
86 | return false; |
87 | } |
88 | return digest(Content: Buf->get()->getBuffer()) != LS.Digest; |
89 | } |
90 | |
91 | } // namespace |
92 | |
93 | BackgroundIndex::BackgroundIndex( |
94 | const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB, |
95 | BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts) |
96 | : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB), |
97 | IndexingPriority(Opts.IndexingPriority), |
98 | ContextProvider(std::move(Opts.ContextProvider)), |
99 | IndexedSymbols(IndexContents::All), |
100 | Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize), |
101 | IndexStorageFactory(std::move(IndexStorageFactory)), |
102 | Queue(std::move(Opts.OnProgress)), |
103 | CommandsChanged( |
104 | CDB.watch(L: [&](const std::vector<std::string> &ChangedFiles) { |
105 | enqueue(ChangedFiles); |
106 | })) { |
107 | assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero." ); |
108 | assert(this->IndexStorageFactory && "Storage factory can not be null!" ); |
109 | for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) { |
110 | ThreadPool.runAsync(Name: "background-worker-" + llvm::Twine(I + 1), |
111 | Action: [this, Ctx(Context::current().clone())]() mutable { |
112 | clang::noteBottomOfStack(); |
113 | WithContext BGContext(std::move(Ctx)); |
114 | Queue.work(OnIdle: [&] { Rebuilder.idle(); }); |
115 | }); |
116 | } |
117 | } |
118 | |
119 | BackgroundIndex::~BackgroundIndex() { |
120 | stop(); |
121 | ThreadPool.wait(); |
122 | } |
123 | |
124 | BackgroundQueue::Task BackgroundIndex::changedFilesTask( |
125 | const std::vector<std::string> &ChangedFiles) { |
126 | BackgroundQueue::Task T([this, ChangedFiles] { |
127 | trace::Span Tracer("BackgroundIndexEnqueue" ); |
128 | |
129 | std::optional<WithContext> WithProvidedContext; |
130 | if (ContextProvider) |
131 | WithProvidedContext.emplace(args: ContextProvider(/*Path=*/"" )); |
132 | |
133 | // We're doing this asynchronously, because we'll read shards here too. |
134 | log(Fmt: "Enqueueing {0} commands for indexing" , Vals: ChangedFiles.size()); |
135 | SPAN_ATTACH(Tracer, "files" , int64_t(ChangedFiles.size())); |
136 | |
137 | auto NeedsReIndexing = loadProject(MainFiles: std::move(ChangedFiles)); |
138 | // Run indexing for files that need to be updated. |
139 | std::shuffle(first: NeedsReIndexing.begin(), last: NeedsReIndexing.end(), |
140 | g: std::mt19937(std::random_device{}())); |
141 | std::vector<BackgroundQueue::Task> Tasks; |
142 | Tasks.reserve(n: NeedsReIndexing.size()); |
143 | for (const auto &File : NeedsReIndexing) |
144 | Tasks.push_back(x: indexFileTask(Path: std::move(File))); |
145 | Queue.append(std::move(Tasks)); |
146 | }); |
147 | |
148 | T.QueuePri = LoadShards; |
149 | T.ThreadPri = llvm::ThreadPriority::Default; |
150 | return T; |
151 | } |
152 | |
153 | static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) { |
154 | Path = llvm::sys::path::filename(path: Path); |
155 | return Path.drop_back(N: llvm::sys::path::extension(path: Path).size()); |
156 | } |
157 | |
158 | BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) { |
159 | std::string Tag = filenameWithoutExtension(Path).str(); |
160 | uint64_t Key = llvm::xxh3_64bits(data: Path); |
161 | BackgroundQueue::Task T([this, Path(std::move(Path))] { |
162 | std::optional<WithContext> WithProvidedContext; |
163 | if (ContextProvider) |
164 | WithProvidedContext.emplace(args: ContextProvider(Path)); |
165 | auto Cmd = CDB.getCompileCommand(File: Path); |
166 | if (!Cmd) |
167 | return; |
168 | if (auto Error = index(std::move(*Cmd))) |
169 | elog(Fmt: "Indexing {0} failed: {1}" , Vals: Path, Vals: std::move(Error)); |
170 | }); |
171 | T.QueuePri = IndexFile; |
172 | T.ThreadPri = IndexingPriority; |
173 | T.Tag = std::move(Tag); |
174 | T.Key = Key; |
175 | return T; |
176 | } |
177 | |
178 | void BackgroundIndex::boostRelated(llvm::StringRef Path) { |
179 | if (isHeaderFile(FileName: Path)) |
180 | Queue.boost(Tag: filenameWithoutExtension(Path), NewPriority: IndexBoostedFile); |
181 | } |
182 | |
183 | /// Given index results from a TU, only update symbols coming from files that |
184 | /// are different or missing from than \p ShardVersionsSnapshot. Also stores new |
185 | /// index information on IndexStorage. |
186 | void BackgroundIndex::update( |
187 | llvm::StringRef MainFile, IndexFileIn Index, |
188 | const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot, |
189 | bool HadErrors) { |
190 | // Keys are URIs. |
191 | llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate; |
192 | // Note that sources do not contain any information regarding missing headers, |
193 | // since we don't even know what absolute path they should fall in. |
194 | for (const auto &IndexIt : *Index.Sources) { |
195 | const auto &IGN = IndexIt.getValue(); |
196 | auto AbsPath = URI::resolve(FileURI: IGN.URI, HintPath: MainFile); |
197 | if (!AbsPath) { |
198 | elog(Fmt: "Failed to resolve URI: {0}" , Vals: AbsPath.takeError()); |
199 | continue; |
200 | } |
201 | const auto DigestIt = ShardVersionsSnapshot.find(Key: *AbsPath); |
202 | // File has different contents, or indexing was successful this time. |
203 | if (DigestIt == ShardVersionsSnapshot.end() || |
204 | DigestIt->getValue().Digest != IGN.Digest || |
205 | (DigestIt->getValue().HadErrors && !HadErrors)) |
206 | FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest}; |
207 | } |
208 | |
209 | // Shard slabs into files. |
210 | FileShardedIndex ShardedIndex(std::move(Index)); |
211 | |
212 | // Build and store new slabs for each updated file. |
213 | for (const auto &FileIt : FilesToUpdate) { |
214 | auto Uri = FileIt.first(); |
215 | auto IF = ShardedIndex.getShard(Uri); |
216 | assert(IF && "no shard for file in Index.Sources?" ); |
217 | PathRef Path = FileIt.getValue().first; |
218 | |
219 | // Only store command line hash for main files of the TU, since our |
220 | // current model keeps only one version of a header file. |
221 | if (Path != MainFile) |
222 | IF->Cmd.reset(); |
223 | |
224 | // We need to store shards before updating the index, since the latter |
225 | // consumes slabs. |
226 | // FIXME: Also skip serializing the shard if it is already up-to-date. |
227 | if (auto Error = IndexStorageFactory(Path)->storeShard(ShardIdentifier: Path, Shard: *IF)) |
228 | elog(Fmt: "Failed to write background-index shard for file {0}: {1}" , Vals&: Path, |
229 | Vals: std::move(Error)); |
230 | |
231 | { |
232 | std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
233 | const auto &Hash = FileIt.getValue().second; |
234 | auto DigestIt = ShardVersions.try_emplace(Key: Path); |
235 | ShardVersion &SV = DigestIt.first->second; |
236 | // Skip if file is already up to date, unless previous index was broken |
237 | // and this one is not. |
238 | if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors) |
239 | continue; |
240 | SV.Digest = Hash; |
241 | SV.HadErrors = HadErrors; |
242 | |
243 | // This can override a newer version that is added in another thread, if |
244 | // this thread sees the older version but finishes later. This should be |
245 | // rare in practice. |
246 | IndexedSymbols.update( |
247 | Key: Uri, Symbols: std::make_unique<SymbolSlab>(args: std::move(*IF->Symbols)), |
248 | Refs: std::make_unique<RefSlab>(args: std::move(*IF->Refs)), |
249 | Relations: std::make_unique<RelationSlab>(args: std::move(*IF->Relations)), |
250 | CountReferences: Path == MainFile); |
251 | } |
252 | } |
253 | } |
254 | |
255 | llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { |
256 | trace::Span Tracer("BackgroundIndex" ); |
257 | SPAN_ATTACH(Tracer, "file" , Cmd.Filename); |
258 | auto AbsolutePath = getAbsolutePath(Cmd); |
259 | |
260 | auto FS = TFS.view(CWD: Cmd.Directory); |
261 | auto Buf = FS->getBufferForFile(Name: AbsolutePath); |
262 | if (!Buf) |
263 | return llvm::errorCodeToError(EC: Buf.getError()); |
264 | auto Hash = digest(Content: Buf->get()->getBuffer()); |
265 | |
266 | // Take a snapshot of the versions to avoid locking for each file in the TU. |
267 | llvm::StringMap<ShardVersion> ShardVersionsSnapshot; |
268 | { |
269 | std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
270 | ShardVersionsSnapshot = ShardVersions; |
271 | } |
272 | |
273 | vlog(Fmt: "Indexing {0} (digest:={1})" , Vals&: Cmd.Filename, Vals: llvm::toHex(Input: Hash)); |
274 | ParseInputs Inputs; |
275 | Inputs.TFS = &TFS; |
276 | Inputs.CompileCommand = std::move(Cmd); |
277 | IgnoreDiagnostics IgnoreDiags; |
278 | auto CI = buildCompilerInvocation(Inputs, D&: IgnoreDiags); |
279 | if (!CI) |
280 | return error(Fmt: "Couldn't build compiler invocation" ); |
281 | |
282 | auto Clang = |
283 | prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr, |
284 | MainFile: std::move(*Buf), std::move(FS), IgnoreDiags); |
285 | if (!Clang) |
286 | return error(Fmt: "Couldn't build compiler instance" ); |
287 | |
288 | SymbolCollector::Options IndexOpts; |
289 | // Creates a filter to not collect index results from files with unchanged |
290 | // digests. |
291 | IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM, |
292 | FileID FID) { |
293 | const auto F = SM.getFileEntryRefForID(FID); |
294 | if (!F) |
295 | return false; // Skip invalid files. |
296 | auto AbsPath = getCanonicalPath(F: *F, FileMgr&: SM.getFileManager()); |
297 | if (!AbsPath) |
298 | return false; // Skip files without absolute path. |
299 | auto Digest = digestFile(SM, FID); |
300 | if (!Digest) |
301 | return false; |
302 | auto D = ShardVersionsSnapshot.find(Key: *AbsPath); |
303 | if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest && |
304 | !D->second.HadErrors) |
305 | return false; // Skip files that haven't changed, without errors. |
306 | return true; |
307 | }; |
308 | IndexOpts.CollectMainFileRefs = true; |
309 | |
310 | IndexFileIn Index; |
311 | auto Action = createStaticIndexingAction( |
312 | Opts: IndexOpts, SymbolsCallback: [&](SymbolSlab S) { Index.Symbols = std::move(S); }, |
313 | RefsCallback: [&](RefSlab R) { Index.Refs = std::move(R); }, |
314 | RelationsCallback: [&](RelationSlab R) { Index.Relations = std::move(R); }, |
315 | IncludeGraphCallback: [&](IncludeGraph IG) { Index.Sources = std::move(IG); }); |
316 | |
317 | // We're going to run clang here, and it could potentially crash. |
318 | // We could use CrashRecoveryContext to try to make indexing crashes nonfatal, |
319 | // but the leaky "recovery" is pretty scary too in a long-running process. |
320 | // If crashes are a real problem, maybe we should fork a child process. |
321 | |
322 | const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front(); |
323 | if (!Action->BeginSourceFile(CI&: *Clang, Input)) |
324 | return error(Fmt: "BeginSourceFile() failed" ); |
325 | if (llvm::Error Err = Action->Execute()) |
326 | return Err; |
327 | |
328 | Action->EndSourceFile(); |
329 | |
330 | Index.Cmd = Inputs.CompileCommand; |
331 | assert(Index.Symbols && Index.Refs && Index.Sources && |
332 | "Symbols, Refs and Sources must be set." ); |
333 | |
334 | log(Fmt: "Indexed {0} ({1} symbols, {2} refs, {3} files)" , |
335 | Vals&: Inputs.CompileCommand.Filename, Vals: Index.Symbols->size(), |
336 | Vals: Index.Refs->numRefs(), Vals: Index.Sources->size()); |
337 | SPAN_ATTACH(Tracer, "symbols" , int(Index.Symbols->size())); |
338 | SPAN_ATTACH(Tracer, "refs" , int(Index.Refs->numRefs())); |
339 | SPAN_ATTACH(Tracer, "sources" , int(Index.Sources->size())); |
340 | |
341 | bool HadErrors = Clang->hasDiagnostics() && |
342 | Clang->getDiagnostics().hasUncompilableErrorOccurred(); |
343 | if (HadErrors) { |
344 | log(Fmt: "Failed to compile {0}, index may be incomplete" , Vals&: AbsolutePath); |
345 | for (auto &It : *Index.Sources) |
346 | It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors; |
347 | } |
348 | update(MainFile: AbsolutePath, Index: std::move(Index), ShardVersionsSnapshot, HadErrors); |
349 | |
350 | Rebuilder.indexedTU(); |
351 | return llvm::Error::success(); |
352 | } |
353 | |
354 | // Restores shards for \p MainFiles from index storage. Then checks staleness of |
355 | // those shards and returns a list of TUs that needs to be indexed to update |
356 | // staleness. |
357 | std::vector<std::string> |
358 | BackgroundIndex::loadProject(std::vector<std::string> MainFiles) { |
359 | // Drop files where background indexing is disabled in config. |
360 | if (ContextProvider) |
361 | llvm::erase_if(C&: MainFiles, P: [&](const std::string &TU) { |
362 | // Load the config for each TU, as indexing may be selectively enabled. |
363 | WithContext WithProvidedContext(ContextProvider(TU)); |
364 | return Config::current().Index.Background == |
365 | Config::BackgroundPolicy::Skip; |
366 | }); |
367 | Rebuilder.startLoading(); |
368 | // Load shards for all of the mainfiles. |
369 | const std::vector<LoadedShard> Result = |
370 | loadIndexShards(MainFiles, IndexStorageFactory, CDB); |
371 | size_t LoadedShards = 0; |
372 | { |
373 | // Update in-memory state. |
374 | std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
375 | for (auto &LS : Result) { |
376 | if (!LS.Shard) |
377 | continue; |
378 | auto SS = |
379 | LS.Shard->Symbols |
380 | ? std::make_unique<SymbolSlab>(args: std::move(*LS.Shard->Symbols)) |
381 | : nullptr; |
382 | auto RS = LS.Shard->Refs |
383 | ? std::make_unique<RefSlab>(args: std::move(*LS.Shard->Refs)) |
384 | : nullptr; |
385 | auto RelS = |
386 | LS.Shard->Relations |
387 | ? std::make_unique<RelationSlab>(args: std::move(*LS.Shard->Relations)) |
388 | : nullptr; |
389 | ShardVersion &SV = ShardVersions[LS.AbsolutePath]; |
390 | SV.Digest = LS.Digest; |
391 | SV.HadErrors = LS.HadErrors; |
392 | ++LoadedShards; |
393 | |
394 | IndexedSymbols.update(Key: URI::create(AbsolutePath: LS.AbsolutePath).toString(), |
395 | Symbols: std::move(SS), Refs: std::move(RS), Relations: std::move(RelS), |
396 | CountReferences: LS.CountReferences); |
397 | } |
398 | } |
399 | Rebuilder.loadedShard(ShardCount: LoadedShards); |
400 | Rebuilder.doneLoading(); |
401 | |
402 | auto FS = TFS.view(/*CWD=*/std::nullopt); |
403 | llvm::DenseSet<PathRef> TUsToIndex; |
404 | // We'll accept data from stale shards, but ensure the files get reindexed |
405 | // soon. |
406 | for (auto &LS : Result) { |
407 | if (!shardIsStale(LS, FS: FS.get())) |
408 | continue; |
409 | PathRef TUForFile = LS.DependentTU; |
410 | assert(!TUForFile.empty() && "File without a TU!" ); |
411 | |
412 | // FIXME: Currently, we simply schedule indexing on a TU whenever any of |
413 | // its dependencies needs re-indexing. We might do it smarter by figuring |
414 | // out a minimal set of TUs that will cover all the stale dependencies. |
415 | // FIXME: Try looking at other TUs if no compile commands are available |
416 | // for this TU, i.e TU was deleted after we performed indexing. |
417 | TUsToIndex.insert(V: TUForFile); |
418 | } |
419 | |
420 | return {TUsToIndex.begin(), TUsToIndex.end()}; |
421 | } |
422 | |
423 | void BackgroundIndex::profile(MemoryTree &MT) const { |
424 | IndexedSymbols.profile(MT&: MT.child(Name: "slabs" )); |
425 | // We don't want to mix memory used by index and symbols, so call base class. |
426 | MT.child(Name: "index" ).addUsage(Increment: SwapIndex::estimateMemoryUsage()); |
427 | } |
428 | } // namespace clangd |
429 | } // namespace clang |
430 | |