1 | //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" |
10 | #include "llvm/Support/MemoryBuffer.h" |
11 | #include "llvm/Support/SmallVectorMemoryBuffer.h" |
12 | #include "llvm/Support/Threading.h" |
13 | #include <optional> |
14 | |
15 | using namespace clang; |
16 | using namespace tooling; |
17 | using namespace dependencies; |
18 | |
19 | llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry> |
20 | DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { |
21 | // Load the file and its content from the file system. |
22 | auto MaybeFile = getUnderlyingFS().openFileForRead(Path: Filename); |
23 | if (!MaybeFile) |
24 | return MaybeFile.getError(); |
25 | auto File = std::move(*MaybeFile); |
26 | |
27 | auto MaybeStat = File->status(); |
28 | if (!MaybeStat) |
29 | return MaybeStat.getError(); |
30 | auto Stat = std::move(*MaybeStat); |
31 | |
32 | auto MaybeBuffer = File->getBuffer(Name: Stat.getName()); |
33 | if (!MaybeBuffer) |
34 | return MaybeBuffer.getError(); |
35 | auto Buffer = std::move(*MaybeBuffer); |
36 | |
37 | // If the file size changed between read and stat, pretend it didn't. |
38 | if (Stat.getSize() != Buffer->getBufferSize()) |
39 | Stat = llvm::vfs::Status::copyWithNewSize(In: Stat, NewSize: Buffer->getBufferSize()); |
40 | |
41 | return TentativeEntry(Stat, std::move(Buffer)); |
42 | } |
43 | |
44 | bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated( |
45 | EntryRef Ref) { |
46 | auto &Entry = Ref.Entry; |
47 | |
48 | if (Entry.isError() || Entry.isDirectory()) |
49 | return false; |
50 | |
51 | CachedFileContents *Contents = Entry.getCachedContents(); |
52 | assert(Contents && "contents not initialized" ); |
53 | |
54 | // Double-checked locking. |
55 | if (Contents->DepDirectives.load()) |
56 | return true; |
57 | |
58 | std::lock_guard<std::mutex> GuardLock(Contents->ValueLock); |
59 | |
60 | // Double-checked locking. |
61 | if (Contents->DepDirectives.load()) |
62 | return true; |
63 | |
64 | SmallVector<dependency_directives_scan::Directive, 64> Directives; |
65 | // Scan the file for preprocessor directives that might affect the |
66 | // dependencies. |
67 | if (scanSourceForDependencyDirectives(Input: Contents->Original->getBuffer(), |
68 | Tokens&: Contents->DepDirectiveTokens, |
69 | Directives)) { |
70 | Contents->DepDirectiveTokens.clear(); |
71 | // FIXME: Propagate the diagnostic if desired by the client. |
72 | Contents->DepDirectives.store(p: new std::optional<DependencyDirectivesTy>()); |
73 | return false; |
74 | } |
75 | |
76 | // This function performed double-checked locking using `DepDirectives`. |
77 | // Assigning it must be the last thing this function does, otherwise other |
78 | // threads may skip the critical section (`DepDirectives != nullptr`), leading |
79 | // to a data race. |
80 | Contents->DepDirectives.store( |
81 | p: new std::optional<DependencyDirectivesTy>(std::move(Directives))); |
82 | return true; |
83 | } |
84 | |
85 | DependencyScanningFilesystemSharedCache:: |
86 | DependencyScanningFilesystemSharedCache() { |
87 | // This heuristic was chosen using a empirical testing on a |
88 | // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache |
89 | // sharding gives a performance edge by reducing the lock contention. |
90 | // FIXME: A better heuristic might also consider the OS to account for |
91 | // the different cost of lock contention on different OSes. |
92 | NumShards = |
93 | std::max(a: 2u, b: llvm::hardware_concurrency().compute_thread_count() / 4); |
94 | CacheShards = std::make_unique<CacheShard[]>(num: NumShards); |
95 | } |
96 | |
97 | DependencyScanningFilesystemSharedCache::CacheShard & |
98 | DependencyScanningFilesystemSharedCache::getShardForFilename( |
99 | StringRef Filename) const { |
100 | assert(llvm::sys::path::is_absolute_gnu(Filename)); |
101 | return CacheShards[llvm::hash_value(S: Filename) % NumShards]; |
102 | } |
103 | |
104 | DependencyScanningFilesystemSharedCache::CacheShard & |
105 | DependencyScanningFilesystemSharedCache::getShardForUID( |
106 | llvm::sys::fs::UniqueID UID) const { |
107 | auto Hash = llvm::hash_combine(args: UID.getDevice(), args: UID.getFile()); |
108 | return CacheShards[Hash % NumShards]; |
109 | } |
110 | |
111 | const CachedFileSystemEntry * |
112 | DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( |
113 | StringRef Filename) const { |
114 | assert(llvm::sys::path::is_absolute_gnu(Filename)); |
115 | std::lock_guard<std::mutex> LockGuard(CacheLock); |
116 | auto It = CacheByFilename.find(Key: Filename); |
117 | return It == CacheByFilename.end() ? nullptr : It->getValue().first; |
118 | } |
119 | |
120 | const CachedFileSystemEntry * |
121 | DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( |
122 | llvm::sys::fs::UniqueID UID) const { |
123 | std::lock_guard<std::mutex> LockGuard(CacheLock); |
124 | auto It = EntriesByUID.find(Val: UID); |
125 | return It == EntriesByUID.end() ? nullptr : It->getSecond(); |
126 | } |
127 | |
128 | const CachedFileSystemEntry & |
129 | DependencyScanningFilesystemSharedCache::CacheShard:: |
130 | getOrEmplaceEntryForFilename(StringRef Filename, |
131 | llvm::ErrorOr<llvm::vfs::Status> Stat) { |
132 | std::lock_guard<std::mutex> LockGuard(CacheLock); |
133 | auto [It, Inserted] = CacheByFilename.insert(KV: {Filename, {nullptr, nullptr}}); |
134 | auto &[CachedEntry, CachedRealPath] = It->getValue(); |
135 | if (!CachedEntry) { |
136 | // The entry is not present in the shared cache. Either the cache doesn't |
137 | // know about the file at all, or it only knows about its real path. |
138 | assert((Inserted || CachedRealPath) && "existing file with empty pair" ); |
139 | CachedEntry = |
140 | new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); |
141 | } |
142 | return *CachedEntry; |
143 | } |
144 | |
145 | const CachedFileSystemEntry & |
146 | DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( |
147 | llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, |
148 | std::unique_ptr<llvm::MemoryBuffer> Contents) { |
149 | std::lock_guard<std::mutex> LockGuard(CacheLock); |
150 | auto [It, Inserted] = EntriesByUID.insert(KV: {UID, nullptr}); |
151 | auto &CachedEntry = It->getSecond(); |
152 | if (Inserted) { |
153 | CachedFileContents *StoredContents = nullptr; |
154 | if (Contents) |
155 | StoredContents = new (ContentsStorage.Allocate()) |
156 | CachedFileContents(std::move(Contents)); |
157 | CachedEntry = new (EntryStorage.Allocate()) |
158 | CachedFileSystemEntry(std::move(Stat), StoredContents); |
159 | } |
160 | return *CachedEntry; |
161 | } |
162 | |
163 | const CachedFileSystemEntry & |
164 | DependencyScanningFilesystemSharedCache::CacheShard:: |
165 | getOrInsertEntryForFilename(StringRef Filename, |
166 | const CachedFileSystemEntry &Entry) { |
167 | std::lock_guard<std::mutex> LockGuard(CacheLock); |
168 | auto [It, Inserted] = CacheByFilename.insert(KV: {Filename, {&Entry, nullptr}}); |
169 | auto &[CachedEntry, CachedRealPath] = It->getValue(); |
170 | if (!Inserted || !CachedEntry) |
171 | CachedEntry = &Entry; |
172 | return *CachedEntry; |
173 | } |
174 | |
175 | const CachedRealPath * |
176 | DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename( |
177 | StringRef Filename) const { |
178 | assert(llvm::sys::path::is_absolute_gnu(Filename)); |
179 | std::lock_guard<std::mutex> LockGuard(CacheLock); |
180 | auto It = CacheByFilename.find(Key: Filename); |
181 | return It == CacheByFilename.end() ? nullptr : It->getValue().second; |
182 | } |
183 | |
184 | const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: |
185 | getOrEmplaceRealPathForFilename(StringRef Filename, |
186 | llvm::ErrorOr<llvm::StringRef> RealPath) { |
187 | std::lock_guard<std::mutex> LockGuard(CacheLock); |
188 | |
189 | const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second; |
190 | if (!StoredRealPath) { |
191 | auto OwnedRealPath = [&]() -> CachedRealPath { |
192 | if (!RealPath) |
193 | return RealPath.getError(); |
194 | return RealPath->str(); |
195 | }(); |
196 | |
197 | StoredRealPath = new (RealPathStorage.Allocate()) |
198 | CachedRealPath(std::move(OwnedRealPath)); |
199 | } |
200 | |
201 | return *StoredRealPath; |
202 | } |
203 | |
204 | static bool shouldCacheStatFailures(StringRef Filename) { |
205 | StringRef Ext = llvm::sys::path::extension(path: Filename); |
206 | if (Ext.empty()) |
207 | return false; // This may be the module cache directory. |
208 | return true; |
209 | } |
210 | |
211 | DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( |
212 | DependencyScanningFilesystemSharedCache &SharedCache, |
213 | IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) |
214 | : llvm::RTTIExtends<DependencyScanningWorkerFilesystem, |
215 | llvm::vfs::ProxyFileSystem>(std::move(FS)), |
216 | SharedCache(SharedCache), |
217 | WorkingDirForCacheLookup(llvm::errc::invalid_argument) { |
218 | updateWorkingDirForCacheLookup(); |
219 | } |
220 | |
221 | const CachedFileSystemEntry & |
222 | DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( |
223 | TentativeEntry TEntry) { |
224 | auto &Shard = SharedCache.getShardForUID(UID: TEntry.Status.getUniqueID()); |
225 | return Shard.getOrEmplaceEntryForUID(UID: TEntry.Status.getUniqueID(), |
226 | Stat: std::move(TEntry.Status), |
227 | Contents: std::move(TEntry.Contents)); |
228 | } |
229 | |
230 | const CachedFileSystemEntry * |
231 | DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( |
232 | StringRef Filename) { |
233 | if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) |
234 | return Entry; |
235 | auto &Shard = SharedCache.getShardForFilename(Filename); |
236 | if (const auto *Entry = Shard.findEntryByFilename(Filename)) |
237 | return &LocalCache.insertEntryForFilename(Filename, Entry: *Entry); |
238 | return nullptr; |
239 | } |
240 | |
241 | llvm::ErrorOr<const CachedFileSystemEntry &> |
242 | DependencyScanningWorkerFilesystem::computeAndStoreResult( |
243 | StringRef OriginalFilename, StringRef FilenameForLookup) { |
244 | llvm::ErrorOr<llvm::vfs::Status> Stat = |
245 | getUnderlyingFS().status(Path: OriginalFilename); |
246 | if (!Stat) { |
247 | if (!shouldCacheStatFailures(Filename: OriginalFilename)) |
248 | return Stat.getError(); |
249 | const auto &Entry = |
250 | getOrEmplaceSharedEntryForFilename(Filename: FilenameForLookup, EC: Stat.getError()); |
251 | return insertLocalEntryForFilename(Filename: FilenameForLookup, Entry); |
252 | } |
253 | |
254 | if (const auto *Entry = findSharedEntryByUID(Stat: *Stat)) |
255 | return insertLocalEntryForFilename(Filename: FilenameForLookup, Entry: *Entry); |
256 | |
257 | auto TEntry = |
258 | Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename: OriginalFilename); |
259 | |
260 | const CachedFileSystemEntry *SharedEntry = [&]() { |
261 | if (TEntry) { |
262 | const auto &UIDEntry = getOrEmplaceSharedEntryForUID(TEntry: std::move(*TEntry)); |
263 | return &getOrInsertSharedEntryForFilename(Filename: FilenameForLookup, Entry: UIDEntry); |
264 | } |
265 | return &getOrEmplaceSharedEntryForFilename(Filename: FilenameForLookup, |
266 | EC: TEntry.getError()); |
267 | }(); |
268 | |
269 | return insertLocalEntryForFilename(Filename: FilenameForLookup, Entry: *SharedEntry); |
270 | } |
271 | |
272 | llvm::ErrorOr<EntryRef> |
273 | DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( |
274 | StringRef OriginalFilename) { |
275 | SmallString<256> PathBuf; |
276 | auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); |
277 | if (!FilenameForLookup) |
278 | return FilenameForLookup.getError(); |
279 | |
280 | if (const auto *Entry = |
281 | findEntryByFilenameWithWriteThrough(Filename: *FilenameForLookup)) |
282 | return EntryRef(OriginalFilename, *Entry).unwrapError(); |
283 | auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup: *FilenameForLookup); |
284 | if (!MaybeEntry) |
285 | return MaybeEntry.getError(); |
286 | return EntryRef(OriginalFilename, *MaybeEntry).unwrapError(); |
287 | } |
288 | |
289 | llvm::ErrorOr<llvm::vfs::Status> |
290 | DependencyScanningWorkerFilesystem::status(const Twine &Path) { |
291 | SmallString<256> OwnedFilename; |
292 | StringRef Filename = Path.toStringRef(Out&: OwnedFilename); |
293 | |
294 | if (Filename.ends_with(Suffix: ".pcm" )) |
295 | return getUnderlyingFS().status(Path); |
296 | |
297 | llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(OriginalFilename: Filename); |
298 | if (!Result) |
299 | return Result.getError(); |
300 | return Result->getStatus(); |
301 | } |
302 | |
303 | bool DependencyScanningWorkerFilesystem::exists(const Twine &Path) { |
304 | // While some VFS overlay filesystems may implement more-efficient |
305 | // mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem` |
306 | // typically wraps `RealFileSystem` which does not specialize `exists`, |
307 | // so it is not likely to benefit from such optimizations. Instead, |
308 | // it is more-valuable to have this query go through the |
309 | // cached-`status` code-path of the `DependencyScanningWorkerFilesystem`. |
310 | llvm::ErrorOr<llvm::vfs::Status> Status = status(Path); |
311 | return Status && Status->exists(); |
312 | } |
313 | |
314 | namespace { |
315 | |
316 | /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using |
317 | /// this subclass. |
318 | class DepScanFile final : public llvm::vfs::File { |
319 | public: |
320 | DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, |
321 | llvm::vfs::Status Stat) |
322 | : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} |
323 | |
324 | static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry); |
325 | |
326 | llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } |
327 | |
328 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> |
329 | getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, |
330 | bool IsVolatile) override { |
331 | return std::move(Buffer); |
332 | } |
333 | |
334 | std::error_code close() override { return {}; } |
335 | |
336 | private: |
337 | std::unique_ptr<llvm::MemoryBuffer> Buffer; |
338 | llvm::vfs::Status Stat; |
339 | }; |
340 | |
341 | } // end anonymous namespace |
342 | |
343 | llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> |
344 | DepScanFile::create(EntryRef Entry) { |
345 | assert(!Entry.isError() && "error" ); |
346 | |
347 | if (Entry.isDirectory()) |
348 | return std::make_error_code(e: std::errc::is_a_directory); |
349 | |
350 | auto Result = std::make_unique<DepScanFile>( |
351 | args: llvm::MemoryBuffer::getMemBuffer(InputData: Entry.getContents(), |
352 | BufferName: Entry.getStatus().getName(), |
353 | /*RequiresNullTerminator=*/false), |
354 | args: Entry.getStatus()); |
355 | |
356 | return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( |
357 | std::unique_ptr<llvm::vfs::File>(std::move(Result))); |
358 | } |
359 | |
360 | llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> |
361 | DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { |
362 | SmallString<256> OwnedFilename; |
363 | StringRef Filename = Path.toStringRef(Out&: OwnedFilename); |
364 | |
365 | if (Filename.ends_with(Suffix: ".pcm" )) |
366 | return getUnderlyingFS().openFileForRead(Path); |
367 | |
368 | llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(OriginalFilename: Filename); |
369 | if (!Result) |
370 | return Result.getError(); |
371 | return DepScanFile::create(Entry: Result.get()); |
372 | } |
373 | |
374 | std::error_code |
375 | DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path, |
376 | SmallVectorImpl<char> &Output) { |
377 | SmallString<256> OwnedFilename; |
378 | StringRef OriginalFilename = Path.toStringRef(Out&: OwnedFilename); |
379 | |
380 | SmallString<256> PathBuf; |
381 | auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); |
382 | if (!FilenameForLookup) |
383 | return FilenameForLookup.getError(); |
384 | |
385 | auto HandleCachedRealPath = |
386 | [&Output](const CachedRealPath &RealPath) -> std::error_code { |
387 | if (!RealPath) |
388 | return RealPath.getError(); |
389 | Output.assign(in_start: RealPath->begin(), in_end: RealPath->end()); |
390 | return {}; |
391 | }; |
392 | |
393 | // If we already have the result in local cache, no work required. |
394 | if (const auto *RealPath = |
395 | LocalCache.findRealPathByFilename(Filename: *FilenameForLookup)) |
396 | return HandleCachedRealPath(*RealPath); |
397 | |
398 | // If we have the result in the shared cache, cache it locally. |
399 | auto &Shard = SharedCache.getShardForFilename(Filename: *FilenameForLookup); |
400 | if (const auto *ShardRealPath = |
401 | Shard.findRealPathByFilename(Filename: *FilenameForLookup)) { |
402 | const auto &RealPath = LocalCache.insertRealPathForFilename( |
403 | Filename: *FilenameForLookup, RealPath: *ShardRealPath); |
404 | return HandleCachedRealPath(RealPath); |
405 | } |
406 | |
407 | // If we don't know the real path, compute it... |
408 | std::error_code EC = getUnderlyingFS().getRealPath(Path: OriginalFilename, Output); |
409 | llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC; |
410 | if (!EC) |
411 | ComputedRealPath = StringRef{Output.data(), Output.size()}; |
412 | |
413 | // ...and try to write it into the shared cache. In case some other thread won |
414 | // this race and already wrote its own result there, just adopt it. Write |
415 | // whatever is in the shared cache into the local one. |
416 | const auto &RealPath = Shard.getOrEmplaceRealPathForFilename( |
417 | Filename: *FilenameForLookup, RealPath: ComputedRealPath); |
418 | return HandleCachedRealPath( |
419 | LocalCache.insertRealPathForFilename(Filename: *FilenameForLookup, RealPath)); |
420 | } |
421 | |
422 | std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory( |
423 | const Twine &Path) { |
424 | std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path); |
425 | updateWorkingDirForCacheLookup(); |
426 | return EC; |
427 | } |
428 | |
429 | void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() { |
430 | llvm::ErrorOr<std::string> CWD = |
431 | getUnderlyingFS().getCurrentWorkingDirectory(); |
432 | if (!CWD) { |
433 | WorkingDirForCacheLookup = CWD.getError(); |
434 | } else if (!llvm::sys::path::is_absolute_gnu(path: *CWD)) { |
435 | WorkingDirForCacheLookup = llvm::errc::invalid_argument; |
436 | } else { |
437 | WorkingDirForCacheLookup = *CWD; |
438 | } |
439 | assert(!WorkingDirForCacheLookup || |
440 | llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup)); |
441 | } |
442 | |
443 | llvm::ErrorOr<StringRef> |
444 | DependencyScanningWorkerFilesystem::tryGetFilenameForLookup( |
445 | StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const { |
446 | StringRef FilenameForLookup; |
447 | if (llvm::sys::path::is_absolute_gnu(path: OriginalFilename)) { |
448 | FilenameForLookup = OriginalFilename; |
449 | } else if (!WorkingDirForCacheLookup) { |
450 | return WorkingDirForCacheLookup.getError(); |
451 | } else { |
452 | StringRef RelFilename = OriginalFilename; |
453 | RelFilename.consume_front(Prefix: "./" ); |
454 | PathBuf.assign(in_start: WorkingDirForCacheLookup->begin(), |
455 | in_end: WorkingDirForCacheLookup->end()); |
456 | llvm::sys::path::append(path&: PathBuf, a: RelFilename); |
457 | FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()}; |
458 | } |
459 | assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup)); |
460 | return FilenameForLookup; |
461 | } |
462 | |
463 | const char DependencyScanningWorkerFilesystem::ID = 0; |
464 | |