1//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
10#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
11
12#include "clang/Basic/LLVM.h"
13#include "clang/Lex/DependencyDirectivesScanner.h"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/StringMap.h"
16#include "llvm/Support/Allocator.h"
17#include "llvm/Support/ErrorOr.h"
18#include "llvm/Support/VirtualFileSystem.h"
19#include <mutex>
20#include <optional>
21
22namespace clang {
23namespace tooling {
24namespace dependencies {
25
26using DependencyDirectivesTy =
27 SmallVector<dependency_directives_scan::Directive, 20>;
28
29/// Contents and directive tokens of a cached file entry. Single instance can
30/// be shared between multiple entries.
31struct CachedFileContents {
32 CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
33 : Original(std::move(Contents)), DepDirectives(nullptr) {}
34
35 /// Owning storage for the original contents.
36 std::unique_ptr<llvm::MemoryBuffer> Original;
37
38 /// The mutex that must be locked before mutating directive tokens.
39 std::mutex ValueLock;
40 SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
41 /// Accessor to the directive tokens that's atomic to avoid data races.
42 /// \p CachedFileContents has ownership of the pointer.
43 std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
44
45 ~CachedFileContents() { delete DepDirectives.load(); }
46};
47
48/// An in-memory representation of a file system entity that is of interest to
49/// the dependency scanning filesystem.
50///
51/// It represents one of the following:
52/// - opened file with contents and a stat value,
53/// - opened file with contents, directive tokens and a stat value,
54/// - directory entry with its stat value,
55/// - filesystem error.
56///
57/// Single instance of this class can be shared across different filenames (e.g.
58/// a regular file and a symlink). For this reason the status filename is empty
59/// and is only materialized by \c EntryRef that knows the requested filename.
60class CachedFileSystemEntry {
61public:
62 /// Creates an entry without contents: either a filesystem error or
63 /// a directory with stat value.
64 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
65 : MaybeStat(std::move(Stat)), Contents(nullptr) {
66 clearStatName();
67 }
68
69 /// Creates an entry representing a file with contents.
70 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
71 CachedFileContents *Contents)
72 : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
73 clearStatName();
74 }
75
76 /// \returns True if the entry is a filesystem error.
77 bool isError() const { return !MaybeStat; }
78
79 /// \returns True if the current entry represents a directory.
80 bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
81
82 /// \returns Original contents of the file.
83 StringRef getOriginalContents() const {
84 assert(!isError() && "error");
85 assert(!MaybeStat->isDirectory() && "not a file");
86 assert(Contents && "contents not initialized");
87 return Contents->Original->getBuffer();
88 }
89
90 /// \returns The scanned preprocessor directive tokens of the file that are
91 /// used to speed up preprocessing, if available.
92 std::optional<ArrayRef<dependency_directives_scan::Directive>>
93 getDirectiveTokens() const {
94 assert(!isError() && "error");
95 assert(!isDirectory() && "not a file");
96 assert(Contents && "contents not initialized");
97 if (auto *Directives = Contents->DepDirectives.load()) {
98 if (Directives->has_value())
99 return ArrayRef<dependency_directives_scan::Directive>(**Directives);
100 }
101 return std::nullopt;
102 }
103
104 /// \returns The error.
105 std::error_code getError() const { return MaybeStat.getError(); }
106
107 /// \returns The entry status with empty filename.
108 llvm::vfs::Status getStatus() const {
109 assert(!isError() && "error");
110 assert(MaybeStat->getName().empty() && "stat name must be empty");
111 return *MaybeStat;
112 }
113
114 /// \returns The unique ID of the entry.
115 llvm::sys::fs::UniqueID getUniqueID() const {
116 assert(!isError() && "error");
117 return MaybeStat->getUniqueID();
118 }
119
120 /// \returns The data structure holding both contents and directive tokens.
121 CachedFileContents *getCachedContents() const {
122 assert(!isError() && "error");
123 assert(!isDirectory() && "not a file");
124 return Contents;
125 }
126
127private:
128 void clearStatName() {
129 if (MaybeStat)
130 MaybeStat = llvm::vfs::Status::copyWithNewName(In: *MaybeStat, NewName: "");
131 }
132
133 /// Either the filesystem error or status of the entry.
134 /// The filename is empty and only materialized by \c EntryRef.
135 llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
136
137 /// Non-owning pointer to the file contents.
138 ///
139 /// We're using pointer here to keep the size of this class small. Instances
140 /// representing directories and filesystem errors don't hold any contents
141 /// anyway.
142 CachedFileContents *Contents;
143};
144
145using CachedRealPath = llvm::ErrorOr<std::string>;
146
147/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
148/// underlying real file system, and the scanned preprocessor directives of
149/// files.
150///
151/// It is sharded based on the hash of the key to reduce the lock contention for
152/// the worker threads.
153class DependencyScanningFilesystemSharedCache {
154public:
155 struct CacheShard {
156 /// The mutex that needs to be locked before mutation of any member.
157 mutable std::mutex CacheLock;
158
159 /// Map from filenames to cached entries and real paths.
160 llvm::StringMap<
161 std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
162 llvm::BumpPtrAllocator>
163 CacheByFilename;
164
165 /// Map from unique IDs to cached entries.
166 llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
167 EntriesByUID;
168
169 /// The backing storage for cached entries.
170 llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
171
172 /// The backing storage for cached contents.
173 llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
174
175 /// The backing storage for cached real paths.
176 llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
177
178 /// Returns entry associated with the filename or nullptr if none is found.
179 const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
180
181 /// Returns entry associated with the unique ID or nullptr if none is found.
182 const CachedFileSystemEntry *
183 findEntryByUID(llvm::sys::fs::UniqueID UID) const;
184
185 /// Returns entry associated with the filename if there is some. Otherwise,
186 /// constructs new one with the given status, associates it with the
187 /// filename and returns the result.
188 const CachedFileSystemEntry &
189 getOrEmplaceEntryForFilename(StringRef Filename,
190 llvm::ErrorOr<llvm::vfs::Status> Stat);
191
192 /// Returns entry associated with the unique ID if there is some. Otherwise,
193 /// constructs new one with the given status and contents, associates it
194 /// with the unique ID and returns the result.
195 const CachedFileSystemEntry &
196 getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
197 std::unique_ptr<llvm::MemoryBuffer> Contents);
198
199 /// Returns entry associated with the filename if there is some. Otherwise,
200 /// associates the given entry with the filename and returns it.
201 const CachedFileSystemEntry &
202 getOrInsertEntryForFilename(StringRef Filename,
203 const CachedFileSystemEntry &Entry);
204
205 /// Returns the real path associated with the filename or nullptr if none is
206 /// found.
207 const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
208
209 /// Returns the real path associated with the filename if there is some.
210 /// Otherwise, constructs new one with the given one, associates it with the
211 /// filename and returns the result.
212 const CachedRealPath &
213 getOrEmplaceRealPathForFilename(StringRef Filename,
214 llvm::ErrorOr<StringRef> RealPath);
215 };
216
217 DependencyScanningFilesystemSharedCache();
218
219 /// Returns shard for the given key.
220 CacheShard &getShardForFilename(StringRef Filename) const;
221 CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
222
223private:
224 std::unique_ptr<CacheShard[]> CacheShards;
225 unsigned NumShards;
226};
227
228/// This class is a local cache, that caches the 'stat' and 'open' calls to the
229/// underlying real file system.
230class DependencyScanningFilesystemLocalCache {
231 llvm::StringMap<
232 std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
233 llvm::BumpPtrAllocator>
234 Cache;
235
236public:
237 /// Returns entry associated with the filename or nullptr if none is found.
238 const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
239 assert(llvm::sys::path::is_absolute_gnu(Filename));
240 auto It = Cache.find(Key: Filename);
241 return It == Cache.end() ? nullptr : It->getValue().first;
242 }
243
244 /// Associates the given entry with the filename and returns the given entry
245 /// pointer (for convenience).
246 const CachedFileSystemEntry &
247 insertEntryForFilename(StringRef Filename,
248 const CachedFileSystemEntry &Entry) {
249 assert(llvm::sys::path::is_absolute_gnu(Filename));
250 auto [It, Inserted] = Cache.insert(KV: {Filename, {&Entry, nullptr}});
251 auto &[CachedEntry, CachedRealPath] = It->getValue();
252 if (!Inserted) {
253 // The file is already present in the local cache. If we got here, it only
254 // contains the real path. Let's make sure the entry is populated too.
255 assert((!CachedEntry && CachedRealPath) && "entry already present");
256 CachedEntry = &Entry;
257 }
258 return *CachedEntry;
259 }
260
261 /// Returns real path associated with the filename or nullptr if none is
262 /// found.
263 const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
264 assert(llvm::sys::path::is_absolute_gnu(Filename));
265 auto It = Cache.find(Key: Filename);
266 return It == Cache.end() ? nullptr : It->getValue().second;
267 }
268
269 /// Associates the given real path with the filename and returns the given
270 /// entry pointer (for convenience).
271 const CachedRealPath &
272 insertRealPathForFilename(StringRef Filename,
273 const CachedRealPath &RealPath) {
274 assert(llvm::sys::path::is_absolute_gnu(Filename));
275 auto [It, Inserted] = Cache.insert(KV: {Filename, {nullptr, &RealPath}});
276 auto &[CachedEntry, CachedRealPath] = It->getValue();
277 if (!Inserted) {
278 // The file is already present in the local cache. If we got here, it only
279 // contains the entry. Let's make sure the real path is populated too.
280 assert((!CachedRealPath && CachedEntry) && "real path already present");
281 CachedRealPath = &RealPath;
282 }
283 return *CachedRealPath;
284 }
285};
286
287/// Reference to a CachedFileSystemEntry.
288/// If the underlying entry is an opened file, this wrapper returns the file
289/// contents and the scanned preprocessor directives.
290class EntryRef {
291 /// The filename used to access this entry.
292 std::string Filename;
293
294 /// The underlying cached entry.
295 const CachedFileSystemEntry &Entry;
296
297 friend class DependencyScanningWorkerFilesystem;
298
299public:
300 EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
301 : Filename(Name), Entry(Entry) {}
302
303 llvm::vfs::Status getStatus() const {
304 llvm::vfs::Status Stat = Entry.getStatus();
305 if (!Stat.isDirectory())
306 Stat = llvm::vfs::Status::copyWithNewSize(In: Stat, NewSize: getContents().size());
307 return llvm::vfs::Status::copyWithNewName(In: Stat, NewName: Filename);
308 }
309
310 bool isError() const { return Entry.isError(); }
311 bool isDirectory() const { return Entry.isDirectory(); }
312
313 /// If the cached entry represents an error, promotes it into `ErrorOr`.
314 llvm::ErrorOr<EntryRef> unwrapError() const {
315 if (isError())
316 return Entry.getError();
317 return *this;
318 }
319
320 StringRef getContents() const { return Entry.getOriginalContents(); }
321
322 std::optional<ArrayRef<dependency_directives_scan::Directive>>
323 getDirectiveTokens() const {
324 return Entry.getDirectiveTokens();
325 }
326};
327
328/// A virtual file system optimized for the dependency discovery.
329///
330/// It is primarily designed to work with source files whose contents was
331/// preprocessed to remove any tokens that are unlikely to affect the dependency
332/// computation.
333///
334/// This is not a thread safe VFS. A single instance is meant to be used only in
335/// one thread. Multiple instances are allowed to service multiple threads
336/// running in parallel.
337class DependencyScanningWorkerFilesystem
338 : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
339 llvm::vfs::ProxyFileSystem> {
340public:
341 static const char ID;
342
343 DependencyScanningWorkerFilesystem(
344 DependencyScanningFilesystemSharedCache &SharedCache,
345 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
346
347 llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
348 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
349 openFileForRead(const Twine &Path) override;
350
351 std::error_code getRealPath(const Twine &Path,
352 SmallVectorImpl<char> &Output) override;
353
354 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
355
356 /// Returns entry for the given filename.
357 ///
358 /// Attempts to use the local and shared caches first, then falls back to
359 /// using the underlying filesystem.
360 llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
361
362 /// Ensure the directive tokens are populated for this file entry.
363 ///
364 /// Returns true if the directive tokens are populated for this file entry,
365 /// false if not (i.e. this entry is not a file or its scan fails).
366 bool ensureDirectiveTokensArePopulated(EntryRef Entry);
367
368 /// Check whether \p Path exists. By default checks cached result of \c
369 /// status(), and falls back on FS if unable to do so.
370 bool exists(const Twine &Path) override;
371
372private:
373 /// For a filename that's not yet associated with any entry in the caches,
374 /// uses the underlying filesystem to either look up the entry based in the
375 /// shared cache indexed by unique ID, or creates new entry from scratch.
376 /// \p FilenameForLookup will always be an absolute path, and different than
377 /// \p OriginalFilename if \p OriginalFilename is relative.
378 llvm::ErrorOr<const CachedFileSystemEntry &>
379 computeAndStoreResult(StringRef OriginalFilename,
380 StringRef FilenameForLookup);
381
382 /// Represents a filesystem entry that has been stat-ed (and potentially read)
383 /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
384 struct TentativeEntry {
385 llvm::vfs::Status Status;
386 std::unique_ptr<llvm::MemoryBuffer> Contents;
387
388 TentativeEntry(llvm::vfs::Status Status,
389 std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
390 : Status(std::move(Status)), Contents(std::move(Contents)) {}
391 };
392
393 /// Reads file at the given path. Enforces consistency between the file size
394 /// in status and size of read contents.
395 llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
396
397 /// Returns entry associated with the unique ID of the given tentative entry
398 /// if there is some in the shared cache. Otherwise, constructs new one,
399 /// associates it with the unique ID and returns the result.
400 const CachedFileSystemEntry &
401 getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
402
403 /// Returns entry associated with the filename or nullptr if none is found.
404 ///
405 /// Returns entry from local cache if there is some. Otherwise, if the entry
406 /// is found in the shared cache, writes it through the local cache and
407 /// returns it. Otherwise returns nullptr.
408 const CachedFileSystemEntry *
409 findEntryByFilenameWithWriteThrough(StringRef Filename);
410
411 /// Returns entry associated with the unique ID in the shared cache or nullptr
412 /// if none is found.
413 const CachedFileSystemEntry *
414 findSharedEntryByUID(llvm::vfs::Status Stat) const {
415 return SharedCache.getShardForUID(UID: Stat.getUniqueID())
416 .findEntryByUID(UID: Stat.getUniqueID());
417 }
418
419 /// Associates the given entry with the filename in the local cache and
420 /// returns it.
421 const CachedFileSystemEntry &
422 insertLocalEntryForFilename(StringRef Filename,
423 const CachedFileSystemEntry &Entry) {
424 return LocalCache.insertEntryForFilename(Filename, Entry);
425 }
426
427 /// Returns entry associated with the filename in the shared cache if there is
428 /// some. Otherwise, constructs new one with the given error code, associates
429 /// it with the filename and returns the result.
430 const CachedFileSystemEntry &
431 getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
432 return SharedCache.getShardForFilename(Filename)
433 .getOrEmplaceEntryForFilename(Filename, Stat: EC);
434 }
435
436 /// Returns entry associated with the filename in the shared cache if there is
437 /// some. Otherwise, associates the given entry with the filename and returns
438 /// it.
439 const CachedFileSystemEntry &
440 getOrInsertSharedEntryForFilename(StringRef Filename,
441 const CachedFileSystemEntry &Entry) {
442 return SharedCache.getShardForFilename(Filename)
443 .getOrInsertEntryForFilename(Filename, Entry);
444 }
445
446 void printImpl(raw_ostream &OS, PrintType Type,
447 unsigned IndentLevel) const override {
448 printIndent(OS, IndentLevel);
449 OS << "DependencyScanningFilesystem\n";
450 getUnderlyingFS().print(OS, Type, IndentLevel: IndentLevel + 1);
451 }
452
453 /// The global cache shared between worker threads.
454 DependencyScanningFilesystemSharedCache &SharedCache;
455 /// The local cache is used by the worker thread to cache file system queries
456 /// locally instead of querying the global cache every time.
457 DependencyScanningFilesystemLocalCache LocalCache;
458
459 /// The working directory to use for making relative paths absolute before
460 /// using them for cache lookups.
461 llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
462
463 void updateWorkingDirForCacheLookup();
464
465 llvm::ErrorOr<StringRef>
466 tryGetFilenameForLookup(StringRef OriginalFilename,
467 llvm::SmallVectorImpl<char> &PathBuf) const;
468};
469
470} // end namespace dependencies
471} // end namespace tooling
472} // end namespace clang
473
474#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
475

// Source: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h