1 | //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |
10 | #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |
11 | |
12 | #include "clang/Basic/LLVM.h" |
13 | #include "clang/Lex/DependencyDirectivesScanner.h" |
14 | #include "llvm/ADT/DenseMap.h" |
15 | #include "llvm/ADT/StringMap.h" |
16 | #include "llvm/Support/Allocator.h" |
17 | #include "llvm/Support/ErrorOr.h" |
18 | #include "llvm/Support/VirtualFileSystem.h" |
19 | #include <mutex> |
20 | #include <optional> |
21 | |
22 | namespace clang { |
23 | namespace tooling { |
24 | namespace dependencies { |
25 | |
26 | using DependencyDirectivesTy = |
27 | SmallVector<dependency_directives_scan::Directive, 20>; |
28 | |
29 | /// Contents and directive tokens of a cached file entry. Single instance can |
30 | /// be shared between multiple entries. |
31 | struct CachedFileContents { |
32 | CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents) |
33 | : Original(std::move(Contents)), DepDirectives(nullptr) {} |
34 | |
35 | /// Owning storage for the original contents. |
36 | std::unique_ptr<llvm::MemoryBuffer> Original; |
37 | |
38 | /// The mutex that must be locked before mutating directive tokens. |
39 | std::mutex ValueLock; |
40 | SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens; |
41 | /// Accessor to the directive tokens that's atomic to avoid data races. |
42 | /// \p CachedFileContents has ownership of the pointer. |
43 | std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives; |
44 | |
45 | ~CachedFileContents() { delete DepDirectives.load(); } |
46 | }; |
47 | |
48 | /// An in-memory representation of a file system entity that is of interest to |
49 | /// the dependency scanning filesystem. |
50 | /// |
51 | /// It represents one of the following: |
52 | /// - opened file with contents and a stat value, |
53 | /// - opened file with contents, directive tokens and a stat value, |
54 | /// - directory entry with its stat value, |
55 | /// - filesystem error. |
56 | /// |
57 | /// Single instance of this class can be shared across different filenames (e.g. |
58 | /// a regular file and a symlink). For this reason the status filename is empty |
59 | /// and is only materialized by \c EntryRef that knows the requested filename. |
60 | class CachedFileSystemEntry { |
61 | public: |
62 | /// Creates an entry without contents: either a filesystem error or |
63 | /// a directory with stat value. |
64 | CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat) |
65 | : MaybeStat(std::move(Stat)), Contents(nullptr) { |
66 | clearStatName(); |
67 | } |
68 | |
69 | /// Creates an entry representing a file with contents. |
70 | CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat, |
71 | CachedFileContents *Contents) |
72 | : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) { |
73 | clearStatName(); |
74 | } |
75 | |
76 | /// \returns True if the entry is a filesystem error. |
77 | bool isError() const { return !MaybeStat; } |
78 | |
79 | /// \returns True if the current entry represents a directory. |
80 | bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); } |
81 | |
82 | /// \returns Original contents of the file. |
83 | StringRef getOriginalContents() const { |
84 | assert(!isError() && "error" ); |
85 | assert(!MaybeStat->isDirectory() && "not a file" ); |
86 | assert(Contents && "contents not initialized" ); |
87 | return Contents->Original->getBuffer(); |
88 | } |
89 | |
90 | /// \returns The scanned preprocessor directive tokens of the file that are |
91 | /// used to speed up preprocessing, if available. |
92 | std::optional<ArrayRef<dependency_directives_scan::Directive>> |
93 | getDirectiveTokens() const { |
94 | assert(!isError() && "error" ); |
95 | assert(!isDirectory() && "not a file" ); |
96 | assert(Contents && "contents not initialized" ); |
97 | if (auto *Directives = Contents->DepDirectives.load()) { |
98 | if (Directives->has_value()) |
99 | return ArrayRef<dependency_directives_scan::Directive>(**Directives); |
100 | } |
101 | return std::nullopt; |
102 | } |
103 | |
104 | /// \returns The error. |
105 | std::error_code getError() const { return MaybeStat.getError(); } |
106 | |
107 | /// \returns The entry status with empty filename. |
108 | llvm::vfs::Status getStatus() const { |
109 | assert(!isError() && "error" ); |
110 | assert(MaybeStat->getName().empty() && "stat name must be empty" ); |
111 | return *MaybeStat; |
112 | } |
113 | |
114 | /// \returns The unique ID of the entry. |
115 | llvm::sys::fs::UniqueID getUniqueID() const { |
116 | assert(!isError() && "error" ); |
117 | return MaybeStat->getUniqueID(); |
118 | } |
119 | |
120 | /// \returns The data structure holding both contents and directive tokens. |
121 | CachedFileContents *getCachedContents() const { |
122 | assert(!isError() && "error" ); |
123 | assert(!isDirectory() && "not a file" ); |
124 | return Contents; |
125 | } |
126 | |
127 | private: |
128 | void clearStatName() { |
129 | if (MaybeStat) |
130 | MaybeStat = llvm::vfs::Status::copyWithNewName(In: *MaybeStat, NewName: "" ); |
131 | } |
132 | |
133 | /// Either the filesystem error or status of the entry. |
134 | /// The filename is empty and only materialized by \c EntryRef. |
135 | llvm::ErrorOr<llvm::vfs::Status> MaybeStat; |
136 | |
137 | /// Non-owning pointer to the file contents. |
138 | /// |
139 | /// We're using pointer here to keep the size of this class small. Instances |
140 | /// representing directories and filesystem errors don't hold any contents |
141 | /// anyway. |
142 | CachedFileContents *Contents; |
143 | }; |
144 | |
145 | using CachedRealPath = llvm::ErrorOr<std::string>; |
146 | |
147 | /// This class is a shared cache, that caches the 'stat' and 'open' calls to the |
148 | /// underlying real file system, and the scanned preprocessor directives of |
149 | /// files. |
150 | /// |
151 | /// It is sharded based on the hash of the key to reduce the lock contention for |
152 | /// the worker threads. |
153 | class DependencyScanningFilesystemSharedCache { |
154 | public: |
155 | struct CacheShard { |
156 | /// The mutex that needs to be locked before mutation of any member. |
157 | mutable std::mutex CacheLock; |
158 | |
159 | /// Map from filenames to cached entries and real paths. |
160 | llvm::StringMap< |
161 | std::pair<const CachedFileSystemEntry *, const CachedRealPath *>, |
162 | llvm::BumpPtrAllocator> |
163 | CacheByFilename; |
164 | |
165 | /// Map from unique IDs to cached entries. |
166 | llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *> |
167 | EntriesByUID; |
168 | |
169 | /// The backing storage for cached entries. |
170 | llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage; |
171 | |
172 | /// The backing storage for cached contents. |
173 | llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage; |
174 | |
175 | /// The backing storage for cached real paths. |
176 | llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage; |
177 | |
178 | /// Returns entry associated with the filename or nullptr if none is found. |
179 | const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const; |
180 | |
181 | /// Returns entry associated with the unique ID or nullptr if none is found. |
182 | const CachedFileSystemEntry * |
183 | findEntryByUID(llvm::sys::fs::UniqueID UID) const; |
184 | |
185 | /// Returns entry associated with the filename if there is some. Otherwise, |
186 | /// constructs new one with the given status, associates it with the |
187 | /// filename and returns the result. |
188 | const CachedFileSystemEntry & |
189 | getOrEmplaceEntryForFilename(StringRef Filename, |
190 | llvm::ErrorOr<llvm::vfs::Status> Stat); |
191 | |
192 | /// Returns entry associated with the unique ID if there is some. Otherwise, |
193 | /// constructs new one with the given status and contents, associates it |
194 | /// with the unique ID and returns the result. |
195 | const CachedFileSystemEntry & |
196 | getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, |
197 | std::unique_ptr<llvm::MemoryBuffer> Contents); |
198 | |
199 | /// Returns entry associated with the filename if there is some. Otherwise, |
200 | /// associates the given entry with the filename and returns it. |
201 | const CachedFileSystemEntry & |
202 | getOrInsertEntryForFilename(StringRef Filename, |
203 | const CachedFileSystemEntry &Entry); |
204 | |
205 | /// Returns the real path associated with the filename or nullptr if none is |
206 | /// found. |
207 | const CachedRealPath *findRealPathByFilename(StringRef Filename) const; |
208 | |
209 | /// Returns the real path associated with the filename if there is some. |
210 | /// Otherwise, constructs new one with the given one, associates it with the |
211 | /// filename and returns the result. |
212 | const CachedRealPath & |
213 | getOrEmplaceRealPathForFilename(StringRef Filename, |
214 | llvm::ErrorOr<StringRef> RealPath); |
215 | }; |
216 | |
217 | DependencyScanningFilesystemSharedCache(); |
218 | |
219 | /// Returns shard for the given key. |
220 | CacheShard &getShardForFilename(StringRef Filename) const; |
221 | CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const; |
222 | |
223 | private: |
224 | std::unique_ptr<CacheShard[]> CacheShards; |
225 | unsigned NumShards; |
226 | }; |
227 | |
228 | /// This class is a local cache, that caches the 'stat' and 'open' calls to the |
229 | /// underlying real file system. |
230 | class DependencyScanningFilesystemLocalCache { |
231 | llvm::StringMap< |
232 | std::pair<const CachedFileSystemEntry *, const CachedRealPath *>, |
233 | llvm::BumpPtrAllocator> |
234 | Cache; |
235 | |
236 | public: |
237 | /// Returns entry associated with the filename or nullptr if none is found. |
238 | const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const { |
239 | assert(llvm::sys::path::is_absolute_gnu(Filename)); |
240 | auto It = Cache.find(Key: Filename); |
241 | return It == Cache.end() ? nullptr : It->getValue().first; |
242 | } |
243 | |
244 | /// Associates the given entry with the filename and returns the given entry |
245 | /// pointer (for convenience). |
246 | const CachedFileSystemEntry & |
247 | insertEntryForFilename(StringRef Filename, |
248 | const CachedFileSystemEntry &Entry) { |
249 | assert(llvm::sys::path::is_absolute_gnu(Filename)); |
250 | auto [It, Inserted] = Cache.insert(KV: {Filename, {&Entry, nullptr}}); |
251 | auto &[CachedEntry, CachedRealPath] = It->getValue(); |
252 | if (!Inserted) { |
253 | // The file is already present in the local cache. If we got here, it only |
254 | // contains the real path. Let's make sure the entry is populated too. |
255 | assert((!CachedEntry && CachedRealPath) && "entry already present" ); |
256 | CachedEntry = &Entry; |
257 | } |
258 | return *CachedEntry; |
259 | } |
260 | |
261 | /// Returns real path associated with the filename or nullptr if none is |
262 | /// found. |
263 | const CachedRealPath *findRealPathByFilename(StringRef Filename) const { |
264 | assert(llvm::sys::path::is_absolute_gnu(Filename)); |
265 | auto It = Cache.find(Key: Filename); |
266 | return It == Cache.end() ? nullptr : It->getValue().second; |
267 | } |
268 | |
269 | /// Associates the given real path with the filename and returns the given |
270 | /// entry pointer (for convenience). |
271 | const CachedRealPath & |
272 | insertRealPathForFilename(StringRef Filename, |
273 | const CachedRealPath &RealPath) { |
274 | assert(llvm::sys::path::is_absolute_gnu(Filename)); |
275 | auto [It, Inserted] = Cache.insert(KV: {Filename, {nullptr, &RealPath}}); |
276 | auto &[CachedEntry, CachedRealPath] = It->getValue(); |
277 | if (!Inserted) { |
278 | // The file is already present in the local cache. If we got here, it only |
279 | // contains the entry. Let's make sure the real path is populated too. |
280 | assert((!CachedRealPath && CachedEntry) && "real path already present" ); |
281 | CachedRealPath = &RealPath; |
282 | } |
283 | return *CachedRealPath; |
284 | } |
285 | }; |
286 | |
287 | /// Reference to a CachedFileSystemEntry. |
288 | /// If the underlying entry is an opened file, this wrapper returns the file |
289 | /// contents and the scanned preprocessor directives. |
290 | class EntryRef { |
291 | /// The filename used to access this entry. |
292 | std::string Filename; |
293 | |
294 | /// The underlying cached entry. |
295 | const CachedFileSystemEntry &Entry; |
296 | |
297 | friend class DependencyScanningWorkerFilesystem; |
298 | |
299 | public: |
300 | EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) |
301 | : Filename(Name), Entry(Entry) {} |
302 | |
303 | llvm::vfs::Status getStatus() const { |
304 | llvm::vfs::Status Stat = Entry.getStatus(); |
305 | if (!Stat.isDirectory()) |
306 | Stat = llvm::vfs::Status::copyWithNewSize(In: Stat, NewSize: getContents().size()); |
307 | return llvm::vfs::Status::copyWithNewName(In: Stat, NewName: Filename); |
308 | } |
309 | |
310 | bool isError() const { return Entry.isError(); } |
311 | bool isDirectory() const { return Entry.isDirectory(); } |
312 | |
313 | /// If the cached entry represents an error, promotes it into `ErrorOr`. |
314 | llvm::ErrorOr<EntryRef> unwrapError() const { |
315 | if (isError()) |
316 | return Entry.getError(); |
317 | return *this; |
318 | } |
319 | |
320 | StringRef getContents() const { return Entry.getOriginalContents(); } |
321 | |
322 | std::optional<ArrayRef<dependency_directives_scan::Directive>> |
323 | getDirectiveTokens() const { |
324 | return Entry.getDirectiveTokens(); |
325 | } |
326 | }; |
327 | |
328 | /// A virtual file system optimized for the dependency discovery. |
329 | /// |
330 | /// It is primarily designed to work with source files whose contents was |
331 | /// preprocessed to remove any tokens that are unlikely to affect the dependency |
332 | /// computation. |
333 | /// |
334 | /// This is not a thread safe VFS. A single instance is meant to be used only in |
335 | /// one thread. Multiple instances are allowed to service multiple threads |
336 | /// running in parallel. |
337 | class DependencyScanningWorkerFilesystem |
338 | : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem, |
339 | llvm::vfs::ProxyFileSystem> { |
340 | public: |
341 | static const char ID; |
342 | |
343 | DependencyScanningWorkerFilesystem( |
344 | DependencyScanningFilesystemSharedCache &SharedCache, |
345 | IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS); |
346 | |
347 | llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override; |
348 | llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> |
349 | openFileForRead(const Twine &Path) override; |
350 | |
351 | std::error_code getRealPath(const Twine &Path, |
352 | SmallVectorImpl<char> &Output) override; |
353 | |
354 | std::error_code setCurrentWorkingDirectory(const Twine &Path) override; |
355 | |
356 | /// Returns entry for the given filename. |
357 | /// |
358 | /// Attempts to use the local and shared caches first, then falls back to |
359 | /// using the underlying filesystem. |
360 | llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename); |
361 | |
362 | /// Ensure the directive tokens are populated for this file entry. |
363 | /// |
364 | /// Returns true if the directive tokens are populated for this file entry, |
365 | /// false if not (i.e. this entry is not a file or its scan fails). |
366 | bool ensureDirectiveTokensArePopulated(EntryRef Entry); |
367 | |
368 | /// Check whether \p Path exists. By default checks cached result of \c |
369 | /// status(), and falls back on FS if unable to do so. |
370 | bool exists(const Twine &Path) override; |
371 | |
372 | private: |
373 | /// For a filename that's not yet associated with any entry in the caches, |
374 | /// uses the underlying filesystem to either look up the entry based in the |
375 | /// shared cache indexed by unique ID, or creates new entry from scratch. |
376 | /// \p FilenameForLookup will always be an absolute path, and different than |
377 | /// \p OriginalFilename if \p OriginalFilename is relative. |
378 | llvm::ErrorOr<const CachedFileSystemEntry &> |
379 | computeAndStoreResult(StringRef OriginalFilename, |
380 | StringRef FilenameForLookup); |
381 | |
382 | /// Represents a filesystem entry that has been stat-ed (and potentially read) |
383 | /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. |
384 | struct TentativeEntry { |
385 | llvm::vfs::Status Status; |
386 | std::unique_ptr<llvm::MemoryBuffer> Contents; |
387 | |
388 | TentativeEntry(llvm::vfs::Status Status, |
389 | std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr) |
390 | : Status(std::move(Status)), Contents(std::move(Contents)) {} |
391 | }; |
392 | |
393 | /// Reads file at the given path. Enforces consistency between the file size |
394 | /// in status and size of read contents. |
395 | llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename); |
396 | |
397 | /// Returns entry associated with the unique ID of the given tentative entry |
398 | /// if there is some in the shared cache. Otherwise, constructs new one, |
399 | /// associates it with the unique ID and returns the result. |
400 | const CachedFileSystemEntry & |
401 | getOrEmplaceSharedEntryForUID(TentativeEntry TEntry); |
402 | |
403 | /// Returns entry associated with the filename or nullptr if none is found. |
404 | /// |
405 | /// Returns entry from local cache if there is some. Otherwise, if the entry |
406 | /// is found in the shared cache, writes it through the local cache and |
407 | /// returns it. Otherwise returns nullptr. |
408 | const CachedFileSystemEntry * |
409 | findEntryByFilenameWithWriteThrough(StringRef Filename); |
410 | |
411 | /// Returns entry associated with the unique ID in the shared cache or nullptr |
412 | /// if none is found. |
413 | const CachedFileSystemEntry * |
414 | findSharedEntryByUID(llvm::vfs::Status Stat) const { |
415 | return SharedCache.getShardForUID(UID: Stat.getUniqueID()) |
416 | .findEntryByUID(UID: Stat.getUniqueID()); |
417 | } |
418 | |
419 | /// Associates the given entry with the filename in the local cache and |
420 | /// returns it. |
421 | const CachedFileSystemEntry & |
422 | insertLocalEntryForFilename(StringRef Filename, |
423 | const CachedFileSystemEntry &Entry) { |
424 | return LocalCache.insertEntryForFilename(Filename, Entry); |
425 | } |
426 | |
427 | /// Returns entry associated with the filename in the shared cache if there is |
428 | /// some. Otherwise, constructs new one with the given error code, associates |
429 | /// it with the filename and returns the result. |
430 | const CachedFileSystemEntry & |
431 | getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) { |
432 | return SharedCache.getShardForFilename(Filename) |
433 | .getOrEmplaceEntryForFilename(Filename, Stat: EC); |
434 | } |
435 | |
436 | /// Returns entry associated with the filename in the shared cache if there is |
437 | /// some. Otherwise, associates the given entry with the filename and returns |
438 | /// it. |
439 | const CachedFileSystemEntry & |
440 | getOrInsertSharedEntryForFilename(StringRef Filename, |
441 | const CachedFileSystemEntry &Entry) { |
442 | return SharedCache.getShardForFilename(Filename) |
443 | .getOrInsertEntryForFilename(Filename, Entry); |
444 | } |
445 | |
446 | void printImpl(raw_ostream &OS, PrintType Type, |
447 | unsigned IndentLevel) const override { |
448 | printIndent(OS, IndentLevel); |
449 | OS << "DependencyScanningFilesystem\n" ; |
450 | getUnderlyingFS().print(OS, Type, IndentLevel: IndentLevel + 1); |
451 | } |
452 | |
453 | /// The global cache shared between worker threads. |
454 | DependencyScanningFilesystemSharedCache &SharedCache; |
455 | /// The local cache is used by the worker thread to cache file system queries |
456 | /// locally instead of querying the global cache every time. |
457 | DependencyScanningFilesystemLocalCache LocalCache; |
458 | |
459 | /// The working directory to use for making relative paths absolute before |
460 | /// using them for cache lookups. |
461 | llvm::ErrorOr<std::string> WorkingDirForCacheLookup; |
462 | |
463 | void updateWorkingDirForCacheLookup(); |
464 | |
465 | llvm::ErrorOr<StringRef> |
466 | tryGetFilenameForLookup(StringRef OriginalFilename, |
467 | llvm::SmallVectorImpl<char> &PathBuf) const; |
468 | }; |
469 | |
470 | } // end namespace dependencies |
471 | } // end namespace tooling |
472 | } // end namespace clang |
473 | |
474 | #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |
475 | |