1//===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines the virtual file system interface vfs::FileSystem.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H
15#define LLVM_SUPPORT_VIRTUALFILESYSTEM_H
16
17#include "llvm/ADT/IntrusiveRefCntPtr.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/Support/Chrono.h"
22#include "llvm/Support/Errc.h"
23#include "llvm/Support/Error.h"
24#include "llvm/Support/ErrorOr.h"
25#include "llvm/Support/ExtensibleRTTI.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/Path.h"
28#include "llvm/Support/SourceMgr.h"
29#include <cassert>
30#include <cstdint>
31#include <ctime>
32#include <memory>
33#include <optional>
34#include <string>
35#include <system_error>
36#include <utility>
37#include <vector>
38
39namespace llvm {
40
41class MemoryBuffer;
42class MemoryBufferRef;
43class Twine;
44
45namespace vfs {
46
47/// The result of a \p status operation.
48class Status {
49 std::string Name;
50 llvm::sys::fs::UniqueID UID;
51 llvm::sys::TimePoint<> MTime;
52 uint32_t User;
53 uint32_t Group;
54 uint64_t Size;
55 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error;
56 llvm::sys::fs::perms Perms;
57
58public:
59 /// Whether this entity has an external path different from the virtual path,
60 /// and the external path is exposed by leaking it through the abstraction.
61 /// For example, a RedirectingFileSystem will set this for paths where
62 /// UseExternalName is true.
63 ///
64 /// FIXME: Currently the external path is exposed by replacing the virtual
65 /// path in this Status object. Instead, we should leave the path in the
66 /// Status intact (matching the requested virtual path) - see
67 /// FileManager::getFileRef for how we plan to fix this.
68 bool ExposesExternalVFSPath = false;
69
70 Status() = default;
71 Status(const llvm::sys::fs::file_status &Status);
72 Status(const Twine &Name, llvm::sys::fs::UniqueID UID,
73 llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
74 uint64_t Size, llvm::sys::fs::file_type Type,
75 llvm::sys::fs::perms Perms);
76
77 /// Get a copy of a Status with a different size.
78 static Status copyWithNewSize(const Status &In, uint64_t NewSize);
79 /// Get a copy of a Status with a different name.
80 static Status copyWithNewName(const Status &In, const Twine &NewName);
81 static Status copyWithNewName(const llvm::sys::fs::file_status &In,
82 const Twine &NewName);
83
84 /// Returns the name that should be used for this file or directory.
85 StringRef getName() const { return Name; }
86
87 /// @name Status interface from llvm::sys::fs
88 /// @{
89 llvm::sys::fs::file_type getType() const { return Type; }
90 llvm::sys::fs::perms getPermissions() const { return Perms; }
91 llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; }
92 llvm::sys::fs::UniqueID getUniqueID() const { return UID; }
93 uint32_t getUser() const { return User; }
94 uint32_t getGroup() const { return Group; }
95 uint64_t getSize() const { return Size; }
96 /// @}
97 /// @name Status queries
98 /// These are static queries in llvm::sys::fs.
99 /// @{
100 bool equivalent(const Status &Other) const;
101 bool isDirectory() const;
102 bool isRegularFile() const;
103 bool isOther() const;
104 bool isSymlink() const;
105 bool isStatusKnown() const;
106 bool exists() const;
107 /// @}
108};
109
110/// Represents an open file.
111class File {
112public:
113 /// Destroy the file after closing it (if open).
114 /// Sub-classes should generally call close() inside their destructors. We
115 /// cannot do that from the base class, since close is virtual.
116 virtual ~File();
117
118 /// Get the status of the file.
119 virtual llvm::ErrorOr<Status> status() = 0;
120
121 /// Get the name of the file
122 virtual llvm::ErrorOr<std::string> getName() {
123 if (auto Status = status())
124 return Status->getName().str();
125 else
126 return Status.getError();
127 }
128
129 /// Get the contents of the file as a \p MemoryBuffer.
130 virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
131 getBuffer(const Twine &Name, int64_t FileSize = -1,
132 bool RequiresNullTerminator = true, bool IsVolatile = false) = 0;
133
134 /// Closes the file.
135 virtual std::error_code close() = 0;
136
137 // Get the same file with a different path.
138 static ErrorOr<std::unique_ptr<File>>
139 getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P);
140
141protected:
142 // Set the file's underlying path.
143 virtual void setPath(const Twine &Path) {}
144};
145
146/// A member of a directory, yielded by a directory_iterator.
147/// Only information available on most platforms is included.
148class directory_entry {
149 std::string Path;
150 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown;
151
152public:
153 directory_entry() = default;
154 directory_entry(std::string Path, llvm::sys::fs::file_type Type)
155 : Path(std::move(Path)), Type(Type) {}
156
157 llvm::StringRef path() const { return Path; }
158 llvm::sys::fs::file_type type() const { return Type; }
159};
160
161namespace detail {
162
163/// An interface for virtual file systems to provide an iterator over the
164/// (non-recursive) contents of a directory.
165struct DirIterImpl {
166 virtual ~DirIterImpl();
167
168 /// Sets \c CurrentEntry to the next entry in the directory on success,
169 /// to directory_entry() at end, or returns a system-defined \c error_code.
170 virtual std::error_code increment() = 0;
171
172 directory_entry CurrentEntry;
173};
174
175} // namespace detail
176
177/// An input iterator over the entries in a virtual path, similar to
178/// llvm::sys::fs::directory_iterator.
179class directory_iterator {
180 std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy
181
182public:
183 directory_iterator(std::shared_ptr<detail::DirIterImpl> I)
184 : Impl(std::move(I)) {
185 assert(Impl.get() != nullptr && "requires non-null implementation");
186 if (Impl->CurrentEntry.path().empty())
187 Impl.reset(); // Normalize the end iterator to Impl == nullptr.
188 }
189
190 /// Construct an 'end' iterator.
191 directory_iterator() = default;
192
193 /// Equivalent to operator++, with an error code.
194 directory_iterator &increment(std::error_code &EC) {
195 assert(Impl && "attempting to increment past end");
196 EC = Impl->increment();
197 if (Impl->CurrentEntry.path().empty())
198 Impl.reset(); // Normalize the end iterator to Impl == nullptr.
199 return *this;
200 }
201
202 const directory_entry &operator*() const { return Impl->CurrentEntry; }
203 const directory_entry *operator->() const { return &Impl->CurrentEntry; }
204
205 bool operator==(const directory_iterator &RHS) const {
206 if (Impl && RHS.Impl)
207 return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path();
208 return !Impl && !RHS.Impl;
209 }
210 bool operator!=(const directory_iterator &RHS) const {
211 return !(*this == RHS);
212 }
213};
214
215class FileSystem;
216
217namespace detail {
218
219/// Keeps state for the recursive_directory_iterator.
220struct RecDirIterState {
221 std::vector<directory_iterator> Stack;
222 bool HasNoPushRequest = false;
223};
224
225} // end namespace detail
226
227/// An input iterator over the recursive contents of a virtual path,
228/// similar to llvm::sys::fs::recursive_directory_iterator.
229class recursive_directory_iterator {
230 FileSystem *FS;
231 std::shared_ptr<detail::RecDirIterState>
232 State; // Input iterator semantics on copy.
233
234public:
235 recursive_directory_iterator(FileSystem &FS, const Twine &Path,
236 std::error_code &EC);
237
238 /// Construct an 'end' iterator.
239 recursive_directory_iterator() = default;
240
241 /// Equivalent to operator++, with an error code.
242 recursive_directory_iterator &increment(std::error_code &EC);
243
244 const directory_entry &operator*() const { return *State->Stack.back(); }
245 const directory_entry *operator->() const { return &*State->Stack.back(); }
246
247 bool operator==(const recursive_directory_iterator &Other) const {
248 return State == Other.State; // identity
249 }
250 bool operator!=(const recursive_directory_iterator &RHS) const {
251 return !(*this == RHS);
252 }
253
254 /// Gets the current level. Starting path is at level 0.
255 int level() const {
256 assert(!State->Stack.empty() &&
257 "Cannot get level without any iteration state");
258 return State->Stack.size() - 1;
259 }
260
261 void no_push() { State->HasNoPushRequest = true; }
262};
263
264/// The virtual file system interface.
265class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem>,
266 public RTTIExtends<FileSystem, RTTIRoot> {
267public:
268 static const char ID;
269 virtual ~FileSystem();
270
271 /// Get the status of the entry at \p Path, if one exists.
272 virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0;
273
274 /// Get a \p File object for the text file at \p Path, if one exists.
275 virtual llvm::ErrorOr<std::unique_ptr<File>>
276 openFileForRead(const Twine &Path) = 0;
277
278 /// Get a \p File object for the binary file at \p Path, if one exists.
279 /// Some non-ascii based file systems perform encoding conversions
280 /// when reading as a text file, and this function should be used if
281 /// a file's bytes should be read as-is. On most filesystems, this
282 /// is the same behaviour as openFileForRead.
283 virtual llvm::ErrorOr<std::unique_ptr<File>>
284 openFileForReadBinary(const Twine &Path) {
285 return openFileForRead(Path);
286 }
287
288 /// This is a convenience method that opens a file, gets its content and then
289 /// closes the file.
290 /// The IsText parameter is used to distinguish whether the file should be
291 /// opened as a binary or text file.
292 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
293 getBufferForFile(const Twine &Name, int64_t FileSize = -1,
294 bool RequiresNullTerminator = true, bool IsVolatile = false,
295 bool IsText = true);
296
297 /// Get a directory_iterator for \p Dir.
298 /// \note The 'end' iterator is directory_iterator().
299 virtual directory_iterator dir_begin(const Twine &Dir,
300 std::error_code &EC) = 0;
301
302 /// Set the working directory. This will affect all following operations on
303 /// this file system and may propagate down for nested file systems.
304 virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0;
305
306 /// Get the working directory of this file system.
307 virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0;
308
309 /// Gets real path of \p Path e.g. collapse all . and .. patterns, resolve
310 /// symlinks. For real file system, this uses `llvm::sys::fs::real_path`.
311 /// This returns errc::operation_not_permitted if not implemented by subclass.
312 virtual std::error_code getRealPath(const Twine &Path,
313 SmallVectorImpl<char> &Output);
314
315 /// Check whether \p Path exists. By default this uses \c status(), but
316 /// filesystems may provide a more efficient implementation if available.
317 virtual bool exists(const Twine &Path);
318
319 /// Is the file mounted on a local filesystem?
320 virtual std::error_code isLocal(const Twine &Path, bool &Result);
321
322 /// Make \a Path an absolute path.
323 ///
324 /// Makes \a Path absolute using the current directory if it is not already.
325 /// An empty \a Path will result in the current directory.
326 ///
327 /// /absolute/path => /absolute/path
328 /// relative/../path => <current-directory>/relative/../path
329 ///
330 /// \param Path A path that is modified to be an absolute path.
331 /// \returns success if \a path has been made absolute, otherwise a
332 /// platform-specific error_code.
333 virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
334
335 /// \returns true if \p A and \p B represent the same file, or an error or
336 /// false if they do not.
337 llvm::ErrorOr<bool> equivalent(const Twine &A, const Twine &B);
338
339 enum class PrintType { Summary, Contents, RecursiveContents };
340 void print(raw_ostream &OS, PrintType Type = PrintType::Contents,
341 unsigned IndentLevel = 0) const {
342 printImpl(OS, Type, IndentLevel);
343 }
344
345 using VisitCallbackTy = llvm::function_ref<void(FileSystem &)>;
346 virtual void visitChildFileSystems(VisitCallbackTy Callback) {}
347 void visit(VisitCallbackTy Callback) {
348 Callback(*this);
349 visitChildFileSystems(Callback);
350 }
351
352#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
353 LLVM_DUMP_METHOD void dump() const;
354#endif
355
356protected:
357 virtual void printImpl(raw_ostream &OS, PrintType Type,
358 unsigned IndentLevel) const {
359 printIndent(OS, IndentLevel);
360 OS << "FileSystem\n";
361 }
362
363 void printIndent(raw_ostream &OS, unsigned IndentLevel) const {
364 for (unsigned i = 0; i < IndentLevel; ++i)
365 OS << " ";
366 }
367};
368
/// Gets a \p vfs::FileSystem for the 'real' file system, as seen by
/// the operating system.
/// The working directory is linked to the process's working directory.
/// (This is usually thread-hostile).
373IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
374
/// Create a \p vfs::FileSystem for the 'real' file system, as seen by
/// the operating system.
/// It has its own working directory, independent of (but initially equal to)
/// that of the process.
379std::unique_ptr<FileSystem> createPhysicalFileSystem();
380
381/// A file system that allows overlaying one \p AbstractFileSystem on top
382/// of another.
383///
384/// Consists of a stack of >=1 \p FileSystem objects, which are treated as being
385/// one merged file system. When there is a directory that exists in more than
386/// one file system, the \p OverlayFileSystem contains a directory containing
387/// the union of their contents. The attributes (permissions, etc.) of the
388/// top-most (most recently added) directory are used. When there is a file
389/// that exists in more than one file system, the file in the top-most file
390/// system overrides the other(s).
391class OverlayFileSystem : public RTTIExtends<OverlayFileSystem, FileSystem> {
392 using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>;
393
394 /// The stack of file systems, implemented as a list in order of
395 /// their addition.
396 FileSystemList FSList;
397
398public:
399 static const char ID;
400 OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base);
401
402 /// Pushes a file system on top of the stack.
403 void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS);
404
405 llvm::ErrorOr<Status> status(const Twine &Path) override;
406 bool exists(const Twine &Path) override;
407 llvm::ErrorOr<std::unique_ptr<File>>
408 openFileForRead(const Twine &Path) override;
409 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
410 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
411 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
412 std::error_code isLocal(const Twine &Path, bool &Result) override;
413 std::error_code getRealPath(const Twine &Path,
414 SmallVectorImpl<char> &Output) override;
415
416 using iterator = FileSystemList::reverse_iterator;
417 using const_iterator = FileSystemList::const_reverse_iterator;
418 using reverse_iterator = FileSystemList::iterator;
419 using const_reverse_iterator = FileSystemList::const_iterator;
420 using range = iterator_range<iterator>;
421 using const_range = iterator_range<const_iterator>;
422
423 /// Get an iterator pointing to the most recently added file system.
424 iterator overlays_begin() { return FSList.rbegin(); }
425 const_iterator overlays_begin() const { return FSList.rbegin(); }
426
427 /// Get an iterator pointing one-past the least recently added file system.
428 iterator overlays_end() { return FSList.rend(); }
429 const_iterator overlays_end() const { return FSList.rend(); }
430
431 /// Get an iterator pointing to the least recently added file system.
432 reverse_iterator overlays_rbegin() { return FSList.begin(); }
433 const_reverse_iterator overlays_rbegin() const { return FSList.begin(); }
434
435 /// Get an iterator pointing one-past the most recently added file system.
436 reverse_iterator overlays_rend() { return FSList.end(); }
437 const_reverse_iterator overlays_rend() const { return FSList.end(); }
438
439 range overlays_range() { return llvm::reverse(C&: FSList); }
440 const_range overlays_range() const { return llvm::reverse(C: FSList); }
441
442protected:
443 void printImpl(raw_ostream &OS, PrintType Type,
444 unsigned IndentLevel) const override;
445 void visitChildFileSystems(VisitCallbackTy Callback) override;
446};
447
448/// By default, this delegates all calls to the underlying file system. This
449/// is useful when derived file systems want to override some calls and still
450/// proxy other calls.
451class ProxyFileSystem : public RTTIExtends<ProxyFileSystem, FileSystem> {
452public:
453 static const char ID;
454 explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
455 : FS(std::move(FS)) {}
456
457 llvm::ErrorOr<Status> status(const Twine &Path) override {
458 return FS->status(Path);
459 }
460 bool exists(const Twine &Path) override { return FS->exists(Path); }
461 llvm::ErrorOr<std::unique_ptr<File>>
462 openFileForRead(const Twine &Path) override {
463 return FS->openFileForRead(Path);
464 }
465 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
466 return FS->dir_begin(Dir, EC);
467 }
468 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
469 return FS->getCurrentWorkingDirectory();
470 }
471 std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
472 return FS->setCurrentWorkingDirectory(Path);
473 }
474 std::error_code getRealPath(const Twine &Path,
475 SmallVectorImpl<char> &Output) override {
476 return FS->getRealPath(Path, Output);
477 }
478 std::error_code isLocal(const Twine &Path, bool &Result) override {
479 return FS->isLocal(Path, Result);
480 }
481
482protected:
483 FileSystem &getUnderlyingFS() const { return *FS; }
484 void visitChildFileSystems(VisitCallbackTy Callback) override {
485 if (FS) {
486 Callback(*FS);
487 FS->visitChildFileSystems(Callback);
488 }
489 }
490
491private:
492 IntrusiveRefCntPtr<FileSystem> FS;
493
494 virtual void anchor() override;
495};
496
497namespace detail {
498
499class InMemoryDirectory;
500class InMemoryNode;
501
502struct NewInMemoryNodeInfo {
503 llvm::sys::fs::UniqueID DirUID;
504 StringRef Path;
505 StringRef Name;
506 time_t ModificationTime;
507 std::unique_ptr<llvm::MemoryBuffer> Buffer;
508 uint32_t User;
509 uint32_t Group;
510 llvm::sys::fs::file_type Type;
511 llvm::sys::fs::perms Perms;
512
513 Status makeStatus() const;
514};
515
516class NamedNodeOrError {
517 ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>>
518 Value;
519
520public:
521 NamedNodeOrError(llvm::SmallString<128> Name,
522 const detail::InMemoryNode *Node)
523 : Value(std::make_pair(x&: Name, y&: Node)) {}
524 NamedNodeOrError(std::error_code EC) : Value(EC) {}
525 NamedNodeOrError(llvm::errc EC) : Value(EC) {}
526
527 StringRef getName() const { return (*Value).first; }
528 explicit operator bool() const { return static_cast<bool>(Value); }
529 operator std::error_code() const { return Value.getError(); }
530 std::error_code getError() const { return Value.getError(); }
531 const detail::InMemoryNode *operator*() const { return (*Value).second; }
532};
533
534} // namespace detail
535
536/// An in-memory file system.
537class InMemoryFileSystem : public RTTIExtends<InMemoryFileSystem, FileSystem> {
538 std::unique_ptr<detail::InMemoryDirectory> Root;
539 std::string WorkingDirectory;
540 bool UseNormalizedPaths = true;
541
542public:
543 static const char ID;
544
545private:
546 using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>(
547 detail::NewInMemoryNodeInfo)>;
548
549 /// Create node with \p MakeNode and add it into this filesystem at \p Path.
550 bool addFile(const Twine &Path, time_t ModificationTime,
551 std::unique_ptr<llvm::MemoryBuffer> Buffer,
552 std::optional<uint32_t> User, std::optional<uint32_t> Group,
553 std::optional<llvm::sys::fs::file_type> Type,
554 std::optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode);
555
556 /// Looks up the in-memory node for the path \p P.
557 /// If \p FollowFinalSymlink is true, the returned node is guaranteed to
558 /// not be a symlink and its path may differ from \p P.
559 detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink,
560 size_t SymlinkDepth = 0) const;
561
562 class DirIterator;
563
564public:
565 explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
566 ~InMemoryFileSystem() override;
567
568 /// Add a file containing a buffer or a directory to the VFS with a
569 /// path. The VFS owns the buffer. If present, User, Group, Type
570 /// and Perms apply to the newly-created file or directory.
571 /// \return true if the file or directory was successfully added,
572 /// false if the file or directory already exists in the file system with
573 /// different contents.
574 bool addFile(const Twine &Path, time_t ModificationTime,
575 std::unique_ptr<llvm::MemoryBuffer> Buffer,
576 std::optional<uint32_t> User = std::nullopt,
577 std::optional<uint32_t> Group = std::nullopt,
578 std::optional<llvm::sys::fs::file_type> Type = std::nullopt,
579 std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
580
581 /// Add a hard link to a file.
582 ///
583 /// Here hard links are not intended to be fully equivalent to the classical
584 /// filesystem. Both the hard link and the file share the same buffer and
585 /// status (and thus have the same UniqueID). Because of this there is no way
586 /// to distinguish between the link and the file after the link has been
587 /// added.
588 ///
589 /// The \p Target path must be an existing file or a hardlink. The
590 /// \p NewLink file must not have been added before. The \p Target
591 /// path must not be a directory. The \p NewLink node is added as a hard
592 /// link which points to the resolved file of \p Target node.
593 /// \return true if the above condition is satisfied and hardlink was
594 /// successfully created, false otherwise.
595 bool addHardLink(const Twine &NewLink, const Twine &Target);
596
597 /// Arbitrary max depth to search through symlinks. We can get into problems
598 /// if a link links to a link that links back to the link, for example.
599 static constexpr size_t MaxSymlinkDepth = 16;
600
601 /// Add a symbolic link. Unlike a HardLink, because \p Target doesn't need
602 /// to refer to a file (or refer to anything, as it happens). Also, an
603 /// in-memory directory for \p Target isn't automatically created.
604 bool
605 addSymbolicLink(const Twine &NewLink, const Twine &Target,
606 time_t ModificationTime,
607 std::optional<uint32_t> User = std::nullopt,
608 std::optional<uint32_t> Group = std::nullopt,
609 std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
610
611 /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
612 /// If present, User, Group, Type and Perms apply to the newly-created file
613 /// or directory.
614 /// \return true if the file or directory was successfully added,
615 /// false if the file or directory already exists in the file system with
616 /// different contents.
617 bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
618 const llvm::MemoryBufferRef &Buffer,
619 std::optional<uint32_t> User = std::nullopt,
620 std::optional<uint32_t> Group = std::nullopt,
621 std::optional<llvm::sys::fs::file_type> Type = std::nullopt,
622 std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
623
624 std::string toString() const;
625
626 /// Return true if this file system normalizes . and .. in paths.
627 bool useNormalizedPaths() const { return UseNormalizedPaths; }
628
629 llvm::ErrorOr<Status> status(const Twine &Path) override;
630 llvm::ErrorOr<std::unique_ptr<File>>
631 openFileForRead(const Twine &Path) override;
632 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
633
634 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
635 return WorkingDirectory;
636 }
637 /// Canonicalizes \p Path by combining with the current working
638 /// directory and normalizing the path (e.g. remove dots). If the current
639 /// working directory is not set, this returns errc::operation_not_permitted.
640 ///
641 /// This doesn't resolve symlinks as they are not supported in in-memory file
642 /// system.
643 std::error_code getRealPath(const Twine &Path,
644 SmallVectorImpl<char> &Output) override;
645 std::error_code isLocal(const Twine &Path, bool &Result) override;
646 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
647
648protected:
649 void printImpl(raw_ostream &OS, PrintType Type,
650 unsigned IndentLevel) const override;
651};
652
653/// Get a globally unique ID for a virtual file or directory.
654llvm::sys::fs::UniqueID getNextVirtualUniqueID();
655
656/// Gets a \p FileSystem for a virtual file system described in YAML
657/// format.
658std::unique_ptr<FileSystem>
659getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer,
660 llvm::SourceMgr::DiagHandlerTy DiagHandler,
661 StringRef YAMLFilePath, void *DiagContext = nullptr,
662 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
663
/// A (VPath, RPath) pair plus a flag telling whether the entry is a
/// directory.
/// NOTE(review): presumably VPath is the virtual path and RPath the real
/// (external) path it maps to - confirm against the YAML VFS writer.
struct YAMLVFSEntry {
  std::string VPath;
  std::string RPath;
  bool IsDirectory = false;

  /// Build an entry from anything convertible to std::string; both paths are
  /// perfectly forwarded into the stored strings.
  template <typename T1, typename T2>
  YAMLVFSEntry(T1 &&VPath, T2 &&RPath, bool IsDirectory = false)
      : VPath(std::forward<T1>(VPath)), RPath(std::forward<T2>(RPath)),
        IsDirectory(IsDirectory) {}
};
673
674class RedirectingFSDirIterImpl;
675class RedirectingFileSystemParser;
676
677/// A virtual file system parsed from a YAML file.
678///
679/// Currently, this class allows creating virtual files and directories. Virtual
680/// files map to existing external files in \c ExternalFS, and virtual
681/// directories may either map to existing directories in \c ExternalFS or list
682/// their contents in the form of other virtual directories and/or files.
683///
684/// The basic structure of the parsed file is:
685/// \verbatim
686/// {
687/// 'version': <version number>,
688/// <optional configuration>
689/// 'roots': [
690/// <directory entries>
691/// ]
692/// }
693/// \endverbatim
694/// The roots may be absolute or relative. If relative they will be made
695/// absolute against either current working directory or the directory where
696/// the Overlay YAML file is located, depending on the 'root-relative'
697/// configuration.
698///
699/// All configuration options are optional.
700/// 'case-sensitive': <boolean, default=(true for Posix, false for Windows)>
701/// 'use-external-names': <boolean, default=true>
702/// 'root-relative': <string, one of 'cwd' or 'overlay-dir', default='cwd'>
703/// 'overlay-relative': <boolean, default=false>
704/// 'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with'
705/// instead>
706/// 'redirecting-with': <string, one of 'fallthrough', 'fallback', or
707/// 'redirect-only', default='fallthrough'>
708///
709/// To clarify, 'root-relative' option will prepend the current working
710/// directory, or the overlay directory to the 'roots->name' field only if
711/// 'roots->name' is a relative path. On the other hand, when 'overlay-relative'
712/// is set to 'true', external paths will always be prepended with the overlay
713/// directory, even if external paths are not relative paths. The
714/// 'root-relative' option has no interaction with the 'overlay-relative'
715/// option.
716///
717/// Virtual directories that list their contents are represented as
718/// \verbatim
719/// {
720/// 'type': 'directory',
721/// 'name': <string>,
722/// 'contents': [ <file or directory entries> ]
723/// }
724/// \endverbatim
725/// The default attributes for such virtual directories are:
726/// \verbatim
727/// MTime = now() when created
728/// Perms = 0777
729/// User = Group = 0
730/// Size = 0
731/// UniqueID = unspecified unique value
732/// \endverbatim
733/// When a path prefix matches such a directory, the next component in the path
734/// is matched against the entries in the 'contents' array.
735///
/// Re-mapped directories, on the other hand, are represented as
/// \verbatim
738/// {
739/// 'type': 'directory-remap',
740/// 'name': <string>,
741/// 'use-external-name': <boolean>, # Optional
742/// 'external-contents': <path to external directory>
743/// }
744/// \endverbatim
745/// and inherit their attributes from the external directory. When a path
746/// prefix matches such an entry, the unmatched components are appended to the
747/// 'external-contents' path, and the resulting path is looked up in the
748/// external file system instead.
749///
750/// Re-mapped files are represented as
751/// \verbatim
752/// {
753/// 'type': 'file',
754/// 'name': <string>,
755/// 'use-external-name': <boolean>, # Optional
756/// 'external-contents': <path to external file>
757/// }
758/// \endverbatim
759/// Their attributes and file contents are determined by looking up the file at
760/// their 'external-contents' path in the external file system.
761///
762/// For 'file', 'directory' and 'directory-remap' entries the 'name' field may
763/// contain multiple path components (e.g. /path/to/file). However, any
764/// directory in such a path that contains more than one child must be uniquely
765/// represented by a 'directory' entry.
766///
767/// When the 'use-external-name' field is set, calls to \a vfs::File::status()
768/// give the external (remapped) filesystem name instead of the name the file
769/// was accessed by. This is an intentional leak through the \a
770/// RedirectingFileSystem abstraction layer. It enables clients to discover
771/// (and use) the external file location when communicating with users or tools
772/// that don't use the same VFS overlay.
773///
774/// FIXME: 'use-external-name' causes behaviour that's inconsistent with how
775/// "real" filesystems behave. Maybe there should be a separate channel for
776/// this information.
777class RedirectingFileSystem
778 : public RTTIExtends<RedirectingFileSystem, vfs::FileSystem> {
779public:
780 static const char ID;
781 enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File };
782 enum NameKind { NK_NotSet, NK_External, NK_Virtual };
783
784 /// The type of redirection to perform.
785 enum class RedirectKind {
786 /// Lookup the redirected path first (ie. the one specified in
787 /// 'external-contents') and if that fails "fallthrough" to a lookup of the
788 /// originally provided path.
789 Fallthrough,
790 /// Lookup the provided path first and if that fails, "fallback" to a
791 /// lookup of the redirected path.
792 Fallback,
793 /// Only lookup the redirected path, do not lookup the originally provided
794 /// path.
795 RedirectOnly
796 };
797
798 /// The type of relative path used by Roots.
799 enum class RootRelativeKind {
800 /// The roots are relative to the current working directory.
801 CWD,
802 /// The roots are relative to the directory where the Overlay YAML file
803 // locates.
804 OverlayDir
805 };
806
807 /// A single file or directory in the VFS.
808 class Entry {
809 EntryKind Kind;
810 std::string Name;
811
812 public:
813 Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {}
814 virtual ~Entry() = default;
815
816 StringRef getName() const { return Name; }
817 EntryKind getKind() const { return Kind; }
818 };
819
820 /// A directory in the vfs with explicitly specified contents.
821 class DirectoryEntry : public Entry {
822 std::vector<std::unique_ptr<Entry>> Contents;
823 Status S;
824
825 public:
826 /// Constructs a directory entry with explicitly specified contents.
827 DirectoryEntry(StringRef Name, std::vector<std::unique_ptr<Entry>> Contents,
828 Status S)
829 : Entry(EK_Directory, Name), Contents(std::move(Contents)),
830 S(std::move(S)) {}
831
832 /// Constructs an empty directory entry.
833 DirectoryEntry(StringRef Name, Status S)
834 : Entry(EK_Directory, Name), S(std::move(S)) {}
835
836 Status getStatus() { return S; }
837
838 void addContent(std::unique_ptr<Entry> Content) {
839 Contents.push_back(x: std::move(Content));
840 }
841
842 Entry *getLastContent() const { return Contents.back().get(); }
843
844 using iterator = decltype(Contents)::iterator;
845
846 iterator contents_begin() { return Contents.begin(); }
847 iterator contents_end() { return Contents.end(); }
848
849 static bool classof(const Entry *E) { return E->getKind() == EK_Directory; }
850 };
851
852 /// A file or directory in the vfs that is mapped to a file or directory in
853 /// the external filesystem.
854 class RemapEntry : public Entry {
855 std::string ExternalContentsPath;
856 NameKind UseName;
857
858 protected:
859 RemapEntry(EntryKind K, StringRef Name, StringRef ExternalContentsPath,
860 NameKind UseName)
861 : Entry(K, Name), ExternalContentsPath(ExternalContentsPath),
862 UseName(UseName) {}
863
864 public:
865 StringRef getExternalContentsPath() const { return ExternalContentsPath; }
866
867 /// Whether to use the external path as the name for this file or directory.
868 bool useExternalName(bool GlobalUseExternalName) const {
869 return UseName == NK_NotSet ? GlobalUseExternalName
870 : (UseName == NK_External);
871 }
872
873 NameKind getUseName() const { return UseName; }
874
875 static bool classof(const Entry *E) {
876 switch (E->getKind()) {
877 case EK_DirectoryRemap:
878 [[fallthrough]];
879 case EK_File:
880 return true;
881 case EK_Directory:
882 return false;
883 }
884 llvm_unreachable("invalid entry kind");
885 }
886 };
887
888 /// A directory in the vfs that maps to a directory in the external file
889 /// system.
890 class DirectoryRemapEntry : public RemapEntry {
891 public:
892 DirectoryRemapEntry(StringRef Name, StringRef ExternalContentsPath,
893 NameKind UseName)
894 : RemapEntry(EK_DirectoryRemap, Name, ExternalContentsPath, UseName) {}
895
896 static bool classof(const Entry *E) {
897 return E->getKind() == EK_DirectoryRemap;
898 }
899 };
900
901 /// A file in the vfs that maps to a file in the external file system.
902 class FileEntry : public RemapEntry {
903 public:
904 FileEntry(StringRef Name, StringRef ExternalContentsPath, NameKind UseName)
905 : RemapEntry(EK_File, Name, ExternalContentsPath, UseName) {}
906
907 static bool classof(const Entry *E) { return E->getKind() == EK_File; }
908 };
909
910 /// Represents the result of a path lookup into the RedirectingFileSystem.
911 struct LookupResult {
912 /// Chain of parent directory entries for \c E.
913 llvm::SmallVector<Entry *, 32> Parents;
914
915 /// The entry the looked-up path corresponds to.
916 Entry *E;
917
918 private:
919 /// When the found Entry is a DirectoryRemapEntry, stores the path in the
920 /// external file system that the looked-up path in the virtual file system
921 // corresponds to.
922 std::optional<std::string> ExternalRedirect;
923
924 public:
925 LookupResult(Entry *E, sys::path::const_iterator Start,
926 sys::path::const_iterator End);
927
928 /// If the found Entry maps the input path to a path in the external
929 /// file system (i.e. it is a FileEntry or DirectoryRemapEntry), returns
930 /// that path.
931 std::optional<StringRef> getExternalRedirect() const {
932 if (isa<DirectoryRemapEntry>(Val: E))
933 return StringRef(*ExternalRedirect);
934 if (auto *FE = dyn_cast<FileEntry>(Val: E))
935 return FE->getExternalContentsPath();
936 return std::nullopt;
937 }
938
939 /// Get the (canonical) path of the found entry. This uses the as-written
940 /// path components from the VFS specification.
941 void getPath(llvm::SmallVectorImpl<char> &Path) const;
942 };
943
944private:
945 friend class RedirectingFSDirIterImpl;
946 friend class RedirectingFileSystemParser;
947
948 /// Canonicalize path by removing ".", "..", "./", components. This is
949 /// a VFS request, do not bother about symlinks in the path components
950 /// but canonicalize in order to perform the correct entry search.
951 std::error_code makeCanonicalForLookup(SmallVectorImpl<char> &Path) const;
952
953 /// Get the File status, or error, from the underlying external file system.
954 /// This returns the status with the originally requested name, while looking
955 /// up the entry using a potentially different path.
956 ErrorOr<Status> getExternalStatus(const Twine &LookupPath,
957 const Twine &OriginalPath) const;
958
959 /// Make \a Path an absolute path.
960 ///
961 /// Makes \a Path absolute using the \a WorkingDir if it is not already.
962 ///
963 /// /absolute/path => /absolute/path
964 /// relative/../path => <WorkingDir>/relative/../path
965 ///
966 /// \param WorkingDir A path that will be used as the base Dir if \a Path
967 /// is not already absolute.
968 /// \param Path A path that is modified to be an absolute path.
969 /// \returns success if \a path has been made absolute, otherwise a
970 /// platform-specific error_code.
971 std::error_code makeAbsolute(StringRef WorkingDir,
972 SmallVectorImpl<char> &Path) const;
973
974 // In a RedirectingFileSystem, keys can be specified in Posix or Windows
975 // style (or even a mixture of both), so this comparison helper allows
976 // slashes (representing a root) to match backslashes (and vice versa). Note
977 // that, other than the root, path components should not contain slashes or
978 // backslashes.
979 bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const {
980 if ((CaseSensitive ? lhs == rhs : lhs.equals_insensitive(RHS: rhs)))
981 return true;
982 return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/");
983 }
984
985 /// The root(s) of the virtual file system.
986 std::vector<std::unique_ptr<Entry>> Roots;
987
988 /// The current working directory of the file system.
989 std::string WorkingDirectory;
990
991 /// The file system to use for external references.
992 IntrusiveRefCntPtr<FileSystem> ExternalFS;
993
994 /// This represents the directory path that the YAML file is located.
995 /// This will be prefixed to each 'external-contents' if IsRelativeOverlay
996 /// is set. This will also be prefixed to each 'roots->name' if RootRelative
997 /// is set to RootRelativeKind::OverlayDir and the path is relative.
998 std::string OverlayFileDir;
999
1000 /// @name Configuration
1001 /// @{
1002
1003 /// Whether to perform case-sensitive comparisons.
1004 ///
1005 /// Currently, case-insensitive matching only works correctly with ASCII.
1006 bool CaseSensitive = is_style_posix(S: sys::path::Style::native);
1007
1008 /// IsRelativeOverlay marks whether a OverlayFileDir path must
1009 /// be prefixed in every 'external-contents' when reading from YAML files.
1010 bool IsRelativeOverlay = false;
1011
1012 /// Whether to use to use the value of 'external-contents' for the
1013 /// names of files. This global value is overridable on a per-file basis.
1014 bool UseExternalNames = true;
1015
1016 /// True if this FS has redirected a lookup. This does not include
1017 /// fallthrough.
1018 mutable bool HasBeenUsed = false;
1019
1020 /// Used to enable or disable updating `HasBeenUsed`.
1021 bool UsageTrackingActive = false;
1022
1023 /// Determines the lookups to perform, as well as their order. See
1024 /// \c RedirectKind for details.
1025 RedirectKind Redirection = RedirectKind::Fallthrough;
1026
1027 /// Determine the prefix directory if the roots are relative paths. See
1028 /// \c RootRelativeKind for details.
1029 RootRelativeKind RootRelative = RootRelativeKind::CWD;
1030 /// @}
1031
1032 RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS);
1033
1034 /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly recursing
1035 /// into the contents of \p From if it is a directory. Returns a LookupResult
1036 /// giving the matched entry and, if that entry is a FileEntry or
1037 /// DirectoryRemapEntry, the path it redirects to in the external file system.
1038 ErrorOr<LookupResult>
1039 lookupPathImpl(llvm::sys::path::const_iterator Start,
1040 llvm::sys::path::const_iterator End, Entry *From,
1041 llvm::SmallVectorImpl<Entry *> &Entries) const;
1042
1043 /// Get the status for a path with the provided \c LookupResult.
1044 ErrorOr<Status> status(const Twine &LookupPath, const Twine &OriginalPath,
1045 const LookupResult &Result);
1046
1047public:
1048 /// Looks up \p Path in \c Roots and returns a LookupResult giving the
1049 /// matched entry and, if the entry was a FileEntry or DirectoryRemapEntry,
1050 /// the path it redirects to in the external file system.
1051 ErrorOr<LookupResult> lookupPath(StringRef Path) const;
1052
1053 /// Parses \p Buffer, which is expected to be in YAML format and
1054 /// returns a virtual file system representing its contents.
1055 static std::unique_ptr<RedirectingFileSystem>
1056 create(std::unique_ptr<MemoryBuffer> Buffer,
1057 SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
1058 void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);
1059
1060 /// Redirect each of the remapped files from first to second.
1061 static std::unique_ptr<RedirectingFileSystem>
1062 create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles,
1063 bool UseExternalNames, FileSystem &ExternalFS);
1064
1065 ErrorOr<Status> status(const Twine &Path) override;
1066 bool exists(const Twine &Path) override;
1067 ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
1068
1069 std::error_code getRealPath(const Twine &Path,
1070 SmallVectorImpl<char> &Output) override;
1071
1072 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
1073
1074 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
1075
1076 std::error_code isLocal(const Twine &Path, bool &Result) override;
1077
1078 std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override;
1079
1080 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
1081
1082 void setOverlayFileDir(StringRef PrefixDir);
1083
1084 StringRef getOverlayFileDir() const;
1085
1086 /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly
1087 /// otherwise. Will removed in the future, use \c setRedirection instead.
1088 void setFallthrough(bool Fallthrough);
1089
1090 void setRedirection(RedirectingFileSystem::RedirectKind Kind);
1091
1092 std::vector<llvm::StringRef> getRoots() const;
1093
1094 bool hasBeenUsed() const { return HasBeenUsed; };
1095 void clearHasBeenUsed() { HasBeenUsed = false; }
1096
1097 void setUsageTrackingActive(bool Active) { UsageTrackingActive = Active; }
1098
1099 void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const;
1100
1101protected:
1102 void printImpl(raw_ostream &OS, PrintType Type,
1103 unsigned IndentLevel) const override;
1104 void visitChildFileSystems(VisitCallbackTy Callback) override;
1105};
1106
1107/// Collect all pairs of <virtual path, real path> entries from the
1108/// \p YAMLFilePath. This is used by the module dependency collector to forward
1109/// the entries into the reproducer output VFS YAML file.
1110void collectVFSFromYAML(
1111 std::unique_ptr<llvm::MemoryBuffer> Buffer,
1112 llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
1113 SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
1114 void *DiagContext = nullptr,
1115 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
1116
1117class YAMLVFSWriter {
1118 std::vector<YAMLVFSEntry> Mappings;
1119 std::optional<bool> IsCaseSensitive;
1120 std::optional<bool> IsOverlayRelative;
1121 std::optional<bool> UseExternalNames;
1122 std::string OverlayDir;
1123
1124 void addEntry(StringRef VirtualPath, StringRef RealPath, bool IsDirectory);
1125
1126public:
1127 YAMLVFSWriter() = default;
1128
1129 void addFileMapping(StringRef VirtualPath, StringRef RealPath);
1130 void addDirectoryMapping(StringRef VirtualPath, StringRef RealPath);
1131
1132 void setCaseSensitivity(bool CaseSensitive) {
1133 IsCaseSensitive = CaseSensitive;
1134 }
1135
1136 void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; }
1137
1138 void setOverlayDir(StringRef OverlayDirectory) {
1139 IsOverlayRelative = true;
1140 OverlayDir.assign(str: OverlayDirectory.str());
1141 }
1142
1143 const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; }
1144
1145 void write(llvm::raw_ostream &OS);
1146};
1147
1148/// File system that tracks the number of calls to the underlying file system.
1149/// This is particularly useful when wrapped around \c RealFileSystem to add
1150/// lightweight tracking of expensive syscalls.
1151class TracingFileSystem
1152 : public llvm::RTTIExtends<TracingFileSystem, ProxyFileSystem> {
1153public:
1154 static const char ID;
1155
1156 std::size_t NumStatusCalls = 0;
1157 std::size_t NumOpenFileForReadCalls = 0;
1158 std::size_t NumDirBeginCalls = 0;
1159 std::size_t NumGetRealPathCalls = 0;
1160 std::size_t NumExistsCalls = 0;
1161 std::size_t NumIsLocalCalls = 0;
1162
1163 TracingFileSystem(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
1164 : RTTIExtends(std::move(FS)) {}
1165
1166 ErrorOr<Status> status(const Twine &Path) override {
1167 ++NumStatusCalls;
1168 return ProxyFileSystem::status(Path);
1169 }
1170
1171 ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override {
1172 ++NumOpenFileForReadCalls;
1173 return ProxyFileSystem::openFileForRead(Path);
1174 }
1175
1176 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
1177 ++NumDirBeginCalls;
1178 return ProxyFileSystem::dir_begin(Dir, EC);
1179 }
1180
1181 std::error_code getRealPath(const Twine &Path,
1182 SmallVectorImpl<char> &Output) override {
1183 ++NumGetRealPathCalls;
1184 return ProxyFileSystem::getRealPath(Path, Output);
1185 }
1186
1187 bool exists(const Twine &Path) override {
1188 ++NumExistsCalls;
1189 return ProxyFileSystem::exists(Path);
1190 }
1191
1192 std::error_code isLocal(const Twine &Path, bool &Result) override {
1193 ++NumIsLocalCalls;
1194 return ProxyFileSystem::isLocal(Path, Result);
1195 }
1196
1197protected:
1198 void printImpl(raw_ostream &OS, PrintType Type,
1199 unsigned IndentLevel) const override;
1200};
1201
1202} // namespace vfs
1203} // namespace llvm
1204
1205#endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H
1206

// Source: include/llvm-20/llvm/Support/VirtualFileSystem.h