1//===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines the virtual file system interface vfs::FileSystem.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H
15#define LLVM_SUPPORT_VIRTUALFILESYSTEM_H
16
17#include "llvm/ADT/IntrusiveRefCntPtr.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/STLFunctionalExtras.h"
21#include "llvm/Support/Chrono.h"
22#include "llvm/Support/ErrorOr.h"
23#include "llvm/Support/Errc.h"
24#include "llvm/Support/FileSystem.h"
25#include "llvm/Support/Path.h"
26#include "llvm/Support/SourceMgr.h"
27#include <cassert>
28#include <cstdint>
29#include <ctime>
30#include <memory>
31#include <optional>
32#include <stack>
33#include <string>
34#include <system_error>
35#include <utility>
36#include <vector>
37
38namespace llvm {
39
40class MemoryBuffer;
41class MemoryBufferRef;
42class Twine;
43
44namespace vfs {
45
46/// The result of a \p status operation.
47class Status {
48 std::string Name;
49 llvm::sys::fs::UniqueID UID;
50 llvm::sys::TimePoint<> MTime;
51 uint32_t User;
52 uint32_t Group;
53 uint64_t Size;
54 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error;
55 llvm::sys::fs::perms Perms;
56
57public:
58 // FIXME: remove when files support multiple names
59 bool IsVFSMapped = false;
60
61 /// Whether this entity has an external path different from the virtual path,
62 /// and the external path is exposed by leaking it through the abstraction.
63 /// For example, a RedirectingFileSystem will set this for paths where
64 /// UseExternalName is true.
65 ///
66 /// FIXME: Currently the external path is exposed by replacing the virtual
67 /// path in this Status object. Instead, we should leave the path in the
68 /// Status intact (matching the requested virtual path) - see
69 /// FileManager::getFileRef for how how we plan to fix this.
70 bool ExposesExternalVFSPath = false;
71
72 Status() = default;
73 Status(const llvm::sys::fs::file_status &Status);
74 Status(const Twine &Name, llvm::sys::fs::UniqueID UID,
75 llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
76 uint64_t Size, llvm::sys::fs::file_type Type,
77 llvm::sys::fs::perms Perms);
78
79 /// Get a copy of a Status with a different size.
80 static Status copyWithNewSize(const Status &In, uint64_t NewSize);
81 /// Get a copy of a Status with a different name.
82 static Status copyWithNewName(const Status &In, const Twine &NewName);
83 static Status copyWithNewName(const llvm::sys::fs::file_status &In,
84 const Twine &NewName);
85
86 /// Returns the name that should be used for this file or directory.
87 StringRef getName() const { return Name; }
88
89 /// @name Status interface from llvm::sys::fs
90 /// @{
91 llvm::sys::fs::file_type getType() const { return Type; }
92 llvm::sys::fs::perms getPermissions() const { return Perms; }
93 llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; }
94 llvm::sys::fs::UniqueID getUniqueID() const { return UID; }
95 uint32_t getUser() const { return User; }
96 uint32_t getGroup() const { return Group; }
97 uint64_t getSize() const { return Size; }
98 /// @}
99 /// @name Status queries
100 /// These are static queries in llvm::sys::fs.
101 /// @{
102 bool equivalent(const Status &Other) const;
103 bool isDirectory() const;
104 bool isRegularFile() const;
105 bool isOther() const;
106 bool isSymlink() const;
107 bool isStatusKnown() const;
108 bool exists() const;
109 /// @}
110};
111
112/// Represents an open file.
113class File {
114public:
115 /// Destroy the file after closing it (if open).
116 /// Sub-classes should generally call close() inside their destructors. We
117 /// cannot do that from the base class, since close is virtual.
118 virtual ~File();
119
120 /// Get the status of the file.
121 virtual llvm::ErrorOr<Status> status() = 0;
122
123 /// Get the name of the file
124 virtual llvm::ErrorOr<std::string> getName() {
125 if (auto Status = status())
126 return Status->getName().str();
127 else
128 return Status.getError();
129 }
130
131 /// Get the contents of the file as a \p MemoryBuffer.
132 virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
133 getBuffer(const Twine &Name, int64_t FileSize = -1,
134 bool RequiresNullTerminator = true, bool IsVolatile = false) = 0;
135
136 /// Closes the file.
137 virtual std::error_code close() = 0;
138
139 // Get the same file with a different path.
140 static ErrorOr<std::unique_ptr<File>>
141 getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P);
142
143protected:
144 // Set the file's underlying path.
145 virtual void setPath(const Twine &Path) {}
146};
147
148/// A member of a directory, yielded by a directory_iterator.
149/// Only information available on most platforms is included.
150class directory_entry {
151 std::string Path;
152 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown;
153
154public:
155 directory_entry() = default;
156 directory_entry(std::string Path, llvm::sys::fs::file_type Type)
157 : Path(std::move(Path)), Type(Type) {}
158
159 llvm::StringRef path() const { return Path; }
160 llvm::sys::fs::file_type type() const { return Type; }
161};
162
163namespace detail {
164
165/// An interface for virtual file systems to provide an iterator over the
166/// (non-recursive) contents of a directory.
167struct DirIterImpl {
168 virtual ~DirIterImpl();
169
170 /// Sets \c CurrentEntry to the next entry in the directory on success,
171 /// to directory_entry() at end, or returns a system-defined \c error_code.
172 virtual std::error_code increment() = 0;
173
174 directory_entry CurrentEntry;
175};
176
177} // namespace detail
178
179/// An input iterator over the entries in a virtual path, similar to
180/// llvm::sys::fs::directory_iterator.
181class directory_iterator {
182 std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy
183
184public:
185 directory_iterator(std::shared_ptr<detail::DirIterImpl> I)
186 : Impl(std::move(I)) {
187 assert(Impl.get() != nullptr && "requires non-null implementation");
188 if (Impl->CurrentEntry.path().empty())
189 Impl.reset(); // Normalize the end iterator to Impl == nullptr.
190 }
191
192 /// Construct an 'end' iterator.
193 directory_iterator() = default;
194
195 /// Equivalent to operator++, with an error code.
196 directory_iterator &increment(std::error_code &EC) {
197 assert(Impl && "attempting to increment past end");
198 EC = Impl->increment();
199 if (Impl->CurrentEntry.path().empty())
200 Impl.reset(); // Normalize the end iterator to Impl == nullptr.
201 return *this;
202 }
203
204 const directory_entry &operator*() const { return Impl->CurrentEntry; }
205 const directory_entry *operator->() const { return &Impl->CurrentEntry; }
206
207 bool operator==(const directory_iterator &RHS) const {
208 if (Impl && RHS.Impl)
209 return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path();
210 return !Impl && !RHS.Impl;
211 }
212 bool operator!=(const directory_iterator &RHS) const {
213 return !(*this == RHS);
214 }
215};
216
217class FileSystem;
218
219namespace detail {
220
221/// Keeps state for the recursive_directory_iterator.
222struct RecDirIterState {
223 std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
224 bool HasNoPushRequest = false;
225};
226
227} // end namespace detail
228
229/// An input iterator over the recursive contents of a virtual path,
230/// similar to llvm::sys::fs::recursive_directory_iterator.
231class recursive_directory_iterator {
232 FileSystem *FS;
233 std::shared_ptr<detail::RecDirIterState>
234 State; // Input iterator semantics on copy.
235
236public:
237 recursive_directory_iterator(FileSystem &FS, const Twine &Path,
238 std::error_code &EC);
239
240 /// Construct an 'end' iterator.
241 recursive_directory_iterator() = default;
242
243 /// Equivalent to operator++, with an error code.
244 recursive_directory_iterator &increment(std::error_code &EC);
245
246 const directory_entry &operator*() const { return *State->Stack.top(); }
247 const directory_entry *operator->() const { return &*State->Stack.top(); }
248
249 bool operator==(const recursive_directory_iterator &Other) const {
250 return State == Other.State; // identity
251 }
252 bool operator!=(const recursive_directory_iterator &RHS) const {
253 return !(*this == RHS);
254 }
255
256 /// Gets the current level. Starting path is at level 0.
257 int level() const {
258 assert(!State->Stack.empty() &&
259 "Cannot get level without any iteration state");
260 return State->Stack.size() - 1;
261 }
262
263 void no_push() { State->HasNoPushRequest = true; }
264};
265
266/// The virtual file system interface.
267class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> {
268public:
269 virtual ~FileSystem();
270
271 /// Get the status of the entry at \p Path, if one exists.
272 virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0;
273
274 /// Get a \p File object for the file at \p Path, if one exists.
275 virtual llvm::ErrorOr<std::unique_ptr<File>>
276 openFileForRead(const Twine &Path) = 0;
277
278 /// This is a convenience method that opens a file, gets its content and then
279 /// closes the file.
280 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
281 getBufferForFile(const Twine &Name, int64_t FileSize = -1,
282 bool RequiresNullTerminator = true, bool IsVolatile = false);
283
284 /// Get a directory_iterator for \p Dir.
285 /// \note The 'end' iterator is directory_iterator().
286 virtual directory_iterator dir_begin(const Twine &Dir,
287 std::error_code &EC) = 0;
288
289 /// Set the working directory. This will affect all following operations on
290 /// this file system and may propagate down for nested file systems.
291 virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0;
292
293 /// Get the working directory of this file system.
294 virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0;
295
296 /// Gets real path of \p Path e.g. collapse all . and .. patterns, resolve
297 /// symlinks. For real file system, this uses `llvm::sys::fs::real_path`.
298 /// This returns errc::operation_not_permitted if not implemented by subclass.
299 virtual std::error_code getRealPath(const Twine &Path,
300 SmallVectorImpl<char> &Output) const;
301
302 /// Check whether a file exists. Provided for convenience.
303 bool exists(const Twine &Path);
304
305 /// Is the file mounted on a local filesystem?
306 virtual std::error_code isLocal(const Twine &Path, bool &Result);
307
308 /// Make \a Path an absolute path.
309 ///
310 /// Makes \a Path absolute using the current directory if it is not already.
311 /// An empty \a Path will result in the current directory.
312 ///
313 /// /absolute/path => /absolute/path
314 /// relative/../path => <current-directory>/relative/../path
315 ///
316 /// \param Path A path that is modified to be an absolute path.
317 /// \returns success if \a path has been made absolute, otherwise a
318 /// platform-specific error_code.
319 virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
320
321 enum class PrintType { Summary, Contents, RecursiveContents };
322 void print(raw_ostream &OS, PrintType Type = PrintType::Contents,
323 unsigned IndentLevel = 0) const {
324 printImpl(OS, Type, IndentLevel);
325 }
326
327#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
328 LLVM_DUMP_METHOD void dump() const;
329#endif
330
331protected:
332 virtual void printImpl(raw_ostream &OS, PrintType Type,
333 unsigned IndentLevel) const {
334 printIndent(OS, IndentLevel);
335 OS << "FileSystem\n";
336 }
337
338 void printIndent(raw_ostream &OS, unsigned IndentLevel) const {
339 for (unsigned i = 0; i < IndentLevel; ++i)
340 OS << " ";
341 }
342};
343
/// Gets a \p vfs::FileSystem for the 'real' file system, as seen by
/// the operating system.
346/// The working directory is linked to the process's working directory.
347/// (This is usually thread-hostile).
348IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
349
/// Create a \p vfs::FileSystem for the 'real' file system, as seen by
/// the operating system.
352/// It has its own working directory, independent of (but initially equal to)
353/// that of the process.
354std::unique_ptr<FileSystem> createPhysicalFileSystem();
355
356/// A file system that allows overlaying one \p AbstractFileSystem on top
357/// of another.
358///
359/// Consists of a stack of >=1 \p FileSystem objects, which are treated as being
360/// one merged file system. When there is a directory that exists in more than
361/// one file system, the \p OverlayFileSystem contains a directory containing
362/// the union of their contents. The attributes (permissions, etc.) of the
363/// top-most (most recently added) directory are used. When there is a file
364/// that exists in more than one file system, the file in the top-most file
365/// system overrides the other(s).
366class OverlayFileSystem : public FileSystem {
367 using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>;
368
369 /// The stack of file systems, implemented as a list in order of
370 /// their addition.
371 FileSystemList FSList;
372
373public:
374 OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base);
375
376 /// Pushes a file system on top of the stack.
377 void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS);
378
379 llvm::ErrorOr<Status> status(const Twine &Path) override;
380 llvm::ErrorOr<std::unique_ptr<File>>
381 openFileForRead(const Twine &Path) override;
382 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
383 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
384 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
385 std::error_code isLocal(const Twine &Path, bool &Result) override;
386 std::error_code getRealPath(const Twine &Path,
387 SmallVectorImpl<char> &Output) const override;
388
389 using iterator = FileSystemList::reverse_iterator;
390 using const_iterator = FileSystemList::const_reverse_iterator;
391 using reverse_iterator = FileSystemList::iterator;
392 using const_reverse_iterator = FileSystemList::const_iterator;
393 using range = iterator_range<iterator>;
394 using const_range = iterator_range<const_iterator>;
395
396 /// Get an iterator pointing to the most recently added file system.
397 iterator overlays_begin() { return FSList.rbegin(); }
398 const_iterator overlays_begin() const { return FSList.rbegin(); }
399
400 /// Get an iterator pointing one-past the least recently added file system.
401 iterator overlays_end() { return FSList.rend(); }
402 const_iterator overlays_end() const { return FSList.rend(); }
403
404 /// Get an iterator pointing to the least recently added file system.
405 reverse_iterator overlays_rbegin() { return FSList.begin(); }
406 const_reverse_iterator overlays_rbegin() const { return FSList.begin(); }
407
408 /// Get an iterator pointing one-past the most recently added file system.
409 reverse_iterator overlays_rend() { return FSList.end(); }
410 const_reverse_iterator overlays_rend() const { return FSList.end(); }
411
412 range overlays_range() { return llvm::reverse(C&: FSList); }
413 const_range overlays_range() const { return llvm::reverse(C: FSList); }
414
415protected:
416 void printImpl(raw_ostream &OS, PrintType Type,
417 unsigned IndentLevel) const override;
418};
419
420/// By default, this delegates all calls to the underlying file system. This
421/// is useful when derived file systems want to override some calls and still
422/// proxy other calls.
423class ProxyFileSystem : public FileSystem {
424public:
425 explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
426 : FS(std::move(FS)) {}
427
428 llvm::ErrorOr<Status> status(const Twine &Path) override {
429 return FS->status(Path);
430 }
431 llvm::ErrorOr<std::unique_ptr<File>>
432 openFileForRead(const Twine &Path) override {
433 return FS->openFileForRead(Path);
434 }
435 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
436 return FS->dir_begin(Dir, EC);
437 }
438 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
439 return FS->getCurrentWorkingDirectory();
440 }
441 std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
442 return FS->setCurrentWorkingDirectory(Path);
443 }
444 std::error_code getRealPath(const Twine &Path,
445 SmallVectorImpl<char> &Output) const override {
446 return FS->getRealPath(Path, Output);
447 }
448 std::error_code isLocal(const Twine &Path, bool &Result) override {
449 return FS->isLocal(Path, Result);
450 }
451
452protected:
453 FileSystem &getUnderlyingFS() const { return *FS; }
454
455private:
456 IntrusiveRefCntPtr<FileSystem> FS;
457
458 virtual void anchor();
459};
460
461namespace detail {
462
463class InMemoryDirectory;
464class InMemoryNode;
465
466struct NewInMemoryNodeInfo {
467 llvm::sys::fs::UniqueID DirUID;
468 StringRef Path;
469 StringRef Name;
470 time_t ModificationTime;
471 std::unique_ptr<llvm::MemoryBuffer> Buffer;
472 uint32_t User;
473 uint32_t Group;
474 llvm::sys::fs::file_type Type;
475 llvm::sys::fs::perms Perms;
476
477 Status makeStatus() const;
478};
479
480class NamedNodeOrError {
481 ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>>
482 Value;
483
484public:
485 NamedNodeOrError(llvm::SmallString<128> Name,
486 const detail::InMemoryNode *Node)
487 : Value(std::make_pair(x&: Name, y&: Node)) {}
488 NamedNodeOrError(std::error_code EC) : Value(EC) {}
489 NamedNodeOrError(llvm::errc EC) : Value(EC) {}
490
491 StringRef getName() const { return (*Value).first; }
492 explicit operator bool() const { return static_cast<bool>(Value); }
493 operator std::error_code() const { return Value.getError(); }
494 std::error_code getError() const { return Value.getError(); }
495 const detail::InMemoryNode *operator*() const { return (*Value).second; }
496};
497
498} // namespace detail
499
500/// An in-memory file system.
501class InMemoryFileSystem : public FileSystem {
502 std::unique_ptr<detail::InMemoryDirectory> Root;
503 std::string WorkingDirectory;
504 bool UseNormalizedPaths = true;
505
506 using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>(
507 detail::NewInMemoryNodeInfo)>;
508
509 /// Create node with \p MakeNode and add it into this filesystem at \p Path.
510 bool addFile(const Twine &Path, time_t ModificationTime,
511 std::unique_ptr<llvm::MemoryBuffer> Buffer,
512 std::optional<uint32_t> User, std::optional<uint32_t> Group,
513 std::optional<llvm::sys::fs::file_type> Type,
514 std::optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode);
515
516 /// Looks up the in-memory node for the path \p P.
517 /// If \p FollowFinalSymlink is true, the returned node is guaranteed to
518 /// not be a symlink and its path may differ from \p P.
519 detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink,
520 size_t SymlinkDepth = 0) const;
521
522 class DirIterator;
523
524public:
525 explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
526 ~InMemoryFileSystem() override;
527
528 /// Add a file containing a buffer or a directory to the VFS with a
529 /// path. The VFS owns the buffer. If present, User, Group, Type
530 /// and Perms apply to the newly-created file or directory.
531 /// \return true if the file or directory was successfully added,
532 /// false if the file or directory already exists in the file system with
533 /// different contents.
534 bool addFile(const Twine &Path, time_t ModificationTime,
535 std::unique_ptr<llvm::MemoryBuffer> Buffer,
536 std::optional<uint32_t> User = std::nullopt,
537 std::optional<uint32_t> Group = std::nullopt,
538 std::optional<llvm::sys::fs::file_type> Type = std::nullopt,
539 std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
540
541 /// Add a hard link to a file.
542 ///
543 /// Here hard links are not intended to be fully equivalent to the classical
544 /// filesystem. Both the hard link and the file share the same buffer and
545 /// status (and thus have the same UniqueID). Because of this there is no way
546 /// to distinguish between the link and the file after the link has been
547 /// added.
548 ///
549 /// The \p Target path must be an existing file or a hardlink. The
550 /// \p NewLink file must not have been added before. The \p Target
551 /// path must not be a directory. The \p NewLink node is added as a hard
552 /// link which points to the resolved file of \p Target node.
553 /// \return true if the above condition is satisfied and hardlink was
554 /// successfully created, false otherwise.
555 bool addHardLink(const Twine &NewLink, const Twine &Target);
556
557 /// Arbitrary max depth to search through symlinks. We can get into problems
558 /// if a link links to a link that links back to the link, for example.
559 static constexpr size_t MaxSymlinkDepth = 16;
560
561 /// Add a symbolic link. Unlike a HardLink, because \p Target doesn't need
562 /// to refer to a file (or refer to anything, as it happens). Also, an
563 /// in-memory directory for \p Target isn't automatically created.
564 bool
565 addSymbolicLink(const Twine &NewLink, const Twine &Target,
566 time_t ModificationTime,
567 std::optional<uint32_t> User = std::nullopt,
568 std::optional<uint32_t> Group = std::nullopt,
569 std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
570
571 /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
572 /// If present, User, Group, Type and Perms apply to the newly-created file
573 /// or directory.
574 /// \return true if the file or directory was successfully added,
575 /// false if the file or directory already exists in the file system with
576 /// different contents.
577 bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
578 const llvm::MemoryBufferRef &Buffer,
579 std::optional<uint32_t> User = std::nullopt,
580 std::optional<uint32_t> Group = std::nullopt,
581 std::optional<llvm::sys::fs::file_type> Type = std::nullopt,
582 std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
583
584 std::string toString() const;
585
586 /// Return true if this file system normalizes . and .. in paths.
587 bool useNormalizedPaths() const { return UseNormalizedPaths; }
588
589 llvm::ErrorOr<Status> status(const Twine &Path) override;
590 llvm::ErrorOr<std::unique_ptr<File>>
591 openFileForRead(const Twine &Path) override;
592 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
593
594 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
595 return WorkingDirectory;
596 }
597 /// Canonicalizes \p Path by combining with the current working
598 /// directory and normalizing the path (e.g. remove dots). If the current
599 /// working directory is not set, this returns errc::operation_not_permitted.
600 ///
601 /// This doesn't resolve symlinks as they are not supported in in-memory file
602 /// system.
603 std::error_code getRealPath(const Twine &Path,
604 SmallVectorImpl<char> &Output) const override;
605 std::error_code isLocal(const Twine &Path, bool &Result) override;
606 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
607
608protected:
609 void printImpl(raw_ostream &OS, PrintType Type,
610 unsigned IndentLevel) const override;
611};
612
613/// Get a globally unique ID for a virtual file or directory.
614llvm::sys::fs::UniqueID getNextVirtualUniqueID();
615
616/// Gets a \p FileSystem for a virtual file system described in YAML
617/// format.
618std::unique_ptr<FileSystem>
619getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer,
620 llvm::SourceMgr::DiagHandlerTy DiagHandler,
621 StringRef YAMLFilePath, void *DiagContext = nullptr,
622 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
623
/// One virtual-path to real-path mapping, with a flag marking directories.
struct YAMLVFSEntry {
  /// Builds an entry by forwarding \p VPath and \p RPath into the
  /// corresponding string members; \p IsDirectory defaults to false.
  template <typename VirtualPathT, typename RealPathT>
  YAMLVFSEntry(VirtualPathT &&VPath, RealPathT &&RPath,
               bool IsDirectory = false)
      : VPath(std::forward<VirtualPathT>(VPath)),
        RPath(std::forward<RealPathT>(RPath)), IsDirectory(IsDirectory) {}

  std::string VPath;
  std::string RPath;
  bool IsDirectory = false;
};
633
634class RedirectingFSDirIterImpl;
635class RedirectingFileSystemParser;
636
637/// A virtual file system parsed from a YAML file.
638///
639/// Currently, this class allows creating virtual files and directories. Virtual
640/// files map to existing external files in \c ExternalFS, and virtual
641/// directories may either map to existing directories in \c ExternalFS or list
642/// their contents in the form of other virtual directories and/or files.
643///
644/// The basic structure of the parsed file is:
645/// \verbatim
646/// {
647/// 'version': <version number>,
648/// <optional configuration>
649/// 'roots': [
650/// <directory entries>
651/// ]
652/// }
653/// \endverbatim
654///
655/// The roots may be absolute or relative. If relative they will be made
656/// absolute against either current working directory or the directory where
657/// the Overlay YAML file is located, depending on the 'root-relative'
658/// configuration.
659///
660/// All configuration options are optional.
661/// 'case-sensitive': <boolean, default=(true for Posix, false for Windows)>
662/// 'use-external-names': <boolean, default=true>
663/// 'root-relative': <string, one of 'cwd' or 'overlay-dir', default='cwd'>
664/// 'overlay-relative': <boolean, default=false>
665/// 'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with'
666/// instead>
667/// 'redirecting-with': <string, one of 'fallthrough', 'fallback', or
668/// 'redirect-only', default='fallthrough'>
669///
670/// To clarify, 'root-relative' option will prepend the current working
671/// directory, or the overlay directory to the 'roots->name' field only if
672/// 'roots->name' is a relative path. On the other hand, when 'overlay-relative'
673/// is set to 'true', external paths will always be prepended with the overlay
674/// directory, even if external paths are not relative paths. The
675/// 'root-relative' option has no interaction with the 'overlay-relative'
676/// option.
677///
678/// Virtual directories that list their contents are represented as
679/// \verbatim
680/// {
681/// 'type': 'directory',
682/// 'name': <string>,
683/// 'contents': [ <file or directory entries> ]
684/// }
685/// \endverbatim
686///
687/// The default attributes for such virtual directories are:
688/// \verbatim
689/// MTime = now() when created
690/// Perms = 0777
691/// User = Group = 0
692/// Size = 0
693/// UniqueID = unspecified unique value
694/// \endverbatim
695///
696/// When a path prefix matches such a directory, the next component in the path
697/// is matched against the entries in the 'contents' array.
698///
699/// Re-mapped directories, on the other hand, are represented as
/// \verbatim
701/// {
702/// 'type': 'directory-remap',
703/// 'name': <string>,
704/// 'use-external-name': <boolean>, # Optional
705/// 'external-contents': <path to external directory>
706/// }
707/// \endverbatim
708///
709/// and inherit their attributes from the external directory. When a path
710/// prefix matches such an entry, the unmatched components are appended to the
711/// 'external-contents' path, and the resulting path is looked up in the
712/// external file system instead.
713///
714/// Re-mapped files are represented as
715/// \verbatim
716/// {
717/// 'type': 'file',
718/// 'name': <string>,
719/// 'use-external-name': <boolean>, # Optional
720/// 'external-contents': <path to external file>
721/// }
722/// \endverbatim
723///
724/// Their attributes and file contents are determined by looking up the file at
725/// their 'external-contents' path in the external file system.
726///
727/// For 'file', 'directory' and 'directory-remap' entries the 'name' field may
728/// contain multiple path components (e.g. /path/to/file). However, any
729/// directory in such a path that contains more than one child must be uniquely
730/// represented by a 'directory' entry.
731///
732/// When the 'use-external-name' field is set, calls to \a vfs::File::status()
733/// give the external (remapped) filesystem name instead of the name the file
734/// was accessed by. This is an intentional leak through the \a
735/// RedirectingFileSystem abstraction layer. It enables clients to discover
736/// (and use) the external file location when communicating with users or tools
737/// that don't use the same VFS overlay.
738///
739/// FIXME: 'use-external-name' causes behaviour that's inconsistent with how
740/// "real" filesystems behave. Maybe there should be a separate channel for
741/// this information.
742class RedirectingFileSystem : public vfs::FileSystem {
743public:
744 enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File };
745 enum NameKind { NK_NotSet, NK_External, NK_Virtual };
746
747 /// The type of redirection to perform.
748 enum class RedirectKind {
749 /// Lookup the redirected path first (ie. the one specified in
750 /// 'external-contents') and if that fails "fallthrough" to a lookup of the
751 /// originally provided path.
752 Fallthrough,
753 /// Lookup the provided path first and if that fails, "fallback" to a
754 /// lookup of the redirected path.
755 Fallback,
756 /// Only lookup the redirected path, do not lookup the originally provided
757 /// path.
758 RedirectOnly
759 };
760
761 /// The type of relative path used by Roots.
762 enum class RootRelativeKind {
763 /// The roots are relative to the current working directory.
764 CWD,
    /// The roots are relative to the directory where the Overlay YAML file
    /// is located.
767 OverlayDir
768 };
769
770 /// A single file or directory in the VFS.
771 class Entry {
772 EntryKind Kind;
773 std::string Name;
774
775 public:
776 Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {}
777 virtual ~Entry() = default;
778
779 StringRef getName() const { return Name; }
780 EntryKind getKind() const { return Kind; }
781 };
782
783 /// A directory in the vfs with explicitly specified contents.
784 class DirectoryEntry : public Entry {
785 std::vector<std::unique_ptr<Entry>> Contents;
786 Status S;
787
788 public:
789 /// Constructs a directory entry with explicitly specified contents.
790 DirectoryEntry(StringRef Name, std::vector<std::unique_ptr<Entry>> Contents,
791 Status S)
792 : Entry(EK_Directory, Name), Contents(std::move(Contents)),
793 S(std::move(S)) {}
794
795 /// Constructs an empty directory entry.
796 DirectoryEntry(StringRef Name, Status S)
797 : Entry(EK_Directory, Name), S(std::move(S)) {}
798
799 Status getStatus() { return S; }
800
801 void addContent(std::unique_ptr<Entry> Content) {
802 Contents.push_back(x: std::move(Content));
803 }
804
805 Entry *getLastContent() const { return Contents.back().get(); }
806
807 using iterator = decltype(Contents)::iterator;
808
809 iterator contents_begin() { return Contents.begin(); }
810 iterator contents_end() { return Contents.end(); }
811
812 static bool classof(const Entry *E) { return E->getKind() == EK_Directory; }
813 };
814
815 /// A file or directory in the vfs that is mapped to a file or directory in
816 /// the external filesystem.
817 class RemapEntry : public Entry {
818 std::string ExternalContentsPath;
819 NameKind UseName;
820
821 protected:
822 RemapEntry(EntryKind K, StringRef Name, StringRef ExternalContentsPath,
823 NameKind UseName)
824 : Entry(K, Name), ExternalContentsPath(ExternalContentsPath),
825 UseName(UseName) {}
826
827 public:
828 StringRef getExternalContentsPath() const { return ExternalContentsPath; }
829
830 /// Whether to use the external path as the name for this file or directory.
831 bool useExternalName(bool GlobalUseExternalName) const {
832 return UseName == NK_NotSet ? GlobalUseExternalName
833 : (UseName == NK_External);
834 }
835
836 NameKind getUseName() const { return UseName; }
837
838 static bool classof(const Entry *E) {
839 switch (E->getKind()) {
840 case EK_DirectoryRemap:
841 [[fallthrough]];
842 case EK_File:
843 return true;
844 case EK_Directory:
845 return false;
846 }
847 llvm_unreachable("invalid entry kind");
848 }
849 };
850
851 /// A directory in the vfs that maps to a directory in the external file
852 /// system.
853 class DirectoryRemapEntry : public RemapEntry {
854 public:
855 DirectoryRemapEntry(StringRef Name, StringRef ExternalContentsPath,
856 NameKind UseName)
857 : RemapEntry(EK_DirectoryRemap, Name, ExternalContentsPath, UseName) {}
858
859 static bool classof(const Entry *E) {
860 return E->getKind() == EK_DirectoryRemap;
861 }
862 };
863
864 /// A file in the vfs that maps to a file in the external file system.
865 class FileEntry : public RemapEntry {
866 public:
867 FileEntry(StringRef Name, StringRef ExternalContentsPath, NameKind UseName)
868 : RemapEntry(EK_File, Name, ExternalContentsPath, UseName) {}
869
870 static bool classof(const Entry *E) { return E->getKind() == EK_File; }
871 };
872
873 /// Represents the result of a path lookup into the RedirectingFileSystem.
874 struct LookupResult {
875 /// Chain of parent directory entries for \c E.
876 llvm::SmallVector<Entry *, 32> Parents;
877
878 /// The entry the looked-up path corresponds to.
879 Entry *E;
880
881 private:
882 /// When the found Entry is a DirectoryRemapEntry, stores the path in the
883 /// external file system that the looked-up path in the virtual file system
884 // corresponds to.
885 std::optional<std::string> ExternalRedirect;
886
887 public:
888 LookupResult(Entry *E, sys::path::const_iterator Start,
889 sys::path::const_iterator End);
890
891 /// If the found Entry maps the the input path to a path in the external
892 /// file system (i.e. it is a FileEntry or DirectoryRemapEntry), returns
893 /// that path.
894 std::optional<StringRef> getExternalRedirect() const {
895 if (isa<DirectoryRemapEntry>(Val: E))
896 return StringRef(*ExternalRedirect);
897 if (auto *FE = dyn_cast<FileEntry>(Val: E))
898 return FE->getExternalContentsPath();
899 return std::nullopt;
900 }
901
902 /// Get the (canonical) path of the found entry. This uses the as-written
903 /// path components from the VFS specification.
904 void getPath(llvm::SmallVectorImpl<char> &Path) const;
905 };
906
907private:
908 friend class RedirectingFSDirIterImpl;
909 friend class RedirectingFileSystemParser;
910
911 /// Canonicalize path by removing ".", "..", "./", components. This is
912 /// a VFS request, do not bother about symlinks in the path components
913 /// but canonicalize in order to perform the correct entry search.
914 std::error_code makeCanonical(SmallVectorImpl<char> &Path) const;
915
916 /// Get the File status, or error, from the underlying external file system.
917 /// This returns the status with the originally requested name, while looking
918 /// up the entry using the canonical path.
919 ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath,
920 const Twine &OriginalPath) const;
921
922 /// Make \a Path an absolute path.
923 ///
924 /// Makes \a Path absolute using the \a WorkingDir if it is not already.
925 ///
926 /// /absolute/path => /absolute/path
927 /// relative/../path => <WorkingDir>/relative/../path
928 ///
929 /// \param WorkingDir A path that will be used as the base Dir if \a Path
930 /// is not already absolute.
931 /// \param Path A path that is modified to be an absolute path.
932 /// \returns success if \a path has been made absolute, otherwise a
933 /// platform-specific error_code.
934 std::error_code makeAbsolute(StringRef WorkingDir,
935 SmallVectorImpl<char> &Path) const;
936
937 // In a RedirectingFileSystem, keys can be specified in Posix or Windows
938 // style (or even a mixture of both), so this comparison helper allows
939 // slashes (representing a root) to match backslashes (and vice versa). Note
940 // that, other than the root, path components should not contain slashes or
941 // backslashes.
942 bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const {
943 if ((CaseSensitive ? lhs.equals(RHS: rhs) : lhs.equals_insensitive(RHS: rhs)))
944 return true;
945 return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/");
946 }
947
948 /// The root(s) of the virtual file system.
949 std::vector<std::unique_ptr<Entry>> Roots;
950
951 /// The current working directory of the file system.
952 std::string WorkingDirectory;
953
954 /// The file system to use for external references.
955 IntrusiveRefCntPtr<FileSystem> ExternalFS;
956
957 /// This represents the directory path that the YAML file is located.
958 /// This will be prefixed to each 'external-contents' if IsRelativeOverlay
959 /// is set. This will also be prefixed to each 'roots->name' if RootRelative
960 /// is set to RootRelativeKind::OverlayDir and the path is relative.
961 std::string OverlayFileDir;
962
963 /// @name Configuration
964 /// @{
965
966 /// Whether to perform case-sensitive comparisons.
967 ///
968 /// Currently, case-insensitive matching only works correctly with ASCII.
969 bool CaseSensitive = is_style_posix(S: sys::path::Style::native);
970
971 /// IsRelativeOverlay marks whether a OverlayFileDir path must
972 /// be prefixed in every 'external-contents' when reading from YAML files.
973 bool IsRelativeOverlay = false;
974
975 /// Whether to use to use the value of 'external-contents' for the
976 /// names of files. This global value is overridable on a per-file basis.
977 bool UseExternalNames = true;
978
979 /// Determines the lookups to perform, as well as their order. See
980 /// \c RedirectKind for details.
981 RedirectKind Redirection = RedirectKind::Fallthrough;
982
983 /// Determine the prefix directory if the roots are relative paths. See
984 /// \c RootRelativeKind for details.
985 RootRelativeKind RootRelative = RootRelativeKind::CWD;
986 /// @}
987
988 RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS);
989
990 /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly recursing
991 /// into the contents of \p From if it is a directory. Returns a LookupResult
992 /// giving the matched entry and, if that entry is a FileEntry or
993 /// DirectoryRemapEntry, the path it redirects to in the external file system.
994 ErrorOr<LookupResult>
995 lookupPathImpl(llvm::sys::path::const_iterator Start,
996 llvm::sys::path::const_iterator End, Entry *From,
997 llvm::SmallVectorImpl<Entry *> &Entries) const;
998
999 /// Get the status for a path with the provided \c LookupResult.
1000 ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath,
1001 const LookupResult &Result);
1002
1003public:
1004 /// Looks up \p Path in \c Roots and returns a LookupResult giving the
1005 /// matched entry and, if the entry was a FileEntry or DirectoryRemapEntry,
1006 /// the path it redirects to in the external file system.
1007 ErrorOr<LookupResult> lookupPath(StringRef Path) const;
1008
1009 /// Parses \p Buffer, which is expected to be in YAML format and
1010 /// returns a virtual file system representing its contents.
1011 static std::unique_ptr<RedirectingFileSystem>
1012 create(std::unique_ptr<MemoryBuffer> Buffer,
1013 SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
1014 void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);
1015
1016 /// Redirect each of the remapped files from first to second.
1017 static std::unique_ptr<RedirectingFileSystem>
1018 create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles,
1019 bool UseExternalNames, FileSystem &ExternalFS);
1020
1021 ErrorOr<Status> status(const Twine &Path) override;
1022 ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
1023
1024 std::error_code getRealPath(const Twine &Path,
1025 SmallVectorImpl<char> &Output) const override;
1026
1027 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
1028
1029 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
1030
1031 std::error_code isLocal(const Twine &Path, bool &Result) override;
1032
1033 std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override;
1034
1035 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
1036
1037 void setOverlayFileDir(StringRef PrefixDir);
1038
1039 StringRef getOverlayFileDir() const;
1040
1041 /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly
1042 /// otherwise. Will removed in the future, use \c setRedirection instead.
1043 void setFallthrough(bool Fallthrough);
1044
1045 void setRedirection(RedirectingFileSystem::RedirectKind Kind);
1046
1047 std::vector<llvm::StringRef> getRoots() const;
1048
1049 void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const;
1050
1051protected:
1052 void printImpl(raw_ostream &OS, PrintType Type,
1053 unsigned IndentLevel) const override;
1054};
1055
1056/// Collect all pairs of <virtual path, real path> entries from the
1057/// \p YAMLFilePath. This is used by the module dependency collector to forward
1058/// the entries into the reproducer output VFS YAML file.
1059void collectVFSFromYAML(
1060 std::unique_ptr<llvm::MemoryBuffer> Buffer,
1061 llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
1062 SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
1063 void *DiagContext = nullptr,
1064 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
1065
1066class YAMLVFSWriter {
1067 std::vector<YAMLVFSEntry> Mappings;
1068 std::optional<bool> IsCaseSensitive;
1069 std::optional<bool> IsOverlayRelative;
1070 std::optional<bool> UseExternalNames;
1071 std::string OverlayDir;
1072
1073 void addEntry(StringRef VirtualPath, StringRef RealPath, bool IsDirectory);
1074
1075public:
1076 YAMLVFSWriter() = default;
1077
1078 void addFileMapping(StringRef VirtualPath, StringRef RealPath);
1079 void addDirectoryMapping(StringRef VirtualPath, StringRef RealPath);
1080
1081 void setCaseSensitivity(bool CaseSensitive) {
1082 IsCaseSensitive = CaseSensitive;
1083 }
1084
1085 void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; }
1086
1087 void setOverlayDir(StringRef OverlayDirectory) {
1088 IsOverlayRelative = true;
1089 OverlayDir.assign(str: OverlayDirectory.str());
1090 }
1091
1092 const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; }
1093
1094 void write(llvm::raw_ostream &OS);
1095};
1096
1097} // namespace vfs
1098} // namespace llvm
1099
1100#endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H
1101

// Source: include/llvm-17/llvm/Support/VirtualFileSystem.h