| 1 | //===-- FileCollector.h -----------------------------------------*- C++ -*-===// | 
| 2 | // | 
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
| 6 | // | 
| 7 | //===----------------------------------------------------------------------===// | 
| 8 |  | 
| 9 | #ifndef LLVM_SUPPORT_FILECOLLECTOR_H | 
| 10 | #define LLVM_SUPPORT_FILECOLLECTOR_H | 
| 11 |  | 
| 12 | #include "llvm/ADT/StringMap.h" | 
| 13 | #include "llvm/ADT/StringSet.h" | 
| 14 | #include "llvm/Support/VirtualFileSystem.h" | 
| 15 | #include <mutex> | 
| 16 | #include <string> | 
| 17 |  | 
| 18 | namespace llvm { | 
| 19 | class FileCollectorFileSystem; | 
| 20 | class Twine; | 
| 21 |  | 
| 22 | class FileCollectorBase { | 
| 23 | public: | 
| 24 |   FileCollectorBase(); | 
| 25 |   virtual ~FileCollectorBase(); | 
| 26 |  | 
| 27 |   void addFile(const Twine &file); | 
| 28 |   void addDirectory(const Twine &Dir); | 
| 29 |  | 
| 30 | protected: | 
| 31 |   bool markAsSeen(StringRef Path) { | 
| 32 |     if (Path.empty()) | 
| 33 |       return false; | 
| 34 |     return Seen.insert(key: Path).second; | 
| 35 |   } | 
| 36 |  | 
| 37 |   virtual void addFileImpl(StringRef SrcPath) = 0; | 
| 38 |  | 
| 39 |   virtual llvm::vfs::directory_iterator | 
| 40 |   addDirectoryImpl(const llvm::Twine &Dir, | 
| 41 |                    IntrusiveRefCntPtr<vfs::FileSystem> FS, | 
| 42 |                    std::error_code &EC) = 0; | 
| 43 |  | 
| 44 |   /// Synchronizes access to internal data structures. | 
| 45 |   std::mutex Mutex; | 
| 46 |  | 
| 47 |   /// Tracks already seen files so they can be skipped. | 
| 48 |   StringSet<> Seen; | 
| 49 | }; | 
| 50 |  | 
| 51 | /// Captures file system interaction and generates data to be later replayed | 
| 52 | /// with the RedirectingFileSystem. | 
| 53 | /// | 
| 54 | /// For any file that gets accessed we eventually create: | 
| 55 | /// - a copy of the file inside Root | 
| 56 | /// - a record in RedirectingFileSystem mapping that maps: | 
| 57 | ///   current real path -> path to the copy in Root | 
| 58 | /// | 
| 59 | /// That intent is that later when the mapping is used by RedirectingFileSystem | 
| 60 | /// it simulates the state of FS that we collected. | 
| 61 | /// | 
| 62 | /// We generate file copies and mapping lazily - see writeMapping and copyFiles. | 
| 63 | /// We don't try to capture the state of the file at the exact time when it's | 
| 64 | /// accessed. Files might get changed, deleted ... we record only the "final" | 
| 65 | /// state. | 
| 66 | /// | 
| 67 | /// In order to preserve the relative topology of files we use their real paths | 
| 68 | /// as relative paths inside of the Root. | 
| 69 | class FileCollector : public FileCollectorBase { | 
| 70 | public: | 
| 71 |   /// Helper utility that encapsulates the logic for canonicalizing a virtual | 
| 72 |   /// path and a path to copy from. | 
| 73 |   class PathCanonicalizer { | 
| 74 |   public: | 
| 75 |     struct PathStorage { | 
| 76 |       SmallString<256> CopyFrom; | 
| 77 |       SmallString<256> VirtualPath; | 
| 78 |     }; | 
| 79 |  | 
| 80 |     /// Canonicalize a pair of virtual and real paths. | 
| 81 |     PathStorage canonicalize(StringRef SrcPath); | 
| 82 |  | 
| 83 |   private: | 
| 84 |     /// Replace with a (mostly) real path, or don't modify. Resolves symlinks | 
| 85 |     /// in the directory, using \a CachedDirs to avoid redundant lookups, but | 
| 86 |     /// leaves the filename as a possible symlink. | 
| 87 |     void updateWithRealPath(SmallVectorImpl<char> &Path); | 
| 88 |  | 
| 89 |     StringMap<std::string> CachedDirs; | 
| 90 |   }; | 
| 91 |  | 
| 92 |   /// \p Root is the directory where collected files are will be stored. | 
| 93 |   /// \p OverlayRoot is VFS mapping root. | 
| 94 |   /// \p Root directory gets created in copyFiles unless it already exists. | 
| 95 |   FileCollector(std::string Root, std::string OverlayRoot); | 
| 96 |  | 
| 97 |   /// Write the yaml mapping (for the VFS) to the given file. | 
| 98 |   std::error_code writeMapping(StringRef MappingFile); | 
| 99 |  | 
| 100 |   /// Copy the files into the root directory. | 
| 101 |   /// | 
| 102 |   /// When StopOnError is true (the default) we abort as soon as one file | 
| 103 |   /// cannot be copied. This is relatively common, for example when a file was | 
| 104 |   /// removed after it was added to the mapping. | 
| 105 |   std::error_code copyFiles(bool StopOnError = true); | 
| 106 |  | 
| 107 |   /// Create a VFS that uses \p Collector to collect files accessed via \p | 
| 108 |   /// BaseFS. | 
| 109 |   static IntrusiveRefCntPtr<vfs::FileSystem> | 
| 110 |   createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS, | 
| 111 |                      std::shared_ptr<FileCollector> Collector); | 
| 112 |  | 
| 113 | private: | 
| 114 |   friend FileCollectorFileSystem; | 
| 115 |  | 
| 116 |   void addFileToMapping(StringRef VirtualPath, StringRef RealPath) { | 
| 117 |     if (sys::fs::is_directory(Path: VirtualPath)) | 
| 118 |       VFSWriter.addDirectoryMapping(VirtualPath, RealPath); | 
| 119 |     else | 
| 120 |       VFSWriter.addFileMapping(VirtualPath, RealPath); | 
| 121 |   } | 
| 122 |  | 
| 123 | protected: | 
| 124 |   void addFileImpl(StringRef SrcPath) override; | 
| 125 |  | 
| 126 |   llvm::vfs::directory_iterator | 
| 127 |   addDirectoryImpl(const llvm::Twine &Dir, | 
| 128 |                    IntrusiveRefCntPtr<vfs::FileSystem> FS, | 
| 129 |                    std::error_code &EC) override; | 
| 130 |  | 
| 131 |   /// The directory where collected files are copied to in copyFiles(). | 
| 132 |   const std::string Root; | 
| 133 |  | 
| 134 |   /// The root directory where the VFS overlay lives. | 
| 135 |   const std::string OverlayRoot; | 
| 136 |  | 
| 137 |   /// The yaml mapping writer. | 
| 138 |   vfs::YAMLVFSWriter VFSWriter; | 
| 139 |  | 
| 140 |   /// Helper utility for canonicalizing paths. | 
| 141 |   PathCanonicalizer Canonicalizer; | 
| 142 | }; | 
| 143 |  | 
| 144 | } // end namespace llvm | 
| 145 |  | 
| 146 | #endif // LLVM_SUPPORT_FILECOLLECTOR_H | 
| 147 |  |