1 | //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H |
10 | #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H |
11 | |
12 | #include "clang/Basic/LLVM.h" |
13 | #include "clang/Basic/SourceManager.h" |
14 | #include "clang/Frontend/CompilerInvocation.h" |
15 | #include "clang/Frontend/Utils.h" |
16 | #include "clang/Lex/HeaderSearch.h" |
17 | #include "clang/Lex/PPCallbacks.h" |
18 | #include "clang/Serialization/ASTReader.h" |
19 | #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" |
20 | #include "llvm/ADT/DenseMap.h" |
21 | #include "llvm/ADT/Hashing.h" |
22 | #include "llvm/ADT/StringSet.h" |
23 | #include "llvm/Support/raw_ostream.h" |
24 | #include <optional> |
25 | #include <string> |
26 | #include <unordered_map> |
27 | #include <variant> |
28 | |
29 | namespace clang { |
30 | namespace tooling { |
31 | namespace dependencies { |
32 | |
// Forward declarations: this header only refers to these types by reference.
class DependencyActionController;
class DependencyConsumer;
35 | |
36 | /// Modular dependency that has already been built prior to the dependency scan. |
37 | struct PrebuiltModuleDep { |
38 | std::string ModuleName; |
39 | std::string PCMFile; |
40 | std::string ModuleMapFile; |
41 | |
42 | explicit PrebuiltModuleDep(const Module *M) |
43 | : ModuleName(M->getTopLevelModuleName()), |
44 | PCMFile(M->getASTFile()->getName()), |
45 | ModuleMapFile(M->PresumedModuleMapFile) {} |
46 | }; |
47 | |
/// This is used to identify a specific module.
struct ModuleID {
  /// The name of the module. This may include `:` for C++20 module partitions,
  /// or a header-name for C++20 header units.
  std::string ModuleName;

  /// The context hash of a module represents the compiler options that affect
  /// the resulting command-line invocation.
  ///
  /// Modules with the same name and ContextHash but different invocations could
  /// cause non-deterministic build results.
  ///
  /// Modules with the same name but a different \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// Two IDs are equal only when both the module name and the context hash
  /// compare equal.
  bool operator==(const ModuleID &Other) const {
    return std::tie(ModuleName, ContextHash) ==
           std::tie(Other.ModuleName, Other.ContextHash);
  }

  /// Lexicographic ordering: by module name first, then by context hash.
  bool operator<(const ModuleID &Other) const {
    return std::tie(ModuleName, ContextHash) <
           std::tie(Other.ModuleName, Other.ContextHash);
  }
};
74 | |
/// P1689ModuleInfo - Describes a standard C++20 module with the information
/// needed by the P1689 format.
struct P1689ModuleInfo {
  /// The kinds of module that can be described.
  enum class ModuleType {
    NamedCXXModule
    // To be supported
    // AngleHeaderUnit,
    // QuoteHeaderUnit
  };

  /// The name of the module. This may include `:` for partitions.
  std::string ModuleName;

  /// Optional. The source path to the module.
  std::string SourcePath;

  /// Whether this module is a standard C++ interface unit. Defaults to true.
  bool IsStdCXXModuleInterface{true};

  /// The kind of module described. Defaults to a named C++ module.
  ModuleType Type{ModuleType::NamedCXXModule};
};
95 | |
/// Identifies one of the outputs of a module compilation, such as the path of
/// the module file.
enum class ModuleOutputKind {
  /// The module file (.pcm). Required.
  ModuleFile,

  /// The path of the dependency file (.d), if any.
  DependencyFile,

  /// The null-separated list of names to use as the targets in the dependency
  /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
  DependencyTargets,

  /// The path of the serialized diagnostic file (.dia), if any.
  DiagnosticSerializationFile,
};
108 | |
109 | struct ModuleDeps { |
110 | /// The identifier of the module. |
111 | ModuleID ID; |
112 | |
113 | /// Whether this is a "system" module. |
114 | bool IsSystem; |
115 | |
116 | /// The path to the modulemap file which defines this module. |
117 | /// |
118 | /// This can be used to explicitly build this module. This file will |
119 | /// additionally appear in \c FileDeps as a dependency. |
120 | std::string ClangModuleMapFile; |
121 | |
122 | /// A collection of absolute paths to files that this module directly depends |
123 | /// on, not including transitive dependencies. |
124 | llvm::StringSet<> FileDeps; |
125 | |
126 | /// A collection of absolute paths to module map files that this module needs |
127 | /// to know about. The ordering is significant. |
128 | std::vector<std::string> ModuleMapFileDeps; |
129 | |
130 | /// A collection of prebuilt modular dependencies this module directly depends |
131 | /// on, not including transitive dependencies. |
132 | std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; |
133 | |
134 | /// A list of module identifiers this module directly depends on, not |
135 | /// including transitive dependencies. |
136 | /// |
137 | /// This may include modules with a different context hash when it can be |
138 | /// determined that the differences are benign for this compilation. |
139 | std::vector<ModuleID> ClangModuleDeps; |
140 | |
141 | /// Get (or compute) the compiler invocation that can be used to build this |
142 | /// module. Does not include argv[0]. |
143 | const std::vector<std::string> &getBuildArguments(); |
144 | |
145 | private: |
146 | friend class ModuleDepCollectorPP; |
147 | |
148 | std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>> |
149 | BuildInfo; |
150 | }; |
151 | |
152 | using PrebuiltModuleVFSMapT = llvm::StringMap<llvm::StringSet<>>; |
153 | |
154 | class ModuleDepCollector; |
155 | |
156 | /// Callback that records textual includes and direct modular includes/imports |
157 | /// during preprocessing. At the end of the main file, it also collects |
158 | /// transitive modular dependencies and passes everything to the |
159 | /// \c DependencyConsumer of the parent \c ModuleDepCollector. |
160 | class ModuleDepCollectorPP final : public PPCallbacks { |
161 | public: |
162 | ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {} |
163 | |
164 | void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, |
165 | SrcMgr::CharacteristicKind FileType, FileID PrevFID, |
166 | SourceLocation Loc) override; |
167 | void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, |
168 | StringRef FileName, bool IsAngled, |
169 | CharSourceRange FilenameRange, |
170 | OptionalFileEntryRef File, StringRef SearchPath, |
171 | StringRef RelativePath, const Module *SuggestedModule, |
172 | bool ModuleImported, |
173 | SrcMgr::CharacteristicKind FileType) override; |
174 | void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, |
175 | const Module *Imported) override; |
176 | |
177 | void EndOfMainFile() override; |
178 | |
179 | private: |
180 | /// The parent dependency collector. |
181 | ModuleDepCollector &MDC; |
182 | |
183 | void handleImport(const Module *Imported); |
184 | |
185 | /// Adds direct modular dependencies that have already been built to the |
186 | /// ModuleDeps instance. |
187 | void |
188 | addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, |
189 | llvm::DenseSet<const Module *> &SeenSubmodules); |
190 | void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, |
191 | llvm::DenseSet<const Module *> &SeenSubmodules); |
192 | |
193 | /// Traverses the previously collected direct modular dependencies to discover |
194 | /// transitive modular dependencies and fills the parent \c ModuleDepCollector |
195 | /// with both. |
196 | /// Returns the ID or nothing if the dependency is spurious and is ignored. |
197 | std::optional<ModuleID> handleTopLevelModule(const Module *M); |
198 | void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, |
199 | llvm::DenseSet<const Module *> &AddedModules); |
200 | void addModuleDep(const Module *M, ModuleDeps &MD, |
201 | llvm::DenseSet<const Module *> &AddedModules); |
202 | |
203 | /// Traverses the affecting modules and updates \c MD with references to the |
204 | /// parent \c ModuleDepCollector info. |
205 | void addAllAffectingClangModules(const Module *M, ModuleDeps &MD, |
206 | llvm::DenseSet<const Module *> &AddedModules); |
207 | void addAffectingClangModule(const Module *M, ModuleDeps &MD, |
208 | llvm::DenseSet<const Module *> &AddedModules); |
209 | }; |
210 | |
211 | /// Collects modular and non-modular dependencies of the main file by attaching |
212 | /// \c ModuleDepCollectorPP to the preprocessor. |
213 | class ModuleDepCollector final : public DependencyCollector { |
214 | public: |
215 | ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts, |
216 | CompilerInstance &ScanInstance, DependencyConsumer &C, |
217 | DependencyActionController &Controller, |
218 | CompilerInvocation OriginalCI, |
219 | PrebuiltModuleVFSMapT PrebuiltModuleVFSMap, |
220 | ScanningOptimizations OptimizeArgs, bool EagerLoadModules, |
221 | bool IsStdModuleP1689Format); |
222 | |
223 | void attachToPreprocessor(Preprocessor &PP) override; |
224 | void attachToASTReader(ASTReader &R) override; |
225 | |
226 | /// Apply any changes implied by the discovered dependencies to the given |
227 | /// invocation, (e.g. disable implicit modules, add explicit module paths). |
228 | void applyDiscoveredDependencies(CompilerInvocation &CI); |
229 | |
230 | private: |
231 | friend ModuleDepCollectorPP; |
232 | |
233 | /// The compiler instance for scanning the current translation unit. |
234 | CompilerInstance &ScanInstance; |
235 | /// The consumer of collected dependency information. |
236 | DependencyConsumer &Consumer; |
237 | /// Callbacks for computing dependency information. |
238 | DependencyActionController &Controller; |
239 | /// Mapping from prebuilt AST files to their sorted list of VFS overlay files. |
240 | PrebuiltModuleVFSMapT PrebuiltModuleVFSMap; |
241 | /// Path to the main source file. |
242 | std::string MainFile; |
243 | /// Hash identifying the compilation conditions of the current TU. |
244 | std::string ContextHash; |
245 | /// Non-modular file dependencies. This includes the main source file and |
246 | /// textually included header files. |
247 | std::vector<std::string> FileDeps; |
248 | /// Direct and transitive modular dependencies of the main source file. |
249 | llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps; |
250 | /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without |
251 | /// a preprocessor. Storage owned by \c ModularDeps. |
252 | llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID; |
253 | /// Direct modular dependencies that have already been built. |
254 | llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps; |
255 | /// Working set of direct modular dependencies. |
256 | llvm::SetVector<const Module *> DirectModularDeps; |
257 | /// Options that control the dependency output generation. |
258 | std::unique_ptr<DependencyOutputOptions> Opts; |
259 | /// A Clang invocation that's based on the original TU invocation and that has |
260 | /// been partially transformed into one that can perform explicit build of |
261 | /// a discovered modular dependency. Note that this still needs to be adjusted |
262 | /// for each individual module. |
263 | CowCompilerInvocation CommonInvocation; |
264 | /// Whether to optimize the modules' command-line arguments. |
265 | ScanningOptimizations OptimizeArgs; |
266 | /// Whether to set up command-lines to load PCM files eagerly. |
267 | bool EagerLoadModules; |
268 | /// If we're generating dependency output in P1689 format |
269 | /// for standard C++ modules. |
270 | bool IsStdModuleP1689Format; |
271 | |
272 | std::optional<P1689ModuleInfo> ProvidedStdCXXModule; |
273 | std::vector<P1689ModuleInfo> RequiredStdCXXModules; |
274 | |
275 | /// Checks whether the module is known as being prebuilt. |
276 | bool isPrebuiltModule(const Module *M); |
277 | |
278 | /// Adds \p Path to \c FileDeps, making it absolute if necessary. |
279 | void addFileDep(StringRef Path); |
280 | /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary. |
281 | void addFileDep(ModuleDeps &MD, StringRef Path); |
282 | |
283 | /// Get a Clang invocation adjusted to build the given modular dependency. |
284 | /// This excludes paths that are yet-to-be-provided by the build system. |
285 | CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs( |
286 | const ModuleDeps &Deps, |
287 | llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const; |
288 | |
289 | /// Collect module map files for given modules. |
290 | llvm::DenseSet<const FileEntry *> |
291 | collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const; |
292 | |
293 | /// Add module map files to the invocation, if needed. |
294 | void addModuleMapFiles(CompilerInvocation &CI, |
295 | ArrayRef<ModuleID> ClangModuleDeps) const; |
296 | /// Add module files (pcm) to the invocation, if needed. |
297 | void addModuleFiles(CompilerInvocation &CI, |
298 | ArrayRef<ModuleID> ClangModuleDeps) const; |
299 | void addModuleFiles(CowCompilerInvocation &CI, |
300 | ArrayRef<ModuleID> ClangModuleDeps) const; |
301 | |
302 | /// Add paths that require looking up outputs to the given dependencies. |
303 | void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps); |
304 | |
305 | /// Compute the context hash for \p Deps, and create the mapping |
306 | /// \c ModuleDepsByID[Deps.ID] = &Deps. |
307 | void associateWithContextHash(const CowCompilerInvocation &CI, |
308 | ModuleDeps &Deps); |
309 | }; |
310 | |
311 | /// Resets codegen options that don't affect modules/PCH. |
312 | void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction, |
313 | const LangOptions &LangOpts, |
314 | CodeGenOptions &CGOpts); |
315 | |
316 | } // end namespace dependencies |
317 | } // end namespace tooling |
318 | } // end namespace clang |
319 | |
320 | namespace llvm { |
321 | inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) { |
322 | return hash_combine(args: ID.ModuleName, args: ID.ContextHash); |
323 | } |
324 | |
325 | template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> { |
326 | using ModuleID = clang::tooling::dependencies::ModuleID; |
327 | static inline ModuleID getEmptyKey() { return ModuleID{.ModuleName: "" , .ContextHash: "" }; } |
328 | static inline ModuleID getTombstoneKey() { |
329 | return ModuleID{.ModuleName: "~" , .ContextHash: "~" }; // ~ is not a valid module name or context hash |
330 | } |
331 | static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); } |
332 | static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) { |
333 | return LHS == RHS; |
334 | } |
335 | }; |
336 | } // namespace llvm |
337 | |
338 | #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H |
339 | |