1//===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <unordered_map>
#include <variant>
#include <vector>
28
29namespace clang {
30namespace tooling {
31namespace dependencies {
32
// Forward declarations: this header only refers to these types by reference.
class DependencyActionController;
class DependencyConsumer;
35
36/// Modular dependency that has already been built prior to the dependency scan.
37struct PrebuiltModuleDep {
38 std::string ModuleName;
39 std::string PCMFile;
40 std::string ModuleMapFile;
41
42 explicit PrebuiltModuleDep(const Module *M)
43 : ModuleName(M->getTopLevelModuleName()),
44 PCMFile(M->getASTFile()->getName()),
45 ModuleMapFile(M->PresumedModuleMapFile) {}
46};
47
48/// This is used to identify a specific module.
/// This is used to identify a specific module.
struct ModuleID {
  /// The name of the module. This may include `:` for C++20 module partitions,
  /// or a header-name for C++20 header units.
  std::string ModuleName;

  /// The context hash of a module represents the compiler options that affect
  /// the resulting command-line invocation.
  ///
  /// Modules with the same name and ContextHash but different invocations could
  /// cause non-deterministic build results.
  ///
  /// Modules with the same name but a different \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// Two IDs are equal only when both \c ModuleName and \c ContextHash match.
  bool operator==(const ModuleID &Other) const {
    return std::tie(ModuleName, ContextHash) ==
           std::tie(Other.ModuleName, Other.ContextHash);
  }

  /// Lexicographic order: by \c ModuleName first, then by \c ContextHash.
  bool operator<(const ModuleID &Other) const {
    return std::tie(ModuleName, ContextHash) <
           std::tie(Other.ModuleName, Other.ContextHash);
  }
};
74
75/// P1689ModuleInfo - Represents the needed information of standard C++20
76/// modules for P1689 format.
/// P1689ModuleInfo - Represents the needed information of standard C++20
/// modules for P1689 format.
struct P1689ModuleInfo {
  /// The name of the module. This may include `:` for partitions.
  std::string ModuleName;

  /// Optional. The source path to the module.
  std::string SourcePath;

  /// Whether this module is a standard C++ interface unit. Defaults to true.
  bool IsStdCXXModuleInterface = true;

  /// The kind of module being described. Only named C++ modules exist today;
  /// the commented-out enumerators are placeholders for header units.
  enum class ModuleType {
    NamedCXXModule
    // To be supported
    // AngleHeaderUnit,
    // QuoteHeaderUnit
  };
  ModuleType Type = ModuleType::NamedCXXModule;
};
95
96/// An output from a module compilation, such as the path of the module file.
/// An output from a module compilation, such as the path of the module file.
enum class ModuleOutputKind {
  /// The module file (.pcm). Required.
  ModuleFile,
  /// The path of the dependency file (.d), if any.
  DependencyFile,
  /// The null-separated list of names to use as the targets in the dependency
  /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
  DependencyTargets,
  /// The path of the serialized diagnostic file (.dia), if any.
  DiagnosticSerializationFile,
};
108
109struct ModuleDeps {
110 /// The identifier of the module.
111 ModuleID ID;
112
113 /// Whether this is a "system" module.
114 bool IsSystem;
115
116 /// The path to the modulemap file which defines this module.
117 ///
118 /// This can be used to explicitly build this module. This file will
119 /// additionally appear in \c FileDeps as a dependency.
120 std::string ClangModuleMapFile;
121
122 /// A collection of absolute paths to files that this module directly depends
123 /// on, not including transitive dependencies.
124 llvm::StringSet<> FileDeps;
125
126 /// A collection of absolute paths to module map files that this module needs
127 /// to know about. The ordering is significant.
128 std::vector<std::string> ModuleMapFileDeps;
129
130 /// A collection of prebuilt modular dependencies this module directly depends
131 /// on, not including transitive dependencies.
132 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
133
134 /// A list of module identifiers this module directly depends on, not
135 /// including transitive dependencies.
136 ///
137 /// This may include modules with a different context hash when it can be
138 /// determined that the differences are benign for this compilation.
139 std::vector<ModuleID> ClangModuleDeps;
140
141 /// Get (or compute) the compiler invocation that can be used to build this
142 /// module. Does not include argv[0].
143 const std::vector<std::string> &getBuildArguments();
144
145private:
146 friend class ModuleDepCollectorPP;
147
148 std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
149 BuildInfo;
150};
151
152using PrebuiltModuleVFSMapT = llvm::StringMap<llvm::StringSet<>>;
153
// Forward declaration: ModuleDepCollectorPP stores a reference to its parent
// collector, which is defined later in this header.
class ModuleDepCollector;
155
156/// Callback that records textual includes and direct modular includes/imports
157/// during preprocessing. At the end of the main file, it also collects
158/// transitive modular dependencies and passes everything to the
159/// \c DependencyConsumer of the parent \c ModuleDepCollector.
160class ModuleDepCollectorPP final : public PPCallbacks {
161public:
162 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
163
164 void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
165 SrcMgr::CharacteristicKind FileType, FileID PrevFID,
166 SourceLocation Loc) override;
167 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
168 StringRef FileName, bool IsAngled,
169 CharSourceRange FilenameRange,
170 OptionalFileEntryRef File, StringRef SearchPath,
171 StringRef RelativePath, const Module *SuggestedModule,
172 bool ModuleImported,
173 SrcMgr::CharacteristicKind FileType) override;
174 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
175 const Module *Imported) override;
176
177 void EndOfMainFile() override;
178
179private:
180 /// The parent dependency collector.
181 ModuleDepCollector &MDC;
182
183 void handleImport(const Module *Imported);
184
185 /// Adds direct modular dependencies that have already been built to the
186 /// ModuleDeps instance.
187 void
188 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
189 llvm::DenseSet<const Module *> &SeenSubmodules);
190 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
191 llvm::DenseSet<const Module *> &SeenSubmodules);
192
193 /// Traverses the previously collected direct modular dependencies to discover
194 /// transitive modular dependencies and fills the parent \c ModuleDepCollector
195 /// with both.
196 /// Returns the ID or nothing if the dependency is spurious and is ignored.
197 std::optional<ModuleID> handleTopLevelModule(const Module *M);
198 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
199 llvm::DenseSet<const Module *> &AddedModules);
200 void addModuleDep(const Module *M, ModuleDeps &MD,
201 llvm::DenseSet<const Module *> &AddedModules);
202
203 /// Traverses the affecting modules and updates \c MD with references to the
204 /// parent \c ModuleDepCollector info.
205 void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
206 llvm::DenseSet<const Module *> &AddedModules);
207 void addAffectingClangModule(const Module *M, ModuleDeps &MD,
208 llvm::DenseSet<const Module *> &AddedModules);
209};
210
211/// Collects modular and non-modular dependencies of the main file by attaching
212/// \c ModuleDepCollectorPP to the preprocessor.
213class ModuleDepCollector final : public DependencyCollector {
214public:
215 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
216 CompilerInstance &ScanInstance, DependencyConsumer &C,
217 DependencyActionController &Controller,
218 CompilerInvocation OriginalCI,
219 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap,
220 ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
221 bool IsStdModuleP1689Format);
222
223 void attachToPreprocessor(Preprocessor &PP) override;
224 void attachToASTReader(ASTReader &R) override;
225
226 /// Apply any changes implied by the discovered dependencies to the given
227 /// invocation, (e.g. disable implicit modules, add explicit module paths).
228 void applyDiscoveredDependencies(CompilerInvocation &CI);
229
230private:
231 friend ModuleDepCollectorPP;
232
233 /// The compiler instance for scanning the current translation unit.
234 CompilerInstance &ScanInstance;
235 /// The consumer of collected dependency information.
236 DependencyConsumer &Consumer;
237 /// Callbacks for computing dependency information.
238 DependencyActionController &Controller;
239 /// Mapping from prebuilt AST files to their sorted list of VFS overlay files.
240 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap;
241 /// Path to the main source file.
242 std::string MainFile;
243 /// Hash identifying the compilation conditions of the current TU.
244 std::string ContextHash;
245 /// Non-modular file dependencies. This includes the main source file and
246 /// textually included header files.
247 std::vector<std::string> FileDeps;
248 /// Direct and transitive modular dependencies of the main source file.
249 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
250 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
251 /// a preprocessor. Storage owned by \c ModularDeps.
252 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
253 /// Direct modular dependencies that have already been built.
254 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
255 /// Working set of direct modular dependencies.
256 llvm::SetVector<const Module *> DirectModularDeps;
257 /// Options that control the dependency output generation.
258 std::unique_ptr<DependencyOutputOptions> Opts;
259 /// A Clang invocation that's based on the original TU invocation and that has
260 /// been partially transformed into one that can perform explicit build of
261 /// a discovered modular dependency. Note that this still needs to be adjusted
262 /// for each individual module.
263 CowCompilerInvocation CommonInvocation;
264 /// Whether to optimize the modules' command-line arguments.
265 ScanningOptimizations OptimizeArgs;
266 /// Whether to set up command-lines to load PCM files eagerly.
267 bool EagerLoadModules;
268 /// If we're generating dependency output in P1689 format
269 /// for standard C++ modules.
270 bool IsStdModuleP1689Format;
271
272 std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
273 std::vector<P1689ModuleInfo> RequiredStdCXXModules;
274
275 /// Checks whether the module is known as being prebuilt.
276 bool isPrebuiltModule(const Module *M);
277
278 /// Adds \p Path to \c FileDeps, making it absolute if necessary.
279 void addFileDep(StringRef Path);
280 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
281 void addFileDep(ModuleDeps &MD, StringRef Path);
282
283 /// Get a Clang invocation adjusted to build the given modular dependency.
284 /// This excludes paths that are yet-to-be-provided by the build system.
285 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
286 const ModuleDeps &Deps,
287 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
288
289 /// Collect module map files for given modules.
290 llvm::DenseSet<const FileEntry *>
291 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
292
293 /// Add module map files to the invocation, if needed.
294 void addModuleMapFiles(CompilerInvocation &CI,
295 ArrayRef<ModuleID> ClangModuleDeps) const;
296 /// Add module files (pcm) to the invocation, if needed.
297 void addModuleFiles(CompilerInvocation &CI,
298 ArrayRef<ModuleID> ClangModuleDeps) const;
299 void addModuleFiles(CowCompilerInvocation &CI,
300 ArrayRef<ModuleID> ClangModuleDeps) const;
301
302 /// Add paths that require looking up outputs to the given dependencies.
303 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
304
305 /// Compute the context hash for \p Deps, and create the mapping
306 /// \c ModuleDepsByID[Deps.ID] = &Deps.
307 void associateWithContextHash(const CowCompilerInvocation &CI,
308 ModuleDeps &Deps);
309};
310
311/// Resets codegen options that don't affect modules/PCH.
312void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction,
313 const LangOptions &LangOpts,
314 CodeGenOptions &CGOpts);
315
316} // end namespace dependencies
317} // end namespace tooling
318} // end namespace clang
319
320namespace llvm {
321inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) {
322 return hash_combine(args: ID.ModuleName, args: ID.ContextHash);
323}
324
325template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
326 using ModuleID = clang::tooling::dependencies::ModuleID;
327 static inline ModuleID getEmptyKey() { return ModuleID{.ModuleName: "", .ContextHash: ""}; }
328 static inline ModuleID getTombstoneKey() {
329 return ModuleID{.ModuleName: "~", .ContextHash: "~"}; // ~ is not a valid module name or context hash
330 }
331 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
332 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
333 return LHS == RHS;
334 }
335};
336} // namespace llvm
337
338#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
339

// Source: clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h