1 | //===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Driver/Compilation.h" |
10 | #include "clang/Driver/Driver.h" |
11 | #include "clang/Frontend/CompilerInstance.h" |
12 | #include "clang/Frontend/TextDiagnosticPrinter.h" |
13 | #include "clang/Tooling/CommonOptionsParser.h" |
14 | #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" |
15 | #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" |
16 | #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" |
17 | #include "clang/Tooling/JSONCompilationDatabase.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/Twine.h" |
20 | #include "llvm/Support/CommandLine.h" |
21 | #include "llvm/Support/FileUtilities.h" |
22 | #include "llvm/Support/Format.h" |
23 | #include "llvm/Support/JSON.h" |
24 | #include "llvm/Support/LLVMDriver.h" |
25 | #include "llvm/Support/Program.h" |
26 | #include "llvm/Support/Signals.h" |
27 | #include "llvm/Support/ThreadPool.h" |
28 | #include "llvm/Support/Threading.h" |
29 | #include "llvm/Support/Timer.h" |
30 | #include "llvm/TargetParser/Host.h" |
31 | #include <mutex> |
32 | #include <optional> |
33 | #include <thread> |
34 | |
35 | #include "Opts.inc" |
36 | |
37 | using namespace clang; |
38 | using namespace tooling::dependencies; |
39 | |
40 | namespace { |
41 | |
42 | using namespace llvm::opt; |
43 | enum ID { |
44 | OPT_INVALID = 0, // This is not an option ID. |
45 | #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), |
46 | #include "Opts.inc" |
47 | #undef OPTION |
48 | }; |
49 | |
50 | #define PREFIX(NAME, VALUE) \ |
51 | constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ |
52 | constexpr llvm::ArrayRef<llvm::StringLiteral> NAME( \ |
53 | NAME##_init, std::size(NAME##_init) - 1); |
54 | #include "Opts.inc" |
55 | #undef PREFIX |
56 | |
57 | const llvm::opt::OptTable::Info InfoTable[] = { |
58 | #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), |
59 | #include "Opts.inc" |
60 | #undef OPTION |
61 | }; |
62 | |
63 | class ScanDepsOptTable : public llvm::opt::GenericOptTable { |
64 | public: |
65 | ScanDepsOptTable() : GenericOptTable(InfoTable) { |
66 | setGroupedShortOptions(true); |
67 | } |
68 | }; |
69 | |
/// Selects how the resource directory is determined; chosen with
/// --resource-dir-recipe ("modify-compiler-path" or "invoke-compiler").
enum ResourceDirRecipeKind {
  RDRK_ModifyCompilerPath,
  RDRK_InvokeCompiler,
};
74 | |
75 | static ScanningMode ScanMode = ScanningMode::DependencyDirectivesScan; |
76 | static ScanningOutputFormat Format = ScanningOutputFormat::Make; |
77 | static ScanningOptimizations OptimizeArgs; |
78 | static std::string ModuleFilesDir; |
79 | static bool EagerLoadModules; |
80 | static unsigned NumThreads = 0; |
81 | static std::string CompilationDB; |
82 | static std::string ModuleName; |
83 | static std::vector<std::string> ModuleDepTargets; |
84 | static bool DeprecatedDriverCommand; |
85 | static ResourceDirRecipeKind ResourceDirRecipe; |
86 | static bool Verbose; |
87 | static bool PrintTiming; |
88 | static std::vector<const char *> CommandLine; |
89 | |
90 | #ifndef NDEBUG |
91 | static constexpr bool DoRoundTripDefault = true; |
92 | #else |
93 | static constexpr bool DoRoundTripDefault = false; |
94 | #endif |
95 | |
96 | static bool RoundTripArgs = DoRoundTripDefault; |
97 | |
98 | static void ParseArgs(int argc, char **argv) { |
99 | ScanDepsOptTable Tbl; |
100 | llvm::StringRef ToolName = argv[0]; |
101 | llvm::BumpPtrAllocator A; |
102 | llvm::StringSaver Saver{A}; |
103 | llvm::opt::InputArgList Args = |
104 | Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { |
105 | llvm::errs() << Msg << '\n'; |
106 | std::exit(1); |
107 | }); |
108 | |
109 | if (Args.hasArg(OPT_help)) { |
110 | Tbl.printHelp(OS&: llvm::outs(), Usage: "clang-scan-deps [options]" , Title: "clang-scan-deps" ); |
111 | std::exit(status: 0); |
112 | } |
113 | if (Args.hasArg(OPT_version)) { |
114 | llvm::outs() << ToolName << '\n'; |
115 | llvm::cl::PrintVersionMessage(); |
116 | std::exit(status: 0); |
117 | } |
118 | if (const llvm::opt::Arg *A = Args.getLastArg(OPT_mode_EQ)) { |
119 | auto ModeType = |
120 | llvm::StringSwitch<std::optional<ScanningMode>>(A->getValue()) |
121 | .Case("preprocess-dependency-directives" , |
122 | ScanningMode::DependencyDirectivesScan) |
123 | .Case("preprocess" , ScanningMode::CanonicalPreprocessing) |
124 | .Default(std::nullopt); |
125 | if (!ModeType) { |
126 | llvm::errs() << ToolName |
127 | << ": for the --mode option: Cannot find option named '" |
128 | << A->getValue() << "'\n" ; |
129 | std::exit(status: 1); |
130 | } |
131 | ScanMode = *ModeType; |
132 | } |
133 | |
134 | if (const llvm::opt::Arg *A = Args.getLastArg(OPT_format_EQ)) { |
135 | auto FormatType = |
136 | llvm::StringSwitch<std::optional<ScanningOutputFormat>>(A->getValue()) |
137 | .Case("make" , ScanningOutputFormat::Make) |
138 | .Case("p1689" , ScanningOutputFormat::P1689) |
139 | .Case("experimental-full" , ScanningOutputFormat::Full) |
140 | .Default(std::nullopt); |
141 | if (!FormatType) { |
142 | llvm::errs() << ToolName |
143 | << ": for the --format option: Cannot find option named '" |
144 | << A->getValue() << "'\n" ; |
145 | std::exit(status: 1); |
146 | } |
147 | Format = *FormatType; |
148 | } |
149 | |
150 | std::vector<std::string> OptimizationFlags = |
151 | Args.getAllArgValues(OPT_optimize_args_EQ); |
152 | OptimizeArgs = ScanningOptimizations::None; |
153 | for (const auto &Arg : OptimizationFlags) { |
154 | auto Optimization = |
155 | llvm::StringSwitch<std::optional<ScanningOptimizations>>(Arg) |
156 | .Case("none" , ScanningOptimizations::None) |
157 | .Case("header-search" , ScanningOptimizations::HeaderSearch) |
158 | .Case("system-warnings" , ScanningOptimizations::SystemWarnings) |
159 | .Case("vfs" , ScanningOptimizations::VFS) |
160 | .Case("all" , ScanningOptimizations::All) |
161 | .Default(std::nullopt); |
162 | if (!Optimization) { |
163 | llvm::errs() |
164 | << ToolName |
165 | << ": for the --optimize-args option: Cannot find option named '" |
166 | << Arg << "'\n" ; |
167 | std::exit(1); |
168 | } |
169 | OptimizeArgs |= *Optimization; |
170 | } |
171 | if (OptimizationFlags.empty()) |
172 | OptimizeArgs = ScanningOptimizations::Default; |
173 | |
174 | if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_files_dir_EQ)) |
175 | ModuleFilesDir = A->getValue(); |
176 | |
177 | EagerLoadModules = Args.hasArg(OPT_eager_load_pcm); |
178 | |
179 | if (const llvm::opt::Arg *A = Args.getLastArg(OPT_j)) { |
180 | StringRef S{A->getValue()}; |
181 | if (!llvm::to_integer(S, NumThreads, 0)) { |
182 | llvm::errs() << ToolName << ": for the -j option: '" << S |
183 | << "' value invalid for uint argument!\n" ; |
184 | std::exit(status: 1); |
185 | } |
186 | } |
187 | |
188 | if (const llvm::opt::Arg *A = Args.getLastArg(OPT_compilation_database_EQ)) { |
189 | CompilationDB = A->getValue(); |
190 | } else if (Format != ScanningOutputFormat::P1689) { |
191 | llvm::errs() << ToolName |
192 | << ": for the --compiilation-database option: must be " |
193 | "specified at least once!" ; |
194 | std::exit(status: 1); |
195 | } |
196 | |
197 | if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_name_EQ)) |
198 | ModuleName = A->getValue(); |
199 | |
200 | for (const llvm::opt::Arg *A : Args.filtered(OPT_dependency_target_EQ)) |
201 | ModuleDepTargets.emplace_back(A->getValue()); |
202 | |
203 | DeprecatedDriverCommand = Args.hasArg(OPT_deprecated_driver_command); |
204 | |
205 | if (const llvm::opt::Arg *A = Args.getLastArg(OPT_resource_dir_recipe_EQ)) { |
206 | auto Kind = |
207 | llvm::StringSwitch<std::optional<ResourceDirRecipeKind>>(A->getValue()) |
208 | .Case("modify-compiler-path" , RDRK_ModifyCompilerPath) |
209 | .Case("invoke-compiler" , RDRK_InvokeCompiler) |
210 | .Default(std::nullopt); |
211 | if (!Kind) { |
212 | llvm::errs() << ToolName |
213 | << ": for the --resource-dir-recipe option: Cannot find " |
214 | "option named '" |
215 | << A->getValue() << "'\n" ; |
216 | std::exit(status: 1); |
217 | } |
218 | ResourceDirRecipe = *Kind; |
219 | } |
220 | |
221 | PrintTiming = Args.hasArg(OPT_print_timing); |
222 | |
223 | Verbose = Args.hasArg(OPT_verbose); |
224 | |
225 | RoundTripArgs = Args.hasArg(OPT_round_trip_args); |
226 | |
227 | if (auto *A = Args.getLastArgNoClaim(OPT_DASH_DASH)) |
228 | CommandLine.insert(CommandLine.end(), A->getValues().begin(), |
229 | A->getValues().end()); |
230 | } |
231 | |
232 | class SharedStream { |
233 | public: |
234 | SharedStream(raw_ostream &OS) : OS(OS) {} |
235 | void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) { |
236 | std::unique_lock<std::mutex> LockGuard(Lock); |
237 | Fn(OS); |
238 | OS.flush(); |
239 | } |
240 | |
241 | private: |
242 | std::mutex Lock; |
243 | raw_ostream &OS; |
244 | }; |
245 | |
246 | class ResourceDirectoryCache { |
247 | public: |
248 | /// findResourceDir finds the resource directory relative to the clang |
249 | /// compiler being used in Args, by running it with "-print-resource-dir" |
250 | /// option and cache the results for reuse. \returns resource directory path |
251 | /// associated with the given invocation command or empty string if the |
252 | /// compiler path is NOT an absolute path. |
253 | StringRef findResourceDir(const tooling::CommandLineArguments &Args, |
254 | bool ClangCLMode) { |
255 | if (Args.size() < 1) |
256 | return "" ; |
257 | |
258 | const std::string &ClangBinaryPath = Args[0]; |
259 | if (!llvm::sys::path::is_absolute(path: ClangBinaryPath)) |
260 | return "" ; |
261 | |
262 | const std::string &ClangBinaryName = |
263 | std::string(llvm::sys::path::filename(path: ClangBinaryPath)); |
264 | |
265 | std::unique_lock<std::mutex> LockGuard(CacheLock); |
266 | const auto &CachedResourceDir = Cache.find(ClangBinaryPath); |
267 | if (CachedResourceDir != Cache.end()) |
268 | return CachedResourceDir->second; |
269 | |
270 | std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName}; |
271 | if (ClangCLMode) |
272 | PrintResourceDirArgs.push_back("/clang:-print-resource-dir" ); |
273 | else |
274 | PrintResourceDirArgs.push_back("-print-resource-dir" ); |
275 | |
276 | llvm::SmallString<64> OutputFile, ErrorFile; |
277 | llvm::sys::fs::createTemporaryFile("print-resource-dir-output" , |
278 | "" /*no-suffix*/, OutputFile); |
279 | llvm::sys::fs::createTemporaryFile("print-resource-dir-error" , |
280 | "" /*no-suffix*/, ErrorFile); |
281 | llvm::FileRemover OutputRemover(OutputFile.c_str()); |
282 | llvm::FileRemover ErrorRemover(ErrorFile.c_str()); |
283 | std::optional<StringRef> Redirects[] = { |
284 | {"" }, // Stdin |
285 | OutputFile.str(), |
286 | ErrorFile.str(), |
287 | }; |
288 | if (llvm::sys::ExecuteAndWait(Program: ClangBinaryPath, Args: PrintResourceDirArgs, Env: {}, |
289 | Redirects)) { |
290 | auto ErrorBuf = llvm::MemoryBuffer::getFile(Filename: ErrorFile.c_str()); |
291 | llvm::errs() << ErrorBuf.get()->getBuffer(); |
292 | return "" ; |
293 | } |
294 | |
295 | auto OutputBuf = llvm::MemoryBuffer::getFile(Filename: OutputFile.c_str()); |
296 | if (!OutputBuf) |
297 | return "" ; |
298 | StringRef Output = OutputBuf.get()->getBuffer().rtrim('\n'); |
299 | |
300 | Cache[ClangBinaryPath] = Output.str(); |
301 | return Cache[ClangBinaryPath]; |
302 | } |
303 | |
304 | private: |
305 | std::map<std::string, std::string> Cache; |
306 | std::mutex CacheLock; |
307 | }; |
308 | |
309 | } // end anonymous namespace |
310 | |
311 | /// Takes the result of a dependency scan and prints error / dependency files |
312 | /// based on the result. |
313 | /// |
314 | /// \returns True on error. |
315 | static bool |
316 | handleMakeDependencyToolResult(const std::string &Input, |
317 | llvm::Expected<std::string> &MaybeFile, |
318 | SharedStream &OS, SharedStream &Errs) { |
319 | if (!MaybeFile) { |
320 | llvm::handleAllErrors( |
321 | E: MaybeFile.takeError(), Handlers: [&Input, &Errs](llvm::StringError &Err) { |
322 | Errs.applyLocked(Fn: [&](raw_ostream &OS) { |
323 | OS << "Error while scanning dependencies for " << Input << ":\n" ; |
324 | OS << Err.getMessage(); |
325 | }); |
326 | }); |
327 | return true; |
328 | } |
329 | OS.applyLocked(Fn: [&](raw_ostream &OS) { OS << *MaybeFile; }); |
330 | return false; |
331 | } |
332 | |
333 | static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) { |
334 | std::vector<llvm::StringRef> Strings; |
335 | for (auto &&I : Set) |
336 | Strings.push_back(x: I.getKey()); |
337 | llvm::sort(C&: Strings); |
338 | return llvm::json::Array(Strings); |
339 | } |
340 | |
341 | // Technically, we don't need to sort the dependency list to get determinism. |
342 | // Leaving these be will simply preserve the import order. |
343 | static llvm::json::Array toJSONSorted(std::vector<ModuleID> V) { |
344 | llvm::sort(C&: V); |
345 | |
346 | llvm::json::Array Ret; |
347 | for (const ModuleID &MID : V) |
348 | Ret.push_back(E: llvm::json::Object( |
349 | {{.K: "module-name" , .V: MID.ModuleName}, {.K: "context-hash" , .V: MID.ContextHash}})); |
350 | return Ret; |
351 | } |
352 | |
353 | // Thread safe. |
354 | class FullDeps { |
355 | public: |
356 | FullDeps(size_t NumInputs) : Inputs(NumInputs) {} |
357 | |
358 | void mergeDeps(StringRef Input, TranslationUnitDeps TUDeps, |
359 | size_t InputIndex) { |
360 | mergeDeps(Graph: std::move(TUDeps.ModuleGraph), InputIndex); |
361 | |
362 | InputDeps ID; |
363 | ID.FileName = std::string(Input); |
364 | ID.ContextHash = std::move(TUDeps.ID.ContextHash); |
365 | ID.FileDeps = std::move(TUDeps.FileDeps); |
366 | ID.ModuleDeps = std::move(TUDeps.ClangModuleDeps); |
367 | ID.DriverCommandLine = std::move(TUDeps.DriverCommandLine); |
368 | ID.Commands = std::move(TUDeps.Commands); |
369 | |
370 | assert(InputIndex < Inputs.size() && "Input index out of bounds" ); |
371 | assert(Inputs[InputIndex].FileName.empty() && "Result already populated" ); |
372 | Inputs[InputIndex] = std::move(ID); |
373 | } |
374 | |
375 | void mergeDeps(ModuleDepsGraph Graph, size_t InputIndex) { |
376 | std::vector<ModuleDeps *> NewMDs; |
377 | { |
378 | std::unique_lock<std::mutex> ul(Lock); |
379 | for (const ModuleDeps &MD : Graph) { |
380 | auto I = Modules.find(x: {.ID: MD.ID, .InputIndex: 0}); |
381 | if (I != Modules.end()) { |
382 | I->first.InputIndex = std::min(a: I->first.InputIndex, b: InputIndex); |
383 | continue; |
384 | } |
385 | auto Res = Modules.insert(hint: I, x: {{.ID: MD.ID, .InputIndex: InputIndex}, std::move(MD)}); |
386 | NewMDs.push_back(x: &Res->second); |
387 | } |
388 | // First call to \c getBuildArguments is somewhat expensive. Let's call it |
389 | // on the current thread (instead of the main one), and outside the |
390 | // critical section. |
391 | for (ModuleDeps *MD : NewMDs) |
392 | (void)MD->getBuildArguments(); |
393 | } |
394 | } |
395 | |
396 | bool roundTripCommand(ArrayRef<std::string> ArgStrs, |
397 | DiagnosticsEngine &Diags) { |
398 | if (ArgStrs.empty() || ArgStrs[0] != "-cc1" ) |
399 | return false; |
400 | SmallVector<const char *> Args; |
401 | for (const std::string &Arg : ArgStrs) |
402 | Args.push_back(Elt: Arg.c_str()); |
403 | return !CompilerInvocation::checkCC1RoundTrip(Args, Diags); |
404 | } |
405 | |
406 | // Returns \c true if any command lines fail to round-trip. We expect |
407 | // commands already be canonical when output by the scanner. |
408 | bool roundTripCommands(raw_ostream &ErrOS) { |
409 | IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions{}; |
410 | TextDiagnosticPrinter DiagConsumer(ErrOS, &*DiagOpts); |
411 | IntrusiveRefCntPtr<DiagnosticsEngine> Diags = |
412 | CompilerInstance::createDiagnostics(Opts: &*DiagOpts, Client: &DiagConsumer, |
413 | /*ShouldOwnClient=*/false); |
414 | |
415 | for (auto &&M : Modules) |
416 | if (roundTripCommand(ArgStrs: M.second.getBuildArguments(), Diags&: *Diags)) |
417 | return true; |
418 | |
419 | for (auto &&I : Inputs) |
420 | for (const auto &Cmd : I.Commands) |
421 | if (roundTripCommand(ArgStrs: Cmd.Arguments, Diags&: *Diags)) |
422 | return true; |
423 | |
424 | return false; |
425 | } |
426 | |
427 | void printFullOutput(raw_ostream &OS) { |
428 | // Sort the modules by name to get a deterministic order. |
429 | std::vector<IndexedModuleID> ModuleIDs; |
430 | for (auto &&M : Modules) |
431 | ModuleIDs.push_back(x: M.first); |
432 | llvm::sort(C&: ModuleIDs); |
433 | |
434 | using namespace llvm::json; |
435 | |
436 | Array OutModules; |
437 | for (auto &&ModID : ModuleIDs) { |
438 | auto &MD = Modules[ModID]; |
439 | Object O{ |
440 | {.K: "name" , .V: MD.ID.ModuleName}, |
441 | {.K: "context-hash" , .V: MD.ID.ContextHash}, |
442 | {.K: "file-deps" , .V: toJSONSorted(Set: MD.FileDeps)}, |
443 | {.K: "clang-module-deps" , .V: toJSONSorted(V: MD.ClangModuleDeps)}, |
444 | {.K: "clang-modulemap-file" , .V: MD.ClangModuleMapFile}, |
445 | {.K: "command-line" , .V: MD.getBuildArguments()}, |
446 | }; |
447 | OutModules.push_back(E: std::move(O)); |
448 | } |
449 | |
450 | Array TUs; |
451 | for (auto &&I : Inputs) { |
452 | Array Commands; |
453 | if (I.DriverCommandLine.empty()) { |
454 | for (const auto &Cmd : I.Commands) { |
455 | Object O{ |
456 | {.K: "input-file" , .V: I.FileName}, |
457 | {.K: "clang-context-hash" , .V: I.ContextHash}, |
458 | {.K: "file-deps" , .V: I.FileDeps}, |
459 | {.K: "clang-module-deps" , .V: toJSONSorted(V: I.ModuleDeps)}, |
460 | {.K: "executable" , .V: Cmd.Executable}, |
461 | {.K: "command-line" , .V: Cmd.Arguments}, |
462 | }; |
463 | Commands.push_back(E: std::move(O)); |
464 | } |
465 | } else { |
466 | Object O{ |
467 | {.K: "input-file" , .V: I.FileName}, |
468 | {.K: "clang-context-hash" , .V: I.ContextHash}, |
469 | {.K: "file-deps" , .V: I.FileDeps}, |
470 | {.K: "clang-module-deps" , .V: toJSONSorted(V: I.ModuleDeps)}, |
471 | {.K: "executable" , .V: "clang" }, |
472 | {.K: "command-line" , .V: I.DriverCommandLine}, |
473 | }; |
474 | Commands.push_back(E: std::move(O)); |
475 | } |
476 | TUs.push_back(E: Object{ |
477 | {.K: "commands" , .V: std::move(Commands)}, |
478 | }); |
479 | } |
480 | |
481 | Object Output{ |
482 | {.K: "modules" , .V: std::move(OutModules)}, |
483 | {.K: "translation-units" , .V: std::move(TUs)}, |
484 | }; |
485 | |
486 | OS << llvm::formatv(Fmt: "{0:2}\n" , Vals: Value(std::move(Output))); |
487 | } |
488 | |
489 | private: |
490 | struct IndexedModuleID { |
491 | ModuleID ID; |
492 | |
493 | // FIXME: This is mutable so that it can still be updated after insertion |
494 | // into an unordered associative container. This is "fine", since this |
495 | // field doesn't contribute to the hash, but it's a brittle hack. |
496 | mutable size_t InputIndex; |
497 | |
498 | bool operator==(const IndexedModuleID &Other) const { |
499 | return ID == Other.ID; |
500 | } |
501 | |
502 | bool operator<(const IndexedModuleID &Other) const { |
503 | /// We need the output of clang-scan-deps to be deterministic. However, |
504 | /// the dependency graph may contain two modules with the same name. How |
505 | /// do we decide which one to print first? If we made that decision based |
506 | /// on the context hash, the ordering would be deterministic, but |
507 | /// different across machines. This can happen for example when the inputs |
508 | /// or the SDKs (which both contribute to the "context" hash) live in |
509 | /// different absolute locations. We solve that by tracking the index of |
510 | /// the first input TU that (transitively) imports the dependency, which |
511 | /// is always the same for the same input, resulting in deterministic |
512 | /// sorting that's also reproducible across machines. |
513 | return std::tie(args: ID.ModuleName, args&: InputIndex) < |
514 | std::tie(args: Other.ID.ModuleName, args&: Other.InputIndex); |
515 | } |
516 | |
517 | struct Hasher { |
518 | std::size_t operator()(const IndexedModuleID &IMID) const { |
519 | return llvm::hash_value(ID: IMID.ID); |
520 | } |
521 | }; |
522 | }; |
523 | |
524 | struct InputDeps { |
525 | std::string FileName; |
526 | std::string ContextHash; |
527 | std::vector<std::string> FileDeps; |
528 | std::vector<ModuleID> ModuleDeps; |
529 | std::vector<std::string> DriverCommandLine; |
530 | std::vector<Command> Commands; |
531 | }; |
532 | |
533 | std::mutex Lock; |
534 | std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleID::Hasher> |
535 | Modules; |
536 | std::vector<InputDeps> Inputs; |
537 | }; |
538 | |
539 | static bool handleTranslationUnitResult( |
540 | StringRef Input, llvm::Expected<TranslationUnitDeps> &MaybeTUDeps, |
541 | FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) { |
542 | if (!MaybeTUDeps) { |
543 | llvm::handleAllErrors( |
544 | E: MaybeTUDeps.takeError(), Handlers: [&Input, &Errs](llvm::StringError &Err) { |
545 | Errs.applyLocked(Fn: [&](raw_ostream &OS) { |
546 | OS << "Error while scanning dependencies for " << Input << ":\n" ; |
547 | OS << Err.getMessage(); |
548 | }); |
549 | }); |
550 | return true; |
551 | } |
552 | FD.mergeDeps(Input, TUDeps: std::move(*MaybeTUDeps), InputIndex); |
553 | return false; |
554 | } |
555 | |
556 | static bool handleModuleResult( |
557 | StringRef ModuleName, llvm::Expected<ModuleDepsGraph> &MaybeModuleGraph, |
558 | FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) { |
559 | if (!MaybeModuleGraph) { |
560 | llvm::handleAllErrors(E: MaybeModuleGraph.takeError(), |
561 | Handlers: [&ModuleName, &Errs](llvm::StringError &Err) { |
562 | Errs.applyLocked(Fn: [&](raw_ostream &OS) { |
563 | OS << "Error while scanning dependencies for " |
564 | << ModuleName << ":\n" ; |
565 | OS << Err.getMessage(); |
566 | }); |
567 | }); |
568 | return true; |
569 | } |
570 | FD.mergeDeps(Graph: std::move(*MaybeModuleGraph), InputIndex); |
571 | return false; |
572 | } |
573 | |
574 | class P1689Deps { |
575 | public: |
576 | void printDependencies(raw_ostream &OS) { |
577 | addSourcePathsToRequires(); |
578 | // Sort the modules by name to get a deterministic order. |
579 | llvm::sort(C&: Rules, Comp: [](const P1689Rule &A, const P1689Rule &B) { |
580 | return A.PrimaryOutput < B.PrimaryOutput; |
581 | }); |
582 | |
583 | using namespace llvm::json; |
584 | Array OutputRules; |
585 | for (const P1689Rule &R : Rules) { |
586 | Object O{{.K: "primary-output" , .V: R.PrimaryOutput}}; |
587 | |
588 | if (R.Provides) { |
589 | Array Provides; |
590 | Object Provided{{.K: "logical-name" , .V: R.Provides->ModuleName}, |
591 | {.K: "source-path" , .V: R.Provides->SourcePath}, |
592 | {.K: "is-interface" , .V: R.Provides->IsStdCXXModuleInterface}}; |
593 | Provides.push_back(E: std::move(Provided)); |
594 | O.insert(E: {.K: "provides" , .V: std::move(Provides)}); |
595 | } |
596 | |
597 | Array Requires; |
598 | for (const P1689ModuleInfo &Info : R.Requires) { |
599 | Object RequiredInfo{{.K: "logical-name" , .V: Info.ModuleName}}; |
600 | if (!Info.SourcePath.empty()) |
601 | RequiredInfo.insert(E: {.K: "source-path" , .V: Info.SourcePath}); |
602 | Requires.push_back(E: std::move(RequiredInfo)); |
603 | } |
604 | |
605 | if (!Requires.empty()) |
606 | O.insert(E: {.K: "requires" , .V: std::move(Requires)}); |
607 | |
608 | OutputRules.push_back(E: std::move(O)); |
609 | } |
610 | |
611 | Object Output{ |
612 | {.K: "version" , .V: 1}, {.K: "revision" , .V: 0}, {.K: "rules" , .V: std::move(OutputRules)}}; |
613 | |
614 | OS << llvm::formatv(Fmt: "{0:2}\n" , Vals: Value(std::move(Output))); |
615 | } |
616 | |
617 | void addRules(P1689Rule &Rule) { |
618 | std::unique_lock<std::mutex> LockGuard(Lock); |
619 | Rules.push_back(x: Rule); |
620 | } |
621 | |
622 | private: |
623 | void addSourcePathsToRequires() { |
624 | llvm::DenseMap<StringRef, StringRef> ModuleSourceMapper; |
625 | for (const P1689Rule &R : Rules) |
626 | if (R.Provides && !R.Provides->SourcePath.empty()) |
627 | ModuleSourceMapper[R.Provides->ModuleName] = R.Provides->SourcePath; |
628 | |
629 | for (P1689Rule &R : Rules) { |
630 | for (P1689ModuleInfo &Info : R.Requires) { |
631 | auto Iter = ModuleSourceMapper.find(Val: Info.ModuleName); |
632 | if (Iter != ModuleSourceMapper.end()) |
633 | Info.SourcePath = Iter->second; |
634 | } |
635 | } |
636 | } |
637 | |
638 | std::mutex Lock; |
639 | std::vector<P1689Rule> Rules; |
640 | }; |
641 | |
642 | static bool |
643 | handleP1689DependencyToolResult(const std::string &Input, |
644 | llvm::Expected<P1689Rule> &MaybeRule, |
645 | P1689Deps &PD, SharedStream &Errs) { |
646 | if (!MaybeRule) { |
647 | llvm::handleAllErrors( |
648 | E: MaybeRule.takeError(), Handlers: [&Input, &Errs](llvm::StringError &Err) { |
649 | Errs.applyLocked(Fn: [&](raw_ostream &OS) { |
650 | OS << "Error while scanning dependencies for " << Input << ":\n" ; |
651 | OS << Err.getMessage(); |
652 | }); |
653 | }); |
654 | return true; |
655 | } |
656 | PD.addRules(Rule&: *MaybeRule); |
657 | return false; |
658 | } |
659 | |
660 | /// Construct a path for the explicitly built PCM. |
661 | static std::string constructPCMPath(ModuleID MID, StringRef OutputDir) { |
662 | SmallString<256> ExplicitPCMPath(OutputDir); |
663 | llvm::sys::path::append(path&: ExplicitPCMPath, a: MID.ContextHash, |
664 | b: MID.ModuleName + "-" + MID.ContextHash + ".pcm" ); |
665 | return std::string(ExplicitPCMPath); |
666 | } |
667 | |
668 | static std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK, |
669 | StringRef OutputDir) { |
670 | std::string PCMPath = constructPCMPath(MID, OutputDir); |
671 | switch (MOK) { |
672 | case ModuleOutputKind::ModuleFile: |
673 | return PCMPath; |
674 | case ModuleOutputKind::DependencyFile: |
675 | return PCMPath + ".d" ; |
676 | case ModuleOutputKind::DependencyTargets: |
677 | // Null-separate the list of targets. |
678 | return join(ModuleDepTargets, StringRef("\0" , 1)); |
679 | case ModuleOutputKind::DiagnosticSerializationFile: |
680 | return PCMPath + ".diag" ; |
681 | } |
682 | llvm_unreachable("Fully covered switch above!" ); |
683 | } |
684 | |
685 | static std::string getModuleCachePath(ArrayRef<std::string> Args) { |
686 | for (StringRef Arg : llvm::reverse(C&: Args)) { |
687 | Arg.consume_front(Prefix: "/clang:" ); |
688 | if (Arg.consume_front(Prefix: "-fmodules-cache-path=" )) |
689 | return std::string(Arg); |
690 | } |
691 | SmallString<128> Path; |
692 | driver::Driver::getDefaultModuleCachePath(Result&: Path); |
693 | return std::string(Path); |
694 | } |
695 | |
696 | // getCompilationDataBase - If -compilation-database is set, load the |
697 | // compilation database from the specified file. Otherwise if the we're |
698 | // generating P1689 format, trying to generate the compilation database |
699 | // form specified command line after the positional parameter "--". |
700 | static std::unique_ptr<tooling::CompilationDatabase> |
701 | getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) { |
702 | ParseArgs(argc, argv); |
703 | |
704 | if (!CompilationDB.empty()) |
705 | return tooling::JSONCompilationDatabase::loadFromFile( |
706 | FilePath: CompilationDB, ErrorMessage, |
707 | Syntax: tooling::JSONCommandLineSyntax::AutoDetect); |
708 | |
709 | if (Format != ScanningOutputFormat::P1689) { |
710 | llvm::errs() << "the --compilation-database option: must be specified at " |
711 | "least once!" ; |
712 | return nullptr; |
713 | } |
714 | |
715 | // Trying to get the input file, the output file and the command line options |
716 | // from the positional parameter "--". |
717 | char **DoubleDash = std::find(first: argv, last: argv + argc, val: StringRef("--" )); |
718 | if (DoubleDash == argv + argc) { |
719 | llvm::errs() << "The command line arguments is required after '--' in " |
720 | "P1689 per file mode." ; |
721 | return nullptr; |
722 | } |
723 | |
724 | llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = |
725 | CompilerInstance::createDiagnostics(Opts: new DiagnosticOptions); |
726 | driver::Driver TheDriver(CommandLine[0], llvm::sys::getDefaultTargetTriple(), |
727 | *Diags); |
728 | std::unique_ptr<driver::Compilation> C( |
729 | TheDriver.BuildCompilation(Args: CommandLine)); |
730 | if (!C || C->getJobs().empty()) |
731 | return nullptr; |
732 | |
733 | auto Cmd = C->getJobs().begin(); |
734 | auto CI = std::make_unique<CompilerInvocation>(); |
735 | CompilerInvocation::CreateFromArgs(Res&: *CI, CommandLineArgs: Cmd->getArguments(), Diags&: *Diags, |
736 | Argv0: CommandLine[0]); |
737 | if (!CI) |
738 | return nullptr; |
739 | |
740 | FrontendOptions &FEOpts = CI->getFrontendOpts(); |
741 | if (FEOpts.Inputs.size() != 1) { |
742 | llvm::errs() << "Only one input file is allowed in P1689 per file mode." ; |
743 | return nullptr; |
744 | } |
745 | |
746 | // There might be multiple jobs for a compilation. Extract the specified |
747 | // output filename from the last job. |
748 | auto LastCmd = C->getJobs().end(); |
749 | LastCmd--; |
750 | if (LastCmd->getOutputFilenames().size() != 1) { |
751 | llvm::errs() << "The command line should provide exactly one output file " |
752 | "in P1689 per file mode.\n" ; |
753 | } |
754 | StringRef OutputFile = LastCmd->getOutputFilenames().front(); |
755 | |
756 | class InplaceCompilationDatabase : public tooling::CompilationDatabase { |
757 | public: |
758 | InplaceCompilationDatabase(StringRef InputFile, StringRef OutputFile, |
759 | ArrayRef<const char *> CommandLine) |
760 | : Command("." , InputFile, {}, OutputFile) { |
761 | for (auto *C : CommandLine) |
762 | Command.CommandLine.push_back(x: C); |
763 | } |
764 | |
765 | std::vector<tooling::CompileCommand> |
766 | getCompileCommands(StringRef FilePath) const override { |
767 | if (FilePath != Command.Filename) |
768 | return {}; |
769 | return {Command}; |
770 | } |
771 | |
772 | std::vector<std::string> getAllFiles() const override { |
773 | return {Command.Filename}; |
774 | } |
775 | |
776 | std::vector<tooling::CompileCommand> |
777 | getAllCompileCommands() const override { |
778 | return {Command}; |
779 | } |
780 | |
781 | private: |
782 | tooling::CompileCommand Command; |
783 | }; |
784 | |
785 | return std::make_unique<InplaceCompilationDatabase>( |
786 | FEOpts.Inputs[0].getFile(), OutputFile, CommandLine); |
787 | } |
788 | |
789 | int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { |
790 | std::string ErrorMessage; |
791 | std::unique_ptr<tooling::CompilationDatabase> Compilations = |
792 | getCompilationDataBase(argc, argv, ErrorMessage); |
793 | if (!Compilations) { |
794 | llvm::errs() << ErrorMessage << "\n" ; |
795 | return 1; |
796 | } |
797 | |
798 | llvm::cl::PrintOptionValues(); |
799 | |
800 | // The command options are rewritten to run Clang in preprocessor only mode. |
801 | auto AdjustingCompilations = |
802 | std::make_unique<tooling::ArgumentsAdjustingCompilations>( |
803 | args: std::move(Compilations)); |
804 | ResourceDirectoryCache ResourceDirCache; |
805 | |
806 | AdjustingCompilations->appendArgumentsAdjuster( |
807 | Adjuster: [&ResourceDirCache](const tooling::CommandLineArguments &Args, |
808 | StringRef FileName) { |
809 | std::string LastO; |
810 | bool HasResourceDir = false; |
811 | bool ClangCLMode = false; |
812 | auto FlagsEnd = llvm::find(Range: Args, Val: "--" ); |
813 | if (FlagsEnd != Args.begin()) { |
814 | ClangCLMode = |
815 | llvm::sys::path::stem(path: Args[0]).contains_insensitive(Other: "clang-cl" ) || |
816 | llvm::is_contained(Range: Args, Element: "--driver-mode=cl" ); |
817 | |
818 | // Reverse scan, starting at the end or at the element before "--". |
819 | auto R = std::make_reverse_iterator(i: FlagsEnd); |
820 | for (auto I = R, E = Args.rend(); I != E; ++I) { |
821 | StringRef Arg = *I; |
822 | if (ClangCLMode) { |
823 | // Ignore arguments that are preceded by "-Xclang". |
824 | if ((I + 1) != E && I[1] == "-Xclang" ) |
825 | continue; |
826 | if (LastO.empty()) { |
827 | // With clang-cl, the output obj file can be specified with |
828 | // "/opath", "/o path", "/Fopath", and the dash counterparts. |
829 | // Also, clang-cl adds ".obj" extension if none is found. |
830 | if ((Arg == "-o" || Arg == "/o" ) && I != R) |
831 | LastO = I[-1]; // Next argument (reverse iterator) |
832 | else if (Arg.starts_with(Prefix: "/Fo" ) || Arg.starts_with(Prefix: "-Fo" )) |
833 | LastO = Arg.drop_front(N: 3).str(); |
834 | else if (Arg.starts_with(Prefix: "/o" ) || Arg.starts_with(Prefix: "-o" )) |
835 | LastO = Arg.drop_front(N: 2).str(); |
836 | |
837 | if (!LastO.empty() && !llvm::sys::path::has_extension(path: LastO)) |
838 | LastO.append(s: ".obj" ); |
839 | } |
840 | } |
841 | if (Arg == "-resource-dir" ) |
842 | HasResourceDir = true; |
843 | } |
844 | } |
845 | tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd); |
846 | // The clang-cl driver passes "-o -" to the frontend. Inject the real |
847 | // file here to ensure "-MT" can be deduced if need be. |
848 | if (ClangCLMode && !LastO.empty()) { |
849 | AdjustedArgs.push_back(x: "/clang:-o" ); |
850 | AdjustedArgs.push_back(x: "/clang:" + LastO); |
851 | } |
852 | |
853 | if (!HasResourceDir && ResourceDirRecipe == RDRK_InvokeCompiler) { |
854 | StringRef ResourceDir = |
855 | ResourceDirCache.findResourceDir(Args, ClangCLMode); |
856 | if (!ResourceDir.empty()) { |
857 | AdjustedArgs.push_back(x: "-resource-dir" ); |
858 | AdjustedArgs.push_back(x: std::string(ResourceDir)); |
859 | } |
860 | } |
861 | AdjustedArgs.insert(position: AdjustedArgs.end(), first: FlagsEnd, last: Args.end()); |
862 | return AdjustedArgs; |
863 | }); |
864 | |
865 | SharedStream Errs(llvm::errs()); |
866 | // Print out the dependency results to STDOUT by default. |
867 | SharedStream DependencyOS(llvm::outs()); |
868 | |
869 | DependencyScanningService Service(ScanMode, Format, OptimizeArgs, |
870 | EagerLoadModules); |
871 | llvm::ThreadPool Pool(llvm::hardware_concurrency(ThreadCount: NumThreads)); |
872 | std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools; |
873 | for (unsigned I = 0; I < Pool.getThreadCount(); ++I) |
874 | WorkerTools.push_back(x: std::make_unique<DependencyScanningTool>(args&: Service)); |
875 | |
876 | std::vector<tooling::CompileCommand> Inputs = |
877 | AdjustingCompilations->getAllCompileCommands(); |
878 | |
879 | std::atomic<bool> HadErrors(false); |
880 | std::optional<FullDeps> FD; |
881 | P1689Deps PD; |
882 | |
883 | std::mutex Lock; |
884 | size_t Index = 0; |
885 | auto GetNextInputIndex = [&]() -> std::optional<size_t> { |
886 | std::unique_lock<std::mutex> LockGuard(Lock); |
887 | if (Index < Inputs.size()) |
888 | return Index++; |
889 | return {}; |
890 | }; |
891 | |
892 | if (Format == ScanningOutputFormat::Full) |
893 | FD.emplace(args: ModuleName.empty() ? Inputs.size() : 0); |
894 | |
895 | if (Verbose) { |
896 | llvm::outs() << "Running clang-scan-deps on " << Inputs.size() |
897 | << " files using " << Pool.getThreadCount() << " workers\n" ; |
898 | } |
899 | |
900 | llvm::Timer T; |
901 | T.startTimer(); |
902 | |
903 | for (unsigned I = 0; I < Pool.getThreadCount(); ++I) { |
904 | Pool.async(F: [&, I]() { |
905 | llvm::DenseSet<ModuleID> AlreadySeenModules; |
906 | while (auto MaybeInputIndex = GetNextInputIndex()) { |
907 | size_t LocalIndex = *MaybeInputIndex; |
908 | const tooling::CompileCommand *Input = &Inputs[LocalIndex]; |
909 | std::string Filename = std::move(Input->Filename); |
910 | std::string CWD = std::move(Input->Directory); |
911 | |
912 | std::optional<StringRef> MaybeModuleName; |
913 | if (!ModuleName.empty()) |
914 | MaybeModuleName = ModuleName; |
915 | |
916 | std::string OutputDir(ModuleFilesDir); |
917 | if (OutputDir.empty()) |
918 | OutputDir = getModuleCachePath(Args: Input->CommandLine); |
919 | auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) { |
920 | return ::lookupModuleOutput(MID, MOK, OutputDir); |
921 | }; |
922 | |
923 | // Run the tool on it. |
924 | if (Format == ScanningOutputFormat::Make) { |
925 | auto MaybeFile = |
926 | WorkerTools[I]->getDependencyFile(CommandLine: Input->CommandLine, CWD); |
927 | if (handleMakeDependencyToolResult(Input: Filename, MaybeFile, OS&: DependencyOS, |
928 | Errs)) |
929 | HadErrors = true; |
930 | } else if (Format == ScanningOutputFormat::P1689) { |
931 | // It is useful to generate the make-format dependency output during |
932 | // the scanning for P1689. Otherwise the users need to scan again for |
933 | // it. We will generate the make-format dependency output if we find |
934 | // `-MF` in the command lines. |
935 | std::string MakeformatOutputPath; |
936 | std::string MakeformatOutput; |
937 | |
938 | auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile( |
939 | Command: *Input, CWD, MakeformatOutput, MakeformatOutputPath); |
940 | |
941 | if (handleP1689DependencyToolResult(Input: Filename, MaybeRule, PD, Errs)) |
942 | HadErrors = true; |
943 | |
944 | if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() && |
945 | !HadErrors) { |
946 | static std::mutex Lock; |
947 | // With compilation database, we may open different files |
948 | // concurrently or we may write the same file concurrently. So we |
949 | // use a map here to allow multiple compile commands to write to the |
950 | // same file. Also we need a lock here to avoid data race. |
951 | static llvm::StringMap<llvm::raw_fd_ostream> OSs; |
952 | std::unique_lock<std::mutex> LockGuard(Lock); |
953 | |
954 | auto OSIter = OSs.find(Key: MakeformatOutputPath); |
955 | if (OSIter == OSs.end()) { |
956 | std::error_code EC; |
957 | OSIter = OSs.try_emplace(Key: MakeformatOutputPath, |
958 | Args&: MakeformatOutputPath, Args&: EC) |
959 | .first; |
960 | if (EC) |
961 | llvm::errs() |
962 | << "Failed to open P1689 make format output file \"" |
963 | << MakeformatOutputPath << "\" for " << EC.message() |
964 | << "\n" ; |
965 | } |
966 | |
967 | SharedStream MakeformatOS(OSIter->second); |
968 | llvm::Expected<std::string> MaybeOutput(MakeformatOutput); |
969 | if (handleMakeDependencyToolResult(Input: Filename, MaybeFile&: MaybeOutput, |
970 | OS&: MakeformatOS, Errs)) |
971 | HadErrors = true; |
972 | } |
973 | } else if (MaybeModuleName) { |
974 | auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies( |
975 | ModuleName: *MaybeModuleName, CommandLine: Input->CommandLine, CWD, AlreadySeen: AlreadySeenModules, |
976 | LookupModuleOutput: LookupOutput); |
977 | if (handleModuleResult(ModuleName: *MaybeModuleName, MaybeModuleGraph&: MaybeModuleDepsGraph, FD&: *FD, |
978 | InputIndex: LocalIndex, OS&: DependencyOS, Errs)) |
979 | HadErrors = true; |
980 | } else { |
981 | auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies( |
982 | CommandLine: Input->CommandLine, CWD, AlreadySeen: AlreadySeenModules, LookupModuleOutput: LookupOutput); |
983 | if (handleTranslationUnitResult(Input: Filename, MaybeTUDeps, FD&: *FD, |
984 | InputIndex: LocalIndex, OS&: DependencyOS, Errs)) |
985 | HadErrors = true; |
986 | } |
987 | } |
988 | }); |
989 | } |
990 | Pool.wait(); |
991 | |
992 | T.stopTimer(); |
993 | if (PrintTiming) |
994 | llvm::errs() << llvm::format( |
995 | Fmt: "clang-scan-deps timing: %0.2fs wall, %0.2fs process\n" , |
996 | Vals: T.getTotalTime().getWallTime(), Vals: T.getTotalTime().getProcessTime()); |
997 | |
998 | if (RoundTripArgs) |
999 | if (FD && FD->roundTripCommands(ErrOS&: llvm::errs())) |
1000 | HadErrors = true; |
1001 | |
1002 | if (Format == ScanningOutputFormat::Full) |
1003 | FD->printFullOutput(OS&: llvm::outs()); |
1004 | else if (Format == ScanningOutputFormat::P1689) |
1005 | PD.printDependencies(OS&: llvm::outs()); |
1006 | |
1007 | return HadErrors; |
1008 | } |
1009 | |