1 | //===--- IncludeCleaner.cpp - standalone tool for include analysis --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AnalysisInternal.h" |
10 | #include "clang-include-cleaner/Analysis.h" |
11 | #include "clang-include-cleaner/Record.h" |
12 | #include "clang/Frontend/CompilerInstance.h" |
13 | #include "clang/Frontend/FrontendAction.h" |
14 | #include "clang/Lex/Preprocessor.h" |
15 | #include "clang/Tooling/CommonOptionsParser.h" |
16 | #include "clang/Tooling/Tooling.h" |
17 | #include "llvm/ADT/STLFunctionalExtras.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/StringMap.h" |
20 | #include "llvm/ADT/StringRef.h" |
21 | #include "llvm/Support/CommandLine.h" |
22 | #include "llvm/Support/FormatVariadic.h" |
23 | #include "llvm/Support/Regex.h" |
24 | #include "llvm/Support/Signals.h" |
25 | #include "llvm/Support/raw_ostream.h" |
26 | #include <functional> |
27 | #include <memory> |
28 | #include <string> |
29 | #include <utility> |
30 | #include <vector> |
31 | |
32 | namespace clang { |
33 | namespace include_cleaner { |
34 | namespace { |
35 | namespace cl = llvm::cl; |
36 | |
37 | llvm::StringRef Overview = llvm::StringLiteral(R"( |
38 | clang-include-cleaner analyzes the #include directives in source code. |
39 | |
40 | It suggests removing headers that the code is not using. |
41 | It suggests inserting headers that the code relies on, but does not include. |
42 | These changes make the file more self-contained and (at scale) make the codebase |
43 | easier to reason about and modify. |
44 | |
45 | The tool operates on *working* source code. This means it can suggest including |
46 | headers that are only indirectly included, but cannot suggest those that are |
47 | missing entirely. (clang-include-fixer can do this). |
48 | )" ) |
49 | .trim(); |
50 | |
51 | cl::OptionCategory IncludeCleaner("clang-include-cleaner" ); |
52 | |
53 | cl::opt<std::string> HTMLReportPath{ |
54 | "html" , |
55 | cl::desc("Specify an output filename for an HTML report. " |
56 | "This describes both recommendations and reasons for changes." ), |
57 | cl::cat(IncludeCleaner), |
58 | }; |
59 | |
60 | cl::opt<std::string> { |
61 | "only-headers" , |
62 | cl::desc("A comma-separated list of regexes to match against suffix of a " |
63 | "header. Only headers that match will be analyzed." ), |
64 | cl::init(Val: "" ), |
65 | cl::cat(IncludeCleaner), |
66 | }; |
67 | |
68 | cl::opt<std::string> { |
69 | "ignore-headers" , |
70 | cl::desc("A comma-separated list of regexes to match against suffix of a " |
71 | "header, and disable analysis if matched." ), |
72 | cl::init(Val: "" ), |
73 | cl::cat(IncludeCleaner), |
74 | }; |
75 | |
76 | enum class PrintStyle { Changes, Final }; |
77 | cl::opt<PrintStyle> Print{ |
78 | "print" , |
79 | cl::values( |
80 | clEnumValN(PrintStyle::Changes, "changes" , "Print symbolic changes" ), |
81 | clEnumValN(PrintStyle::Final, "" , "Print final code" )), |
82 | cl::ValueOptional, |
83 | cl::init(Val: PrintStyle::Final), |
84 | cl::desc("Print the list of headers to insert and remove" ), |
85 | cl::cat(IncludeCleaner), |
86 | }; |
87 | |
88 | cl::opt<bool> Edit{ |
89 | "edit" , |
90 | cl::desc("Apply edits to analyzed source files" ), |
91 | cl::cat(IncludeCleaner), |
92 | }; |
93 | cl::opt<bool> Insert{ |
94 | "insert" , |
95 | cl::desc( |
96 | "Allow header insertions (deprecated. Use -disable-insert instead)" ), |
97 | cl::init(Val: true), |
98 | cl::cat(IncludeCleaner), |
99 | }; |
100 | cl::opt<bool> Remove{ |
101 | "remove" , |
102 | cl::desc("Allow header removals (deprecated. Use -disable-remove instead)" ), |
103 | cl::init(Val: true), |
104 | cl::cat(IncludeCleaner), |
105 | }; |
106 | cl::opt<bool> DisableInsert{ |
107 | "disable-insert" , |
108 | cl::desc("Disable header insertions" ), |
109 | cl::init(Val: false), |
110 | cl::cat(IncludeCleaner), |
111 | }; |
112 | cl::opt<bool> DisableRemove{ |
113 | "disable-remove" , |
114 | cl::desc("Disable header removals" ), |
115 | cl::init(Val: false), |
116 | cl::cat(IncludeCleaner), |
117 | }; |
118 | |
// Count of non-compiler failures (HTML report or edit write errors); a
// non-zero count makes main() return a failing exit status.
// Direct-initialized: ATOMIC_VAR_INIT is deprecated as of C++20.
std::atomic<unsigned> Errors{0};
120 | |
121 | format::FormatStyle getStyle(llvm::StringRef Filename) { |
122 | auto S = format::getStyle(StyleName: format::DefaultFormatStyle, FileName: Filename, |
123 | FallbackStyle: format::DefaultFallbackStyle); |
124 | if (!S || !S->isCpp()) { |
125 | consumeError(Err: S.takeError()); |
126 | return format::getLLVMStyle(); |
127 | } |
128 | return std::move(*S); |
129 | } |
130 | |
131 | class Action : public clang::ASTFrontendAction { |
132 | public: |
133 | Action(llvm::function_ref<bool(llvm::StringRef)> , |
134 | llvm::StringMap<std::string> &EditedFiles) |
135 | : HeaderFilter(HeaderFilter), EditedFiles(EditedFiles) {} |
136 | |
137 | private: |
138 | RecordedAST AST; |
139 | RecordedPP PP; |
140 | PragmaIncludes PI; |
141 | llvm::function_ref<bool(llvm::StringRef)> ; |
142 | llvm::StringMap<std::string> &EditedFiles; |
143 | |
144 | bool BeginInvocation(CompilerInstance &CI) override { |
145 | // We only perform include-cleaner analysis. So we disable diagnostics that |
146 | // won't affect our analysis to make the tool more robust against |
147 | // in-development code. |
148 | CI.getLangOpts().ModulesDeclUse = false; |
149 | CI.getLangOpts().ModulesStrictDeclUse = false; |
150 | return true; |
151 | } |
152 | |
153 | void ExecuteAction() override { |
154 | const auto &CI = getCompilerInstance(); |
155 | |
156 | // Disable all warnings when running include-cleaner, as we are only |
157 | // interested in include-cleaner related findings. This makes the tool both |
158 | // more resilient around in-development code, and possibly faster as we |
159 | // skip some extra analysis. |
160 | auto &Diags = CI.getDiagnostics(); |
161 | Diags.setEnableAllWarnings(false); |
162 | Diags.setSeverityForAll(Flavor: clang::diag::Flavor::WarningOrError, |
163 | Map: clang::diag::Severity::Ignored); |
164 | auto &P = CI.getPreprocessor(); |
165 | P.addPPCallbacks(C: PP.record(PP: P)); |
166 | PI.record(CI: getCompilerInstance()); |
167 | ASTFrontendAction::ExecuteAction(); |
168 | } |
169 | |
170 | std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, |
171 | StringRef File) override { |
172 | return AST.record(); |
173 | } |
174 | |
175 | void EndSourceFile() override { |
176 | const auto &SM = getCompilerInstance().getSourceManager(); |
177 | if (SM.getDiagnostics().hasUncompilableErrorOccurred()) { |
178 | llvm::errs() |
179 | << "Skipping file " << getCurrentFile() |
180 | << " due to compiler errors. clang-include-cleaner expects to " |
181 | "work on compilable source code.\n" ; |
182 | return; |
183 | } |
184 | |
185 | if (!HTMLReportPath.empty()) |
186 | writeHTML(); |
187 | |
188 | // Source File's path of compiler invocation, converted to absolute path. |
189 | llvm::SmallString<256> AbsPath( |
190 | SM.getFileEntryRefForID(FID: SM.getMainFileID())->getName()); |
191 | assert(!AbsPath.empty() && "Main file path not known?" ); |
192 | SM.getFileManager().makeAbsolutePath(Path&: AbsPath); |
193 | llvm::StringRef Code = SM.getBufferData(FID: SM.getMainFileID()); |
194 | |
195 | auto Results = |
196 | analyze(ASTRoots: AST.Roots, MacroRefs: PP.MacroReferences, I: PP.Includes, PI: &PI, |
197 | PP: getCompilerInstance().getPreprocessor(), HeaderFilter); |
198 | |
199 | if (!Insert) { |
200 | llvm::errs() |
201 | << "warning: '-insert=0' is deprecated in favor of " |
202 | "'-disable-insert'. " |
203 | "The old flag was confusing since it suggested that inserts " |
204 | "were disabled by default, when they were actually enabled.\n" ; |
205 | } |
206 | |
207 | if (!Remove) { |
208 | llvm::errs() |
209 | << "warning: '-remove=0' is deprecated in favor of " |
210 | "'-disable-remove'. " |
211 | "The old flag was confusing since it suggested that removes " |
212 | "were disabled by default, when they were actually enabled.\n" ; |
213 | } |
214 | |
215 | if (!Insert || DisableInsert) |
216 | Results.Missing.clear(); |
217 | if (!Remove || DisableRemove) |
218 | Results.Unused.clear(); |
219 | std::string Final = fixIncludes(Results, FileName: AbsPath, Code, IncludeStyle: getStyle(Filename: AbsPath)); |
220 | |
221 | if (Print.getNumOccurrences()) { |
222 | switch (Print) { |
223 | case PrintStyle::Changes: |
224 | for (const Include *I : Results.Unused) |
225 | llvm::outs() << "- " << I->quote() << " @Line:" << I->Line << "\n" ; |
226 | for (const auto &[I, _] : Results.Missing) |
227 | llvm::outs() << "+ " << I << "\n" ; |
228 | break; |
229 | case PrintStyle::Final: |
230 | llvm::outs() << Final; |
231 | break; |
232 | } |
233 | } |
234 | |
235 | if (!Results.Missing.empty() || !Results.Unused.empty()) |
236 | EditedFiles.try_emplace(Key: AbsPath, Args&: Final); |
237 | } |
238 | |
239 | void writeHTML() { |
240 | std::error_code EC; |
241 | llvm::raw_fd_ostream OS(HTMLReportPath, EC); |
242 | if (EC) { |
243 | llvm::errs() << "Unable to write HTML report to " << HTMLReportPath |
244 | << ": " << EC.message() << "\n" ; |
245 | ++Errors; |
246 | return; |
247 | } |
248 | writeHTMLReport(File: AST.Ctx->getSourceManager().getMainFileID(), PP.Includes, |
249 | Roots: AST.Roots, MacroRefs: PP.MacroReferences, Ctx&: *AST.Ctx, |
250 | PP: getCompilerInstance().getPreprocessor(), PI: &PI, OS); |
251 | } |
252 | }; |
253 | class ActionFactory : public tooling::FrontendActionFactory { |
254 | public: |
255 | ActionFactory(llvm::function_ref<bool(llvm::StringRef)> ) |
256 | : HeaderFilter(HeaderFilter) {} |
257 | |
258 | std::unique_ptr<clang::FrontendAction> create() override { |
259 | return std::make_unique<Action>(args&: HeaderFilter, args&: EditedFiles); |
260 | } |
261 | |
262 | const llvm::StringMap<std::string> &editedFiles() const { |
263 | return EditedFiles; |
264 | } |
265 | |
266 | private: |
267 | llvm::function_ref<bool(llvm::StringRef)> ; |
268 | // Map from file name to final code with the include edits applied. |
269 | llvm::StringMap<std::string> EditedFiles; |
270 | }; |
271 | |
272 | // Compiles a regex list into a function that return true if any match a header. |
273 | // Prints and returns nullptr if any regexes are invalid. |
274 | std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag) { |
275 | auto FilterRegs = std::make_shared<std::vector<llvm::Regex>>(); |
276 | llvm::SmallVector<llvm::StringRef> ; |
277 | RegexFlag.split(A&: Headers, Separator: ',', MaxSplit: -1, /*KeepEmpty=*/false); |
278 | for (auto : Headers) { |
279 | std::string AnchoredPattern = "(" + HeaderPattern.str() + ")$" ; |
280 | llvm::Regex CompiledRegex(AnchoredPattern); |
281 | std::string RegexError; |
282 | if (!CompiledRegex.isValid(Error&: RegexError)) { |
283 | llvm::errs() << llvm::formatv(Fmt: "Invalid regular expression '{0}': {1}\n" , |
284 | Vals&: HeaderPattern, Vals&: RegexError); |
285 | return nullptr; |
286 | } |
287 | FilterRegs->push_back(x: std::move(CompiledRegex)); |
288 | } |
289 | return [FilterRegs](llvm::StringRef Path) { |
290 | for (const auto &F : *FilterRegs) { |
291 | if (F.match(String: Path)) |
292 | return true; |
293 | } |
294 | return false; |
295 | }; |
296 | } |
297 | |
298 | std::function<bool(llvm::StringRef)> () { |
299 | auto OnlyMatches = matchesAny(RegexFlag: OnlyHeaders); |
300 | auto IgnoreMatches = matchesAny(RegexFlag: IgnoreHeaders); |
301 | if (!OnlyMatches || !IgnoreMatches) |
302 | return nullptr; |
303 | |
304 | return [OnlyMatches, IgnoreMatches](llvm::StringRef ) { |
305 | if (!OnlyHeaders.empty() && !OnlyMatches(Header)) |
306 | return true; |
307 | if (!IgnoreHeaders.empty() && IgnoreMatches(Header)) |
308 | return true; |
309 | return false; |
310 | }; |
311 | } |
312 | |
313 | // Maps absolute path of each files of each compilation commands to the |
314 | // absolute path of the input file. |
315 | llvm::Expected<std::map<std::string, std::string, std::less<>>> |
316 | mapInputsToAbsPaths(clang::tooling::CompilationDatabase &CDB, |
317 | llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, |
318 | const std::vector<std::string> &Inputs) { |
319 | std::map<std::string, std::string, std::less<>> CDBToAbsPaths; |
320 | // Factory.editedFiles()` will contain the final code, along with the |
321 | // path given in the compilation database. That path can be |
322 | // absolute or relative, and if it is relative, it is relative to the |
323 | // "Directory" field in the compilation database. We need to make it |
324 | // absolute to write the final code to the correct path. |
325 | for (auto &Source : Inputs) { |
326 | llvm::SmallString<256> AbsPath(Source); |
327 | if (auto Err = VFS->makeAbsolute(Path&: AbsPath)) { |
328 | llvm::errs() << "Failed to get absolute path for " << Source << " : " |
329 | << Err.message() << '\n'; |
330 | return llvm::errorCodeToError(EC: Err); |
331 | } |
332 | std::vector<clang::tooling::CompileCommand> Cmds = |
333 | CDB.getCompileCommands(FilePath: AbsPath); |
334 | if (Cmds.empty()) { |
335 | // It should be found in the compilation database, even user didn't |
336 | // specify the compilation database, the `FixedCompilationDatabase` will |
337 | // create an entry from the arguments. So it is an error if we can't |
338 | // find the compile commands. |
339 | std::string ErrorMsg = |
340 | llvm::formatv(Fmt: "No compile commands found for {0}" , Vals&: AbsPath).str(); |
341 | llvm::errs() << ErrorMsg << '\n'; |
342 | return llvm::make_error<llvm::StringError>( |
343 | Args&: ErrorMsg, Args: llvm::inconvertibleErrorCode()); |
344 | } |
345 | for (const auto &Cmd : Cmds) { |
346 | llvm::SmallString<256> CDBPath(Cmd.Filename); |
347 | llvm::sys::fs::make_absolute(current_directory: Cmd.Directory, path&: CDBPath); |
348 | CDBToAbsPaths[std::string(CDBPath)] = std::string(AbsPath); |
349 | } |
350 | } |
351 | return CDBToAbsPaths; |
352 | } |
353 | |
354 | } // namespace |
355 | } // namespace include_cleaner |
356 | } // namespace clang |
357 | |
358 | int main(int argc, const char **argv) { |
359 | using namespace clang::include_cleaner; |
360 | |
361 | llvm::sys::PrintStackTraceOnErrorSignal(Argv0: argv[0]); |
362 | auto OptionsParser = |
363 | clang::tooling::CommonOptionsParser::create(argc, argv, Category&: IncludeCleaner); |
364 | if (!OptionsParser) { |
365 | llvm::errs() << toString(E: OptionsParser.takeError()); |
366 | return 1; |
367 | } |
368 | |
369 | if (OptionsParser->getSourcePathList().size() != 1) { |
370 | std::vector<cl::Option *> IncompatibleFlags = {&HTMLReportPath, &Print}; |
371 | for (const auto *Flag : IncompatibleFlags) { |
372 | if (Flag->getNumOccurrences()) { |
373 | llvm::errs() << "-" << Flag->ArgStr << " requires a single input file" ; |
374 | return 1; |
375 | } |
376 | } |
377 | } |
378 | |
379 | auto VFS = llvm::vfs::getRealFileSystem(); |
380 | auto &CDB = OptionsParser->getCompilations(); |
381 | // CDBToAbsPaths is a map from the path in the compilation database to the |
382 | // writable absolute path of the file. |
383 | auto CDBToAbsPaths = |
384 | mapInputsToAbsPaths(CDB, VFS, Inputs: OptionsParser->getSourcePathList()); |
385 | if (!CDBToAbsPaths) |
386 | return 1; |
387 | |
388 | clang::tooling::ClangTool Tool(CDB, OptionsParser->getSourcePathList()); |
389 | |
390 | auto = headerFilter(); |
391 | if (!HeaderFilter) |
392 | return 1; // error already reported. |
393 | ActionFactory Factory(HeaderFilter); |
394 | auto ErrorCode = Tool.run(Action: &Factory); |
395 | if (Edit) { |
396 | for (const auto &NameAndContent : Factory.editedFiles()) { |
397 | llvm::StringRef FileName = NameAndContent.first(); |
398 | if (auto It = CDBToAbsPaths->find(x: FileName); It != CDBToAbsPaths->end()) |
399 | FileName = It->second; |
400 | |
401 | const std::string &FinalCode = NameAndContent.second; |
402 | if (auto Err = llvm::writeToOutput( |
403 | OutputFileName: FileName, Write: [&](llvm::raw_ostream &OS) -> llvm::Error { |
404 | OS << FinalCode; |
405 | return llvm::Error::success(); |
406 | })) { |
407 | llvm::errs() << "Failed to apply edits to " << FileName << ": " |
408 | << toString(E: std::move(Err)) << "\n" ; |
409 | ++Errors; |
410 | } |
411 | } |
412 | } |
413 | return ErrorCode || Errors != 0; |
414 | } |
415 | |