//===--- IncludeCleaner.cpp - standalone tool for include analysis --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
| 8 | |
#include "AnalysisInternal.h"
#include "clang-include-cleaner/Analysis.h"
#include "clang-include-cleaner/Record.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
#include <atomic>
#include <cassert>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
| 31 | |
| 32 | namespace clang { |
| 33 | namespace include_cleaner { |
| 34 | namespace { |
| 35 | namespace cl = llvm::cl; |
| 36 | |
| 37 | llvm::StringRef Overview = llvm::StringLiteral(R"( |
| 38 | clang-include-cleaner analyzes the #include directives in source code. |
| 39 | |
| 40 | It suggests removing headers that the code is not using. |
| 41 | It suggests inserting headers that the code relies on, but does not include. |
| 42 | These changes make the file more self-contained and (at scale) make the codebase |
| 43 | easier to reason about and modify. |
| 44 | |
| 45 | The tool operates on *working* source code. This means it can suggest including |
| 46 | headers that are only indirectly included, but cannot suggest those that are |
| 47 | missing entirely. (clang-include-fixer can do this). |
| 48 | )" ) |
| 49 | .trim(); |
| 50 | |
| 51 | cl::OptionCategory IncludeCleaner("clang-include-cleaner" ); |
| 52 | |
| 53 | cl::opt<std::string> HTMLReportPath{ |
| 54 | "html" , |
| 55 | cl::desc("Specify an output filename for an HTML report. " |
| 56 | "This describes both recommendations and reasons for changes." ), |
| 57 | cl::cat(IncludeCleaner), |
| 58 | }; |
| 59 | |
| 60 | cl::opt<std::string> { |
| 61 | "only-headers" , |
| 62 | cl::desc("A comma-separated list of regexes to match against suffix of a " |
| 63 | "header. Only headers that match will be analyzed." ), |
| 64 | cl::init(Val: "" ), |
| 65 | cl::cat(IncludeCleaner), |
| 66 | }; |
| 67 | |
| 68 | cl::opt<std::string> { |
| 69 | "ignore-headers" , |
| 70 | cl::desc("A comma-separated list of regexes to match against suffix of a " |
| 71 | "header, and disable analysis if matched." ), |
| 72 | cl::init(Val: "" ), |
| 73 | cl::cat(IncludeCleaner), |
| 74 | }; |
| 75 | |
| 76 | enum class PrintStyle { Changes, Final }; |
| 77 | cl::opt<PrintStyle> Print{ |
| 78 | "print" , |
| 79 | cl::values( |
| 80 | clEnumValN(PrintStyle::Changes, "changes" , "Print symbolic changes" ), |
| 81 | clEnumValN(PrintStyle::Final, "" , "Print final code" )), |
| 82 | cl::ValueOptional, |
| 83 | cl::init(Val: PrintStyle::Final), |
| 84 | cl::desc("Print the list of headers to insert and remove" ), |
| 85 | cl::cat(IncludeCleaner), |
| 86 | }; |
| 87 | |
| 88 | cl::opt<bool> Edit{ |
| 89 | "edit" , |
| 90 | cl::desc("Apply edits to analyzed source files" ), |
| 91 | cl::cat(IncludeCleaner), |
| 92 | }; |
| 93 | cl::opt<bool> Insert{ |
| 94 | "insert" , |
| 95 | cl::desc( |
| 96 | "Allow header insertions (deprecated. Use -disable-insert instead)" ), |
| 97 | cl::init(Val: true), |
| 98 | cl::cat(IncludeCleaner), |
| 99 | }; |
| 100 | cl::opt<bool> Remove{ |
| 101 | "remove" , |
| 102 | cl::desc("Allow header removals (deprecated. Use -disable-remove instead)" ), |
| 103 | cl::init(Val: true), |
| 104 | cl::cat(IncludeCleaner), |
| 105 | }; |
| 106 | cl::opt<bool> DisableInsert{ |
| 107 | "disable-insert" , |
| 108 | cl::desc("Disable header insertions" ), |
| 109 | cl::init(Val: false), |
| 110 | cl::cat(IncludeCleaner), |
| 111 | }; |
| 112 | cl::opt<bool> DisableRemove{ |
| 113 | "disable-remove" , |
| 114 | cl::desc("Disable header removals" ), |
| 115 | cl::init(Val: false), |
| 116 | cl::cat(IncludeCleaner), |
| 117 | }; |
| 118 | |
// Count of non-fatal failures (HTML report or edit write errors); a non-zero
// value makes main() return a failing exit status. Initialized directly rather
// than through the deprecated ATOMIC_VAR_INIT macro.
std::atomic<unsigned> Errors{0};
| 120 | |
| 121 | format::FormatStyle getStyle(llvm::StringRef Filename) { |
| 122 | auto S = format::getStyle(StyleName: format::DefaultFormatStyle, FileName: Filename, |
| 123 | FallbackStyle: format::DefaultFallbackStyle); |
| 124 | if (!S || !S->isCpp()) { |
| 125 | consumeError(Err: S.takeError()); |
| 126 | return format::getLLVMStyle(); |
| 127 | } |
| 128 | return std::move(*S); |
| 129 | } |
| 130 | |
| 131 | class Action : public clang::ASTFrontendAction { |
| 132 | public: |
| 133 | Action(llvm::function_ref<bool(llvm::StringRef)> , |
| 134 | llvm::StringMap<std::string> &EditedFiles) |
| 135 | : HeaderFilter(HeaderFilter), EditedFiles(EditedFiles) {} |
| 136 | |
| 137 | private: |
| 138 | RecordedAST AST; |
| 139 | RecordedPP PP; |
| 140 | PragmaIncludes PI; |
| 141 | llvm::function_ref<bool(llvm::StringRef)> ; |
| 142 | llvm::StringMap<std::string> &EditedFiles; |
| 143 | |
| 144 | bool BeginInvocation(CompilerInstance &CI) override { |
| 145 | // We only perform include-cleaner analysis. So we disable diagnostics that |
| 146 | // won't affect our analysis to make the tool more robust against |
| 147 | // in-development code. |
| 148 | CI.getLangOpts().ModulesDeclUse = false; |
| 149 | CI.getLangOpts().ModulesStrictDeclUse = false; |
| 150 | return true; |
| 151 | } |
| 152 | |
| 153 | void ExecuteAction() override { |
| 154 | const auto &CI = getCompilerInstance(); |
| 155 | |
| 156 | // Disable all warnings when running include-cleaner, as we are only |
| 157 | // interested in include-cleaner related findings. This makes the tool both |
| 158 | // more resilient around in-development code, and possibly faster as we |
| 159 | // skip some extra analysis. |
| 160 | auto &Diags = CI.getDiagnostics(); |
| 161 | Diags.setEnableAllWarnings(false); |
| 162 | Diags.setSeverityForAll(Flavor: clang::diag::Flavor::WarningOrError, |
| 163 | Map: clang::diag::Severity::Ignored); |
| 164 | auto &P = CI.getPreprocessor(); |
| 165 | P.addPPCallbacks(C: PP.record(PP: P)); |
| 166 | PI.record(CI: getCompilerInstance()); |
| 167 | ASTFrontendAction::ExecuteAction(); |
| 168 | } |
| 169 | |
| 170 | std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, |
| 171 | StringRef File) override { |
| 172 | return AST.record(); |
| 173 | } |
| 174 | |
| 175 | void EndSourceFile() override { |
| 176 | const auto &SM = getCompilerInstance().getSourceManager(); |
| 177 | if (SM.getDiagnostics().hasUncompilableErrorOccurred()) { |
| 178 | llvm::errs() |
| 179 | << "Skipping file " << getCurrentFile() |
| 180 | << " due to compiler errors. clang-include-cleaner expects to " |
| 181 | "work on compilable source code.\n" ; |
| 182 | return; |
| 183 | } |
| 184 | |
| 185 | if (!HTMLReportPath.empty()) |
| 186 | writeHTML(); |
| 187 | |
| 188 | // Source File's path of compiler invocation, converted to absolute path. |
| 189 | llvm::SmallString<256> AbsPath( |
| 190 | SM.getFileEntryRefForID(FID: SM.getMainFileID())->getName()); |
| 191 | assert(!AbsPath.empty() && "Main file path not known?" ); |
| 192 | SM.getFileManager().makeAbsolutePath(Path&: AbsPath); |
| 193 | llvm::StringRef Code = SM.getBufferData(FID: SM.getMainFileID()); |
| 194 | |
| 195 | auto Results = |
| 196 | analyze(ASTRoots: AST.Roots, MacroRefs: PP.MacroReferences, I: PP.Includes, PI: &PI, |
| 197 | PP: getCompilerInstance().getPreprocessor(), HeaderFilter); |
| 198 | |
| 199 | if (!Insert) { |
| 200 | llvm::errs() |
| 201 | << "warning: '-insert=0' is deprecated in favor of " |
| 202 | "'-disable-insert'. " |
| 203 | "The old flag was confusing since it suggested that inserts " |
| 204 | "were disabled by default, when they were actually enabled.\n" ; |
| 205 | } |
| 206 | |
| 207 | if (!Remove) { |
| 208 | llvm::errs() |
| 209 | << "warning: '-remove=0' is deprecated in favor of " |
| 210 | "'-disable-remove'. " |
| 211 | "The old flag was confusing since it suggested that removes " |
| 212 | "were disabled by default, when they were actually enabled.\n" ; |
| 213 | } |
| 214 | |
| 215 | if (!Insert || DisableInsert) |
| 216 | Results.Missing.clear(); |
| 217 | if (!Remove || DisableRemove) |
| 218 | Results.Unused.clear(); |
| 219 | std::string Final = fixIncludes(Results, FileName: AbsPath, Code, IncludeStyle: getStyle(Filename: AbsPath)); |
| 220 | |
| 221 | if (Print.getNumOccurrences()) { |
| 222 | switch (Print) { |
| 223 | case PrintStyle::Changes: |
| 224 | for (const Include *I : Results.Unused) |
| 225 | llvm::outs() << "- " << I->quote() << " @Line:" << I->Line << "\n" ; |
| 226 | for (const auto &[I, _] : Results.Missing) |
| 227 | llvm::outs() << "+ " << I << "\n" ; |
| 228 | break; |
| 229 | case PrintStyle::Final: |
| 230 | llvm::outs() << Final; |
| 231 | break; |
| 232 | } |
| 233 | } |
| 234 | |
| 235 | if (!Results.Missing.empty() || !Results.Unused.empty()) |
| 236 | EditedFiles.try_emplace(Key: AbsPath, Args&: Final); |
| 237 | } |
| 238 | |
| 239 | void writeHTML() { |
| 240 | std::error_code EC; |
| 241 | llvm::raw_fd_ostream OS(HTMLReportPath, EC); |
| 242 | if (EC) { |
| 243 | llvm::errs() << "Unable to write HTML report to " << HTMLReportPath |
| 244 | << ": " << EC.message() << "\n" ; |
| 245 | ++Errors; |
| 246 | return; |
| 247 | } |
| 248 | writeHTMLReport(File: AST.Ctx->getSourceManager().getMainFileID(), PP.Includes, |
| 249 | Roots: AST.Roots, MacroRefs: PP.MacroReferences, Ctx&: *AST.Ctx, |
| 250 | PP: getCompilerInstance().getPreprocessor(), PI: &PI, OS); |
| 251 | } |
| 252 | }; |
| 253 | class ActionFactory : public tooling::FrontendActionFactory { |
| 254 | public: |
| 255 | ActionFactory(llvm::function_ref<bool(llvm::StringRef)> ) |
| 256 | : HeaderFilter(HeaderFilter) {} |
| 257 | |
| 258 | std::unique_ptr<clang::FrontendAction> create() override { |
| 259 | return std::make_unique<Action>(args&: HeaderFilter, args&: EditedFiles); |
| 260 | } |
| 261 | |
| 262 | const llvm::StringMap<std::string> &editedFiles() const { |
| 263 | return EditedFiles; |
| 264 | } |
| 265 | |
| 266 | private: |
| 267 | llvm::function_ref<bool(llvm::StringRef)> ; |
| 268 | // Map from file name to final code with the include edits applied. |
| 269 | llvm::StringMap<std::string> EditedFiles; |
| 270 | }; |
| 271 | |
| 272 | // Compiles a regex list into a function that return true if any match a header. |
| 273 | // Prints and returns nullptr if any regexes are invalid. |
| 274 | std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag) { |
| 275 | auto FilterRegs = std::make_shared<std::vector<llvm::Regex>>(); |
| 276 | llvm::SmallVector<llvm::StringRef> ; |
| 277 | RegexFlag.split(A&: Headers, Separator: ',', MaxSplit: -1, /*KeepEmpty=*/false); |
| 278 | for (auto : Headers) { |
| 279 | std::string AnchoredPattern = "(" + HeaderPattern.str() + ")$" ; |
| 280 | llvm::Regex CompiledRegex(AnchoredPattern); |
| 281 | std::string RegexError; |
| 282 | if (!CompiledRegex.isValid(Error&: RegexError)) { |
| 283 | llvm::errs() << llvm::formatv(Fmt: "Invalid regular expression '{0}': {1}\n" , |
| 284 | Vals&: HeaderPattern, Vals&: RegexError); |
| 285 | return nullptr; |
| 286 | } |
| 287 | FilterRegs->push_back(x: std::move(CompiledRegex)); |
| 288 | } |
| 289 | return [FilterRegs](llvm::StringRef Path) { |
| 290 | for (const auto &F : *FilterRegs) { |
| 291 | if (F.match(String: Path)) |
| 292 | return true; |
| 293 | } |
| 294 | return false; |
| 295 | }; |
| 296 | } |
| 297 | |
| 298 | std::function<bool(llvm::StringRef)> () { |
| 299 | auto OnlyMatches = matchesAny(RegexFlag: OnlyHeaders); |
| 300 | auto IgnoreMatches = matchesAny(RegexFlag: IgnoreHeaders); |
| 301 | if (!OnlyMatches || !IgnoreMatches) |
| 302 | return nullptr; |
| 303 | |
| 304 | return [OnlyMatches, IgnoreMatches](llvm::StringRef ) { |
| 305 | if (!OnlyHeaders.empty() && !OnlyMatches(Header)) |
| 306 | return true; |
| 307 | if (!IgnoreHeaders.empty() && IgnoreMatches(Header)) |
| 308 | return true; |
| 309 | return false; |
| 310 | }; |
| 311 | } |
| 312 | |
| 313 | // Maps absolute path of each files of each compilation commands to the |
| 314 | // absolute path of the input file. |
| 315 | llvm::Expected<std::map<std::string, std::string, std::less<>>> |
| 316 | mapInputsToAbsPaths(clang::tooling::CompilationDatabase &CDB, |
| 317 | llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, |
| 318 | const std::vector<std::string> &Inputs) { |
| 319 | std::map<std::string, std::string, std::less<>> CDBToAbsPaths; |
| 320 | // Factory.editedFiles()` will contain the final code, along with the |
| 321 | // path given in the compilation database. That path can be |
| 322 | // absolute or relative, and if it is relative, it is relative to the |
| 323 | // "Directory" field in the compilation database. We need to make it |
| 324 | // absolute to write the final code to the correct path. |
| 325 | for (auto &Source : Inputs) { |
| 326 | llvm::SmallString<256> AbsPath(Source); |
| 327 | if (auto Err = VFS->makeAbsolute(Path&: AbsPath)) { |
| 328 | llvm::errs() << "Failed to get absolute path for " << Source << " : " |
| 329 | << Err.message() << '\n'; |
| 330 | return llvm::errorCodeToError(EC: Err); |
| 331 | } |
| 332 | std::vector<clang::tooling::CompileCommand> Cmds = |
| 333 | CDB.getCompileCommands(FilePath: AbsPath); |
| 334 | if (Cmds.empty()) { |
| 335 | // It should be found in the compilation database, even user didn't |
| 336 | // specify the compilation database, the `FixedCompilationDatabase` will |
| 337 | // create an entry from the arguments. So it is an error if we can't |
| 338 | // find the compile commands. |
| 339 | std::string ErrorMsg = |
| 340 | llvm::formatv(Fmt: "No compile commands found for {0}" , Vals&: AbsPath).str(); |
| 341 | llvm::errs() << ErrorMsg << '\n'; |
| 342 | return llvm::make_error<llvm::StringError>( |
| 343 | Args&: ErrorMsg, Args: llvm::inconvertibleErrorCode()); |
| 344 | } |
| 345 | for (const auto &Cmd : Cmds) { |
| 346 | llvm::SmallString<256> CDBPath(Cmd.Filename); |
| 347 | llvm::sys::fs::make_absolute(current_directory: Cmd.Directory, path&: CDBPath); |
| 348 | CDBToAbsPaths[std::string(CDBPath)] = std::string(AbsPath); |
| 349 | } |
| 350 | } |
| 351 | return CDBToAbsPaths; |
| 352 | } |
| 353 | |
| 354 | } // namespace |
| 355 | } // namespace include_cleaner |
| 356 | } // namespace clang |
| 357 | |
| 358 | int main(int argc, const char **argv) { |
| 359 | using namespace clang::include_cleaner; |
| 360 | |
| 361 | llvm::sys::PrintStackTraceOnErrorSignal(Argv0: argv[0]); |
| 362 | auto OptionsParser = |
| 363 | clang::tooling::CommonOptionsParser::create(argc, argv, Category&: IncludeCleaner); |
| 364 | if (!OptionsParser) { |
| 365 | llvm::errs() << toString(E: OptionsParser.takeError()); |
| 366 | return 1; |
| 367 | } |
| 368 | |
| 369 | if (OptionsParser->getSourcePathList().size() != 1) { |
| 370 | std::vector<cl::Option *> IncompatibleFlags = {&HTMLReportPath, &Print}; |
| 371 | for (const auto *Flag : IncompatibleFlags) { |
| 372 | if (Flag->getNumOccurrences()) { |
| 373 | llvm::errs() << "-" << Flag->ArgStr << " requires a single input file" ; |
| 374 | return 1; |
| 375 | } |
| 376 | } |
| 377 | } |
| 378 | |
| 379 | auto VFS = llvm::vfs::getRealFileSystem(); |
| 380 | auto &CDB = OptionsParser->getCompilations(); |
| 381 | // CDBToAbsPaths is a map from the path in the compilation database to the |
| 382 | // writable absolute path of the file. |
| 383 | auto CDBToAbsPaths = |
| 384 | mapInputsToAbsPaths(CDB, VFS, Inputs: OptionsParser->getSourcePathList()); |
| 385 | if (!CDBToAbsPaths) |
| 386 | return 1; |
| 387 | |
| 388 | clang::tooling::ClangTool Tool(CDB, OptionsParser->getSourcePathList()); |
| 389 | |
| 390 | auto = headerFilter(); |
| 391 | if (!HeaderFilter) |
| 392 | return 1; // error already reported. |
| 393 | ActionFactory Factory(HeaderFilter); |
| 394 | auto ErrorCode = Tool.run(Action: &Factory); |
| 395 | if (Edit) { |
| 396 | for (const auto &NameAndContent : Factory.editedFiles()) { |
| 397 | llvm::StringRef FileName = NameAndContent.first(); |
| 398 | if (auto It = CDBToAbsPaths->find(x: FileName); It != CDBToAbsPaths->end()) |
| 399 | FileName = It->second; |
| 400 | |
| 401 | const std::string &FinalCode = NameAndContent.second; |
| 402 | if (auto Err = llvm::writeToOutput( |
| 403 | OutputFileName: FileName, Write: [&](llvm::raw_ostream &OS) -> llvm::Error { |
| 404 | OS << FinalCode; |
| 405 | return llvm::Error::success(); |
| 406 | })) { |
| 407 | llvm::errs() << "Failed to apply edits to " << FileName << ": " |
| 408 | << toString(E: std::move(Err)) << "\n" ; |
| 409 | ++Errors; |
| 410 | } |
| 411 | } |
| 412 | } |
| 413 | return ErrorCode || Errors != 0; |
| 414 | } |
| 415 | |