1//===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "IncludeCleaner.h"
10#include "Diagnostics.h"
11#include "Headers.h"
12#include "ParsedAST.h"
13#include "Preamble.h"
14#include "Protocol.h"
15#include "SourceCode.h"
16#include "clang-include-cleaner/Analysis.h"
17#include "clang-include-cleaner/IncludeSpeller.h"
18#include "clang-include-cleaner/Record.h"
19#include "clang-include-cleaner/Types.h"
20#include "support/Logger.h"
21#include "support/Path.h"
22#include "support/Trace.h"
23#include "clang/AST/ASTContext.h"
24#include "clang/Basic/Diagnostic.h"
25#include "clang/Basic/LLVM.h"
26#include "clang/Basic/SourceLocation.h"
27#include "clang/Basic/SourceManager.h"
28#include "clang/Format/Format.h"
29#include "clang/Lex/DirectoryLookup.h"
30#include "clang/Lex/HeaderSearch.h"
31#include "clang/Lex/Preprocessor.h"
32#include "clang/Tooling/Core/Replacement.h"
33#include "clang/Tooling/Inclusions/HeaderIncludes.h"
34#include "clang/Tooling/Inclusions/StandardLibrary.h"
35#include "clang/Tooling/Syntax/Tokens.h"
36#include "llvm/ADT/ArrayRef.h"
37#include "llvm/ADT/DenseSet.h"
38#include "llvm/ADT/GenericUniformityImpl.h"
39#include "llvm/ADT/STLExtras.h"
40#include "llvm/ADT/SmallString.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/StringRef.h"
43#include "llvm/Support/Error.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/FormatVariadic.h"
46#include "llvm/Support/Path.h"
47#include "llvm/Support/Regex.h"
48#include <cassert>
49#include <iterator>
50#include <map>
51#include <optional>
52#include <string>
53#include <utility>
54#include <vector>
55
56namespace clang::clangd {
57namespace {
58
59bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) {
60 // Convert the path to Unix slashes and try to match against the filter.
61 llvm::SmallString<64> NormalizedPath(HeaderPath);
62 llvm::sys::path::native(path&: NormalizedPath, style: llvm::sys::path::Style::posix);
63 for (auto &Filter : IgnoreHeaders) {
64 if (Filter(NormalizedPath))
65 return true;
66 }
67 return false;
68}
69
70bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST,
71 const include_cleaner::PragmaIncludes *PI) {
72 assert(Inc.HeaderID);
73 auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
74 auto FE = AST.getSourceManager().getFileManager().getFileRef(
75 Filename: AST.getIncludeStructure().getRealPath(ID: HID));
76 assert(FE);
77 if (FE->getDir() == AST.getPreprocessor()
78 .getHeaderSearchInfo()
79 .getModuleMap()
80 .getBuiltinDir())
81 return false;
82 if (PI && PI->shouldKeep(FE: *FE))
83 return false;
84 // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
85 // System headers are likely to be standard library headers.
86 // Until we have good support for umbrella headers, don't warn about them.
87 if (Inc.Written.front() == '<')
88 return tooling::stdlib::Header::named(Name: Inc.Written).has_value();
89 if (PI) {
90 // Check if main file is the public interface for a private header. If so we
91 // shouldn't diagnose it as unused.
92 if (auto PHeader = PI->getPublic(File: *FE); !PHeader.empty()) {
93 PHeader = PHeader.trim(Chars: "<>\"");
94 // Since most private -> public mappings happen in a verbatim way, we
95 // check textually here. This might go wrong in presence of symlinks or
96 // header mappings. But that's not different than rest of the places.
97 if (AST.tuPath().ends_with(Suffix: PHeader))
98 return false;
99 }
100 }
101 // Headers without include guards have side effects and are not
102 // self-contained, skip them.
103 if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
104 File: *FE)) {
105 dlog("{0} doesn't have header guard and will not be considered unused",
106 FE->getName());
107 return false;
108 }
109 return true;
110}
111
112std::vector<Diag> generateMissingIncludeDiagnostics(
113 ParsedAST &AST, llvm::ArrayRef<MissingIncludeDiagInfo> MissingIncludes,
114 llvm::StringRef Code, HeaderFilter IgnoreHeaders, const ThreadsafeFS &TFS) {
115 std::vector<Diag> Result;
116 const SourceManager &SM = AST.getSourceManager();
117 const FileEntry *MainFile = SM.getFileEntryForID(FID: SM.getMainFileID());
118
119 auto FileStyle = getFormatStyleForFile(File: AST.tuPath(), Content: Code, TFS, FormatFile: false);
120
121 tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
122 FileStyle.IncludeStyle);
123 for (const auto &SymbolWithMissingInclude : MissingIncludes) {
124 llvm::StringRef ResolvedPath =
125 SymbolWithMissingInclude.Providers.front().resolvedPath();
126 if (isIgnored(HeaderPath: ResolvedPath, IgnoreHeaders)) {
127 dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by "
128 "config",
129 ResolvedPath);
130 continue;
131 }
132
133 std::string Spelling = include_cleaner::spellHeader(
134 Input: {.H: SymbolWithMissingInclude.Providers.front(),
135 .HS: AST.getPreprocessor().getHeaderSearchInfo(), .Main: MainFile});
136
137 llvm::StringRef HeaderRef{Spelling};
138 bool Angled = HeaderRef.starts_with(Prefix: "<");
139 // We might suggest insertion of an existing include in edge cases, e.g.,
140 // include is present in a PP-disabled region, or spelling of the header
141 // turns out to be the same as one of the unresolved includes in the
142 // main file.
143 std::optional<tooling::Replacement> Replacement = HeaderIncludes.insert(
144 Header: HeaderRef.trim(Chars: "\"<>"), IsAngled: Angled, Directive: tooling::IncludeDirective::Include);
145 if (!Replacement.has_value())
146 continue;
147
148 Diag &D = Result.emplace_back();
149 D.Message =
150 llvm::formatv(Fmt: "No header providing \"{0}\" is directly included",
151 Vals: SymbolWithMissingInclude.Symbol.name());
152 D.Name = "missing-includes";
153 D.Source = Diag::DiagSource::Clangd;
154 D.File = AST.tuPath();
155 D.InsideMainFile = true;
156 // We avoid the "warning" severity here in favor of LSP's "information".
157 //
158 // Users treat most warnings on code being edited as high-priority.
159 // They don't think of include cleanups the same way: they want to edit
160 // lines with existing violations without fixing them.
161 // Diagnostics at the same level tend to be visually indistinguishable,
162 // and a few missing includes can cause many diagnostics.
163 // Marking these as "information" leaves them visible, but less intrusive.
164 //
165 // (These concerns don't apply to unused #include warnings: these are fewer,
166 // they appear on infrequently-edited lines with few other warnings, and
167 // the 'Unneccesary' tag often result in a different rendering)
168 //
169 // Usually clang's "note" severity usually has special semantics, being
170 // translated into LSP RelatedInformation of a parent diagnostic.
171 // But not here: these aren't processed by clangd's DiagnosticConsumer.
172 D.Severity = DiagnosticsEngine::Note;
173 D.Range = clangd::Range{
174 .start: offsetToPosition(Code,
175 Offset: SymbolWithMissingInclude.SymRefRange.beginOffset()),
176 .end: offsetToPosition(Code,
177 Offset: SymbolWithMissingInclude.SymRefRange.endOffset())};
178 auto &F = D.Fixes.emplace_back();
179 F.Message = "#include " + Spelling;
180 TextEdit Edit = replacementToEdit(Code, R: *Replacement);
181 F.Edits.emplace_back(Args: std::move(Edit));
182 }
183 return Result;
184}
185
186std::vector<Diag> generateUnusedIncludeDiagnostics(
187 PathRef FileName, llvm::ArrayRef<const Inclusion *> UnusedIncludes,
188 llvm::StringRef Code, HeaderFilter IgnoreHeaders) {
189 std::vector<Diag> Result;
190 for (const auto *Inc : UnusedIncludes) {
191 if (isIgnored(HeaderPath: Inc->Resolved, IgnoreHeaders))
192 continue;
193 Diag &D = Result.emplace_back();
194 D.Message =
195 llvm::formatv(Fmt: "included header {0} is not used directly",
196 Vals: llvm::sys::path::filename(
197 path: Inc->Written.substr(pos: 1, n: Inc->Written.size() - 2),
198 style: llvm::sys::path::Style::posix));
199 D.Name = "unused-includes";
200 D.Source = Diag::DiagSource::Clangd;
201 D.File = FileName;
202 D.InsideMainFile = true;
203 D.Severity = DiagnosticsEngine::Warning;
204 D.Tags.push_back(Elt: Unnecessary);
205 D.Range = rangeTillEOL(Code, HashOffset: Inc->HashOffset);
206 // FIXME(kirillbobyrev): Removing inclusion might break the code if the
207 // used headers are only reachable transitively through this one. Suggest
208 // including them directly instead.
209 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
210 // (keep/export) remove the warning once we support IWYU pragmas.
211 auto &F = D.Fixes.emplace_back();
212 F.Message = "remove #include directive";
213 F.Edits.emplace_back();
214 F.Edits.back().range.start.line = Inc->HashLine;
215 F.Edits.back().range.end.line = Inc->HashLine + 1;
216 }
217 return Result;
218}
219
220std::optional<Fix>
221removeAllUnusedIncludes(llvm::ArrayRef<Diag> UnusedIncludes) {
222 if (UnusedIncludes.empty())
223 return std::nullopt;
224
225 Fix RemoveAll;
226 RemoveAll.Message = "remove all unused includes";
227 for (const auto &Diag : UnusedIncludes) {
228 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
229 RemoveAll.Edits.insert(I: RemoveAll.Edits.end(),
230 From: Diag.Fixes.front().Edits.begin(),
231 To: Diag.Fixes.front().Edits.end());
232 }
233 return RemoveAll;
234}
235
236std::optional<Fix>
237addAllMissingIncludes(llvm::ArrayRef<Diag> MissingIncludeDiags) {
238 if (MissingIncludeDiags.empty())
239 return std::nullopt;
240
241 Fix AddAllMissing;
242 AddAllMissing.Message = "add all missing includes";
243 // A map to deduplicate the edits with the same new text.
244 // newText (#include "my_missing_header.h") -> TextEdit.
245 std::map<std::string, TextEdit> Edits;
246 for (const auto &Diag : MissingIncludeDiags) {
247 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
248 for (const auto &Edit : Diag.Fixes.front().Edits) {
249 Edits.try_emplace(k: Edit.newText, args: Edit);
250 }
251 }
252 for (auto &It : Edits)
253 AddAllMissing.Edits.push_back(Elt: std::move(It.second));
254 return AddAllMissing;
255}
256Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) {
257 Fix FixAll;
258 FixAll.Message = "fix all includes";
259
260 for (const auto &F : RemoveAllUnused.Edits)
261 FixAll.Edits.push_back(Elt: F);
262 for (const auto &F : AddAllMissing.Edits)
263 FixAll.Edits.push_back(Elt: F);
264 return FixAll;
265}
266
267std::vector<const Inclusion *>
268getUnused(ParsedAST &AST,
269 const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
270 trace::Span Tracer("IncludeCleaner::getUnused");
271 std::vector<const Inclusion *> Unused;
272 for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
273 if (!MFI.HeaderID)
274 continue;
275 auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
276 if (ReferencedFiles.contains(V: IncludeID))
277 continue;
278 if (!mayConsiderUnused(Inc: MFI, AST, PI: &AST.getPragmaIncludes())) {
279 dlog("{0} was not used, but is not eligible to be diagnosed as unused",
280 MFI.Written);
281 continue;
282 }
283 Unused.push_back(x: &MFI);
284 }
285 return Unused;
286}
287
288} // namespace
289
290std::vector<include_cleaner::SymbolReference>
291collectMacroReferences(ParsedAST &AST) {
292 const auto &SM = AST.getSourceManager();
293 auto &PP = AST.getPreprocessor();
294 std::vector<include_cleaner::SymbolReference> Macros;
295 for (const auto &[_, Refs] : AST.getMacros().MacroRefs) {
296 for (const auto &Ref : Refs) {
297 auto Loc = SM.getComposedLoc(FID: SM.getMainFileID(), Offset: Ref.StartOffset);
298 const auto *Tok = AST.getTokens().spelledTokenAt(Loc);
299 if (!Tok)
300 continue;
301 auto Macro = locateMacroAt(SpelledTok: *Tok, PP);
302 if (!Macro)
303 continue;
304 auto DefLoc = Macro->NameLoc;
305 if (!DefLoc.isValid())
306 continue;
307 Macros.push_back(
308 x: {.Target: include_cleaner::Macro{/*Name=*/PP.getIdentifierInfo(Name: Tok->text(SM)),
309 .Definition: DefLoc},
310 .RefLocation: Tok->location(),
311 .RT: Ref.InConditionalDirective ? include_cleaner::RefType::Ambiguous
312 : include_cleaner::RefType::Explicit});
313 }
314 }
315
316 return Macros;
317}
318
319include_cleaner::Includes convertIncludes(const ParsedAST &AST) {
320 auto &SM = AST.getSourceManager();
321
322 include_cleaner::Includes ConvertedIncludes;
323 // We satisfy Includes's contract that search dirs and included files have
324 // matching path styles: both ultimately use FileManager::getCanonicalName().
325 for (const auto &Dir : AST.getIncludeStructure().SearchPathsCanonical)
326 ConvertedIncludes.addSearchDirectory(Dir);
327
328 for (const Inclusion &Inc : AST.getIncludeStructure().MainFileIncludes) {
329 include_cleaner::Include TransformedInc;
330 llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written);
331 TransformedInc.Spelled = WrittenRef.trim(Chars: "\"<>");
332 TransformedInc.HashLocation =
333 SM.getComposedLoc(FID: SM.getMainFileID(), Offset: Inc.HashOffset);
334 TransformedInc.Line = Inc.HashLine + 1;
335 TransformedInc.Angled = WrittenRef.starts_with(Prefix: "<");
336 // Inc.Resolved is canonicalized with clangd::getCanonicalPath(),
337 // which is based on FileManager::getCanonicalName(ParentDir).
338 auto FE = SM.getFileManager().getFileRef(Filename: Inc.Resolved);
339 if (!FE) {
340 elog(Fmt: "IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
341 Vals: Inc.Resolved, Vals: FE.takeError());
342 continue;
343 }
344 TransformedInc.Resolved = *FE;
345 ConvertedIncludes.add(std::move(TransformedInc));
346 }
347 return ConvertedIncludes;
348}
349
350IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
351 // Interaction is only polished for C/CPP.
352 if (AST.getLangOpts().ObjC)
353 return {};
354 const auto &SM = AST.getSourceManager();
355 include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
356 const FileEntry *MainFile = SM.getFileEntryForID(FID: SM.getMainFileID());
357 auto PreamblePatch = PreamblePatch::getPatchEntry(MainFilePath: AST.tuPath(), SM);
358
359 std::vector<include_cleaner::SymbolReference> Macros =
360 collectMacroReferences(AST);
361 std::vector<MissingIncludeDiagInfo> MissingIncludes;
362 llvm::DenseSet<IncludeStructure::HeaderID> Used;
363 trace::Span Tracer("include_cleaner::walkUsed");
364 OptionalDirectoryEntryRef ResourceDir = AST.getPreprocessor()
365 .getHeaderSearchInfo()
366 .getModuleMap()
367 .getBuiltinDir();
368 include_cleaner::walkUsed(
369 ASTRoots: AST.getLocalTopLevelDecls(), /*MacroRefs=*/Macros,
370 PI: &AST.getPragmaIncludes(), PP: AST.getPreprocessor(),
371 CB: [&](const include_cleaner::SymbolReference &Ref,
372 llvm::ArrayRef<include_cleaner::Header> Providers) {
373 bool Satisfied = false;
374 for (const auto &H : Providers) {
375 if (H.kind() == include_cleaner::Header::Physical &&
376 (H.physical() == MainFile || H.physical() == PreamblePatch ||
377 H.physical().getDir() == ResourceDir)) {
378 Satisfied = true;
379 continue;
380 }
381 for (auto *Inc : ConvertedIncludes.match(H)) {
382 Satisfied = true;
383 auto HeaderID =
384 AST.getIncludeStructure().getID(Entry: &Inc->Resolved->getFileEntry());
385 assert(HeaderID.has_value() &&
386 "ConvertedIncludes only contains resolved includes.");
387 Used.insert(V: *HeaderID);
388 }
389 }
390
391 if (Satisfied || Providers.empty() ||
392 Ref.RT != include_cleaner::RefType::Explicit)
393 return;
394
395 // We actually always want to map usages to their spellings, but
396 // spelling locations can point into preamble section. Using these
397 // offsets could lead into crashes in presence of stale preambles. Hence
398 // we use "getFileLoc" instead to make sure it always points into main
399 // file.
400 // FIXME: Use presumed locations to map such usages back to patched
401 // locations safely.
402 auto Loc = SM.getFileLoc(Loc: Ref.RefLocation);
403 // File locations can be outside of the main file if macro is expanded
404 // through an #include.
405 while (SM.getFileID(SpellingLoc: Loc) != SM.getMainFileID())
406 Loc = SM.getIncludeLoc(FID: SM.getFileID(SpellingLoc: Loc));
407 auto TouchingTokens =
408 syntax::spelledTokensTouching(Loc, Tokens: AST.getTokens());
409 assert(!TouchingTokens.empty());
410 // Loc points to the start offset of the ref token, here we use the last
411 // element of the TouchingTokens, e.g. avoid getting the "::" for
412 // "ns::^abc".
413 MissingIncludeDiagInfo DiagInfo{
414 .Symbol: Ref.Target, .SymRefRange: TouchingTokens.back().range(SM), .Providers: Providers};
415 MissingIncludes.push_back(x: std::move(DiagInfo));
416 });
417 // Put possibly equal diagnostics together for deduplication.
418 // The duplicates might be from macro arguments that get expanded multiple
419 // times.
420 llvm::stable_sort(Range&: MissingIncludes, C: [](const MissingIncludeDiagInfo &LHS,
421 const MissingIncludeDiagInfo &RHS) {
422 // First sort by reference location.
423 if (LHS.SymRefRange != RHS.SymRefRange) {
424 // We can get away just by comparing the offsets as all the ranges are in
425 // main file.
426 return LHS.SymRefRange.beginOffset() < RHS.SymRefRange.beginOffset();
427 }
428 // For the same location, break ties using the symbol. Note that this won't
429 // be stable across runs.
430 using MapInfo = llvm::DenseMapInfo<include_cleaner::Symbol>;
431 return MapInfo::getHashValue(Val: LHS.Symbol) <
432 MapInfo::getHashValue(Val: RHS.Symbol);
433 });
434 MissingIncludes.erase(first: llvm::unique(R&: MissingIncludes), last: MissingIncludes.end());
435 std::vector<const Inclusion *> UnusedIncludes = getUnused(AST, ReferencedFiles: Used);
436 return {.UnusedIncludes: std::move(UnusedIncludes), .MissingIncludes: std::move(MissingIncludes)};
437}
438
439bool isPreferredProvider(const Inclusion &Inc,
440 const include_cleaner::Includes &Includes,
441 llvm::ArrayRef<include_cleaner::Header> Providers) {
442 for (const auto &H : Providers) {
443 auto Matches = Includes.match(H);
444 for (const include_cleaner::Include *Match : Matches)
445 if (Match->Line == unsigned(Inc.HashLine + 1))
446 return true; // this header is (equal) best
447 if (!Matches.empty())
448 return false; // another header is better
449 }
450 return false; // no header provides the symbol
451}
452
453std::vector<Diag>
454issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
455 const IncludeCleanerFindings &Findings,
456 const ThreadsafeFS &TFS,
457 HeaderFilter IgnoreHeaders) {
458 trace::Span Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics");
459 std::vector<Diag> UnusedIncludes = generateUnusedIncludeDiagnostics(
460 FileName: AST.tuPath(), UnusedIncludes: Findings.UnusedIncludes, Code, IgnoreHeaders);
461 std::optional<Fix> RemoveAllUnused = removeAllUnusedIncludes(UnusedIncludes);
462
463 std::vector<Diag> MissingIncludeDiags = generateMissingIncludeDiagnostics(
464 AST, MissingIncludes: Findings.MissingIncludes, Code, IgnoreHeaders, TFS);
465 std::optional<Fix> AddAllMissing = addAllMissingIncludes(MissingIncludeDiags);
466
467 std::optional<Fix> FixAll;
468 if (RemoveAllUnused && AddAllMissing)
469 FixAll = fixAll(RemoveAllUnused: *RemoveAllUnused, AddAllMissing: *AddAllMissing);
470
471 auto AddBatchFix = [](const std::optional<Fix> &F, clang::clangd::Diag *Out) {
472 if (!F)
473 return;
474 Out->Fixes.push_back(x: *F);
475 };
476 for (auto &Diag : MissingIncludeDiags) {
477 AddBatchFix(MissingIncludeDiags.size() > 1 ? AddAllMissing : std::nullopt,
478 &Diag);
479 AddBatchFix(FixAll, &Diag);
480 }
481 for (auto &Diag : UnusedIncludes) {
482 AddBatchFix(UnusedIncludes.size() > 1 ? RemoveAllUnused : std::nullopt,
483 &Diag);
484 AddBatchFix(FixAll, &Diag);
485 }
486
487 auto Result = std::move(MissingIncludeDiags);
488 llvm::move(Range&: UnusedIncludes, Out: std::back_inserter(x&: Result));
489 return Result;
490}
491
492} // namespace clang::clangd
493

source code of clang-tools-extra/clangd/IncludeCleaner.cpp