| 1 | //===--- FindHeaders.cpp --------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "AnalysisInternal.h" |
| 10 | #include "TypesInternal.h" |
| 11 | #include "clang-include-cleaner/Record.h" |
| 12 | #include "clang-include-cleaner/Types.h" |
| 13 | #include "clang/AST/ASTContext.h" |
| 14 | #include "clang/AST/Decl.h" |
| 15 | #include "clang/AST/DeclBase.h" |
| 16 | #include "clang/AST/DeclCXX.h" |
| 17 | #include "clang/Basic/Builtins.h" |
| 18 | #include "clang/Basic/FileEntry.h" |
| 19 | #include "clang/Basic/SourceLocation.h" |
| 20 | #include "clang/Basic/SourceManager.h" |
| 21 | #include "clang/Lex/Preprocessor.h" |
| 22 | #include "clang/Tooling/Inclusions/StandardLibrary.h" |
| 23 | #include "llvm/ADT/ArrayRef.h" |
| 24 | #include "llvm/ADT/STLExtras.h" |
| 25 | #include "llvm/ADT/SmallVector.h" |
| 26 | #include "llvm/ADT/StringRef.h" |
| 27 | #include "llvm/Support/Casting.h" |
| 28 | #include "llvm/Support/ErrorHandling.h" |
| 29 | #include <optional> |
| 30 | #include <queue> |
| 31 | #include <set> |
| 32 | #include <utility> |
| 33 | |
| 34 | namespace clang::include_cleaner { |
| 35 | namespace { |
| 36 | llvm::SmallVector<Hinted<Header>> |
| 37 | (llvm::SmallVector<Hinted<Header>> , Hints H) { |
| 38 | for (auto & : Headers) |
| 39 | Header.Hint |= H; |
| 40 | return Headers; |
| 41 | } |
| 42 | |
| 43 | llvm::SmallVector<Header> (llvm::SmallVector<Hinted<Header>> ) { |
| 44 | llvm::stable_sort(Range: llvm::reverse(C&: Headers), |
| 45 | C: [](const Hinted<Header> &LHS, const Hinted<Header> &RHS) { |
| 46 | return LHS < RHS; |
| 47 | }); |
| 48 | return llvm::SmallVector<Header>(Headers.begin(), Headers.end()); |
| 49 | } |
| 50 | |
| 51 | // Return the basename from a verbatim header spelling, leaves only the file |
| 52 | // name. |
| 53 | llvm::StringRef basename(llvm::StringRef ) { |
| 54 | Header = Header.trim(Chars: "<>\"" ); |
| 55 | Header = llvm::sys::path::filename(path: Header); |
| 56 | // Drop everything after first `.` (dot). |
| 57 | // foo.h -> foo |
| 58 | // foo.cu.h -> foo |
| 59 | Header = Header.substr(Start: 0, N: Header.find(C: '.')); |
| 60 | return Header; |
| 61 | } |
| 62 | |
| 63 | // Check if spelling of \p H matches \p DeclName. |
| 64 | bool (llvm::StringRef DeclName, Header H) { |
| 65 | switch (H.kind()) { |
| 66 | case Header::Physical: |
| 67 | return basename(Header: H.physical().getName()).equals_insensitive(RHS: DeclName); |
| 68 | case Header::Standard: |
| 69 | return basename(Header: H.standard().name()).equals_insensitive(RHS: DeclName); |
| 70 | case Header::Verbatim: |
| 71 | return basename(Header: H.verbatim()).equals_insensitive(RHS: DeclName); |
| 72 | } |
| 73 | llvm_unreachable("unhandled Header kind!" ); |
| 74 | } |
| 75 | |
| 76 | llvm::StringRef symbolName(const Symbol &S) { |
| 77 | switch (S.kind()) { |
| 78 | case Symbol::Declaration: |
| 79 | // Unnamed decls like operators and anonymous structs won't get any name |
| 80 | // match. |
| 81 | if (const auto *ND = llvm::dyn_cast<NamedDecl>(Val: &S.declaration())) |
| 82 | if (auto *II = ND->getIdentifier()) |
| 83 | return II->getName(); |
| 84 | return "" ; |
| 85 | case Symbol::Macro: |
| 86 | return S.macro().Name->getName(); |
| 87 | } |
| 88 | llvm_unreachable("unhandled Symbol kind!" ); |
| 89 | } |
| 90 | |
| 91 | Hints isPublicHeader(const FileEntry *FE, const PragmaIncludes &PI) { |
| 92 | if (PI.isPrivate(File: FE) || !PI.isSelfContained(File: FE)) |
| 93 | return Hints::None; |
| 94 | return Hints::PublicHeader; |
| 95 | } |
| 96 | |
| 97 | llvm::SmallVector<Hinted<Header>> |
| 98 | hintedHeadersForStdHeaders(llvm::ArrayRef<tooling::stdlib::Header> , |
| 99 | const SourceManager &SM, const PragmaIncludes *PI) { |
| 100 | llvm::SmallVector<Hinted<Header>> Results; |
| 101 | for (const auto &H : Headers) { |
| 102 | Results.emplace_back(Args: H, Args: Hints::PublicHeader | Hints::OriginHeader); |
| 103 | if (!PI) |
| 104 | continue; |
| 105 | for (FileEntryRef Export : PI->getExporters(H, FM&: SM.getFileManager())) |
| 106 | Results.emplace_back(Args: Header(Export), Args: isPublicHeader(FE: Export, PI: *PI)); |
| 107 | } |
| 108 | // StandardLibrary returns headers in preference order, so only mark the |
| 109 | // first. |
| 110 | if (!Results.empty()) |
| 111 | Results.front().Hint |= Hints::PreferredHeader; |
| 112 | return Results; |
| 113 | } |
| 114 | |
| 115 | // Symbol to header mapping for std::move and std::remove, based on number of |
| 116 | // parameters. |
| 117 | std::optional<tooling::stdlib::Header> |
| 118 | (const NamedDecl *ND) { |
| 119 | if (!ND->isInStdNamespace()) |
| 120 | return {}; |
| 121 | if (auto* USD = llvm::dyn_cast<UsingShadowDecl>(Val: ND)) |
| 122 | ND = USD->getTargetDecl(); |
| 123 | const auto *FD = ND->getAsFunction(); |
| 124 | if (!FD) |
| 125 | return std::nullopt; |
| 126 | llvm::StringRef FName = symbolName(*ND); |
| 127 | if (FName == "move" ) { |
| 128 | if (FD->getNumParams() == 1) |
| 129 | // move(T&& t) |
| 130 | return tooling::stdlib::Header::named(Name: "<utility>" ); |
| 131 | if (FD->getNumParams() == 3 || FD->getNumParams() == 4) |
| 132 | // move(InputIt first, InputIt last, OutputIt dest); |
| 133 | // move(ExecutionPolicy&& policy, ForwardIt1 first, |
| 134 | // ForwardIt1 last, ForwardIt2 d_first); |
| 135 | return tooling::stdlib::Header::named(Name: "<algorithm>" ); |
| 136 | } else if (FName == "remove" ) { |
| 137 | if (FD->getNumParams() == 1) |
| 138 | // remove(const char*); |
| 139 | return tooling::stdlib::Header::named(Name: "<cstdio>" ); |
| 140 | if (FD->getNumParams() == 3) |
| 141 | // remove(ForwardIt first, ForwardIt last, const T& value); |
| 142 | return tooling::stdlib::Header::named(Name: "<algorithm>" ); |
| 143 | } |
| 144 | return std::nullopt; |
| 145 | } |
| 146 | |
| 147 | // Special-case symbols without proper locations, like the ambiguous standard |
| 148 | // library symbols (e.g. std::move) or builtin declarations. |
| 149 | std::optional<llvm::SmallVector<Hinted<Header>>> |
| 150 | headersForSpecialSymbol(const Symbol &S, const SourceManager &SM, |
| 151 | const PragmaIncludes *PI) { |
| 152 | // Our special casing logic only deals with decls, so bail out early for |
| 153 | // macros. |
| 154 | if (S.kind() != Symbol::Declaration) |
| 155 | return std::nullopt; |
| 156 | const auto *ND = llvm::cast<NamedDecl>(Val: &S.declaration()); |
| 157 | // We map based on names, so again bail out early if there are no names. |
| 158 | if (!ND) |
| 159 | return std::nullopt; |
| 160 | auto *II = ND->getIdentifier(); |
| 161 | if (!II) |
| 162 | return std::nullopt; |
| 163 | |
| 164 | // Check first for symbols that are part of our stdlib mapping. As we have |
| 165 | // header names for those. |
| 166 | if (auto = headerForAmbiguousStdSymbol(ND)) { |
| 167 | return applyHints(Headers: hintedHeadersForStdHeaders(Headers: {*Header}, SM, PI), |
| 168 | H: Hints::CompleteSymbol); |
| 169 | } |
| 170 | |
| 171 | // Now check for builtin symbols, we shouldn't suggest any headers for ones |
| 172 | // without any headers. |
| 173 | if (auto ID = II->getBuiltinID()) { |
| 174 | const char * = |
| 175 | ND->getASTContext().BuiltinInfo.getHeaderName(ID); |
| 176 | if (!BuiltinHeader) |
| 177 | return llvm::SmallVector<Hinted<Header>>{}; |
| 178 | // FIXME: Use the header mapping for builtins with a known header. |
| 179 | } |
| 180 | return std::nullopt; |
| 181 | } |
| 182 | |
| 183 | } // namespace |
| 184 | |
| 185 | llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc, |
| 186 | const SourceManager &SM, |
| 187 | const PragmaIncludes *PI) { |
| 188 | llvm::SmallVector<Hinted<Header>> Results; |
| 189 | switch (Loc.kind()) { |
| 190 | case SymbolLocation::Physical: { |
| 191 | FileID FID = SM.getFileID(SpellingLoc: SM.getExpansionLoc(Loc: Loc.physical())); |
| 192 | OptionalFileEntryRef FE = SM.getFileEntryRefForID(FID); |
| 193 | if (!FE) |
| 194 | return {}; |
| 195 | if (!PI) |
| 196 | return {{*FE, Hints::PublicHeader | Hints::OriginHeader}}; |
| 197 | bool IsOrigin = true; |
| 198 | std::queue<FileEntryRef> Exporters; |
| 199 | while (FE) { |
| 200 | Results.emplace_back(Args&: *FE, |
| 201 | Args: isPublicHeader(FE: *FE, PI: *PI) | |
| 202 | (IsOrigin ? Hints::OriginHeader : Hints::None)); |
| 203 | for (FileEntryRef Export : PI->getExporters(File: *FE, FM&: SM.getFileManager())) |
| 204 | Exporters.push(x: Export); |
| 205 | |
| 206 | if (auto Verbatim = PI->getPublic(File: *FE); !Verbatim.empty()) { |
| 207 | Results.emplace_back(Args&: Verbatim, |
| 208 | Args: Hints::PublicHeader | Hints::PreferredHeader); |
| 209 | break; |
| 210 | } |
| 211 | if (PI->isSelfContained(File: *FE) || FID == SM.getMainFileID()) |
| 212 | break; |
| 213 | |
| 214 | // Walkup the include stack for non self-contained headers. |
| 215 | FID = SM.getDecomposedIncludedLoc(FID).first; |
| 216 | FE = SM.getFileEntryRefForID(FID); |
| 217 | IsOrigin = false; |
| 218 | } |
| 219 | // Now traverse provider trees rooted at exporters. |
| 220 | // Note that we only traverse export edges, and ignore private -> public |
| 221 | // mappings, as those pragmas apply to exporter, and not the main provider |
| 222 | // being exported in this header. |
| 223 | std::set<const FileEntry *> SeenExports; |
| 224 | while (!Exporters.empty()) { |
| 225 | FileEntryRef Export = Exporters.front(); |
| 226 | Exporters.pop(); |
| 227 | if (!SeenExports.insert(x: Export).second) // In case of cyclic exports |
| 228 | continue; |
| 229 | Results.emplace_back(Args&: Export, Args: isPublicHeader(FE: Export, PI: *PI)); |
| 230 | for (FileEntryRef Export : PI->getExporters(File: Export, FM&: SM.getFileManager())) |
| 231 | Exporters.push(x: Export); |
| 232 | } |
| 233 | return Results; |
| 234 | } |
| 235 | case SymbolLocation::Standard: { |
| 236 | return hintedHeadersForStdHeaders(Headers: Loc.standard().headers(), SM, PI); |
| 237 | } |
| 238 | } |
| 239 | llvm_unreachable("unhandled SymbolLocation kind!" ); |
| 240 | } |
| 241 | |
| 242 | llvm::SmallVector<Header> headersForSymbol(const Symbol &S, |
| 243 | const Preprocessor &PP, |
| 244 | const PragmaIncludes *PI) { |
| 245 | const auto &SM = PP.getSourceManager(); |
| 246 | // Get headers for all the locations providing Symbol. Same header can be |
| 247 | // reached through different traversals, deduplicate those into a single |
| 248 | // Header by merging their hints. |
| 249 | llvm::SmallVector<Hinted<Header>> ; |
| 250 | if (auto = headersForSpecialSymbol(S, SM, PI)) { |
| 251 | Headers = std::move(*SpecialHeaders); |
| 252 | } else { |
| 253 | for (auto &Loc : locateSymbol(S, LO: PP.getLangOpts())) |
| 254 | Headers.append(RHS: applyHints(Headers: findHeaders(Loc, SM, PI), H: Loc.Hint)); |
| 255 | } |
| 256 | // If two Headers probably refer to the same file (e.g. Verbatim(foo.h) and |
| 257 | // Physical(/path/to/foo.h), we won't deduplicate them or merge their hints |
| 258 | llvm::stable_sort( |
| 259 | Range&: Headers, C: [](const Hinted<Header> &LHS, const Hinted<Header> &RHS) { |
| 260 | return static_cast<Header>(LHS) < static_cast<Header>(RHS); |
| 261 | }); |
| 262 | auto *Write = Headers.begin(); |
| 263 | for (auto *Read = Headers.begin(); Read != Headers.end(); ++Write) { |
| 264 | *Write = *Read++; |
| 265 | while (Read != Headers.end() && |
| 266 | static_cast<Header>(*Write) == static_cast<Header>(*Read)) { |
| 267 | Write->Hint |= Read->Hint; |
| 268 | ++Read; |
| 269 | } |
| 270 | } |
| 271 | Headers.erase(CS: Write, CE: Headers.end()); |
| 272 | |
| 273 | // Add name match hints to deduplicated providers. |
| 274 | llvm::StringRef SymbolName = symbolName(S); |
| 275 | for (auto &H : Headers) { |
| 276 | // Don't apply name match hints to standard headers as the standard headers |
| 277 | // are already ranked in the stdlib mapping. |
| 278 | if (H.kind() == Header::Standard) |
| 279 | continue; |
| 280 | // Don't apply name match hints to exporting headers. As they usually have |
| 281 | // names similar to the original header, e.g. foo_wrapper/foo.h vs |
| 282 | // foo/foo.h, but shouldn't be preferred (unless marked as the public |
| 283 | // interface). |
| 284 | if ((H.Hint & Hints::OriginHeader) == Hints::None) |
| 285 | continue; |
| 286 | if (nameMatch(DeclName: SymbolName, H)) |
| 287 | H.Hint |= Hints::PreferredHeader; |
| 288 | } |
| 289 | |
| 290 | // FIXME: Introduce a MainFile header kind or signal and boost it. |
| 291 | return ranked(Headers: std::move(Headers)); |
| 292 | } |
| 293 | } // namespace clang::include_cleaner |
| 294 | |