| 1 | //===--- Headers.h - Include headers -----------------------------*- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H |
| 10 | #define |
| 11 | |
| 12 | #include "Protocol.h" |
| 13 | #include "SourceCode.h" |
| 14 | #include "index/Symbol.h" |
| 15 | #include "support/Path.h" |
| 16 | #include "clang/Basic/FileEntry.h" |
| 17 | #include "clang/Basic/TokenKinds.h" |
| 18 | #include "clang/Format/Format.h" |
| 19 | #include "clang/Frontend/CompilerInstance.h" |
| 20 | #include "clang/Lex/HeaderSearch.h" |
| 21 | #include "clang/Lex/Preprocessor.h" |
| 22 | #include "clang/Tooling/Inclusions/HeaderIncludes.h" |
| 23 | #include "clang/Tooling/Inclusions/StandardLibrary.h" |
| 24 | #include "llvm/ADT/ArrayRef.h" |
| 25 | #include "llvm/ADT/DenseSet.h" |
| 26 | #include "llvm/ADT/StringRef.h" |
| 27 | #include "llvm/ADT/StringSet.h" |
| 28 | #include "llvm/Support/Error.h" |
| 29 | #include "llvm/Support/FileSystem/UniqueID.h" |
| 30 | #include <optional> |
| 31 | #include <string> |
| 32 | |
| 33 | namespace clang { |
| 34 | namespace clangd { |
| 35 | |
| 36 | using = llvm::ArrayRef<std::function<bool(llvm::StringRef)>>; |
| 37 | |
| 38 | /// Returns true if \p Include is literal include like "path" or <path>. |
| 39 | bool isLiteralInclude(llvm::StringRef Include); |
| 40 | |
/// Represents a header file to be #include'd.
struct HeaderFile {
  /// Either a literal include spelling or an absolute path; `Verbatim` says
  /// which.
  std::string File;
  /// If this is true, `File` is a literal string quoted with <> or "" that
  /// can be #included directly; otherwise, `File` is an absolute file path.
  bool Verbatim;

  /// Defined out of line.
  /// NOTE(review): presumably checks that `File` is consistent with
  /// `Verbatim` (quoted literal vs. absolute path) -- confirm in the .cpp.
  bool valid() const;
};
| 50 | |
| 51 | /// A header and directives as stored in a Symbol. |
| 52 | struct SymbolInclude { |
| 53 | /// The header to include. This is either a URI or a verbatim include which is |
| 54 | /// quoted with <> or "". |
| 55 | llvm::StringRef ; |
| 56 | /// The include directive(s) that can be used, e.g. #import and/or #include. |
| 57 | Symbol::IncludeDirective Directive; |
| 58 | }; |
| 59 | |
| 60 | /// Creates a `HeaderFile` from \p Header which can be either a URI or a literal |
| 61 | /// include. |
| 62 | llvm::Expected<HeaderFile> (llvm::StringRef , |
| 63 | llvm::StringRef HintPath); |
| 64 | |
| 65 | // Returns include headers for \p Sym sorted by popularity. If two headers are |
| 66 | // equally popular, prefer the shorter one. |
| 67 | llvm::SmallVector<SymbolInclude, 1> getRankedIncludes(const Symbol &Sym); |
| 68 | |
| 69 | // An #include directive that we found in the main file. |
| 70 | struct Inclusion { |
| 71 | tok::PPKeywordKind Directive; // Directive used for inclusion, e.g. import |
| 72 | std::string Written; // Inclusion name as written e.g. <vector>. |
| 73 | Path Resolved; // Resolved path of included file. Empty if not resolved. |
| 74 | unsigned HashOffset = 0; // Byte offset from start of file to #. |
| 75 | int HashLine = 0; // Line number containing the directive, 0-indexed. |
| 76 | SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; |
| 77 | std::optional<unsigned> ; |
| 78 | }; |
| 79 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &); |
| 80 | bool operator==(const Inclusion &LHS, const Inclusion &RHS); |
| 81 | |
| 82 | // Contains information about one file in the build graph and its direct |
| 83 | // dependencies. Doesn't own the strings it references (IncludeGraph is |
| 84 | // self-contained). |
| 85 | struct IncludeGraphNode { |
| 86 | enum class SourceFlag : uint8_t { |
| 87 | None = 0, |
| 88 | // Whether current file is a main file rather than a header. |
| 89 | IsTU = 1 << 0, |
| 90 | // Whether current file had any uncompilable errors during indexing. |
| 91 | HadErrors = 1 << 1, |
| 92 | }; |
| 93 | |
| 94 | SourceFlag Flags = SourceFlag::None; |
| 95 | llvm::StringRef URI; |
| 96 | FileDigest Digest{._M_elems: {0}}; |
| 97 | std::vector<llvm::StringRef> DirectIncludes; |
| 98 | }; |
| 99 | // FileURI and FileInclusions are references to keys of the map containing |
| 100 | // them. |
| 101 | // Important: The graph generated by those callbacks might contain cycles, self |
| 102 | // edges and multi edges. |
| 103 | using IncludeGraph = llvm::StringMap<IncludeGraphNode>; |
| 104 | |
| 105 | inline IncludeGraphNode::SourceFlag operator|(IncludeGraphNode::SourceFlag A, |
| 106 | IncludeGraphNode::SourceFlag B) { |
| 107 | return static_cast<IncludeGraphNode::SourceFlag>(static_cast<uint8_t>(A) | |
| 108 | static_cast<uint8_t>(B)); |
| 109 | } |
| 110 | |
| 111 | inline bool operator&(IncludeGraphNode::SourceFlag A, |
| 112 | IncludeGraphNode::SourceFlag B) { |
| 113 | return static_cast<uint8_t>(A) & static_cast<uint8_t>(B); |
| 114 | } |
| 115 | |
| 116 | inline IncludeGraphNode::SourceFlag & |
| 117 | operator|=(IncludeGraphNode::SourceFlag &A, IncludeGraphNode::SourceFlag B) { |
| 118 | return A = A | B; |
| 119 | } |
| 120 | |
| 121 | // Information captured about the inclusion graph in a translation unit. |
| 122 | // This includes detailed information about the direct #includes, and summary |
| 123 | // information about all transitive includes. |
| 124 | // |
| 125 | // It should be built incrementally with collectIncludeStructureCallback(). |
| 126 | // When we build the preamble, we capture and store its include structure along |
| 127 | // with the preamble data. When we use the preamble, we can copy its |
| 128 | // IncludeStructure and use another collectIncludeStructureCallback() to fill |
| 129 | // in any non-preamble inclusions. |
| 130 | class IncludeStructure { |
| 131 | public: |
| 132 | IncludeStructure() { |
| 133 | // Reserve HeaderID = 0 for the main file. |
| 134 | RealPathNames.emplace_back(); |
| 135 | } |
| 136 | |
| 137 | // Inserts a PPCallback and CommentHandler that visits all includes in the |
| 138 | // main file and populates the structure. It will also scan for IWYU pragmas |
| 139 | // in comments. |
| 140 | void collect(const CompilerInstance &CI); |
| 141 | |
| 142 | // HeaderID identifies file in the include graph. It corresponds to a |
| 143 | // FileEntry rather than a FileID, but stays stable across preamble & main |
| 144 | // file builds. |
| 145 | enum class : unsigned {}; |
| 146 | |
| 147 | std::optional<HeaderID> getID(const FileEntry *Entry) const; |
| 148 | HeaderID getOrCreateID(FileEntryRef Entry); |
| 149 | |
| 150 | StringRef (HeaderID ID) const { |
| 151 | assert(static_cast<unsigned>(ID) <= RealPathNames.size()); |
| 152 | return RealPathNames[static_cast<unsigned>(ID)]; |
| 153 | } |
| 154 | |
| 155 | // Return all transitively reachable files. |
| 156 | llvm::ArrayRef<std::string> () const { return RealPathNames; } |
| 157 | |
| 158 | // Returns includes inside the main file with the given spelling. |
| 159 | // Spelling should include brackets or quotes, e.g. <foo>. |
| 160 | llvm::SmallVector<const Inclusion *> |
| 161 | mainFileIncludesWithSpelling(llvm::StringRef Spelling) const; |
| 162 | |
| 163 | // Return all transitively reachable files, and their minimum include depth. |
| 164 | // All transitive includes (absolute paths), with their minimum include depth. |
| 165 | // Root --> 0, #included file --> 1, etc. |
| 166 | // Root is the ID of the header being visited first. |
| 167 | llvm::DenseMap<HeaderID, unsigned> |
| 168 | (HeaderID Root = MainFileID) const; |
| 169 | |
| 170 | // Maps HeaderID to the ids of the files included from it. |
| 171 | llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren; |
| 172 | |
| 173 | llvm::DenseMap<tooling::stdlib::Header, llvm::SmallVector<HeaderID>> |
| 174 | ; |
| 175 | |
| 176 | std::vector<Inclusion> MainFileIncludes; |
| 177 | |
| 178 | // The entries of the header search path. (HeaderSearch::search_dir_range()) |
| 179 | // Only includes the plain-directory entries (not header maps or frameworks). |
| 180 | // All paths are canonical (FileManager::getCanonicalPath()). |
| 181 | std::vector<std::string> SearchPathsCanonical; |
| 182 | |
| 183 | // We reserve HeaderID(0) for the main file and will manually check for that |
| 184 | // in getID and getOrCreateID because the UniqueID is not stable when the |
| 185 | // content of the main file changes. |
| 186 | static const HeaderID MainFileID = HeaderID(0u); |
| 187 | |
| 188 | class ; |
| 189 | |
| 190 | private: |
| 191 | // MainFileEntry will be used to check if the queried file is the main file |
| 192 | // or not. |
| 193 | const FileEntry *MainFileEntry = nullptr; |
| 194 | |
| 195 | std::vector<std::string> RealPathNames; // In HeaderID order. |
| 196 | // FileEntry::UniqueID is mapped to the internal representation (HeaderID). |
| 197 | // Identifying files in a way that persists from preamble build to subsequent |
| 198 | // builds is surprisingly hard. FileID is unavailable in InclusionDirective(), |
| 199 | // and RealPathName and UniqueID are not preserved in |
| 200 | // the preamble. |
| 201 | llvm::DenseMap<llvm::sys::fs::UniqueID, HeaderID> UIDToIndex; |
| 202 | |
| 203 | // Maps written includes to indices in MainFileInclude for easier lookup by |
| 204 | // spelling. |
| 205 | llvm::StringMap<llvm::SmallVector<unsigned>> MainFileIncludesBySpelling; |
| 206 | }; |
| 207 | |
| 208 | // Calculates insertion edit for including a new header in a file. |
| 209 | class IncludeInserter { |
| 210 | public: |
| 211 | // If \p HeaderSearchInfo is nullptr (e.g. when compile command is |
| 212 | // infeasible), this will only try to insert verbatim headers, and |
| 213 | // include path of non-verbatim header will not be shortened. |
| 214 | (StringRef FileName, StringRef Code, |
| 215 | const format::FormatStyle &Style, StringRef BuildDir, |
| 216 | HeaderSearch *, HeaderFilter , |
| 217 | HeaderFilter ) |
| 218 | : FileName(FileName), Code(Code), BuildDir(BuildDir), |
| 219 | HeaderSearchInfo(HeaderSearchInfo), |
| 220 | Inserter(FileName, Code, Style.IncludeStyle), |
| 221 | QuotedHeaders(QuotedHeaders), AngledHeaders(AngledHeaders) {} |
| 222 | |
| 223 | void addExisting(const Inclusion &Inc); |
| 224 | |
| 225 | /// Checks whether to add an #include of the header into \p File. |
| 226 | /// An #include will not be added if: |
| 227 | /// - Either \p DeclaringHeader or \p InsertedHeader is already (directly) |
| 228 | /// in \p Inclusions (including those included via different paths). |
| 229 | /// - \p DeclaringHeader or \p InsertedHeader is the same as \p File. |
| 230 | /// |
| 231 | /// \param DeclaringHeader is path of the original header corresponding to \p |
| 232 | /// InsertedHeader e.g. the header that declares a symbol. |
| 233 | /// \param InsertedHeader The preferred header to be inserted. This could be |
| 234 | /// the same as DeclaringHeader but must be provided. |
| 235 | bool (PathRef , |
| 236 | const HeaderFile &) const; |
| 237 | |
| 238 | /// Determines the preferred way to #include a file, taking into account the |
| 239 | /// search path. Usually this will prefer a shorter representation like |
| 240 | /// 'Foo/Bar.h' over a longer one like 'Baz/include/Foo/Bar.h'. |
| 241 | /// |
| 242 | /// \param InsertedHeader The preferred header to be inserted. |
| 243 | /// |
| 244 | /// \param IncludingFile is the absolute path of the file that InsertedHeader |
| 245 | /// will be inserted. |
| 246 | /// |
| 247 | /// \return A quoted "path" or <path> to be included, or std::nullopt if it |
| 248 | /// couldn't be shortened. |
| 249 | std::optional<std::string> |
| 250 | (const HeaderFile &, |
| 251 | llvm::StringRef IncludingFile) const; |
| 252 | |
| 253 | /// Calculates an edit that inserts \p VerbatimHeader into code. If the header |
| 254 | /// is already included, this returns std::nullopt. |
| 255 | std::optional<TextEdit> insert(llvm::StringRef , |
| 256 | tooling::IncludeDirective Directive) const; |
| 257 | |
| 258 | private: |
| 259 | StringRef FileName; |
| 260 | StringRef Code; |
| 261 | StringRef BuildDir; |
| 262 | HeaderSearch * = nullptr; |
| 263 | llvm::StringSet<> ; // Both written and resolved. |
| 264 | tooling::HeaderIncludes Inserter; // Computers insertion replacement. |
| 265 | HeaderFilter ; |
| 266 | HeaderFilter ; |
| 267 | }; |
| 268 | |
| 269 | } // namespace clangd |
| 270 | } // namespace clang |
| 271 | |
| 272 | namespace llvm { |
| 273 | |
| 274 | // Support HeaderIDs as DenseMap keys. |
| 275 | template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> { |
| 276 | static inline clang::clangd::IncludeStructure::HeaderID () { |
| 277 | return static_cast<clang::clangd::IncludeStructure::HeaderID>(-1); |
| 278 | } |
| 279 | |
| 280 | static inline clang::clangd::IncludeStructure::HeaderID () { |
| 281 | return static_cast<clang::clangd::IncludeStructure::HeaderID>(-2); |
| 282 | } |
| 283 | |
| 284 | static unsigned |
| 285 | (const clang::clangd::IncludeStructure::HeaderID &Tag) { |
| 286 | return hash_value(value: static_cast<unsigned>(Tag)); |
| 287 | } |
| 288 | |
| 289 | static bool (const clang::clangd::IncludeStructure::HeaderID &LHS, |
| 290 | const clang::clangd::IncludeStructure::HeaderID &RHS) { |
| 291 | return LHS == RHS; |
| 292 | } |
| 293 | }; |
| 294 | |
| 295 | } // namespace llvm |
| 296 | |
| 297 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H |
| 298 | |