| 1 | //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "SymbolCollector.h" |
| 10 | #include "AST.h" |
| 11 | #include "CodeComplete.h" |
| 12 | #include "CodeCompletionStrings.h" |
| 13 | #include "ExpectedTypes.h" |
| 14 | #include "SourceCode.h" |
| 15 | #include "URI.h" |
| 16 | #include "clang-include-cleaner/Analysis.h" |
| 17 | #include "clang-include-cleaner/IncludeSpeller.h" |
| 18 | #include "clang-include-cleaner/Record.h" |
| 19 | #include "clang-include-cleaner/Types.h" |
| 20 | #include "index/CanonicalIncludes.h" |
| 21 | #include "index/Ref.h" |
| 22 | #include "index/Relation.h" |
| 23 | #include "index/Symbol.h" |
| 24 | #include "index/SymbolID.h" |
| 25 | #include "index/SymbolLocation.h" |
| 26 | #include "clang/AST/Decl.h" |
| 27 | #include "clang/AST/DeclBase.h" |
| 28 | #include "clang/AST/DeclObjC.h" |
| 29 | #include "clang/AST/DeclTemplate.h" |
| 30 | #include "clang/AST/DeclarationName.h" |
| 31 | #include "clang/AST/Expr.h" |
| 32 | #include "clang/Basic/FileEntry.h" |
| 33 | #include "clang/Basic/LangOptions.h" |
| 34 | #include "clang/Basic/SourceLocation.h" |
| 35 | #include "clang/Basic/SourceManager.h" |
| 36 | #include "clang/Index/IndexSymbol.h" |
| 37 | #include "clang/Lex/Preprocessor.h" |
| 38 | #include "clang/Lex/Token.h" |
| 39 | #include "clang/Tooling/Inclusions/HeaderAnalysis.h" |
| 40 | #include "clang/Tooling/Inclusions/StandardLibrary.h" |
| 41 | #include "llvm/ADT/ArrayRef.h" |
| 42 | #include "llvm/ADT/DenseMap.h" |
| 43 | #include "llvm/ADT/SmallVector.h" |
| 44 | #include "llvm/ADT/StringRef.h" |
| 45 | #include "llvm/Support/Casting.h" |
| 46 | #include "llvm/Support/ErrorHandling.h" |
| 47 | #include "llvm/Support/FileSystem.h" |
| 48 | #include "llvm/Support/Path.h" |
| 49 | #include <cassert> |
| 50 | #include <memory> |
| 51 | #include <optional> |
| 52 | #include <string> |
| 53 | #include <utility> |
| 54 | |
| 55 | namespace clang { |
| 56 | namespace clangd { |
| 57 | namespace { |
| 58 | |
| 59 | /// If \p ND is a template specialization, returns the described template. |
| 60 | /// Otherwise, returns \p ND. |
| 61 | const NamedDecl &getTemplateOrThis(const NamedDecl &ND) { |
| 62 | if (auto *T = ND.getDescribedTemplate()) |
| 63 | return *T; |
| 64 | return ND; |
| 65 | } |
| 66 | |
| 67 | // Checks whether the decl is a private symbol in a header generated by |
| 68 | // protobuf compiler. |
| 69 | // FIXME: make filtering extensible when there are more use cases for symbol |
| 70 | // filters. |
| 71 | bool isPrivateProtoDecl(const NamedDecl &ND) { |
| 72 | const auto &SM = ND.getASTContext().getSourceManager(); |
| 73 | if (!isProtoFile(nameLocation(ND, SM), SM)) |
| 74 | return false; |
| 75 | |
| 76 | // ND without identifier can be operators. |
| 77 | if (ND.getIdentifier() == nullptr) |
| 78 | return false; |
| 79 | auto Name = ND.getIdentifier()->getName(); |
| 80 | // There are some internal helpers like _internal_set_foo(); |
| 81 | if (Name.contains(Other: "_internal_" )) |
| 82 | return true; |
| 83 | |
| 84 | // https://protobuf.dev/reference/cpp/cpp-generated/#nested-types |
| 85 | // Nested entities (messages/enums) has two names, one at the top-level scope, |
| 86 | // with a mangled name created by prepending all the outer types. These names |
| 87 | // are almost never preferred by the developers, so exclude them from index. |
| 88 | // e.g. |
| 89 | // message Foo { |
| 90 | // message Bar {} |
| 91 | // enum E { A } |
| 92 | // } |
| 93 | // |
| 94 | // yields: |
| 95 | // class Foo_Bar {}; |
| 96 | // enum Foo_E { Foo_E_A }; |
| 97 | // class Foo { |
| 98 | // using Bar = Foo_Bar; |
| 99 | // static constexpr Foo_E A = Foo_E_A; |
| 100 | // }; |
| 101 | |
| 102 | // We get rid of Foo_Bar and Foo_E by discarding any top-level entries with |
| 103 | // `_` in the name. This relies on original message/enum not having `_` in the |
| 104 | // name. Hence might go wrong in certain cases. |
| 105 | if (ND.getDeclContext()->isNamespace()) { |
| 106 | // Strip off some known public suffix helpers for enums, rest of the helpers |
| 107 | // are generated inside record decls so we don't care. |
| 108 | // https://protobuf.dev/reference/cpp/cpp-generated/#enum |
| 109 | Name.consume_back(Suffix: "_descriptor" ); |
| 110 | Name.consume_back(Suffix: "_IsValid" ); |
| 111 | Name.consume_back(Suffix: "_Name" ); |
| 112 | Name.consume_back(Suffix: "_Parse" ); |
| 113 | Name.consume_back(Suffix: "_MIN" ); |
| 114 | Name.consume_back(Suffix: "_MAX" ); |
| 115 | Name.consume_back(Suffix: "_ARRAYSIZE" ); |
| 116 | return Name.contains(C: '_'); |
| 117 | } |
| 118 | |
| 119 | // EnumConstantDecls need some special attention, despite being nested in a |
| 120 | // TagDecl, they might still have mangled names. We filter those by checking |
| 121 | // if it has parent's name as a prefix. |
| 122 | // This might go wrong if a nested entity has a name that starts with parent's |
| 123 | // name, e.g: enum Foo { Foo_X }. |
| 124 | if (llvm::isa<EnumConstantDecl>(Val: &ND)) { |
| 125 | auto *DC = llvm::cast<EnumDecl>(ND.getDeclContext()); |
| 126 | if (!DC || !DC->getIdentifier()) |
| 127 | return false; |
| 128 | auto CtxName = DC->getIdentifier()->getName(); |
| 129 | return !CtxName.empty() && Name.consume_front(Prefix: CtxName) && |
| 130 | Name.consume_front(Prefix: "_" ); |
| 131 | } |
| 132 | |
| 133 | // Now we're only left with fields/methods without an `_internal_` in the |
| 134 | // name, they're intended for public use. |
| 135 | return false; |
| 136 | } |
| 137 | |
| 138 | // We only collect #include paths for symbols that are suitable for global code |
| 139 | // completion, except for namespaces since #include path for a namespace is hard |
| 140 | // to define. |
| 141 | Symbol::IncludeDirective shouldCollectIncludePath(index::SymbolKind Kind) { |
| 142 | using SK = index::SymbolKind; |
| 143 | switch (Kind) { |
| 144 | case SK::Macro: |
| 145 | case SK::Enum: |
| 146 | case SK::Struct: |
| 147 | case SK::Class: |
| 148 | case SK::Union: |
| 149 | case SK::TypeAlias: |
| 150 | case SK::Using: |
| 151 | case SK::Function: |
| 152 | case SK::Variable: |
| 153 | case SK::EnumConstant: |
| 154 | case SK::Concept: |
| 155 | return Symbol::Include | Symbol::Import; |
| 156 | case SK::Protocol: |
| 157 | return Symbol::Import; |
| 158 | default: |
| 159 | return Symbol::Invalid; |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | // Return the symbol range of the token at \p TokLoc. |
| 164 | std::pair<SymbolLocation::Position, SymbolLocation::Position> |
| 165 | getTokenRange(SourceLocation TokLoc, const SourceManager &SM, |
| 166 | const LangOptions &LangOpts) { |
| 167 | auto CreatePosition = [&SM](SourceLocation Loc) { |
| 168 | auto LSPLoc = sourceLocToPosition(SM, Loc); |
| 169 | SymbolLocation::Position Pos; |
| 170 | Pos.setLine(LSPLoc.line); |
| 171 | Pos.setColumn(LSPLoc.character); |
| 172 | return Pos; |
| 173 | }; |
| 174 | |
| 175 | auto TokenLength = clang::Lexer::MeasureTokenLength(Loc: TokLoc, SM, LangOpts); |
| 176 | return {CreatePosition(TokLoc), |
| 177 | CreatePosition(TokLoc.getLocWithOffset(Offset: TokenLength))}; |
| 178 | } |
| 179 | |
| 180 | // Checks whether \p ND is a good candidate to be the *canonical* declaration of |
| 181 | // its symbol (e.g. a go-to-declaration target). This overrides the default of |
| 182 | // using Clang's canonical declaration, which is the first in the TU. |
| 183 | // |
| 184 | // Example: preferring a class declaration over its forward declaration. |
| 185 | bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) { |
| 186 | const auto &SM = ND.getASTContext().getSourceManager(); |
| 187 | if (isa<TagDecl>(Val: ND)) |
| 188 | return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) && |
| 189 | !isInsideMainFile(ND.getLocation(), SM); |
| 190 | if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(Val: &ND)) |
| 191 | return ID->isThisDeclarationADefinition(); |
| 192 | if (const auto *PD = dyn_cast<ObjCProtocolDecl>(Val: &ND)) |
| 193 | return PD->isThisDeclarationADefinition(); |
| 194 | return false; |
| 195 | } |
| 196 | |
| 197 | RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) { |
| 198 | RefKind Result = RefKind::Unknown; |
| 199 | if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration)) |
| 200 | Result |= RefKind::Declaration; |
| 201 | if (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) |
| 202 | Result |= RefKind::Definition; |
| 203 | if (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) |
| 204 | Result |= RefKind::Reference; |
| 205 | if (Spelled) |
| 206 | Result |= RefKind::Spelled; |
| 207 | return Result; |
| 208 | } |
| 209 | |
| 210 | std::optional<RelationKind> indexableRelation(const index::SymbolRelation &R) { |
| 211 | if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf)) |
| 212 | return RelationKind::BaseOf; |
| 213 | if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf)) |
| 214 | return RelationKind::OverriddenBy; |
| 215 | return std::nullopt; |
| 216 | } |
| 217 | |
| 218 | // Check if there is an exact spelling of \p ND at \p Loc. |
| 219 | bool isSpelled(SourceLocation Loc, const NamedDecl &ND) { |
| 220 | auto Name = ND.getDeclName(); |
| 221 | const auto NameKind = Name.getNameKind(); |
| 222 | if (NameKind != DeclarationName::Identifier && |
| 223 | NameKind != DeclarationName::CXXConstructorName && |
| 224 | NameKind != DeclarationName::ObjCZeroArgSelector && |
| 225 | NameKind != DeclarationName::ObjCOneArgSelector && |
| 226 | NameKind != DeclarationName::ObjCMultiArgSelector) |
| 227 | return false; |
| 228 | const auto &AST = ND.getASTContext(); |
| 229 | const auto &SM = AST.getSourceManager(); |
| 230 | const auto &LO = AST.getLangOpts(); |
| 231 | clang::Token Tok; |
| 232 | if (clang::Lexer::getRawToken(Loc, Result&: Tok, SM: SM, LangOpts: LO)) |
| 233 | return false; |
| 234 | auto TokSpelling = clang::Lexer::getSpelling(Tok, SM, LO); |
| 235 | if (const auto *MD = dyn_cast<ObjCMethodDecl>(Val: &ND)) |
| 236 | return TokSpelling == MD->getSelector().getNameForSlot(argIndex: 0); |
| 237 | return TokSpelling == Name.getAsString(); |
| 238 | } |
| 239 | } // namespace |
| 240 | |
| 241 | // Encapsulates decisions about how to record header paths in the index, |
| 242 | // including filename normalization, URI conversion etc. |
| 243 | // Expensive checks are cached internally. |
| 244 | class SymbolCollector:: { |
| 245 | struct { |
| 246 | // Spelling for the public umbrella header, e.g. <Foundation/Foundation.h> |
| 247 | std::optional<std::string> ; |
| 248 | // Spelling for the private umbrella header, e.g. |
| 249 | // <Foundation/Foundation_Private.h> |
| 250 | std::optional<std::string> ; |
| 251 | }; |
| 252 | // Weird double-indirect access to PP, which might not be ready yet when |
| 253 | // HeaderFiles is created but will be by the time it's used. |
| 254 | // (IndexDataConsumer::setPreprocessor can happen before or after initialize) |
| 255 | Preprocessor *&; |
| 256 | const SourceManager &; |
| 257 | const include_cleaner::PragmaIncludes *; |
| 258 | llvm::StringRef ; |
| 259 | llvm::DenseMap<const FileEntry *, const std::string *> ; |
| 260 | llvm::StringMap<std::string> ; |
| 261 | llvm::DenseMap<FileID, llvm::StringRef> ; |
| 262 | llvm::StringMap<std::string> ; |
| 263 | llvm::StringMap<FrameworkUmbrellaSpelling> |
| 264 | ; |
| 265 | |
| 266 | public: |
| 267 | (Preprocessor *&PP, const SourceManager &SM, |
| 268 | const SymbolCollector::Options &Opts) |
| 269 | : PP(PP), SM(SM), PI(Opts.PragmaIncludes), FallbackDir(Opts.FallbackDir) { |
| 270 | } |
| 271 | |
| 272 | // Returns a canonical URI for the file \p FE. |
| 273 | // We attempt to make the path absolute first. |
| 274 | const std::string &(const FileEntryRef FE) { |
| 275 | auto R = CacheFEToURI.try_emplace(Key: FE); |
| 276 | if (R.second) { |
| 277 | auto CanonPath = getCanonicalPath(F: FE, FileMgr&: SM.getFileManager()); |
| 278 | R.first->second = &toURIInternal(Path: CanonPath ? *CanonPath : FE.getName()); |
| 279 | } |
| 280 | return *R.first->second; |
| 281 | } |
| 282 | |
| 283 | // Returns a canonical URI for \p Path. |
| 284 | // If the file is in the FileManager, use that to canonicalize the path. |
| 285 | // We attempt to make the path absolute in any case. |
| 286 | const std::string &(llvm::StringRef Path) { |
| 287 | if (auto File = SM.getFileManager().getFileRef(Filename: Path)) |
| 288 | return toURI(FE: *File); |
| 289 | return toURIInternal(Path); |
| 290 | } |
| 291 | |
| 292 | // Gets a canonical include (URI of the header or <header> or "header") for |
| 293 | // header of \p FID (which should usually be the *expansion* file). |
| 294 | // This does not account for any per-symbol overrides! |
| 295 | // Returns "" if includes should not be inserted for this file. |
| 296 | llvm::StringRef (FileID FID) { |
| 297 | auto R = CacheFIDToInclude.try_emplace(Key: FID); |
| 298 | if (R.second) |
| 299 | R.first->second = getIncludeHeaderUncached(FID); |
| 300 | return R.first->second; |
| 301 | } |
| 302 | |
| 303 | // If a file is mapped by canonical headers, use that mapping, regardless |
| 304 | // of whether it's an otherwise-good header (header guards etc). |
| 305 | llvm::StringRef (llvm::StringRef ) { |
| 306 | if (!PP) |
| 307 | return "" ; |
| 308 | // Populate the system header mapping as late as possible to |
| 309 | // ensure the preprocessor has been set already. |
| 310 | CanonicalIncludes ; |
| 311 | SysHeaderMapping.addSystemHeadersMapping(Language: PP->getLangOpts()); |
| 312 | auto Canonical = SysHeaderMapping.mapHeader(HeaderPath); |
| 313 | if (Canonical.empty()) |
| 314 | return "" ; |
| 315 | // If we had a mapping, always use it. |
| 316 | assert(Canonical.starts_with("<" ) || Canonical.starts_with("\"" )); |
| 317 | return Canonical; |
| 318 | } |
| 319 | |
| 320 | private: |
| 321 | // This takes care of making paths absolute and path->URI caching, but no |
| 322 | // FileManager-based canonicalization. |
| 323 | const std::string &(llvm::StringRef Path) { |
| 324 | auto R = CachePathToURI.try_emplace(Key: Path); |
| 325 | if (R.second) { |
| 326 | llvm::SmallString<256> AbsPath = Path; |
| 327 | if (!llvm::sys::path::is_absolute(path: AbsPath) && !FallbackDir.empty()) |
| 328 | llvm::sys::fs::make_absolute(current_directory: FallbackDir, path&: AbsPath); |
| 329 | assert(llvm::sys::path::is_absolute(AbsPath) && |
| 330 | "If the VFS can't make paths absolute, a FallbackDir must be " |
| 331 | "provided" ); |
| 332 | llvm::sys::path::remove_dots(path&: AbsPath, /*remove_dot_dot=*/true); |
| 333 | R.first->second = URI::create(AbsolutePath: AbsPath).toString(); |
| 334 | } |
| 335 | return R.first->second; |
| 336 | } |
| 337 | |
| 338 | struct { |
| 339 | // Path to the frameworks directory containing the .framework directory. |
| 340 | llvm::StringRef ; |
| 341 | // Name of the framework. |
| 342 | llvm::StringRef ; |
| 343 | // Subpath relative to the Headers or PrivateHeaders dir, e.g. NSObject.h |
| 344 | // Note: This is NOT relative to the `HeadersParentDir`. |
| 345 | llvm::StringRef ; |
| 346 | // Whether this header is under the PrivateHeaders dir |
| 347 | bool ; |
| 348 | }; |
| 349 | |
| 350 | std::optional<FrameworkHeaderPath> |
| 351 | (llvm::StringRef Path) { |
| 352 | using namespace llvm::sys; |
| 353 | path::reverse_iterator I = path::rbegin(path: Path); |
| 354 | path::reverse_iterator Prev = I; |
| 355 | path::reverse_iterator E = path::rend(path: Path); |
| 356 | FrameworkHeaderPath ; |
| 357 | while (I != E) { |
| 358 | if (*I == "Headers" || *I == "PrivateHeaders" ) { |
| 359 | HeaderPath.HeaderSubpath = Path.substr(Start: Prev - E); |
| 360 | HeaderPath.IsPrivateHeader = *I == "PrivateHeaders" ; |
| 361 | if (++I == E) |
| 362 | break; |
| 363 | HeaderPath.FrameworkName = *I; |
| 364 | if (!HeaderPath.FrameworkName.consume_back(Suffix: ".framework" )) |
| 365 | break; |
| 366 | HeaderPath.FrameworkParentDir = Path.substr(Start: 0, N: I - E); |
| 367 | return HeaderPath; |
| 368 | } |
| 369 | Prev = I; |
| 370 | ++I; |
| 371 | } |
| 372 | // Unexpected, must not be a framework header. |
| 373 | return std::nullopt; |
| 374 | } |
| 375 | |
| 376 | // Frameworks typically have an umbrella header of the same name, e.g. |
| 377 | // <Foundation/Foundation.h> instead of <Foundation/NSObject.h> or |
| 378 | // <Foundation/Foundation_Private.h> instead of |
| 379 | // <Foundation/NSObject_Private.h> which should be used instead of directly |
| 380 | // importing the header. |
| 381 | std::optional<std::string> |
| 382 | (const HeaderSearch &HS, |
| 383 | FrameworkHeaderPath &) { |
| 384 | StringRef Framework = HeaderPath.FrameworkName; |
| 385 | auto Res = CacheFrameworkToUmbrellaHeaderSpelling.try_emplace(Key: Framework); |
| 386 | auto *CachedSpelling = &Res.first->second; |
| 387 | if (!Res.second) { |
| 388 | return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader |
| 389 | : CachedSpelling->PublicHeader; |
| 390 | } |
| 391 | SmallString<256> UmbrellaPath(HeaderPath.FrameworkParentDir); |
| 392 | llvm::sys::path::append(path&: UmbrellaPath, a: Framework + ".framework" , b: "Headers" , |
| 393 | c: Framework + ".h" ); |
| 394 | |
| 395 | llvm::vfs::Status Status; |
| 396 | auto StatErr = HS.getFileMgr().getNoncachedStatValue(Path: UmbrellaPath, Result&: Status); |
| 397 | if (!StatErr) |
| 398 | CachedSpelling->PublicHeader = llvm::formatv(Fmt: "<{0}/{0}.h>" , Vals&: Framework); |
| 399 | |
| 400 | UmbrellaPath = HeaderPath.FrameworkParentDir; |
| 401 | llvm::sys::path::append(path&: UmbrellaPath, a: Framework + ".framework" , |
| 402 | b: "PrivateHeaders" , c: Framework + "_Private.h" ); |
| 403 | |
| 404 | StatErr = HS.getFileMgr().getNoncachedStatValue(Path: UmbrellaPath, Result&: Status); |
| 405 | if (!StatErr) |
| 406 | CachedSpelling->PrivateHeader = |
| 407 | llvm::formatv(Fmt: "<{0}/{0}_Private.h>" , Vals&: Framework); |
| 408 | |
| 409 | return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader |
| 410 | : CachedSpelling->PublicHeader; |
| 411 | } |
| 412 | |
| 413 | // Compute the framework include spelling for `FE` which is in a framework |
| 414 | // named `Framework`, e.g. `NSObject.h` in framework `Foundation` would |
| 415 | // give <Foundation/Foundation.h> if the umbrella header exists, otherwise |
| 416 | // <Foundation/NSObject.h>. |
| 417 | std::optional<llvm::StringRef> |
| 418 | (FileEntryRef FE, HeaderSearch &HS) { |
| 419 | auto Res = CachePathToFrameworkSpelling.try_emplace(Key: FE.getName()); |
| 420 | auto * = &Res.first->second; |
| 421 | if (!Res.second) |
| 422 | return llvm::StringRef(*CachedHeaderSpelling); |
| 423 | |
| 424 | auto = splitFrameworkHeaderPath(Path: FE.getName()); |
| 425 | if (!HeaderPath) { |
| 426 | // Unexpected: must not be a proper framework header, don't cache the |
| 427 | // failure. |
| 428 | CachePathToFrameworkSpelling.erase(I: Res.first); |
| 429 | return std::nullopt; |
| 430 | } |
| 431 | if (auto UmbrellaSpelling = |
| 432 | getFrameworkUmbrellaSpelling(HS, HeaderPath&: *HeaderPath)) { |
| 433 | *CachedHeaderSpelling = *UmbrellaSpelling; |
| 434 | return llvm::StringRef(*CachedHeaderSpelling); |
| 435 | } |
| 436 | |
| 437 | *CachedHeaderSpelling = |
| 438 | llvm::formatv(Fmt: "<{0}/{1}>" , Vals&: HeaderPath->FrameworkName, |
| 439 | Vals&: HeaderPath->HeaderSubpath) |
| 440 | .str(); |
| 441 | return llvm::StringRef(*CachedHeaderSpelling); |
| 442 | } |
| 443 | |
| 444 | llvm::StringRef (FileID FID) { |
| 445 | const auto FE = SM.getFileEntryRefForID(FID); |
| 446 | if (!FE || FE->getName().empty()) |
| 447 | return "" ; |
| 448 | |
| 449 | if (auto Verbatim = PI->getPublic(File: *FE); !Verbatim.empty()) |
| 450 | return Verbatim; |
| 451 | |
| 452 | llvm::StringRef Filename = FE->getName(); |
| 453 | if (auto Canonical = mapCanonical(HeaderPath: Filename); !Canonical.empty()) |
| 454 | return Canonical; |
| 455 | |
| 456 | // Framework headers are spelled as <FrameworkName/Foo.h>, not |
| 457 | // "path/FrameworkName.framework/Headers/Foo.h". |
| 458 | auto &HS = PP->getHeaderSearchInfo(); |
| 459 | if (auto Spelling = getFrameworkHeaderIncludeSpelling(FE: *FE, HS)) |
| 460 | return *Spelling; |
| 461 | |
| 462 | if (!tooling::isSelfContainedHeader(FE: *FE, SM: PP->getSourceManager(), |
| 463 | HeaderInfo: PP->getHeaderSearchInfo())) { |
| 464 | // A .inc or .def file is often included into a real header to define |
| 465 | // symbols (e.g. LLVM tablegen files). |
| 466 | if (Filename.ends_with(Suffix: ".inc" ) || Filename.ends_with(Suffix: ".def" )) |
| 467 | // Don't use cache reentrantly due to iterator invalidation. |
| 468 | return getIncludeHeaderUncached(FID: SM.getFileID(SpellingLoc: SM.getIncludeLoc(FID))); |
| 469 | // Conservatively refuse to insert #includes to files without guards. |
| 470 | return "" ; |
| 471 | } |
| 472 | // Standard case: just insert the file itself. |
| 473 | return toURI(FE: *FE); |
| 474 | } |
| 475 | }; |
| 476 | |
| 477 | // Return the symbol location of the token at \p TokLoc. |
| 478 | std::optional<SymbolLocation> |
| 479 | SymbolCollector::getTokenLocation(SourceLocation TokLoc) { |
| 480 | const auto &SM = ASTCtx->getSourceManager(); |
| 481 | const auto FE = SM.getFileEntryRefForID(FID: SM.getFileID(SpellingLoc: TokLoc)); |
| 482 | if (!FE) |
| 483 | return std::nullopt; |
| 484 | |
| 485 | SymbolLocation Result; |
| 486 | Result.FileURI = HeaderFileURIs->toURI(FE: *FE).c_str(); |
| 487 | auto Range = getTokenRange(TokLoc, SM, LangOpts: ASTCtx->getLangOpts()); |
| 488 | Result.Start = Range.first; |
| 489 | Result.End = Range.second; |
| 490 | |
| 491 | return Result; |
| 492 | } |
| 493 | |
| 494 | SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {} |
| 495 | SymbolCollector::~SymbolCollector() = default; |
| 496 | |
| 497 | void SymbolCollector::initialize(ASTContext &Ctx) { |
| 498 | ASTCtx = &Ctx; |
| 499 | HeaderFileURIs = std::make_unique<HeaderFileURICache>( |
| 500 | args&: this->PP, args&: ASTCtx->getSourceManager(), args&: Opts); |
| 501 | CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>(); |
| 502 | CompletionTUInfo = |
| 503 | std::make_unique<CodeCompletionTUInfo>(args&: CompletionAllocator); |
| 504 | } |
| 505 | |
| 506 | bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND, |
| 507 | const ASTContext &ASTCtx, |
| 508 | const Options &Opts, |
| 509 | bool IsMainFileOnly) { |
| 510 | // Skip anonymous declarations, e.g (anonymous enum/class/struct). |
| 511 | if (ND.getDeclName().isEmpty()) |
| 512 | return false; |
| 513 | |
| 514 | // Skip main-file symbols if we are not collecting them. |
| 515 | if (IsMainFileOnly && !Opts.CollectMainFileSymbols) |
| 516 | return false; |
| 517 | |
| 518 | // Skip symbols in anonymous namespaces in header files. |
| 519 | if (!IsMainFileOnly && ND.isInAnonymousNamespace()) |
| 520 | return false; |
| 521 | |
| 522 | // For function local symbols, index only classes and its member functions. |
| 523 | if (index::isFunctionLocalSymbol(&ND)) |
| 524 | return isa<RecordDecl>(Val: ND) || |
| 525 | (ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate()); |
| 526 | |
| 527 | // We want most things but not "local" symbols such as symbols inside |
| 528 | // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl. |
| 529 | // FIXME: Need a matcher for ExportDecl in order to include symbols declared |
| 530 | // within an export. |
| 531 | const auto *DeclCtx = ND.getDeclContext(); |
| 532 | switch (DeclCtx->getDeclKind()) { |
| 533 | case Decl::TranslationUnit: |
| 534 | case Decl::Namespace: |
| 535 | case Decl::LinkageSpec: |
| 536 | case Decl::Enum: |
| 537 | case Decl::ObjCProtocol: |
| 538 | case Decl::ObjCInterface: |
| 539 | case Decl::ObjCCategory: |
| 540 | case Decl::ObjCCategoryImpl: |
| 541 | case Decl::ObjCImplementation: |
| 542 | break; |
| 543 | default: |
| 544 | // Record has a few derivations (e.g. CXXRecord, Class specialization), it's |
| 545 | // easier to cast. |
| 546 | if (!isa<RecordDecl>(DeclCtx)) |
| 547 | return false; |
| 548 | } |
| 549 | |
| 550 | // Avoid indexing internal symbols in protobuf generated headers. |
| 551 | if (isPrivateProtoDecl(ND)) |
| 552 | return false; |
| 553 | |
| 554 | // System headers that end with `intrin.h` likely contain useful symbols. |
| 555 | if (!Opts.CollectReserved && |
| 556 | (hasReservedName(ND) || hasReservedScope(*ND.getDeclContext())) && |
| 557 | ASTCtx.getSourceManager().isInSystemHeader(Loc: ND.getLocation()) && |
| 558 | !ASTCtx.getSourceManager() |
| 559 | .getFilename(SpellingLoc: ND.getLocation()) |
| 560 | .ends_with("intrin.h" )) |
| 561 | return false; |
| 562 | |
| 563 | return true; |
| 564 | } |
| 565 | |
| 566 | const Decl * |
| 567 | SymbolCollector::getRefContainer(const Decl *Enclosing, |
| 568 | const SymbolCollector::Options &Opts) { |
| 569 | while (Enclosing) { |
| 570 | const auto *ND = dyn_cast<NamedDecl>(Val: Enclosing); |
| 571 | if (ND && shouldCollectSymbol(ND: *ND, ASTCtx: ND->getASTContext(), Opts, IsMainFileOnly: true)) { |
| 572 | break; |
| 573 | } |
| 574 | Enclosing = dyn_cast_or_null<Decl>(Val: Enclosing->getDeclContext()); |
| 575 | } |
| 576 | return Enclosing; |
| 577 | } |
| 578 | |
| 579 | // Always return true to continue indexing. |
| 580 | bool SymbolCollector::handleDeclOccurrence( |
| 581 | const Decl *D, index::SymbolRoleSet Roles, |
| 582 | llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc, |
| 583 | index::IndexDataConsumer::ASTNodeInfo ASTNode) { |
| 584 | assert(ASTCtx && PP && HeaderFileURIs); |
| 585 | assert(CompletionAllocator && CompletionTUInfo); |
| 586 | assert(ASTNode.OrigD); |
| 587 | // Indexing API puts canonical decl into D, which might not have a valid |
| 588 | // source location for implicit/built-in decls. Fallback to original decl in |
| 589 | // such cases. |
| 590 | if (D->getLocation().isInvalid()) |
| 591 | D = ASTNode.OrigD; |
| 592 | // If OrigD is an declaration associated with a friend declaration and it's |
| 593 | // not a definition, skip it. Note that OrigD is the occurrence that the |
| 594 | // collector is currently visiting. |
| 595 | if ((ASTNode.OrigD->getFriendObjectKind() != |
| 596 | Decl::FriendObjectKind::FOK_None) && |
| 597 | !(Roles & static_cast<unsigned>(index::SymbolRole::Definition))) |
| 598 | return true; |
| 599 | // A declaration created for a friend declaration should not be used as the |
| 600 | // canonical declaration in the index. Use OrigD instead, unless we've already |
| 601 | // picked a replacement for D |
| 602 | if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None) |
| 603 | D = CanonicalDecls.try_emplace(Key: D, Args&: ASTNode.OrigD).first->second; |
| 604 | // Flag to mark that D should be considered canonical meaning its declaration |
| 605 | // will override any previous declaration for the Symbol. |
| 606 | bool DeclIsCanonical = false; |
| 607 | // Avoid treating ObjCImplementationDecl as a canonical declaration if it has |
| 608 | // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl. |
| 609 | if (const auto *IID = dyn_cast<ObjCImplementationDecl>(Val: D)) { |
| 610 | DeclIsCanonical = true; |
| 611 | if (const auto *CID = IID->getClassInterface()) |
| 612 | if (const auto *DD = CID->getDefinition()) |
| 613 | if (!DD->isImplicitInterfaceDecl()) |
| 614 | D = DD; |
| 615 | } |
| 616 | // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of |
| 617 | // its ObjCCategoryDecl if it has one. |
| 618 | if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(Val: D)) { |
| 619 | DeclIsCanonical = true; |
| 620 | if (const auto *CD = CID->getCategoryDecl()) |
| 621 | D = CD; |
| 622 | } |
| 623 | const NamedDecl *ND = dyn_cast<NamedDecl>(Val: D); |
| 624 | if (!ND) |
| 625 | return true; |
| 626 | |
| 627 | auto ID = getSymbolIDCached(ND); |
| 628 | if (!ID) |
| 629 | return true; |
| 630 | |
| 631 | // Mark D as referenced if this is a reference coming from the main file. |
| 632 | // D may not be an interesting symbol, but it's cheaper to check at the end. |
| 633 | auto &SM = ASTCtx->getSourceManager(); |
| 634 | if (Opts.CountReferences && |
| 635 | (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) && |
| 636 | SM.getFileID(SpellingLoc: SM.getSpellingLoc(Loc)) == SM.getMainFileID()) |
| 637 | ReferencedSymbols.insert(ID); |
| 638 | |
| 639 | // ND is the canonical (i.e. first) declaration. If it's in the main file |
| 640 | // (which is not a header), then no public declaration was visible, so assume |
| 641 | // it's main-file only. |
| 642 | bool IsMainFileOnly = |
| 643 | SM.isWrittenInMainFile(Loc: SM.getExpansionLoc(Loc: ND->getBeginLoc())) && |
| 644 | !isHeaderFile(FileName: SM.getFileEntryRefForID(FID: SM.getMainFileID())->getName(), |
| 645 | LangOpts: ASTCtx->getLangOpts()); |
| 646 | // In C, printf is a redecl of an implicit builtin! So check OrigD instead. |
| 647 | if (ASTNode.OrigD->isImplicit() || |
| 648 | !shouldCollectSymbol(ND: *ND, ASTCtx: *ASTCtx, Opts, IsMainFileOnly)) |
| 649 | return true; |
| 650 | |
| 651 | // Note: we need to process relations for all decl occurrences, including |
| 652 | // refs, because the indexing code only populates relations for specific |
| 653 | // occurrences. For example, RelationBaseOf is only populated for the |
| 654 | // occurrence inside the base-specifier. |
| 655 | processRelations(ND: *ND, ID: ID, Relations); |
| 656 | |
| 657 | bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles)); |
| 658 | // Unlike other fields, e.g. Symbols (which use spelling locations), we use |
| 659 | // file locations for references (as it aligns the behavior of clangd's |
| 660 | // AST-based xref). |
| 661 | // FIXME: we should try to use the file locations for other fields. |
| 662 | if (CollectRef && |
| 663 | (!IsMainFileOnly || Opts.CollectMainFileRefs || |
| 664 | ND->isExternallyVisible()) && |
| 665 | !isa<NamespaceDecl>(Val: ND)) { |
| 666 | auto FileLoc = SM.getFileLoc(Loc); |
| 667 | auto FID = SM.getFileID(SpellingLoc: FileLoc); |
| 668 | if (Opts.RefsInHeaders || FID == SM.getMainFileID()) { |
| 669 | addRef(ID: ID, SR: SymbolRef{FileLoc, FID, Roles, index::getSymbolInfo(ND).Kind, |
| 670 | getRefContainer(Enclosing: ASTNode.Parent, Opts), |
| 671 | isSpelled(Loc: FileLoc, ND: *ND)}); |
| 672 | } |
| 673 | } |
| 674 | // Don't continue indexing if this is a mere reference. |
| 675 | if (!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) | |
| 676 | static_cast<unsigned>(index::SymbolRole::Definition)))) |
| 677 | return true; |
| 678 | |
| 679 | // FIXME: ObjCPropertyDecl are not properly indexed here: |
| 680 | // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is |
| 681 | // not a NamedDecl. |
| 682 | auto *OriginalDecl = dyn_cast<NamedDecl>(Val: ASTNode.OrigD); |
| 683 | if (!OriginalDecl) |
| 684 | return true; |
| 685 | |
| 686 | const Symbol *BasicSymbol = Symbols.find(ID: ID); |
| 687 | bool SkipDocCheckInDef = false; |
| 688 | if (isPreferredDeclaration(ND: *OriginalDecl, Roles)) { |
| 689 | // If OriginalDecl is preferred, replace/create the existing canonical |
| 690 | // declaration (e.g. a class forward declaration). There should be at most |
| 691 | // one duplicate as we expect to see only one preferred declaration per |
| 692 | // TU, because in practice they are definitions. |
| 693 | BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileSymbol: IsMainFileOnly); |
| 694 | SkipDocCheckInDef = true; |
| 695 | } else if (!BasicSymbol || DeclIsCanonical) { |
| 696 | BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileSymbol: IsMainFileOnly); |
| 697 | SkipDocCheckInDef = true; |
| 698 | } |
| 699 | |
| 700 | if (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) |
| 701 | addDefinition(*OriginalDecl, DeclSymbol: *BasicSymbol, SkipDocCheck: SkipDocCheckInDef); |
| 702 | |
| 703 | return true; |
| 704 | } |
| 705 | |
| 706 | void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) { |
| 707 | assert(HeaderFileURIs && PP); |
| 708 | const auto &SM = PP->getSourceManager(); |
| 709 | const auto MainFileEntryRef = SM.getFileEntryRefForID(FID: SM.getMainFileID()); |
| 710 | assert(MainFileEntryRef); |
| 711 | |
| 712 | const std::string &MainFileURI = HeaderFileURIs->toURI(FE: *MainFileEntryRef); |
| 713 | // Add macro references. |
| 714 | for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) { |
| 715 | for (const auto &MacroRef : IDToRefs.second) { |
| 716 | const auto &SR = MacroRef.toSourceRange(SM); |
| 717 | auto Range = halfOpenToRange(SM, R: SR); |
| 718 | bool IsDefinition = MacroRef.IsDefinition; |
| 719 | Ref R; |
| 720 | R.Location.Start.setLine(Range.start.line); |
| 721 | R.Location.Start.setColumn(Range.start.character); |
| 722 | R.Location.End.setLine(Range.end.line); |
| 723 | R.Location.End.setColumn(Range.end.character); |
| 724 | R.Location.FileURI = MainFileURI.c_str(); |
| 725 | R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference; |
| 726 | Refs.insert(ID: IDToRefs.first, S: R); |
| 727 | if (IsDefinition) { |
| 728 | Symbol S; |
| 729 | S.ID = IDToRefs.first; |
| 730 | S.Name = toSourceCode(SM, R: SR.getAsRange()); |
| 731 | S.SymInfo.Kind = index::SymbolKind::Macro; |
| 732 | S.SymInfo.SubKind = index::SymbolSubKind::None; |
| 733 | S.SymInfo.Properties = index::SymbolPropertySet(); |
| 734 | S.SymInfo.Lang = index::SymbolLanguage::C; |
| 735 | S.Origin = Opts.Origin; |
| 736 | S.CanonicalDeclaration = R.Location; |
| 737 | // Make the macro visible for code completion if main file is an |
| 738 | // include-able header. |
| 739 | if (!HeaderFileURIs->getIncludeHeader(FID: SM.getMainFileID()).empty()) { |
| 740 | S.Flags |= Symbol::IndexedForCodeCompletion; |
| 741 | S.Flags |= Symbol::VisibleOutsideFile; |
| 742 | } |
| 743 | Symbols.insert(S); |
| 744 | } |
| 745 | } |
| 746 | } |
| 747 | } |
| 748 | |
| 749 | bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name, |
| 750 | const MacroInfo *MI, |
| 751 | index::SymbolRoleSet Roles, |
| 752 | SourceLocation Loc) { |
| 753 | assert(PP); |
| 754 | // Builtin macros don't have useful locations and aren't needed in completion. |
| 755 | if (MI->isBuiltinMacro()) |
| 756 | return true; |
| 757 | |
| 758 | const auto &SM = PP->getSourceManager(); |
| 759 | auto DefLoc = MI->getDefinitionLoc(); |
| 760 | // Also avoid storing macros that aren't defined in any file, i.e. predefined |
| 761 | // macros like __DBL_MIN__ and those defined on the command line. |
| 762 | if (SM.isWrittenInBuiltinFile(Loc: DefLoc) || |
| 763 | SM.isWrittenInCommandLineFile(Loc: DefLoc) || |
| 764 | Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM" ) |
| 765 | return true; |
| 766 | |
| 767 | auto ID = getSymbolIDCached(MacroName: Name->getName(), MI, SM); |
| 768 | if (!ID) |
| 769 | return true; |
| 770 | |
| 771 | auto SpellingLoc = SM.getSpellingLoc(Loc); |
| 772 | bool IsMainFileOnly = |
| 773 | SM.isInMainFile(Loc: SM.getExpansionLoc(Loc: DefLoc)) && |
| 774 | !isHeaderFile(FileName: SM.getFileEntryRefForID(FID: SM.getMainFileID())->getName(), |
| 775 | LangOpts: ASTCtx->getLangOpts()); |
| 776 | // Do not store references to main-file macros. |
| 777 | if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly && |
| 778 | (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) { |
| 779 | // FIXME: Populate container information for macro references. |
| 780 | // FIXME: All MacroRefs are marked as Spelled now, but this should be |
| 781 | // checked. |
| 782 | addRef(ID, |
| 783 | SR: SymbolRef{.Loc: Loc, .FID: SM.getFileID(SpellingLoc: Loc), .Roles: Roles, .Kind: index::SymbolKind::Macro, |
| 784 | /*Container=*/nullptr, |
| 785 | /*Spelled=*/true}); |
| 786 | } |
| 787 | |
| 788 | // Collect symbols. |
| 789 | if (!Opts.CollectMacro) |
| 790 | return true; |
| 791 | |
| 792 | // Skip main-file macros if we are not collecting them. |
| 793 | if (IsMainFileOnly && !Opts.CollectMainFileSymbols) |
| 794 | return false; |
| 795 | |
| 796 | // Mark the macro as referenced if this is a reference coming from the main |
| 797 | // file. The macro may not be an interesting symbol, but it's cheaper to check |
| 798 | // at the end. |
| 799 | if (Opts.CountReferences && |
| 800 | (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) && |
| 801 | SM.getFileID(SpellingLoc) == SM.getMainFileID()) |
| 802 | ReferencedSymbols.insert(V: ID); |
| 803 | |
| 804 | // Don't continue indexing if this is a mere reference. |
| 805 | // FIXME: remove macro with ID if it is undefined. |
| 806 | if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) || |
| 807 | Roles & static_cast<unsigned>(index::SymbolRole::Definition))) |
| 808 | return true; |
| 809 | |
| 810 | // Only collect one instance in case there are multiple. |
| 811 | if (Symbols.find(ID) != nullptr) |
| 812 | return true; |
| 813 | |
| 814 | Symbol S; |
| 815 | S.ID = std::move(ID); |
| 816 | S.Name = Name->getName(); |
| 817 | if (!IsMainFileOnly) { |
| 818 | S.Flags |= Symbol::IndexedForCodeCompletion; |
| 819 | S.Flags |= Symbol::VisibleOutsideFile; |
| 820 | } |
| 821 | S.SymInfo = index::getSymbolInfoForMacro(MI: *MI); |
| 822 | S.Origin = Opts.Origin; |
| 823 | // FIXME: use the result to filter out symbols. |
| 824 | shouldIndexFile(FID: SM.getFileID(SpellingLoc: Loc)); |
| 825 | if (auto DeclLoc = getTokenLocation(TokLoc: DefLoc)) |
| 826 | S.CanonicalDeclaration = *DeclLoc; |
| 827 | |
| 828 | CodeCompletionResult SymbolCompletion(Name); |
| 829 | const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro( |
| 830 | PP&: *PP, Allocator&: *CompletionAllocator, CCTUInfo&: *CompletionTUInfo); |
| 831 | std::string Signature; |
| 832 | std::string SnippetSuffix; |
| 833 | getSignature(CCS: *CCS, Signature: &Signature, Snippet: &SnippetSuffix, ResultKind: SymbolCompletion.Kind, |
| 834 | CursorKind: SymbolCompletion.CursorKind); |
| 835 | S.Signature = Signature; |
| 836 | S.CompletionSnippetSuffix = SnippetSuffix; |
| 837 | |
| 838 | IndexedMacros.insert(V: Name); |
| 839 | |
| 840 | setIncludeLocation(S, DefLoc, Sym: include_cleaner::Macro{.Name: Name, .Definition: DefLoc}); |
| 841 | Symbols.insert(S); |
| 842 | return true; |
| 843 | } |
| 844 | |
| 845 | void SymbolCollector::processRelations( |
| 846 | const NamedDecl &ND, const SymbolID &ID, |
| 847 | ArrayRef<index::SymbolRelation> Relations) { |
| 848 | for (const auto &R : Relations) { |
| 849 | auto RKind = indexableRelation(R); |
| 850 | if (!RKind) |
| 851 | continue; |
| 852 | const Decl *Object = R.RelatedSymbol; |
| 853 | |
| 854 | auto ObjectID = getSymbolIDCached(D: Object); |
| 855 | if (!ObjectID) |
| 856 | continue; |
| 857 | |
| 858 | // Record the relation. |
| 859 | // TODO: There may be cases where the object decl is not indexed for some |
| 860 | // reason. Those cases should probably be removed in due course, but for |
| 861 | // now there are two possible ways to handle it: |
| 862 | // (A) Avoid storing the relation in such cases. |
| 863 | // (B) Store it anyways. Clients will likely lookup() the SymbolID |
| 864 | // in the index and find nothing, but that's a situation they |
| 865 | // probably need to handle for other reasons anyways. |
| 866 | // We currently do (B) because it's simpler. |
| 867 | if (*RKind == RelationKind::BaseOf) |
| 868 | this->Relations.insert(R: {.Subject: ID, .Predicate: *RKind, .Object: ObjectID}); |
| 869 | else if (*RKind == RelationKind::OverriddenBy) |
| 870 | this->Relations.insert(R: {.Subject: ObjectID, .Predicate: *RKind, .Object: ID}); |
| 871 | } |
| 872 | } |
| 873 | |
| 874 | void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc, |
| 875 | const include_cleaner::Symbol &Sym) { |
| 876 | const auto &SM = PP->getSourceManager(); |
| 877 | if (!Opts.CollectIncludePath || |
| 878 | shouldCollectIncludePath(Kind: S.SymInfo.Kind) == Symbol::Invalid) |
| 879 | return; |
| 880 | |
| 881 | // Use the expansion location to get the #include header since this is |
| 882 | // where the symbol is exposed. |
| 883 | if (FileID FID = SM.getDecomposedExpansionLoc(Loc: DefLoc).first; FID.isValid()) |
| 884 | IncludeFiles[S.ID] = FID; |
| 885 | |
| 886 | // We update providers for a symbol with each occurence, as SymbolCollector |
| 887 | // might run while parsing, rather than at the end of a translation unit. |
| 888 | // Hence we see more and more redecls over time. |
| 889 | SymbolProviders[S.ID] = |
| 890 | include_cleaner::headersForSymbol(S: Sym, PP: *PP, PI: Opts.PragmaIncludes); |
| 891 | } |
| 892 | |
| 893 | llvm::StringRef (const Symbol *S, const LangOptions &LangOpts) { |
| 894 | tooling::stdlib::Lang Lang = tooling::stdlib::Lang::CXX; |
| 895 | if (LangOpts.C11) |
| 896 | Lang = tooling::stdlib::Lang::C; |
| 897 | else if(!LangOpts.CPlusPlus) |
| 898 | return "" ; |
| 899 | |
| 900 | if (S->Scope == "std::" && S->Name == "move" ) { |
| 901 | if (!S->Signature.contains(C: ',')) |
| 902 | return "<utility>" ; |
| 903 | return "<algorithm>" ; |
| 904 | } |
| 905 | |
| 906 | if (auto StdSym = tooling::stdlib::Symbol::named(Scope: S->Scope, Name: S->Name, Language: Lang)) |
| 907 | if (auto = StdSym->header()) |
| 908 | return Header->name(); |
| 909 | return "" ; |
| 910 | } |
| 911 | |
| 912 | void SymbolCollector::finish() { |
| 913 | // At the end of the TU, add 1 to the refcount of all referenced symbols. |
| 914 | for (const auto &ID : ReferencedSymbols) { |
| 915 | if (const auto *S = Symbols.find(ID)) { |
| 916 | // SymbolSlab::Builder returns const symbols because strings are interned |
| 917 | // and modifying returned symbols without inserting again wouldn't go |
| 918 | // well. const_cast is safe here as we're modifying a data owned by the |
| 919 | // Symbol. This reduces time spent in SymbolCollector by ~1%. |
| 920 | ++const_cast<Symbol *>(S)->References; |
| 921 | } |
| 922 | } |
| 923 | if (Opts.CollectMacro) { |
| 924 | assert(PP); |
| 925 | // First, drop header guards. We can't identify these until EOF. |
| 926 | for (const IdentifierInfo *II : IndexedMacros) { |
| 927 | if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) |
| 928 | if (auto ID = |
| 929 | getSymbolIDCached(MacroName: II->getName(), MI, SM: PP->getSourceManager())) |
| 930 | if (MI->isUsedForHeaderGuard()) |
| 931 | Symbols.erase(ID); |
| 932 | } |
| 933 | } |
| 934 | llvm::DenseMap<FileID, bool> FileToContainsImportsOrObjC; |
| 935 | llvm::DenseMap<include_cleaner::Header, std::string> ; |
| 936 | // Fill in IncludeHeaders. |
| 937 | // We delay this until end of TU so header guards are all resolved. |
| 938 | for (const auto &[SID, Providers] : SymbolProviders) { |
| 939 | const Symbol *S = Symbols.find(ID: SID); |
| 940 | if (!S) |
| 941 | continue; |
| 942 | |
| 943 | FileID FID = IncludeFiles.lookup(Val: SID); |
| 944 | // Determine if the FID is #include'd or #import'ed. |
| 945 | Symbol::IncludeDirective Directives = Symbol::Invalid; |
| 946 | auto CollectDirectives = shouldCollectIncludePath(Kind: S->SymInfo.Kind); |
| 947 | if ((CollectDirectives & Symbol::Include) != 0) |
| 948 | Directives |= Symbol::Include; |
| 949 | // Only allow #import for symbols from ObjC-like files. |
| 950 | if ((CollectDirectives & Symbol::Import) != 0 && FID.isValid()) { |
| 951 | auto [It, Inserted] = FileToContainsImportsOrObjC.try_emplace(Key: FID); |
| 952 | if (Inserted) |
| 953 | It->second = FilesWithObjCConstructs.contains(V: FID) || |
| 954 | tooling::codeContainsImports( |
| 955 | Code: ASTCtx->getSourceManager().getBufferData(FID)); |
| 956 | if (It->second) |
| 957 | Directives |= Symbol::Import; |
| 958 | } |
| 959 | |
| 960 | if (Directives == Symbol::Invalid) |
| 961 | continue; |
| 962 | |
| 963 | // Use the include location-based logic for Objective-C symbols. |
| 964 | if (Directives & Symbol::Import) { |
| 965 | llvm::StringRef = getStdHeader(S, LangOpts: ASTCtx->getLangOpts()); |
| 966 | if (IncludeHeader.empty()) |
| 967 | IncludeHeader = HeaderFileURIs->getIncludeHeader(FID); |
| 968 | |
| 969 | if (!IncludeHeader.empty()) { |
| 970 | auto NewSym = *S; |
| 971 | NewSym.IncludeHeaders.push_back(Elt: {IncludeHeader, 1, Directives}); |
| 972 | Symbols.insert(S: NewSym); |
| 973 | } |
| 974 | // FIXME: use providers from include-cleaner library once it's polished |
| 975 | // for Objective-C. |
| 976 | continue; |
| 977 | } |
| 978 | |
| 979 | // For #include's, use the providers computed by the include-cleaner |
| 980 | // library. |
| 981 | assert(Directives == Symbol::Include); |
| 982 | // Ignore providers that are not self-contained, this is especially |
| 983 | // important for symbols defined in the main-file. We want to prefer the |
| 984 | // header, if possible. |
| 985 | // TODO: Limit this to specifically ignore main file, when we're indexing a |
| 986 | // non-header file? |
| 987 | auto SelfContainedProvider = |
| 988 | [this](llvm::ArrayRef<include_cleaner::Header> Providers) |
| 989 | -> std::optional<include_cleaner::Header> { |
| 990 | for (const auto &H : Providers) { |
| 991 | if (H.kind() != include_cleaner::Header::Physical) |
| 992 | return H; |
| 993 | if (tooling::isSelfContainedHeader(FE: H.physical(), SM: PP->getSourceManager(), |
| 994 | HeaderInfo: PP->getHeaderSearchInfo())) |
| 995 | return H; |
| 996 | } |
| 997 | return std::nullopt; |
| 998 | }; |
| 999 | const auto OptionalProvider = SelfContainedProvider(Providers); |
| 1000 | if (!OptionalProvider) |
| 1001 | continue; |
| 1002 | const auto &H = *OptionalProvider; |
| 1003 | const auto [SpellingIt, Inserted] = HeaderSpelling.try_emplace(Key: H); |
| 1004 | if (Inserted) { |
| 1005 | auto &SM = ASTCtx->getSourceManager(); |
| 1006 | if (H.kind() == include_cleaner::Header::Kind::Physical) { |
| 1007 | // FIXME: Get rid of this once include-cleaner has support for system |
| 1008 | // headers. |
| 1009 | if (auto Canonical = |
| 1010 | HeaderFileURIs->mapCanonical(HeaderPath: H.physical().getName()); |
| 1011 | !Canonical.empty()) |
| 1012 | SpellingIt->second = Canonical; |
| 1013 | // For physical files, prefer URIs as spellings might change |
| 1014 | // depending on the translation unit. |
| 1015 | else if (tooling::isSelfContainedHeader(FE: H.physical(), SM, |
| 1016 | HeaderInfo: PP->getHeaderSearchInfo())) |
| 1017 | SpellingIt->second = |
| 1018 | HeaderFileURIs->toURI(FE: H.physical()); |
| 1019 | } else { |
| 1020 | SpellingIt->second = include_cleaner::spellHeader( |
| 1021 | Input: {.H: H, .HS: PP->getHeaderSearchInfo(), |
| 1022 | .Main: SM.getFileEntryForID(FID: SM.getMainFileID())}); |
| 1023 | } |
| 1024 | } |
| 1025 | |
| 1026 | if (!SpellingIt->second.empty()) { |
| 1027 | auto NewSym = *S; |
| 1028 | NewSym.IncludeHeaders.push_back(Elt: {SpellingIt->second, 1, Directives}); |
| 1029 | Symbols.insert(S: NewSym); |
| 1030 | } |
| 1031 | } |
| 1032 | |
| 1033 | ReferencedSymbols.clear(); |
| 1034 | IncludeFiles.clear(); |
| 1035 | SymbolProviders.clear(); |
| 1036 | FilesWithObjCConstructs.clear(); |
| 1037 | } |
| 1038 | |
| 1039 | const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID, |
| 1040 | bool IsMainFileOnly) { |
| 1041 | auto &Ctx = ND.getASTContext(); |
| 1042 | auto &SM = Ctx.getSourceManager(); |
| 1043 | |
| 1044 | Symbol S; |
| 1045 | S.ID = std::move(ID); |
| 1046 | std::string QName = printQualifiedName(ND); |
| 1047 | // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo: |
| 1048 | // for consistency with CodeCompletionString and a clean name/signature split. |
| 1049 | std::tie(args&: S.Scope, args&: S.Name) = splitQualifiedName(QName); |
| 1050 | std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND); |
| 1051 | S.TemplateSpecializationArgs = TemplateSpecializationArgs; |
| 1052 | |
| 1053 | // We collect main-file symbols, but do not use them for code completion. |
| 1054 | if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx)) |
| 1055 | S.Flags |= Symbol::IndexedForCodeCompletion; |
| 1056 | if (isImplementationDetail(&ND)) |
| 1057 | S.Flags |= Symbol::ImplementationDetail; |
| 1058 | if (!IsMainFileOnly) |
| 1059 | S.Flags |= Symbol::VisibleOutsideFile; |
| 1060 | S.SymInfo = index::getSymbolInfo(&ND); |
| 1061 | auto Loc = nameLocation(ND, SM); |
| 1062 | assert(Loc.isValid() && "Invalid source location for NamedDecl" ); |
| 1063 | // FIXME: use the result to filter out symbols. |
| 1064 | auto FID = SM.getFileID(Loc); |
| 1065 | shouldIndexFile(FID: FID); |
| 1066 | if (auto DeclLoc = getTokenLocation(Loc)) |
| 1067 | S.CanonicalDeclaration = *DeclLoc; |
| 1068 | |
| 1069 | S.Origin = Opts.Origin; |
| 1070 | if (ND.getAvailability() == AR_Deprecated) |
| 1071 | S.Flags |= Symbol::Deprecated; |
| 1072 | |
| 1073 | // Add completion info. |
| 1074 | // FIXME: we may want to choose a different redecl, or combine from several. |
| 1075 | assert(ASTCtx && PP && "ASTContext and Preprocessor must be set." ); |
| 1076 | // We use the primary template, as clang does during code completion. |
| 1077 | CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0); |
| 1078 | const auto *CCS = SymbolCompletion.CreateCodeCompletionString( |
| 1079 | Ctx&: *ASTCtx, PP&: *PP, CCContext: CodeCompletionContext::CCC_Symbol, Allocator&: *CompletionAllocator, |
| 1080 | CCTUInfo&: *CompletionTUInfo, |
| 1081 | /*IncludeBriefComments*/ false); |
| 1082 | std::string ; |
| 1083 | std::string Documentation; |
| 1084 | bool AlreadyHasDoc = S.Flags & Symbol::HasDocComment; |
| 1085 | if (!AlreadyHasDoc) { |
| 1086 | DocComment = getDocComment(Ctx, SymbolCompletion, |
| 1087 | /*CommentsFromHeaders=*/true); |
| 1088 | Documentation = formatDocumentation(CCS: *CCS, DocComment); |
| 1089 | } |
| 1090 | const auto UpdateDoc = [&] { |
| 1091 | if (!AlreadyHasDoc) { |
| 1092 | if (!DocComment.empty()) |
| 1093 | S.Flags |= Symbol::HasDocComment; |
| 1094 | S.Documentation = Documentation; |
| 1095 | } |
| 1096 | }; |
| 1097 | if (!(S.Flags & Symbol::IndexedForCodeCompletion)) { |
| 1098 | if (Opts.StoreAllDocumentation) |
| 1099 | UpdateDoc(); |
| 1100 | Symbols.insert(S); |
| 1101 | return Symbols.find(ID: S.ID); |
| 1102 | } |
| 1103 | UpdateDoc(); |
| 1104 | std::string Signature; |
| 1105 | std::string SnippetSuffix; |
| 1106 | getSignature(CCS: *CCS, Signature: &Signature, Snippet: &SnippetSuffix, ResultKind: SymbolCompletion.Kind, |
| 1107 | CursorKind: SymbolCompletion.CursorKind); |
| 1108 | S.Signature = Signature; |
| 1109 | S.CompletionSnippetSuffix = SnippetSuffix; |
| 1110 | std::string ReturnType = getReturnType(CCS: *CCS); |
| 1111 | S.ReturnType = ReturnType; |
| 1112 | |
| 1113 | std::optional<OpaqueType> TypeStorage; |
| 1114 | if (S.Flags & Symbol::IndexedForCodeCompletion) { |
| 1115 | TypeStorage = OpaqueType::fromCompletionResult(Ctx&: *ASTCtx, R: SymbolCompletion); |
| 1116 | if (TypeStorage) |
| 1117 | S.Type = TypeStorage->raw(); |
| 1118 | } |
| 1119 | |
| 1120 | Symbols.insert(S); |
| 1121 | setIncludeLocation(S, DefLoc: ND.getLocation(), Sym: include_cleaner::Symbol{ND}); |
| 1122 | if (S.SymInfo.Lang == index::SymbolLanguage::ObjC) |
| 1123 | FilesWithObjCConstructs.insert(FID); |
| 1124 | return Symbols.find(ID: S.ID); |
| 1125 | } |
| 1126 | |
| 1127 | void SymbolCollector::addDefinition(const NamedDecl &ND, const Symbol &DeclSym, |
| 1128 | bool SkipDocCheck) { |
| 1129 | if (DeclSym.Definition) |
| 1130 | return; |
| 1131 | const auto &SM = ND.getASTContext().getSourceManager(); |
| 1132 | auto Loc = nameLocation(ND, SM); |
| 1133 | shouldIndexFile(FID: SM.getFileID(Loc)); |
| 1134 | auto DefLoc = getTokenLocation(TokLoc: Loc); |
| 1135 | // If we saw some forward declaration, we end up copying the symbol. |
| 1136 | // This is not ideal, but avoids duplicating the "is this a definition" check |
| 1137 | // in clang::index. We should only see one definition. |
| 1138 | if (!DefLoc) |
| 1139 | return; |
| 1140 | Symbol S = DeclSym; |
| 1141 | // FIXME: use the result to filter out symbols. |
| 1142 | S.Definition = *DefLoc; |
| 1143 | |
| 1144 | std::string ; |
| 1145 | std::string Documentation; |
| 1146 | if (!SkipDocCheck && !(S.Flags & Symbol::HasDocComment) && |
| 1147 | (llvm::isa<FunctionDecl>(Val: ND) || llvm::isa<CXXMethodDecl>(Val: ND))) { |
| 1148 | CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0); |
| 1149 | const auto *CCS = SymbolCompletion.CreateCodeCompletionString( |
| 1150 | Ctx&: *ASTCtx, PP&: *PP, CCContext: CodeCompletionContext::CCC_Symbol, Allocator&: *CompletionAllocator, |
| 1151 | CCTUInfo&: *CompletionTUInfo, |
| 1152 | /*IncludeBriefComments*/ false); |
| 1153 | DocComment = getDocComment(ND.getASTContext(), SymbolCompletion, |
| 1154 | /*CommentsFromHeaders=*/true); |
| 1155 | if (!S.Documentation.empty()) |
| 1156 | Documentation = S.Documentation.str() + '\n' + DocComment; |
| 1157 | else |
| 1158 | Documentation = formatDocumentation(CCS: *CCS, DocComment); |
| 1159 | if (!DocComment.empty()) |
| 1160 | S.Flags |= Symbol::HasDocComment; |
| 1161 | S.Documentation = Documentation; |
| 1162 | } |
| 1163 | |
| 1164 | Symbols.insert(S); |
| 1165 | } |
| 1166 | |
| 1167 | bool SymbolCollector::shouldIndexFile(FileID FID) { |
| 1168 | if (!Opts.FileFilter) |
| 1169 | return true; |
| 1170 | auto I = FilesToIndexCache.try_emplace(Key: FID); |
| 1171 | if (I.second) |
| 1172 | I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID); |
| 1173 | return I.first->second; |
| 1174 | } |
| 1175 | |
| 1176 | static bool refIsCall(index::SymbolKind Kind) { |
| 1177 | using SK = index::SymbolKind; |
| 1178 | return Kind == SK::Function || Kind == SK::InstanceMethod || |
| 1179 | Kind == SK::ClassMethod || Kind == SK::StaticMethod || |
| 1180 | Kind == SK::Constructor || Kind == SK::Destructor || |
| 1181 | Kind == SK::ConversionFunction; |
| 1182 | } |
| 1183 | |
| 1184 | void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) { |
| 1185 | const auto &SM = ASTCtx->getSourceManager(); |
| 1186 | // FIXME: use the result to filter out references. |
| 1187 | shouldIndexFile(FID: SR.FID); |
| 1188 | if (const auto FE = SM.getFileEntryRefForID(FID: SR.FID)) { |
| 1189 | auto Range = getTokenRange(TokLoc: SR.Loc, SM, LangOpts: ASTCtx->getLangOpts()); |
| 1190 | Ref R; |
| 1191 | R.Location.Start = Range.first; |
| 1192 | R.Location.End = Range.second; |
| 1193 | R.Location.FileURI = HeaderFileURIs->toURI(FE: *FE).c_str(); |
| 1194 | R.Kind = toRefKind(Roles: SR.Roles, Spelled: SR.Spelled); |
| 1195 | if (refIsCall(Kind: SR.Kind)) { |
| 1196 | R.Kind |= RefKind::Call; |
| 1197 | } |
| 1198 | R.Container = getSymbolIDCached(D: SR.Container); |
| 1199 | Refs.insert(ID, S: R); |
| 1200 | } |
| 1201 | } |
| 1202 | |
| 1203 | SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) { |
| 1204 | auto It = DeclToIDCache.try_emplace(Key: D); |
| 1205 | if (It.second) |
| 1206 | It.first->second = getSymbolID(D); |
| 1207 | return It.first->second; |
| 1208 | } |
| 1209 | |
| 1210 | SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName, |
| 1211 | const MacroInfo *MI, |
| 1212 | const SourceManager &SM) { |
| 1213 | auto It = MacroToIDCache.try_emplace(Key: MI); |
| 1214 | if (It.second) |
| 1215 | It.first->second = getSymbolID(MacroName, MI, SM); |
| 1216 | return It.first->second; |
| 1217 | } |
| 1218 | } // namespace clangd |
| 1219 | } // namespace clang |
| 1220 | |