| 1 | //===--- Symbol.h ------------------------------------------------*- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
| 11 | |
| 12 | #include "index/SymbolID.h" |
| 13 | #include "index/SymbolLocation.h" |
| 14 | #include "index/SymbolOrigin.h" |
| 15 | #include "clang/Index/IndexSymbol.h" |
| 16 | #include "llvm/ADT/BitmaskEnum.h" |
| 17 | #include "llvm/ADT/StringRef.h" |
| 18 | #include "llvm/Support/StringSaver.h" |
| 19 | |
| 20 | namespace clang { |
| 21 | namespace clangd { |
| 22 | |
| 23 | LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); |
| 24 | |
| 25 | /// The class presents a C++ symbol, e.g. class, function. |
| 26 | /// |
| 27 | /// WARNING: Symbols do not own much of their underlying data - typically |
| 28 | /// strings are owned by a SymbolSlab. They should be treated as non-owning |
| 29 | /// references. Copies are shallow. |
| 30 | /// |
| 31 | /// When adding new unowned data fields to Symbol, remember to update: |
| 32 | /// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage. |
| 33 | /// - mergeSymbol in Merge.cpp, to properly combine two Symbols. |
| 34 | /// |
| 35 | /// A fully documented symbol can be split as: |
| 36 | /// size_type std::map<k, t>::count(const K& key) const |
| 37 | /// | Return | Scope |Name| Signature | |
| 38 | /// We split up these components to allow display flexibility later. |
| 39 | struct Symbol { |
| 40 | /// The ID of the symbol. |
| 41 | SymbolID ID; |
| 42 | /// The symbol information, like symbol kind. |
| 43 | index::SymbolInfo SymInfo = index::SymbolInfo(); |
| 44 | /// The unqualified name of the symbol, e.g. "bar" (for ns::bar). |
| 45 | llvm::StringRef Name; |
| 46 | /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace). |
| 47 | llvm::StringRef Scope; |
| 48 | /// The location of the symbol's definition, if one was found. |
| 49 | /// This just covers the symbol name (e.g. without class/function body). |
| 50 | SymbolLocation Definition; |
| 51 | /// The location of the preferred declaration of the symbol. |
| 52 | /// This just covers the symbol name. |
| 53 | /// This may be the same as Definition. |
| 54 | /// |
| 55 | /// A C++ symbol may have multiple declarations, and we pick one to prefer. |
| 56 | /// * For classes, the canonical declaration should be the definition. |
| 57 | /// * For non-inline functions, the canonical declaration typically appears |
| 58 | /// in the ".h" file corresponding to the definition. |
| 59 | SymbolLocation CanonicalDeclaration; |
| 60 | /// The number of translation units that reference this symbol from their main |
| 61 | /// file. This number is only meaningful if aggregated in an index. |
| 62 | unsigned References = 0; |
| 63 | /// Where this symbol came from. Usually an index provides a constant value. |
| 64 | SymbolOrigin Origin = SymbolOrigin::Unknown; |
| 65 | /// A brief description of the symbol that can be appended in the completion |
| 66 | /// candidate list. For example, "(X x, Y y) const" is a function signature. |
| 67 | /// Only set when the symbol is indexed for completion. |
| 68 | llvm::StringRef Signature; |
| 69 | /// Argument list in human-readable format, will be displayed to help |
| 70 | /// disambiguate between different specializations of a template. Empty for |
| 71 | /// non-specializations. Example: "<int, bool, 3>" |
| 72 | llvm::StringRef TemplateSpecializationArgs; |
| 73 | /// What to insert when completing this symbol, after the symbol name. |
| 74 | /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function). |
| 75 | /// (When snippets are disabled, the symbol name alone is used). |
| 76 | /// Only set when the symbol is indexed for completion. |
| 77 | llvm::StringRef CompletionSnippetSuffix; |
| 78 | /// Documentation including comment for the symbol declaration. |
| 79 | llvm::StringRef Documentation; |
| 80 | /// Type when this symbol is used in an expression. (Short display form). |
| 81 | /// e.g. return type of a function, or type of a variable. |
| 82 | /// Only set when the symbol is indexed for completion. |
| 83 | llvm::StringRef ReturnType; |
| 84 | |
| 85 | /// Raw representation of the OpaqueType of the symbol, used for scoring |
| 86 | /// purposes. |
| 87 | /// Only set when the symbol is indexed for completion. |
| 88 | llvm::StringRef Type; |
| 89 | |
| 90 | enum IncludeDirective : uint8_t { |
| 91 | Invalid = 0, |
| 92 | /// `#include "header.h"` |
| 93 | Include = 1, |
| 94 | /// `#import "header.h"` |
| 95 | Import = 2, |
| 96 | |
| 97 | LLVM_MARK_AS_BITMASK_ENUM(Import) |
| 98 | }; |
| 99 | |
| 100 | struct { |
| 101 | () = default; |
| 102 | |
| 103 | (llvm::StringRef , |
| 104 | uint32_t References, |
| 105 | IncludeDirective SupportedDirectives) |
| 106 | : IncludeHeader(IncludeHeader), References(References), |
| 107 | SupportedDirectives(SupportedDirectives) {} |
| 108 | |
| 109 | /// This can be either a URI of the header to be #include'd |
| 110 | /// for this symbol, or a literal header quoted with <> or "" that is |
| 111 | /// suitable to be included directly. When it is a URI, the exact #include |
| 112 | /// path needs to be calculated according to the URI scheme. |
| 113 | /// |
| 114 | /// Note that the include header is a canonical include for the symbol and |
| 115 | /// can be different from FileURI in the CanonicalDeclaration. |
| 116 | llvm::StringRef = "" ; |
| 117 | /// The number of translation units that reference this symbol and include |
| 118 | /// this header. This number is only meaningful if aggregated in an index. |
| 119 | uint32_t : 30; |
| 120 | /// Bitfield of supported directives (IncludeDirective) that can be used |
| 121 | /// when including this header. |
| 122 | uint32_t : 2; |
| 123 | |
| 124 | IncludeDirective () const { |
| 125 | return static_cast<IncludeDirective>(SupportedDirectives); |
| 126 | } |
| 127 | }; |
| 128 | /// One Symbol can potentially be included via different headers. |
| 129 | /// - If we haven't seen a definition, this covers all declarations. |
| 130 | /// - If we have seen a definition, this covers declarations visible from |
| 131 | /// any definition. |
| 132 | /// Only set when the symbol is indexed for completion. |
| 133 | llvm::SmallVector<IncludeHeaderWithReferences, 1> ; |
| 134 | |
| 135 | enum SymbolFlag : uint8_t { |
| 136 | None = 0, |
| 137 | /// Whether or not this symbol is meant to be used for the code completion. |
| 138 | /// See also isIndexedForCodeCompletion(). |
| 139 | /// Note that we don't store completion information (signature, snippet, |
| 140 | /// type, includes) if the symbol is not indexed for code completion. |
| 141 | IndexedForCodeCompletion = 1 << 0, |
| 142 | /// Indicates if the symbol is deprecated. |
| 143 | Deprecated = 1 << 1, |
| 144 | /// Symbol is an implementation detail. |
| 145 | ImplementationDetail = 1 << 2, |
| 146 | /// Symbol is visible to other files (not e.g. a static helper function). |
| 147 | VisibleOutsideFile = 1 << 3, |
| 148 | /// Symbol has an attached documentation comment. |
| 149 | = 1 << 4 |
| 150 | }; |
| 151 | SymbolFlag Flags = SymbolFlag::None; |
| 152 | |
| 153 | /// FIXME: also add deprecation message and fixit? |
| 154 | }; |
| 155 | |
| 156 | inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A, |
| 157 | Symbol::SymbolFlag B) { |
| 158 | return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) | |
| 159 | static_cast<uint8_t>(B)); |
| 160 | } |
| 161 | inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A, |
| 162 | Symbol::SymbolFlag B) { |
| 163 | return A = A | B; |
| 164 | } |
| 165 | |
| 166 | llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S); |
| 167 | llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag); |
| 168 | |
| 169 | /// Invokes Callback with each StringRef& contained in the Symbol. |
| 170 | /// Useful for deduplicating backing strings. |
| 171 | template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) { |
| 172 | CB(S.Name); |
| 173 | CB(S.Scope); |
| 174 | CB(S.TemplateSpecializationArgs); |
| 175 | CB(S.Signature); |
| 176 | CB(S.CompletionSnippetSuffix); |
| 177 | CB(S.Documentation); |
| 178 | CB(S.ReturnType); |
| 179 | CB(S.Type); |
| 180 | auto RawCharPointerCB = [&CB](const char *&P) { |
| 181 | llvm::StringRef S(P); |
| 182 | CB(S); |
| 183 | assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated" ); |
| 184 | P = S.data(); |
| 185 | }; |
| 186 | RawCharPointerCB(S.CanonicalDeclaration.FileURI); |
| 187 | RawCharPointerCB(S.Definition.FileURI); |
| 188 | |
| 189 | for (auto &Include : S.IncludeHeaders) |
| 190 | CB(Include.IncludeHeader); |
| 191 | } |
| 192 | |
| 193 | /// Computes query-independent quality score for a Symbol. |
| 194 | /// This currently falls in the range [1, ln(#indexed documents)]. |
| 195 | /// FIXME: this should probably be split into symbol -> signals |
| 196 | /// and signals -> score, so it can be reused for Sema completions. |
| 197 | float quality(const Symbol &S); |
| 198 | |
| 199 | /// An immutable symbol container that stores a set of symbols. |
| 200 | /// The container will maintain the lifetime of the symbols. |
| 201 | class SymbolSlab { |
| 202 | public: |
| 203 | using const_iterator = std::vector<Symbol>::const_iterator; |
| 204 | using iterator = const_iterator; |
| 205 | using value_type = Symbol; |
| 206 | |
| 207 | SymbolSlab() = default; |
| 208 | |
| 209 | const_iterator begin() const { return Symbols.begin(); } |
| 210 | const_iterator end() const { return Symbols.end(); } |
| 211 | const_iterator find(const SymbolID &SymID) const; |
| 212 | |
| 213 | using size_type = size_t; |
| 214 | size_type size() const { return Symbols.size(); } |
| 215 | bool empty() const { return Symbols.empty(); } |
| 216 | // Estimates the total memory usage. |
| 217 | size_t bytes() const { |
| 218 | return sizeof(*this) + Arena.getTotalMemory() + |
| 219 | Symbols.capacity() * sizeof(Symbol); |
| 220 | } |
| 221 | |
| 222 | /// SymbolSlab::Builder is a mutable container that can 'freeze' to |
| 223 | /// SymbolSlab. The frozen SymbolSlab will use less memory. |
| 224 | class Builder { |
| 225 | public: |
| 226 | Builder() : UniqueStrings(Arena) {} |
| 227 | |
| 228 | /// Adds a symbol, overwriting any existing one with the same ID. |
| 229 | /// This is a deep copy: underlying strings will be owned by the slab. |
| 230 | void insert(const Symbol &S); |
| 231 | |
| 232 | /// Removes the symbol with an ID, if it exists. |
| 233 | void erase(const SymbolID &ID) { Symbols.erase(Val: ID); } |
| 234 | |
| 235 | /// Returns the symbol with an ID, if it exists. Valid until insert/remove. |
| 236 | const Symbol *find(const SymbolID &ID) { |
| 237 | auto I = Symbols.find(Val: ID); |
| 238 | return I == Symbols.end() ? nullptr : &I->second; |
| 239 | } |
| 240 | |
| 241 | /// Consumes the builder to finalize the slab. |
| 242 | SymbolSlab build() &&; |
| 243 | |
| 244 | private: |
| 245 | llvm::BumpPtrAllocator Arena; |
| 246 | /// Intern table for strings. Contents are on the arena. |
| 247 | llvm::UniqueStringSaver UniqueStrings; |
| 248 | /// Values are indices into Symbols vector. |
| 249 | llvm::DenseMap<SymbolID, Symbol> Symbols; |
| 250 | }; |
| 251 | |
| 252 | private: |
| 253 | SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols) |
| 254 | : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} |
| 255 | |
| 256 | llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. |
| 257 | std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup. |
| 258 | }; |
| 259 | |
| 260 | llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolSlab &Slab); |
| 261 | |
| 262 | } // namespace clangd |
| 263 | } // namespace clang |
| 264 | |
| 265 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |
| 266 | |