1 | //===--- Symbol.h ------------------------------------------------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |
10 | #define |
11 | |
12 | #include "index/SymbolID.h" |
13 | #include "index/SymbolLocation.h" |
14 | #include "index/SymbolOrigin.h" |
15 | #include "clang/Index/IndexSymbol.h" |
16 | #include "llvm/ADT/BitmaskEnum.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/Support/StringSaver.h" |
19 | |
20 | namespace clang { |
21 | namespace clangd { |
22 | |
23 | LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); |
24 | |
25 | /// The class presents a C++ symbol, e.g. class, function. |
26 | /// |
27 | /// WARNING: Symbols do not own much of their underlying data - typically |
28 | /// strings are owned by a SymbolSlab. They should be treated as non-owning |
29 | /// references. Copies are shallow. |
30 | /// |
31 | /// When adding new unowned data fields to Symbol, remember to update: |
32 | /// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage. |
33 | /// - mergeSymbol in Merge.cpp, to properly combine two Symbols. |
34 | /// |
35 | /// A fully documented symbol can be split as: |
36 | /// size_type std::map<k, t>::count(const K& key) const |
37 | /// | Return | Scope |Name| Signature | |
38 | /// We split up these components to allow display flexibility later. |
39 | struct Symbol { |
40 | /// The ID of the symbol. |
41 | SymbolID ID; |
42 | /// The symbol information, like symbol kind. |
43 | index::SymbolInfo SymInfo = index::SymbolInfo(); |
44 | /// The unqualified name of the symbol, e.g. "bar" (for ns::bar). |
45 | llvm::StringRef Name; |
46 | /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace). |
47 | llvm::StringRef Scope; |
48 | /// The location of the symbol's definition, if one was found. |
49 | /// This just covers the symbol name (e.g. without class/function body). |
50 | SymbolLocation Definition; |
51 | /// The location of the preferred declaration of the symbol. |
52 | /// This just covers the symbol name. |
53 | /// This may be the same as Definition. |
54 | /// |
55 | /// A C++ symbol may have multiple declarations, and we pick one to prefer. |
56 | /// * For classes, the canonical declaration should be the definition. |
57 | /// * For non-inline functions, the canonical declaration typically appears |
58 | /// in the ".h" file corresponding to the definition. |
59 | SymbolLocation CanonicalDeclaration; |
60 | /// The number of translation units that reference this symbol from their main |
61 | /// file. This number is only meaningful if aggregated in an index. |
62 | unsigned References = 0; |
63 | /// Where this symbol came from. Usually an index provides a constant value. |
64 | SymbolOrigin Origin = SymbolOrigin::Unknown; |
65 | /// A brief description of the symbol that can be appended in the completion |
66 | /// candidate list. For example, "(X x, Y y) const" is a function signature. |
67 | /// Only set when the symbol is indexed for completion. |
68 | llvm::StringRef Signature; |
69 | /// Argument list in human-readable format, will be displayed to help |
70 | /// disambiguate between different specializations of a template. Empty for |
71 | /// non-specializations. Example: "<int, bool, 3>" |
72 | llvm::StringRef TemplateSpecializationArgs; |
73 | /// What to insert when completing this symbol, after the symbol name. |
74 | /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function). |
75 | /// (When snippets are disabled, the symbol name alone is used). |
76 | /// Only set when the symbol is indexed for completion. |
77 | llvm::StringRef CompletionSnippetSuffix; |
78 | /// Documentation including comment for the symbol declaration. |
79 | llvm::StringRef Documentation; |
80 | /// Type when this symbol is used in an expression. (Short display form). |
81 | /// e.g. return type of a function, or type of a variable. |
82 | /// Only set when the symbol is indexed for completion. |
83 | llvm::StringRef ReturnType; |
84 | |
85 | /// Raw representation of the OpaqueType of the symbol, used for scoring |
86 | /// purposes. |
87 | /// Only set when the symbol is indexed for completion. |
88 | llvm::StringRef Type; |
89 | |
90 | enum IncludeDirective : uint8_t { |
91 | Invalid = 0, |
92 | /// `#include "header.h"` |
93 | Include = 1, |
94 | /// `#import "header.h"` |
95 | Import = 2, |
96 | |
97 | LLVM_MARK_AS_BITMASK_ENUM(Import) |
98 | }; |
99 | |
100 | struct { |
101 | () = default; |
102 | |
103 | (llvm::StringRef , |
104 | uint32_t References, |
105 | IncludeDirective SupportedDirectives) |
106 | : IncludeHeader(IncludeHeader), References(References), |
107 | SupportedDirectives(SupportedDirectives) {} |
108 | |
109 | /// This can be either a URI of the header to be #include'd |
110 | /// for this symbol, or a literal header quoted with <> or "" that is |
111 | /// suitable to be included directly. When it is a URI, the exact #include |
112 | /// path needs to be calculated according to the URI scheme. |
113 | /// |
114 | /// Note that the include header is a canonical include for the symbol and |
115 | /// can be different from FileURI in the CanonicalDeclaration. |
116 | llvm::StringRef = "" ; |
117 | /// The number of translation units that reference this symbol and include |
118 | /// this header. This number is only meaningful if aggregated in an index. |
119 | uint32_t : 30; |
120 | /// Bitfield of supported directives (IncludeDirective) that can be used |
121 | /// when including this header. |
122 | uint32_t : 2; |
123 | |
124 | IncludeDirective () const { |
125 | return static_cast<IncludeDirective>(SupportedDirectives); |
126 | } |
127 | }; |
128 | /// One Symbol can potentially be included via different headers. |
129 | /// - If we haven't seen a definition, this covers all declarations. |
130 | /// - If we have seen a definition, this covers declarations visible from |
131 | /// any definition. |
132 | /// Only set when the symbol is indexed for completion. |
133 | llvm::SmallVector<IncludeHeaderWithReferences, 1> ; |
134 | |
135 | enum SymbolFlag : uint8_t { |
136 | None = 0, |
137 | /// Whether or not this symbol is meant to be used for the code completion. |
138 | /// See also isIndexedForCodeCompletion(). |
139 | /// Note that we don't store completion information (signature, snippet, |
140 | /// type, includes) if the symbol is not indexed for code completion. |
141 | IndexedForCodeCompletion = 1 << 0, |
142 | /// Indicates if the symbol is deprecated. |
143 | Deprecated = 1 << 1, |
144 | /// Symbol is an implementation detail. |
145 | ImplementationDetail = 1 << 2, |
146 | /// Symbol is visible to other files (not e.g. a static helper function). |
147 | VisibleOutsideFile = 1 << 3, |
148 | }; |
149 | |
150 | SymbolFlag Flags = SymbolFlag::None; |
151 | /// FIXME: also add deprecation message and fixit? |
152 | }; |
153 | |
154 | inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A, |
155 | Symbol::SymbolFlag B) { |
156 | return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) | |
157 | static_cast<uint8_t>(B)); |
158 | } |
159 | inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A, |
160 | Symbol::SymbolFlag B) { |
161 | return A = A | B; |
162 | } |
163 | |
/// Stream-insertion overloads for a Symbol and for a Symbol::SymbolFlag value.
/// They are only declared in this header; the definitions (and therefore the
/// exact output format) live elsewhere. Each returns a raw_ostream reference
/// so that insertions can be chained.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag);
166 | |
167 | /// Invokes Callback with each StringRef& contained in the Symbol. |
168 | /// Useful for deduplicating backing strings. |
169 | template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) { |
170 | CB(S.Name); |
171 | CB(S.Scope); |
172 | CB(S.TemplateSpecializationArgs); |
173 | CB(S.Signature); |
174 | CB(S.CompletionSnippetSuffix); |
175 | CB(S.Documentation); |
176 | CB(S.ReturnType); |
177 | CB(S.Type); |
178 | auto RawCharPointerCB = [&CB](const char *&P) { |
179 | llvm::StringRef S(P); |
180 | CB(S); |
181 | assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated" ); |
182 | P = S.data(); |
183 | }; |
184 | RawCharPointerCB(S.CanonicalDeclaration.FileURI); |
185 | RawCharPointerCB(S.Definition.FileURI); |
186 | |
187 | for (auto &Include : S.IncludeHeaders) |
188 | CB(Include.IncludeHeader); |
189 | } |
190 | |
/// Computes query-independent quality score for a Symbol.
/// This currently falls in the range [1, ln(#indexed documents)].
/// FIXME: this should probably be split into symbol -> signals
/// and signals -> score, so it can be reused for Sema completions.
///
/// Only declared in this header; the definition lives elsewhere.
float quality(const Symbol &S);
196 | |
197 | /// An immutable symbol container that stores a set of symbols. |
198 | /// The container will maintain the lifetime of the symbols. |
199 | class SymbolSlab { |
200 | public: |
201 | using const_iterator = std::vector<Symbol>::const_iterator; |
202 | using iterator = const_iterator; |
203 | using value_type = Symbol; |
204 | |
205 | SymbolSlab() = default; |
206 | |
207 | const_iterator begin() const { return Symbols.begin(); } |
208 | const_iterator end() const { return Symbols.end(); } |
209 | const_iterator find(const SymbolID &SymID) const; |
210 | |
211 | using size_type = size_t; |
212 | size_type size() const { return Symbols.size(); } |
213 | bool empty() const { return Symbols.empty(); } |
214 | // Estimates the total memory usage. |
215 | size_t bytes() const { |
216 | return sizeof(*this) + Arena.getTotalMemory() + |
217 | Symbols.capacity() * sizeof(Symbol); |
218 | } |
219 | |
220 | /// SymbolSlab::Builder is a mutable container that can 'freeze' to |
221 | /// SymbolSlab. The frozen SymbolSlab will use less memory. |
222 | class Builder { |
223 | public: |
224 | Builder() : UniqueStrings(Arena) {} |
225 | |
226 | /// Adds a symbol, overwriting any existing one with the same ID. |
227 | /// This is a deep copy: underlying strings will be owned by the slab. |
228 | void insert(const Symbol &S); |
229 | |
230 | /// Removes the symbol with an ID, if it exists. |
231 | void erase(const SymbolID &ID) { Symbols.erase(Val: ID); } |
232 | |
233 | /// Returns the symbol with an ID, if it exists. Valid until insert/remove. |
234 | const Symbol *find(const SymbolID &ID) { |
235 | auto I = Symbols.find(Val: ID); |
236 | return I == Symbols.end() ? nullptr : &I->second; |
237 | } |
238 | |
239 | /// Consumes the builder to finalize the slab. |
240 | SymbolSlab build() &&; |
241 | |
242 | private: |
243 | llvm::BumpPtrAllocator Arena; |
244 | /// Intern table for strings. Contents are on the arena. |
245 | llvm::UniqueStringSaver UniqueStrings; |
246 | /// Values are indices into Symbols vector. |
247 | llvm::DenseMap<SymbolID, Symbol> Symbols; |
248 | }; |
249 | |
250 | private: |
251 | SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols) |
252 | : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} |
253 | |
254 | llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. |
255 | std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup. |
256 | }; |
257 | |
/// Stream-insertion overload for a whole SymbolSlab. Only declared in this
/// header; the definition (and the exact output format) lives elsewhere.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolSlab &Slab);
259 | |
260 | } // namespace clangd |
261 | } // namespace clang |
262 | |
263 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |
264 | |