1 | //===--- Symbol.h ------------------------------------------------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |
10 | #define |
11 | |
12 | #include "index/SymbolID.h" |
13 | #include "index/SymbolLocation.h" |
14 | #include "index/SymbolOrigin.h" |
15 | #include "clang/Index/IndexSymbol.h" |
16 | #include "llvm/ADT/BitmaskEnum.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/Support/StringSaver.h" |
19 | |
20 | namespace clang { |
21 | namespace clangd { |
22 | |
23 | LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); |
24 | |
25 | /// The class presents a C++ symbol, e.g. class, function. |
26 | /// |
27 | /// WARNING: Symbols do not own much of their underlying data - typically |
28 | /// strings are owned by a SymbolSlab. They should be treated as non-owning |
29 | /// references. Copies are shallow. |
30 | /// |
31 | /// When adding new unowned data fields to Symbol, remember to update: |
32 | /// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage. |
33 | /// - mergeSymbol in Merge.cpp, to properly combine two Symbols. |
34 | /// |
35 | /// A fully documented symbol can be split as: |
36 | /// size_type std::map<k, t>::count(const K& key) const |
37 | /// | Return | Scope |Name| Signature | |
38 | /// We split up these components to allow display flexibility later. |
39 | struct Symbol { |
40 | /// The ID of the symbol. |
41 | SymbolID ID; |
42 | /// The symbol information, like symbol kind. |
43 | index::SymbolInfo SymInfo = index::SymbolInfo(); |
44 | /// The unqualified name of the symbol, e.g. "bar" (for ns::bar). |
45 | llvm::StringRef Name; |
46 | /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace). |
47 | llvm::StringRef Scope; |
48 | /// The location of the symbol's definition, if one was found. |
49 | /// This just covers the symbol name (e.g. without class/function body). |
50 | SymbolLocation Definition; |
51 | /// The location of the preferred declaration of the symbol. |
52 | /// This just covers the symbol name. |
53 | /// This may be the same as Definition. |
54 | /// |
55 | /// A C++ symbol may have multiple declarations, and we pick one to prefer. |
56 | /// * For classes, the canonical declaration should be the definition. |
57 | /// * For non-inline functions, the canonical declaration typically appears |
58 | /// in the ".h" file corresponding to the definition. |
59 | SymbolLocation CanonicalDeclaration; |
60 | /// The number of translation units that reference this symbol from their main |
61 | /// file. This number is only meaningful if aggregated in an index. |
62 | unsigned References = 0; |
63 | /// Where this symbol came from. Usually an index provides a constant value. |
64 | SymbolOrigin Origin = SymbolOrigin::Unknown; |
65 | /// A brief description of the symbol that can be appended in the completion |
66 | /// candidate list. For example, "(X x, Y y) const" is a function signature. |
67 | /// Only set when the symbol is indexed for completion. |
68 | llvm::StringRef Signature; |
69 | /// Argument list in human-readable format, will be displayed to help |
70 | /// disambiguate between different specializations of a template. Empty for |
71 | /// non-specializations. Example: "<int, bool, 3>" |
72 | llvm::StringRef TemplateSpecializationArgs; |
73 | /// What to insert when completing this symbol, after the symbol name. |
74 | /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function). |
75 | /// (When snippets are disabled, the symbol name alone is used). |
76 | /// Only set when the symbol is indexed for completion. |
77 | llvm::StringRef CompletionSnippetSuffix; |
78 | /// Documentation including comment for the symbol declaration. |
79 | llvm::StringRef Documentation; |
80 | /// Type when this symbol is used in an expression. (Short display form). |
81 | /// e.g. return type of a function, or type of a variable. |
82 | /// Only set when the symbol is indexed for completion. |
83 | llvm::StringRef ReturnType; |
84 | |
85 | /// Raw representation of the OpaqueType of the symbol, used for scoring |
86 | /// purposes. |
87 | /// Only set when the symbol is indexed for completion. |
88 | llvm::StringRef Type; |
89 | |
90 | enum IncludeDirective : uint8_t { |
91 | Invalid = 0, |
92 | /// `#include "header.h"` |
93 | Include = 1, |
94 | /// `#import "header.h"` |
95 | Import = 2, |
96 | |
97 | LLVM_MARK_AS_BITMASK_ENUM(Import) |
98 | }; |
99 | |
100 | struct { |
101 | () = default; |
102 | |
103 | (llvm::StringRef , |
104 | uint32_t References, |
105 | IncludeDirective SupportedDirectives) |
106 | : IncludeHeader(IncludeHeader), References(References), |
107 | SupportedDirectives(SupportedDirectives) {} |
108 | |
109 | /// This can be either a URI of the header to be #include'd |
110 | /// for this symbol, or a literal header quoted with <> or "" that is |
111 | /// suitable to be included directly. When it is a URI, the exact #include |
112 | /// path needs to be calculated according to the URI scheme. |
113 | /// |
114 | /// Note that the include header is a canonical include for the symbol and |
115 | /// can be different from FileURI in the CanonicalDeclaration. |
116 | llvm::StringRef = "" ; |
117 | /// The number of translation units that reference this symbol and include |
118 | /// this header. This number is only meaningful if aggregated in an index. |
119 | uint32_t : 30; |
120 | /// Bitfield of supported directives (IncludeDirective) that can be used |
121 | /// when including this header. |
122 | uint32_t : 2; |
123 | |
124 | IncludeDirective () const { |
125 | return static_cast<IncludeDirective>(SupportedDirectives); |
126 | } |
127 | }; |
128 | /// One Symbol can potentially be included via different headers. |
129 | /// - If we haven't seen a definition, this covers all declarations. |
130 | /// - If we have seen a definition, this covers declarations visible from |
131 | /// any definition. |
132 | /// Only set when the symbol is indexed for completion. |
133 | llvm::SmallVector<IncludeHeaderWithReferences, 1> ; |
134 | |
135 | enum SymbolFlag : uint8_t { |
136 | None = 0, |
137 | /// Whether or not this symbol is meant to be used for the code completion. |
138 | /// See also isIndexedForCodeCompletion(). |
139 | /// Note that we don't store completion information (signature, snippet, |
140 | /// type, includes) if the symbol is not indexed for code completion. |
141 | IndexedForCodeCompletion = 1 << 0, |
142 | /// Indicates if the symbol is deprecated. |
143 | Deprecated = 1 << 1, |
144 | /// Symbol is an implementation detail. |
145 | ImplementationDetail = 1 << 2, |
146 | /// Symbol is visible to other files (not e.g. a static helper function). |
147 | VisibleOutsideFile = 1 << 3, |
148 | /// Symbol has an attached documentation comment. |
149 | = 1 << 4 |
150 | }; |
151 | SymbolFlag Flags = SymbolFlag::None; |
152 | |
153 | /// FIXME: also add deprecation message and fixit? |
154 | }; |
155 | |
156 | inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A, |
157 | Symbol::SymbolFlag B) { |
158 | return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) | |
159 | static_cast<uint8_t>(B)); |
160 | } |
161 | inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A, |
162 | Symbol::SymbolFlag B) { |
163 | return A = A | B; |
164 | } |
165 | |
166 | llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S); |
167 | llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag); |
168 | |
169 | /// Invokes Callback with each StringRef& contained in the Symbol. |
170 | /// Useful for deduplicating backing strings. |
171 | template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) { |
172 | CB(S.Name); |
173 | CB(S.Scope); |
174 | CB(S.TemplateSpecializationArgs); |
175 | CB(S.Signature); |
176 | CB(S.CompletionSnippetSuffix); |
177 | CB(S.Documentation); |
178 | CB(S.ReturnType); |
179 | CB(S.Type); |
180 | auto RawCharPointerCB = [&CB](const char *&P) { |
181 | llvm::StringRef S(P); |
182 | CB(S); |
183 | assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated" ); |
184 | P = S.data(); |
185 | }; |
186 | RawCharPointerCB(S.CanonicalDeclaration.FileURI); |
187 | RawCharPointerCB(S.Definition.FileURI); |
188 | |
189 | for (auto &Include : S.IncludeHeaders) |
190 | CB(Include.IncludeHeader); |
191 | } |
192 | |
193 | /// Computes query-independent quality score for a Symbol. |
194 | /// This currently falls in the range [1, ln(#indexed documents)]. |
195 | /// FIXME: this should probably be split into symbol -> signals |
196 | /// and signals -> score, so it can be reused for Sema completions. |
197 | float quality(const Symbol &S); |
198 | |
199 | /// An immutable symbol container that stores a set of symbols. |
200 | /// The container will maintain the lifetime of the symbols. |
201 | class SymbolSlab { |
202 | public: |
203 | using const_iterator = std::vector<Symbol>::const_iterator; |
204 | using iterator = const_iterator; |
205 | using value_type = Symbol; |
206 | |
207 | SymbolSlab() = default; |
208 | |
209 | const_iterator begin() const { return Symbols.begin(); } |
210 | const_iterator end() const { return Symbols.end(); } |
211 | const_iterator find(const SymbolID &SymID) const; |
212 | |
213 | using size_type = size_t; |
214 | size_type size() const { return Symbols.size(); } |
215 | bool empty() const { return Symbols.empty(); } |
216 | // Estimates the total memory usage. |
217 | size_t bytes() const { |
218 | return sizeof(*this) + Arena.getTotalMemory() + |
219 | Symbols.capacity() * sizeof(Symbol); |
220 | } |
221 | |
222 | /// SymbolSlab::Builder is a mutable container that can 'freeze' to |
223 | /// SymbolSlab. The frozen SymbolSlab will use less memory. |
224 | class Builder { |
225 | public: |
226 | Builder() : UniqueStrings(Arena) {} |
227 | |
228 | /// Adds a symbol, overwriting any existing one with the same ID. |
229 | /// This is a deep copy: underlying strings will be owned by the slab. |
230 | void insert(const Symbol &S); |
231 | |
232 | /// Removes the symbol with an ID, if it exists. |
233 | void erase(const SymbolID &ID) { Symbols.erase(Val: ID); } |
234 | |
235 | /// Returns the symbol with an ID, if it exists. Valid until insert/remove. |
236 | const Symbol *find(const SymbolID &ID) { |
237 | auto I = Symbols.find(Val: ID); |
238 | return I == Symbols.end() ? nullptr : &I->second; |
239 | } |
240 | |
241 | /// Consumes the builder to finalize the slab. |
242 | SymbolSlab build() &&; |
243 | |
244 | private: |
245 | llvm::BumpPtrAllocator Arena; |
246 | /// Intern table for strings. Contents are on the arena. |
247 | llvm::UniqueStringSaver UniqueStrings; |
248 | /// Values are indices into Symbols vector. |
249 | llvm::DenseMap<SymbolID, Symbol> Symbols; |
250 | }; |
251 | |
252 | private: |
253 | SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols) |
254 | : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} |
255 | |
256 | llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. |
257 | std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup. |
258 | }; |
259 | |
260 | llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolSlab &Slab); |
261 | |
262 | } // namespace clangd |
263 | } // namespace clang |
264 | |
265 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |
266 | |