1 | //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
/// Token objects represent a characteristic of a symbol, which can be used to
/// perform efficient search. Tokens are the keys of the inverted index and are
/// mapped to the corresponding posting lists.
13 | /// |
14 | /// The symbol std::cout might have the tokens: |
15 | /// * Scope "std::" |
16 | /// * Trigram "cou" |
17 | /// * Trigram "out" |
18 | /// * Type "std::ostream" |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H |
23 | #define |
24 | |
25 | #include "llvm/ADT/Hashing.h" |
26 | #include "llvm/Support/raw_ostream.h" |
27 | #include <string> |
28 | #include <vector> |
29 | |
30 | namespace clang { |
31 | namespace clangd { |
32 | namespace dex { |
33 | |
34 | /// A Token represents an attribute of a symbol, such as a particular trigram |
35 | /// present in the name (used for fuzzy search). |
36 | /// |
37 | /// Tokens can be used to perform more sophisticated search queries by |
38 | /// constructing complex iterator trees. |
39 | class Token { |
40 | public: |
41 | /// Kind specifies Token type which defines semantics for the internal |
42 | /// representation. Each Kind has different representation stored in Data |
43 | /// field. |
44 | // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw |
45 | // strings. For example, PathURI store URIs of each directory and its parents, |
46 | // which induces a lot of overhead because these paths tend to be long and |
47 | // each parent directory is a prefix. |
48 | enum class Kind { |
49 | /// Represents trigram used for fuzzy search of unqualified symbol names. |
50 | /// |
51 | /// Data contains 3 bytes with trigram contents. |
52 | Trigram, |
53 | /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". |
54 | /// |
55 | /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global |
56 | /// scope). |
57 | Scope, |
58 | /// Path Proximity URI to symbol declaration. |
59 | /// |
60 | /// Data stores path URI of symbol declaration file or its parent. |
61 | /// |
62 | /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" |
63 | /// and some amount of its parents. |
64 | ProximityURI, |
65 | /// Type of symbol (see `Symbol::Type`). |
66 | Type, |
67 | /// Internal Token type for invalid/special tokens, e.g. empty tokens for |
68 | /// llvm::DenseMap. |
69 | Sentinel, |
70 | }; |
71 | |
72 | Token(Kind TokenKind, llvm::StringRef Data) |
73 | : Data(Data), TokenKind(TokenKind) {} |
74 | |
75 | bool operator==(const Token &Other) const { |
76 | return TokenKind == Other.TokenKind && Data == Other.Data; |
77 | } |
78 | |
79 | friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) { |
80 | switch (T.TokenKind) { |
81 | case Kind::Trigram: |
82 | OS << "T=" ; |
83 | break; |
84 | case Kind::Scope: |
85 | OS << "S=" ; |
86 | break; |
87 | case Kind::ProximityURI: |
88 | OS << "U=" ; |
89 | break; |
90 | case Kind::Type: |
91 | OS << "Ty=" ; |
92 | break; |
93 | case Kind::Sentinel: |
94 | OS << "?=" ; |
95 | break; |
96 | } |
97 | return OS << T.Data; |
98 | } |
99 | |
100 | private: |
101 | /// Representation which is unique among Token with the same Kind. |
102 | std::string Data; |
103 | Kind TokenKind; |
104 | |
105 | friend llvm::hash_code hash_value(const Token &Token) { |
106 | return llvm::hash_combine(args: static_cast<int>(Token.TokenKind), args: Token.Data); |
107 | } |
108 | }; |
109 | |
110 | } // namespace dex |
111 | } // namespace clangd |
112 | } // namespace clang |
113 | |
114 | namespace llvm { |
115 | |
116 | // Support Tokens as DenseMap keys. |
117 | template <> struct DenseMapInfo<clang::clangd::dex::Token> { |
118 | static inline clang::clangd::dex::Token getEmptyKey() { |
119 | return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey" }; |
120 | } |
121 | |
122 | static inline clang::clangd::dex::Token getTombstoneKey() { |
123 | return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey" }; |
124 | } |
125 | |
126 | static unsigned getHashValue(const clang::clangd::dex::Token &Tag) { |
127 | return hash_value(Token: Tag); |
128 | } |
129 | |
130 | static bool isEqual(const clang::clangd::dex::Token &LHS, |
131 | const clang::clangd::dex::Token &RHS) { |
132 | return LHS == RHS; |
133 | } |
134 | }; |
135 | |
136 | } // namespace llvm |
137 | |
138 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H |
139 | |