| 1 | //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// Token objects represent a characteristic of a symbol, which can be used to |
| 11 | /// perform efficient search. Tokens are keys for inverted index which are |
| 12 | /// mapped to the corresponding posting lists. |
| 13 | /// |
| 14 | /// The symbol std::cout might have the tokens: |
| 15 | /// * Scope "std::" |
| 16 | /// * Trigram "cou" |
| 17 | /// * Trigram "out" |
| 18 | /// * Type "std::ostream" |
| 19 | /// |
| 20 | //===----------------------------------------------------------------------===// |
| 21 | |
| 22 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H |
| 23 | #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H |
| 24 | |
| 25 | #include "llvm/ADT/Hashing.h" |
| 26 | #include "llvm/Support/raw_ostream.h" |
| 27 | #include <string> |
| 28 | #include <vector> |
| 29 | |
| 30 | namespace clang { |
| 31 | namespace clangd { |
| 32 | namespace dex { |
| 33 | |
| 34 | /// A Token represents an attribute of a symbol, such as a particular trigram |
| 35 | /// present in the name (used for fuzzy search). |
| 36 | /// |
| 37 | /// Tokens can be used to perform more sophisticated search queries by |
| 38 | /// constructing complex iterator trees. |
| 39 | class Token { |
| 40 | public: |
| 41 | /// Kind specifies Token type which defines semantics for the internal |
| 42 | /// representation. Each Kind has different representation stored in Data |
| 43 | /// field. |
| 44 | // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw |
| 45 | // strings. For example, PathURI store URIs of each directory and its parents, |
| 46 | // which induces a lot of overhead because these paths tend to be long and |
| 47 | // each parent directory is a prefix. |
| 48 | enum class Kind { |
| 49 | /// Represents trigram used for fuzzy search of unqualified symbol names. |
| 50 | /// |
| 51 | /// Data contains 3 bytes with trigram contents. |
| 52 | Trigram, |
| 53 | /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". |
| 54 | /// |
| 55 | /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global |
| 56 | /// scope). |
| 57 | Scope, |
| 58 | /// Path Proximity URI to symbol declaration. |
| 59 | /// |
| 60 | /// Data stores path URI of symbol declaration file or its parent. |
| 61 | /// |
| 62 | /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" |
| 63 | /// and some amount of its parents. |
| 64 | ProximityURI, |
| 65 | /// Type of symbol (see `Symbol::Type`). |
| 66 | Type, |
| 67 | /// Internal Token type for invalid/special tokens, e.g. empty tokens for |
| 68 | /// llvm::DenseMap. |
| 69 | Sentinel, |
| 70 | }; |
| 71 | |
| 72 | Token(Kind TokenKind, llvm::StringRef Data) |
| 73 | : Data(Data), TokenKind(TokenKind) {} |
| 74 | |
| 75 | bool operator==(const Token &Other) const { |
| 76 | return TokenKind == Other.TokenKind && Data == Other.Data; |
| 77 | } |
| 78 | |
| 79 | friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) { |
| 80 | switch (T.TokenKind) { |
| 81 | case Kind::Trigram: |
| 82 | OS << "T=" ; |
| 83 | break; |
| 84 | case Kind::Scope: |
| 85 | OS << "S=" ; |
| 86 | break; |
| 87 | case Kind::ProximityURI: |
| 88 | OS << "U=" ; |
| 89 | break; |
| 90 | case Kind::Type: |
| 91 | OS << "Ty=" ; |
| 92 | break; |
| 93 | case Kind::Sentinel: |
| 94 | OS << "?=" ; |
| 95 | break; |
| 96 | } |
| 97 | return OS << T.Data; |
| 98 | } |
| 99 | |
| 100 | private: |
| 101 | /// Representation which is unique among Token with the same Kind. |
| 102 | std::string Data; |
| 103 | Kind TokenKind; |
| 104 | |
| 105 | friend llvm::hash_code hash_value(const Token &Token) { |
| 106 | return llvm::hash_combine(args: static_cast<int>(Token.TokenKind), args: Token.Data); |
| 107 | } |
| 108 | }; |
| 109 | |
| 110 | } // namespace dex |
| 111 | } // namespace clangd |
| 112 | } // namespace clang |
| 113 | |
| 114 | namespace llvm { |
| 115 | |
| 116 | // Support Tokens as DenseMap keys. |
| 117 | template <> struct DenseMapInfo<clang::clangd::dex::Token> { |
| 118 | static inline clang::clangd::dex::Token getEmptyKey() { |
| 119 | return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey" }; |
| 120 | } |
| 121 | |
| 122 | static inline clang::clangd::dex::Token getTombstoneKey() { |
| 123 | return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey" }; |
| 124 | } |
| 125 | |
| 126 | static unsigned getHashValue(const clang::clangd::dex::Token &Tag) { |
| 127 | return hash_value(Token: Tag); |
| 128 | } |
| 129 | |
| 130 | static bool isEqual(const clang::clangd::dex::Token &LHS, |
| 131 | const clang::clangd::dex::Token &RHS) { |
| 132 | return LHS == RHS; |
| 133 | } |
| 134 | }; |
| 135 | |
| 136 | } // namespace llvm |
| 137 | |
| 138 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H |
| 139 | |