1 | //===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "FuzzySymbolIndex.h" |
9 | #include "llvm/Support/Regex.h" |
10 | |
11 | using clang::find_all_symbols::SymbolAndSignals; |
12 | using llvm::StringRef; |
13 | |
14 | namespace clang { |
15 | namespace include_fixer { |
16 | namespace { |
17 | |
18 | class MemSymbolIndex : public FuzzySymbolIndex { |
19 | public: |
20 | MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) { |
21 | for (auto &Symbol : Symbols) { |
22 | auto Tokens = tokenize(Text: Symbol.Symbol.getName()); |
23 | this->Symbols.emplace_back( |
24 | args: StringRef(llvm::join(Begin: Tokens.begin(), End: Tokens.end(), Separator: " " )), |
25 | args: std::move(Symbol)); |
26 | } |
27 | } |
28 | |
29 | std::vector<SymbolAndSignals> search(StringRef Query) override { |
30 | auto Tokens = tokenize(Text: Query); |
31 | llvm::Regex Pattern("^" + queryRegexp(Tokens)); |
32 | std::vector<SymbolAndSignals> Results; |
33 | for (const Entry &E : Symbols) |
34 | if (Pattern.match(String: E.first)) |
35 | Results.push_back(x: E.second); |
36 | return Results; |
37 | } |
38 | |
39 | private: |
40 | using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>; |
41 | std::vector<Entry> Symbols; |
42 | }; |
43 | |
44 | // Helpers for tokenize state machine. |
45 | enum TokenizeState { |
46 | EMPTY, // No pending characters. |
47 | ONE_BIG, // Read one uppercase letter, could be WORD or Word. |
48 | BIG_WORD, // Reading an uppercase WORD. |
49 | SMALL_WORD, // Reading a lowercase word. |
50 | NUMBER // Reading a number. |
51 | }; |
52 | |
53 | enum CharType { UPPER, LOWER, DIGIT, MISC }; |
54 | CharType classify(char c) { |
55 | if (isupper(c)) |
56 | return UPPER; |
57 | if (islower(c)) |
58 | return LOWER; |
59 | if (isdigit(c)) |
60 | return DIGIT; |
61 | return MISC; |
62 | } |
63 | |
64 | } // namespace |
65 | |
66 | std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) { |
67 | std::vector<std::string> Result; |
68 | // State describes the treatment of text from Start to I. |
69 | // Once text is Flush()ed into Result, we're done with it and advance Start. |
70 | TokenizeState State = EMPTY; |
71 | size_t Start = 0; |
72 | auto Flush = [&](size_t End) { |
73 | if (State != EMPTY) { |
74 | Result.push_back(x: Text.substr(Start, N: End - Start).lower()); |
75 | State = EMPTY; |
76 | } |
77 | Start = End; |
78 | }; |
79 | for (size_t I = 0; I < Text.size(); ++I) { |
80 | CharType Type = classify(c: Text[I]); |
81 | if (Type == MISC) |
82 | Flush(I); |
83 | else if (Type == LOWER) |
84 | switch (State) { |
85 | case BIG_WORD: |
86 | Flush(I - 1); // FOOBar: first token is FOO, not FOOB. |
87 | [[fallthrough]]; |
88 | case ONE_BIG: |
89 | State = SMALL_WORD; |
90 | [[fallthrough]]; |
91 | case SMALL_WORD: |
92 | break; |
93 | default: |
94 | Flush(I); |
95 | State = SMALL_WORD; |
96 | } |
97 | else if (Type == UPPER) |
98 | switch (State) { |
99 | case ONE_BIG: |
100 | State = BIG_WORD; |
101 | [[fallthrough]]; |
102 | case BIG_WORD: |
103 | break; |
104 | default: |
105 | Flush(I); |
106 | State = ONE_BIG; |
107 | } |
108 | else if (Type == DIGIT && State != NUMBER) { |
109 | Flush(I); |
110 | State = NUMBER; |
111 | } |
112 | } |
113 | Flush(Text.size()); |
114 | return Result; |
115 | } |
116 | |
117 | std::string |
118 | FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) { |
119 | std::string Result; |
120 | for (size_t I = 0; I < Tokens.size(); ++I) { |
121 | if (I) |
122 | Result.append(s: "[[:alnum:]]* " ); |
123 | for (size_t J = 0; J < Tokens[I].size(); ++J) { |
124 | if (J) |
125 | Result.append(s: "([[:alnum:]]* )?" ); |
126 | Result.push_back(c: Tokens[I][J]); |
127 | } |
128 | } |
129 | return Result; |
130 | } |
131 | |
132 | llvm::Expected<std::unique_ptr<FuzzySymbolIndex>> |
133 | FuzzySymbolIndex::createFromYAML(StringRef FilePath) { |
134 | auto Buffer = llvm::MemoryBuffer::getFile(Filename: FilePath); |
135 | if (!Buffer) |
136 | return llvm::errorCodeToError(EC: Buffer.getError()); |
137 | return std::make_unique<MemSymbolIndex>( |
138 | args: find_all_symbols::ReadSymbolInfosFromYAML(Yaml: Buffer.get()->getBuffer())); |
139 | } |
140 | |
141 | } // namespace include_fixer |
142 | } // namespace clang |
143 | |