FuzzySymbolIndex.cpp source code [clang-tools-extra/clang-include-fixer/FuzzySymbolIndex.cpp]

1	//===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	#include "FuzzySymbolIndex.h"
9	#include "llvm/Support/Regex.h"
10
11	using clang::find_all_symbols::SymbolAndSignals;
12	using llvm::StringRef;
13
14	namespace clang {
15	namespace include_fixer {
16	namespace {
17
18	class MemSymbolIndex : public FuzzySymbolIndex {
19	public:
20	MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
21	for (auto &Symbol : Symbols) {
22	auto Tokens = tokenize(Text: Symbol.Symbol.getName());
23	this->Symbols.emplace_back(
24	args: StringRef(llvm::join(Begin: Tokens.begin(), End: Tokens.end(), Separator: " ")),
25	args: std::move(Symbol));
26	}
27	}
28
29	std::vector<SymbolAndSignals> search(StringRef Query) override {
30	auto Tokens = tokenize(Text: Query);
31	llvm::Regex Pattern("^" + queryRegexp(Tokens));
32	std::vector<SymbolAndSignals> Results;
33	for (const Entry &E : Symbols)
34	if (Pattern.match(String: E.first))
35	Results.push_back(x: E.second);
36	return Results;
37	}
38
39	private:
40	using Entry = std::pair<llvm::SmallString<`32`>, SymbolAndSignals>;
41	std::vector<Entry> Symbols;
42	};
43
44	// Helpers for tokenize state machine.
45	enum TokenizeState {
46	EMPTY, // No pending characters.
47	ONE_BIG, // Read one uppercase letter, could be WORD or Word.
48	BIG_WORD, // Reading an uppercase WORD.
49	SMALL_WORD, // Reading a lowercase word.
50	NUMBER // Reading a number.
51	};
52
53	enum CharType { UPPER, LOWER, DIGIT, MISC };
54	CharType classify(char c) {
55	if (isupper(c))
56	return UPPER;
57	if (islower(c))
58	return LOWER;
59	if (isdigit(c))
60	return DIGIT;
61	return MISC;
62	}
63
64	} // namespace
65
66	std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
67	std::vector<std::string> Result;
68	// State describes the treatment of text from Start to I.
69	// Once text is Flush()ed into Result, we're done with it and advance Start.
70	TokenizeState State = EMPTY;
71	size_t Start = `0`;
72	auto Flush = [&](size_t End) {
73	if (State != EMPTY) {
74	Result.push_back(x: Text.substr(Start, N: End - Start).lower());
75	State = EMPTY;
76	}
77	Start = End;
78	};
79	for (size_t I = `0`; I < Text.size(); ++I) {
80	CharType Type = classify(c: Text [I]);
81	if (Type == MISC)
82	Flush (I);
83	else if (Type == LOWER)
84	switch (State) {
85	case BIG_WORD:
86	Flush (I - `1`); // FOOBar: first token is FOO, not FOOB.
87	[[fallthrough]];
88	case ONE_BIG:
89	State = SMALL_WORD;
90	[[fallthrough]];
91	case SMALL_WORD:
92	break;
93	default:
94	Flush (I);
95	State = SMALL_WORD;
96	}
97	else if (Type == UPPER)
98	switch (State) {
99	case ONE_BIG:
100	State = BIG_WORD;
101	[[fallthrough]];
102	case BIG_WORD:
103	break;
104	default:
105	Flush (I);
106	State = ONE_BIG;
107	}
108	else if (Type == DIGIT && State != NUMBER) {
109	Flush (I);
110	State = NUMBER;
111	}
112	}
113	Flush (Text.size());
114	return Result;
115	}
116
117	std::string
118	FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
119	std::string Result;
120	for (size_t I = `0`; I < Tokens.size(); ++I) {
121	if (I)
122	Result.append(s: "[[:alnum:]]* ");
123	for (size_t J = `0`; J < Tokens [I].size(); ++J) {
124	if (J)
125	Result.append(s: "([[:alnum:]]* )?");
126	Result.push_back(c: Tokens [I][J]);
127	}
128	}
129	return Result;
130	}
131
132	llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
133	FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
134	auto Buffer = llvm::MemoryBuffer::getFile(Filename: FilePath, /IsText=/true);
135	if (!Buffer)
136	return llvm::errorCodeToError(EC: Buffer.getError());
137	return std::make_unique<MemSymbolIndex>(
138	args: find_all_symbols::ReadSymbolInfosFromYAML(Yaml: Buffer.get()->getBuffer()));
139	}
140
141	} // namespace include_fixer
142	} // namespace clang
143

source code of clang-tools-extra/clang-include-fixer/FuzzySymbolIndex.cpp