1 | //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a simple interactive tool which can be used to manually |
10 | // evaluate symbol search quality of Clangd index. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
#include "index/Index.h"
#include "index/Relation.h"
#include "index/Serialization.h"
#include "index/remote/Client.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/LineEditor/LineEditor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Signals.h"
#include <chrono>
#include <optional>
25 | |
26 | namespace clang { |
27 | namespace clangd { |
28 | namespace { |
29 | |
30 | llvm::cl::opt<std::string> IndexLocation( |
31 | llvm::cl::desc("<path to index file | remote:server.address>" ), |
32 | llvm::cl::Positional); |
33 | |
34 | llvm::cl::opt<std::string> |
35 | ExecCommand("c" , llvm::cl::desc("Command to execute and then exit." )); |
36 | |
37 | llvm::cl::opt<std::string> ProjectRoot( |
38 | "project-root" , |
39 | llvm::cl::desc( |
40 | "Path to the project. Required when connecting using remote index." )); |
41 | |
// Tool overview text; main() passes it to llvm::cl::ParseCommandLineOptions().
42 | static constexpr char Overview[] = R"( |
43 | This is an **experimental** interactive tool to process user-provided search |
44 | queries over given symbol collection obtained via clangd-indexer. The |
45 | tool can be used to evaluate search quality of existing index implementations |
46 | and manually construct non-trivial test cases. |
47 | |
48 | You can connect to remote index by passing remote:address to dexp. Example: |
49 | |
50 | $ dexp remote:0.0.0.0:9000 |
51 | |
52 | Type use "help" request to get information about the details. |
53 | )" ; |
54 | |
55 | void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) { |
56 | const auto TimerStart = std::chrono::high_resolution_clock::now(); |
57 | F(); |
58 | const auto TimerStop = std::chrono::high_resolution_clock::now(); |
59 | const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>( |
60 | d: TimerStop - TimerStart); |
61 | llvm::outs() << llvm::formatv(Fmt: "{0} took {1:ms+n}.\n" , Vals&: Name, Vals: Duration); |
62 | } |
63 | |
64 | std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName, |
65 | const SymbolIndex *Index) { |
66 | FuzzyFindRequest Request; |
67 | // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::" |
68 | // qualifier for global scope. |
69 | bool IsGlobalScope = QualifiedName.consume_front(Prefix: "::" ); |
70 | auto Names = splitQualifiedName(QName: QualifiedName); |
71 | if (IsGlobalScope || !Names.first.empty()) |
72 | Request.Scopes = {std::string(Names.first)}; |
73 | else |
74 | // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"), |
75 | // add the global scope to the request. |
76 | Request.Scopes = {"" }; |
77 | |
78 | Request.Query = std::string(Names.second); |
79 | std::vector<SymbolID> SymIDs; |
80 | Index->fuzzyFind(Req: Request, Callback: [&](const Symbol &Sym) { |
81 | std::string SymQualifiedName = (Sym.Scope + Sym.Name).str(); |
82 | if (QualifiedName == SymQualifiedName) |
83 | SymIDs.push_back(x: Sym.ID); |
84 | }); |
85 | return SymIDs; |
86 | } |
87 | |
88 | // REPL commands inherit from Command and contain their options as members. |
89 | // Creating a Command populates parser options, parseAndRun() resets them. |
90 | class Command { |
91 | // By resetting the parser options, we lost the standard -help flag. |
92 | llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{ |
93 | "help" , llvm::cl::desc("Display available options" ), |
94 | llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::getGeneralCategory())}; |
95 | // FIXME: Allow commands to signal failure. |
96 | virtual void run() = 0; |
97 | |
98 | protected: |
99 | const SymbolIndex *Index; |
100 | |
101 | public: |
102 | virtual ~Command() = default; |
103 | bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview, |
104 | const SymbolIndex &Index) { |
105 | std::string ParseErrs; |
106 | llvm::raw_string_ostream OS(ParseErrs); |
107 | bool Ok = llvm::cl::ParseCommandLineOptions(argc: Argv.size(), argv: Argv.data(), |
108 | Overview, Errs: &OS); |
109 | // must do this before opts are destroyed |
110 | auto Cleanup = llvm::make_scope_exit(F&: llvm::cl::ResetCommandLineParser); |
111 | if (Help.getNumOccurrences() > 0) { |
112 | // Avoid printing parse errors in this case. |
113 | // (Well, in theory. A bunch get printed to llvm::errs() regardless!) |
114 | llvm::cl::PrintHelpMessage(); |
115 | return true; |
116 | } |
117 | |
118 | llvm::outs() << OS.str(); |
119 | if (Ok) { |
120 | this->Index = &Index; |
121 | reportTime(Name: Argv[0], F: [&] { run(); }); |
122 | } |
123 | return Ok; |
124 | } |
125 | }; |
126 | |
127 | // FIXME(kbobyrev): Ideas for more commands: |
128 | // * load/swap/reload index: this would make it possible to get rid of llvm::cl |
129 | // usages in the tool driver and actually use llvm::cl library in the REPL. |
130 | // * show posting list density histogram (or dump data somewhere so that user |
131 | // could build one) |
132 | // * show number of tokens of each kind |
133 | // * print out tokens with the most dense posting lists |
134 | // * print out tokens with least dense posting lists |
135 | |
136 | class FuzzyFind : public Command { |
137 | llvm::cl::opt<std::string> Query{ |
138 | "query" , |
139 | llvm::cl::Positional, |
140 | llvm::cl::Required, |
141 | llvm::cl::desc("Query string to be fuzzy-matched" ), |
142 | }; |
143 | llvm::cl::opt<std::string> Scopes{ |
144 | "scopes" , |
145 | llvm::cl::desc("Allowed symbol scopes (comma-separated list)" ), |
146 | }; |
147 | llvm::cl::opt<unsigned> Limit{ |
148 | "limit" , |
149 | llvm::cl::init(Val: 10), |
150 | llvm::cl::desc("Max results to display" ), |
151 | }; |
152 | |
153 | void run() override { |
154 | FuzzyFindRequest Request; |
155 | Request.Limit = Limit; |
156 | Request.Query = Query; |
157 | if (Scopes.getNumOccurrences() > 0) { |
158 | llvm::SmallVector<llvm::StringRef> Scopes; |
159 | llvm::StringRef(this->Scopes).split(A&: Scopes, Separator: ','); |
160 | Request.Scopes = {Scopes.begin(), Scopes.end()}; |
161 | } |
162 | Request.AnyScope = Request.Scopes.empty(); |
163 | // FIXME(kbobyrev): Print symbol final scores to see the distribution. |
164 | static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n" ; |
165 | llvm::outs() << llvm::formatv(Fmt: OutputFormat, Vals: "Rank" , Vals: "Symbol ID" , |
166 | Vals: "Symbol Name" ); |
167 | size_t Rank = 0; |
168 | Index->fuzzyFind(Req: Request, Callback: [&](const Symbol &Sym) { |
169 | llvm::outs() << llvm::formatv(Fmt: OutputFormat, Vals: Rank++, Vals: Sym.ID.str(), |
170 | Vals: Sym.Scope + Sym.Name); |
171 | }); |
172 | } |
173 | }; |
174 | |
175 | class Lookup : public Command { |
176 | llvm::cl::opt<std::string> ID{ |
177 | "id" , |
178 | llvm::cl::Positional, |
179 | llvm::cl::desc("Symbol ID to look up (hex)" ), |
180 | }; |
181 | llvm::cl::opt<std::string> Name{ |
182 | "name" , |
183 | llvm::cl::desc("Qualified name to look up." ), |
184 | }; |
185 | |
186 | void run() override { |
187 | if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) { |
188 | llvm::errs() |
189 | << "Missing required argument: please provide id or -name.\n" ; |
190 | return; |
191 | } |
192 | std::vector<SymbolID> IDs; |
193 | if (ID.getNumOccurrences()) { |
194 | auto SID = SymbolID::fromStr(ID); |
195 | if (!SID) { |
196 | llvm::errs() << llvm::toString(E: SID.takeError()) << "\n" ; |
197 | return; |
198 | } |
199 | IDs.push_back(x: *SID); |
200 | } else { |
201 | IDs = getSymbolIDsFromIndex(QualifiedName: Name, Index); |
202 | } |
203 | |
204 | LookupRequest Request; |
205 | Request.IDs.insert(I: IDs.begin(), E: IDs.end()); |
206 | bool FoundSymbol = false; |
207 | Index->lookup(Req: Request, Callback: [&](const Symbol &Sym) { |
208 | FoundSymbol = true; |
209 | llvm::outs() << toYAML(Sym); |
210 | }); |
211 | if (!FoundSymbol) |
212 | llvm::errs() << "not found\n" ; |
213 | } |
214 | }; |
215 | |
216 | class Refs : public Command { |
217 | llvm::cl::opt<std::string> ID{ |
218 | "id" , |
219 | llvm::cl::Positional, |
220 | llvm::cl::desc("Symbol ID of the symbol being queried (hex)." ), |
221 | }; |
222 | llvm::cl::opt<std::string> Name{ |
223 | "name" , |
224 | llvm::cl::desc("Qualified name of the symbol being queried." ), |
225 | }; |
226 | llvm::cl::opt<std::string> Filter{ |
227 | "filter" , |
228 | llvm::cl::init(Val: ".*" ), |
229 | llvm::cl::desc( |
230 | "Print all results from files matching this regular expression." ), |
231 | }; |
232 | |
233 | void run() override { |
234 | if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) { |
235 | llvm::errs() |
236 | << "Missing required argument: please provide id or -name.\n" ; |
237 | return; |
238 | } |
239 | std::vector<SymbolID> IDs; |
240 | if (ID.getNumOccurrences()) { |
241 | auto SID = SymbolID::fromStr(ID); |
242 | if (!SID) { |
243 | llvm::errs() << llvm::toString(E: SID.takeError()) << "\n" ; |
244 | return; |
245 | } |
246 | IDs.push_back(x: *SID); |
247 | } else { |
248 | IDs = getSymbolIDsFromIndex(QualifiedName: Name, Index); |
249 | if (IDs.size() > 1) { |
250 | llvm::errs() << llvm::formatv( |
251 | Fmt: "The name {0} is ambiguous, found {1} different " |
252 | "symbols. Please use id flag to disambiguate.\n" , |
253 | Vals&: Name, Vals: IDs.size()); |
254 | return; |
255 | } |
256 | } |
257 | RefsRequest RefRequest; |
258 | RefRequest.IDs.insert(I: IDs.begin(), E: IDs.end()); |
259 | llvm::Regex RegexFilter(Filter); |
260 | Index->refs(Req: RefRequest, Callback: [&RegexFilter](const Ref &R) { |
261 | auto U = URI::parse(Uri: R.Location.FileURI); |
262 | if (!U) { |
263 | llvm::errs() << U.takeError(); |
264 | return; |
265 | } |
266 | if (RegexFilter.match(String: U->body())) |
267 | llvm::outs() << R << "\n" ; |
268 | }); |
269 | } |
270 | }; |
271 | |
272 | class Relations : public Command { |
273 | llvm::cl::opt<std::string> ID{ |
274 | "id" , |
275 | llvm::cl::Positional, |
276 | llvm::cl::desc("Symbol ID of the symbol being queried (hex)." ), |
277 | }; |
278 | llvm::cl::opt<RelationKind> Relation{ |
279 | "relation" , |
280 | llvm::cl::desc("Relation kind for the predicate." ), |
281 | values(clEnumValN(RelationKind::BaseOf, "base_of" , |
282 | "Find subclasses of a class." ), |
283 | clEnumValN(RelationKind::OverriddenBy, "overridden_by" , |
284 | "Find methods that overrides a virtual method." )), |
285 | }; |
286 | |
287 | void run() override { |
288 | if (ID.getNumOccurrences() == 0 || Relation.getNumOccurrences() == 0) { |
289 | llvm::errs() |
290 | << "Missing required argument: please provide id and -relation.\n" ; |
291 | return; |
292 | } |
293 | RelationsRequest Req; |
294 | if (ID.getNumOccurrences()) { |
295 | auto SID = SymbolID::fromStr(ID); |
296 | if (!SID) { |
297 | llvm::errs() << llvm::toString(E: SID.takeError()) << "\n" ; |
298 | return; |
299 | } |
300 | Req.Subjects.insert(V: *SID); |
301 | } |
302 | Req.Predicate = Relation.getValue(); |
303 | Index->relations(Req, Callback: [](const SymbolID &SID, const Symbol &S) { |
304 | llvm::outs() << toYAML(S); |
305 | }); |
306 | } |
307 | }; |
308 | |
309 | class Export : public Command { |
310 | llvm::cl::opt<IndexFileFormat> Format{ |
311 | "format" , |
312 | llvm::cl::desc("Format of index export" ), |
313 | llvm::cl::values( |
314 | clEnumValN(IndexFileFormat::YAML, "yaml" , |
315 | "human-readable YAML format" ), |
316 | clEnumValN(IndexFileFormat::RIFF, "binary" , "binary RIFF format" )), |
317 | llvm::cl::init(Val: IndexFileFormat::YAML), |
318 | }; |
319 | llvm::cl::opt<std::string> OutputFile{ |
320 | "output-file" , |
321 | llvm::cl::Positional, |
322 | llvm::cl::Required, |
323 | llvm::cl::desc("Output file for export" ), |
324 | }; |
325 | |
326 | public: |
327 | void run() override { |
328 | using namespace clang::clangd; |
329 | // Read input file (as specified in global option) |
330 | auto Buffer = llvm::MemoryBuffer::getFile(Filename: IndexLocation); |
331 | if (!Buffer) { |
332 | llvm::errs() << llvm::formatv(Fmt: "Can't open {0}" , Vals&: IndexLocation) << "\n" ; |
333 | return; |
334 | } |
335 | |
336 | // Auto-detects input format when parsing |
337 | auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer(), |
338 | SymbolOrigin::Static); |
339 | if (!IndexIn) { |
340 | llvm::errs() << llvm::toString(E: IndexIn.takeError()) << "\n" ; |
341 | return; |
342 | } |
343 | |
344 | // Prepare output file |
345 | std::error_code EC; |
346 | llvm::raw_fd_ostream OutputStream(OutputFile, EC); |
347 | if (EC) { |
348 | llvm::errs() << llvm::formatv(Fmt: "Can't open {0} for writing" , Vals&: OutputFile) |
349 | << "\n" ; |
350 | return; |
351 | } |
352 | |
353 | // Export |
354 | clang::clangd::IndexFileOut IndexOut(IndexIn.get()); |
355 | IndexOut.Format = Format; |
356 | OutputStream << IndexOut; |
357 | } |
358 | }; |
359 | |
360 | struct { |
361 | const char *Name; |
362 | const char *Description; |
363 | std::function<std::unique_ptr<Command>()> Implementation; |
364 | } CommandInfo[] = { |
365 | {.Name: "find" , .Description: "Search for symbols with fuzzyFind" , .Implementation: std::make_unique<FuzzyFind>}, |
366 | {.Name: "lookup" , .Description: "Dump symbol details by ID or qualified name" , |
367 | .Implementation: std::make_unique<Lookup>}, |
368 | {.Name: "refs" , .Description: "Find references by ID or qualified name" , .Implementation: std::make_unique<Refs>}, |
369 | {.Name: "relations" , .Description: "Find relations by ID and relation kind" , |
370 | .Implementation: std::make_unique<Relations>}, |
371 | {.Name: "export" , .Description: "Export index" , .Implementation: std::make_unique<Export>}, |
372 | }; |
373 | |
374 | std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) { |
375 | return Index.starts_with(Prefix: "remote:" ) |
376 | ? remote::getClient(Address: Index.drop_front(N: strlen(s: "remote:" )), |
377 | IndexRoot: ProjectRoot) |
378 | : loadIndex(Filename: Index, Origin: SymbolOrigin::Static, /*UseDex=*/true); |
379 | } |
380 | |
381 | bool runCommand(std::string Request, const SymbolIndex &Index) { |
382 | // Split on spaces and add required null-termination. |
383 | std::replace(first: Request.begin(), last: Request.end(), old_value: ' ', new_value: '\0'); |
384 | llvm::SmallVector<llvm::StringRef> Args; |
385 | llvm::StringRef(Request).split(A&: Args, Separator: '\0', /*MaxSplit=*/-1, |
386 | /*KeepEmpty=*/false); |
387 | if (Args.empty()) |
388 | return false; |
389 | if (Args.front() == "help" ) { |
390 | llvm::outs() << "dexp - Index explorer\nCommands:\n" ; |
391 | for (const auto &C : CommandInfo) |
392 | llvm::outs() << llvm::formatv(Fmt: "{0,16} - {1}\n" , Vals: C.Name, Vals: C.Description); |
393 | llvm::outs() << "Get detailed command help with e.g. `find -help`.\n" ; |
394 | return true; |
395 | } |
396 | llvm::SmallVector<const char *> FakeArgv; |
397 | for (llvm::StringRef S : Args) |
398 | FakeArgv.push_back(Elt: S.data()); // Terminated by separator or end of string. |
399 | |
400 | for (const auto &Cmd : CommandInfo) { |
401 | if (Cmd.Name == Args.front()) |
402 | return Cmd.Implementation()->parseAndRun(Argv: FakeArgv, Overview: Cmd.Description, |
403 | Index); |
404 | } |
405 | llvm::errs() << "Unknown command. Try 'help'.\n" ; |
406 | return false; |
407 | } |
408 | |
409 | } // namespace |
410 | } // namespace clangd |
411 | } // namespace clang |
412 | |
413 | int main(int argc, const char *argv[]) { |
414 | using namespace clang::clangd; |
415 | |
416 | llvm::cl::ParseCommandLineOptions(argc, argv, Overview); |
417 | |
418 | // Preserve global options when flag parser is reset, so commands can use |
419 | // them. |
420 | IndexLocation.setValue(V: IndexLocation, /*initial=*/true); |
421 | ExecCommand.setValue(V: ExecCommand, /*initial=*/true); |
422 | ProjectRoot.setValue(V: ProjectRoot, /*initial=*/true); |
423 | |
424 | llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands. |
425 | llvm::sys::PrintStackTraceOnErrorSignal(Argv0: argv[0]); |
426 | |
427 | bool RemoteMode = llvm::StringRef(IndexLocation).starts_with(Prefix: "remote:" ); |
428 | if (RemoteMode && ProjectRoot.empty()) { |
429 | llvm::errs() << "--project-root is required in remote mode\n" ; |
430 | return -1; |
431 | } |
432 | |
433 | std::unique_ptr<SymbolIndex> Index; |
434 | reportTime(Name: RemoteMode ? "Remote index client creation" : "Dex build" , |
435 | F: [&]() { Index = openIndex(Index: IndexLocation); }); |
436 | |
437 | if (!Index) { |
438 | llvm::errs() << "Failed to open the index.\n" ; |
439 | return -1; |
440 | } |
441 | |
442 | if (!ExecCommand.empty()) |
443 | return runCommand(Request: ExecCommand, Index: *Index) ? 0 : 1; |
444 | |
445 | llvm::LineEditor LE("dexp" ); |
446 | while (std::optional<std::string> Request = LE.readLine()) |
447 | runCommand(Request: std::move(*Request), Index: *Index); |
448 | } |
449 | |