| 1 | //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements a simple interactive tool which can be used to manually |
| 10 | // evaluate symbol search quality of Clangd index. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "index/Index.h" |
| 15 | #include "index/Relation.h" |
| 16 | #include "index/Serialization.h" |
| 17 | #include "index/remote/Client.h" |
| 18 | #include "llvm/ADT/ScopeExit.h" |
| 19 | #include "llvm/ADT/SmallVector.h" |
| 20 | #include "llvm/ADT/StringRef.h" |
| 21 | #include "llvm/LineEditor/LineEditor.h" |
| 22 | #include "llvm/Support/CommandLine.h" |
| 23 | #include "llvm/Support/Signals.h" |
| 24 | #include <optional> |
| 25 | |
| 26 | namespace clang { |
| 27 | namespace clangd { |
| 28 | namespace { |
| 29 | |
| 30 | llvm::cl::opt<std::string> IndexLocation( |
| 31 | llvm::cl::desc("<path to index file | remote:server.address>" ), |
| 32 | llvm::cl::Positional); |
| 33 | |
| 34 | llvm::cl::opt<std::string> |
| 35 | ExecCommand("c" , llvm::cl::desc("Command to execute and then exit." )); |
| 36 | |
| 37 | llvm::cl::opt<std::string> ProjectRoot( |
| 38 | "project-root" , |
| 39 | llvm::cl::desc( |
| 40 | "Path to the project. Required when connecting using remote index." )); |
| 41 | |
// Tool description handed to llvm::cl::ParseCommandLineOptions() in main().
static constexpr char Overview[] = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

You can connect to remote index by passing remote:address to dexp. Example:

$ dexp remote:0.0.0.0:9000

Type "help" to get information about the available commands.
)";
| 54 | |
| 55 | void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) { |
| 56 | const auto TimerStart = std::chrono::high_resolution_clock::now(); |
| 57 | F(); |
| 58 | const auto TimerStop = std::chrono::high_resolution_clock::now(); |
| 59 | const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>( |
| 60 | d: TimerStop - TimerStart); |
| 61 | llvm::outs() << llvm::formatv(Fmt: "{0} took {1:ms+n}.\n" , Vals&: Name, Vals: Duration); |
| 62 | } |
| 63 | |
| 64 | std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName, |
| 65 | const SymbolIndex *Index) { |
| 66 | FuzzyFindRequest Request; |
| 67 | // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::" |
| 68 | // qualifier for global scope. |
| 69 | bool IsGlobalScope = QualifiedName.consume_front(Prefix: "::" ); |
| 70 | auto Names = splitQualifiedName(QName: QualifiedName); |
| 71 | if (IsGlobalScope || !Names.first.empty()) |
| 72 | Request.Scopes = {std::string(Names.first)}; |
| 73 | else |
| 74 | // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"), |
| 75 | // add the global scope to the request. |
| 76 | Request.Scopes = {"" }; |
| 77 | |
| 78 | Request.Query = std::string(Names.second); |
| 79 | std::vector<SymbolID> SymIDs; |
| 80 | Index->fuzzyFind(Req: Request, Callback: [&](const Symbol &Sym) { |
| 81 | std::string SymQualifiedName = (Sym.Scope + Sym.Name).str(); |
| 82 | if (QualifiedName == SymQualifiedName) |
| 83 | SymIDs.push_back(x: Sym.ID); |
| 84 | }); |
| 85 | return SymIDs; |
| 86 | } |
| 87 | |
| 88 | // REPL commands inherit from Command and contain their options as members. |
| 89 | // Creating a Command populates parser options, parseAndRun() resets them. |
| 90 | class Command { |
| 91 | // By resetting the parser options, we lost the standard -help flag. |
| 92 | llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{ |
| 93 | "help" , llvm::cl::desc("Display available options" ), |
| 94 | llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::getGeneralCategory())}; |
| 95 | // FIXME: Allow commands to signal failure. |
| 96 | virtual void run() = 0; |
| 97 | |
| 98 | protected: |
| 99 | const SymbolIndex *Index; |
| 100 | |
| 101 | public: |
| 102 | virtual ~Command() = default; |
| 103 | bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview, |
| 104 | const SymbolIndex &Index) { |
| 105 | std::string ParseErrs; |
| 106 | llvm::raw_string_ostream OS(ParseErrs); |
| 107 | bool Ok = llvm::cl::ParseCommandLineOptions(argc: Argv.size(), argv: Argv.data(), |
| 108 | Overview, Errs: &OS); |
| 109 | // must do this before opts are destroyed |
| 110 | auto Cleanup = llvm::make_scope_exit(F&: llvm::cl::ResetCommandLineParser); |
| 111 | if (Help.getNumOccurrences() > 0) { |
| 112 | // Avoid printing parse errors in this case. |
| 113 | // (Well, in theory. A bunch get printed to llvm::errs() regardless!) |
| 114 | llvm::cl::PrintHelpMessage(); |
| 115 | return true; |
| 116 | } |
| 117 | |
| 118 | llvm::outs() << OS.str(); |
| 119 | if (Ok) { |
| 120 | this->Index = &Index; |
| 121 | reportTime(Name: Argv[0], F: [&] { run(); }); |
| 122 | } |
| 123 | return Ok; |
| 124 | } |
| 125 | }; |
| 126 | |
// FIXME(kbobyrev): Ideas for more commands:
// * load/swap/reload index: this would make it possible to get rid of llvm::cl
//   usages in the tool driver and actually use llvm::cl library in the REPL.
// * show posting list density histogram (or dump data somewhere so that user
//   could build one)
// * show number of tokens of each kind
// * print out tokens with the most dense posting lists
// * print out tokens with least dense posting lists
| 135 | |
| 136 | class FuzzyFind : public Command { |
| 137 | llvm::cl::opt<std::string> Query{ |
| 138 | "query" , |
| 139 | llvm::cl::Positional, |
| 140 | llvm::cl::Required, |
| 141 | llvm::cl::desc("Query string to be fuzzy-matched" ), |
| 142 | }; |
| 143 | llvm::cl::opt<std::string> Scopes{ |
| 144 | "scopes" , |
| 145 | llvm::cl::desc("Allowed symbol scopes (comma-separated list)" ), |
| 146 | }; |
| 147 | llvm::cl::opt<unsigned> Limit{ |
| 148 | "limit" , |
| 149 | llvm::cl::init(Val: 10), |
| 150 | llvm::cl::desc("Max results to display" ), |
| 151 | }; |
| 152 | |
| 153 | void run() override { |
| 154 | FuzzyFindRequest Request; |
| 155 | Request.Limit = Limit; |
| 156 | Request.Query = Query; |
| 157 | if (Scopes.getNumOccurrences() > 0) { |
| 158 | llvm::SmallVector<llvm::StringRef> Scopes; |
| 159 | llvm::StringRef(this->Scopes).split(A&: Scopes, Separator: ','); |
| 160 | Request.Scopes = {Scopes.begin(), Scopes.end()}; |
| 161 | } |
| 162 | Request.AnyScope = Request.Scopes.empty(); |
| 163 | // FIXME(kbobyrev): Print symbol final scores to see the distribution. |
| 164 | static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n" ; |
| 165 | llvm::outs() << llvm::formatv(Fmt: OutputFormat, Vals: "Rank" , Vals: "Symbol ID" , |
| 166 | Vals: "Symbol Name" ); |
| 167 | size_t Rank = 0; |
| 168 | Index->fuzzyFind(Req: Request, Callback: [&](const Symbol &Sym) { |
| 169 | llvm::outs() << llvm::formatv(Fmt: OutputFormat, Vals: Rank++, Vals: Sym.ID.str(), |
| 170 | Vals: Sym.Scope + Sym.Name); |
| 171 | }); |
| 172 | } |
| 173 | }; |
| 174 | |
| 175 | class Lookup : public Command { |
| 176 | llvm::cl::opt<std::string> ID{ |
| 177 | "id" , |
| 178 | llvm::cl::Positional, |
| 179 | llvm::cl::desc("Symbol ID to look up (hex)" ), |
| 180 | }; |
| 181 | llvm::cl::opt<std::string> Name{ |
| 182 | "name" , |
| 183 | llvm::cl::desc("Qualified name to look up." ), |
| 184 | }; |
| 185 | |
| 186 | void run() override { |
| 187 | if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) { |
| 188 | llvm::errs() |
| 189 | << "Missing required argument: please provide id or -name.\n" ; |
| 190 | return; |
| 191 | } |
| 192 | std::vector<SymbolID> IDs; |
| 193 | if (ID.getNumOccurrences()) { |
| 194 | auto SID = SymbolID::fromStr(ID); |
| 195 | if (!SID) { |
| 196 | llvm::errs() << llvm::toString(E: SID.takeError()) << "\n" ; |
| 197 | return; |
| 198 | } |
| 199 | IDs.push_back(x: *SID); |
| 200 | } else { |
| 201 | IDs = getSymbolIDsFromIndex(QualifiedName: Name, Index); |
| 202 | } |
| 203 | |
| 204 | LookupRequest Request; |
| 205 | Request.IDs.insert_range(R&: IDs); |
| 206 | bool FoundSymbol = false; |
| 207 | Index->lookup(Req: Request, Callback: [&](const Symbol &Sym) { |
| 208 | FoundSymbol = true; |
| 209 | llvm::outs() << toYAML(Sym); |
| 210 | }); |
| 211 | if (!FoundSymbol) |
| 212 | llvm::errs() << "not found\n" ; |
| 213 | } |
| 214 | }; |
| 215 | |
| 216 | class Refs : public Command { |
| 217 | llvm::cl::opt<std::string> ID{ |
| 218 | "id" , |
| 219 | llvm::cl::Positional, |
| 220 | llvm::cl::desc("Symbol ID of the symbol being queried (hex)." ), |
| 221 | }; |
| 222 | llvm::cl::opt<std::string> Name{ |
| 223 | "name" , |
| 224 | llvm::cl::desc("Qualified name of the symbol being queried." ), |
| 225 | }; |
| 226 | llvm::cl::opt<std::string> Filter{ |
| 227 | "filter" , |
| 228 | llvm::cl::init(Val: ".*" ), |
| 229 | llvm::cl::desc( |
| 230 | "Print all results from files matching this regular expression." ), |
| 231 | }; |
| 232 | |
| 233 | void run() override { |
| 234 | if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) { |
| 235 | llvm::errs() |
| 236 | << "Missing required argument: please provide id or -name.\n" ; |
| 237 | return; |
| 238 | } |
| 239 | std::vector<SymbolID> IDs; |
| 240 | if (ID.getNumOccurrences()) { |
| 241 | auto SID = SymbolID::fromStr(ID); |
| 242 | if (!SID) { |
| 243 | llvm::errs() << llvm::toString(E: SID.takeError()) << "\n" ; |
| 244 | return; |
| 245 | } |
| 246 | IDs.push_back(x: *SID); |
| 247 | } else { |
| 248 | IDs = getSymbolIDsFromIndex(QualifiedName: Name, Index); |
| 249 | if (IDs.size() > 1) { |
| 250 | llvm::errs() << llvm::formatv( |
| 251 | Fmt: "The name {0} is ambiguous, found {1} different " |
| 252 | "symbols. Please use id flag to disambiguate.\n" , |
| 253 | Vals&: Name, Vals: IDs.size()); |
| 254 | return; |
| 255 | } |
| 256 | } |
| 257 | RefsRequest RefRequest; |
| 258 | RefRequest.IDs.insert_range(R&: IDs); |
| 259 | llvm::Regex RegexFilter(Filter); |
| 260 | Index->refs(Req: RefRequest, Callback: [&RegexFilter](const Ref &R) { |
| 261 | auto U = URI::parse(Uri: R.Location.FileURI); |
| 262 | if (!U) { |
| 263 | llvm::errs() << U.takeError(); |
| 264 | return; |
| 265 | } |
| 266 | if (RegexFilter.match(String: U->body())) |
| 267 | llvm::outs() << R << "\n" ; |
| 268 | }); |
| 269 | } |
| 270 | }; |
| 271 | |
| 272 | class Relations : public Command { |
| 273 | llvm::cl::opt<std::string> ID{ |
| 274 | "id" , |
| 275 | llvm::cl::Positional, |
| 276 | llvm::cl::desc("Symbol ID of the symbol being queried (hex)." ), |
| 277 | }; |
| 278 | llvm::cl::opt<RelationKind> Relation{ |
| 279 | "relation" , |
| 280 | llvm::cl::desc("Relation kind for the predicate." ), |
| 281 | values(clEnumValN(RelationKind::BaseOf, "base_of" , |
| 282 | "Find subclasses of a class." ), |
| 283 | clEnumValN(RelationKind::OverriddenBy, "overridden_by" , |
| 284 | "Find methods that overrides a virtual method." )), |
| 285 | }; |
| 286 | |
| 287 | void run() override { |
| 288 | if (ID.getNumOccurrences() == 0 || Relation.getNumOccurrences() == 0) { |
| 289 | llvm::errs() |
| 290 | << "Missing required argument: please provide id and -relation.\n" ; |
| 291 | return; |
| 292 | } |
| 293 | RelationsRequest Req; |
| 294 | if (ID.getNumOccurrences()) { |
| 295 | auto SID = SymbolID::fromStr(ID); |
| 296 | if (!SID) { |
| 297 | llvm::errs() << llvm::toString(E: SID.takeError()) << "\n" ; |
| 298 | return; |
| 299 | } |
| 300 | Req.Subjects.insert(V: *SID); |
| 301 | } |
| 302 | Req.Predicate = Relation.getValue(); |
| 303 | Index->relations(Req, Callback: [](const SymbolID &SID, const Symbol &S) { |
| 304 | llvm::outs() << toYAML(S); |
| 305 | }); |
| 306 | } |
| 307 | }; |
| 308 | |
| 309 | class Export : public Command { |
| 310 | llvm::cl::opt<IndexFileFormat> Format{ |
| 311 | "format" , |
| 312 | llvm::cl::desc("Format of index export" ), |
| 313 | llvm::cl::values( |
| 314 | clEnumValN(IndexFileFormat::YAML, "yaml" , |
| 315 | "human-readable YAML format" ), |
| 316 | clEnumValN(IndexFileFormat::RIFF, "binary" , "binary RIFF format" )), |
| 317 | llvm::cl::init(Val: IndexFileFormat::YAML), |
| 318 | }; |
| 319 | llvm::cl::opt<std::string> OutputFile{ |
| 320 | "output-file" , |
| 321 | llvm::cl::Positional, |
| 322 | llvm::cl::Required, |
| 323 | llvm::cl::desc("Output file for export" ), |
| 324 | }; |
| 325 | |
| 326 | public: |
| 327 | void run() override { |
| 328 | using namespace clang::clangd; |
| 329 | // Read input file (as specified in global option) |
| 330 | auto Buffer = llvm::MemoryBuffer::getFile(Filename: IndexLocation); |
| 331 | if (!Buffer) { |
| 332 | llvm::errs() << llvm::formatv(Fmt: "Can't open {0}" , Vals&: IndexLocation) << "\n" ; |
| 333 | return; |
| 334 | } |
| 335 | |
| 336 | // Auto-detects input format when parsing |
| 337 | auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer(), |
| 338 | SymbolOrigin::Static); |
| 339 | if (!IndexIn) { |
| 340 | llvm::errs() << llvm::toString(E: IndexIn.takeError()) << "\n" ; |
| 341 | return; |
| 342 | } |
| 343 | |
| 344 | // Prepare output file |
| 345 | std::error_code EC; |
| 346 | llvm::raw_fd_ostream OutputStream(OutputFile, EC); |
| 347 | if (EC) { |
| 348 | llvm::errs() << llvm::formatv(Fmt: "Can't open {0} for writing" , Vals&: OutputFile) |
| 349 | << "\n" ; |
| 350 | return; |
| 351 | } |
| 352 | |
| 353 | // Export |
| 354 | clang::clangd::IndexFileOut IndexOut(IndexIn.get()); |
| 355 | IndexOut.Format = Format; |
| 356 | OutputStream << IndexOut; |
| 357 | } |
| 358 | }; |
| 359 | |
| 360 | struct { |
| 361 | const char *Name; |
| 362 | const char *Description; |
| 363 | std::function<std::unique_ptr<Command>()> Implementation; |
| 364 | } CommandInfo[] = { |
| 365 | {.Name: "find" , .Description: "Search for symbols with fuzzyFind" , .Implementation: std::make_unique<FuzzyFind>}, |
| 366 | {.Name: "lookup" , .Description: "Dump symbol details by ID or qualified name" , |
| 367 | .Implementation: std::make_unique<Lookup>}, |
| 368 | {.Name: "refs" , .Description: "Find references by ID or qualified name" , .Implementation: std::make_unique<Refs>}, |
| 369 | {.Name: "relations" , .Description: "Find relations by ID and relation kind" , |
| 370 | .Implementation: std::make_unique<Relations>}, |
| 371 | {.Name: "export" , .Description: "Export index" , .Implementation: std::make_unique<Export>}, |
| 372 | }; |
| 373 | |
| 374 | std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) { |
| 375 | return Index.starts_with(Prefix: "remote:" ) |
| 376 | ? remote::getClient(Address: Index.drop_front(N: strlen(s: "remote:" )), |
| 377 | IndexRoot: ProjectRoot) |
| 378 | : loadIndex(Filename: Index, Origin: SymbolOrigin::Static, /*UseDex=*/true, |
| 379 | /*SupportContainedRefs=*/true); |
| 380 | } |
| 381 | |
| 382 | bool runCommand(std::string Request, const SymbolIndex &Index) { |
| 383 | // Split on spaces and add required null-termination. |
| 384 | llvm::replace(Range&: Request, OldValue: ' ', NewValue: '\0'); |
| 385 | llvm::SmallVector<llvm::StringRef> Args; |
| 386 | llvm::StringRef(Request).split(A&: Args, Separator: '\0', /*MaxSplit=*/-1, |
| 387 | /*KeepEmpty=*/false); |
| 388 | if (Args.empty()) |
| 389 | return false; |
| 390 | if (Args.front() == "help" ) { |
| 391 | llvm::outs() << "dexp - Index explorer\nCommands:\n" ; |
| 392 | for (const auto &C : CommandInfo) |
| 393 | llvm::outs() << llvm::formatv(Fmt: "{0,16} - {1}\n" , Vals: C.Name, Vals: C.Description); |
| 394 | llvm::outs() << "Get detailed command help with e.g. `find -help`.\n" ; |
| 395 | return true; |
| 396 | } |
| 397 | llvm::SmallVector<const char *> FakeArgv; |
| 398 | for (llvm::StringRef S : Args) |
| 399 | FakeArgv.push_back(Elt: S.data()); // Terminated by separator or end of string. |
| 400 | |
| 401 | for (const auto &Cmd : CommandInfo) { |
| 402 | if (Cmd.Name == Args.front()) |
| 403 | return Cmd.Implementation()->parseAndRun(Argv: FakeArgv, Overview: Cmd.Description, |
| 404 | Index); |
| 405 | } |
| 406 | llvm::errs() << "Unknown command. Try 'help'.\n" ; |
| 407 | return false; |
| 408 | } |
| 409 | |
| 410 | } // namespace |
| 411 | } // namespace clangd |
| 412 | } // namespace clang |
| 413 | |
| 414 | int main(int argc, const char *argv[]) { |
| 415 | using namespace clang::clangd; |
| 416 | |
| 417 | llvm::cl::ParseCommandLineOptions(argc, argv, Overview); |
| 418 | |
| 419 | // Preserve global options when flag parser is reset, so commands can use |
| 420 | // them. |
| 421 | IndexLocation.setValue(V: IndexLocation, /*initial=*/true); |
| 422 | ExecCommand.setValue(V: ExecCommand, /*initial=*/true); |
| 423 | ProjectRoot.setValue(V: ProjectRoot, /*initial=*/true); |
| 424 | |
| 425 | llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands. |
| 426 | llvm::sys::PrintStackTraceOnErrorSignal(Argv0: argv[0]); |
| 427 | |
| 428 | bool RemoteMode = llvm::StringRef(IndexLocation).starts_with(Prefix: "remote:" ); |
| 429 | if (RemoteMode && ProjectRoot.empty()) { |
| 430 | llvm::errs() << "--project-root is required in remote mode\n" ; |
| 431 | return -1; |
| 432 | } |
| 433 | |
| 434 | std::unique_ptr<SymbolIndex> Index; |
| 435 | reportTime(Name: RemoteMode ? "Remote index client creation" : "Dex build" , |
| 436 | F: [&]() { Index = openIndex(Index: IndexLocation); }); |
| 437 | |
| 438 | if (!Index) { |
| 439 | llvm::errs() << "Failed to open the index.\n" ; |
| 440 | return -1; |
| 441 | } |
| 442 | |
| 443 | if (!ExecCommand.empty()) |
| 444 | return runCommand(Request: ExecCommand, Index: *Index) ? 0 : 1; |
| 445 | |
| 446 | llvm::LineEditor LE("dexp" ); |
| 447 | while (std::optional<std::string> Request = LE.readLine()) |
| 448 | runCommand(Request: std::move(*Request), Index: *Index); |
| 449 | } |
| 450 | |