1//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The FileDistance structure allows calculating the minimum distance to paths
10// in a single tree.
11// We simply walk up the path's ancestors until we find a node whose cost is
12// known, and add the cost of walking back down. Initialization ensures this
13// gives the correct path to the roots.
14// We cache the results, so that the runtime is O(|A|), where A is the set of
15// all distinct ancestors of visited paths.
16//
17// Example after initialization with /=2, /bar=0, DownCost = 1:
18// / = 2
19// /bar = 0
20//
21// After querying /foo/bar and /bar/foo:
22// / = 2
23// /bar = 0
24// /bar/foo = 1
25// /foo = 3
26// /foo/bar = 4
27//
28// URIDistance creates FileDistance lazily for each URI scheme encountered. In
29// practice this is a small constant factor.
30//
31//===-------------------------------------------------------------------------//
32
33#include "FileDistance.h"
34#include "URI.h"
35#include "support/Logger.h"
36#include "llvm/ADT/STLExtras.h"
37#include "llvm/ADT/StringExtras.h"
38#include "llvm/Support/Path.h"
39#include <queue>
40
41namespace clang {
42namespace clangd {
43
44// Convert a path into the canonical form.
45// Canonical form is either "/", or "/segment" * N:
46// C:\foo\bar --> /c:/foo/bar
47// /foo/ --> /foo
48// a/b/c --> /a/b/c
49static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
50 llvm::SmallString<128> Result = Path.rtrim(Char: '/');
51 native(path&: Result, style: llvm::sys::path::Style::posix);
52 if (Result.empty() || Result.front() != '/')
53 Result.insert(I: Result.begin(), Elt: '/');
54 return Result;
55}
56
57constexpr const unsigned FileDistance::Unreachable;
58const llvm::hash_code FileDistance::RootHash =
59 llvm::hash_value(S: llvm::StringRef("/"));
60
61FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources,
62 const FileDistanceOptions &Opts)
63 : Opts(Opts) {
64 llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges;
65 // Compute the best distance following only up edges.
66 // Keep track of down edges, in case we can use them to improve on this.
67 for (const auto &S : Sources) {
68 auto Canonical = canonicalize(Path: S.getKey());
69 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
70 S.second.MaxUpTraversals);
71 // Walk up to ancestors of this source, assigning cost.
72 llvm::StringRef Rest = Canonical;
73 llvm::hash_code Hash = llvm::hash_value(S: Rest);
74 for (unsigned I = 0; !Rest.empty(); ++I) {
75 Rest = parent_path(path: Rest, style: llvm::sys::path::Style::posix);
76 auto NextHash = llvm::hash_value(S: Rest);
77 auto &Down = DownEdges[NextHash];
78 if (!llvm::is_contained(Range&: Down, Element: Hash))
79 Down.push_back(Elt: Hash);
80 // We can't just break after MaxUpTraversals, must still set DownEdges.
81 if (I > S.getValue().MaxUpTraversals) {
82 if (Cache.contains(Val: Hash))
83 break;
84 } else {
85 unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
86 auto R = Cache.try_emplace(Key: Hash, Args&: Cost);
87 if (!R.second) {
88 if (Cost < R.first->second) {
89 R.first->second = Cost;
90 } else {
91 // If we're not the best way to get to this path, stop assigning.
92 break;
93 }
94 }
95 }
96 Hash = NextHash;
97 }
98 }
99 // Now propagate scores parent -> child if that's an improvement.
100 // BFS ensures we propagate down chains (must visit parents before children).
101 std::queue<llvm::hash_code> Next;
102 for (auto Child : DownEdges.lookup(Val: llvm::hash_value(S: llvm::StringRef(""))))
103 Next.push(x: Child);
104 while (!Next.empty()) {
105 auto Parent = Next.front();
106 Next.pop();
107 auto ParentCost = Cache.lookup(Val: Parent);
108 for (auto Child : DownEdges.lookup(Val: Parent)) {
109 if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) {
110 auto &ChildCost =
111 Cache.try_emplace(Key: Child, Args: Unreachable).first->getSecond();
112 if (ParentCost + Opts.DownCost < ChildCost)
113 ChildCost = ParentCost + Opts.DownCost;
114 }
115 Next.push(x: Child);
116 }
117 }
118}
119
120unsigned FileDistance::distance(llvm::StringRef Path) {
121 auto Canonical = canonicalize(Path);
122 unsigned Cost = Unreachable;
123 llvm::SmallVector<llvm::hash_code> Ancestors;
124 // Walk up ancestors until we find a path we know the distance for.
125 for (llvm::StringRef Rest = Canonical; !Rest.empty();
126 Rest = parent_path(path: Rest, style: llvm::sys::path::Style::posix)) {
127 auto Hash = llvm::hash_value(S: Rest);
128 if (Hash == RootHash && !Ancestors.empty() &&
129 !Opts.AllowDownTraversalFromRoot) {
130 Cost = Unreachable;
131 break;
132 }
133 auto It = Cache.find(Val: Hash);
134 if (It != Cache.end()) {
135 Cost = It->second;
136 break;
137 }
138 Ancestors.push_back(Elt: Hash);
139 }
140 // Now we know the costs for (known node, queried node].
141 // Fill these in, walking down the directory tree.
142 for (llvm::hash_code Hash : llvm::reverse(C&: Ancestors)) {
143 if (Cost != Unreachable)
144 Cost += Opts.DownCost;
145 Cache.try_emplace(Key: Hash, Args&: Cost);
146 }
147 dlog("distance({0} = {1})", Path, Cost);
148 return Cost;
149}
150
151unsigned URIDistance::distance(llvm::StringRef URI) {
152 auto R = Cache.try_emplace(Key: llvm::hash_value(S: URI), Args: FileDistance::Unreachable);
153 if (!R.second)
154 return R.first->getSecond();
155 if (auto U = clangd::URI::parse(Uri: URI)) {
156 dlog("distance({0} = {1})", URI, U->body());
157 R.first->second = forScheme(Scheme: U->scheme()).distance(Path: U->body());
158 } else {
159 log(Fmt: "URIDistance::distance() of unparseable {0}: {1}", Vals&: URI, Vals: U.takeError());
160 }
161 return R.first->second;
162}
163
164FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
165 auto &Delegate = ByScheme[Scheme];
166 if (!Delegate) {
167 llvm::StringMap<SourceParams> SchemeSources;
168 for (const auto &Source : Sources) {
169 if (auto U = clangd::URI::create(AbsolutePath: Source.getKey(), Scheme))
170 SchemeSources.try_emplace(Key: U->body(), Args: Source.getValue());
171 else
172 llvm::consumeError(Err: U.takeError());
173 }
174 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
175 SchemeSources.size(), Sources.size());
176 Delegate.reset(p: new FileDistance(std::move(SchemeSources), Opts));
177 }
178 return *Delegate;
179}
180
181static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
182 llvm::SmallVector<llvm::StringRef> Split;
183 Scope.split(A&: Split, Separator: "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
184 return {"/" + llvm::join(R&: Split, Separator: "/"), Split.size()};
185}
186
187static FileDistance
188createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
189 FileDistanceOptions Opts;
190 Opts.UpCost = 2;
191 Opts.DownCost = 4;
192 Opts.AllowDownTraversalFromRoot = false;
193
194 llvm::StringMap<SourceParams> Sources;
195 llvm::StringRef Preferred =
196 QueryScopes.empty() ? "" : QueryScopes.front().c_str();
197 for (llvm::StringRef S : QueryScopes) {
198 SourceParams Param;
199 // Penalize the global scope even it's preferred, as all projects can define
200 // symbols in it, and there is pattern where using-namespace is used in
201 // place of enclosing namespaces (e.g. in implementation files).
202 if (S == Preferred)
203 Param.Cost = S == "" ? 4 : 0;
204 else if (Preferred.starts_with(Prefix: S) && !S.empty())
205 continue; // just rely on up-traversals.
206 else
207 Param.Cost = S == "" ? 6 : 2;
208 auto Path = scopeToPath(Scope: S);
209 // The global namespace is not 'near' its children.
210 Param.MaxUpTraversals = std::max(a: Path.second - 1, b: 0);
211 Sources[Path.first] = std::move(Param);
212 }
213 return FileDistance(std::move(Sources), Opts);
214}
215
216ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
217 : Distance(createScopeFileDistance(QueryScopes)) {}
218
219unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
220 return Distance.distance(Path: scopeToPath(Scope: SymbolScope).first);
221}
222
223} // namespace clangd
224} // namespace clang
225

source code of clang-tools-extra/clangd/FileDistance.cpp