1 | //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The FileDistance structure allows calculating the minimum distance to paths |
10 | // in a single tree. |
11 | // We simply walk up the path's ancestors until we find a node whose cost is |
12 | // known, and add the cost of walking back down. Initialization ensures this |
13 | // gives the correct path to the roots. |
14 | // We cache the results, so that the runtime is O(|A|), where A is the set of |
15 | // all distinct ancestors of visited paths. |
16 | // |
17 | // Example after initialization with /=2, /bar=0, DownCost = 1: |
18 | // / = 2 |
19 | // /bar = 0 |
20 | // |
21 | // After querying /foo/bar and /bar/foo: |
22 | // / = 2 |
23 | // /bar = 0 |
24 | // /bar/foo = 1 |
25 | // /foo = 3 |
26 | // /foo/bar = 4 |
27 | // |
28 | // URIDistance creates FileDistance lazily for each URI scheme encountered. In |
29 | // practice this is a small constant factor. |
30 | // |
31 | //===-------------------------------------------------------------------------// |
32 | |
33 | #include "FileDistance.h" |
34 | #include "URI.h" |
35 | #include "support/Logger.h" |
36 | #include "llvm/ADT/STLExtras.h" |
37 | #include "llvm/ADT/StringExtras.h" |
38 | #include "llvm/Support/Path.h" |
39 | #include <queue> |
40 | |
41 | namespace clang { |
42 | namespace clangd { |
43 | |
44 | // Convert a path into the canonical form. |
45 | // Canonical form is either "/", or "/segment" * N: |
46 | // C:\foo\bar --> /c:/foo/bar |
47 | // /foo/ --> /foo |
48 | // a/b/c --> /a/b/c |
49 | static llvm::SmallString<128> canonicalize(llvm::StringRef Path) { |
50 | llvm::SmallString<128> Result = Path.rtrim(Char: '/'); |
51 | native(path&: Result, style: llvm::sys::path::Style::posix); |
52 | if (Result.empty() || Result.front() != '/') |
53 | Result.insert(I: Result.begin(), Elt: '/'); |
54 | return Result; |
55 | } |
56 | |
57 | constexpr const unsigned FileDistance::Unreachable; |
58 | const llvm::hash_code FileDistance::RootHash = |
59 | llvm::hash_value(S: llvm::StringRef("/" )); |
60 | |
61 | FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources, |
62 | const FileDistanceOptions &Opts) |
63 | : Opts(Opts) { |
64 | llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges; |
65 | // Compute the best distance following only up edges. |
66 | // Keep track of down edges, in case we can use them to improve on this. |
67 | for (const auto &S : Sources) { |
68 | auto Canonical = canonicalize(Path: S.getKey()); |
69 | dlog("Source {0} = {1}, MaxUp = {2}" , Canonical, S.second.Cost, |
70 | S.second.MaxUpTraversals); |
71 | // Walk up to ancestors of this source, assigning cost. |
72 | llvm::StringRef Rest = Canonical; |
73 | llvm::hash_code Hash = llvm::hash_value(S: Rest); |
74 | for (unsigned I = 0; !Rest.empty(); ++I) { |
75 | Rest = parent_path(path: Rest, style: llvm::sys::path::Style::posix); |
76 | auto NextHash = llvm::hash_value(S: Rest); |
77 | auto &Down = DownEdges[NextHash]; |
78 | if (!llvm::is_contained(Range&: Down, Element: Hash)) |
79 | Down.push_back(Elt: Hash); |
80 | // We can't just break after MaxUpTraversals, must still set DownEdges. |
81 | if (I > S.getValue().MaxUpTraversals) { |
82 | if (Cache.contains(Val: Hash)) |
83 | break; |
84 | } else { |
85 | unsigned Cost = S.getValue().Cost + I * Opts.UpCost; |
86 | auto R = Cache.try_emplace(Key: Hash, Args&: Cost); |
87 | if (!R.second) { |
88 | if (Cost < R.first->second) { |
89 | R.first->second = Cost; |
90 | } else { |
91 | // If we're not the best way to get to this path, stop assigning. |
92 | break; |
93 | } |
94 | } |
95 | } |
96 | Hash = NextHash; |
97 | } |
98 | } |
99 | // Now propagate scores parent -> child if that's an improvement. |
100 | // BFS ensures we propagate down chains (must visit parents before children). |
101 | std::queue<llvm::hash_code> Next; |
102 | for (auto Child : DownEdges.lookup(Val: llvm::hash_value(S: llvm::StringRef("" )))) |
103 | Next.push(x: Child); |
104 | while (!Next.empty()) { |
105 | auto Parent = Next.front(); |
106 | Next.pop(); |
107 | auto ParentCost = Cache.lookup(Val: Parent); |
108 | for (auto Child : DownEdges.lookup(Val: Parent)) { |
109 | if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) { |
110 | auto &ChildCost = |
111 | Cache.try_emplace(Key: Child, Args: Unreachable).first->getSecond(); |
112 | if (ParentCost + Opts.DownCost < ChildCost) |
113 | ChildCost = ParentCost + Opts.DownCost; |
114 | } |
115 | Next.push(x: Child); |
116 | } |
117 | } |
118 | } |
119 | |
120 | unsigned FileDistance::distance(llvm::StringRef Path) { |
121 | auto Canonical = canonicalize(Path); |
122 | unsigned Cost = Unreachable; |
123 | llvm::SmallVector<llvm::hash_code> Ancestors; |
124 | // Walk up ancestors until we find a path we know the distance for. |
125 | for (llvm::StringRef Rest = Canonical; !Rest.empty(); |
126 | Rest = parent_path(path: Rest, style: llvm::sys::path::Style::posix)) { |
127 | auto Hash = llvm::hash_value(S: Rest); |
128 | if (Hash == RootHash && !Ancestors.empty() && |
129 | !Opts.AllowDownTraversalFromRoot) { |
130 | Cost = Unreachable; |
131 | break; |
132 | } |
133 | auto It = Cache.find(Val: Hash); |
134 | if (It != Cache.end()) { |
135 | Cost = It->second; |
136 | break; |
137 | } |
138 | Ancestors.push_back(Elt: Hash); |
139 | } |
140 | // Now we know the costs for (known node, queried node]. |
141 | // Fill these in, walking down the directory tree. |
142 | for (llvm::hash_code Hash : llvm::reverse(C&: Ancestors)) { |
143 | if (Cost != Unreachable) |
144 | Cost += Opts.DownCost; |
145 | Cache.try_emplace(Key: Hash, Args&: Cost); |
146 | } |
147 | dlog("distance({0} = {1})" , Path, Cost); |
148 | return Cost; |
149 | } |
150 | |
151 | unsigned URIDistance::distance(llvm::StringRef URI) { |
152 | auto R = Cache.try_emplace(Key: llvm::hash_value(S: URI), Args: FileDistance::Unreachable); |
153 | if (!R.second) |
154 | return R.first->getSecond(); |
155 | if (auto U = clangd::URI::parse(Uri: URI)) { |
156 | dlog("distance({0} = {1})" , URI, U->body()); |
157 | R.first->second = forScheme(Scheme: U->scheme()).distance(Path: U->body()); |
158 | } else { |
159 | log(Fmt: "URIDistance::distance() of unparseable {0}: {1}" , Vals&: URI, Vals: U.takeError()); |
160 | } |
161 | return R.first->second; |
162 | } |
163 | |
164 | FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { |
165 | auto &Delegate = ByScheme[Scheme]; |
166 | if (!Delegate) { |
167 | llvm::StringMap<SourceParams> SchemeSources; |
168 | for (const auto &Source : Sources) { |
169 | if (auto U = clangd::URI::create(AbsolutePath: Source.getKey(), Scheme)) |
170 | SchemeSources.try_emplace(Key: U->body(), Args: Source.getValue()); |
171 | else |
172 | llvm::consumeError(Err: U.takeError()); |
173 | } |
174 | dlog("FileDistance for scheme {0}: {1}/{2} sources" , Scheme, |
175 | SchemeSources.size(), Sources.size()); |
176 | Delegate.reset(p: new FileDistance(std::move(SchemeSources), Opts)); |
177 | } |
178 | return *Delegate; |
179 | } |
180 | |
181 | static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) { |
182 | llvm::SmallVector<llvm::StringRef> Split; |
183 | Scope.split(A&: Split, Separator: "::" , /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
184 | return {"/" + llvm::join(R&: Split, Separator: "/" ), Split.size()}; |
185 | } |
186 | |
187 | static FileDistance |
188 | createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) { |
189 | FileDistanceOptions Opts; |
190 | Opts.UpCost = 2; |
191 | Opts.DownCost = 4; |
192 | Opts.AllowDownTraversalFromRoot = false; |
193 | |
194 | llvm::StringMap<SourceParams> Sources; |
195 | llvm::StringRef Preferred = |
196 | QueryScopes.empty() ? "" : QueryScopes.front().c_str(); |
197 | for (llvm::StringRef S : QueryScopes) { |
198 | SourceParams Param; |
199 | // Penalize the global scope even it's preferred, as all projects can define |
200 | // symbols in it, and there is pattern where using-namespace is used in |
201 | // place of enclosing namespaces (e.g. in implementation files). |
202 | if (S == Preferred) |
203 | Param.Cost = S == "" ? 4 : 0; |
204 | else if (Preferred.starts_with(Prefix: S) && !S.empty()) |
205 | continue; // just rely on up-traversals. |
206 | else |
207 | Param.Cost = S == "" ? 6 : 2; |
208 | auto Path = scopeToPath(Scope: S); |
209 | // The global namespace is not 'near' its children. |
210 | Param.MaxUpTraversals = std::max(a: Path.second - 1, b: 0); |
211 | Sources[Path.first] = std::move(Param); |
212 | } |
213 | return FileDistance(std::move(Sources), Opts); |
214 | } |
215 | |
216 | ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes) |
217 | : Distance(createScopeFileDistance(QueryScopes)) {} |
218 | |
219 | unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) { |
220 | return Distance.distance(Path: scopeToPath(Scope: SymbolScope).first); |
221 | } |
222 | |
223 | } // namespace clangd |
224 | } // namespace clang |
225 | |