//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "URI.h" |
10 | #include "support/Logger.h" |
11 | #include "llvm/ADT/StringExtras.h" |
12 | #include "llvm/ADT/Twine.h" |
13 | #include "llvm/Support/Error.h" |
14 | #include "llvm/Support/Path.h" |
15 | #include <algorithm> |
16 | |
17 | LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry) |
18 | |
19 | namespace clang { |
20 | namespace clangd { |
21 | namespace { |
22 | |
23 | bool isWindowsPath(llvm::StringRef Path) { |
24 | return Path.size() > 1 && llvm::isAlpha(C: Path[0]) && Path[1] == ':'; |
25 | } |
26 | |
27 | bool isNetworkPath(llvm::StringRef Path) { |
28 | return Path.size() > 2 && Path[0] == Path[1] && |
29 | llvm::sys::path::is_separator(value: Path[0]); |
30 | } |
31 | |
32 | /// This manages file paths in the file system. All paths in the scheme |
33 | /// are absolute (with leading '/'). |
34 | /// Note that this scheme is hardcoded into the library and not registered in |
35 | /// registry. |
36 | class FileSystemScheme : public URIScheme { |
37 | public: |
38 | llvm::Expected<std::string> |
39 | getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body, |
40 | llvm::StringRef /*HintPath*/) const override { |
41 | if (!Body.starts_with(Prefix: "/" )) |
42 | return error(Fmt: "File scheme: expect body to be an absolute path starting " |
43 | "with '/': {0}" , |
44 | Vals&: Body); |
45 | llvm::SmallString<128> Path; |
46 | if (!Authority.empty()) { |
47 | // Windows UNC paths e.g. file://server/share => \\server\share |
48 | ("//" + Authority).toVector(Out&: Path); |
49 | } else if (isWindowsPath(Path: Body.substr(Start: 1))) { |
50 | // Windows paths e.g. file:///X:/path => X:\path |
51 | Body.consume_front(Prefix: "/" ); |
52 | } |
53 | Path.append(RHS: Body); |
54 | llvm::sys::path::native(path&: Path); |
55 | return std::string(Path); |
56 | } |
57 | |
58 | llvm::Expected<URI> |
59 | uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { |
60 | std::string Body; |
61 | llvm::StringRef Authority; |
62 | llvm::StringRef Root = llvm::sys::path::root_name(path: AbsolutePath); |
63 | if (isNetworkPath(Path: Root)) { |
64 | // Windows UNC paths e.g. \\server\share => file://server/share |
65 | Authority = Root.drop_front(N: 2); |
66 | AbsolutePath.consume_front(Prefix: Root); |
67 | } else if (isWindowsPath(Path: Root)) { |
68 | // Windows paths e.g. X:\path => file:///X:/path |
69 | Body = "/" ; |
70 | } |
71 | Body += llvm::sys::path::convert_to_slash(path: AbsolutePath); |
72 | return URI("file" , Authority, Body); |
73 | } |
74 | }; |
75 | |
76 | llvm::Expected<std::unique_ptr<URIScheme>> |
77 | findSchemeByName(llvm::StringRef Scheme) { |
78 | if (Scheme == "file" ) |
79 | return std::make_unique<FileSystemScheme>(); |
80 | |
81 | for (const auto &URIScheme : URISchemeRegistry::entries()) { |
82 | if (URIScheme.getName() != Scheme) |
83 | continue; |
84 | return URIScheme.instantiate(); |
85 | } |
86 | return error(Fmt: "Can't find scheme: {0}" , Vals&: Scheme); |
87 | } |
88 | |
/// Returns true if byte \p C must be percent-encoded when written into a URI.
/// ASCII letters, digits and the punctuation `- _ . ~ / :` are left as-is;
/// every other byte is escaped.
bool shouldEscape(unsigned char C) {
  // Unreserved characters: alphanumerics.
  const bool IsLetter = ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z');
  const bool IsDigit = '0' <= C && C <= '9';
  if (IsLetter || IsDigit)
    return false;
  // Remaining unreserved characters, plus:
  //  - '/' which is only reserved when parsing;
  //  - ':' which is only reserved for relative URI paths, which clangd doesn't
  //    produce.
  const bool IsPlainPunctuation = C == '-' || C == '_' || C == '.' ||
                                  C == '~' || C == '/' || C == ':';
  return !IsPlainPunctuation;
}
106 | |
107 | /// Encodes a string according to percent-encoding. |
108 | /// - Unreserved characters are not escaped. |
109 | /// - Reserved characters always escaped with exceptions like '/'. |
110 | /// - All other characters are escaped. |
111 | void percentEncode(llvm::StringRef Content, std::string &Out) { |
112 | for (unsigned char C : Content) |
113 | if (shouldEscape(C)) { |
114 | Out.push_back(c: '%'); |
115 | Out.push_back(c: llvm::hexdigit(X: C / 16)); |
116 | Out.push_back(c: llvm::hexdigit(X: C % 16)); |
117 | } else { |
118 | Out.push_back(c: C); |
119 | } |
120 | } |
121 | |
122 | /// Decodes a string according to percent-encoding. |
123 | std::string percentDecode(llvm::StringRef Content) { |
124 | std::string Result; |
125 | for (auto I = Content.begin(), E = Content.end(); I != E; ++I) { |
126 | if (*I != '%') { |
127 | Result += *I; |
128 | continue; |
129 | } |
130 | if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(C: *(I + 1)) && |
131 | llvm::isHexDigit(C: *(I + 2))) { |
132 | Result.push_back(c: llvm::hexFromNibbles(MSB: *(I + 1), LSB: *(I + 2))); |
133 | I += 2; |
134 | } else |
135 | Result.push_back(c: *I); |
136 | } |
137 | return Result; |
138 | } |
139 | |
140 | bool isValidScheme(llvm::StringRef Scheme) { |
141 | if (Scheme.empty()) |
142 | return false; |
143 | if (!llvm::isAlpha(C: Scheme[0])) |
144 | return false; |
145 | return llvm::all_of(Range: llvm::drop_begin(RangeOrContainer&: Scheme), P: [](char C) { |
146 | return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-'; |
147 | }); |
148 | } |
149 | |
150 | } // namespace |
151 | |
152 | URI::URI(llvm::StringRef Scheme, llvm::StringRef Authority, |
153 | llvm::StringRef Body) |
154 | : Scheme(Scheme), Authority(Authority), Body(Body) { |
155 | assert(!Scheme.empty()); |
156 | assert((Authority.empty() || Body.starts_with("/" )) && |
157 | "URI body must start with '/' when authority is present." ); |
158 | } |
159 | |
160 | std::string URI::toString() const { |
161 | std::string Result; |
162 | percentEncode(Content: Scheme, Out&: Result); |
163 | Result.push_back(c: ':'); |
164 | if (Authority.empty() && Body.empty()) |
165 | return Result; |
166 | // If authority if empty, we only print body if it starts with "/"; otherwise, |
167 | // the URI is invalid. |
168 | if (!Authority.empty() || llvm::StringRef(Body).starts_with(Prefix: "/" )) { |
169 | Result.append(s: "//" ); |
170 | percentEncode(Content: Authority, Out&: Result); |
171 | } |
172 | percentEncode(Content: Body, Out&: Result); |
173 | return Result; |
174 | } |
175 | |
176 | llvm::Expected<URI> URI::parse(llvm::StringRef OrigUri) { |
177 | URI U; |
178 | llvm::StringRef Uri = OrigUri; |
179 | |
180 | auto Pos = Uri.find(C: ':'); |
181 | if (Pos == llvm::StringRef::npos) |
182 | return error(Fmt: "Scheme must be provided in URI: {0}" , Vals&: OrigUri); |
183 | auto SchemeStr = Uri.substr(Start: 0, N: Pos); |
184 | U.Scheme = percentDecode(Content: SchemeStr); |
185 | if (!isValidScheme(Scheme: U.Scheme)) |
186 | return error(Fmt: "Invalid scheme: {0} (decoded: {1})" , Vals&: SchemeStr, Vals&: U.Scheme); |
187 | Uri = Uri.substr(Start: Pos + 1); |
188 | if (Uri.consume_front(Prefix: "//" )) { |
189 | Pos = Uri.find(C: '/'); |
190 | U.Authority = percentDecode(Content: Uri.substr(Start: 0, N: Pos)); |
191 | Uri = Uri.substr(Start: Pos); |
192 | } |
193 | U.Body = percentDecode(Content: Uri); |
194 | return U; |
195 | } |
196 | |
197 | llvm::Expected<std::string> URI::resolve(llvm::StringRef FileURI, |
198 | llvm::StringRef HintPath) { |
199 | auto Uri = URI::parse(OrigUri: FileURI); |
200 | if (!Uri) |
201 | return Uri.takeError(); |
202 | auto Path = URI::resolve(U: *Uri, HintPath); |
203 | if (!Path) |
204 | return Path.takeError(); |
205 | return *Path; |
206 | } |
207 | |
208 | llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath, |
209 | llvm::StringRef Scheme) { |
210 | if (!llvm::sys::path::is_absolute(path: AbsolutePath)) |
211 | return error(Fmt: "Not a valid absolute path: {0}" , Vals&: AbsolutePath); |
212 | auto S = findSchemeByName(Scheme); |
213 | if (!S) |
214 | return S.takeError(); |
215 | return S->get()->uriFromAbsolutePath(AbsolutePath); |
216 | } |
217 | |
218 | URI URI::create(llvm::StringRef AbsolutePath) { |
219 | if (!llvm::sys::path::is_absolute(path: AbsolutePath)) |
220 | llvm_unreachable( |
221 | ("Not a valid absolute path: " + AbsolutePath).str().c_str()); |
222 | for (auto &Entry : URISchemeRegistry::entries()) { |
223 | auto URI = Entry.instantiate()->uriFromAbsolutePath(AbsolutePath); |
224 | // For some paths, conversion to different URI schemes is impossible. These |
225 | // should be just skipped. |
226 | if (!URI) { |
227 | // Ignore the error. |
228 | llvm::consumeError(Err: URI.takeError()); |
229 | continue; |
230 | } |
231 | return std::move(*URI); |
232 | } |
233 | // Fallback to file: scheme which should work for any paths. |
234 | return URI::createFile(AbsolutePath); |
235 | } |
236 | |
237 | URI URI::createFile(llvm::StringRef AbsolutePath) { |
238 | auto U = FileSystemScheme().uriFromAbsolutePath(AbsolutePath); |
239 | if (!U) |
240 | llvm_unreachable(llvm::toString(U.takeError()).c_str()); |
241 | return std::move(*U); |
242 | } |
243 | |
244 | llvm::Expected<std::string> URI::resolve(const URI &Uri, |
245 | llvm::StringRef HintPath) { |
246 | auto S = findSchemeByName(Scheme: Uri.Scheme); |
247 | if (!S) |
248 | return S.takeError(); |
249 | return S->get()->getAbsolutePath(Authority: Uri.Authority, Body: Uri.Body, HintPath); |
250 | } |
251 | |
252 | llvm::Expected<std::string> URI::resolvePath(llvm::StringRef AbsPath, |
253 | llvm::StringRef HintPath) { |
254 | if (!llvm::sys::path::is_absolute(path: AbsPath)) |
255 | llvm_unreachable(("Not a valid absolute path: " + AbsPath).str().c_str()); |
256 | for (auto &Entry : URISchemeRegistry::entries()) { |
257 | auto S = Entry.instantiate(); |
258 | auto U = S->uriFromAbsolutePath(AbsolutePath: AbsPath); |
259 | // For some paths, conversion to different URI schemes is impossible. These |
260 | // should be just skipped. |
261 | if (!U) { |
262 | // Ignore the error. |
263 | llvm::consumeError(Err: U.takeError()); |
264 | continue; |
265 | } |
266 | return S->getAbsolutePath(Authority: U->Authority, Body: U->Body, HintPath); |
267 | } |
268 | // Fallback to file: scheme which doesn't do any canonicalization. |
269 | return std::string(AbsPath); |
270 | } |
271 | |
272 | llvm::Expected<std::string> URI::includeSpelling(const URI &Uri) { |
273 | auto S = findSchemeByName(Scheme: Uri.Scheme); |
274 | if (!S) |
275 | return S.takeError(); |
276 | return S->get()->getIncludeSpelling(U: Uri); |
277 | } |
278 | |
279 | } // namespace clangd |
280 | } // namespace clang |
281 | |