1 | // Copyright 2013-2015 The rust-url developers. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | /*! |
10 | |
rust-url is an implementation of the [URL Standard](https://url.spec.whatwg.org/)
for the [Rust](https://www.rust-lang.org/) programming language.
13 | |
14 | |
15 | # URL parsing and data structures |
16 | |
17 | First, URL parsing may fail for various reasons and therefore returns a `Result`. |
18 | |
19 | ``` |
20 | use url::{Url, ParseError}; |
21 | |
22 | assert!(Url::parse("http://[:::1]" ) == Err(ParseError::InvalidIpv6Address)) |
23 | ``` |
24 | |
25 | Let’s parse a valid URL and look at its components. |
26 | |
27 | ``` |
28 | use url::{Url, Host, Position}; |
29 | # use url::ParseError; |
30 | # fn run() -> Result<(), ParseError> { |
31 | let issue_list_url = Url::parse( |
32 | "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open" |
33 | )?; |
34 | |
35 | |
36 | assert!(issue_list_url.scheme() == "https" ); |
37 | assert!(issue_list_url.username() == "" ); |
38 | assert!(issue_list_url.password() == None); |
39 | assert!(issue_list_url.host_str() == Some("github.com" )); |
40 | assert!(issue_list_url.host() == Some(Host::Domain("github.com" ))); |
41 | assert!(issue_list_url.port() == None); |
42 | assert!(issue_list_url.path() == "/rust-lang/rust/issues" ); |
43 | assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) == |
44 | Some(vec!["rust-lang" , "rust" , "issues" ])); |
45 | assert!(issue_list_url.query() == Some("labels=E-easy&state=open" )); |
46 | assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open" ); |
47 | assert!(issue_list_url.fragment() == None); |
48 | assert!(!issue_list_url.cannot_be_a_base()); |
49 | # Ok(()) |
50 | # } |
51 | # run().unwrap(); |
52 | ``` |
53 | |
54 | Some URLs are said to be *cannot-be-a-base*: |
55 | they don’t have a username, password, host, or port, |
56 | and their "path" is an arbitrary string rather than slash-separated segments: |
57 | |
58 | ``` |
59 | use url::Url; |
60 | # use url::ParseError; |
61 | |
62 | # fn run() -> Result<(), ParseError> { |
63 | let data_url = Url::parse("data:text/plain,Hello?World#" )?; |
64 | |
65 | assert!(data_url.cannot_be_a_base()); |
66 | assert!(data_url.scheme() == "data" ); |
67 | assert!(data_url.path() == "text/plain,Hello" ); |
68 | assert!(data_url.path_segments().is_none()); |
69 | assert!(data_url.query() == Some("World" )); |
70 | assert!(data_url.fragment() == Some("" )); |
71 | # Ok(()) |
72 | # } |
73 | # run().unwrap(); |
74 | ``` |
75 | |
76 | ## Default Features |
77 | |
78 | Versions `<= 2.5.2` of the crate have no default features. Versions `> 2.5.2` have the default feature 'std'. |
79 | If you are upgrading across this boundary and you have specified `default-features = false`, then |
80 | you will need to add the 'std' feature or the 'alloc' feature to your dependency. |
81 | The 'std' feature has the same behavior as the previous versions. The 'alloc' feature |
82 | provides no_std support. |
83 | |
84 | ## Serde |
85 | |
86 | Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`. |
87 | |
88 | # Base URL |
89 | |
90 | Many contexts allow URL *references* that can be relative to a *base URL*: |
91 | |
92 | ```html |
93 | <link rel="stylesheet" href="../main.css"> |
94 | ``` |
95 | |
96 | Since parsed URLs are absolute, giving a base is required for parsing relative URLs: |
97 | |
98 | ``` |
99 | use url::{Url, ParseError}; |
100 | |
101 | assert!(Url::parse("../main.css" ) == Err(ParseError::RelativeUrlWithoutBase)) |
102 | ``` |
103 | |
104 | Use the `join` method on an `Url` to use it as a base URL: |
105 | |
106 | ``` |
107 | use url::Url; |
108 | # use url::ParseError; |
109 | |
110 | # fn run() -> Result<(), ParseError> { |
111 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html" )?; |
112 | let css_url = this_document.join("../main.css" )?; |
113 | assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css" ); |
114 | # Ok(()) |
115 | # } |
116 | # run().unwrap(); |
117 | ``` |
118 | |
119 | # Feature: `serde` |
120 | |
121 | If you enable the `serde` feature, [`Url`](struct.Url.html) will implement |
122 | [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and |
123 | [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html). |
124 | See [serde documentation](https://serde.rs) for more information. |
125 | |
126 | ```toml |
127 | url = { version = "2", features = ["serde"] } |
128 | ``` |
129 | |
130 | # Feature: `debugger_visualizer` |
131 | |
132 | If you enable the `debugger_visualizer` feature, the `url` crate will include |
133 | a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects) |
134 | for [Visual Studio](https://www.visualstudio.com/) that allows you to view |
135 | [`Url`](struct.Url.html) objects in the debugger. |
136 | |
137 | This feature requires Rust 1.71 or later. |
138 | |
139 | ```toml |
140 | url = { version = "2", features = ["debugger_visualizer"] } |
141 | ``` |
142 | |
143 | */ |
144 | |
145 | #![no_std ] |
146 | #![doc (html_root_url = "https://docs.rs/url/2.5.4" )] |
147 | #![cfg_attr ( |
148 | feature = "debugger_visualizer" , |
149 | debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis" ) |
150 | )] |
151 | |
152 | pub use form_urlencoded; |
153 | |
154 | // For forwards compatibility |
155 | #[cfg (feature = "std" )] |
156 | extern crate std; |
157 | |
158 | #[macro_use ] |
159 | extern crate alloc; |
160 | |
161 | #[cfg (feature = "serde" )] |
162 | extern crate serde; |
163 | |
164 | use crate::host::HostInternal; |
165 | |
166 | use crate::net::IpAddr; |
167 | #[cfg (feature = "std" )] |
168 | #[cfg (any( |
169 | unix, |
170 | windows, |
171 | target_os = "redox" , |
172 | target_os = "wasi" , |
173 | target_os = "hermit" |
174 | ))] |
175 | use crate::net::{SocketAddr, ToSocketAddrs}; |
176 | use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO}; |
177 | use alloc::borrow::ToOwned; |
178 | use alloc::str; |
179 | use alloc::string::{String, ToString}; |
180 | use core::borrow::Borrow; |
181 | use core::convert::TryFrom; |
182 | use core::fmt::Write; |
183 | use core::ops::{Range, RangeFrom, RangeTo}; |
184 | use core::{cmp, fmt, hash, mem}; |
185 | use percent_encoding::utf8_percent_encode; |
186 | #[cfg (feature = "std" )] |
187 | #[cfg (any( |
188 | unix, |
189 | windows, |
190 | target_os = "redox" , |
191 | target_os = "wasi" , |
192 | target_os = "hermit" |
193 | ))] |
194 | use std::io; |
195 | #[cfg (feature = "std" )] |
196 | use std::path::{Path, PathBuf}; |
197 | |
/// `std` version of `net`.
///
/// Re-exports everything from `std::net` so the rest of the crate can import
/// networking types through `crate::net` regardless of the enabled features.
#[cfg(feature = "std")]
pub(crate) mod net {
    pub use std::net::*;
}
/// `no_std` version of `net`.
///
/// Re-exports everything from `core::net` when the `std` feature is disabled.
// NOTE(review): this was previously described as the "nightly" version; `core::net`
// appears to be available on stable toolchains now — confirm against the crate's MSRV.
#[cfg(not(feature = "std"))]
pub(crate) mod net {
    pub use core::net::*;
}
208 | |
209 | pub use crate::host::Host; |
210 | pub use crate::origin::{OpaqueOrigin, Origin}; |
211 | pub use crate::parser::{ParseError, SyntaxViolation}; |
212 | pub use crate::path_segments::PathSegmentsMut; |
213 | pub use crate::slicing::Position; |
214 | pub use form_urlencoded::EncodingOverride; |
215 | |
216 | mod host; |
217 | mod origin; |
218 | mod parser; |
219 | mod path_segments; |
220 | mod slicing; |
221 | |
222 | #[doc (hidden)] |
223 | pub mod quirks; |
224 | |
/// A parsed URL record.
///
/// The URL is kept as one serialized string together with offsets into that
/// string that mark where the individual components begin or end.
#[derive(Clone)]
pub struct Url {
    /// The full serialization of the URL.
    ///
    /// Syntax in pseudo-BNF:
    ///
    /// ```text
    /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    /// non-hierarchical = non-hierarchical-path
    /// non-hierarchical-path = /* Does not start with "/" */
    /// hierarchical = authority? hierarchical-path
    /// authority = "//" userinfo? host [ ":" port ]?
    /// userinfo = username [ ":" password ]? "@"
    /// hierarchical-path = [ "/" path-segment ]+
    /// ```
    serialization: String,

    // Components. All offsets are byte indices into `serialization`.
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    host_start: u32,   // First byte of the host text
    host_end: u32,     // One past the host text; at ':' when a port is serialized
    host: HostInternal, // Kind of host stored between `host_start` and `host_end`
    port: Option<u16>, // `Some` only when a port is present in the serialization
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
250 | |
/// Full configuration for the URL parser.
///
/// Obtain a default value with [`Url::options`], adjust it with the builder
/// methods, and finish with [`ParseOptions::parse`].
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    // Base URL that the input is parsed against; `None` means no base.
    base_url: Option<&'a Url>,
    // Encoding override handed to the parser for query strings.
    encoding_override: EncodingOverride<'a>,
    // Callback handed to the parser for non-fatal syntax violations, if any.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
259 | |
260 | impl<'a> ParseOptions<'a> { |
261 | /// Change the base URL |
262 | /// |
263 | /// See the notes of [`Url::join`] for more details about how this base is considered |
264 | /// when parsing. |
265 | pub fn base_url(mut self, new: Option<&'a Url>) -> Self { |
266 | self.base_url = new; |
267 | self |
268 | } |
269 | |
270 | /// Override the character encoding of query strings. |
271 | /// This is a legacy concept only relevant for HTML. |
272 | pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self { |
273 | self.encoding_override = new; |
274 | self |
275 | } |
276 | |
277 | /// Call the provided function or closure for a non-fatal `SyntaxViolation` |
278 | /// when it occurs during parsing. Note that since the provided function is |
279 | /// `Fn`, the caller might need to utilize _interior mutability_, such as with |
280 | /// a `RefCell`, to collect the violations. |
281 | /// |
282 | /// ## Example |
283 | /// ``` |
284 | /// use std::cell::RefCell; |
285 | /// use url::{Url, SyntaxViolation}; |
286 | /// # use url::ParseError; |
287 | /// # fn run() -> Result<(), url::ParseError> { |
288 | /// let violations = RefCell::new(Vec::new()); |
289 | /// let url = Url::options() |
290 | /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v))) |
291 | /// .parse("https:////example.com" )?; |
292 | /// assert_eq!(url.as_str(), "https://example.com/" ); |
293 | /// assert_eq!(violations.into_inner(), |
294 | /// vec!(SyntaxViolation::ExpectedDoubleSlash)); |
295 | /// # Ok(()) |
296 | /// # } |
297 | /// # run().unwrap(); |
298 | /// ``` |
299 | pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { |
300 | self.violation_fn = new; |
301 | self |
302 | } |
303 | |
304 | /// Parse an URL string with the configuration so far. |
305 | pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> { |
306 | Parser { |
307 | serialization: String::with_capacity(input.len()), |
308 | base_url: self.base_url, |
309 | query_encoding_override: self.encoding_override, |
310 | violation_fn: self.violation_fn, |
311 | context: Context::UrlParser, |
312 | } |
313 | .parse_url(input) |
314 | } |
315 | } |
316 | |
317 | impl Url { |
318 | /// Parse an absolute URL from a string. |
319 | /// |
320 | /// # Examples |
321 | /// |
322 | /// ```rust |
323 | /// use url::Url; |
324 | /// # use url::ParseError; |
325 | /// |
326 | /// # fn run() -> Result<(), ParseError> { |
327 | /// let url = Url::parse("https://example.net" )?; |
328 | /// # Ok(()) |
329 | /// # } |
330 | /// # run().unwrap(); |
331 | /// ``` |
332 | /// |
333 | /// # Errors |
334 | /// |
335 | /// If the function can not parse an absolute URL from the given string, |
336 | /// a [`ParseError`] variant will be returned. |
337 | /// |
338 | /// [`ParseError`]: enum.ParseError.html |
339 | #[inline ] |
340 | pub fn parse(input: &str) -> Result<Url, crate::ParseError> { |
341 | Url::options().parse(input) |
342 | } |
343 | |
344 | /// Parse an absolute URL from a string and add params to its query string. |
345 | /// |
346 | /// Existing params are not removed. |
347 | /// |
348 | /// # Examples |
349 | /// |
350 | /// ```rust |
351 | /// use url::Url; |
352 | /// # use url::ParseError; |
353 | /// |
354 | /// # fn run() -> Result<(), ParseError> { |
355 | /// let url = Url::parse_with_params("https://example.net?dont=clobberme" , |
356 | /// &[("lang" , "rust" ), ("browser" , "servo" )])?; |
357 | /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo" , url.as_str()); |
358 | /// # Ok(()) |
359 | /// # } |
360 | /// # run().unwrap(); |
361 | /// ``` |
362 | /// |
363 | /// # Errors |
364 | /// |
365 | /// If the function can not parse an absolute URL from the given string, |
366 | /// a [`ParseError`] variant will be returned. |
367 | /// |
368 | /// [`ParseError`]: enum.ParseError.html |
369 | #[inline ] |
370 | pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> |
371 | where |
372 | I: IntoIterator, |
373 | I::Item: Borrow<(K, V)>, |
374 | K: AsRef<str>, |
375 | V: AsRef<str>, |
376 | { |
377 | let mut url = Url::options().parse(input); |
378 | |
379 | if let Ok(ref mut url) = url { |
380 | url.query_pairs_mut().extend_pairs(iter); |
381 | } |
382 | |
383 | url |
384 | } |
385 | |
386 | /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path |
387 | fn strip_trailing_spaces_from_opaque_path(&mut self) { |
388 | if !self.cannot_be_a_base() { |
389 | return; |
390 | } |
391 | |
392 | if self.fragment_start.is_some() { |
393 | return; |
394 | } |
395 | |
396 | if self.query_start.is_some() { |
397 | return; |
398 | } |
399 | |
400 | let trailing_space_count = self |
401 | .serialization |
402 | .chars() |
403 | .rev() |
404 | .take_while(|c| *c == ' ' ) |
405 | .count(); |
406 | |
407 | let start = self.serialization.len() - trailing_space_count; |
408 | |
409 | self.serialization.truncate(start); |
410 | } |
411 | |
412 | /// Parse a string as an URL, with this URL as the base URL. |
413 | /// |
414 | /// The inverse of this is [`make_relative`]. |
415 | /// |
416 | /// # Notes |
417 | /// |
418 | /// - A trailing slash is significant. |
419 | /// Without it, the last path component is considered to be a “file” name |
420 | /// to be removed to get at the “directory” that is used as the base. |
421 | /// - A [scheme relative special URL](https://url.spec.whatwg.org/#scheme-relative-special-url-string) |
422 | /// as input replaces everything in the base URL after the scheme. |
423 | /// - An absolute URL (with a scheme) as input replaces the whole base URL (even the scheme). |
424 | /// |
425 | /// # Examples |
426 | /// |
427 | /// ```rust |
428 | /// use url::Url; |
429 | /// # use url::ParseError; |
430 | /// |
431 | /// // Base without a trailing slash |
432 | /// # fn run() -> Result<(), ParseError> { |
433 | /// let base = Url::parse("https://example.net/a/b.html" )?; |
434 | /// let url = base.join("c.png" )?; |
435 | /// assert_eq!(url.as_str(), "https://example.net/a/c.png" ); // Not /a/b.html/c.png |
436 | /// |
437 | /// // Base with a trailing slash |
438 | /// let base = Url::parse("https://example.net/a/b/" )?; |
439 | /// let url = base.join("c.png" )?; |
440 | /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png" ); |
441 | /// |
442 | /// // Input as scheme relative special URL |
443 | /// let base = Url::parse("https://alice.com/a" )?; |
444 | /// let url = base.join("//eve.com/b" )?; |
445 | /// assert_eq!(url.as_str(), "https://eve.com/b" ); |
446 | /// |
447 | /// // Input as absolute URL |
448 | /// let base = Url::parse("https://alice.com/a" )?; |
449 | /// let url = base.join("http://eve.com/b" )?; |
450 | /// assert_eq!(url.as_str(), "http://eve.com/b" ); // http instead of https |
451 | |
452 | /// # Ok(()) |
453 | /// # } |
454 | /// # run().unwrap(); |
455 | /// ``` |
456 | /// |
457 | /// # Errors |
458 | /// |
459 | /// If the function can not parse an URL from the given string |
460 | /// with this URL as the base URL, a [`ParseError`] variant will be returned. |
461 | /// |
462 | /// [`ParseError`]: enum.ParseError.html |
463 | /// [`make_relative`]: #method.make_relative |
464 | #[inline ] |
465 | pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> { |
466 | Url::options().base_url(Some(self)).parse(input) |
467 | } |
468 | |
469 | /// Creates a relative URL if possible, with this URL as the base URL. |
470 | /// |
471 | /// This is the inverse of [`join`]. |
472 | /// |
473 | /// # Examples |
474 | /// |
475 | /// ```rust |
476 | /// use url::Url; |
477 | /// # use url::ParseError; |
478 | /// |
479 | /// # fn run() -> Result<(), ParseError> { |
480 | /// let base = Url::parse("https://example.net/a/b.html" )?; |
481 | /// let url = Url::parse("https://example.net/a/c.png" )?; |
482 | /// let relative = base.make_relative(&url); |
483 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png" )); |
484 | /// |
485 | /// let base = Url::parse("https://example.net/a/b/" )?; |
486 | /// let url = Url::parse("https://example.net/a/b/c.png" )?; |
487 | /// let relative = base.make_relative(&url); |
488 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png" )); |
489 | /// |
490 | /// let base = Url::parse("https://example.net/a/b/" )?; |
491 | /// let url = Url::parse("https://example.net/a/d/c.png" )?; |
492 | /// let relative = base.make_relative(&url); |
493 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png" )); |
494 | /// |
495 | /// let base = Url::parse("https://example.net/a/b.html?c=d" )?; |
496 | /// let url = Url::parse("https://example.net/a/b.html?e=f" )?; |
497 | /// let relative = base.make_relative(&url); |
498 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f" )); |
499 | /// # Ok(()) |
500 | /// # } |
501 | /// # run().unwrap(); |
502 | /// ``` |
503 | /// |
504 | /// # Errors |
505 | /// |
506 | /// If this URL can't be a base for the given URL, `None` is returned. |
507 | /// This is for example the case if the scheme, host or port are not the same. |
508 | /// |
509 | /// [`join`]: #method.join |
510 | pub fn make_relative(&self, url: &Url) -> Option<String> { |
511 | if self.cannot_be_a_base() { |
512 | return None; |
513 | } |
514 | |
515 | // Scheme, host and port need to be the same |
516 | if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() { |
517 | return None; |
518 | } |
519 | |
520 | // We ignore username/password at this point |
521 | |
522 | // The path has to be transformed |
523 | let mut relative = String::new(); |
524 | |
525 | // Extract the filename of both URIs, these need to be handled separately |
526 | fn extract_path_filename(s: &str) -> (&str, &str) { |
527 | let last_slash_idx = s.rfind('/' ).unwrap_or(0); |
528 | let (path, filename) = s.split_at(last_slash_idx); |
529 | if filename.is_empty() { |
530 | (path, "" ) |
531 | } else { |
532 | (path, &filename[1..]) |
533 | } |
534 | } |
535 | |
536 | let (base_path, base_filename) = extract_path_filename(self.path()); |
537 | let (url_path, url_filename) = extract_path_filename(url.path()); |
538 | |
539 | let mut base_path = base_path.split('/' ).peekable(); |
540 | let mut url_path = url_path.split('/' ).peekable(); |
541 | |
542 | // Skip over the common prefix |
543 | while base_path.peek().is_some() && base_path.peek() == url_path.peek() { |
544 | base_path.next(); |
545 | url_path.next(); |
546 | } |
547 | |
548 | // Add `..` segments for the remainder of the base path |
549 | for base_path_segment in base_path { |
550 | // Skip empty last segments |
551 | if base_path_segment.is_empty() { |
552 | break; |
553 | } |
554 | |
555 | if !relative.is_empty() { |
556 | relative.push('/' ); |
557 | } |
558 | |
559 | relative.push_str(".." ); |
560 | } |
561 | |
562 | // Append the remainder of the other URI |
563 | for url_path_segment in url_path { |
564 | if !relative.is_empty() { |
565 | relative.push('/' ); |
566 | } |
567 | |
568 | relative.push_str(url_path_segment); |
569 | } |
570 | |
571 | // Add the filename if they are not the same |
572 | if !relative.is_empty() || base_filename != url_filename { |
573 | // If the URIs filename is empty this means that it was a directory |
574 | // so we'll have to append a '/'. |
575 | // |
576 | // Otherwise append it directly as the new filename. |
577 | if url_filename.is_empty() { |
578 | relative.push('/' ); |
579 | } else { |
580 | if !relative.is_empty() { |
581 | relative.push('/' ); |
582 | } |
583 | relative.push_str(url_filename); |
584 | } |
585 | } |
586 | |
587 | // Query and fragment are only taken from the other URI |
588 | if let Some(query) = url.query() { |
589 | relative.push('?' ); |
590 | relative.push_str(query); |
591 | } |
592 | |
593 | if let Some(fragment) = url.fragment() { |
594 | relative.push('#' ); |
595 | relative.push_str(fragment); |
596 | } |
597 | |
598 | Some(relative) |
599 | } |
600 | |
/// Return a default `ParseOptions` that can fully configure the URL parser.
///
/// The returned options have no base URL, no encoding override and no
/// syntax violation callback.
///
/// # Examples
///
/// Get default `ParseOptions`, then change base url
///
/// ```rust
/// use url::Url;
/// # use url::ParseError;
/// # fn run() -> Result<(), ParseError> {
/// let options = Url::options();
/// let api = Url::parse("https://api.example.com")?;
/// let base_url = options.base_url(Some(&api));
/// let version_url = base_url.parse("version.json")?;
/// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
pub fn options<'a>() -> ParseOptions<'a> {
    ParseOptions {
        base_url: None,
        encoding_override: None,
        violation_fn: None,
    }
}
627 | |
628 | /// Return the serialization of this URL. |
629 | /// |
630 | /// This is fast since that serialization is already stored in the `Url` struct. |
631 | /// |
632 | /// # Examples |
633 | /// |
634 | /// ```rust |
635 | /// use url::Url; |
636 | /// # use url::ParseError; |
637 | /// |
638 | /// # fn run() -> Result<(), ParseError> { |
639 | /// let url_str = "https://example.net/" ; |
640 | /// let url = Url::parse(url_str)?; |
641 | /// assert_eq!(url.as_str(), url_str); |
642 | /// # Ok(()) |
643 | /// # } |
644 | /// # run().unwrap(); |
645 | /// ``` |
646 | #[inline ] |
647 | pub fn as_str(&self) -> &str { |
648 | &self.serialization |
649 | } |
650 | |
651 | /// Return the serialization of this URL. |
652 | /// |
653 | /// This consumes the `Url` and takes ownership of the `String` stored in it. |
654 | /// |
655 | /// # Examples |
656 | /// |
657 | /// ```rust |
658 | /// use url::Url; |
659 | /// # use url::ParseError; |
660 | /// |
661 | /// # fn run() -> Result<(), ParseError> { |
662 | /// let url_str = "https://example.net/" ; |
663 | /// let url = Url::parse(url_str)?; |
664 | /// assert_eq!(String::from(url), url_str); |
665 | /// # Ok(()) |
666 | /// # } |
667 | /// # run().unwrap(); |
668 | /// ``` |
669 | #[inline ] |
670 | #[deprecated (since = "2.3.0" , note = "use Into<String>" )] |
671 | pub fn into_string(self) -> String { |
672 | self.into() |
673 | } |
674 | |
/// For internal testing, not part of the public API.
///
/// Methods of the `Url` struct assume a number of invariants.
/// This checks each of these invariants and returns an `Err` describing the
/// first one that is not met; `Ok(())` means every check passed.
/// This is for testing rust-url itself.
///
/// # Panics
///
/// Panics if the serialized port text cannot be parsed as a `u16`, or if the
/// URL's own serialization fails to parse again.
#[doc(hidden)]
pub fn check_invariants(&self) -> Result<(), String> {
    // Local `assert!` / `assert_eq!` shadow the standard macros inside this
    // function: a failed check returns `Err(message)` instead of panicking.
    macro_rules! assert {
        ($x: expr) => {
            if !$x {
                return Err(format!(
                    "!( {} ) for URL {:?}",
                    stringify!($x),
                    self.serialization
                ));
            }
        };
    }

    macro_rules! assert_eq {
        ($a: expr, $b: expr) => {
            {
                let a = $a;
                let b = $b;
                if a != b {
                    return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                       a, b, stringify!($a), stringify!($b),
                                       self.serialization))
                }
            }
        }
    }

    // Scheme: an ASCII letter followed by letters, digits, '+', '-' or '.',
    // terminated by ':'.
    assert!(self.scheme_end >= 1);
    assert!(self.byte_at(0).is_ascii_alphabetic());
    assert!(self
        .slice(1..self.scheme_end)
        .chars()
        .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
    assert_eq!(self.byte_at(self.scheme_end), b':');

    if self.slice(self.scheme_end + 1..).starts_with("//") {
        // URL with authority
        if self.username_end != self.serialization.len() as u32 {
            match self.byte_at(self.username_end) {
                // Username followed by a password, then '@' right before the host.
                b':' => {
                    assert!(self.host_start >= self.username_end + 2);
                    assert_eq!(self.byte_at(self.host_start - 1), b'@');
                }
                // Username only: the host starts right after '@'.
                b'@' => assert!(self.host_start == self.username_end + 1),
                // No userinfo: `username_end` sits right after "://".
                _ => assert_eq!(self.username_end, self.scheme_end + 3),
            }
        }
        assert!(self.host_start >= self.username_end);
        assert!(self.host_end >= self.host_start);
        // The host text must agree with the parsed `host` value.
        let host_str = self.slice(self.host_start..self.host_end);
        match self.host {
            HostInternal::None => assert_eq!(host_str, ""),
            HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
            HostInternal::Ipv6(address) => {
                let h: Host<String> = Host::Ipv6(address);
                assert_eq!(host_str, h.to_string())
            }
            HostInternal::Domain => {
                if SchemeType::from(self.scheme()).is_special() {
                    assert!(!host_str.is_empty())
                }
            }
        }
        // Either there is no port text at all, or ":" plus digits matching `port`.
        if self.path_start == self.host_end {
            assert_eq!(self.port, None);
        } else {
            assert_eq!(self.byte_at(self.host_end), b':');
            let port_str = self.slice(self.host_end + 1..self.path_start);
            assert_eq!(
                self.port,
                Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
            );
        }
        assert!(
            self.path_start as usize == self.serialization.len()
                || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
        );
    } else {
        // Anarchist URL (no authority)
        assert_eq!(self.username_end, self.scheme_end + 1);
        assert_eq!(self.host_start, self.scheme_end + 1);
        assert_eq!(self.host_end, self.scheme_end + 1);
        assert_eq!(self.host, HostInternal::None);
        assert_eq!(self.port, None);
        if self.path().starts_with("//") {
            // special case when first path segment is empty
            assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
            assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
            assert_eq!(self.path_start, self.scheme_end + 3);
        } else {
            assert_eq!(self.path_start, self.scheme_end + 1);
        }
    }
    // Query and fragment offsets point at their delimiters, in that order.
    if let Some(start) = self.query_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'?');
    }
    if let Some(start) = self.fragment_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'#');
    }
    if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
        assert!(fragment_start > query_start);
    }

    // Round trip: re-parsing the serialization must reproduce the same
    // string and the same component offsets.
    let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
    assert_eq!(&self.serialization, &other.serialization);
    assert_eq!(self.scheme_end, other.scheme_end);
    assert_eq!(self.username_end, other.username_end);
    assert_eq!(self.host_start, other.host_start);
    assert_eq!(self.host_end, other.host_end);
    assert!(
        self.host == other.host ||
            // XXX No host round-trips to empty host.
            // See https://github.com/whatwg/url/issues/79
            (self.host_str(), other.host_str()) == (None, Some(""))
    );
    assert_eq!(self.port, other.port);
    assert_eq!(self.path_start, other.path_start);
    assert_eq!(self.query_start, other.query_start);
    assert_eq!(self.fragment_start, other.fragment_start);
    Ok(())
}
804 | |
/// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
///
/// Note: this returns an opaque origin for `file:` URLs, and two opaque
/// origins obtained from separate calls do not compare equal, so
/// `url.origin() != url.origin()` for such URLs (see the `file` example below).
///
/// # Examples
///
/// URL with `ftp` scheme:
///
/// ```rust
/// use url::{Host, Origin, Url};
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("ftp://example.com/foo")?;
/// assert_eq!(url.origin(),
///            Origin::Tuple("ftp".into(),
///                          Host::Domain("example.com".into()),
///                          21));
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// URL with `blob` scheme:
///
/// ```rust
/// use url::{Host, Origin, Url};
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("blob:https://example.com/foo")?;
/// assert_eq!(url.origin(),
///            Origin::Tuple("https".into(),
///                          Host::Domain("example.com".into()),
///                          443));
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// URL with `file` scheme:
///
/// ```rust
/// use url::{Host, Origin, Url};
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("file:///tmp/foo")?;
/// assert!(!url.origin().is_tuple());
///
/// let other_url = Url::parse("file:///tmp/foo")?;
/// assert!(url.origin() != other_url.origin());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// URL with other scheme:
///
/// ```rust
/// use url::{Host, Origin, Url};
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("foo:bar")?;
/// assert!(!url.origin().is_tuple());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
#[inline]
pub fn origin(&self) -> Origin {
    // The computation lives in the `origin` module.
    origin::url_origin(self)
}
880 | |
/// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
///
/// # Examples
///
/// ```
/// use url::Url;
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("file:///tmp/foo")?;
/// assert_eq!(url.scheme(), "file");
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
#[inline]
pub fn scheme(&self) -> &str {
    // Everything in the serialization before `scheme_end`, i.e. before the ':'.
    self.slice(..self.scheme_end)
}
900 | |
901 | /// Return whether the URL is special (has a special scheme) |
902 | /// |
903 | /// # Examples |
904 | /// |
905 | /// ``` |
906 | /// use url::Url; |
907 | /// # use url::ParseError; |
908 | /// |
909 | /// # fn run() -> Result<(), ParseError> { |
910 | /// assert!(Url::parse("http:///tmp/foo" )?.is_special()); |
911 | /// assert!(Url::parse("file:///tmp/foo" )?.is_special()); |
912 | /// assert!(!Url::parse("moz:///tmp/foo" )?.is_special()); |
913 | /// # Ok(()) |
914 | /// # } |
915 | /// # run().unwrap(); |
916 | /// ``` |
917 | pub fn is_special(&self) -> bool { |
918 | let scheme_type = SchemeType::from(self.scheme()); |
919 | scheme_type.is_special() |
920 | } |
921 | |
922 | /// Return whether the URL has an 'authority', |
923 | /// which can contain a username, password, host, and port number. |
924 | /// |
925 | /// URLs that do *not* are either path-only like `unix:/run/foo.socket` |
926 | /// or cannot-be-a-base like `data:text/plain,Stuff`. |
927 | /// |
928 | /// See also the `authority` method. |
929 | /// |
930 | /// # Examples |
931 | /// |
932 | /// ``` |
933 | /// use url::Url; |
934 | /// # use url::ParseError; |
935 | /// |
936 | /// # fn run() -> Result<(), ParseError> { |
937 | /// let url = Url::parse("ftp://rms@example.com" )?; |
938 | /// assert!(url.has_authority()); |
939 | /// |
940 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
941 | /// assert!(!url.has_authority()); |
942 | /// |
943 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
944 | /// assert!(!url.has_authority()); |
945 | /// # Ok(()) |
946 | /// # } |
947 | /// # run().unwrap(); |
948 | /// ``` |
949 | #[inline ] |
950 | pub fn has_authority(&self) -> bool { |
951 | debug_assert!(self.byte_at(self.scheme_end) == b':' ); |
952 | self.slice(self.scheme_end..).starts_with("://" ) |
953 | } |
954 | |
955 | /// Return the authority of this URL as an ASCII string. |
956 | /// |
957 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
958 | /// of a special URL, or percent encoded for non-special URLs. |
959 | /// IPv6 addresses are given between `[` and `]` brackets. |
960 | /// Ports are omitted if they match the well known port of a special URL. |
961 | /// |
962 | /// Username and password are percent-encoded. |
963 | /// |
964 | /// See also the `has_authority` method. |
965 | /// |
966 | /// # Examples |
967 | /// |
968 | /// ``` |
969 | /// use url::Url; |
970 | /// # use url::ParseError; |
971 | /// |
972 | /// # fn run() -> Result<(), ParseError> { |
973 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
974 | /// assert_eq!(url.authority(), "" ); |
975 | /// let url = Url::parse("file:///tmp/foo" )?; |
976 | /// assert_eq!(url.authority(), "" ); |
977 | /// let url = Url::parse("https://user:password@example.com/tmp/foo" )?; |
978 | /// assert_eq!(url.authority(), "user:password@example.com" ); |
979 | /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo" )?; |
980 | /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667" ); |
981 | /// let url = Url::parse("http://àlex.рф.example.com:80/foo" )?; |
982 | /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com" ); |
983 | /// # Ok(()) |
984 | /// # } |
985 | /// # run().unwrap(); |
986 | /// ``` |
987 | pub fn authority(&self) -> &str { |
988 | let scheme_separator_len = "://" .len() as u32; |
989 | if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len { |
990 | self.slice(self.scheme_end + scheme_separator_len..self.path_start) |
991 | } else { |
992 | "" |
993 | } |
994 | } |
995 | |
996 | /// Return whether this URL is a cannot-be-a-base URL, |
997 | /// meaning that parsing a relative URL string with this URL as the base will return an error. |
998 | /// |
999 | /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash, |
1000 | /// as is typically the case of `data:` and `mailto:` URLs. |
1001 | /// |
1002 | /// # Examples |
1003 | /// |
1004 | /// ``` |
1005 | /// use url::Url; |
1006 | /// # use url::ParseError; |
1007 | /// |
1008 | /// # fn run() -> Result<(), ParseError> { |
1009 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1010 | /// assert!(!url.cannot_be_a_base()); |
1011 | /// |
1012 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1013 | /// assert!(!url.cannot_be_a_base()); |
1014 | /// |
1015 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1016 | /// assert!(url.cannot_be_a_base()); |
1017 | /// # Ok(()) |
1018 | /// # } |
1019 | /// # run().unwrap(); |
1020 | /// ``` |
1021 | #[inline ] |
1022 | pub fn cannot_be_a_base(&self) -> bool { |
1023 | !self.slice(self.scheme_end + 1..).starts_with('/' ) |
1024 | } |
1025 | |
1026 | /// Return the username for this URL (typically the empty string) |
1027 | /// as a percent-encoded ASCII string. |
1028 | /// |
1029 | /// # Examples |
1030 | /// |
1031 | /// ``` |
1032 | /// use url::Url; |
1033 | /// # use url::ParseError; |
1034 | /// |
1035 | /// # fn run() -> Result<(), ParseError> { |
1036 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1037 | /// assert_eq!(url.username(), "rms" ); |
1038 | /// |
1039 | /// let url = Url::parse("ftp://:secret123@example.com" )?; |
1040 | /// assert_eq!(url.username(), "" ); |
1041 | /// |
1042 | /// let url = Url::parse("https://example.com" )?; |
1043 | /// assert_eq!(url.username(), "" ); |
1044 | /// # Ok(()) |
1045 | /// # } |
1046 | /// # run().unwrap(); |
1047 | /// ``` |
1048 | pub fn username(&self) -> &str { |
1049 | let scheme_separator_len = "://" .len() as u32; |
1050 | if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len { |
1051 | self.slice(self.scheme_end + scheme_separator_len..self.username_end) |
1052 | } else { |
1053 | "" |
1054 | } |
1055 | } |
1056 | |
1057 | /// Return the password for this URL, if any, as a percent-encoded ASCII string. |
1058 | /// |
1059 | /// # Examples |
1060 | /// |
1061 | /// ``` |
1062 | /// use url::Url; |
1063 | /// # use url::ParseError; |
1064 | /// |
1065 | /// # fn run() -> Result<(), ParseError> { |
1066 | /// let url = Url::parse("ftp://rms:secret123@example.com" )?; |
1067 | /// assert_eq!(url.password(), Some("secret123" )); |
1068 | /// |
1069 | /// let url = Url::parse("ftp://:secret123@example.com" )?; |
1070 | /// assert_eq!(url.password(), Some("secret123" )); |
1071 | /// |
1072 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1073 | /// assert_eq!(url.password(), None); |
1074 | /// |
1075 | /// let url = Url::parse("https://example.com" )?; |
1076 | /// assert_eq!(url.password(), None); |
1077 | /// # Ok(()) |
1078 | /// # } |
1079 | /// # run().unwrap(); |
1080 | /// ``` |
1081 | pub fn password(&self) -> Option<&str> { |
1082 | // This ':' is not the one marking a port number since a host can not be empty. |
1083 | // (Except for file: URLs, which do not have port numbers.) |
1084 | if self.has_authority() |
1085 | && self.username_end != self.serialization.len() as u32 |
1086 | && self.byte_at(self.username_end) == b':' |
1087 | { |
1088 | debug_assert!(self.byte_at(self.host_start - 1) == b'@' ); |
1089 | Some(self.slice(self.username_end + 1..self.host_start - 1)) |
1090 | } else { |
1091 | None |
1092 | } |
1093 | } |
1094 | |
1095 | /// Equivalent to `url.host().is_some()`. |
1096 | /// |
1097 | /// # Examples |
1098 | /// |
1099 | /// ``` |
1100 | /// use url::Url; |
1101 | /// # use url::ParseError; |
1102 | /// |
1103 | /// # fn run() -> Result<(), ParseError> { |
1104 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1105 | /// assert!(url.has_host()); |
1106 | /// |
1107 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1108 | /// assert!(!url.has_host()); |
1109 | /// |
1110 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1111 | /// assert!(!url.has_host()); |
1112 | /// # Ok(()) |
1113 | /// # } |
1114 | /// # run().unwrap(); |
1115 | /// ``` |
pub fn has_host(&self) -> bool {
    // Every `HostInternal` variant other than `None` counts as having a host.
    !matches!(self.host, HostInternal::None)
}
1119 | |
1120 | /// Return the string representation of the host (domain or IP address) for this URL, if any. |
1121 | /// |
1122 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
1123 | /// of a special URL, or percent encoded for non-special URLs. |
1124 | /// IPv6 addresses are given between `[` and `]` brackets. |
1125 | /// |
1126 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs |
1127 | /// don’t have a host. |
1128 | /// |
1129 | /// See also the `host` method. |
1130 | /// |
1131 | /// # Examples |
1132 | /// |
1133 | /// ``` |
1134 | /// use url::Url; |
1135 | /// # use url::ParseError; |
1136 | /// |
1137 | /// # fn run() -> Result<(), ParseError> { |
1138 | /// let url = Url::parse("https://127.0.0.1/index.html" )?; |
1139 | /// assert_eq!(url.host_str(), Some("127.0.0.1" )); |
1140 | /// |
1141 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1142 | /// assert_eq!(url.host_str(), Some("example.com" )); |
1143 | /// |
1144 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1145 | /// assert_eq!(url.host_str(), None); |
1146 | /// |
1147 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1148 | /// assert_eq!(url.host_str(), None); |
1149 | /// # Ok(()) |
1150 | /// # } |
1151 | /// # run().unwrap(); |
1152 | /// ``` |
1153 | pub fn host_str(&self) -> Option<&str> { |
1154 | if self.has_host() { |
1155 | Some(self.slice(self.host_start..self.host_end)) |
1156 | } else { |
1157 | None |
1158 | } |
1159 | } |
1160 | |
1161 | /// Return the parsed representation of the host for this URL. |
1162 | /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host |
1163 | /// of a special URL, or percent encoded for non-special URLs. |
1164 | /// |
1165 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs |
1166 | /// don’t have a host. |
1167 | /// |
1168 | /// See also the `host_str` method. |
1169 | /// |
1170 | /// # Examples |
1171 | /// |
1172 | /// ``` |
1173 | /// use url::Url; |
1174 | /// # use url::ParseError; |
1175 | /// |
1176 | /// # fn run() -> Result<(), ParseError> { |
1177 | /// let url = Url::parse("https://127.0.0.1/index.html" )?; |
1178 | /// assert!(url.host().is_some()); |
1179 | /// |
1180 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1181 | /// assert!(url.host().is_some()); |
1182 | /// |
1183 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1184 | /// assert!(url.host().is_none()); |
1185 | /// |
1186 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1187 | /// assert!(url.host().is_none()); |
1188 | /// # Ok(()) |
1189 | /// # } |
1190 | /// # run().unwrap(); |
1191 | /// ``` |
1192 | pub fn host(&self) -> Option<Host<&str>> { |
1193 | match self.host { |
1194 | HostInternal::None => None, |
1195 | HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))), |
1196 | HostInternal::Ipv4(address) => Some(Host::Ipv4(address)), |
1197 | HostInternal::Ipv6(address) => Some(Host::Ipv6(address)), |
1198 | } |
1199 | } |
1200 | |
1201 | /// If this URL has a host and it is a domain name (not an IP address), return it. |
1202 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
1203 | /// of a special URL, or percent encoded for non-special URLs. |
1204 | /// |
1205 | /// # Examples |
1206 | /// |
1207 | /// ``` |
1208 | /// use url::Url; |
1209 | /// # use url::ParseError; |
1210 | /// |
1211 | /// # fn run() -> Result<(), ParseError> { |
1212 | /// let url = Url::parse("https://127.0.0.1/" )?; |
1213 | /// assert_eq!(url.domain(), None); |
1214 | /// |
1215 | /// let url = Url::parse("mailto:rms@example.net" )?; |
1216 | /// assert_eq!(url.domain(), None); |
1217 | /// |
1218 | /// let url = Url::parse("https://example.com/" )?; |
1219 | /// assert_eq!(url.domain(), Some("example.com" )); |
1220 | /// # Ok(()) |
1221 | /// # } |
1222 | /// # run().unwrap(); |
1223 | /// ``` |
1224 | pub fn domain(&self) -> Option<&str> { |
1225 | match self.host { |
1226 | HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)), |
1227 | _ => None, |
1228 | } |
1229 | } |
1230 | |
1231 | /// Return the port number for this URL, if any. |
1232 | /// |
1233 | /// Note that default port numbers are never reflected by the serialization, |
1234 | /// use the `port_or_known_default()` method if you want a default port number returned. |
1235 | /// |
1236 | /// # Examples |
1237 | /// |
1238 | /// ``` |
1239 | /// use url::Url; |
1240 | /// # use url::ParseError; |
1241 | /// |
1242 | /// # fn run() -> Result<(), ParseError> { |
1243 | /// let url = Url::parse("https://example.com" )?; |
1244 | /// assert_eq!(url.port(), None); |
1245 | /// |
1246 | /// let url = Url::parse("https://example.com:443/" )?; |
1247 | /// assert_eq!(url.port(), None); |
1248 | /// |
1249 | /// let url = Url::parse("ssh://example.com:22" )?; |
1250 | /// assert_eq!(url.port(), Some(22)); |
1251 | /// # Ok(()) |
1252 | /// # } |
1253 | /// # run().unwrap(); |
1254 | /// ``` |
#[inline ]
pub fn port(&self) -> Option<u16> {
    // Plain read of the stored field; no default-port lookup happens here
    // (that is what `port_or_known_default` adds on top).
    self.port
}
1259 | |
1260 | /// Return the port number for this URL, or the default port number if it is known. |
1261 | /// |
1262 | /// This method only knows the default port number |
1263 | /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes. |
1264 | /// |
1265 | /// For URLs in these schemes, this method always returns `Some(_)`. |
1266 | /// For other schemes, it is the same as `Url::port()`. |
1267 | /// |
1268 | /// # Examples |
1269 | /// |
1270 | /// ``` |
1271 | /// use url::Url; |
1272 | /// # use url::ParseError; |
1273 | /// |
1274 | /// # fn run() -> Result<(), ParseError> { |
1275 | /// let url = Url::parse("foo://example.com" )?; |
1276 | /// assert_eq!(url.port_or_known_default(), None); |
1277 | /// |
1278 | /// let url = Url::parse("foo://example.com:1456" )?; |
1279 | /// assert_eq!(url.port_or_known_default(), Some(1456)); |
1280 | /// |
1281 | /// let url = Url::parse("https://example.com" )?; |
1282 | /// assert_eq!(url.port_or_known_default(), Some(443)); |
1283 | /// # Ok(()) |
1284 | /// # } |
1285 | /// # run().unwrap(); |
1286 | /// ``` |
1287 | #[inline ] |
1288 | pub fn port_or_known_default(&self) -> Option<u16> { |
1289 | self.port.or_else(|| parser::default_port(self.scheme())) |
1290 | } |
1291 | |
1292 | /// Resolve a URL’s host and port number to `SocketAddr`. |
1293 | /// |
1294 | /// If the URL has the default port number of a scheme that is unknown to this library, |
1295 | /// `default_port_number` provides an opportunity to provide the actual port number. |
1296 | /// In non-example code this should be implemented either simply as `|| None`, |
1297 | /// or by matching on the URL’s `.scheme()`. |
1298 | /// |
1299 | /// If the host is a domain, it is resolved using the standard library’s DNS support. |
1300 | /// |
1301 | /// # Examples |
1302 | /// |
1303 | /// ```no_run |
1304 | /// let url = url::Url::parse("https://example.net/" ).unwrap(); |
1305 | /// let addrs = url.socket_addrs(|| None).unwrap(); |
1306 | /// std::net::TcpStream::connect(&*addrs) |
1307 | /// # ; |
1308 | /// ``` |
1309 | /// |
1310 | /// ``` |
1311 | /// /// With application-specific known default port numbers |
1312 | /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> { |
1313 | /// url.socket_addrs(|| match url.scheme() { |
1314 | /// "socks5" | "socks5h" => Some(1080), |
1315 | /// _ => None, |
1316 | /// }) |
1317 | /// } |
1318 | /// ``` |
#[cfg(feature = "std")]
#[cfg(any(
    unix,
    windows,
    target_os = "redox",
    target_os = "wasi",
    target_os = "hermit"
))]
pub fn socket_addrs(
    &self,
    default_port_number: impl Fn() -> Option<u16>,
) -> io::Result<alloc::vec::Vec<SocketAddr>> {
    // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
    // causes borrowck issues because the return value borrows `default_port_number`:
    //
    // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
    //
    // > This RFC proposes that *all* type parameters are considered in scope
    // > for `impl Trait` in return position

    // Both missing pieces are reported as `InvalidData` I/O errors.
    let invalid_data = |message: &str| io::Error::new(io::ErrorKind::InvalidData, message);

    let host = self
        .host()
        .ok_or_else(|| invalid_data("No host name in the URL"))?;
    // Port precedence: stored port, then the library's known default,
    // then whatever the caller-supplied callback returns.
    let port = self
        .port_or_known_default()
        .or_else(default_port_number)
        .ok_or_else(|| invalid_data("No port number in the URL"))?;

    let addrs: alloc::vec::Vec<SocketAddr> = match host {
        // Domains go through `ToSocketAddrs`, which may yield several addresses.
        Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
        // IP literals convert directly into a single address.
        Host::Ipv4(ip) => vec![(ip, port).into()],
        Host::Ipv6(ip) => vec![(ip, port).into()],
    };
    Ok(addrs)
}
1354 | |
1355 | /// Return the path for this URL, as a percent-encoded ASCII string. |
1356 | /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. |
1357 | /// For other URLs, this starts with a '/' slash |
1358 | /// and continues with slash-separated path segments. |
1359 | /// |
1360 | /// # Examples |
1361 | /// |
1362 | /// ```rust |
1363 | /// use url::{Url, ParseError}; |
1364 | /// |
1365 | /// # fn run() -> Result<(), ParseError> { |
1366 | /// let url = Url::parse("https://example.com/api/versions?page=2" )?; |
1367 | /// assert_eq!(url.path(), "/api/versions" ); |
1368 | /// |
1369 | /// let url = Url::parse("https://example.com" )?; |
1370 | /// assert_eq!(url.path(), "/" ); |
1371 | /// |
1372 | /// let url = Url::parse("https://example.com/countries/việt nam" )?; |
1373 | /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam" ); |
1374 | /// # Ok(()) |
1375 | /// # } |
1376 | /// # run().unwrap(); |
1377 | /// ``` |
1378 | pub fn path(&self) -> &str { |
1379 | match (self.query_start, self.fragment_start) { |
1380 | (None, None) => self.slice(self.path_start..), |
1381 | (Some(next_component_start), _) | (None, Some(next_component_start)) => { |
1382 | self.slice(self.path_start..next_component_start) |
1383 | } |
1384 | } |
1385 | } |
1386 | |
1387 | /// Unless this URL is cannot-be-a-base, |
1388 | /// return an iterator of '/' slash-separated path segments, |
1389 | /// each as a percent-encoded ASCII string. |
1390 | /// |
1391 | /// Return `None` for cannot-be-a-base URLs. |
1392 | /// |
1393 | /// When `Some` is returned, the iterator always contains at least one string |
1394 | /// (which may be empty). |
1395 | /// |
1396 | /// # Examples |
1397 | /// |
1398 | /// ``` |
1399 | /// use url::Url; |
1400 | /// |
1401 | /// # #[cfg (feature = "std" )] |
1402 | /// # use std::error::Error; |
1403 | /// # #[cfg (not(feature = "std" ))] |
1404 | /// # use core::error::Error; |
1405 | /// |
1406 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1407 | /// let url = Url::parse("https://example.com/foo/bar" )?; |
1408 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1409 | /// assert_eq!(path_segments.next(), Some("foo" )); |
1410 | /// assert_eq!(path_segments.next(), Some("bar" )); |
1411 | /// assert_eq!(path_segments.next(), None); |
1412 | /// |
1413 | /// let url = Url::parse("https://example.com" )?; |
1414 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1415 | /// assert_eq!(path_segments.next(), Some("" )); |
1416 | /// assert_eq!(path_segments.next(), None); |
1417 | /// |
1418 | /// let url = Url::parse("data:text/plain,HelloWorld" )?; |
1419 | /// assert!(url.path_segments().is_none()); |
1420 | /// |
1421 | /// let url = Url::parse("https://example.com/countries/việt nam" )?; |
1422 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1423 | /// assert_eq!(path_segments.next(), Some("countries" )); |
1424 | /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam" )); |
1425 | /// # Ok(()) |
1426 | /// # } |
1427 | /// # run().unwrap(); |
1428 | /// ``` |
1429 | pub fn path_segments(&self) -> Option<str::Split<'_, char>> { |
1430 | let path = self.path(); |
1431 | path.strip_prefix('/' ).map(|remainder| remainder.split('/' )) |
1432 | } |
1433 | |
1434 | /// Return this URL’s query string, if any, as a percent-encoded ASCII string. |
1435 | /// |
1436 | /// # Examples |
1437 | /// |
1438 | /// ```rust |
1439 | /// use url::Url; |
1440 | /// # use url::ParseError; |
1441 | /// |
/// # fn run() -> Result<(), ParseError> {
1443 | /// let url = Url::parse("https://example.com/products?page=2" )?; |
1444 | /// let query = url.query(); |
1445 | /// assert_eq!(query, Some("page=2" )); |
1446 | /// |
1447 | /// let url = Url::parse("https://example.com/products" )?; |
1448 | /// let query = url.query(); |
1449 | /// assert!(query.is_none()); |
1450 | /// |
1451 | /// let url = Url::parse("https://example.com/?country=español" )?; |
1452 | /// let query = url.query(); |
1453 | /// assert_eq!(query, Some("country=espa%C3%B1ol" )); |
1454 | /// # Ok(()) |
1455 | /// # } |
1456 | /// # run().unwrap(); |
1457 | /// ``` |
1458 | pub fn query(&self) -> Option<&str> { |
1459 | match (self.query_start, self.fragment_start) { |
1460 | (None, _) => None, |
1461 | (Some(query_start), None) => { |
1462 | debug_assert!(self.byte_at(query_start) == b'?' ); |
1463 | Some(self.slice(query_start + 1..)) |
1464 | } |
1465 | (Some(query_start), Some(fragment_start)) => { |
1466 | debug_assert!(self.byte_at(query_start) == b'?' ); |
1467 | Some(self.slice(query_start + 1..fragment_start)) |
1468 | } |
1469 | } |
1470 | } |
1471 | |
1472 | /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded` |
1473 | /// and return an iterator of (key, value) pairs. |
1474 | /// |
1475 | /// # Examples |
1476 | /// |
1477 | /// ```rust |
1478 | /// use std::borrow::Cow; |
1479 | /// |
1480 | /// use url::Url; |
1481 | /// # use url::ParseError; |
1482 | /// |
1483 | /// # fn run() -> Result<(), ParseError> { |
1484 | /// let url = Url::parse("https://example.com/products?page=2&sort=desc" )?; |
1485 | /// let mut pairs = url.query_pairs(); |
1486 | /// |
1487 | /// assert_eq!(pairs.count(), 2); |
1488 | /// |
1489 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page" ), Cow::Borrowed("2" )))); |
1490 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort" ), Cow::Borrowed("desc" )))); |
1491 | /// # Ok(()) |
1492 | /// # } |
1493 | /// # run().unwrap(); |
/// ```
1496 | #[inline ] |
1497 | pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> { |
1498 | form_urlencoded::parse(self.query().unwrap_or("" ).as_bytes()) |
1499 | } |
1500 | |
1501 | /// Return this URL’s fragment identifier, if any. |
1502 | /// |
1503 | /// A fragment is the part of the URL after the `#` symbol. |
1504 | /// The fragment is optional and, if present, contains a fragment identifier |
1505 | /// that identifies a secondary resource, such as a section heading |
1506 | /// of a document. |
1507 | /// |
/// In HTML, the fragment identifier is usually the id attribute of an element
1509 | /// that is scrolled to on load. Browsers typically will not send the fragment portion |
1510 | /// of a URL to the server. |
1511 | /// |
1512 | /// **Note:** the parser did *not* percent-encode this component, |
1513 | /// but the input may have been percent-encoded already. |
1514 | /// |
1515 | /// # Examples |
1516 | /// |
1517 | /// ```rust |
1518 | /// use url::Url; |
1519 | /// # use url::ParseError; |
1520 | /// |
1521 | /// # fn run() -> Result<(), ParseError> { |
1522 | /// let url = Url::parse("https://example.com/data.csv#row=4" )?; |
1523 | /// |
1524 | /// assert_eq!(url.fragment(), Some("row=4" )); |
1525 | /// |
1526 | /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2" )?; |
1527 | /// |
1528 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2" )); |
1529 | /// # Ok(()) |
1530 | /// # } |
1531 | /// # run().unwrap(); |
1532 | /// ``` |
1533 | pub fn fragment(&self) -> Option<&str> { |
1534 | self.fragment_start.map(|start| { |
1535 | debug_assert!(self.byte_at(start) == b'#' ); |
1536 | self.slice(start + 1..) |
1537 | }) |
1538 | } |
1539 | |
1540 | fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R { |
1541 | let mut parser = Parser::for_setter(mem::take(&mut self.serialization)); |
1542 | let result = f(&mut parser); |
1543 | self.serialization = parser.serialization; |
1544 | result |
1545 | } |
1546 | |
1547 | /// Change this URL’s fragment identifier. |
1548 | /// |
1549 | /// # Examples |
1550 | /// |
1551 | /// ```rust |
1552 | /// use url::Url; |
1553 | /// # use url::ParseError; |
1554 | /// |
1555 | /// # fn run() -> Result<(), ParseError> { |
1556 | /// let mut url = Url::parse("https://example.com/data.csv" )?; |
1557 | /// assert_eq!(url.as_str(), "https://example.com/data.csv" ); |
///
1559 | /// url.set_fragment(Some("cell=4,1-6,2" )); |
1560 | /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2" ); |
1561 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2" )); |
1562 | /// |
1563 | /// url.set_fragment(None); |
1564 | /// assert_eq!(url.as_str(), "https://example.com/data.csv" ); |
1565 | /// assert!(url.fragment().is_none()); |
1566 | /// # Ok(()) |
1567 | /// # } |
1568 | /// # run().unwrap(); |
1569 | /// ``` |
1570 | pub fn set_fragment(&mut self, fragment: Option<&str>) { |
1571 | // Remove any previous fragment |
1572 | if let Some(start) = self.fragment_start { |
1573 | debug_assert!(self.byte_at(start) == b'#' ); |
1574 | self.serialization.truncate(start as usize); |
1575 | } |
1576 | // Write the new one |
1577 | if let Some(input) = fragment { |
1578 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); |
1579 | self.serialization.push('#' ); |
1580 | self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input))) |
1581 | } else { |
1582 | self.fragment_start = None; |
1583 | self.strip_trailing_spaces_from_opaque_path(); |
1584 | } |
1585 | } |
1586 | |
1587 | fn take_fragment(&mut self) -> Option<String> { |
1588 | self.fragment_start.take().map(|start| { |
1589 | debug_assert!(self.byte_at(start) == b'#' ); |
1590 | let fragment = self.slice(start + 1..).to_owned(); |
1591 | self.serialization.truncate(start as usize); |
1592 | fragment |
1593 | }) |
1594 | } |
1595 | |
1596 | fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) { |
1597 | if let Some(ref fragment) = fragment { |
1598 | assert!(self.fragment_start.is_none()); |
1599 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); |
1600 | self.serialization.push('#' ); |
1601 | self.serialization.push_str(fragment); |
1602 | } |
1603 | } |
1604 | |
1605 | /// Change this URL’s query string. If `query` is `None`, this URL's |
1606 | /// query string will be cleared. |
1607 | /// |
1608 | /// # Examples |
1609 | /// |
1610 | /// ```rust |
1611 | /// use url::Url; |
1612 | /// # use url::ParseError; |
1613 | /// |
1614 | /// # fn run() -> Result<(), ParseError> { |
1615 | /// let mut url = Url::parse("https://example.com/products" )?; |
1616 | /// assert_eq!(url.as_str(), "https://example.com/products" ); |
1617 | /// |
1618 | /// url.set_query(Some("page=2" )); |
1619 | /// assert_eq!(url.as_str(), "https://example.com/products?page=2" ); |
1620 | /// assert_eq!(url.query(), Some("page=2" )); |
1621 | /// # Ok(()) |
1622 | /// # } |
1623 | /// # run().unwrap(); |
1624 | /// ``` |
1625 | pub fn set_query(&mut self, query: Option<&str>) { |
1626 | let fragment = self.take_fragment(); |
1627 | |
1628 | // Remove any previous query |
1629 | if let Some(start) = self.query_start.take() { |
1630 | debug_assert!(self.byte_at(start) == b'?' ); |
1631 | self.serialization.truncate(start as usize); |
1632 | } |
1633 | // Write the new query, if any |
1634 | if let Some(input) = query { |
1635 | self.query_start = Some(to_u32(self.serialization.len()).unwrap()); |
1636 | self.serialization.push('?' ); |
1637 | let scheme_type = SchemeType::from(self.scheme()); |
1638 | let scheme_end = self.scheme_end; |
1639 | self.mutate(|parser| { |
1640 | let vfn = parser.violation_fn; |
1641 | parser.parse_query( |
1642 | scheme_type, |
1643 | scheme_end, |
1644 | parser::Input::new_trim_tab_and_newlines(input, vfn), |
1645 | ) |
1646 | }); |
1647 | } else { |
1648 | self.query_start = None; |
1649 | if fragment.is_none() { |
1650 | self.strip_trailing_spaces_from_opaque_path(); |
1651 | } |
1652 | } |
1653 | |
1654 | self.restore_already_parsed_fragment(fragment); |
1655 | } |
1656 | |
1657 | /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs |
1658 | /// in `application/x-www-form-urlencoded` syntax. |
1659 | /// |
1660 | /// The return value has a method-chaining API: |
1661 | /// |
1662 | /// ```rust |
1663 | /// # use url::{Url, ParseError}; |
1664 | /// |
1665 | /// # fn run() -> Result<(), ParseError> { |
1666 | /// let mut url = Url::parse("https://example.net?lang=fr#nav" )?; |
1667 | /// assert_eq!(url.query(), Some("lang=fr" )); |
1668 | /// |
1669 | /// url.query_pairs_mut().append_pair("foo" , "bar" ); |
1670 | /// assert_eq!(url.query(), Some("lang=fr&foo=bar" )); |
1671 | /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav" ); |
1672 | /// |
1673 | /// url.query_pairs_mut() |
1674 | /// .clear() |
1675 | /// .append_pair("foo" , "bar & baz" ) |
1676 | /// .append_pair("saisons" , " \u{00C9}t \u{00E9}+hiver" ); |
1677 | /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver" )); |
1678 | /// assert_eq!(url.as_str(), |
1679 | /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav" ); |
1680 | /// # Ok(()) |
1681 | /// # } |
1682 | /// # run().unwrap(); |
1683 | /// ``` |
1684 | /// |
1685 | /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`, |
1686 | /// not `url.set_query(None)`. |
1687 | /// |
1688 | /// The state of `Url` is unspecified if this return value is leaked without being dropped. |
1689 | pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> { |
1690 | let fragment = self.take_fragment(); |
1691 | |
1692 | let query_start; |
1693 | if let Some(start) = self.query_start { |
1694 | debug_assert!(self.byte_at(start) == b'?' ); |
1695 | query_start = start as usize; |
1696 | } else { |
1697 | query_start = self.serialization.len(); |
1698 | self.query_start = Some(to_u32(query_start).unwrap()); |
1699 | self.serialization.push('?' ); |
1700 | } |
1701 | |
1702 | let query = UrlQuery { |
1703 | url: Some(self), |
1704 | fragment, |
1705 | }; |
1706 | form_urlencoded::Serializer::for_suffix(query, query_start + "?" .len()) |
1707 | } |
1708 | |
1709 | fn take_after_path(&mut self) -> String { |
1710 | match (self.query_start, self.fragment_start) { |
1711 | (Some(i), _) | (None, Some(i)) => { |
1712 | let after_path = self.slice(i..).to_owned(); |
1713 | self.serialization.truncate(i as usize); |
1714 | after_path |
1715 | } |
1716 | (None, None) => String::new(), |
1717 | } |
1718 | } |
1719 | |
1720 | /// Change this URL’s path. |
1721 | /// |
1722 | /// # Examples |
1723 | /// |
1724 | /// ```rust |
1725 | /// use url::Url; |
1726 | /// # use url::ParseError; |
1727 | /// |
1728 | /// # fn run() -> Result<(), ParseError> { |
1729 | /// let mut url = Url::parse("https://example.com" )?; |
1730 | /// url.set_path("api/comments" ); |
1731 | /// assert_eq!(url.as_str(), "https://example.com/api/comments" ); |
1732 | /// assert_eq!(url.path(), "/api/comments" ); |
1733 | /// |
1734 | /// let mut url = Url::parse("https://example.com/api" )?; |
1735 | /// url.set_path("data/report.csv" ); |
1736 | /// assert_eq!(url.as_str(), "https://example.com/data/report.csv" ); |
1737 | /// assert_eq!(url.path(), "/data/report.csv" ); |
1738 | /// |
1739 | /// // `set_path` percent-encodes the given string if it's not already percent-encoded. |
1740 | /// let mut url = Url::parse("https://example.com" )?; |
1741 | /// url.set_path("api/some comments" ); |
1742 | /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments" ); |
1743 | /// assert_eq!(url.path(), "/api/some%20comments" ); |
1744 | /// |
1745 | /// // `set_path` will not double percent-encode the string if it's already percent-encoded. |
1746 | /// let mut url = Url::parse("https://example.com" )?; |
1747 | /// url.set_path("api/some%20comments" ); |
1748 | /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments" ); |
1749 | /// assert_eq!(url.path(), "/api/some%20comments" ); |
1750 | /// |
1751 | /// # Ok(()) |
1752 | /// # } |
1753 | /// # run().unwrap(); |
1754 | /// ``` |
1755 | pub fn set_path(&mut self, mut path: &str) { |
1756 | let after_path = self.take_after_path(); |
1757 | let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); |
1758 | let cannot_be_a_base = self.cannot_be_a_base(); |
1759 | let scheme_type = SchemeType::from(self.scheme()); |
1760 | self.serialization.truncate(self.path_start as usize); |
1761 | self.mutate(|parser| { |
1762 | if cannot_be_a_base { |
1763 | if path.starts_with('/' ) { |
1764 | parser.serialization.push_str("%2F" ); |
1765 | path = &path[1..]; |
1766 | } |
1767 | parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path)); |
1768 | } else { |
1769 | let mut has_host = true; // FIXME |
1770 | parser.parse_path_start( |
1771 | scheme_type, |
1772 | &mut has_host, |
1773 | parser::Input::new_no_trim(path), |
1774 | ); |
1775 | } |
1776 | }); |
1777 | self.restore_after_path(old_after_path_pos, &after_path); |
1778 | } |
1779 | |
1780 | /// Return an object with methods to manipulate this URL’s path segments. |
1781 | /// |
1782 | /// Return `Err(())` if this URL is cannot-be-a-base. |
1783 | #[allow (clippy::result_unit_err)] |
1784 | pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> { |
1785 | if self.cannot_be_a_base() { |
1786 | Err(()) |
1787 | } else { |
1788 | Ok(path_segments::new(self)) |
1789 | } |
1790 | } |
1791 | |
1792 | fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) { |
1793 | let new_after_path_position = to_u32(self.serialization.len()).unwrap(); |
1794 | let adjust = |index: &mut u32| { |
1795 | *index -= old_after_path_position; |
1796 | *index += new_after_path_position; |
1797 | }; |
1798 | if let Some(ref mut index) = self.query_start { |
1799 | adjust(index) |
1800 | } |
1801 | if let Some(ref mut index) = self.fragment_start { |
1802 | adjust(index) |
1803 | } |
1804 | self.serialization.push_str(after_path) |
1805 | } |
1806 | |
1807 | /// Change this URL’s port number. |
1808 | /// |
1809 | /// Note that default port numbers are not reflected in the serialization. |
1810 | /// |
1811 | /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme; |
1812 | /// do nothing and return `Err`. |
1813 | /// |
1814 | /// # Examples |
1815 | /// |
1816 | /// ``` |
1817 | /// use url::Url; |
1818 | /// |
1819 | /// # #[cfg (feature = "std" )] |
1820 | /// # use std::error::Error; |
1821 | /// # #[cfg (not(feature = "std" ))] |
1822 | /// # use core::error::Error; |
1823 | /// |
1824 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1825 | /// let mut url = Url::parse("ssh://example.net:2048/" )?; |
1826 | /// |
1827 | /// url.set_port(Some(4096)).map_err(|_| "cannot be base" )?; |
1828 | /// assert_eq!(url.as_str(), "ssh://example.net:4096/" ); |
1829 | /// |
1830 | /// url.set_port(None).map_err(|_| "cannot be base" )?; |
1831 | /// assert_eq!(url.as_str(), "ssh://example.net/" ); |
1832 | /// # Ok(()) |
1833 | /// # } |
1834 | /// # run().unwrap(); |
1835 | /// ``` |
1836 | /// |
1837 | /// Known default port numbers are not reflected: |
1838 | /// |
1839 | /// ```rust |
1840 | /// use url::Url; |
1841 | /// |
1842 | /// # #[cfg (feature = "std" )] |
1843 | /// # use std::error::Error; |
1844 | /// # #[cfg (not(feature = "std" ))] |
1845 | /// # use core::error::Error; |
1846 | /// |
1847 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1848 | /// let mut url = Url::parse("https://example.org/" )?; |
1849 | /// |
1850 | /// url.set_port(Some(443)).map_err(|_| "cannot be base" )?; |
1851 | /// assert!(url.port().is_none()); |
1852 | /// # Ok(()) |
1853 | /// # } |
1854 | /// # run().unwrap(); |
1855 | /// ``` |
1856 | /// |
1857 | /// Cannot set port for cannot-be-a-base URLs: |
1858 | /// |
1859 | /// ``` |
1860 | /// use url::Url; |
1861 | /// # use url::ParseError; |
1862 | /// |
1863 | /// # fn run() -> Result<(), ParseError> { |
1864 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
1865 | /// |
1866 | /// let result = url.set_port(Some(80)); |
1867 | /// assert!(result.is_err()); |
1868 | /// |
1869 | /// let result = url.set_port(None); |
1870 | /// assert!(result.is_err()); |
1871 | /// # Ok(()) |
1872 | /// # } |
1873 | /// # run().unwrap(); |
1874 | /// ``` |
1875 | #[allow (clippy::result_unit_err)] |
1876 | pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> { |
1877 | // has_host implies !cannot_be_a_base |
1878 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
1879 | return Err(()); |
1880 | } |
1881 | if port.is_some() && port == parser::default_port(self.scheme()) { |
1882 | port = None |
1883 | } |
1884 | self.set_port_internal(port); |
1885 | Ok(()) |
1886 | } |
1887 | |
1888 | fn set_port_internal(&mut self, port: Option<u16>) { |
1889 | match (self.port, port) { |
1890 | (None, None) => {} |
1891 | (Some(_), None) => { |
1892 | self.serialization |
1893 | .drain(self.host_end as usize..self.path_start as usize); |
1894 | let offset = self.path_start - self.host_end; |
1895 | self.path_start = self.host_end; |
1896 | if let Some(ref mut index) = self.query_start { |
1897 | *index -= offset |
1898 | } |
1899 | if let Some(ref mut index) = self.fragment_start { |
1900 | *index -= offset |
1901 | } |
1902 | } |
1903 | (Some(old), Some(new)) if old == new => {} |
1904 | (_, Some(new)) => { |
1905 | let path_and_after = self.slice(self.path_start..).to_owned(); |
1906 | self.serialization.truncate(self.host_end as usize); |
1907 | write!(&mut self.serialization, ": {}" , new).unwrap(); |
1908 | let old_path_start = self.path_start; |
1909 | let new_path_start = to_u32(self.serialization.len()).unwrap(); |
1910 | self.path_start = new_path_start; |
1911 | let adjust = |index: &mut u32| { |
1912 | *index -= old_path_start; |
1913 | *index += new_path_start; |
1914 | }; |
1915 | if let Some(ref mut index) = self.query_start { |
1916 | adjust(index) |
1917 | } |
1918 | if let Some(ref mut index) = self.fragment_start { |
1919 | adjust(index) |
1920 | } |
1921 | self.serialization.push_str(&path_and_after); |
1922 | } |
1923 | } |
1924 | self.port = port; |
1925 | } |
1926 | |
1927 | /// Change this URL’s host. |
1928 | /// |
1929 | /// Removing the host (calling this with `None`) |
1930 | /// will also remove any username, password, and port number. |
1931 | /// |
1932 | /// # Examples |
1933 | /// |
1934 | /// Change host: |
1935 | /// |
1936 | /// ``` |
1937 | /// use url::Url; |
1938 | /// # use url::ParseError; |
1939 | /// |
1940 | /// # fn run() -> Result<(), ParseError> { |
1941 | /// let mut url = Url::parse("https://example.net" )?; |
1942 | /// let result = url.set_host(Some("rust-lang.org" )); |
1943 | /// assert!(result.is_ok()); |
1944 | /// assert_eq!(url.as_str(), "https://rust-lang.org/" ); |
1945 | /// # Ok(()) |
1946 | /// # } |
1947 | /// # run().unwrap(); |
1948 | /// ``` |
1949 | /// |
1950 | /// Remove host: |
1951 | /// |
1952 | /// ``` |
1953 | /// use url::Url; |
1954 | /// # use url::ParseError; |
1955 | /// |
1956 | /// # fn run() -> Result<(), ParseError> { |
1957 | /// let mut url = Url::parse("foo://example.net" )?; |
1958 | /// let result = url.set_host(None); |
1959 | /// assert!(result.is_ok()); |
1960 | /// assert_eq!(url.as_str(), "foo:/" ); |
1961 | /// # Ok(()) |
1962 | /// # } |
1963 | /// # run().unwrap(); |
1964 | /// ``` |
1965 | /// |
1966 | /// Cannot remove host for 'special' schemes (e.g. `http`): |
1967 | /// |
1968 | /// ``` |
1969 | /// use url::Url; |
1970 | /// # use url::ParseError; |
1971 | /// |
1972 | /// # fn run() -> Result<(), ParseError> { |
1973 | /// let mut url = Url::parse("https://example.net" )?; |
1974 | /// let result = url.set_host(None); |
1975 | /// assert!(result.is_err()); |
1976 | /// assert_eq!(url.as_str(), "https://example.net/" ); |
1977 | /// # Ok(()) |
1978 | /// # } |
1979 | /// # run().unwrap(); |
1980 | /// ``` |
1981 | /// |
1982 | /// Cannot change or remove host for cannot-be-a-base URLs: |
1983 | /// |
1984 | /// ``` |
1985 | /// use url::Url; |
1986 | /// # use url::ParseError; |
1987 | /// |
1988 | /// # fn run() -> Result<(), ParseError> { |
1989 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
1990 | /// |
1991 | /// let result = url.set_host(Some("rust-lang.org" )); |
1992 | /// assert!(result.is_err()); |
1993 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
1994 | /// |
1995 | /// let result = url.set_host(None); |
1996 | /// assert!(result.is_err()); |
1997 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
1998 | /// # Ok(()) |
1999 | /// # } |
2000 | /// # run().unwrap(); |
2001 | /// ``` |
2002 | /// |
2003 | /// # Errors |
2004 | /// |
2005 | /// If this URL is cannot-be-a-base or there is an error parsing the given `host`, |
2006 | /// a [`ParseError`] variant will be returned. |
2007 | /// |
2008 | /// [`ParseError`]: enum.ParseError.html |
2009 | pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> { |
2010 | if self.cannot_be_a_base() { |
2011 | return Err(ParseError::SetHostOnCannotBeABaseUrl); |
2012 | } |
2013 | |
2014 | let scheme_type = SchemeType::from(self.scheme()); |
2015 | |
2016 | if let Some(host) = host { |
2017 | if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() { |
2018 | return Err(ParseError::EmptyHost); |
2019 | } |
2020 | let mut host_substr = host; |
2021 | // Otherwise, if c is U+003A (:) and the [] flag is unset, then |
2022 | if !host.starts_with('[' ) || !host.ends_with(']' ) { |
2023 | match host.find(':' ) { |
2024 | Some(0) => { |
2025 | // If buffer is the empty string, validation error, return failure. |
2026 | return Err(ParseError::InvalidDomainCharacter); |
2027 | } |
2028 | // Let host be the result of host parsing buffer |
2029 | Some(colon_index) => { |
2030 | host_substr = &host[..colon_index]; |
2031 | } |
2032 | None => {} |
2033 | } |
2034 | } |
2035 | if SchemeType::from(self.scheme()).is_special() { |
2036 | self.set_host_internal(Host::parse(host_substr)?, None); |
2037 | } else { |
2038 | self.set_host_internal(Host::parse_opaque(host_substr)?, None); |
2039 | } |
2040 | } else if self.has_host() { |
2041 | if scheme_type.is_special() && !scheme_type.is_file() { |
2042 | return Err(ParseError::EmptyHost); |
2043 | } else if self.serialization.len() == self.path_start as usize { |
2044 | self.serialization.push('/' ); |
2045 | } |
2046 | debug_assert!(self.byte_at(self.scheme_end) == b':' ); |
2047 | debug_assert!(self.byte_at(self.path_start) == b'/' ); |
2048 | |
2049 | let new_path_start = if scheme_type.is_file() { |
2050 | self.scheme_end + 3 |
2051 | } else { |
2052 | self.scheme_end + 1 |
2053 | }; |
2054 | |
2055 | self.serialization |
2056 | .drain(new_path_start as usize..self.path_start as usize); |
2057 | let offset = self.path_start - new_path_start; |
2058 | self.path_start = new_path_start; |
2059 | self.username_end = new_path_start; |
2060 | self.host_start = new_path_start; |
2061 | self.host_end = new_path_start; |
2062 | self.port = None; |
2063 | if let Some(ref mut index) = self.query_start { |
2064 | *index -= offset |
2065 | } |
2066 | if let Some(ref mut index) = self.fragment_start { |
2067 | *index -= offset |
2068 | } |
2069 | } |
2070 | Ok(()) |
2071 | } |
2072 | |
2073 | /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. |
2074 | fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) { |
2075 | let old_suffix_pos = if opt_new_port.is_some() { |
2076 | self.path_start |
2077 | } else { |
2078 | self.host_end |
2079 | }; |
2080 | let suffix = self.slice(old_suffix_pos..).to_owned(); |
2081 | self.serialization.truncate(self.host_start as usize); |
2082 | if !self.has_authority() { |
2083 | debug_assert!(self.slice(self.scheme_end..self.host_start) == ":" ); |
2084 | debug_assert!(self.username_end == self.host_start); |
2085 | self.serialization.push('/' ); |
2086 | self.serialization.push('/' ); |
2087 | self.username_end += 2; |
2088 | self.host_start += 2; |
2089 | } |
2090 | write!(&mut self.serialization, " {}" , host).unwrap(); |
2091 | self.host_end = to_u32(self.serialization.len()).unwrap(); |
2092 | self.host = host.into(); |
2093 | |
2094 | if let Some(new_port) = opt_new_port { |
2095 | self.port = new_port; |
2096 | if let Some(port) = new_port { |
2097 | write!(&mut self.serialization, ": {}" , port).unwrap(); |
2098 | } |
2099 | } |
2100 | let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); |
2101 | self.serialization.push_str(&suffix); |
2102 | |
2103 | let adjust = |index: &mut u32| { |
2104 | *index -= old_suffix_pos; |
2105 | *index += new_suffix_pos; |
2106 | }; |
2107 | adjust(&mut self.path_start); |
2108 | if let Some(ref mut index) = self.query_start { |
2109 | adjust(index) |
2110 | } |
2111 | if let Some(ref mut index) = self.fragment_start { |
2112 | adjust(index) |
2113 | } |
2114 | } |
2115 | |
2116 | /// Change this URL’s host to the given IP address. |
2117 | /// |
2118 | /// If this URL is cannot-be-a-base, do nothing and return `Err`. |
2119 | /// |
2120 | /// Compared to `Url::set_host`, this skips the host parser. |
2121 | /// |
2122 | /// # Examples |
2123 | /// |
2124 | /// ```rust |
2125 | /// use url::{Url, ParseError}; |
2126 | /// |
2127 | /// # fn run() -> Result<(), ParseError> { |
2128 | /// let mut url = Url::parse("http://example.com" )?; |
2129 | /// url.set_ip_host("127.0.0.1" .parse().unwrap()); |
2130 | /// assert_eq!(url.host_str(), Some("127.0.0.1" )); |
2131 | /// assert_eq!(url.as_str(), "http://127.0.0.1/" ); |
2132 | /// # Ok(()) |
2133 | /// # } |
2134 | /// # run().unwrap(); |
2135 | /// ``` |
2136 | /// |
2137 | /// Cannot change URL's from mailto(cannot-be-base) to ip: |
2138 | /// |
2139 | /// ```rust |
2140 | /// use url::{Url, ParseError}; |
2141 | /// |
2142 | /// # fn run() -> Result<(), ParseError> { |
2143 | /// let mut url = Url::parse("mailto:rms@example.com" )?; |
2144 | /// let result = url.set_ip_host("127.0.0.1" .parse().unwrap()); |
2145 | /// |
2146 | /// assert_eq!(url.as_str(), "mailto:rms@example.com" ); |
2147 | /// assert!(result.is_err()); |
2148 | /// # Ok(()) |
2149 | /// # } |
2150 | /// # run().unwrap(); |
2151 | /// ``` |
2152 | /// |
2153 | #[allow (clippy::result_unit_err)] |
2154 | pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { |
2155 | if self.cannot_be_a_base() { |
2156 | return Err(()); |
2157 | } |
2158 | |
2159 | let address = match address { |
2160 | IpAddr::V4(address) => Host::Ipv4(address), |
2161 | IpAddr::V6(address) => Host::Ipv6(address), |
2162 | }; |
2163 | self.set_host_internal(address, None); |
2164 | Ok(()) |
2165 | } |
2166 | |
2167 | /// Change this URL’s password. |
2168 | /// |
2169 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. |
2170 | /// |
2171 | /// # Examples |
2172 | /// |
2173 | /// ```rust |
2174 | /// use url::{Url, ParseError}; |
2175 | /// |
2176 | /// # fn run() -> Result<(), ParseError> { |
2177 | /// let mut url = Url::parse("mailto:rmz@example.com" )?; |
2178 | /// let result = url.set_password(Some("secret_password" )); |
2179 | /// assert!(result.is_err()); |
2180 | /// |
2181 | /// let mut url = Url::parse("ftp://user1:secret1@example.com" )?; |
2182 | /// let result = url.set_password(Some("secret_password" )); |
2183 | /// assert_eq!(url.password(), Some("secret_password" )); |
2184 | /// |
2185 | /// let mut url = Url::parse("ftp://user2:@example.com" )?; |
2186 | /// let result = url.set_password(Some("secret2" )); |
2187 | /// assert!(result.is_ok()); |
2188 | /// assert_eq!(url.password(), Some("secret2" )); |
2189 | /// # Ok(()) |
2190 | /// # } |
2191 | /// # run().unwrap(); |
2192 | /// ``` |
2193 | #[allow (clippy::result_unit_err)] |
2194 | pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { |
2195 | // has_host implies !cannot_be_a_base |
2196 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
2197 | return Err(()); |
2198 | } |
2199 | let password = password.unwrap_or_default(); |
2200 | if !password.is_empty() { |
2201 | let host_and_after = self.slice(self.host_start..).to_owned(); |
2202 | self.serialization.truncate(self.username_end as usize); |
2203 | self.serialization.push(':' ); |
2204 | self.serialization |
2205 | .extend(utf8_percent_encode(password, USERINFO)); |
2206 | self.serialization.push('@' ); |
2207 | |
2208 | let old_host_start = self.host_start; |
2209 | let new_host_start = to_u32(self.serialization.len()).unwrap(); |
2210 | let adjust = |index: &mut u32| { |
2211 | *index -= old_host_start; |
2212 | *index += new_host_start; |
2213 | }; |
2214 | self.host_start = new_host_start; |
2215 | adjust(&mut self.host_end); |
2216 | adjust(&mut self.path_start); |
2217 | if let Some(ref mut index) = self.query_start { |
2218 | adjust(index) |
2219 | } |
2220 | if let Some(ref mut index) = self.fragment_start { |
2221 | adjust(index) |
2222 | } |
2223 | |
2224 | self.serialization.push_str(&host_and_after); |
2225 | } else if self.byte_at(self.username_end) == b':' { |
2226 | // If there is a password to remove |
2227 | let has_username_or_password = self.byte_at(self.host_start - 1) == b'@' ; |
2228 | debug_assert!(has_username_or_password); |
2229 | let username_start = self.scheme_end + 3; |
2230 | let empty_username = username_start == self.username_end; |
2231 | let start = self.username_end; // Remove the ':' |
2232 | let end = if empty_username { |
2233 | self.host_start // Remove the '@' as well |
2234 | } else { |
2235 | self.host_start - 1 // Keep the '@' to separate the username from the host |
2236 | }; |
2237 | self.serialization.drain(start as usize..end as usize); |
2238 | let offset = end - start; |
2239 | self.host_start -= offset; |
2240 | self.host_end -= offset; |
2241 | self.path_start -= offset; |
2242 | if let Some(ref mut index) = self.query_start { |
2243 | *index -= offset |
2244 | } |
2245 | if let Some(ref mut index) = self.fragment_start { |
2246 | *index -= offset |
2247 | } |
2248 | } |
2249 | Ok(()) |
2250 | } |
2251 | |
2252 | /// Change this URL’s username. |
2253 | /// |
2254 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. |
2255 | /// # Examples |
2256 | /// |
2257 | /// Cannot setup username from mailto(cannot-be-base) |
2258 | /// |
2259 | /// ```rust |
2260 | /// use url::{Url, ParseError}; |
2261 | /// |
2262 | /// # fn run() -> Result<(), ParseError> { |
2263 | /// let mut url = Url::parse("mailto:rmz@example.com" )?; |
2264 | /// let result = url.set_username("user1" ); |
2265 | /// assert_eq!(url.as_str(), "mailto:rmz@example.com" ); |
2266 | /// assert!(result.is_err()); |
2267 | /// # Ok(()) |
2268 | /// # } |
2269 | /// # run().unwrap(); |
2270 | /// ``` |
2271 | /// |
2272 | /// Setup username to user1 |
2273 | /// |
2274 | /// ```rust |
2275 | /// use url::{Url, ParseError}; |
2276 | /// |
2277 | /// # fn run() -> Result<(), ParseError> { |
2278 | /// let mut url = Url::parse("ftp://:secre1@example.com/" )?; |
2279 | /// let result = url.set_username("user1" ); |
2280 | /// assert!(result.is_ok()); |
2281 | /// assert_eq!(url.username(), "user1" ); |
2282 | /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/" ); |
2283 | /// # Ok(()) |
2284 | /// # } |
2285 | /// # run().unwrap(); |
2286 | /// ``` |
2287 | #[allow (clippy::result_unit_err)] |
2288 | pub fn set_username(&mut self, username: &str) -> Result<(), ()> { |
2289 | // has_host implies !cannot_be_a_base |
2290 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
2291 | return Err(()); |
2292 | } |
2293 | let username_start = self.scheme_end + 3; |
2294 | debug_assert!(self.slice(self.scheme_end..username_start) == "://" ); |
2295 | if self.slice(username_start..self.username_end) == username { |
2296 | return Ok(()); |
2297 | } |
2298 | let after_username = self.slice(self.username_end..).to_owned(); |
2299 | self.serialization.truncate(username_start as usize); |
2300 | self.serialization |
2301 | .extend(utf8_percent_encode(username, USERINFO)); |
2302 | |
2303 | let mut removed_bytes = self.username_end; |
2304 | self.username_end = to_u32(self.serialization.len()).unwrap(); |
2305 | let mut added_bytes = self.username_end; |
2306 | |
2307 | let new_username_is_empty = self.username_end == username_start; |
2308 | match (new_username_is_empty, after_username.chars().next()) { |
2309 | (true, Some('@' )) => { |
2310 | removed_bytes += 1; |
2311 | self.serialization.push_str(&after_username[1..]); |
2312 | } |
2313 | (false, Some('@' )) | (_, Some(':' )) | (true, _) => { |
2314 | self.serialization.push_str(&after_username); |
2315 | } |
2316 | (false, _) => { |
2317 | added_bytes += 1; |
2318 | self.serialization.push('@' ); |
2319 | self.serialization.push_str(&after_username); |
2320 | } |
2321 | } |
2322 | |
2323 | let adjust = |index: &mut u32| { |
2324 | *index -= removed_bytes; |
2325 | *index += added_bytes; |
2326 | }; |
2327 | adjust(&mut self.host_start); |
2328 | adjust(&mut self.host_end); |
2329 | adjust(&mut self.path_start); |
2330 | if let Some(ref mut index) = self.query_start { |
2331 | adjust(index) |
2332 | } |
2333 | if let Some(ref mut index) = self.fragment_start { |
2334 | adjust(index) |
2335 | } |
2336 | Ok(()) |
2337 | } |
2338 | |
2339 | /// Change this URL’s scheme. |
2340 | /// |
2341 | /// Do nothing and return `Err` under the following circumstances: |
2342 | /// |
2343 | /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+` |
2344 | /// * If this URL is cannot-be-a-base and the new scheme is one of |
2345 | /// `http`, `https`, `ws`, `wss` or `ftp` |
2346 | /// * If either the old or new scheme is `http`, `https`, `ws`, |
2347 | /// `wss` or `ftp` and the other is not one of these |
2348 | /// * If the new scheme is `file` and this URL includes credentials |
2349 | /// or has a non-null port |
2350 | /// * If this URL's scheme is `file` and its host is empty or null |
2351 | /// |
2352 | /// See also [the URL specification's section on legal scheme state |
2353 | /// overrides](https://url.spec.whatwg.org/#scheme-state). |
2354 | /// |
2355 | /// # Examples |
2356 | /// |
2357 | /// Change the URL’s scheme from `https` to `http`: |
2358 | /// |
2359 | /// ``` |
2360 | /// use url::Url; |
2361 | /// # use url::ParseError; |
2362 | /// |
2363 | /// # fn run() -> Result<(), ParseError> { |
2364 | /// let mut url = Url::parse("https://example.net" )?; |
2365 | /// let result = url.set_scheme("http" ); |
2366 | /// assert_eq!(url.as_str(), "http://example.net/" ); |
2367 | /// assert!(result.is_ok()); |
2368 | /// # Ok(()) |
2369 | /// # } |
2370 | /// # run().unwrap(); |
2371 | /// ``` |
2372 | /// Change the URL’s scheme from `foo` to `bar`: |
2373 | /// |
2374 | /// ``` |
2375 | /// use url::Url; |
2376 | /// # use url::ParseError; |
2377 | /// |
2378 | /// # fn run() -> Result<(), ParseError> { |
2379 | /// let mut url = Url::parse("foo://example.net" )?; |
2380 | /// let result = url.set_scheme("bar" ); |
2381 | /// assert_eq!(url.as_str(), "bar://example.net" ); |
2382 | /// assert!(result.is_ok()); |
2383 | /// # Ok(()) |
2384 | /// # } |
2385 | /// # run().unwrap(); |
2386 | /// ``` |
2387 | /// |
2388 | /// Cannot change URL’s scheme from `https` to `foõ`: |
2389 | /// |
2390 | /// ``` |
2391 | /// use url::Url; |
2392 | /// # use url::ParseError; |
2393 | /// |
2394 | /// # fn run() -> Result<(), ParseError> { |
2395 | /// let mut url = Url::parse("https://example.net" )?; |
2396 | /// let result = url.set_scheme("foõ" ); |
2397 | /// assert_eq!(url.as_str(), "https://example.net/" ); |
2398 | /// assert!(result.is_err()); |
2399 | /// # Ok(()) |
2400 | /// # } |
2401 | /// # run().unwrap(); |
2402 | /// ``` |
2403 | /// |
2404 | /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`: |
2405 | /// |
2406 | /// ``` |
2407 | /// use url::Url; |
2408 | /// # use url::ParseError; |
2409 | /// |
2410 | /// # fn run() -> Result<(), ParseError> { |
2411 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
2412 | /// let result = url.set_scheme("https" ); |
2413 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
2414 | /// assert!(result.is_err()); |
2415 | /// # Ok(()) |
2416 | /// # } |
2417 | /// # run().unwrap(); |
2418 | /// ``` |
2419 | /// Cannot change the URL’s scheme from `foo` to `https`: |
2420 | /// |
2421 | /// ``` |
2422 | /// use url::Url; |
2423 | /// # use url::ParseError; |
2424 | /// |
2425 | /// # fn run() -> Result<(), ParseError> { |
2426 | /// let mut url = Url::parse("foo://example.net" )?; |
2427 | /// let result = url.set_scheme("https" ); |
2428 | /// assert_eq!(url.as_str(), "foo://example.net" ); |
2429 | /// assert!(result.is_err()); |
2430 | /// # Ok(()) |
2431 | /// # } |
2432 | /// # run().unwrap(); |
2433 | /// ``` |
2434 | /// Cannot change the URL’s scheme from `http` to `foo`: |
2435 | /// |
2436 | /// ``` |
2437 | /// use url::Url; |
2438 | /// # use url::ParseError; |
2439 | /// |
2440 | /// # fn run() -> Result<(), ParseError> { |
2441 | /// let mut url = Url::parse("http://example.net" )?; |
2442 | /// let result = url.set_scheme("foo" ); |
2443 | /// assert_eq!(url.as_str(), "http://example.net/" ); |
2444 | /// assert!(result.is_err()); |
2445 | /// # Ok(()) |
2446 | /// # } |
2447 | /// # run().unwrap(); |
2448 | /// ``` |
2449 | #[allow (clippy::result_unit_err, clippy::suspicious_operation_groupings)] |
2450 | pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { |
2451 | let mut parser = Parser::for_setter(String::new()); |
2452 | let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?; |
2453 | let new_scheme_type = SchemeType::from(&parser.serialization); |
2454 | let old_scheme_type = SchemeType::from(self.scheme()); |
2455 | // If url’s scheme is a special scheme and buffer is not a special scheme, then return. |
2456 | if (new_scheme_type.is_special() && !old_scheme_type.is_special()) || |
2457 | // If url’s scheme is not a special scheme and buffer is a special scheme, then return. |
2458 | (!new_scheme_type.is_special() && old_scheme_type.is_special()) || |
2459 | // If url includes credentials or has a non-null port, and buffer is "file", then return. |
2460 | // If url’s scheme is "file" and its host is an empty host or null, then return. |
2461 | (new_scheme_type.is_file() && self.has_authority()) |
2462 | { |
2463 | return Err(()); |
2464 | } |
2465 | |
2466 | if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) { |
2467 | return Err(()); |
2468 | } |
2469 | let old_scheme_end = self.scheme_end; |
2470 | let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); |
2471 | let adjust = |index: &mut u32| { |
2472 | *index -= old_scheme_end; |
2473 | *index += new_scheme_end; |
2474 | }; |
2475 | |
2476 | self.scheme_end = new_scheme_end; |
2477 | adjust(&mut self.username_end); |
2478 | adjust(&mut self.host_start); |
2479 | adjust(&mut self.host_end); |
2480 | adjust(&mut self.path_start); |
2481 | if let Some(ref mut index) = self.query_start { |
2482 | adjust(index) |
2483 | } |
2484 | if let Some(ref mut index) = self.fragment_start { |
2485 | adjust(index) |
2486 | } |
2487 | |
2488 | parser.serialization.push_str(self.slice(old_scheme_end..)); |
2489 | self.serialization = parser.serialization; |
2490 | |
2491 | // Update the port so it can be removed |
2492 | // If it is the scheme's default |
2493 | // we don't mind it silently failing |
2494 | // if there was no port in the first place |
2495 | let previous_port = self.port(); |
2496 | let _ = self.set_port(previous_port); |
2497 | |
2498 | Ok(()) |
2499 | } |
2500 | |
/// Build a `file` scheme URL from a file name given as `std::path::Path`.
///
/// This returns `Err` if the given path is not absolute or,
/// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
///
/// # Examples
///
/// On Unix-like platforms:
///
/// ```
/// # if cfg!(unix) {
/// use url::Url;
///
/// # fn run() -> Result<(), ()> {
/// let url = Url::from_file_path("/tmp/foo.txt")?;
/// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
///
/// let url = Url::from_file_path("../foo.txt");
/// assert!(url.is_err());
///
/// let url = Url::from_file_path("https://google.com/");
/// assert!(url.is_err());
/// # Ok(())
/// # }
/// # run().unwrap();
/// # }
/// ```
///
/// This method is only available if the `std` Cargo feature is enabled.
#[cfg(all(
    feature = "std",
    any(
        unix,
        windows,
        target_os = "redox",
        target_os = "wasi",
        target_os = "hermit"
    )
))]
#[allow(clippy::result_unit_err)]
pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Url, ()> {
    let mut serialization = String::from("file://");
    // No credentials are written, so the username ends where the host begins.
    let authority_start = serialization.len() as u32;
    let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
    let scheme_end = "file".len() as u32;
    Ok(Url {
        serialization,
        scheme_end,
        username_end: authority_start,
        host_start: authority_start,
        host_end,
        host,
        port: None,
        // No port is written, so the path starts where the host ends.
        path_start: host_end,
        query_start: None,
        fragment_start: None,
    })
}
2558 | |
/// Build a `file` scheme URL from a directory name given as `std::path::Path`.
///
/// This returns `Err` if the given path is not absolute or,
/// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
///
/// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
/// so that the entire path is considered when using this URL as a base URL.
///
/// For example:
///
/// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
///   as the base URL is `file:///var/www/index.html`
/// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
///   as the base URL is `file:///var/index.html`, which might not be what was intended.
///
/// Note that `std::path` does not consider trailing slashes significant
/// and usually does not include them (e.g. in `Path::parent()`).
///
/// This method is only available if the `std` Cargo feature is enabled.
#[cfg(all(
    feature = "std",
    any(
        unix,
        windows,
        target_os = "redox",
        target_os = "wasi",
        target_os = "hermit"
    )
))]
#[allow(clippy::result_unit_err)]
pub fn from_directory_path<P: AsRef<std::path::Path>>(path: P) -> Result<Url, ()> {
    let mut directory_url = Url::from_file_path(path)?;
    let has_trailing_slash = directory_url.serialization.ends_with('/');
    if !has_trailing_slash {
        // Appending at the very end is enough: a fresh file URL has no query or fragment.
        directory_url.serialization.push('/')
    }
    Ok(directory_url)
}
2596 | |
/// Serialize with Serde using the internal representation of the `Url` struct.
///
/// All fields are written out as one tuple, in declaration order of the
/// destructuring below.
///
/// The corresponding `deserialize_internal` method sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;
    // Destructuring first lets us ensure that adding or removing fields forces this method
    // to be updated
    let Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    } = self;
    let fields = (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    );
    fields.serialize(serializer)
}
2638 | |
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`: it reads back the tuple of
/// fields that method writes and rebuilds the `Url` from them without re-parsing.
///
/// This method sacrifices some invariant-checking for speed, compared to the
/// `Deserialize` trait impl: `check_invariants` is only run when
/// `debug_assertions` are enabled, in which case a failure is reported as a
/// custom deserializer error.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error};
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    // Validation is skipped in builds without debug assertions.
    if cfg!(debug_assertions) {
        url.check_invariants().map_err(Error::custom)?
    }
    Ok(url)
}
2682 | |
/// Assuming the URL is in the `file` scheme or similar,
/// turn its path into an absolute `std::path::PathBuf`.
///
/// **Note:** The URL’s `scheme` is not checked here,
/// so other schemes may yield nonsensical results.
/// It is the caller’s responsibility to check the scheme before calling this.
///
/// ```
/// # use url::Url;
/// # let url = Url::parse("file:///etc/passwd").unwrap();
/// let path = url.to_file_path();
/// ```
///
/// Returns `Err` if `path_segments()` yields `None`,
/// if the host is neither empty nor `"localhost"` (except on Windows, where
/// `file:` URLs may have a non-local host),
/// or if the segments cannot be turned into a path
/// (for a Windows path, e.g. when the percent-decoded path is not UTF-8).
///
/// This method is only available if the `std` Cargo feature is enabled.
#[inline]
#[cfg(all(
    feature = "std",
    any(
        unix,
        windows,
        target_os = "redox",
        target_os = "wasi",
        target_os = "hermit"
    )
))]
#[allow(clippy::result_unit_err)]
pub fn to_file_path(&self) -> Result<PathBuf, ()> {
    let segments = self.path_segments().ok_or(())?;

    let host = match self.host() {
        // No host, or the local machine: nothing to carry into the path.
        None | Some(Host::Domain("localhost")) => None,
        Some(_) => {
            // A remote host is only accepted for `file:` URLs on Windows.
            if !(cfg!(windows) && self.scheme() == "file") {
                return Err(());
            }
            let (start, end) = (self.host_start as usize, self.host_end as usize);
            Some(&self.serialization[start..end])
        }
    };

    file_url_segments_to_pathbuf(host, segments)
}
2729 | |
2730 | // Private helper methods: |
2731 | |
2732 | #[inline ] |
2733 | fn slice<R>(&self, range: R) -> &str |
2734 | where |
2735 | R: RangeArg, |
2736 | { |
2737 | range.slice_of(&self.serialization) |
2738 | } |
2739 | |
2740 | #[inline ] |
2741 | fn byte_at(&self, i: u32) -> u8 { |
2742 | self.serialization.as_bytes()[i as usize] |
2743 | } |
2744 | } |
2745 | |
2746 | /// Parse a string as an URL, without a base URL or encoding override. |
2747 | impl str::FromStr for Url { |
2748 | type Err = ParseError; |
2749 | |
2750 | #[inline ] |
2751 | fn from_str(input: &str) -> Result<Url, crate::ParseError> { |
2752 | Url::parse(input) |
2753 | } |
2754 | } |
2755 | |
2756 | impl<'a> TryFrom<&'a str> for Url { |
2757 | type Error = ParseError; |
2758 | |
2759 | fn try_from(s: &'a str) -> Result<Self, Self::Error> { |
2760 | Url::parse(input:s) |
2761 | } |
2762 | } |
2763 | |
2764 | /// Display the serialization of this URL. |
2765 | impl fmt::Display for Url { |
2766 | #[inline ] |
2767 | fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { |
2768 | fmt::Display::fmt(&self.serialization, f:formatter) |
2769 | } |
2770 | } |
2771 | |
2772 | /// String conversion. |
2773 | impl From<Url> for String { |
2774 | fn from(value: Url) -> String { |
2775 | value.serialization |
2776 | } |
2777 | } |
2778 | |
2779 | /// Debug the serialization of this URL. |
2780 | impl fmt::Debug for Url { |
2781 | #[inline ] |
2782 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
2783 | formatter&mut DebugStruct<'_, '_> |
2784 | .debug_struct("Url" ) |
2785 | .field("scheme" , &self.scheme()) |
2786 | .field("cannot_be_a_base" , &self.cannot_be_a_base()) |
2787 | .field("username" , &self.username()) |
2788 | .field("password" , &self.password()) |
2789 | .field("host" , &self.host()) |
2790 | .field("port" , &self.port()) |
2791 | .field("path" , &self.path()) |
2792 | .field("query" , &self.query()) |
2793 | .field(name:"fragment" , &self.fragment()) |
2794 | .finish() |
2795 | } |
2796 | } |
2797 | |
/// URLs compare like their serialization.
///
/// Marker impl only: the actual comparison lives in the `PartialEq` impl,
/// which compares the serialization strings.
impl Eq for Url {}
2800 | |
2801 | /// URLs compare like their serialization. |
2802 | impl PartialEq for Url { |
2803 | #[inline ] |
2804 | fn eq(&self, other: &Self) -> bool { |
2805 | self.serialization == other.serialization |
2806 | } |
2807 | } |
2808 | |
2809 | /// URLs compare like their serialization. |
2810 | impl Ord for Url { |
2811 | #[inline ] |
2812 | fn cmp(&self, other: &Self) -> cmp::Ordering { |
2813 | self.serialization.cmp(&other.serialization) |
2814 | } |
2815 | } |
2816 | |
2817 | /// URLs compare like their serialization. |
2818 | impl PartialOrd for Url { |
2819 | #[inline ] |
2820 | fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { |
2821 | Some(self.cmp(other)) |
2822 | } |
2823 | } |
2824 | |
2825 | /// URLs hash like their serialization. |
2826 | impl hash::Hash for Url { |
2827 | #[inline ] |
2828 | fn hash<H>(&self, state: &mut H) |
2829 | where |
2830 | H: hash::Hasher, |
2831 | { |
2832 | hash::Hash::hash(&self.serialization, state) |
2833 | } |
2834 | } |
2835 | |
2836 | /// Return the serialization of this URL. |
2837 | impl AsRef<str> for Url { |
2838 | #[inline ] |
2839 | fn as_ref(&self) -> &str { |
2840 | &self.serialization |
2841 | } |
2842 | } |
2843 | |
/// A range over `u32` byte offsets that can be used to slice a `&str`.
///
/// Implemented for `a..b`, `a..` and `..b`. Slicing panics under the same
/// conditions as ordinary `str` indexing (out of bounds or not on a char boundary).
trait RangeArg {
    /// Return the sub-slice of `s` that this range selects.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`: bytes from `start` (inclusive) to `end` (exclusive).
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`: bytes from `start` to the end of the string.
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`: bytes from the beginning of the string up to `end` (exclusive).
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2868 | |
/// Serializes this URL into a `serde` stream, as a single string
/// (the value of `as_str()`).
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2881 | |
/// Deserializes this URL from a `serde` stream.
///
/// The input is expected to be a string, which is parsed with `Url::parse`.
/// A parse failure is reported as a custom deserializer error of the form
/// `<parse error>: "<input>"`.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Visitor};

        /// Visitor that accepts a string and parses it as an URL.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                // No leading space: the message starts directly with the parse error.
                Url::parse(s).map_err(|err| Error::custom(format!("{}: {:?}", err, s)))
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2913 | |
/// Append the percent-encoded components of an absolute `path` to `serialization`,
/// each one preceded by `/`.
///
/// Returns `Err(())` if `path` is not absolute. On success returns the length of
/// `serialization` before anything was appended (the host end offset) together
/// with `HostInternal::None`.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use parser::SPECIAL_PATH_SEGMENT;
    use percent_encoding::percent_encode;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;

    if !path.is_absolute() {
        return Err(());
    }

    let host_end = to_u32(serialization.len()).unwrap();
    let mut wrote_segment = false;

    // The first component is the root; it is skipped.
    for component in path.components().skip(1) {
        wrote_segment = true;
        serialization.push('/');

        let os_name = component.as_os_str();
        #[cfg(not(target_os = "wasi"))]
        let raw = os_name.as_bytes();
        // On WASI the component goes through a lossy UTF-8 conversion first.
        #[cfg(target_os = "wasi")]
        let lossy = os_name.to_string_lossy();
        #[cfg(target_os = "wasi")]
        let raw = lossy.as_bytes();

        serialization.extend(percent_encode(raw, SPECIAL_PATH_SEGMENT));
    }

    if !wrote_segment {
        // An URL’s path must not be empty.
        serialization.push('/');
    }

    Ok((host_end, HostInternal::None))
}
2954 | |
/// Windows build of `path_to_file_url_segments`.
///
/// Forwards both arguments unchanged to `path_to_file_url_segments_windows`
/// and returns its result.
#[cfg (all(feature = "std" , windows))]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2962 | |
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Append the host (for UNC paths) and the percent-encoded components of an
/// absolute Windows `path` to `serialization`.
///
/// Returns `Err(())` if the path is not absolute, if its first component is not a
/// disk or UNC prefix (plain or verbatim), if the UNC server name is not a valid
/// host, or if any component is not valid Unicode.
///
/// On success returns the offset in `serialization` where the host ends,
/// together with the internal host representation
/// (`HostInternal::None` for disk paths).
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn path_to_file_url_segments_windows(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use crate::parser::PATH_SEGMENT;
    use percent_encoding::percent_encode;
    use std::path::{Component, Prefix};
    if !path.is_absolute() {
        return Err(());
    }
    let mut components = path.components();

    // Offset just past the `/` that precedes a drive letter; used by the
    // drive-letter check at the end.
    let host_start = serialization.len() + 1;
    let host_end;
    let host_internal;

    match components.next() {
        Some(Component::Prefix(ref p)) => match p.kind() {
            Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
                host_end = to_u32(serialization.len()).unwrap();
                host_internal = HostInternal::None;
                serialization.push('/');
                serialization.push(letter as char);
                serialization.push(':');
            }
            Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
                let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
                // The host must be written without any padding: `host_end` is
                // taken right after it and the path starts immediately behind.
                write!(serialization, "{}", host).unwrap();
                host_end = to_u32(serialization.len()).unwrap();
                host_internal = host.into();
                serialization.push('/');
                let share = share.to_str().ok_or(())?;
                serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
            }
            _ => return Err(()),
        },
        _ => return Err(()),
    }

    let mut path_only_has_prefix = true;
    for component in components {
        if component == Component::RootDir {
            continue;
        }

        path_only_has_prefix = false;
        // FIXME: somehow work with non-unicode?
        let component = component.as_os_str().to_str().ok_or(())?;

        serialization.push('/');
        serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
    }

    // A windows drive letter must end with a slash.
    if serialization.len() > host_start
        && parser::is_windows_drive_letter(&serialization[host_start..])
        && path_only_has_prefix
    {
        serialization.push('/');
    }

    Ok((host_end, host_internal))
}
3029 | |
/// Rebuild a filesystem path from the percent-encoded path `segments` of a URL.
///
/// Returns `Err(())` when `host` is `Some`, and on WASI also when the decoded
/// bytes are not valid UTF-8. On Redox the resulting path is prefixed with `file:`.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use alloc::vec::Vec;
    use percent_encoding::percent_decode;
    #[cfg(not(target_os = "wasi"))]
    use std::ffi::OsStr;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;
    use std::path::PathBuf;

    // A host cannot be expressed in a path on these platforms.
    if let Some(_) = host {
        return Err(());
    }

    let mut bytes: Vec<u8> = Vec::new();
    if cfg!(target_os = "redox") {
        bytes.extend_from_slice(b"file:");
    }

    for encoded in segments {
        bytes.push(b'/');
        bytes.extend(percent_decode(encoded.as_bytes()));
    }

    // A windows drive letter must end with a slash.
    let ends_with_drive_letter = match bytes.as_slice() {
        [_, .., letter, separator] => {
            letter.is_ascii_alphabetic() && matches!(*separator, b':' | b'|')
        }
        _ => false,
    };
    if ends_with_drive_letter {
        bytes.push(b'/');
    }

    #[cfg(not(target_os = "wasi"))]
    let path = PathBuf::from(OsStr::from_bytes(&bytes));
    #[cfg(target_os = "wasi")]
    let path = match String::from_utf8(bytes) {
        Ok(text) => PathBuf::from(text),
        Err(_) => return Err(()),
    };

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
3085 | |
/// Windows build of `file_url_segments_to_pathbuf`.
///
/// Forwards both arguments unchanged to `file_url_segments_to_pathbuf_windows`
/// and returns its result.
#[cfg (all(feature = "std" , windows))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
3093 | |
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Rebuild a Windows path from an optional `host` and the percent-encoded path
/// `segments` of a URL.
///
/// * With a host, the path starts with `\\` followed by the host (UNC form).
/// * Without a host, the first segment must be a drive letter followed by `:`
///   (either literally, e.g. `C:`, or percent-encoded as `C%3A` / `C%3a`).
///
/// Every remaining segment is percent-decoded and appended after a `\` separator.
///
/// Returns `Err(())` if there is no host and the first segment is missing or is
/// not a drive letter, or if a decoded segment is not valid UTF-8.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_segments_to_pathbuf_windows(
    host: Option<&str>,
    mut segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use percent_encoding::percent_decode;
    let mut string = if let Some(host) = host {
        r"\\".to_owned() + host
    } else {
        let first = segments.next().ok_or(())?;

        match first.len() {
            // Literal form: `C:`
            2 => {
                if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
                    return Err(());
                }

                first.to_owned()
            }

            // Percent-encoded colon: `C%3A` or `C%3a`
            4 => {
                if !first.starts_with(parser::ascii_alpha) {
                    return Err(());
                }
                let bytes = first.as_bytes();
                if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
                    return Err(());
                }

                first[0..1].to_owned() + ":"
            }

            _ => return Err(()),
        }
    };

    for segment in segments {
        string.push('\\');

        // Currently non-unicode windows paths cannot be represented
        match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
            Ok(s) => string.push_str(&s),
            Err(..) => return Err(()),
        }
    }
    let path = PathBuf::from(string);
    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );
    Ok(path)
}
3148 | |
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive (Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. Becomes `None` once `finish` or `drop` has taken it.
    url: Option<&'a mut Url>,
    // Passed to `Url::restore_already_parsed_fragment` when editing ends
    // (in `finish`, or in `drop` if `finish` was never called).
    fragment: Option<String>,
}
3155 | |
// `as_mut_string` here exposes the internal serialization of an `Url`,
// which should not be exposed to users.
// We achieve that by not giving users direct access to `UrlQuery`:
// * Its fields are private
//   (and so it can not be constructed with struct literal syntax outside of this crate),
// * It has no constructor
// * It is only visible (on the type level) to users in the return type of
//   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
// * `Serializer` keeps its target in a private field
// * Unlike in other `Target` impls, `UrlQuery::finish` does not return `Self`.
3166 | impl<'a> form_urlencoded::Target for UrlQuery<'a> { |
3167 | fn as_mut_string(&mut self) -> &mut String { |
3168 | &mut self.url.as_mut().unwrap().serialization |
3169 | } |
3170 | |
3171 | fn finish(mut self) -> &'a mut Url { |
3172 | let url: &'a mut Url = self.url.take().unwrap(); |
3173 | url.restore_already_parsed_fragment(self.fragment.take()); |
3174 | url |
3175 | } |
3176 | |
3177 | type Finished = &'a mut Url; |
3178 | } |
3179 | |
3180 | impl<'a> Drop for UrlQuery<'a> { |
3181 | fn drop(&mut self) { |
3182 | if let Some(url: &'a mut Url) = self.url.take() { |
3183 | url.restore_already_parsed_fragment(self.fragment.take()) |
3184 | } |
3185 | } |
3186 | } |
3187 | |