1 | // Copyright 2013-2015 The rust-url developers. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | /*! |
10 | |
rust-url is an implementation of the [URL Standard](https://url.spec.whatwg.org/)
for the [Rust](https://www.rust-lang.org/) programming language.
13 | |
14 | |
15 | # URL parsing and data structures |
16 | |
17 | First, URL parsing may fail for various reasons and therefore returns a `Result`. |
18 | |
19 | ``` |
20 | use url::{Url, ParseError}; |
21 | |
22 | assert!(Url::parse("http://[:::1]" ) == Err(ParseError::InvalidIpv6Address)) |
23 | ``` |
24 | |
25 | Let’s parse a valid URL and look at its components. |
26 | |
27 | ``` |
28 | use url::{Url, Host, Position}; |
29 | # use url::ParseError; |
30 | # fn run() -> Result<(), ParseError> { |
31 | let issue_list_url = Url::parse( |
32 | "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open" |
33 | )?; |
34 | |
35 | |
36 | assert!(issue_list_url.scheme() == "https" ); |
37 | assert!(issue_list_url.username() == "" ); |
38 | assert!(issue_list_url.password() == None); |
39 | assert!(issue_list_url.host_str() == Some("github.com" )); |
40 | assert!(issue_list_url.host() == Some(Host::Domain("github.com" ))); |
41 | assert!(issue_list_url.port() == None); |
42 | assert!(issue_list_url.path() == "/rust-lang/rust/issues" ); |
43 | assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) == |
44 | Some(vec!["rust-lang" , "rust" , "issues" ])); |
45 | assert!(issue_list_url.query() == Some("labels=E-easy&state=open" )); |
46 | assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open" ); |
47 | assert!(issue_list_url.fragment() == None); |
48 | assert!(!issue_list_url.cannot_be_a_base()); |
49 | # Ok(()) |
50 | # } |
51 | # run().unwrap(); |
52 | ``` |
53 | |
54 | Some URLs are said to be *cannot-be-a-base*: |
55 | they don’t have a username, password, host, or port, |
56 | and their "path" is an arbitrary string rather than slash-separated segments: |
57 | |
58 | ``` |
59 | use url::Url; |
60 | # use url::ParseError; |
61 | |
62 | # fn run() -> Result<(), ParseError> { |
63 | let data_url = Url::parse("data:text/plain,Hello?World#" )?; |
64 | |
65 | assert!(data_url.cannot_be_a_base()); |
66 | assert!(data_url.scheme() == "data" ); |
67 | assert!(data_url.path() == "text/plain,Hello" ); |
68 | assert!(data_url.path_segments().is_none()); |
69 | assert!(data_url.query() == Some("World" )); |
70 | assert!(data_url.fragment() == Some("" )); |
71 | # Ok(()) |
72 | # } |
73 | # run().unwrap(); |
74 | ``` |
75 | |
76 | ## Serde |
77 | |
78 | Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`. |
79 | |
80 | # Base URL |
81 | |
82 | Many contexts allow URL *references* that can be relative to a *base URL*: |
83 | |
84 | ```html |
85 | <link rel="stylesheet" href="../main.css"> |
86 | ``` |
87 | |
88 | Since parsed URLs are absolute, giving a base is required for parsing relative URLs: |
89 | |
90 | ``` |
91 | use url::{Url, ParseError}; |
92 | |
93 | assert!(Url::parse("../main.css" ) == Err(ParseError::RelativeUrlWithoutBase)) |
94 | ``` |
95 | |
96 | Use the `join` method on an `Url` to use it as a base URL: |
97 | |
98 | ``` |
99 | use url::Url; |
100 | # use url::ParseError; |
101 | |
102 | # fn run() -> Result<(), ParseError> { |
103 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html" )?; |
104 | let css_url = this_document.join("../main.css" )?; |
105 | assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css" ); |
106 | # Ok(()) |
107 | # } |
108 | # run().unwrap(); |
109 | ``` |
110 | |
111 | # Feature: `serde` |
112 | |
113 | If you enable the `serde` feature, [`Url`](struct.Url.html) will implement |
114 | [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and |
115 | [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html). |
116 | See [serde documentation](https://serde.rs) for more information. |
117 | |
118 | ```toml |
119 | url = { version = "2", features = ["serde"] } |
120 | ``` |
121 | |
122 | # Feature: `debugger_visualizer` |
123 | |
124 | If you enable the `debugger_visualizer` feature, the `url` crate will include |
125 | a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects) |
126 | for [Visual Studio](https://www.visualstudio.com/) that allows you to view |
127 | [`Url`](struct.Url.html) objects in the debugger. |
128 | |
129 | This feature requires Rust 1.71 or later. |
130 | |
131 | ```toml |
132 | url = { version = "2", features = ["debugger_visualizer"] } |
133 | ``` |
134 | |
135 | */ |
136 | |
137 | #![doc (html_root_url = "https://docs.rs/url/2.5.0" )] |
138 | #![cfg_attr ( |
139 | feature = "debugger_visualizer" , |
140 | debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis" ) |
141 | )] |
142 | |
143 | pub use form_urlencoded; |
144 | |
145 | #[cfg (feature = "serde" )] |
146 | extern crate serde; |
147 | |
148 | use crate::host::HostInternal; |
149 | use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO}; |
150 | use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode}; |
151 | use std::borrow::Borrow; |
152 | use std::cmp; |
153 | use std::fmt::{self, Write}; |
154 | use std::hash; |
155 | use std::io; |
156 | use std::mem; |
157 | use std::net::{IpAddr, SocketAddr, ToSocketAddrs}; |
158 | use std::ops::{Range, RangeFrom, RangeTo}; |
159 | use std::path::{Path, PathBuf}; |
160 | use std::str; |
161 | |
162 | use std::convert::TryFrom; |
163 | |
164 | pub use crate::host::Host; |
165 | pub use crate::origin::{OpaqueOrigin, Origin}; |
166 | pub use crate::parser::{ParseError, SyntaxViolation}; |
167 | pub use crate::path_segments::PathSegmentsMut; |
168 | pub use crate::slicing::Position; |
169 | pub use form_urlencoded::EncodingOverride; |
170 | |
171 | mod host; |
172 | mod origin; |
173 | mod parser; |
174 | mod path_segments; |
175 | mod slicing; |
176 | |
177 | #[doc (hidden)] |
178 | pub mod quirks; |
179 | |
/// A parsed URL record.
///
/// The complete URL is kept once, in serialized form, in `serialization`.
/// The other fields are byte offsets into that string (plus the parsed host
/// kind and port) that delimit the individual components, so accessors can
/// hand out slices of the serialization.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    // All `u32` values below are byte offsets into `serialization`.
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    // `host_start..host_end` delimits the host string inside `serialization`.
    host_start: u32,
    host_end: u32,
    // Parsed form of the host (none, domain, IPv4 or IPv6); must agree with the host string.
    host: HostInternal,
    // `None` when `path_start == host_end`; otherwise the serialization holds
    // ':' at `host_end` followed by the port digits up to `path_start`.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
205 | |
/// Full configuration for the URL parser.
///
/// A value is obtained from `Url::options()`, adjusted with the builder-style
/// methods (each takes and returns the options by value), and finally consumed
/// by `parse`.
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; `None` means no base is supplied.
    base_url: Option<&'a Url>,
    // Passed to the parser as its query encoding override.
    encoding_override: EncodingOverride<'a>,
    // Optional callback handed to the parser for reporting `SyntaxViolation`s.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
214 | |
215 | impl<'a> ParseOptions<'a> { |
216 | /// Change the base URL |
217 | pub fn base_url(mut self, new: Option<&'a Url>) -> Self { |
218 | self.base_url = new; |
219 | self |
220 | } |
221 | |
222 | /// Override the character encoding of query strings. |
223 | /// This is a legacy concept only relevant for HTML. |
224 | pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self { |
225 | self.encoding_override = new; |
226 | self |
227 | } |
228 | |
229 | /// Call the provided function or closure for a non-fatal `SyntaxViolation` |
230 | /// when it occurs during parsing. Note that since the provided function is |
231 | /// `Fn`, the caller might need to utilize _interior mutability_, such as with |
232 | /// a `RefCell`, to collect the violations. |
233 | /// |
234 | /// ## Example |
235 | /// ``` |
236 | /// use std::cell::RefCell; |
237 | /// use url::{Url, SyntaxViolation}; |
238 | /// # use url::ParseError; |
239 | /// # fn run() -> Result<(), url::ParseError> { |
240 | /// let violations = RefCell::new(Vec::new()); |
241 | /// let url = Url::options() |
242 | /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v))) |
243 | /// .parse("https:////example.com" )?; |
244 | /// assert_eq!(url.as_str(), "https://example.com/" ); |
245 | /// assert_eq!(violations.into_inner(), |
246 | /// vec!(SyntaxViolation::ExpectedDoubleSlash)); |
247 | /// # Ok(()) |
248 | /// # } |
249 | /// # run().unwrap(); |
250 | /// ``` |
251 | pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { |
252 | self.violation_fn = new; |
253 | self |
254 | } |
255 | |
256 | /// Parse an URL string with the configuration so far. |
257 | pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> { |
258 | Parser { |
259 | serialization: String::with_capacity(input.len()), |
260 | base_url: self.base_url, |
261 | query_encoding_override: self.encoding_override, |
262 | violation_fn: self.violation_fn, |
263 | context: Context::UrlParser, |
264 | } |
265 | .parse_url(input) |
266 | } |
267 | } |
268 | |
269 | impl Url { |
270 | /// Parse an absolute URL from a string. |
271 | /// |
272 | /// # Examples |
273 | /// |
274 | /// ```rust |
275 | /// use url::Url; |
276 | /// # use url::ParseError; |
277 | /// |
278 | /// # fn run() -> Result<(), ParseError> { |
279 | /// let url = Url::parse("https://example.net" )?; |
280 | /// # Ok(()) |
281 | /// # } |
282 | /// # run().unwrap(); |
283 | /// ``` |
284 | /// |
285 | /// # Errors |
286 | /// |
287 | /// If the function can not parse an absolute URL from the given string, |
288 | /// a [`ParseError`] variant will be returned. |
289 | /// |
290 | /// [`ParseError`]: enum.ParseError.html |
291 | #[inline ] |
292 | pub fn parse(input: &str) -> Result<Url, crate::ParseError> { |
293 | Url::options().parse(input) |
294 | } |
295 | |
296 | /// Parse an absolute URL from a string and add params to its query string. |
297 | /// |
298 | /// Existing params are not removed. |
299 | /// |
300 | /// # Examples |
301 | /// |
302 | /// ```rust |
303 | /// use url::Url; |
304 | /// # use url::ParseError; |
305 | /// |
306 | /// # fn run() -> Result<(), ParseError> { |
307 | /// let url = Url::parse_with_params("https://example.net?dont=clobberme" , |
308 | /// &[("lang" , "rust" ), ("browser" , "servo" )])?; |
309 | /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo" , url.as_str()); |
310 | /// # Ok(()) |
311 | /// # } |
312 | /// # run().unwrap(); |
313 | /// ``` |
314 | /// |
315 | /// # Errors |
316 | /// |
317 | /// If the function can not parse an absolute URL from the given string, |
318 | /// a [`ParseError`] variant will be returned. |
319 | /// |
320 | /// [`ParseError`]: enum.ParseError.html |
321 | #[inline ] |
322 | pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> |
323 | where |
324 | I: IntoIterator, |
325 | I::Item: Borrow<(K, V)>, |
326 | K: AsRef<str>, |
327 | V: AsRef<str>, |
328 | { |
329 | let mut url = Url::options().parse(input); |
330 | |
331 | if let Ok(ref mut url) = url { |
332 | url.query_pairs_mut().extend_pairs(iter); |
333 | } |
334 | |
335 | url |
336 | } |
337 | |
338 | /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path |
339 | fn strip_trailing_spaces_from_opaque_path(&mut self) { |
340 | if !self.cannot_be_a_base() { |
341 | return; |
342 | } |
343 | |
344 | if self.fragment_start.is_some() { |
345 | return; |
346 | } |
347 | |
348 | if self.query_start.is_some() { |
349 | return; |
350 | } |
351 | |
352 | let trailing_space_count = self |
353 | .serialization |
354 | .chars() |
355 | .rev() |
356 | .take_while(|c| *c == ' ' ) |
357 | .count(); |
358 | |
359 | let start = self.serialization.len() - trailing_space_count; |
360 | |
361 | self.serialization.truncate(start); |
362 | } |
363 | |
364 | /// Parse a string as an URL, with this URL as the base URL. |
365 | /// |
366 | /// The inverse of this is [`make_relative`]. |
367 | /// |
368 | /// Note: a trailing slash is significant. |
369 | /// Without it, the last path component is considered to be a “file” name |
370 | /// to be removed to get at the “directory” that is used as the base: |
371 | /// |
372 | /// # Examples |
373 | /// |
374 | /// ```rust |
375 | /// use url::Url; |
376 | /// # use url::ParseError; |
377 | /// |
378 | /// # fn run() -> Result<(), ParseError> { |
379 | /// let base = Url::parse("https://example.net/a/b.html" )?; |
380 | /// let url = base.join("c.png" )?; |
381 | /// assert_eq!(url.as_str(), "https://example.net/a/c.png" ); // Not /a/b.html/c.png |
382 | /// |
383 | /// let base = Url::parse("https://example.net/a/b/" )?; |
384 | /// let url = base.join("c.png" )?; |
385 | /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png" ); |
386 | /// # Ok(()) |
387 | /// # } |
388 | /// # run().unwrap(); |
389 | /// ``` |
390 | /// |
391 | /// # Errors |
392 | /// |
393 | /// If the function can not parse an URL from the given string |
394 | /// with this URL as the base URL, a [`ParseError`] variant will be returned. |
395 | /// |
396 | /// [`ParseError`]: enum.ParseError.html |
397 | /// [`make_relative`]: #method.make_relative |
398 | #[inline ] |
399 | pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> { |
400 | Url::options().base_url(Some(self)).parse(input) |
401 | } |
402 | |
403 | /// Creates a relative URL if possible, with this URL as the base URL. |
404 | /// |
405 | /// This is the inverse of [`join`]. |
406 | /// |
407 | /// # Examples |
408 | /// |
409 | /// ```rust |
410 | /// use url::Url; |
411 | /// # use url::ParseError; |
412 | /// |
413 | /// # fn run() -> Result<(), ParseError> { |
414 | /// let base = Url::parse("https://example.net/a/b.html" )?; |
415 | /// let url = Url::parse("https://example.net/a/c.png" )?; |
416 | /// let relative = base.make_relative(&url); |
417 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png" )); |
418 | /// |
419 | /// let base = Url::parse("https://example.net/a/b/" )?; |
420 | /// let url = Url::parse("https://example.net/a/b/c.png" )?; |
421 | /// let relative = base.make_relative(&url); |
422 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png" )); |
423 | /// |
424 | /// let base = Url::parse("https://example.net/a/b/" )?; |
425 | /// let url = Url::parse("https://example.net/a/d/c.png" )?; |
426 | /// let relative = base.make_relative(&url); |
427 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png" )); |
428 | /// |
429 | /// let base = Url::parse("https://example.net/a/b.html?c=d" )?; |
430 | /// let url = Url::parse("https://example.net/a/b.html?e=f" )?; |
431 | /// let relative = base.make_relative(&url); |
432 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f" )); |
433 | /// # Ok(()) |
434 | /// # } |
435 | /// # run().unwrap(); |
436 | /// ``` |
437 | /// |
438 | /// # Errors |
439 | /// |
440 | /// If this URL can't be a base for the given URL, `None` is returned. |
441 | /// This is for example the case if the scheme, host or port are not the same. |
442 | /// |
443 | /// [`join`]: #method.join |
444 | pub fn make_relative(&self, url: &Url) -> Option<String> { |
445 | if self.cannot_be_a_base() { |
446 | return None; |
447 | } |
448 | |
449 | // Scheme, host and port need to be the same |
450 | if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() { |
451 | return None; |
452 | } |
453 | |
454 | // We ignore username/password at this point |
455 | |
456 | // The path has to be transformed |
457 | let mut relative = String::new(); |
458 | |
459 | // Extract the filename of both URIs, these need to be handled separately |
460 | fn extract_path_filename(s: &str) -> (&str, &str) { |
461 | let last_slash_idx = s.rfind('/' ).unwrap_or(0); |
462 | let (path, filename) = s.split_at(last_slash_idx); |
463 | if filename.is_empty() { |
464 | (path, "" ) |
465 | } else { |
466 | (path, &filename[1..]) |
467 | } |
468 | } |
469 | |
470 | let (base_path, base_filename) = extract_path_filename(self.path()); |
471 | let (url_path, url_filename) = extract_path_filename(url.path()); |
472 | |
473 | let mut base_path = base_path.split('/' ).peekable(); |
474 | let mut url_path = url_path.split('/' ).peekable(); |
475 | |
476 | // Skip over the common prefix |
477 | while base_path.peek().is_some() && base_path.peek() == url_path.peek() { |
478 | base_path.next(); |
479 | url_path.next(); |
480 | } |
481 | |
482 | // Add `..` segments for the remainder of the base path |
483 | for base_path_segment in base_path { |
484 | // Skip empty last segments |
485 | if base_path_segment.is_empty() { |
486 | break; |
487 | } |
488 | |
489 | if !relative.is_empty() { |
490 | relative.push('/' ); |
491 | } |
492 | |
493 | relative.push_str(".." ); |
494 | } |
495 | |
496 | // Append the remainder of the other URI |
497 | for url_path_segment in url_path { |
498 | if !relative.is_empty() { |
499 | relative.push('/' ); |
500 | } |
501 | |
502 | relative.push_str(url_path_segment); |
503 | } |
504 | |
505 | // Add the filename if they are not the same |
506 | if !relative.is_empty() || base_filename != url_filename { |
507 | // If the URIs filename is empty this means that it was a directory |
508 | // so we'll have to append a '/'. |
509 | // |
510 | // Otherwise append it directly as the new filename. |
511 | if url_filename.is_empty() { |
512 | relative.push('/' ); |
513 | } else { |
514 | if !relative.is_empty() { |
515 | relative.push('/' ); |
516 | } |
517 | relative.push_str(url_filename); |
518 | } |
519 | } |
520 | |
521 | // Query and fragment are only taken from the other URI |
522 | if let Some(query) = url.query() { |
523 | relative.push('?' ); |
524 | relative.push_str(query); |
525 | } |
526 | |
527 | if let Some(fragment) = url.fragment() { |
528 | relative.push('#' ); |
529 | relative.push_str(fragment); |
530 | } |
531 | |
532 | Some(relative) |
533 | } |
534 | |
535 | /// Return a default `ParseOptions` that can fully configure the URL parser. |
536 | /// |
537 | /// # Examples |
538 | /// |
539 | /// Get default `ParseOptions`, then change base url |
540 | /// |
541 | /// ```rust |
542 | /// use url::Url; |
543 | /// # use url::ParseError; |
544 | /// # fn run() -> Result<(), ParseError> { |
545 | /// let options = Url::options(); |
546 | /// let api = Url::parse("https://api.example.com" )?; |
547 | /// let base_url = options.base_url(Some(&api)); |
548 | /// let version_url = base_url.parse("version.json" )?; |
549 | /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json" ); |
550 | /// # Ok(()) |
551 | /// # } |
552 | /// # run().unwrap(); |
553 | /// ``` |
554 | pub fn options<'a>() -> ParseOptions<'a> { |
555 | ParseOptions { |
556 | base_url: None, |
557 | encoding_override: None, |
558 | violation_fn: None, |
559 | } |
560 | } |
561 | |
562 | /// Return the serialization of this URL. |
563 | /// |
564 | /// This is fast since that serialization is already stored in the `Url` struct. |
565 | /// |
566 | /// # Examples |
567 | /// |
568 | /// ```rust |
569 | /// use url::Url; |
570 | /// # use url::ParseError; |
571 | /// |
572 | /// # fn run() -> Result<(), ParseError> { |
573 | /// let url_str = "https://example.net/" ; |
574 | /// let url = Url::parse(url_str)?; |
575 | /// assert_eq!(url.as_str(), url_str); |
576 | /// # Ok(()) |
577 | /// # } |
578 | /// # run().unwrap(); |
579 | /// ``` |
580 | #[inline ] |
581 | pub fn as_str(&self) -> &str { |
582 | &self.serialization |
583 | } |
584 | |
585 | /// Return the serialization of this URL. |
586 | /// |
587 | /// This consumes the `Url` and takes ownership of the `String` stored in it. |
588 | /// |
589 | /// # Examples |
590 | /// |
591 | /// ```rust |
592 | /// use url::Url; |
593 | /// # use url::ParseError; |
594 | /// |
595 | /// # fn run() -> Result<(), ParseError> { |
596 | /// let url_str = "https://example.net/" ; |
597 | /// let url = Url::parse(url_str)?; |
598 | /// assert_eq!(String::from(url), url_str); |
599 | /// # Ok(()) |
600 | /// # } |
601 | /// # run().unwrap(); |
602 | /// ``` |
603 | #[inline ] |
604 | #[deprecated (since = "2.3.0" , note = "use Into<String>" )] |
605 | pub fn into_string(self) -> String { |
606 | self.into() |
607 | } |
608 | |
/// For internal testing, not part of the public API.
///
/// Methods of the `Url` struct assume a number of invariants.
/// This checks each of these invariants and returns an `Err` with a
/// description of the first one that is not met, or `Ok(())` if all hold.
/// This is for testing rust-url itself.
///
/// # Panics
///
/// Panics (through `expect`) if the port substring of the serialization does
/// not parse as a `u16`, or if the serialization cannot be parsed again.
#[doc(hidden)]
pub fn check_invariants(&self) -> Result<(), String> {
    // Shadows the standard `assert!` inside this function: a failed check
    // returns an `Err` naming the expression and the URL instead of panicking.
    macro_rules! assert {
        ($x: expr) => {
            if !$x {
                return Err(format!(
                    "!( {} ) for URL {:?}",
                    stringify!($x),
                    self.serialization
                ));
            }
        };
    }

    // Shadows the standard `assert_eq!` in the same way, additionally
    // reporting both compared values.
    macro_rules! assert_eq {
        ($a: expr, $b: expr) => {
            {
                let a = $a;
                let b = $b;
                if a != b {
                    return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                       a, b, stringify!($a), stringify!($b),
                                       self.serialization))
                }
            }
        }
    }

    // Scheme: a leading ASCII letter, then letters, digits, '+', '-' or '.',
    // terminated by ':'.
    assert!(self.scheme_end >= 1);
    assert!(self.byte_at(0).is_ascii_alphabetic());
    assert!(self
        .slice(1..self.scheme_end)
        .chars()
        .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
    assert_eq!(self.byte_at(self.scheme_end), b':');

    if self.slice(self.scheme_end + 1..).starts_with("//") {
        // URL with authority
        if self.username_end != self.serialization.len() as u32 {
            // The byte at `username_end` tells whether a password, only a
            // username, or no userinfo at all is present.
            match self.byte_at(self.username_end) {
                b':' => {
                    assert!(self.host_start >= self.username_end + 2);
                    assert_eq!(self.byte_at(self.host_start - 1), b'@');
                }
                b'@' => assert!(self.host_start == self.username_end + 1),
                _ => assert_eq!(self.username_end, self.scheme_end + 3),
            }
        }
        assert!(self.host_start >= self.username_end);
        assert!(self.host_end >= self.host_start);
        // The stored host kind must agree with the serialized host text.
        let host_str = self.slice(self.host_start..self.host_end);
        match self.host {
            HostInternal::None => assert_eq!(host_str, ""),
            HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
            HostInternal::Ipv6(address) => {
                let h: Host<String> = Host::Ipv6(address);
                assert_eq!(host_str, h.to_string())
            }
            HostInternal::Domain => {
                if SchemeType::from(self.scheme()).is_special() {
                    assert!(!host_str.is_empty())
                }
            }
        }
        // Anything between the host and the path must be ":" followed by the port.
        if self.path_start == self.host_end {
            assert_eq!(self.port, None);
        } else {
            assert_eq!(self.byte_at(self.host_end), b':');
            let port_str = self.slice(self.host_end + 1..self.path_start);
            assert_eq!(
                self.port,
                Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
            );
        }
        // The path is either absent (end of string) or followed/started by '/', '#' or '?'.
        assert!(
            self.path_start as usize == self.serialization.len()
                || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
        );
    } else {
        // Anarchist URL (no authority)
        assert_eq!(self.username_end, self.scheme_end + 1);
        assert_eq!(self.host_start, self.scheme_end + 1);
        assert_eq!(self.host_end, self.scheme_end + 1);
        assert_eq!(self.host, HostInternal::None);
        assert_eq!(self.port, None);
        if self.path().starts_with("//") {
            // special case when first path segment is empty
            assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
            assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
            assert_eq!(self.path_start, self.scheme_end + 3);
        } else {
            assert_eq!(self.path_start, self.scheme_end + 1);
        }
    }
    // Query and fragment offsets point at their delimiters and come after the
    // path start; the fragment comes after the query.
    if let Some(start) = self.query_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'?');
    }
    if let Some(start) = self.fragment_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'#');
    }
    if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
        assert!(fragment_start > query_start);
    }

    // Round trip: re-parsing the serialization must reproduce every field.
    let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
    assert_eq!(&self.serialization, &other.serialization);
    assert_eq!(self.scheme_end, other.scheme_end);
    assert_eq!(self.username_end, other.username_end);
    assert_eq!(self.host_start, other.host_start);
    assert_eq!(self.host_end, other.host_end);
    assert!(
        self.host == other.host ||
            // XXX No host round-trips to empty host.
            // See https://github.com/whatwg/url/issues/79
            (self.host_str(), other.host_str()) == (None, Some(""))
    );
    assert_eq!(self.port, other.port);
    assert_eq!(self.path_start, other.path_start);
    assert_eq!(self.query_start, other.query_start);
    assert_eq!(self.fragment_start, other.fragment_start);
    Ok(())
}
738 | |
739 | /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>) |
740 | /// |
741 | /// Note: this returns an opaque origin for `file:` URLs, which causes |
742 | /// `url.origin() != url.origin()`. |
743 | /// |
744 | /// # Examples |
745 | /// |
746 | /// URL with `ftp` scheme: |
747 | /// |
748 | /// ```rust |
749 | /// use url::{Host, Origin, Url}; |
750 | /// # use url::ParseError; |
751 | /// |
752 | /// # fn run() -> Result<(), ParseError> { |
753 | /// let url = Url::parse("ftp://example.com/foo" )?; |
754 | /// assert_eq!(url.origin(), |
755 | /// Origin::Tuple("ftp" .into(), |
756 | /// Host::Domain("example.com" .into()), |
757 | /// 21)); |
758 | /// # Ok(()) |
759 | /// # } |
760 | /// # run().unwrap(); |
761 | /// ``` |
762 | /// |
763 | /// URL with `blob` scheme: |
764 | /// |
765 | /// ```rust |
766 | /// use url::{Host, Origin, Url}; |
767 | /// # use url::ParseError; |
768 | /// |
769 | /// # fn run() -> Result<(), ParseError> { |
770 | /// let url = Url::parse("blob:https://example.com/foo" )?; |
771 | /// assert_eq!(url.origin(), |
772 | /// Origin::Tuple("https" .into(), |
773 | /// Host::Domain("example.com" .into()), |
774 | /// 443)); |
775 | /// # Ok(()) |
776 | /// # } |
777 | /// # run().unwrap(); |
778 | /// ``` |
779 | /// |
780 | /// URL with `file` scheme: |
781 | /// |
782 | /// ```rust |
783 | /// use url::{Host, Origin, Url}; |
784 | /// # use url::ParseError; |
785 | /// |
786 | /// # fn run() -> Result<(), ParseError> { |
787 | /// let url = Url::parse("file:///tmp/foo" )?; |
788 | /// assert!(!url.origin().is_tuple()); |
789 | /// |
790 | /// let other_url = Url::parse("file:///tmp/foo" )?; |
791 | /// assert!(url.origin() != other_url.origin()); |
792 | /// # Ok(()) |
793 | /// # } |
794 | /// # run().unwrap(); |
795 | /// ``` |
796 | /// |
797 | /// URL with other scheme: |
798 | /// |
799 | /// ```rust |
800 | /// use url::{Host, Origin, Url}; |
801 | /// # use url::ParseError; |
802 | /// |
803 | /// # fn run() -> Result<(), ParseError> { |
804 | /// let url = Url::parse("foo:bar" )?; |
805 | /// assert!(!url.origin().is_tuple()); |
806 | /// # Ok(()) |
807 | /// # } |
808 | /// # run().unwrap(); |
809 | /// ``` |
810 | #[inline ] |
811 | pub fn origin(&self) -> Origin { |
812 | origin::url_origin(self) |
813 | } |
814 | |
815 | /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter. |
816 | /// |
817 | /// # Examples |
818 | /// |
819 | /// ``` |
820 | /// use url::Url; |
821 | /// # use url::ParseError; |
822 | /// |
823 | /// # fn run() -> Result<(), ParseError> { |
824 | /// let url = Url::parse("file:///tmp/foo" )?; |
825 | /// assert_eq!(url.scheme(), "file" ); |
826 | /// # Ok(()) |
827 | /// # } |
828 | /// # run().unwrap(); |
829 | /// ``` |
830 | #[inline ] |
831 | pub fn scheme(&self) -> &str { |
832 | self.slice(..self.scheme_end) |
833 | } |
834 | |
835 | /// Return whether the URL is special (has a special scheme) |
836 | /// |
837 | /// # Examples |
838 | /// |
839 | /// ``` |
840 | /// use url::Url; |
841 | /// # use url::ParseError; |
842 | /// |
843 | /// # fn run() -> Result<(), ParseError> { |
844 | /// assert!(Url::parse("http:///tmp/foo" )?.is_special()); |
845 | /// assert!(Url::parse("file:///tmp/foo" )?.is_special()); |
846 | /// assert!(!Url::parse("moz:///tmp/foo" )?.is_special()); |
847 | /// # Ok(()) |
848 | /// # } |
849 | /// # run().unwrap(); |
850 | /// ``` |
851 | pub fn is_special(&self) -> bool { |
852 | let scheme_type = SchemeType::from(self.scheme()); |
853 | scheme_type.is_special() |
854 | } |
855 | |
856 | /// Return whether the URL has an 'authority', |
857 | /// which can contain a username, password, host, and port number. |
858 | /// |
859 | /// URLs that do *not* are either path-only like `unix:/run/foo.socket` |
860 | /// or cannot-be-a-base like `data:text/plain,Stuff`. |
861 | /// |
862 | /// See also the `authority` method. |
863 | /// |
864 | /// # Examples |
865 | /// |
866 | /// ``` |
867 | /// use url::Url; |
868 | /// # use url::ParseError; |
869 | /// |
870 | /// # fn run() -> Result<(), ParseError> { |
871 | /// let url = Url::parse("ftp://rms@example.com" )?; |
872 | /// assert!(url.has_authority()); |
873 | /// |
874 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
875 | /// assert!(!url.has_authority()); |
876 | /// |
877 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
878 | /// assert!(!url.has_authority()); |
879 | /// # Ok(()) |
880 | /// # } |
881 | /// # run().unwrap(); |
882 | /// ``` |
883 | #[inline ] |
884 | pub fn has_authority(&self) -> bool { |
885 | debug_assert!(self.byte_at(self.scheme_end) == b':' ); |
886 | self.slice(self.scheme_end..).starts_with("://" ) |
887 | } |
888 | |
889 | /// Return the authority of this URL as an ASCII string. |
890 | /// |
891 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
892 | /// of a special URL, or percent encoded for non-special URLs. |
893 | /// IPv6 addresses are given between `[` and `]` brackets. |
894 | /// Ports are omitted if they match the well known port of a special URL. |
895 | /// |
896 | /// Username and password are percent-encoded. |
897 | /// |
898 | /// See also the `has_authority` method. |
899 | /// |
900 | /// # Examples |
901 | /// |
902 | /// ``` |
903 | /// use url::Url; |
904 | /// # use url::ParseError; |
905 | /// |
906 | /// # fn run() -> Result<(), ParseError> { |
907 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
908 | /// assert_eq!(url.authority(), "" ); |
909 | /// let url = Url::parse("file:///tmp/foo" )?; |
910 | /// assert_eq!(url.authority(), "" ); |
911 | /// let url = Url::parse("https://user:password@example.com/tmp/foo" )?; |
912 | /// assert_eq!(url.authority(), "user:password@example.com" ); |
913 | /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo" )?; |
914 | /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667" ); |
915 | /// let url = Url::parse("http://àlex.рф.example.com:80/foo" )?; |
916 | /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com" ); |
917 | /// # Ok(()) |
918 | /// # } |
919 | /// # run().unwrap(); |
920 | /// ``` |
921 | pub fn authority(&self) -> &str { |
922 | let scheme_separator_len = "://" .len() as u32; |
923 | if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len { |
924 | self.slice(self.scheme_end + scheme_separator_len..self.path_start) |
925 | } else { |
926 | "" |
927 | } |
928 | } |
929 | |
930 | /// Return whether this URL is a cannot-be-a-base URL, |
931 | /// meaning that parsing a relative URL string with this URL as the base will return an error. |
932 | /// |
933 | /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash, |
934 | /// as is typically the case of `data:` and `mailto:` URLs. |
935 | /// |
936 | /// # Examples |
937 | /// |
938 | /// ``` |
939 | /// use url::Url; |
940 | /// # use url::ParseError; |
941 | /// |
942 | /// # fn run() -> Result<(), ParseError> { |
943 | /// let url = Url::parse("ftp://rms@example.com" )?; |
944 | /// assert!(!url.cannot_be_a_base()); |
945 | /// |
946 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
947 | /// assert!(!url.cannot_be_a_base()); |
948 | /// |
949 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
950 | /// assert!(url.cannot_be_a_base()); |
951 | /// # Ok(()) |
952 | /// # } |
953 | /// # run().unwrap(); |
954 | /// ``` |
955 | #[inline ] |
956 | pub fn cannot_be_a_base(&self) -> bool { |
957 | !self.slice(self.scheme_end + 1..).starts_with('/' ) |
958 | } |
959 | |
960 | /// Return the username for this URL (typically the empty string) |
961 | /// as a percent-encoded ASCII string. |
962 | /// |
963 | /// # Examples |
964 | /// |
965 | /// ``` |
966 | /// use url::Url; |
967 | /// # use url::ParseError; |
968 | /// |
969 | /// # fn run() -> Result<(), ParseError> { |
970 | /// let url = Url::parse("ftp://rms@example.com" )?; |
971 | /// assert_eq!(url.username(), "rms" ); |
972 | /// |
973 | /// let url = Url::parse("ftp://:secret123@example.com" )?; |
974 | /// assert_eq!(url.username(), "" ); |
975 | /// |
976 | /// let url = Url::parse("https://example.com" )?; |
977 | /// assert_eq!(url.username(), "" ); |
978 | /// # Ok(()) |
979 | /// # } |
980 | /// # run().unwrap(); |
981 | /// ``` |
982 | pub fn username(&self) -> &str { |
983 | let scheme_separator_len = "://" .len() as u32; |
984 | if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len { |
985 | self.slice(self.scheme_end + scheme_separator_len..self.username_end) |
986 | } else { |
987 | "" |
988 | } |
989 | } |
990 | |
991 | /// Return the password for this URL, if any, as a percent-encoded ASCII string. |
992 | /// |
993 | /// # Examples |
994 | /// |
995 | /// ``` |
996 | /// use url::Url; |
997 | /// # use url::ParseError; |
998 | /// |
999 | /// # fn run() -> Result<(), ParseError> { |
1000 | /// let url = Url::parse("ftp://rms:secret123@example.com" )?; |
1001 | /// assert_eq!(url.password(), Some("secret123" )); |
1002 | /// |
1003 | /// let url = Url::parse("ftp://:secret123@example.com" )?; |
1004 | /// assert_eq!(url.password(), Some("secret123" )); |
1005 | /// |
1006 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1007 | /// assert_eq!(url.password(), None); |
1008 | /// |
1009 | /// let url = Url::parse("https://example.com" )?; |
1010 | /// assert_eq!(url.password(), None); |
1011 | /// # Ok(()) |
1012 | /// # } |
1013 | /// # run().unwrap(); |
1014 | /// ``` |
1015 | pub fn password(&self) -> Option<&str> { |
1016 | // This ':' is not the one marking a port number since a host can not be empty. |
1017 | // (Except for file: URLs, which do not have port numbers.) |
1018 | if self.has_authority() |
1019 | && self.username_end != self.serialization.len() as u32 |
1020 | && self.byte_at(self.username_end) == b':' |
1021 | { |
1022 | debug_assert!(self.byte_at(self.host_start - 1) == b'@' ); |
1023 | Some(self.slice(self.username_end + 1..self.host_start - 1)) |
1024 | } else { |
1025 | None |
1026 | } |
1027 | } |
1028 | |
1029 | /// Equivalent to `url.host().is_some()`. |
1030 | /// |
1031 | /// # Examples |
1032 | /// |
1033 | /// ``` |
1034 | /// use url::Url; |
1035 | /// # use url::ParseError; |
1036 | /// |
1037 | /// # fn run() -> Result<(), ParseError> { |
1038 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1039 | /// assert!(url.has_host()); |
1040 | /// |
1041 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1042 | /// assert!(!url.has_host()); |
1043 | /// |
1044 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1045 | /// assert!(!url.has_host()); |
1046 | /// # Ok(()) |
1047 | /// # } |
1048 | /// # run().unwrap(); |
1049 | /// ``` |
1050 | pub fn has_host(&self) -> bool { |
1051 | !matches!(self.host, HostInternal::None) |
1052 | } |
1053 | |
1054 | /// Return the string representation of the host (domain or IP address) for this URL, if any. |
1055 | /// |
1056 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
1057 | /// of a special URL, or percent encoded for non-special URLs. |
1058 | /// IPv6 addresses are given between `[` and `]` brackets. |
1059 | /// |
1060 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs |
1061 | /// don’t have a host. |
1062 | /// |
1063 | /// See also the `host` method. |
1064 | /// |
1065 | /// # Examples |
1066 | /// |
1067 | /// ``` |
1068 | /// use url::Url; |
1069 | /// # use url::ParseError; |
1070 | /// |
1071 | /// # fn run() -> Result<(), ParseError> { |
1072 | /// let url = Url::parse("https://127.0.0.1/index.html" )?; |
1073 | /// assert_eq!(url.host_str(), Some("127.0.0.1" )); |
1074 | /// |
1075 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1076 | /// assert_eq!(url.host_str(), Some("example.com" )); |
1077 | /// |
1078 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1079 | /// assert_eq!(url.host_str(), None); |
1080 | /// |
1081 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1082 | /// assert_eq!(url.host_str(), None); |
1083 | /// # Ok(()) |
1084 | /// # } |
1085 | /// # run().unwrap(); |
1086 | /// ``` |
1087 | pub fn host_str(&self) -> Option<&str> { |
1088 | if self.has_host() { |
1089 | Some(self.slice(self.host_start..self.host_end)) |
1090 | } else { |
1091 | None |
1092 | } |
1093 | } |
1094 | |
1095 | /// Return the parsed representation of the host for this URL. |
1096 | /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host |
1097 | /// of a special URL, or percent encoded for non-special URLs. |
1098 | /// |
1099 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs |
1100 | /// don’t have a host. |
1101 | /// |
1102 | /// See also the `host_str` method. |
1103 | /// |
1104 | /// # Examples |
1105 | /// |
1106 | /// ``` |
1107 | /// use url::Url; |
1108 | /// # use url::ParseError; |
1109 | /// |
1110 | /// # fn run() -> Result<(), ParseError> { |
1111 | /// let url = Url::parse("https://127.0.0.1/index.html" )?; |
1112 | /// assert!(url.host().is_some()); |
1113 | /// |
1114 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1115 | /// assert!(url.host().is_some()); |
1116 | /// |
1117 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1118 | /// assert!(url.host().is_none()); |
1119 | /// |
1120 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1121 | /// assert!(url.host().is_none()); |
1122 | /// # Ok(()) |
1123 | /// # } |
1124 | /// # run().unwrap(); |
1125 | /// ``` |
1126 | pub fn host(&self) -> Option<Host<&str>> { |
1127 | match self.host { |
1128 | HostInternal::None => None, |
1129 | HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))), |
1130 | HostInternal::Ipv4(address) => Some(Host::Ipv4(address)), |
1131 | HostInternal::Ipv6(address) => Some(Host::Ipv6(address)), |
1132 | } |
1133 | } |
1134 | |
1135 | /// If this URL has a host and it is a domain name (not an IP address), return it. |
1136 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
1137 | /// of a special URL, or percent encoded for non-special URLs. |
1138 | /// |
1139 | /// # Examples |
1140 | /// |
1141 | /// ``` |
1142 | /// use url::Url; |
1143 | /// # use url::ParseError; |
1144 | /// |
1145 | /// # fn run() -> Result<(), ParseError> { |
1146 | /// let url = Url::parse("https://127.0.0.1/" )?; |
1147 | /// assert_eq!(url.domain(), None); |
1148 | /// |
1149 | /// let url = Url::parse("mailto:rms@example.net" )?; |
1150 | /// assert_eq!(url.domain(), None); |
1151 | /// |
1152 | /// let url = Url::parse("https://example.com/" )?; |
1153 | /// assert_eq!(url.domain(), Some("example.com" )); |
1154 | /// # Ok(()) |
1155 | /// # } |
1156 | /// # run().unwrap(); |
1157 | /// ``` |
1158 | pub fn domain(&self) -> Option<&str> { |
1159 | match self.host { |
1160 | HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)), |
1161 | _ => None, |
1162 | } |
1163 | } |
1164 | |
1165 | /// Return the port number for this URL, if any. |
1166 | /// |
1167 | /// Note that default port numbers are never reflected by the serialization, |
1168 | /// use the `port_or_known_default()` method if you want a default port number returned. |
1169 | /// |
1170 | /// # Examples |
1171 | /// |
1172 | /// ``` |
1173 | /// use url::Url; |
1174 | /// # use url::ParseError; |
1175 | /// |
1176 | /// # fn run() -> Result<(), ParseError> { |
1177 | /// let url = Url::parse("https://example.com" )?; |
1178 | /// assert_eq!(url.port(), None); |
1179 | /// |
1180 | /// let url = Url::parse("https://example.com:443/" )?; |
1181 | /// assert_eq!(url.port(), None); |
1182 | /// |
1183 | /// let url = Url::parse("ssh://example.com:22" )?; |
1184 | /// assert_eq!(url.port(), Some(22)); |
1185 | /// # Ok(()) |
1186 | /// # } |
1187 | /// # run().unwrap(); |
1188 | /// ``` |
#[inline ]
pub fn port(&self) -> Option<u16> {
    // Plain read of the stored field. `set_port` stores `None` when asked to set
    // a scheme's known default port, so no filtering is done here.
    self.port
}
1193 | |
1194 | /// Return the port number for this URL, or the default port number if it is known. |
1195 | /// |
1196 | /// This method only knows the default port number |
1197 | /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes. |
1198 | /// |
1199 | /// For URLs in these schemes, this method always returns `Some(_)`. |
1200 | /// For other schemes, it is the same as `Url::port()`. |
1201 | /// |
1202 | /// # Examples |
1203 | /// |
1204 | /// ``` |
1205 | /// use url::Url; |
1206 | /// # use url::ParseError; |
1207 | /// |
1208 | /// # fn run() -> Result<(), ParseError> { |
1209 | /// let url = Url::parse("foo://example.com" )?; |
1210 | /// assert_eq!(url.port_or_known_default(), None); |
1211 | /// |
1212 | /// let url = Url::parse("foo://example.com:1456" )?; |
1213 | /// assert_eq!(url.port_or_known_default(), Some(1456)); |
1214 | /// |
1215 | /// let url = Url::parse("https://example.com" )?; |
1216 | /// assert_eq!(url.port_or_known_default(), Some(443)); |
1217 | /// # Ok(()) |
1218 | /// # } |
1219 | /// # run().unwrap(); |
1220 | /// ``` |
1221 | #[inline ] |
1222 | pub fn port_or_known_default(&self) -> Option<u16> { |
1223 | self.port.or_else(|| parser::default_port(self.scheme())) |
1224 | } |
1225 | |
1226 | /// Resolve a URL’s host and port number to `SocketAddr`. |
1227 | /// |
1228 | /// If the URL has the default port number of a scheme that is unknown to this library, |
1229 | /// `default_port_number` provides an opportunity to provide the actual port number. |
1230 | /// In non-example code this should be implemented either simply as `|| None`, |
1231 | /// or by matching on the URL’s `.scheme()`. |
1232 | /// |
1233 | /// If the host is a domain, it is resolved using the standard library’s DNS support. |
1234 | /// |
1235 | /// # Examples |
1236 | /// |
1237 | /// ```no_run |
1238 | /// let url = url::Url::parse("https://example.net/" ).unwrap(); |
1239 | /// let addrs = url.socket_addrs(|| None).unwrap(); |
1240 | /// std::net::TcpStream::connect(&*addrs) |
1241 | /// # ; |
1242 | /// ``` |
1243 | /// |
1244 | /// ``` |
1245 | /// /// With application-specific known default port numbers |
1246 | /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> { |
1247 | /// url.socket_addrs(|| match url.scheme() { |
1248 | /// "socks5" | "socks5h" => Some(1080), |
1249 | /// _ => None, |
1250 | /// }) |
1251 | /// } |
1252 | /// ``` |
1253 | pub fn socket_addrs( |
1254 | &self, |
1255 | default_port_number: impl Fn() -> Option<u16>, |
1256 | ) -> io::Result<Vec<SocketAddr>> { |
1257 | // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>` |
1258 | // causes borrowck issues because the return value borrows `default_port_number`: |
1259 | // |
1260 | // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters |
1261 | // |
1262 | // > This RFC proposes that *all* type parameters are considered in scope |
1263 | // > for `impl Trait` in return position |
1264 | |
1265 | fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> { |
1266 | opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message)) |
1267 | } |
1268 | |
1269 | let host = io_result(self.host(), "No host name in the URL" )?; |
1270 | let port = io_result( |
1271 | self.port_or_known_default().or_else(default_port_number), |
1272 | "No port number in the URL" , |
1273 | )?; |
1274 | Ok(match host { |
1275 | Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(), |
1276 | Host::Ipv4(ip) => vec![(ip, port).into()], |
1277 | Host::Ipv6(ip) => vec![(ip, port).into()], |
1278 | }) |
1279 | } |
1280 | |
1281 | /// Return the path for this URL, as a percent-encoded ASCII string. |
1282 | /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. |
1283 | /// For other URLs, this starts with a '/' slash |
1284 | /// and continues with slash-separated path segments. |
1285 | /// |
1286 | /// # Examples |
1287 | /// |
1288 | /// ```rust |
1289 | /// use url::{Url, ParseError}; |
1290 | /// |
1291 | /// # fn run() -> Result<(), ParseError> { |
1292 | /// let url = Url::parse("https://example.com/api/versions?page=2" )?; |
1293 | /// assert_eq!(url.path(), "/api/versions" ); |
1294 | /// |
1295 | /// let url = Url::parse("https://example.com" )?; |
1296 | /// assert_eq!(url.path(), "/" ); |
1297 | /// |
1298 | /// let url = Url::parse("https://example.com/countries/việt nam" )?; |
1299 | /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam" ); |
1300 | /// # Ok(()) |
1301 | /// # } |
1302 | /// # run().unwrap(); |
1303 | /// ``` |
1304 | pub fn path(&self) -> &str { |
1305 | match (self.query_start, self.fragment_start) { |
1306 | (None, None) => self.slice(self.path_start..), |
1307 | (Some(next_component_start), _) | (None, Some(next_component_start)) => { |
1308 | self.slice(self.path_start..next_component_start) |
1309 | } |
1310 | } |
1311 | } |
1312 | |
1313 | /// Unless this URL is cannot-be-a-base, |
1314 | /// return an iterator of '/' slash-separated path segments, |
1315 | /// each as a percent-encoded ASCII string. |
1316 | /// |
1317 | /// Return `None` for cannot-be-a-base URLs. |
1318 | /// |
1319 | /// When `Some` is returned, the iterator always contains at least one string |
1320 | /// (which may be empty). |
1321 | /// |
1322 | /// # Examples |
1323 | /// |
1324 | /// ``` |
1325 | /// use url::Url; |
1326 | /// # use std::error::Error; |
1327 | /// |
1328 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1329 | /// let url = Url::parse("https://example.com/foo/bar" )?; |
1330 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1331 | /// assert_eq!(path_segments.next(), Some("foo" )); |
1332 | /// assert_eq!(path_segments.next(), Some("bar" )); |
1333 | /// assert_eq!(path_segments.next(), None); |
1334 | /// |
1335 | /// let url = Url::parse("https://example.com" )?; |
1336 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1337 | /// assert_eq!(path_segments.next(), Some("" )); |
1338 | /// assert_eq!(path_segments.next(), None); |
1339 | /// |
1340 | /// let url = Url::parse("data:text/plain,HelloWorld" )?; |
1341 | /// assert!(url.path_segments().is_none()); |
1342 | /// |
1343 | /// let url = Url::parse("https://example.com/countries/việt nam" )?; |
1344 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1345 | /// assert_eq!(path_segments.next(), Some("countries" )); |
1346 | /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam" )); |
1347 | /// # Ok(()) |
1348 | /// # } |
1349 | /// # run().unwrap(); |
1350 | /// ``` |
1351 | pub fn path_segments(&self) -> Option<str::Split<'_, char>> { |
1352 | let path = self.path(); |
1353 | path.strip_prefix('/' ).map(|remainder| remainder.split('/' )) |
1354 | } |
1355 | |
1356 | /// Return this URL’s query string, if any, as a percent-encoded ASCII string. |
1357 | /// |
1358 | /// # Examples |
1359 | /// |
1360 | /// ```rust |
1361 | /// use url::Url; |
1362 | /// # use url::ParseError; |
1363 | /// |
/// # fn run() -> Result<(), ParseError> {
1365 | /// let url = Url::parse("https://example.com/products?page=2" )?; |
1366 | /// let query = url.query(); |
1367 | /// assert_eq!(query, Some("page=2" )); |
1368 | /// |
1369 | /// let url = Url::parse("https://example.com/products" )?; |
1370 | /// let query = url.query(); |
1371 | /// assert!(query.is_none()); |
1372 | /// |
1373 | /// let url = Url::parse("https://example.com/?country=español" )?; |
1374 | /// let query = url.query(); |
1375 | /// assert_eq!(query, Some("country=espa%C3%B1ol" )); |
1376 | /// # Ok(()) |
1377 | /// # } |
1378 | /// # run().unwrap(); |
1379 | /// ``` |
1380 | pub fn query(&self) -> Option<&str> { |
1381 | match (self.query_start, self.fragment_start) { |
1382 | (None, _) => None, |
1383 | (Some(query_start), None) => { |
1384 | debug_assert!(self.byte_at(query_start) == b'?' ); |
1385 | Some(self.slice(query_start + 1..)) |
1386 | } |
1387 | (Some(query_start), Some(fragment_start)) => { |
1388 | debug_assert!(self.byte_at(query_start) == b'?' ); |
1389 | Some(self.slice(query_start + 1..fragment_start)) |
1390 | } |
1391 | } |
1392 | } |
1393 | |
1394 | /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded` |
1395 | /// and return an iterator of (key, value) pairs. |
1396 | /// |
1397 | /// # Examples |
1398 | /// |
1399 | /// ```rust |
1400 | /// use std::borrow::Cow; |
1401 | /// |
1402 | /// use url::Url; |
1403 | /// # use url::ParseError; |
1404 | /// |
1405 | /// # fn run() -> Result<(), ParseError> { |
1406 | /// let url = Url::parse("https://example.com/products?page=2&sort=desc" )?; |
1407 | /// let mut pairs = url.query_pairs(); |
1408 | /// |
1409 | /// assert_eq!(pairs.count(), 2); |
1410 | /// |
1411 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page" ), Cow::Borrowed("2" )))); |
1412 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort" ), Cow::Borrowed("desc" )))); |
1413 | /// # Ok(()) |
1414 | /// # } |
1415 | /// # run().unwrap(); |
1416 | /// ``` |
1417 | |
1418 | #[inline ] |
1419 | pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> { |
1420 | form_urlencoded::parse(self.query().unwrap_or("" ).as_bytes()) |
1421 | } |
1422 | |
1423 | /// Return this URL’s fragment identifier, if any. |
1424 | /// |
1425 | /// A fragment is the part of the URL after the `#` symbol. |
1426 | /// The fragment is optional and, if present, contains a fragment identifier |
1427 | /// that identifies a secondary resource, such as a section heading |
1428 | /// of a document. |
1429 | /// |
/// In HTML, the fragment identifier is usually the id attribute of an element
1431 | /// that is scrolled to on load. Browsers typically will not send the fragment portion |
1432 | /// of a URL to the server. |
1433 | /// |
1434 | /// **Note:** the parser did *not* percent-encode this component, |
1435 | /// but the input may have been percent-encoded already. |
1436 | /// |
1437 | /// # Examples |
1438 | /// |
1439 | /// ```rust |
1440 | /// use url::Url; |
1441 | /// # use url::ParseError; |
1442 | /// |
1443 | /// # fn run() -> Result<(), ParseError> { |
1444 | /// let url = Url::parse("https://example.com/data.csv#row=4" )?; |
1445 | /// |
1446 | /// assert_eq!(url.fragment(), Some("row=4" )); |
1447 | /// |
1448 | /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2" )?; |
1449 | /// |
1450 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2" )); |
1451 | /// # Ok(()) |
1452 | /// # } |
1453 | /// # run().unwrap(); |
1454 | /// ``` |
1455 | pub fn fragment(&self) -> Option<&str> { |
1456 | self.fragment_start.map(|start| { |
1457 | debug_assert!(self.byte_at(start) == b'#' ); |
1458 | self.slice(start + 1..) |
1459 | }) |
1460 | } |
1461 | |
1462 | fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R { |
1463 | let mut parser = Parser::for_setter(mem::take(&mut self.serialization)); |
1464 | let result = f(&mut parser); |
1465 | self.serialization = parser.serialization; |
1466 | result |
1467 | } |
1468 | |
1469 | /// Change this URL’s fragment identifier. |
1470 | /// |
1471 | /// # Examples |
1472 | /// |
1473 | /// ```rust |
1474 | /// use url::Url; |
1475 | /// # use url::ParseError; |
1476 | /// |
1477 | /// # fn run() -> Result<(), ParseError> { |
1478 | /// let mut url = Url::parse("https://example.com/data.csv" )?; |
/// assert_eq!(url.as_str(), "https://example.com/data.csv");
///
/// url.set_fragment(Some("cell=4,1-6,2"));
1482 | /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2" ); |
1483 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2" )); |
1484 | /// |
1485 | /// url.set_fragment(None); |
1486 | /// assert_eq!(url.as_str(), "https://example.com/data.csv" ); |
1487 | /// assert!(url.fragment().is_none()); |
1488 | /// # Ok(()) |
1489 | /// # } |
1490 | /// # run().unwrap(); |
1491 | /// ``` |
1492 | pub fn set_fragment(&mut self, fragment: Option<&str>) { |
1493 | // Remove any previous fragment |
1494 | if let Some(start) = self.fragment_start { |
1495 | debug_assert!(self.byte_at(start) == b'#' ); |
1496 | self.serialization.truncate(start as usize); |
1497 | } |
1498 | // Write the new one |
1499 | if let Some(input) = fragment { |
1500 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); |
1501 | self.serialization.push('#' ); |
1502 | self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input))) |
1503 | } else { |
1504 | self.fragment_start = None; |
1505 | self.strip_trailing_spaces_from_opaque_path(); |
1506 | } |
1507 | } |
1508 | |
1509 | fn take_fragment(&mut self) -> Option<String> { |
1510 | self.fragment_start.take().map(|start| { |
1511 | debug_assert!(self.byte_at(start) == b'#' ); |
1512 | let fragment = self.slice(start + 1..).to_owned(); |
1513 | self.serialization.truncate(start as usize); |
1514 | fragment |
1515 | }) |
1516 | } |
1517 | |
1518 | fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) { |
1519 | if let Some(ref fragment) = fragment { |
1520 | assert!(self.fragment_start.is_none()); |
1521 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); |
1522 | self.serialization.push('#' ); |
1523 | self.serialization.push_str(fragment); |
1524 | } |
1525 | } |
1526 | |
1527 | /// Change this URL’s query string. |
1528 | /// |
1529 | /// # Examples |
1530 | /// |
1531 | /// ```rust |
1532 | /// use url::Url; |
1533 | /// # use url::ParseError; |
1534 | /// |
1535 | /// # fn run() -> Result<(), ParseError> { |
1536 | /// let mut url = Url::parse("https://example.com/products" )?; |
1537 | /// assert_eq!(url.as_str(), "https://example.com/products" ); |
1538 | /// |
1539 | /// url.set_query(Some("page=2" )); |
1540 | /// assert_eq!(url.as_str(), "https://example.com/products?page=2" ); |
1541 | /// assert_eq!(url.query(), Some("page=2" )); |
1542 | /// # Ok(()) |
1543 | /// # } |
1544 | /// # run().unwrap(); |
1545 | /// ``` |
1546 | pub fn set_query(&mut self, query: Option<&str>) { |
1547 | let fragment = self.take_fragment(); |
1548 | |
1549 | // Remove any previous query |
1550 | if let Some(start) = self.query_start.take() { |
1551 | debug_assert!(self.byte_at(start) == b'?' ); |
1552 | self.serialization.truncate(start as usize); |
1553 | } |
1554 | // Write the new query, if any |
1555 | if let Some(input) = query { |
1556 | self.query_start = Some(to_u32(self.serialization.len()).unwrap()); |
1557 | self.serialization.push('?' ); |
1558 | let scheme_type = SchemeType::from(self.scheme()); |
1559 | let scheme_end = self.scheme_end; |
1560 | self.mutate(|parser| { |
1561 | let vfn = parser.violation_fn; |
1562 | parser.parse_query( |
1563 | scheme_type, |
1564 | scheme_end, |
1565 | parser::Input::new_trim_tab_and_newlines(input, vfn), |
1566 | ) |
1567 | }); |
1568 | } else { |
1569 | self.query_start = None; |
1570 | if fragment.is_none() { |
1571 | self.strip_trailing_spaces_from_opaque_path(); |
1572 | } |
1573 | } |
1574 | |
1575 | self.restore_already_parsed_fragment(fragment); |
1576 | } |
1577 | |
1578 | /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs |
1579 | /// in `application/x-www-form-urlencoded` syntax. |
1580 | /// |
1581 | /// The return value has a method-chaining API: |
1582 | /// |
1583 | /// ```rust |
1584 | /// # use url::{Url, ParseError}; |
1585 | /// |
1586 | /// # fn run() -> Result<(), ParseError> { |
1587 | /// let mut url = Url::parse("https://example.net?lang=fr#nav" )?; |
1588 | /// assert_eq!(url.query(), Some("lang=fr" )); |
1589 | /// |
1590 | /// url.query_pairs_mut().append_pair("foo" , "bar" ); |
1591 | /// assert_eq!(url.query(), Some("lang=fr&foo=bar" )); |
1592 | /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav" ); |
1593 | /// |
1594 | /// url.query_pairs_mut() |
1595 | /// .clear() |
1596 | /// .append_pair("foo" , "bar & baz" ) |
1597 | /// .append_pair("saisons" , " \u{00C9}t \u{00E9}+hiver" ); |
1598 | /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver" )); |
1599 | /// assert_eq!(url.as_str(), |
1600 | /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav" ); |
1601 | /// # Ok(()) |
1602 | /// # } |
1603 | /// # run().unwrap(); |
1604 | /// ``` |
1605 | /// |
1606 | /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`, |
1607 | /// not `url.set_query(None)`. |
1608 | /// |
1609 | /// The state of `Url` is unspecified if this return value is leaked without being dropped. |
1610 | pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> { |
1611 | let fragment = self.take_fragment(); |
1612 | |
1613 | let query_start; |
1614 | if let Some(start) = self.query_start { |
1615 | debug_assert!(self.byte_at(start) == b'?' ); |
1616 | query_start = start as usize; |
1617 | } else { |
1618 | query_start = self.serialization.len(); |
1619 | self.query_start = Some(to_u32(query_start).unwrap()); |
1620 | self.serialization.push('?' ); |
1621 | } |
1622 | |
1623 | let query = UrlQuery { |
1624 | url: Some(self), |
1625 | fragment, |
1626 | }; |
1627 | form_urlencoded::Serializer::for_suffix(query, query_start + "?" .len()) |
1628 | } |
1629 | |
1630 | fn take_after_path(&mut self) -> String { |
1631 | match (self.query_start, self.fragment_start) { |
1632 | (Some(i), _) | (None, Some(i)) => { |
1633 | let after_path = self.slice(i..).to_owned(); |
1634 | self.serialization.truncate(i as usize); |
1635 | after_path |
1636 | } |
1637 | (None, None) => String::new(), |
1638 | } |
1639 | } |
1640 | |
1641 | /// Change this URL’s path. |
1642 | /// |
1643 | /// # Examples |
1644 | /// |
1645 | /// ```rust |
1646 | /// use url::Url; |
1647 | /// # use url::ParseError; |
1648 | /// |
1649 | /// # fn run() -> Result<(), ParseError> { |
1650 | /// let mut url = Url::parse("https://example.com" )?; |
1651 | /// url.set_path("api/comments" ); |
1652 | /// assert_eq!(url.as_str(), "https://example.com/api/comments" ); |
1653 | /// assert_eq!(url.path(), "/api/comments" ); |
1654 | /// |
1655 | /// let mut url = Url::parse("https://example.com/api" )?; |
1656 | /// url.set_path("data/report.csv" ); |
1657 | /// assert_eq!(url.as_str(), "https://example.com/data/report.csv" ); |
1658 | /// assert_eq!(url.path(), "/data/report.csv" ); |
1659 | /// |
1660 | /// // `set_path` percent-encodes the given string if it's not already percent-encoded. |
1661 | /// let mut url = Url::parse("https://example.com" )?; |
1662 | /// url.set_path("api/some comments" ); |
1663 | /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments" ); |
1664 | /// assert_eq!(url.path(), "/api/some%20comments" ); |
1665 | /// |
1666 | /// // `set_path` will not double percent-encode the string if it's already percent-encoded. |
1667 | /// let mut url = Url::parse("https://example.com" )?; |
1668 | /// url.set_path("api/some%20comments" ); |
1669 | /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments" ); |
1670 | /// assert_eq!(url.path(), "/api/some%20comments" ); |
1671 | /// |
1672 | /// # Ok(()) |
1673 | /// # } |
1674 | /// # run().unwrap(); |
1675 | /// ``` |
1676 | pub fn set_path(&mut self, mut path: &str) { |
1677 | let after_path = self.take_after_path(); |
1678 | let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); |
1679 | let cannot_be_a_base = self.cannot_be_a_base(); |
1680 | let scheme_type = SchemeType::from(self.scheme()); |
1681 | self.serialization.truncate(self.path_start as usize); |
1682 | self.mutate(|parser| { |
1683 | if cannot_be_a_base { |
1684 | if path.starts_with('/' ) { |
1685 | parser.serialization.push_str("%2F" ); |
1686 | path = &path[1..]; |
1687 | } |
1688 | parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path)); |
1689 | } else { |
1690 | let mut has_host = true; // FIXME |
1691 | parser.parse_path_start( |
1692 | scheme_type, |
1693 | &mut has_host, |
1694 | parser::Input::new_no_trim(path), |
1695 | ); |
1696 | } |
1697 | }); |
1698 | self.restore_after_path(old_after_path_pos, &after_path); |
1699 | } |
1700 | |
1701 | /// Return an object with methods to manipulate this URL’s path segments. |
1702 | /// |
1703 | /// Return `Err(())` if this URL is cannot-be-a-base. |
1704 | #[allow (clippy::result_unit_err)] |
1705 | pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> { |
1706 | if self.cannot_be_a_base() { |
1707 | Err(()) |
1708 | } else { |
1709 | Ok(path_segments::new(self)) |
1710 | } |
1711 | } |
1712 | |
1713 | fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) { |
1714 | let new_after_path_position = to_u32(self.serialization.len()).unwrap(); |
1715 | let adjust = |index: &mut u32| { |
1716 | *index -= old_after_path_position; |
1717 | *index += new_after_path_position; |
1718 | }; |
1719 | if let Some(ref mut index) = self.query_start { |
1720 | adjust(index) |
1721 | } |
1722 | if let Some(ref mut index) = self.fragment_start { |
1723 | adjust(index) |
1724 | } |
1725 | self.serialization.push_str(after_path) |
1726 | } |
1727 | |
1728 | /// Change this URL’s port number. |
1729 | /// |
1730 | /// Note that default port numbers are not reflected in the serialization. |
1731 | /// |
/// If this URL is cannot-be-a-base, does not have a host, has an empty host,
/// or has the `file` scheme, do nothing and return `Err`.
1734 | /// |
1735 | /// # Examples |
1736 | /// |
1737 | /// ``` |
1738 | /// use url::Url; |
1739 | /// # use std::error::Error; |
1740 | /// |
1741 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1742 | /// let mut url = Url::parse("ssh://example.net:2048/" )?; |
1743 | /// |
1744 | /// url.set_port(Some(4096)).map_err(|_| "cannot be base" )?; |
1745 | /// assert_eq!(url.as_str(), "ssh://example.net:4096/" ); |
1746 | /// |
1747 | /// url.set_port(None).map_err(|_| "cannot be base" )?; |
1748 | /// assert_eq!(url.as_str(), "ssh://example.net/" ); |
1749 | /// # Ok(()) |
1750 | /// # } |
1751 | /// # run().unwrap(); |
1752 | /// ``` |
1753 | /// |
1754 | /// Known default port numbers are not reflected: |
1755 | /// |
1756 | /// ```rust |
1757 | /// use url::Url; |
1758 | /// # use std::error::Error; |
1759 | /// |
1760 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1761 | /// let mut url = Url::parse("https://example.org/" )?; |
1762 | /// |
1763 | /// url.set_port(Some(443)).map_err(|_| "cannot be base" )?; |
1764 | /// assert!(url.port().is_none()); |
1765 | /// # Ok(()) |
1766 | /// # } |
1767 | /// # run().unwrap(); |
1768 | /// ``` |
1769 | /// |
1770 | /// Cannot set port for cannot-be-a-base URLs: |
1771 | /// |
1772 | /// ``` |
1773 | /// use url::Url; |
1774 | /// # use url::ParseError; |
1775 | /// |
1776 | /// # fn run() -> Result<(), ParseError> { |
1777 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
1778 | /// |
1779 | /// let result = url.set_port(Some(80)); |
1780 | /// assert!(result.is_err()); |
1781 | /// |
1782 | /// let result = url.set_port(None); |
1783 | /// assert!(result.is_err()); |
1784 | /// # Ok(()) |
1785 | /// # } |
1786 | /// # run().unwrap(); |
1787 | /// ``` |
1788 | #[allow (clippy::result_unit_err)] |
1789 | pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> { |
1790 | // has_host implies !cannot_be_a_base |
1791 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
1792 | return Err(()); |
1793 | } |
1794 | if port.is_some() && port == parser::default_port(self.scheme()) { |
1795 | port = None |
1796 | } |
1797 | self.set_port_internal(port); |
1798 | Ok(()) |
1799 | } |
1800 | |
1801 | fn set_port_internal(&mut self, port: Option<u16>) { |
1802 | match (self.port, port) { |
1803 | (None, None) => {} |
1804 | (Some(_), None) => { |
1805 | self.serialization |
1806 | .drain(self.host_end as usize..self.path_start as usize); |
1807 | let offset = self.path_start - self.host_end; |
1808 | self.path_start = self.host_end; |
1809 | if let Some(ref mut index) = self.query_start { |
1810 | *index -= offset |
1811 | } |
1812 | if let Some(ref mut index) = self.fragment_start { |
1813 | *index -= offset |
1814 | } |
1815 | } |
1816 | (Some(old), Some(new)) if old == new => {} |
1817 | (_, Some(new)) => { |
1818 | let path_and_after = self.slice(self.path_start..).to_owned(); |
1819 | self.serialization.truncate(self.host_end as usize); |
1820 | write!(&mut self.serialization, ": {}" , new).unwrap(); |
1821 | let old_path_start = self.path_start; |
1822 | let new_path_start = to_u32(self.serialization.len()).unwrap(); |
1823 | self.path_start = new_path_start; |
1824 | let adjust = |index: &mut u32| { |
1825 | *index -= old_path_start; |
1826 | *index += new_path_start; |
1827 | }; |
1828 | if let Some(ref mut index) = self.query_start { |
1829 | adjust(index) |
1830 | } |
1831 | if let Some(ref mut index) = self.fragment_start { |
1832 | adjust(index) |
1833 | } |
1834 | self.serialization.push_str(&path_and_after); |
1835 | } |
1836 | } |
1837 | self.port = port; |
1838 | } |
1839 | |
1840 | /// Change this URL’s host. |
1841 | /// |
1842 | /// Removing the host (calling this with `None`) |
1843 | /// will also remove any username, password, and port number. |
1844 | /// |
1845 | /// # Examples |
1846 | /// |
1847 | /// Change host: |
1848 | /// |
1849 | /// ``` |
1850 | /// use url::Url; |
1851 | /// # use url::ParseError; |
1852 | /// |
1853 | /// # fn run() -> Result<(), ParseError> { |
1854 | /// let mut url = Url::parse("https://example.net" )?; |
1855 | /// let result = url.set_host(Some("rust-lang.org" )); |
1856 | /// assert!(result.is_ok()); |
1857 | /// assert_eq!(url.as_str(), "https://rust-lang.org/" ); |
1858 | /// # Ok(()) |
1859 | /// # } |
1860 | /// # run().unwrap(); |
1861 | /// ``` |
1862 | /// |
1863 | /// Remove host: |
1864 | /// |
1865 | /// ``` |
1866 | /// use url::Url; |
1867 | /// # use url::ParseError; |
1868 | /// |
1869 | /// # fn run() -> Result<(), ParseError> { |
1870 | /// let mut url = Url::parse("foo://example.net" )?; |
1871 | /// let result = url.set_host(None); |
1872 | /// assert!(result.is_ok()); |
1873 | /// assert_eq!(url.as_str(), "foo:/" ); |
1874 | /// # Ok(()) |
1875 | /// # } |
1876 | /// # run().unwrap(); |
1877 | /// ``` |
1878 | /// |
1879 | /// Cannot remove host for 'special' schemes (e.g. `http`): |
1880 | /// |
1881 | /// ``` |
1882 | /// use url::Url; |
1883 | /// # use url::ParseError; |
1884 | /// |
1885 | /// # fn run() -> Result<(), ParseError> { |
1886 | /// let mut url = Url::parse("https://example.net" )?; |
1887 | /// let result = url.set_host(None); |
1888 | /// assert!(result.is_err()); |
1889 | /// assert_eq!(url.as_str(), "https://example.net/" ); |
1890 | /// # Ok(()) |
1891 | /// # } |
1892 | /// # run().unwrap(); |
1893 | /// ``` |
1894 | /// |
1895 | /// Cannot change or remove host for cannot-be-a-base URLs: |
1896 | /// |
1897 | /// ``` |
1898 | /// use url::Url; |
1899 | /// # use url::ParseError; |
1900 | /// |
1901 | /// # fn run() -> Result<(), ParseError> { |
1902 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
1903 | /// |
1904 | /// let result = url.set_host(Some("rust-lang.org" )); |
1905 | /// assert!(result.is_err()); |
1906 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
1907 | /// |
1908 | /// let result = url.set_host(None); |
1909 | /// assert!(result.is_err()); |
1910 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
1911 | /// # Ok(()) |
1912 | /// # } |
1913 | /// # run().unwrap(); |
1914 | /// ``` |
1915 | /// |
1916 | /// # Errors |
1917 | /// |
1918 | /// If this URL is cannot-be-a-base or there is an error parsing the given `host`, |
1919 | /// a [`ParseError`] variant will be returned. |
1920 | /// |
1921 | /// [`ParseError`]: enum.ParseError.html |
1922 | pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> { |
1923 | if self.cannot_be_a_base() { |
1924 | return Err(ParseError::SetHostOnCannotBeABaseUrl); |
1925 | } |
1926 | |
1927 | let scheme_type = SchemeType::from(self.scheme()); |
1928 | |
1929 | if let Some(host) = host { |
1930 | if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() { |
1931 | return Err(ParseError::EmptyHost); |
1932 | } |
1933 | let mut host_substr = host; |
1934 | // Otherwise, if c is U+003A (:) and the [] flag is unset, then |
1935 | if !host.starts_with('[' ) || !host.ends_with(']' ) { |
1936 | match host.find(':' ) { |
1937 | Some(0) => { |
1938 | // If buffer is the empty string, validation error, return failure. |
1939 | return Err(ParseError::InvalidDomainCharacter); |
1940 | } |
1941 | // Let host be the result of host parsing buffer |
1942 | Some(colon_index) => { |
1943 | host_substr = &host[..colon_index]; |
1944 | } |
1945 | None => {} |
1946 | } |
1947 | } |
1948 | if SchemeType::from(self.scheme()).is_special() { |
1949 | self.set_host_internal(Host::parse(host_substr)?, None); |
1950 | } else { |
1951 | self.set_host_internal(Host::parse_opaque(host_substr)?, None); |
1952 | } |
1953 | } else if self.has_host() { |
1954 | if scheme_type.is_special() && !scheme_type.is_file() { |
1955 | return Err(ParseError::EmptyHost); |
1956 | } else if self.serialization.len() == self.path_start as usize { |
1957 | self.serialization.push('/' ); |
1958 | } |
1959 | debug_assert!(self.byte_at(self.scheme_end) == b':' ); |
1960 | debug_assert!(self.byte_at(self.path_start) == b'/' ); |
1961 | |
1962 | let new_path_start = if scheme_type.is_file() { |
1963 | self.scheme_end + 3 |
1964 | } else { |
1965 | self.scheme_end + 1 |
1966 | }; |
1967 | |
1968 | self.serialization |
1969 | .drain(new_path_start as usize..self.path_start as usize); |
1970 | let offset = self.path_start - new_path_start; |
1971 | self.path_start = new_path_start; |
1972 | self.username_end = new_path_start; |
1973 | self.host_start = new_path_start; |
1974 | self.host_end = new_path_start; |
1975 | self.port = None; |
1976 | if let Some(ref mut index) = self.query_start { |
1977 | *index -= offset |
1978 | } |
1979 | if let Some(ref mut index) = self.fragment_start { |
1980 | *index -= offset |
1981 | } |
1982 | } |
1983 | Ok(()) |
1984 | } |
1985 | |
1986 | /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. |
1987 | fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) { |
1988 | let old_suffix_pos = if opt_new_port.is_some() { |
1989 | self.path_start |
1990 | } else { |
1991 | self.host_end |
1992 | }; |
1993 | let suffix = self.slice(old_suffix_pos..).to_owned(); |
1994 | self.serialization.truncate(self.host_start as usize); |
1995 | if !self.has_authority() { |
1996 | debug_assert!(self.slice(self.scheme_end..self.host_start) == ":" ); |
1997 | debug_assert!(self.username_end == self.host_start); |
1998 | self.serialization.push('/' ); |
1999 | self.serialization.push('/' ); |
2000 | self.username_end += 2; |
2001 | self.host_start += 2; |
2002 | } |
2003 | write!(&mut self.serialization, " {}" , host).unwrap(); |
2004 | self.host_end = to_u32(self.serialization.len()).unwrap(); |
2005 | self.host = host.into(); |
2006 | |
2007 | if let Some(new_port) = opt_new_port { |
2008 | self.port = new_port; |
2009 | if let Some(port) = new_port { |
2010 | write!(&mut self.serialization, ": {}" , port).unwrap(); |
2011 | } |
2012 | } |
2013 | let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); |
2014 | self.serialization.push_str(&suffix); |
2015 | |
2016 | let adjust = |index: &mut u32| { |
2017 | *index -= old_suffix_pos; |
2018 | *index += new_suffix_pos; |
2019 | }; |
2020 | adjust(&mut self.path_start); |
2021 | if let Some(ref mut index) = self.query_start { |
2022 | adjust(index) |
2023 | } |
2024 | if let Some(ref mut index) = self.fragment_start { |
2025 | adjust(index) |
2026 | } |
2027 | } |
2028 | |
2029 | /// Change this URL’s host to the given IP address. |
2030 | /// |
2031 | /// If this URL is cannot-be-a-base, do nothing and return `Err`. |
2032 | /// |
2033 | /// Compared to `Url::set_host`, this skips the host parser. |
2034 | /// |
2035 | /// # Examples |
2036 | /// |
2037 | /// ```rust |
2038 | /// use url::{Url, ParseError}; |
2039 | /// |
2040 | /// # fn run() -> Result<(), ParseError> { |
2041 | /// let mut url = Url::parse("http://example.com" )?; |
2042 | /// url.set_ip_host("127.0.0.1" .parse().unwrap()); |
2043 | /// assert_eq!(url.host_str(), Some("127.0.0.1" )); |
2044 | /// assert_eq!(url.as_str(), "http://127.0.0.1/" ); |
2045 | /// # Ok(()) |
2046 | /// # } |
2047 | /// # run().unwrap(); |
2048 | /// ``` |
2049 | /// |
2050 | /// Cannot change URL's from mailto(cannot-be-base) to ip: |
2051 | /// |
2052 | /// ```rust |
2053 | /// use url::{Url, ParseError}; |
2054 | /// |
2055 | /// # fn run() -> Result<(), ParseError> { |
2056 | /// let mut url = Url::parse("mailto:rms@example.com" )?; |
2057 | /// let result = url.set_ip_host("127.0.0.1" .parse().unwrap()); |
2058 | /// |
2059 | /// assert_eq!(url.as_str(), "mailto:rms@example.com" ); |
2060 | /// assert!(result.is_err()); |
2061 | /// # Ok(()) |
2062 | /// # } |
2063 | /// # run().unwrap(); |
2064 | /// ``` |
2065 | /// |
2066 | #[allow (clippy::result_unit_err)] |
2067 | pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { |
2068 | if self.cannot_be_a_base() { |
2069 | return Err(()); |
2070 | } |
2071 | |
2072 | let address = match address { |
2073 | IpAddr::V4(address) => Host::Ipv4(address), |
2074 | IpAddr::V6(address) => Host::Ipv6(address), |
2075 | }; |
2076 | self.set_host_internal(address, None); |
2077 | Ok(()) |
2078 | } |
2079 | |
2080 | /// Change this URL’s password. |
2081 | /// |
2082 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. |
2083 | /// |
2084 | /// # Examples |
2085 | /// |
2086 | /// ```rust |
2087 | /// use url::{Url, ParseError}; |
2088 | /// |
2089 | /// # fn run() -> Result<(), ParseError> { |
2090 | /// let mut url = Url::parse("mailto:rmz@example.com" )?; |
2091 | /// let result = url.set_password(Some("secret_password" )); |
2092 | /// assert!(result.is_err()); |
2093 | /// |
2094 | /// let mut url = Url::parse("ftp://user1:secret1@example.com" )?; |
2095 | /// let result = url.set_password(Some("secret_password" )); |
2096 | /// assert_eq!(url.password(), Some("secret_password" )); |
2097 | /// |
2098 | /// let mut url = Url::parse("ftp://user2:@example.com" )?; |
2099 | /// let result = url.set_password(Some("secret2" )); |
2100 | /// assert!(result.is_ok()); |
2101 | /// assert_eq!(url.password(), Some("secret2" )); |
2102 | /// # Ok(()) |
2103 | /// # } |
2104 | /// # run().unwrap(); |
2105 | /// ``` |
2106 | #[allow (clippy::result_unit_err)] |
2107 | pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { |
2108 | // has_host implies !cannot_be_a_base |
2109 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
2110 | return Err(()); |
2111 | } |
2112 | let password = password.unwrap_or_default(); |
2113 | if !password.is_empty() { |
2114 | let host_and_after = self.slice(self.host_start..).to_owned(); |
2115 | self.serialization.truncate(self.username_end as usize); |
2116 | self.serialization.push(':' ); |
2117 | self.serialization |
2118 | .extend(utf8_percent_encode(password, USERINFO)); |
2119 | self.serialization.push('@' ); |
2120 | |
2121 | let old_host_start = self.host_start; |
2122 | let new_host_start = to_u32(self.serialization.len()).unwrap(); |
2123 | let adjust = |index: &mut u32| { |
2124 | *index -= old_host_start; |
2125 | *index += new_host_start; |
2126 | }; |
2127 | self.host_start = new_host_start; |
2128 | adjust(&mut self.host_end); |
2129 | adjust(&mut self.path_start); |
2130 | if let Some(ref mut index) = self.query_start { |
2131 | adjust(index) |
2132 | } |
2133 | if let Some(ref mut index) = self.fragment_start { |
2134 | adjust(index) |
2135 | } |
2136 | |
2137 | self.serialization.push_str(&host_and_after); |
2138 | } else if self.byte_at(self.username_end) == b':' { |
2139 | // If there is a password to remove |
2140 | let has_username_or_password = self.byte_at(self.host_start - 1) == b'@' ; |
2141 | debug_assert!(has_username_or_password); |
2142 | let username_start = self.scheme_end + 3; |
2143 | let empty_username = username_start == self.username_end; |
2144 | let start = self.username_end; // Remove the ':' |
2145 | let end = if empty_username { |
2146 | self.host_start // Remove the '@' as well |
2147 | } else { |
2148 | self.host_start - 1 // Keep the '@' to separate the username from the host |
2149 | }; |
2150 | self.serialization.drain(start as usize..end as usize); |
2151 | let offset = end - start; |
2152 | self.host_start -= offset; |
2153 | self.host_end -= offset; |
2154 | self.path_start -= offset; |
2155 | if let Some(ref mut index) = self.query_start { |
2156 | *index -= offset |
2157 | } |
2158 | if let Some(ref mut index) = self.fragment_start { |
2159 | *index -= offset |
2160 | } |
2161 | } |
2162 | Ok(()) |
2163 | } |
2164 | |
2165 | /// Change this URL’s username. |
2166 | /// |
2167 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. |
2168 | /// # Examples |
2169 | /// |
2170 | /// Cannot setup username from mailto(cannot-be-base) |
2171 | /// |
2172 | /// ```rust |
2173 | /// use url::{Url, ParseError}; |
2174 | /// |
2175 | /// # fn run() -> Result<(), ParseError> { |
2176 | /// let mut url = Url::parse("mailto:rmz@example.com" )?; |
2177 | /// let result = url.set_username("user1" ); |
2178 | /// assert_eq!(url.as_str(), "mailto:rmz@example.com" ); |
2179 | /// assert!(result.is_err()); |
2180 | /// # Ok(()) |
2181 | /// # } |
2182 | /// # run().unwrap(); |
2183 | /// ``` |
2184 | /// |
2185 | /// Setup username to user1 |
2186 | /// |
2187 | /// ```rust |
2188 | /// use url::{Url, ParseError}; |
2189 | /// |
2190 | /// # fn run() -> Result<(), ParseError> { |
2191 | /// let mut url = Url::parse("ftp://:secre1@example.com/" )?; |
2192 | /// let result = url.set_username("user1" ); |
2193 | /// assert!(result.is_ok()); |
2194 | /// assert_eq!(url.username(), "user1" ); |
2195 | /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/" ); |
2196 | /// # Ok(()) |
2197 | /// # } |
2198 | /// # run().unwrap(); |
2199 | /// ``` |
2200 | #[allow (clippy::result_unit_err)] |
2201 | pub fn set_username(&mut self, username: &str) -> Result<(), ()> { |
2202 | // has_host implies !cannot_be_a_base |
2203 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
2204 | return Err(()); |
2205 | } |
2206 | let username_start = self.scheme_end + 3; |
2207 | debug_assert!(self.slice(self.scheme_end..username_start) == "://" ); |
2208 | if self.slice(username_start..self.username_end) == username { |
2209 | return Ok(()); |
2210 | } |
2211 | let after_username = self.slice(self.username_end..).to_owned(); |
2212 | self.serialization.truncate(username_start as usize); |
2213 | self.serialization |
2214 | .extend(utf8_percent_encode(username, USERINFO)); |
2215 | |
2216 | let mut removed_bytes = self.username_end; |
2217 | self.username_end = to_u32(self.serialization.len()).unwrap(); |
2218 | let mut added_bytes = self.username_end; |
2219 | |
2220 | let new_username_is_empty = self.username_end == username_start; |
2221 | match (new_username_is_empty, after_username.chars().next()) { |
2222 | (true, Some('@' )) => { |
2223 | removed_bytes += 1; |
2224 | self.serialization.push_str(&after_username[1..]); |
2225 | } |
2226 | (false, Some('@' )) | (_, Some(':' )) | (true, _) => { |
2227 | self.serialization.push_str(&after_username); |
2228 | } |
2229 | (false, _) => { |
2230 | added_bytes += 1; |
2231 | self.serialization.push('@' ); |
2232 | self.serialization.push_str(&after_username); |
2233 | } |
2234 | } |
2235 | |
2236 | let adjust = |index: &mut u32| { |
2237 | *index -= removed_bytes; |
2238 | *index += added_bytes; |
2239 | }; |
2240 | adjust(&mut self.host_start); |
2241 | adjust(&mut self.host_end); |
2242 | adjust(&mut self.path_start); |
2243 | if let Some(ref mut index) = self.query_start { |
2244 | adjust(index) |
2245 | } |
2246 | if let Some(ref mut index) = self.fragment_start { |
2247 | adjust(index) |
2248 | } |
2249 | Ok(()) |
2250 | } |
2251 | |
2252 | /// Change this URL’s scheme. |
2253 | /// |
2254 | /// Do nothing and return `Err` under the following circumstances: |
2255 | /// |
2256 | /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+` |
2257 | /// * If this URL is cannot-be-a-base and the new scheme is one of |
2258 | /// `http`, `https`, `ws`, `wss` or `ftp` |
2259 | /// * If either the old or new scheme is `http`, `https`, `ws`, |
2260 | /// `wss` or `ftp` and the other is not one of these |
2261 | /// * If the new scheme is `file` and this URL includes credentials |
2262 | /// or has a non-null port |
2263 | /// * If this URL's scheme is `file` and its host is empty or null |
2264 | /// |
2265 | /// See also [the URL specification's section on legal scheme state |
2266 | /// overrides](https://url.spec.whatwg.org/#scheme-state). |
2267 | /// |
2268 | /// # Examples |
2269 | /// |
2270 | /// Change the URL’s scheme from `https` to `http`: |
2271 | /// |
2272 | /// ``` |
2273 | /// use url::Url; |
2274 | /// # use url::ParseError; |
2275 | /// |
2276 | /// # fn run() -> Result<(), ParseError> { |
2277 | /// let mut url = Url::parse("https://example.net" )?; |
2278 | /// let result = url.set_scheme("http" ); |
2279 | /// assert_eq!(url.as_str(), "http://example.net/" ); |
2280 | /// assert!(result.is_ok()); |
2281 | /// # Ok(()) |
2282 | /// # } |
2283 | /// # run().unwrap(); |
2284 | /// ``` |
2285 | /// Change the URL’s scheme from `foo` to `bar`: |
2286 | /// |
2287 | /// ``` |
2288 | /// use url::Url; |
2289 | /// # use url::ParseError; |
2290 | /// |
2291 | /// # fn run() -> Result<(), ParseError> { |
2292 | /// let mut url = Url::parse("foo://example.net" )?; |
2293 | /// let result = url.set_scheme("bar" ); |
2294 | /// assert_eq!(url.as_str(), "bar://example.net" ); |
2295 | /// assert!(result.is_ok()); |
2296 | /// # Ok(()) |
2297 | /// # } |
2298 | /// # run().unwrap(); |
2299 | /// ``` |
2300 | /// |
2301 | /// Cannot change URL’s scheme from `https` to `foõ`: |
2302 | /// |
2303 | /// ``` |
2304 | /// use url::Url; |
2305 | /// # use url::ParseError; |
2306 | /// |
2307 | /// # fn run() -> Result<(), ParseError> { |
2308 | /// let mut url = Url::parse("https://example.net" )?; |
2309 | /// let result = url.set_scheme("foõ" ); |
2310 | /// assert_eq!(url.as_str(), "https://example.net/" ); |
2311 | /// assert!(result.is_err()); |
2312 | /// # Ok(()) |
2313 | /// # } |
2314 | /// # run().unwrap(); |
2315 | /// ``` |
2316 | /// |
2317 | /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`: |
2318 | /// |
2319 | /// ``` |
2320 | /// use url::Url; |
2321 | /// # use url::ParseError; |
2322 | /// |
2323 | /// # fn run() -> Result<(), ParseError> { |
2324 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
2325 | /// let result = url.set_scheme("https" ); |
2326 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
2327 | /// assert!(result.is_err()); |
2328 | /// # Ok(()) |
2329 | /// # } |
2330 | /// # run().unwrap(); |
2331 | /// ``` |
2332 | /// Cannot change the URL’s scheme from `foo` to `https`: |
2333 | /// |
2334 | /// ``` |
2335 | /// use url::Url; |
2336 | /// # use url::ParseError; |
2337 | /// |
2338 | /// # fn run() -> Result<(), ParseError> { |
2339 | /// let mut url = Url::parse("foo://example.net" )?; |
2340 | /// let result = url.set_scheme("https" ); |
2341 | /// assert_eq!(url.as_str(), "foo://example.net" ); |
2342 | /// assert!(result.is_err()); |
2343 | /// # Ok(()) |
2344 | /// # } |
2345 | /// # run().unwrap(); |
2346 | /// ``` |
2347 | /// Cannot change the URL’s scheme from `http` to `foo`: |
2348 | /// |
2349 | /// ``` |
2350 | /// use url::Url; |
2351 | /// # use url::ParseError; |
2352 | /// |
2353 | /// # fn run() -> Result<(), ParseError> { |
2354 | /// let mut url = Url::parse("http://example.net" )?; |
2355 | /// let result = url.set_scheme("foo" ); |
2356 | /// assert_eq!(url.as_str(), "http://example.net/" ); |
2357 | /// assert!(result.is_err()); |
2358 | /// # Ok(()) |
2359 | /// # } |
2360 | /// # run().unwrap(); |
2361 | /// ``` |
2362 | #[allow (clippy::result_unit_err, clippy::suspicious_operation_groupings)] |
2363 | pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { |
2364 | let mut parser = Parser::for_setter(String::new()); |
2365 | let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?; |
2366 | let new_scheme_type = SchemeType::from(&parser.serialization); |
2367 | let old_scheme_type = SchemeType::from(self.scheme()); |
2368 | // If url’s scheme is a special scheme and buffer is not a special scheme, then return. |
2369 | if (new_scheme_type.is_special() && !old_scheme_type.is_special()) || |
2370 | // If url’s scheme is not a special scheme and buffer is a special scheme, then return. |
2371 | (!new_scheme_type.is_special() && old_scheme_type.is_special()) || |
2372 | // If url includes credentials or has a non-null port, and buffer is "file", then return. |
2373 | // If url’s scheme is "file" and its host is an empty host or null, then return. |
2374 | (new_scheme_type.is_file() && self.has_authority()) |
2375 | { |
2376 | return Err(()); |
2377 | } |
2378 | |
2379 | if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) { |
2380 | return Err(()); |
2381 | } |
2382 | let old_scheme_end = self.scheme_end; |
2383 | let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); |
2384 | let adjust = |index: &mut u32| { |
2385 | *index -= old_scheme_end; |
2386 | *index += new_scheme_end; |
2387 | }; |
2388 | |
2389 | self.scheme_end = new_scheme_end; |
2390 | adjust(&mut self.username_end); |
2391 | adjust(&mut self.host_start); |
2392 | adjust(&mut self.host_end); |
2393 | adjust(&mut self.path_start); |
2394 | if let Some(ref mut index) = self.query_start { |
2395 | adjust(index) |
2396 | } |
2397 | if let Some(ref mut index) = self.fragment_start { |
2398 | adjust(index) |
2399 | } |
2400 | |
2401 | parser.serialization.push_str(self.slice(old_scheme_end..)); |
2402 | self.serialization = parser.serialization; |
2403 | |
2404 | // Update the port so it can be removed |
2405 | // If it is the scheme's default |
2406 | // we don't mind it silently failing |
2407 | // if there was no port in the first place |
2408 | let previous_port = self.port(); |
2409 | let _ = self.set_port(previous_port); |
2410 | |
2411 | Ok(()) |
2412 | } |
2413 | |
2414 | /// Convert a file name as `std::path::Path` into an URL in the `file` scheme. |
2415 | /// |
2416 | /// This returns `Err` if the given path is not absolute or, |
2417 | /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). |
2418 | /// |
2419 | /// # Examples |
2420 | /// |
2421 | /// On Unix-like platforms: |
2422 | /// |
2423 | /// ``` |
2424 | /// # if cfg!(unix) { |
2425 | /// use url::Url; |
2426 | /// |
2427 | /// # fn run() -> Result<(), ()> { |
2428 | /// let url = Url::from_file_path("/tmp/foo.txt" )?; |
2429 | /// assert_eq!(url.as_str(), "file:///tmp/foo.txt" ); |
2430 | /// |
2431 | /// let url = Url::from_file_path("../foo.txt" ); |
2432 | /// assert!(url.is_err()); |
2433 | /// |
2434 | /// let url = Url::from_file_path("https://google.com/" ); |
2435 | /// assert!(url.is_err()); |
2436 | /// # Ok(()) |
2437 | /// # } |
2438 | /// # run().unwrap(); |
2439 | /// # } |
2440 | /// ``` |
2441 | #[cfg (any(unix, windows, target_os = "redox" , target_os = "wasi" ))] |
2442 | #[allow (clippy::result_unit_err)] |
2443 | pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { |
2444 | let mut serialization = "file://" .to_owned(); |
2445 | let host_start = serialization.len() as u32; |
2446 | let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?; |
2447 | Ok(Url { |
2448 | serialization, |
2449 | scheme_end: "file" .len() as u32, |
2450 | username_end: host_start, |
2451 | host_start, |
2452 | host_end, |
2453 | host, |
2454 | port: None, |
2455 | path_start: host_end, |
2456 | query_start: None, |
2457 | fragment_start: None, |
2458 | }) |
2459 | } |
2460 | |
2461 | /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme. |
2462 | /// |
2463 | /// This returns `Err` if the given path is not absolute or, |
2464 | /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). |
2465 | /// |
2466 | /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash |
2467 | /// so that the entire path is considered when using this URL as a base URL. |
2468 | /// |
2469 | /// For example: |
2470 | /// |
2471 | /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))` |
2472 | /// as the base URL is `file:///var/www/index.html` |
2473 | /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))` |
2474 | /// as the base URL is `file:///var/index.html`, which might not be what was intended. |
2475 | /// |
2476 | /// Note that `std::path` does not consider trailing slashes significant |
2477 | /// and usually does not include them (e.g. in `Path::parent()`). |
2478 | #[cfg (any(unix, windows, target_os = "redox" , target_os = "wasi" ))] |
2479 | #[allow (clippy::result_unit_err)] |
2480 | pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { |
2481 | let mut url = Url::from_file_path(path)?; |
2482 | if !url.serialization.ends_with('/' ) { |
2483 | url.serialization.push('/' ) |
2484 | } |
2485 | Ok(url) |
2486 | } |
2487 | |
/// Serialize with Serde using the internal representation of the `Url` struct.
///
/// The corresponding `deserialize_internal` method sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;
    // Exhaustive destructuring: adding or removing a field of `Url` forces this
    // method to be updated.
    let Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    } = self;
    let fields = (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    );
    fields.serialize(serializer)
}
2529 | |
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`. It sacrifices some
/// invariant-checking for speed, compared to the `Deserialize` trait impl:
/// the invariants are only checked when debug assertions are enabled.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error, Unexpected};
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    if cfg!(debug_assertions) {
        if let Err(reason) = url.check_invariants() {
            let reason: &str = &reason;
            return Err(Error::invalid_value(Unexpected::Other("value"), &reason));
        }
    }
    Ok(url)
}
2575 | |
2576 | /// Assuming the URL is in the `file` scheme or similar, |
2577 | /// convert its path to an absolute `std::path::Path`. |
2578 | /// |
2579 | /// **Note:** This does not actually check the URL’s `scheme`, |
2580 | /// and may give nonsensical results for other schemes. |
2581 | /// It is the user’s responsibility to check the URL’s scheme before calling this. |
2582 | /// |
2583 | /// ``` |
2584 | /// # use url::Url; |
2585 | /// # let url = Url::parse("file:///etc/passwd" ).unwrap(); |
2586 | /// let path = url.to_file_path(); |
2587 | /// ``` |
2588 | /// |
2589 | /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where |
2590 | /// `file:` URLs may have a non-local host), |
2591 | /// or if `Path::new_opt()` returns `None`. |
2592 | /// (That is, if the percent-decoded path contains a NUL byte or, |
2593 | /// for a Windows path, is not UTF-8.) |
2594 | #[inline ] |
2595 | #[cfg (any(unix, windows, target_os = "redox" , target_os = "wasi" ))] |
2596 | #[allow (clippy::result_unit_err)] |
2597 | pub fn to_file_path(&self) -> Result<PathBuf, ()> { |
2598 | if let Some(segments) = self.path_segments() { |
2599 | let host = match self.host() { |
2600 | None | Some(Host::Domain("localhost" )) => None, |
2601 | Some(_) if cfg!(windows) && self.scheme() == "file" => { |
2602 | Some(&self.serialization[self.host_start as usize..self.host_end as usize]) |
2603 | } |
2604 | _ => return Err(()), |
2605 | }; |
2606 | |
2607 | return file_url_segments_to_pathbuf(host, segments); |
2608 | } |
2609 | Err(()) |
2610 | } |
2611 | |
2612 | // Private helper methods: |
2613 | |
2614 | #[inline ] |
2615 | fn slice<R>(&self, range: R) -> &str |
2616 | where |
2617 | R: RangeArg, |
2618 | { |
2619 | range.slice_of(&self.serialization) |
2620 | } |
2621 | |
2622 | #[inline ] |
2623 | fn byte_at(&self, i: u32) -> u8 { |
2624 | self.serialization.as_bytes()[i as usize] |
2625 | } |
2626 | } |
2627 | |
2628 | /// Parse a string as an URL, without a base URL or encoding override. |
2629 | impl str::FromStr for Url { |
2630 | type Err = ParseError; |
2631 | |
2632 | #[inline ] |
2633 | fn from_str(input: &str) -> Result<Url, crate::ParseError> { |
2634 | Url::parse(input) |
2635 | } |
2636 | } |
2637 | |
2638 | impl<'a> TryFrom<&'a str> for Url { |
2639 | type Error = ParseError; |
2640 | |
2641 | fn try_from(s: &'a str) -> Result<Self, Self::Error> { |
2642 | Url::parse(input:s) |
2643 | } |
2644 | } |
2645 | |
2646 | /// Display the serialization of this URL. |
2647 | impl fmt::Display for Url { |
2648 | #[inline ] |
2649 | fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { |
2650 | fmt::Display::fmt(&self.serialization, f:formatter) |
2651 | } |
2652 | } |
2653 | |
2654 | /// String conversion. |
2655 | impl From<Url> for String { |
2656 | fn from(value: Url) -> String { |
2657 | value.serialization |
2658 | } |
2659 | } |
2660 | |
2661 | /// Debug the serialization of this URL. |
2662 | impl fmt::Debug for Url { |
2663 | #[inline ] |
2664 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
2665 | formatter&mut DebugStruct<'_, '_> |
2666 | .debug_struct("Url" ) |
2667 | .field("scheme" , &self.scheme()) |
2668 | .field("cannot_be_a_base" , &self.cannot_be_a_base()) |
2669 | .field("username" , &self.username()) |
2670 | .field("password" , &self.password()) |
2671 | .field("host" , &self.host()) |
2672 | .field("port" , &self.port()) |
2673 | .field("path" , &self.path()) |
2674 | .field("query" , &self.query()) |
2675 | .field(name:"fragment" , &self.fragment()) |
2676 | .finish() |
2677 | } |
2678 | } |
2679 | |
/// URLs compare like their serialization.
///
/// Marker impl: `PartialEq` for `Url` compares the `serialization` strings,
/// and string equality is a full equivalence relation.
impl Eq for Url {}
2682 | |
2683 | /// URLs compare like their serialization. |
2684 | impl PartialEq for Url { |
2685 | #[inline ] |
2686 | fn eq(&self, other: &Self) -> bool { |
2687 | self.serialization == other.serialization |
2688 | } |
2689 | } |
2690 | |
2691 | /// URLs compare like their serialization. |
2692 | impl Ord for Url { |
2693 | #[inline ] |
2694 | fn cmp(&self, other: &Self) -> cmp::Ordering { |
2695 | self.serialization.cmp(&other.serialization) |
2696 | } |
2697 | } |
2698 | |
2699 | /// URLs compare like their serialization. |
2700 | impl PartialOrd for Url { |
2701 | #[inline ] |
2702 | fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { |
2703 | Some(self.cmp(other)) |
2704 | } |
2705 | } |
2706 | |
2707 | /// URLs hash like their serialization. |
2708 | impl hash::Hash for Url { |
2709 | #[inline ] |
2710 | fn hash<H>(&self, state: &mut H) |
2711 | where |
2712 | H: hash::Hasher, |
2713 | { |
2714 | hash::Hash::hash(&self.serialization, state) |
2715 | } |
2716 | } |
2717 | |
2718 | /// Return the serialization of this URL. |
2719 | impl AsRef<str> for Url { |
2720 | #[inline ] |
2721 | fn as_ref(&self) -> &str { |
2722 | &self.serialization |
2723 | } |
2724 | } |
2725 | |
/// A range with `u32` bounds that can be used to slice a `&str`.
///
/// The bounds are byte offsets. Slicing panics under the same conditions as
/// ordinary string indexing (out of bounds or not on a character boundary).
trait RangeArg {
    /// Returns the sub-slice of `s` that this range selects.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (start, end) = (self.start as usize, self.end as usize);
        &s[start..end]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let start = self.start as usize;
        &s[start..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let end = self.end as usize;
        &s[..end]
    }
}
2750 | |
/// Serializes this URL into a `serde` stream.
///
/// The URL is emitted as a single string, as returned by `as_str()`.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text: &str = self.as_str();
        serializer.serialize_str(text)
    }
}
2763 | |
/// Deserializes this URL from a `serde` stream.
///
/// The input must be a string; it is parsed with `Url::parse`, and a parse
/// failure is reported as an "invalid value" error carrying the parser's message.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        /// Visitor that turns a string into a `Url`.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Url::parse(s).map_err(|err| {
                    // The parse error's text is used as the "expected" part of the
                    // message, so it must not carry any stray leading whitespace.
                    let err_s = format!("{}", err);
                    Error::invalid_value(Unexpected::Str(s), &err_s.as_str())
                })
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2798 | |
2799 | #[cfg (any(unix, target_os = "redox" , target_os = "wasi" ))] |
2800 | fn path_to_file_url_segments( |
2801 | path: &Path, |
2802 | serialization: &mut String, |
2803 | ) -> Result<(u32, HostInternal), ()> { |
2804 | #[cfg (any(unix, target_os = "redox" ))] |
2805 | use std::os::unix::prelude::OsStrExt; |
2806 | #[cfg (target_os = "wasi" )] |
2807 | use std::os::wasi::prelude::OsStrExt; |
2808 | if !path.is_absolute() { |
2809 | return Err(()); |
2810 | } |
2811 | let host_end = to_u32(serialization.len()).unwrap(); |
2812 | let mut empty = true; |
2813 | // skip the root component |
2814 | for component in path.components().skip(1) { |
2815 | empty = false; |
2816 | serialization.push('/' ); |
2817 | serialization.extend(percent_encode( |
2818 | component.as_os_str().as_bytes(), |
2819 | PATH_SEGMENT, |
2820 | )); |
2821 | } |
2822 | if empty { |
2823 | // An URL’s path must not be empty. |
2824 | serialization.push('/' ); |
2825 | } |
2826 | Ok((host_end, HostInternal::None)) |
2827 | } |
2828 | |
/// Windows build of `path_to_file_url_segments`.
///
/// Forwards `path` and `serialization` unchanged to
/// `path_to_file_url_segments_windows` and returns its result.
#[cfg (windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2836 | |
2837 | // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 |
2838 | #[cfg_attr (not(windows), allow(dead_code))] |
2839 | fn path_to_file_url_segments_windows( |
2840 | path: &Path, |
2841 | serialization: &mut String, |
2842 | ) -> Result<(u32, HostInternal), ()> { |
2843 | use std::path::{Component, Prefix}; |
2844 | if !path.is_absolute() { |
2845 | return Err(()); |
2846 | } |
2847 | let mut components = path.components(); |
2848 | |
2849 | let host_start = serialization.len() + 1; |
2850 | let host_end; |
2851 | let host_internal; |
2852 | |
2853 | match components.next() { |
2854 | Some(Component::Prefix(ref p)) => match p.kind() { |
2855 | Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { |
2856 | host_end = to_u32(serialization.len()).unwrap(); |
2857 | host_internal = HostInternal::None; |
2858 | serialization.push('/' ); |
2859 | serialization.push(letter as char); |
2860 | serialization.push(':' ); |
2861 | } |
2862 | Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { |
2863 | let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; |
2864 | write!(serialization, " {}" , host).unwrap(); |
2865 | host_end = to_u32(serialization.len()).unwrap(); |
2866 | host_internal = host.into(); |
2867 | serialization.push('/' ); |
2868 | let share = share.to_str().ok_or(())?; |
2869 | serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT)); |
2870 | } |
2871 | _ => return Err(()), |
2872 | }, |
2873 | _ => return Err(()), |
2874 | } |
2875 | |
2876 | let mut path_only_has_prefix = true; |
2877 | for component in components { |
2878 | if component == Component::RootDir { |
2879 | continue; |
2880 | } |
2881 | |
2882 | path_only_has_prefix = false; |
2883 | // FIXME: somehow work with non-unicode? |
2884 | let component = component.as_os_str().to_str().ok_or(())?; |
2885 | |
2886 | serialization.push('/' ); |
2887 | serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT)); |
2888 | } |
2889 | |
2890 | // A windows drive letter must end with a slash. |
2891 | if serialization.len() > host_start |
2892 | && parser::is_windows_drive_letter(&serialization[host_start..]) |
2893 | && path_only_has_prefix |
2894 | { |
2895 | serialization.push('/' ); |
2896 | } |
2897 | |
2898 | Ok((host_end, host_internal)) |
2899 | } |
2900 | |
2901 | #[cfg (any(unix, target_os = "redox" , target_os = "wasi" ))] |
2902 | fn file_url_segments_to_pathbuf( |
2903 | host: Option<&str>, |
2904 | segments: str::Split<'_, char>, |
2905 | ) -> Result<PathBuf, ()> { |
2906 | use std::ffi::OsStr; |
2907 | #[cfg (any(unix, target_os = "redox" ))] |
2908 | use std::os::unix::prelude::OsStrExt; |
2909 | #[cfg (target_os = "wasi" )] |
2910 | use std::os::wasi::prelude::OsStrExt; |
2911 | |
2912 | if host.is_some() { |
2913 | return Err(()); |
2914 | } |
2915 | |
2916 | let mut bytes = if cfg!(target_os = "redox" ) { |
2917 | b"file:" .to_vec() |
2918 | } else { |
2919 | Vec::new() |
2920 | }; |
2921 | |
2922 | for segment in segments { |
2923 | bytes.push(b'/' ); |
2924 | bytes.extend(percent_decode(segment.as_bytes())); |
2925 | } |
2926 | |
2927 | // A windows drive letter must end with a slash. |
2928 | if bytes.len() > 2 |
2929 | && bytes[bytes.len() - 2].is_ascii_alphabetic() |
2930 | && matches!(bytes[bytes.len() - 1], b':' | b'|' ) |
2931 | { |
2932 | bytes.push(b'/' ); |
2933 | } |
2934 | |
2935 | let os_str = OsStr::from_bytes(&bytes); |
2936 | let path = PathBuf::from(os_str); |
2937 | |
2938 | debug_assert!( |
2939 | path.is_absolute(), |
2940 | "to_file_path() failed to produce an absolute Path" |
2941 | ); |
2942 | |
2943 | Ok(path) |
2944 | } |
2945 | |
/// Windows build of `file_url_segments_to_pathbuf`.
///
/// Forwards `host` and `segments` unchanged to
/// `file_url_segments_to_pathbuf_windows` and returns its result.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2953 | |
2954 | // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 |
2955 | #[cfg_attr (not(windows), allow(dead_code))] |
2956 | fn file_url_segments_to_pathbuf_windows( |
2957 | host: Option<&str>, |
2958 | mut segments: str::Split<'_, char>, |
2959 | ) -> Result<PathBuf, ()> { |
2960 | let mut string = if let Some(host) = host { |
2961 | r"\\" .to_owned() + host |
2962 | } else { |
2963 | let first = segments.next().ok_or(())?; |
2964 | |
2965 | match first.len() { |
2966 | 2 => { |
2967 | if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' { |
2968 | return Err(()); |
2969 | } |
2970 | |
2971 | first.to_owned() |
2972 | } |
2973 | |
2974 | 4 => { |
2975 | if !first.starts_with(parser::ascii_alpha) { |
2976 | return Err(()); |
2977 | } |
2978 | let bytes = first.as_bytes(); |
2979 | if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A' ) { |
2980 | return Err(()); |
2981 | } |
2982 | |
2983 | first[0..1].to_owned() + ":" |
2984 | } |
2985 | |
2986 | _ => return Err(()), |
2987 | } |
2988 | }; |
2989 | |
2990 | for segment in segments { |
2991 | string.push(' \\' ); |
2992 | |
2993 | // Currently non-unicode windows paths cannot be represented |
2994 | match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { |
2995 | Ok(s) => string.push_str(&s), |
2996 | Err(..) => return Err(()), |
2997 | } |
2998 | } |
2999 | let path = PathBuf::from(string); |
3000 | debug_assert!( |
3001 | path.is_absolute(), |
3002 | "to_file_path() failed to produce an absolute Path" |
3003 | ); |
3004 | Ok(path) |
3005 | } |
3006 | |
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
///
/// Both fields are private, so this type cannot be built with struct literal
/// syntax outside of this crate.
#[derive (Debug)]
pub struct UrlQuery<'a> {
    /// The URL being edited. `finish` and `Drop` `take()` it, leaving `None`,
    /// so the fragment is restored at most once.
    url: Option<&'a mut Url>,
    /// Fragment passed to `Url::restore_already_parsed_fragment` when editing ends.
    fragment: Option<String>,
}
3013 | |
3014 | // `as_mut_string` string here exposes the internal serialization of an `Url`, |
3015 | // which should not be exposed to users. |
3016 | // We achieve that by not giving users direct access to `UrlQuery`: |
3017 | // * Its fields are private |
3018 | // (and so can not be constructed with struct literal syntax outside of this crate), |
3019 | // * It has no constructor |
3020 | // * It is only visible (on the type level) to users in the return type of |
3021 | // `Url::query_pairs_mut` which is `Serializer<UrlQuery>` |
3022 | // * `Serializer` keeps its target in a private field |
3023 | // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`. |
3024 | impl<'a> form_urlencoded::Target for UrlQuery<'a> { |
3025 | fn as_mut_string(&mut self) -> &mut String { |
3026 | &mut self.url.as_mut().unwrap().serialization |
3027 | } |
3028 | |
3029 | fn finish(mut self) -> &'a mut Url { |
3030 | let url: &mut Url = self.url.take().unwrap(); |
3031 | url.restore_already_parsed_fragment(self.fragment.take()); |
3032 | url |
3033 | } |
3034 | |
3035 | type Finished = &'a mut Url; |
3036 | } |
3037 | |
3038 | impl<'a> Drop for UrlQuery<'a> { |
3039 | fn drop(&mut self) { |
3040 | if let Some(url: &mut Url) = self.url.take() { |
3041 | url.restore_already_parsed_fragment(self.fragment.take()) |
3042 | } |
3043 | } |
3044 | } |
3045 | |