1// Copyright 2013-2015 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*!
10
rust-url is an implementation of the [URL Standard](https://url.spec.whatwg.org/)
for the [Rust](https://www.rust-lang.org/) programming language.
13
14
15# URL parsing and data structures
16
17First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19```
20use url::{Url, ParseError};
21
22assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23```
24
25Let’s parse a valid URL and look at its components.
26
27```
28use url::{Url, Host, Position};
29# use url::ParseError;
30# fn run() -> Result<(), ParseError> {
31let issue_list_url = Url::parse(
32 "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33)?;
34
35
36assert!(issue_list_url.scheme() == "https");
37assert!(issue_list_url.username() == "");
38assert!(issue_list_url.password() == None);
39assert!(issue_list_url.host_str() == Some("github.com"));
40assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41assert!(issue_list_url.port() == None);
42assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44 Some(vec!["rust-lang", "rust", "issues"]));
45assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47assert!(issue_list_url.fragment() == None);
48assert!(!issue_list_url.cannot_be_a_base());
49# Ok(())
50# }
51# run().unwrap();
52```
53
54Some URLs are said to be *cannot-be-a-base*:
55they don’t have a username, password, host, or port,
56and their "path" is an arbitrary string rather than slash-separated segments:
57
58```
59use url::Url;
60# use url::ParseError;
61
62# fn run() -> Result<(), ParseError> {
63let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65assert!(data_url.cannot_be_a_base());
66assert!(data_url.scheme() == "data");
67assert!(data_url.path() == "text/plain,Hello");
68assert!(data_url.path_segments().is_none());
69assert!(data_url.query() == Some("World"));
70assert!(data_url.fragment() == Some(""));
71# Ok(())
72# }
73# run().unwrap();
74```
75
76## Serde
77
78Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79
80# Base URL
81
82Many contexts allow URL *references* that can be relative to a *base URL*:
83
84```html
85<link rel="stylesheet" href="../main.css">
86```
87
88Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89
90```
91use url::{Url, ParseError};
92
93assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94```
95
96Use the `join` method on an `Url` to use it as a base URL:
97
98```
99use url::Url;
100# use url::ParseError;
101
102# fn run() -> Result<(), ParseError> {
103let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104let css_url = this_document.join("../main.css")?;
105assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106# Ok(())
107# }
108# run().unwrap();
109```
110
111# Feature: `serde`
112
113If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114[`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115[`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116See [serde documentation](https://serde.rs) for more information.
117
118```toml
119url = { version = "2", features = ["serde"] }
120```
121
122# Feature: `debugger_visualizer`
123
124If you enable the `debugger_visualizer` feature, the `url` crate will include
125a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects)
126for [Visual Studio](https://www.visualstudio.com/) that allows you to view
127[`Url`](struct.Url.html) objects in the debugger.
128
129This feature requires Rust 1.71 or later.
130
131```toml
132url = { version = "2", features = ["debugger_visualizer"] }
133```
134
135*/
136
137#![doc(html_root_url = "https://docs.rs/url/2.5.0")]
138#![cfg_attr(
139 feature = "debugger_visualizer",
140 debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
141)]
142
143pub use form_urlencoded;
144
145#[cfg(feature = "serde")]
146extern crate serde;
147
148use crate::host::HostInternal;
149use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
150use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
151use std::borrow::Borrow;
152use std::cmp;
153use std::fmt::{self, Write};
154use std::hash;
155use std::io;
156use std::mem;
157use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
158use std::ops::{Range, RangeFrom, RangeTo};
159use std::path::{Path, PathBuf};
160use std::str;
161
162use std::convert::TryFrom;
163
164pub use crate::host::Host;
165pub use crate::origin::{OpaqueOrigin, Origin};
166pub use crate::parser::{ParseError, SyntaxViolation};
167pub use crate::path_segments::PathSegmentsMut;
168pub use crate::slicing::Position;
169pub use form_urlencoded::EncodingOverride;
170
171mod host;
172mod origin;
173mod parser;
174mod path_segments;
175mod slicing;
176
177#[doc(hidden)]
178pub mod quirks;
179
/// A parsed URL record.
///
/// The URL is kept as one serialized string; the other fields record where
/// each component starts or ends inside that string, together with the
/// host and port.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    //
    // Offsets into `serialization`; the accessors compare them against
    // `serialization.len()` and use them to slice the string.
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    host_start: u32,
    host_end: u32,
    // `HostInternal::None` when the URL has no host (see `has_host`).
    host: HostInternal,
    // `None` when no port is present between `host_end` and `path_start`.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
205
/// Full configuration for the URL parser.
///
/// A default value is obtained from `Url::options()`; each builder method
/// replaces one setting and returns the updated options, and `parse`
/// runs the parser with them.
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; `None` unless set with `base_url`.
    base_url: Option<&'a Url>,
    // Encoding override handed to the parser for query strings.
    encoding_override: EncodingOverride<'a>,
    // Optional callback handed to the parser for reporting `SyntaxViolation`s.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
214
215impl<'a> ParseOptions<'a> {
216 /// Change the base URL
217 pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
218 self.base_url = new;
219 self
220 }
221
222 /// Override the character encoding of query strings.
223 /// This is a legacy concept only relevant for HTML.
224 pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
225 self.encoding_override = new;
226 self
227 }
228
229 /// Call the provided function or closure for a non-fatal `SyntaxViolation`
230 /// when it occurs during parsing. Note that since the provided function is
231 /// `Fn`, the caller might need to utilize _interior mutability_, such as with
232 /// a `RefCell`, to collect the violations.
233 ///
234 /// ## Example
235 /// ```
236 /// use std::cell::RefCell;
237 /// use url::{Url, SyntaxViolation};
238 /// # use url::ParseError;
239 /// # fn run() -> Result<(), url::ParseError> {
240 /// let violations = RefCell::new(Vec::new());
241 /// let url = Url::options()
242 /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
243 /// .parse("https:////example.com")?;
244 /// assert_eq!(url.as_str(), "https://example.com/");
245 /// assert_eq!(violations.into_inner(),
246 /// vec!(SyntaxViolation::ExpectedDoubleSlash));
247 /// # Ok(())
248 /// # }
249 /// # run().unwrap();
250 /// ```
251 pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
252 self.violation_fn = new;
253 self
254 }
255
256 /// Parse an URL string with the configuration so far.
257 pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
258 Parser {
259 serialization: String::with_capacity(input.len()),
260 base_url: self.base_url,
261 query_encoding_override: self.encoding_override,
262 violation_fn: self.violation_fn,
263 context: Context::UrlParser,
264 }
265 .parse_url(input)
266 }
267}
268
269impl Url {
270 /// Parse an absolute URL from a string.
271 ///
272 /// # Examples
273 ///
274 /// ```rust
275 /// use url::Url;
276 /// # use url::ParseError;
277 ///
278 /// # fn run() -> Result<(), ParseError> {
279 /// let url = Url::parse("https://example.net")?;
280 /// # Ok(())
281 /// # }
282 /// # run().unwrap();
283 /// ```
284 ///
285 /// # Errors
286 ///
287 /// If the function can not parse an absolute URL from the given string,
288 /// a [`ParseError`] variant will be returned.
289 ///
290 /// [`ParseError`]: enum.ParseError.html
291 #[inline]
292 pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
293 Url::options().parse(input)
294 }
295
296 /// Parse an absolute URL from a string and add params to its query string.
297 ///
298 /// Existing params are not removed.
299 ///
300 /// # Examples
301 ///
302 /// ```rust
303 /// use url::Url;
304 /// # use url::ParseError;
305 ///
306 /// # fn run() -> Result<(), ParseError> {
307 /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
308 /// &[("lang", "rust"), ("browser", "servo")])?;
309 /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
310 /// # Ok(())
311 /// # }
312 /// # run().unwrap();
313 /// ```
314 ///
315 /// # Errors
316 ///
317 /// If the function can not parse an absolute URL from the given string,
318 /// a [`ParseError`] variant will be returned.
319 ///
320 /// [`ParseError`]: enum.ParseError.html
321 #[inline]
322 pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
323 where
324 I: IntoIterator,
325 I::Item: Borrow<(K, V)>,
326 K: AsRef<str>,
327 V: AsRef<str>,
328 {
329 let mut url = Url::options().parse(input);
330
331 if let Ok(ref mut url) = url {
332 url.query_pairs_mut().extend_pairs(iter);
333 }
334
335 url
336 }
337
338 /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
339 fn strip_trailing_spaces_from_opaque_path(&mut self) {
340 if !self.cannot_be_a_base() {
341 return;
342 }
343
344 if self.fragment_start.is_some() {
345 return;
346 }
347
348 if self.query_start.is_some() {
349 return;
350 }
351
352 let trailing_space_count = self
353 .serialization
354 .chars()
355 .rev()
356 .take_while(|c| *c == ' ')
357 .count();
358
359 let start = self.serialization.len() - trailing_space_count;
360
361 self.serialization.truncate(start);
362 }
363
364 /// Parse a string as an URL, with this URL as the base URL.
365 ///
366 /// The inverse of this is [`make_relative`].
367 ///
368 /// Note: a trailing slash is significant.
369 /// Without it, the last path component is considered to be a “file” name
370 /// to be removed to get at the “directory” that is used as the base:
371 ///
372 /// # Examples
373 ///
374 /// ```rust
375 /// use url::Url;
376 /// # use url::ParseError;
377 ///
378 /// # fn run() -> Result<(), ParseError> {
379 /// let base = Url::parse("https://example.net/a/b.html")?;
380 /// let url = base.join("c.png")?;
381 /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
382 ///
383 /// let base = Url::parse("https://example.net/a/b/")?;
384 /// let url = base.join("c.png")?;
385 /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
386 /// # Ok(())
387 /// # }
388 /// # run().unwrap();
389 /// ```
390 ///
391 /// # Errors
392 ///
393 /// If the function can not parse an URL from the given string
394 /// with this URL as the base URL, a [`ParseError`] variant will be returned.
395 ///
396 /// [`ParseError`]: enum.ParseError.html
397 /// [`make_relative`]: #method.make_relative
398 #[inline]
399 pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
400 Url::options().base_url(Some(self)).parse(input)
401 }
402
403 /// Creates a relative URL if possible, with this URL as the base URL.
404 ///
405 /// This is the inverse of [`join`].
406 ///
407 /// # Examples
408 ///
409 /// ```rust
410 /// use url::Url;
411 /// # use url::ParseError;
412 ///
413 /// # fn run() -> Result<(), ParseError> {
414 /// let base = Url::parse("https://example.net/a/b.html")?;
415 /// let url = Url::parse("https://example.net/a/c.png")?;
416 /// let relative = base.make_relative(&url);
417 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
418 ///
419 /// let base = Url::parse("https://example.net/a/b/")?;
420 /// let url = Url::parse("https://example.net/a/b/c.png")?;
421 /// let relative = base.make_relative(&url);
422 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
423 ///
424 /// let base = Url::parse("https://example.net/a/b/")?;
425 /// let url = Url::parse("https://example.net/a/d/c.png")?;
426 /// let relative = base.make_relative(&url);
427 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
428 ///
429 /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
430 /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
431 /// let relative = base.make_relative(&url);
432 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
433 /// # Ok(())
434 /// # }
435 /// # run().unwrap();
436 /// ```
437 ///
438 /// # Errors
439 ///
440 /// If this URL can't be a base for the given URL, `None` is returned.
441 /// This is for example the case if the scheme, host or port are not the same.
442 ///
443 /// [`join`]: #method.join
444 pub fn make_relative(&self, url: &Url) -> Option<String> {
445 if self.cannot_be_a_base() {
446 return None;
447 }
448
449 // Scheme, host and port need to be the same
450 if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
451 return None;
452 }
453
454 // We ignore username/password at this point
455
456 // The path has to be transformed
457 let mut relative = String::new();
458
459 // Extract the filename of both URIs, these need to be handled separately
460 fn extract_path_filename(s: &str) -> (&str, &str) {
461 let last_slash_idx = s.rfind('/').unwrap_or(0);
462 let (path, filename) = s.split_at(last_slash_idx);
463 if filename.is_empty() {
464 (path, "")
465 } else {
466 (path, &filename[1..])
467 }
468 }
469
470 let (base_path, base_filename) = extract_path_filename(self.path());
471 let (url_path, url_filename) = extract_path_filename(url.path());
472
473 let mut base_path = base_path.split('/').peekable();
474 let mut url_path = url_path.split('/').peekable();
475
476 // Skip over the common prefix
477 while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
478 base_path.next();
479 url_path.next();
480 }
481
482 // Add `..` segments for the remainder of the base path
483 for base_path_segment in base_path {
484 // Skip empty last segments
485 if base_path_segment.is_empty() {
486 break;
487 }
488
489 if !relative.is_empty() {
490 relative.push('/');
491 }
492
493 relative.push_str("..");
494 }
495
496 // Append the remainder of the other URI
497 for url_path_segment in url_path {
498 if !relative.is_empty() {
499 relative.push('/');
500 }
501
502 relative.push_str(url_path_segment);
503 }
504
505 // Add the filename if they are not the same
506 if !relative.is_empty() || base_filename != url_filename {
507 // If the URIs filename is empty this means that it was a directory
508 // so we'll have to append a '/'.
509 //
510 // Otherwise append it directly as the new filename.
511 if url_filename.is_empty() {
512 relative.push('/');
513 } else {
514 if !relative.is_empty() {
515 relative.push('/');
516 }
517 relative.push_str(url_filename);
518 }
519 }
520
521 // Query and fragment are only taken from the other URI
522 if let Some(query) = url.query() {
523 relative.push('?');
524 relative.push_str(query);
525 }
526
527 if let Some(fragment) = url.fragment() {
528 relative.push('#');
529 relative.push_str(fragment);
530 }
531
532 Some(relative)
533 }
534
535 /// Return a default `ParseOptions` that can fully configure the URL parser.
536 ///
537 /// # Examples
538 ///
539 /// Get default `ParseOptions`, then change base url
540 ///
541 /// ```rust
542 /// use url::Url;
543 /// # use url::ParseError;
544 /// # fn run() -> Result<(), ParseError> {
545 /// let options = Url::options();
546 /// let api = Url::parse("https://api.example.com")?;
547 /// let base_url = options.base_url(Some(&api));
548 /// let version_url = base_url.parse("version.json")?;
549 /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
550 /// # Ok(())
551 /// # }
552 /// # run().unwrap();
553 /// ```
554 pub fn options<'a>() -> ParseOptions<'a> {
555 ParseOptions {
556 base_url: None,
557 encoding_override: None,
558 violation_fn: None,
559 }
560 }
561
562 /// Return the serialization of this URL.
563 ///
564 /// This is fast since that serialization is already stored in the `Url` struct.
565 ///
566 /// # Examples
567 ///
568 /// ```rust
569 /// use url::Url;
570 /// # use url::ParseError;
571 ///
572 /// # fn run() -> Result<(), ParseError> {
573 /// let url_str = "https://example.net/";
574 /// let url = Url::parse(url_str)?;
575 /// assert_eq!(url.as_str(), url_str);
576 /// # Ok(())
577 /// # }
578 /// # run().unwrap();
579 /// ```
580 #[inline]
581 pub fn as_str(&self) -> &str {
582 &self.serialization
583 }
584
585 /// Return the serialization of this URL.
586 ///
587 /// This consumes the `Url` and takes ownership of the `String` stored in it.
588 ///
589 /// # Examples
590 ///
591 /// ```rust
592 /// use url::Url;
593 /// # use url::ParseError;
594 ///
595 /// # fn run() -> Result<(), ParseError> {
596 /// let url_str = "https://example.net/";
597 /// let url = Url::parse(url_str)?;
598 /// assert_eq!(String::from(url), url_str);
599 /// # Ok(())
600 /// # }
601 /// # run().unwrap();
602 /// ```
603 #[inline]
604 #[deprecated(since = "2.3.0", note = "use Into<String>")]
605 pub fn into_string(self) -> String {
606 self.into()
607 }
608
609 /// For internal testing, not part of the public API.
610 ///
611 /// Methods of the `Url` struct assume a number of invariants.
612 /// This checks each of these invariants and panic if one is not met.
613 /// This is for testing rust-url itself.
614 #[doc(hidden)]
615 pub fn check_invariants(&self) -> Result<(), String> {
616 macro_rules! assert {
617 ($x: expr) => {
618 if !$x {
619 return Err(format!(
620 "!( {} ) for URL {:?}",
621 stringify!($x),
622 self.serialization
623 ));
624 }
625 };
626 }
627
628 macro_rules! assert_eq {
629 ($a: expr, $b: expr) => {
630 {
631 let a = $a;
632 let b = $b;
633 if a != b {
634 return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
635 a, b, stringify!($a), stringify!($b),
636 self.serialization))
637 }
638 }
639 }
640 }
641
642 assert!(self.scheme_end >= 1);
643 assert!(self.byte_at(0).is_ascii_alphabetic());
644 assert!(self
645 .slice(1..self.scheme_end)
646 .chars()
647 .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
648 assert_eq!(self.byte_at(self.scheme_end), b':');
649
650 if self.slice(self.scheme_end + 1..).starts_with("//") {
651 // URL with authority
652 if self.username_end != self.serialization.len() as u32 {
653 match self.byte_at(self.username_end) {
654 b':' => {
655 assert!(self.host_start >= self.username_end + 2);
656 assert_eq!(self.byte_at(self.host_start - 1), b'@');
657 }
658 b'@' => assert!(self.host_start == self.username_end + 1),
659 _ => assert_eq!(self.username_end, self.scheme_end + 3),
660 }
661 }
662 assert!(self.host_start >= self.username_end);
663 assert!(self.host_end >= self.host_start);
664 let host_str = self.slice(self.host_start..self.host_end);
665 match self.host {
666 HostInternal::None => assert_eq!(host_str, ""),
667 HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
668 HostInternal::Ipv6(address) => {
669 let h: Host<String> = Host::Ipv6(address);
670 assert_eq!(host_str, h.to_string())
671 }
672 HostInternal::Domain => {
673 if SchemeType::from(self.scheme()).is_special() {
674 assert!(!host_str.is_empty())
675 }
676 }
677 }
678 if self.path_start == self.host_end {
679 assert_eq!(self.port, None);
680 } else {
681 assert_eq!(self.byte_at(self.host_end), b':');
682 let port_str = self.slice(self.host_end + 1..self.path_start);
683 assert_eq!(
684 self.port,
685 Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
686 );
687 }
688 assert!(
689 self.path_start as usize == self.serialization.len()
690 || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
691 );
692 } else {
693 // Anarchist URL (no authority)
694 assert_eq!(self.username_end, self.scheme_end + 1);
695 assert_eq!(self.host_start, self.scheme_end + 1);
696 assert_eq!(self.host_end, self.scheme_end + 1);
697 assert_eq!(self.host, HostInternal::None);
698 assert_eq!(self.port, None);
699 if self.path().starts_with("//") {
700 // special case when first path segment is empty
701 assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
702 assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
703 assert_eq!(self.path_start, self.scheme_end + 3);
704 } else {
705 assert_eq!(self.path_start, self.scheme_end + 1);
706 }
707 }
708 if let Some(start) = self.query_start {
709 assert!(start >= self.path_start);
710 assert_eq!(self.byte_at(start), b'?');
711 }
712 if let Some(start) = self.fragment_start {
713 assert!(start >= self.path_start);
714 assert_eq!(self.byte_at(start), b'#');
715 }
716 if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
717 assert!(fragment_start > query_start);
718 }
719
720 let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
721 assert_eq!(&self.serialization, &other.serialization);
722 assert_eq!(self.scheme_end, other.scheme_end);
723 assert_eq!(self.username_end, other.username_end);
724 assert_eq!(self.host_start, other.host_start);
725 assert_eq!(self.host_end, other.host_end);
726 assert!(
727 self.host == other.host ||
728 // XXX No host round-trips to empty host.
729 // See https://github.com/whatwg/url/issues/79
730 (self.host_str(), other.host_str()) == (None, Some(""))
731 );
732 assert_eq!(self.port, other.port);
733 assert_eq!(self.path_start, other.path_start);
734 assert_eq!(self.query_start, other.query_start);
735 assert_eq!(self.fragment_start, other.fragment_start);
736 Ok(())
737 }
738
739 /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
740 ///
741 /// Note: this returns an opaque origin for `file:` URLs, which causes
742 /// `url.origin() != url.origin()`.
743 ///
744 /// # Examples
745 ///
746 /// URL with `ftp` scheme:
747 ///
748 /// ```rust
749 /// use url::{Host, Origin, Url};
750 /// # use url::ParseError;
751 ///
752 /// # fn run() -> Result<(), ParseError> {
753 /// let url = Url::parse("ftp://example.com/foo")?;
754 /// assert_eq!(url.origin(),
755 /// Origin::Tuple("ftp".into(),
756 /// Host::Domain("example.com".into()),
757 /// 21));
758 /// # Ok(())
759 /// # }
760 /// # run().unwrap();
761 /// ```
762 ///
763 /// URL with `blob` scheme:
764 ///
765 /// ```rust
766 /// use url::{Host, Origin, Url};
767 /// # use url::ParseError;
768 ///
769 /// # fn run() -> Result<(), ParseError> {
770 /// let url = Url::parse("blob:https://example.com/foo")?;
771 /// assert_eq!(url.origin(),
772 /// Origin::Tuple("https".into(),
773 /// Host::Domain("example.com".into()),
774 /// 443));
775 /// # Ok(())
776 /// # }
777 /// # run().unwrap();
778 /// ```
779 ///
780 /// URL with `file` scheme:
781 ///
782 /// ```rust
783 /// use url::{Host, Origin, Url};
784 /// # use url::ParseError;
785 ///
786 /// # fn run() -> Result<(), ParseError> {
787 /// let url = Url::parse("file:///tmp/foo")?;
788 /// assert!(!url.origin().is_tuple());
789 ///
790 /// let other_url = Url::parse("file:///tmp/foo")?;
791 /// assert!(url.origin() != other_url.origin());
792 /// # Ok(())
793 /// # }
794 /// # run().unwrap();
795 /// ```
796 ///
797 /// URL with other scheme:
798 ///
799 /// ```rust
800 /// use url::{Host, Origin, Url};
801 /// # use url::ParseError;
802 ///
803 /// # fn run() -> Result<(), ParseError> {
804 /// let url = Url::parse("foo:bar")?;
805 /// assert!(!url.origin().is_tuple());
806 /// # Ok(())
807 /// # }
808 /// # run().unwrap();
809 /// ```
810 #[inline]
811 pub fn origin(&self) -> Origin {
812 origin::url_origin(self)
813 }
814
815 /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
816 ///
817 /// # Examples
818 ///
819 /// ```
820 /// use url::Url;
821 /// # use url::ParseError;
822 ///
823 /// # fn run() -> Result<(), ParseError> {
824 /// let url = Url::parse("file:///tmp/foo")?;
825 /// assert_eq!(url.scheme(), "file");
826 /// # Ok(())
827 /// # }
828 /// # run().unwrap();
829 /// ```
830 #[inline]
831 pub fn scheme(&self) -> &str {
832 self.slice(..self.scheme_end)
833 }
834
835 /// Return whether the URL is special (has a special scheme)
836 ///
837 /// # Examples
838 ///
839 /// ```
840 /// use url::Url;
841 /// # use url::ParseError;
842 ///
843 /// # fn run() -> Result<(), ParseError> {
844 /// assert!(Url::parse("http:///tmp/foo")?.is_special());
845 /// assert!(Url::parse("file:///tmp/foo")?.is_special());
846 /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
847 /// # Ok(())
848 /// # }
849 /// # run().unwrap();
850 /// ```
851 pub fn is_special(&self) -> bool {
852 let scheme_type = SchemeType::from(self.scheme());
853 scheme_type.is_special()
854 }
855
856 /// Return whether the URL has an 'authority',
857 /// which can contain a username, password, host, and port number.
858 ///
859 /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
860 /// or cannot-be-a-base like `data:text/plain,Stuff`.
861 ///
862 /// See also the `authority` method.
863 ///
864 /// # Examples
865 ///
866 /// ```
867 /// use url::Url;
868 /// # use url::ParseError;
869 ///
870 /// # fn run() -> Result<(), ParseError> {
871 /// let url = Url::parse("ftp://rms@example.com")?;
872 /// assert!(url.has_authority());
873 ///
874 /// let url = Url::parse("unix:/run/foo.socket")?;
875 /// assert!(!url.has_authority());
876 ///
877 /// let url = Url::parse("data:text/plain,Stuff")?;
878 /// assert!(!url.has_authority());
879 /// # Ok(())
880 /// # }
881 /// # run().unwrap();
882 /// ```
883 #[inline]
884 pub fn has_authority(&self) -> bool {
885 debug_assert!(self.byte_at(self.scheme_end) == b':');
886 self.slice(self.scheme_end..).starts_with("://")
887 }
888
889 /// Return the authority of this URL as an ASCII string.
890 ///
891 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
892 /// of a special URL, or percent encoded for non-special URLs.
893 /// IPv6 addresses are given between `[` and `]` brackets.
894 /// Ports are omitted if they match the well known port of a special URL.
895 ///
896 /// Username and password are percent-encoded.
897 ///
898 /// See also the `has_authority` method.
899 ///
900 /// # Examples
901 ///
902 /// ```
903 /// use url::Url;
904 /// # use url::ParseError;
905 ///
906 /// # fn run() -> Result<(), ParseError> {
907 /// let url = Url::parse("unix:/run/foo.socket")?;
908 /// assert_eq!(url.authority(), "");
909 /// let url = Url::parse("file:///tmp/foo")?;
910 /// assert_eq!(url.authority(), "");
911 /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
912 /// assert_eq!(url.authority(), "user:password@example.com");
913 /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
914 /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
915 /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
916 /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
917 /// # Ok(())
918 /// # }
919 /// # run().unwrap();
920 /// ```
921 pub fn authority(&self) -> &str {
922 let scheme_separator_len = "://".len() as u32;
923 if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
924 self.slice(self.scheme_end + scheme_separator_len..self.path_start)
925 } else {
926 ""
927 }
928 }
929
930 /// Return whether this URL is a cannot-be-a-base URL,
931 /// meaning that parsing a relative URL string with this URL as the base will return an error.
932 ///
933 /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
934 /// as is typically the case of `data:` and `mailto:` URLs.
935 ///
936 /// # Examples
937 ///
938 /// ```
939 /// use url::Url;
940 /// # use url::ParseError;
941 ///
942 /// # fn run() -> Result<(), ParseError> {
943 /// let url = Url::parse("ftp://rms@example.com")?;
944 /// assert!(!url.cannot_be_a_base());
945 ///
946 /// let url = Url::parse("unix:/run/foo.socket")?;
947 /// assert!(!url.cannot_be_a_base());
948 ///
949 /// let url = Url::parse("data:text/plain,Stuff")?;
950 /// assert!(url.cannot_be_a_base());
951 /// # Ok(())
952 /// # }
953 /// # run().unwrap();
954 /// ```
955 #[inline]
956 pub fn cannot_be_a_base(&self) -> bool {
957 !self.slice(self.scheme_end + 1..).starts_with('/')
958 }
959
960 /// Return the username for this URL (typically the empty string)
961 /// as a percent-encoded ASCII string.
962 ///
963 /// # Examples
964 ///
965 /// ```
966 /// use url::Url;
967 /// # use url::ParseError;
968 ///
969 /// # fn run() -> Result<(), ParseError> {
970 /// let url = Url::parse("ftp://rms@example.com")?;
971 /// assert_eq!(url.username(), "rms");
972 ///
973 /// let url = Url::parse("ftp://:secret123@example.com")?;
974 /// assert_eq!(url.username(), "");
975 ///
976 /// let url = Url::parse("https://example.com")?;
977 /// assert_eq!(url.username(), "");
978 /// # Ok(())
979 /// # }
980 /// # run().unwrap();
981 /// ```
982 pub fn username(&self) -> &str {
983 let scheme_separator_len = "://".len() as u32;
984 if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
985 self.slice(self.scheme_end + scheme_separator_len..self.username_end)
986 } else {
987 ""
988 }
989 }
990
991 /// Return the password for this URL, if any, as a percent-encoded ASCII string.
992 ///
993 /// # Examples
994 ///
995 /// ```
996 /// use url::Url;
997 /// # use url::ParseError;
998 ///
999 /// # fn run() -> Result<(), ParseError> {
1000 /// let url = Url::parse("ftp://rms:secret123@example.com")?;
1001 /// assert_eq!(url.password(), Some("secret123"));
1002 ///
1003 /// let url = Url::parse("ftp://:secret123@example.com")?;
1004 /// assert_eq!(url.password(), Some("secret123"));
1005 ///
1006 /// let url = Url::parse("ftp://rms@example.com")?;
1007 /// assert_eq!(url.password(), None);
1008 ///
1009 /// let url = Url::parse("https://example.com")?;
1010 /// assert_eq!(url.password(), None);
1011 /// # Ok(())
1012 /// # }
1013 /// # run().unwrap();
1014 /// ```
1015 pub fn password(&self) -> Option<&str> {
1016 // This ':' is not the one marking a port number since a host can not be empty.
1017 // (Except for file: URLs, which do not have port numbers.)
1018 if self.has_authority()
1019 && self.username_end != self.serialization.len() as u32
1020 && self.byte_at(self.username_end) == b':'
1021 {
1022 debug_assert!(self.byte_at(self.host_start - 1) == b'@');
1023 Some(self.slice(self.username_end + 1..self.host_start - 1))
1024 } else {
1025 None
1026 }
1027 }
1028
1029 /// Equivalent to `url.host().is_some()`.
1030 ///
1031 /// # Examples
1032 ///
1033 /// ```
1034 /// use url::Url;
1035 /// # use url::ParseError;
1036 ///
1037 /// # fn run() -> Result<(), ParseError> {
1038 /// let url = Url::parse("ftp://rms@example.com")?;
1039 /// assert!(url.has_host());
1040 ///
1041 /// let url = Url::parse("unix:/run/foo.socket")?;
1042 /// assert!(!url.has_host());
1043 ///
1044 /// let url = Url::parse("data:text/plain,Stuff")?;
1045 /// assert!(!url.has_host());
1046 /// # Ok(())
1047 /// # }
1048 /// # run().unwrap();
1049 /// ```
1050 pub fn has_host(&self) -> bool {
1051 !matches!(self.host, HostInternal::None)
1052 }
1053
1054 /// Return the string representation of the host (domain or IP address) for this URL, if any.
1055 ///
1056 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1057 /// of a special URL, or percent encoded for non-special URLs.
1058 /// IPv6 addresses are given between `[` and `]` brackets.
1059 ///
1060 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1061 /// don’t have a host.
1062 ///
1063 /// See also the `host` method.
1064 ///
1065 /// # Examples
1066 ///
1067 /// ```
1068 /// use url::Url;
1069 /// # use url::ParseError;
1070 ///
1071 /// # fn run() -> Result<(), ParseError> {
1072 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1073 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1074 ///
1075 /// let url = Url::parse("ftp://rms@example.com")?;
1076 /// assert_eq!(url.host_str(), Some("example.com"));
1077 ///
1078 /// let url = Url::parse("unix:/run/foo.socket")?;
1079 /// assert_eq!(url.host_str(), None);
1080 ///
1081 /// let url = Url::parse("data:text/plain,Stuff")?;
1082 /// assert_eq!(url.host_str(), None);
1083 /// # Ok(())
1084 /// # }
1085 /// # run().unwrap();
1086 /// ```
1087 pub fn host_str(&self) -> Option<&str> {
1088 if self.has_host() {
1089 Some(self.slice(self.host_start..self.host_end))
1090 } else {
1091 None
1092 }
1093 }
1094
1095 /// Return the parsed representation of the host for this URL.
1096 /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
1097 /// of a special URL, or percent encoded for non-special URLs.
1098 ///
1099 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1100 /// don’t have a host.
1101 ///
1102 /// See also the `host_str` method.
1103 ///
1104 /// # Examples
1105 ///
1106 /// ```
1107 /// use url::Url;
1108 /// # use url::ParseError;
1109 ///
1110 /// # fn run() -> Result<(), ParseError> {
1111 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1112 /// assert!(url.host().is_some());
1113 ///
1114 /// let url = Url::parse("ftp://rms@example.com")?;
1115 /// assert!(url.host().is_some());
1116 ///
1117 /// let url = Url::parse("unix:/run/foo.socket")?;
1118 /// assert!(url.host().is_none());
1119 ///
1120 /// let url = Url::parse("data:text/plain,Stuff")?;
1121 /// assert!(url.host().is_none());
1122 /// # Ok(())
1123 /// # }
1124 /// # run().unwrap();
1125 /// ```
1126 pub fn host(&self) -> Option<Host<&str>> {
1127 match self.host {
1128 HostInternal::None => None,
1129 HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1130 HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1131 HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1132 }
1133 }
1134
1135 /// If this URL has a host and it is a domain name (not an IP address), return it.
1136 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1137 /// of a special URL, or percent encoded for non-special URLs.
1138 ///
1139 /// # Examples
1140 ///
1141 /// ```
1142 /// use url::Url;
1143 /// # use url::ParseError;
1144 ///
1145 /// # fn run() -> Result<(), ParseError> {
1146 /// let url = Url::parse("https://127.0.0.1/")?;
1147 /// assert_eq!(url.domain(), None);
1148 ///
1149 /// let url = Url::parse("mailto:rms@example.net")?;
1150 /// assert_eq!(url.domain(), None);
1151 ///
1152 /// let url = Url::parse("https://example.com/")?;
1153 /// assert_eq!(url.domain(), Some("example.com"));
1154 /// # Ok(())
1155 /// # }
1156 /// # run().unwrap();
1157 /// ```
1158 pub fn domain(&self) -> Option<&str> {
1159 match self.host {
1160 HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1161 _ => None,
1162 }
1163 }
1164
1165 /// Return the port number for this URL, if any.
1166 ///
/// Note that default port numbers are never reflected by the serialization;
/// use the `port_or_known_default()` method if you want a default port number returned.
1169 ///
1170 /// # Examples
1171 ///
1172 /// ```
1173 /// use url::Url;
1174 /// # use url::ParseError;
1175 ///
1176 /// # fn run() -> Result<(), ParseError> {
1177 /// let url = Url::parse("https://example.com")?;
1178 /// assert_eq!(url.port(), None);
1179 ///
1180 /// let url = Url::parse("https://example.com:443/")?;
1181 /// assert_eq!(url.port(), None);
1182 ///
1183 /// let url = Url::parse("ssh://example.com:22")?;
1184 /// assert_eq!(url.port(), Some(22));
1185 /// # Ok(())
1186 /// # }
1187 /// # run().unwrap();
1188 /// ```
#[inline]
pub fn port(&self) -> Option<u16> {
    // Plain field read. `set_port` stores `None` when it is handed the scheme's known
    // default port, which is why such a port does not show up here.
    self.port
}
1193
1194 /// Return the port number for this URL, or the default port number if it is known.
1195 ///
1196 /// This method only knows the default port number
1197 /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1198 ///
1199 /// For URLs in these schemes, this method always returns `Some(_)`.
1200 /// For other schemes, it is the same as `Url::port()`.
1201 ///
1202 /// # Examples
1203 ///
1204 /// ```
1205 /// use url::Url;
1206 /// # use url::ParseError;
1207 ///
1208 /// # fn run() -> Result<(), ParseError> {
1209 /// let url = Url::parse("foo://example.com")?;
1210 /// assert_eq!(url.port_or_known_default(), None);
1211 ///
1212 /// let url = Url::parse("foo://example.com:1456")?;
1213 /// assert_eq!(url.port_or_known_default(), Some(1456));
1214 ///
1215 /// let url = Url::parse("https://example.com")?;
1216 /// assert_eq!(url.port_or_known_default(), Some(443));
1217 /// # Ok(())
1218 /// # }
1219 /// # run().unwrap();
1220 /// ```
1221 #[inline]
1222 pub fn port_or_known_default(&self) -> Option<u16> {
1223 self.port.or_else(|| parser::default_port(self.scheme()))
1224 }
1225
1226 /// Resolve a URL’s host and port number to `SocketAddr`.
1227 ///
1228 /// If the URL has the default port number of a scheme that is unknown to this library,
1229 /// `default_port_number` provides an opportunity to provide the actual port number.
1230 /// In non-example code this should be implemented either simply as `|| None`,
1231 /// or by matching on the URL’s `.scheme()`.
1232 ///
1233 /// If the host is a domain, it is resolved using the standard library’s DNS support.
1234 ///
1235 /// # Examples
1236 ///
1237 /// ```no_run
1238 /// let url = url::Url::parse("https://example.net/").unwrap();
1239 /// let addrs = url.socket_addrs(|| None).unwrap();
1240 /// std::net::TcpStream::connect(&*addrs)
1241 /// # ;
1242 /// ```
1243 ///
1244 /// ```
1245 /// /// With application-specific known default port numbers
1246 /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1247 /// url.socket_addrs(|| match url.scheme() {
1248 /// "socks5" | "socks5h" => Some(1080),
1249 /// _ => None,
1250 /// })
1251 /// }
1252 /// ```
1253 pub fn socket_addrs(
1254 &self,
1255 default_port_number: impl Fn() -> Option<u16>,
1256 ) -> io::Result<Vec<SocketAddr>> {
1257 // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1258 // causes borrowck issues because the return value borrows `default_port_number`:
1259 //
1260 // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1261 //
1262 // > This RFC proposes that *all* type parameters are considered in scope
1263 // > for `impl Trait` in return position
1264
1265 fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1266 opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1267 }
1268
1269 let host = io_result(self.host(), "No host name in the URL")?;
1270 let port = io_result(
1271 self.port_or_known_default().or_else(default_port_number),
1272 "No port number in the URL",
1273 )?;
1274 Ok(match host {
1275 Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1276 Host::Ipv4(ip) => vec![(ip, port).into()],
1277 Host::Ipv6(ip) => vec![(ip, port).into()],
1278 })
1279 }
1280
1281 /// Return the path for this URL, as a percent-encoded ASCII string.
1282 /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1283 /// For other URLs, this starts with a '/' slash
1284 /// and continues with slash-separated path segments.
1285 ///
1286 /// # Examples
1287 ///
1288 /// ```rust
1289 /// use url::{Url, ParseError};
1290 ///
1291 /// # fn run() -> Result<(), ParseError> {
1292 /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1293 /// assert_eq!(url.path(), "/api/versions");
1294 ///
1295 /// let url = Url::parse("https://example.com")?;
1296 /// assert_eq!(url.path(), "/");
1297 ///
1298 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1299 /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1300 /// # Ok(())
1301 /// # }
1302 /// # run().unwrap();
1303 /// ```
1304 pub fn path(&self) -> &str {
1305 match (self.query_start, self.fragment_start) {
1306 (None, None) => self.slice(self.path_start..),
1307 (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1308 self.slice(self.path_start..next_component_start)
1309 }
1310 }
1311 }
1312
1313 /// Unless this URL is cannot-be-a-base,
1314 /// return an iterator of '/' slash-separated path segments,
1315 /// each as a percent-encoded ASCII string.
1316 ///
1317 /// Return `None` for cannot-be-a-base URLs.
1318 ///
1319 /// When `Some` is returned, the iterator always contains at least one string
1320 /// (which may be empty).
1321 ///
1322 /// # Examples
1323 ///
1324 /// ```
1325 /// use url::Url;
1326 /// # use std::error::Error;
1327 ///
1328 /// # fn run() -> Result<(), Box<dyn Error>> {
1329 /// let url = Url::parse("https://example.com/foo/bar")?;
1330 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1331 /// assert_eq!(path_segments.next(), Some("foo"));
1332 /// assert_eq!(path_segments.next(), Some("bar"));
1333 /// assert_eq!(path_segments.next(), None);
1334 ///
1335 /// let url = Url::parse("https://example.com")?;
1336 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1337 /// assert_eq!(path_segments.next(), Some(""));
1338 /// assert_eq!(path_segments.next(), None);
1339 ///
1340 /// let url = Url::parse("data:text/plain,HelloWorld")?;
1341 /// assert!(url.path_segments().is_none());
1342 ///
1343 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1344 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1345 /// assert_eq!(path_segments.next(), Some("countries"));
1346 /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1347 /// # Ok(())
1348 /// # }
1349 /// # run().unwrap();
1350 /// ```
1351 pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1352 let path = self.path();
1353 path.strip_prefix('/').map(|remainder| remainder.split('/'))
1354 }
1355
1356 /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1357 ///
1358 /// # Examples
1359 ///
1360 /// ```rust
1361 /// use url::Url;
1362 /// # use url::ParseError;
1363 ///
/// # fn run() -> Result<(), ParseError> {
1365 /// let url = Url::parse("https://example.com/products?page=2")?;
1366 /// let query = url.query();
1367 /// assert_eq!(query, Some("page=2"));
1368 ///
1369 /// let url = Url::parse("https://example.com/products")?;
1370 /// let query = url.query();
1371 /// assert!(query.is_none());
1372 ///
1373 /// let url = Url::parse("https://example.com/?country=español")?;
1374 /// let query = url.query();
1375 /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1376 /// # Ok(())
1377 /// # }
1378 /// # run().unwrap();
1379 /// ```
1380 pub fn query(&self) -> Option<&str> {
1381 match (self.query_start, self.fragment_start) {
1382 (None, _) => None,
1383 (Some(query_start), None) => {
1384 debug_assert!(self.byte_at(query_start) == b'?');
1385 Some(self.slice(query_start + 1..))
1386 }
1387 (Some(query_start), Some(fragment_start)) => {
1388 debug_assert!(self.byte_at(query_start) == b'?');
1389 Some(self.slice(query_start + 1..fragment_start))
1390 }
1391 }
1392 }
1393
1394 /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1395 /// and return an iterator of (key, value) pairs.
1396 ///
1397 /// # Examples
1398 ///
1399 /// ```rust
1400 /// use std::borrow::Cow;
1401 ///
1402 /// use url::Url;
1403 /// # use url::ParseError;
1404 ///
1405 /// # fn run() -> Result<(), ParseError> {
1406 /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1407 /// let mut pairs = url.query_pairs();
1408 ///
1409 /// assert_eq!(pairs.count(), 2);
1410 ///
1411 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1412 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1413 /// # Ok(())
1414 /// # }
1415 /// # run().unwrap();
1416 /// ```
1417
1418 #[inline]
1419 pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1420 form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1421 }
1422
1423 /// Return this URL’s fragment identifier, if any.
1424 ///
1425 /// A fragment is the part of the URL after the `#` symbol.
1426 /// The fragment is optional and, if present, contains a fragment identifier
1427 /// that identifies a secondary resource, such as a section heading
1428 /// of a document.
1429 ///
/// In HTML, the fragment identifier is usually the id attribute of an element
1431 /// that is scrolled to on load. Browsers typically will not send the fragment portion
1432 /// of a URL to the server.
1433 ///
1434 /// **Note:** the parser did *not* percent-encode this component,
1435 /// but the input may have been percent-encoded already.
1436 ///
1437 /// # Examples
1438 ///
1439 /// ```rust
1440 /// use url::Url;
1441 /// # use url::ParseError;
1442 ///
1443 /// # fn run() -> Result<(), ParseError> {
1444 /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1445 ///
1446 /// assert_eq!(url.fragment(), Some("row=4"));
1447 ///
1448 /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1449 ///
1450 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1451 /// # Ok(())
1452 /// # }
1453 /// # run().unwrap();
1454 /// ```
1455 pub fn fragment(&self) -> Option<&str> {
1456 self.fragment_start.map(|start| {
1457 debug_assert!(self.byte_at(start) == b'#');
1458 self.slice(start + 1..)
1459 })
1460 }
1461
1462 fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1463 let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1464 let result = f(&mut parser);
1465 self.serialization = parser.serialization;
1466 result
1467 }
1468
1469 /// Change this URL’s fragment identifier.
1470 ///
1471 /// # Examples
1472 ///
1473 /// ```rust
1474 /// use url::Url;
1475 /// # use url::ParseError;
1476 ///
1477 /// # fn run() -> Result<(), ParseError> {
1478 /// let mut url = Url::parse("https://example.com/data.csv")?;
/// assert_eq!(url.as_str(), "https://example.com/data.csv");
///
/// url.set_fragment(Some("cell=4,1-6,2"));
1482 /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1483 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1484 ///
1485 /// url.set_fragment(None);
1486 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1487 /// assert!(url.fragment().is_none());
1488 /// # Ok(())
1489 /// # }
1490 /// # run().unwrap();
1491 /// ```
1492 pub fn set_fragment(&mut self, fragment: Option<&str>) {
1493 // Remove any previous fragment
1494 if let Some(start) = self.fragment_start {
1495 debug_assert!(self.byte_at(start) == b'#');
1496 self.serialization.truncate(start as usize);
1497 }
1498 // Write the new one
1499 if let Some(input) = fragment {
1500 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1501 self.serialization.push('#');
1502 self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
1503 } else {
1504 self.fragment_start = None;
1505 self.strip_trailing_spaces_from_opaque_path();
1506 }
1507 }
1508
1509 fn take_fragment(&mut self) -> Option<String> {
1510 self.fragment_start.take().map(|start| {
1511 debug_assert!(self.byte_at(start) == b'#');
1512 let fragment = self.slice(start + 1..).to_owned();
1513 self.serialization.truncate(start as usize);
1514 fragment
1515 })
1516 }
1517
1518 fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1519 if let Some(ref fragment) = fragment {
1520 assert!(self.fragment_start.is_none());
1521 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1522 self.serialization.push('#');
1523 self.serialization.push_str(fragment);
1524 }
1525 }
1526
1527 /// Change this URL’s query string.
1528 ///
1529 /// # Examples
1530 ///
1531 /// ```rust
1532 /// use url::Url;
1533 /// # use url::ParseError;
1534 ///
1535 /// # fn run() -> Result<(), ParseError> {
1536 /// let mut url = Url::parse("https://example.com/products")?;
1537 /// assert_eq!(url.as_str(), "https://example.com/products");
1538 ///
1539 /// url.set_query(Some("page=2"));
1540 /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1541 /// assert_eq!(url.query(), Some("page=2"));
1542 /// # Ok(())
1543 /// # }
1544 /// # run().unwrap();
1545 /// ```
1546 pub fn set_query(&mut self, query: Option<&str>) {
1547 let fragment = self.take_fragment();
1548
1549 // Remove any previous query
1550 if let Some(start) = self.query_start.take() {
1551 debug_assert!(self.byte_at(start) == b'?');
1552 self.serialization.truncate(start as usize);
1553 }
1554 // Write the new query, if any
1555 if let Some(input) = query {
1556 self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1557 self.serialization.push('?');
1558 let scheme_type = SchemeType::from(self.scheme());
1559 let scheme_end = self.scheme_end;
1560 self.mutate(|parser| {
1561 let vfn = parser.violation_fn;
1562 parser.parse_query(
1563 scheme_type,
1564 scheme_end,
1565 parser::Input::new_trim_tab_and_newlines(input, vfn),
1566 )
1567 });
1568 } else {
1569 self.query_start = None;
1570 if fragment.is_none() {
1571 self.strip_trailing_spaces_from_opaque_path();
1572 }
1573 }
1574
1575 self.restore_already_parsed_fragment(fragment);
1576 }
1577
1578 /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1579 /// in `application/x-www-form-urlencoded` syntax.
1580 ///
1581 /// The return value has a method-chaining API:
1582 ///
1583 /// ```rust
1584 /// # use url::{Url, ParseError};
1585 ///
1586 /// # fn run() -> Result<(), ParseError> {
1587 /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1588 /// assert_eq!(url.query(), Some("lang=fr"));
1589 ///
1590 /// url.query_pairs_mut().append_pair("foo", "bar");
1591 /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1592 /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1593 ///
1594 /// url.query_pairs_mut()
1595 /// .clear()
1596 /// .append_pair("foo", "bar & baz")
1597 /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1598 /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1599 /// assert_eq!(url.as_str(),
1600 /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1601 /// # Ok(())
1602 /// # }
1603 /// # run().unwrap();
1604 /// ```
1605 ///
1606 /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1607 /// not `url.set_query(None)`.
1608 ///
1609 /// The state of `Url` is unspecified if this return value is leaked without being dropped.
1610 pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1611 let fragment = self.take_fragment();
1612
1613 let query_start;
1614 if let Some(start) = self.query_start {
1615 debug_assert!(self.byte_at(start) == b'?');
1616 query_start = start as usize;
1617 } else {
1618 query_start = self.serialization.len();
1619 self.query_start = Some(to_u32(query_start).unwrap());
1620 self.serialization.push('?');
1621 }
1622
1623 let query = UrlQuery {
1624 url: Some(self),
1625 fragment,
1626 };
1627 form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1628 }
1629
1630 fn take_after_path(&mut self) -> String {
1631 match (self.query_start, self.fragment_start) {
1632 (Some(i), _) | (None, Some(i)) => {
1633 let after_path = self.slice(i..).to_owned();
1634 self.serialization.truncate(i as usize);
1635 after_path
1636 }
1637 (None, None) => String::new(),
1638 }
1639 }
1640
1641 /// Change this URL’s path.
1642 ///
1643 /// # Examples
1644 ///
1645 /// ```rust
1646 /// use url::Url;
1647 /// # use url::ParseError;
1648 ///
1649 /// # fn run() -> Result<(), ParseError> {
1650 /// let mut url = Url::parse("https://example.com")?;
1651 /// url.set_path("api/comments");
1652 /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1653 /// assert_eq!(url.path(), "/api/comments");
1654 ///
1655 /// let mut url = Url::parse("https://example.com/api")?;
1656 /// url.set_path("data/report.csv");
1657 /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1658 /// assert_eq!(url.path(), "/data/report.csv");
1659 ///
1660 /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1661 /// let mut url = Url::parse("https://example.com")?;
1662 /// url.set_path("api/some comments");
1663 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1664 /// assert_eq!(url.path(), "/api/some%20comments");
1665 ///
1666 /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1667 /// let mut url = Url::parse("https://example.com")?;
1668 /// url.set_path("api/some%20comments");
1669 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1670 /// assert_eq!(url.path(), "/api/some%20comments");
1671 ///
1672 /// # Ok(())
1673 /// # }
1674 /// # run().unwrap();
1675 /// ```
1676 pub fn set_path(&mut self, mut path: &str) {
1677 let after_path = self.take_after_path();
1678 let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1679 let cannot_be_a_base = self.cannot_be_a_base();
1680 let scheme_type = SchemeType::from(self.scheme());
1681 self.serialization.truncate(self.path_start as usize);
1682 self.mutate(|parser| {
1683 if cannot_be_a_base {
1684 if path.starts_with('/') {
1685 parser.serialization.push_str("%2F");
1686 path = &path[1..];
1687 }
1688 parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
1689 } else {
1690 let mut has_host = true; // FIXME
1691 parser.parse_path_start(
1692 scheme_type,
1693 &mut has_host,
1694 parser::Input::new_no_trim(path),
1695 );
1696 }
1697 });
1698 self.restore_after_path(old_after_path_pos, &after_path);
1699 }
1700
1701 /// Return an object with methods to manipulate this URL’s path segments.
1702 ///
1703 /// Return `Err(())` if this URL is cannot-be-a-base.
1704 #[allow(clippy::result_unit_err)]
1705 pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1706 if self.cannot_be_a_base() {
1707 Err(())
1708 } else {
1709 Ok(path_segments::new(self))
1710 }
1711 }
1712
1713 fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1714 let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1715 let adjust = |index: &mut u32| {
1716 *index -= old_after_path_position;
1717 *index += new_after_path_position;
1718 };
1719 if let Some(ref mut index) = self.query_start {
1720 adjust(index)
1721 }
1722 if let Some(ref mut index) = self.fragment_start {
1723 adjust(index)
1724 }
1725 self.serialization.push_str(after_path)
1726 }
1727
1728 /// Change this URL’s port number.
1729 ///
1730 /// Note that default port numbers are not reflected in the serialization.
1731 ///
/// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme,
/// do nothing and return `Err`.
1734 ///
1735 /// # Examples
1736 ///
1737 /// ```
1738 /// use url::Url;
1739 /// # use std::error::Error;
1740 ///
1741 /// # fn run() -> Result<(), Box<dyn Error>> {
1742 /// let mut url = Url::parse("ssh://example.net:2048/")?;
1743 ///
1744 /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1745 /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1746 ///
1747 /// url.set_port(None).map_err(|_| "cannot be base")?;
1748 /// assert_eq!(url.as_str(), "ssh://example.net/");
1749 /// # Ok(())
1750 /// # }
1751 /// # run().unwrap();
1752 /// ```
1753 ///
1754 /// Known default port numbers are not reflected:
1755 ///
1756 /// ```rust
1757 /// use url::Url;
1758 /// # use std::error::Error;
1759 ///
1760 /// # fn run() -> Result<(), Box<dyn Error>> {
1761 /// let mut url = Url::parse("https://example.org/")?;
1762 ///
1763 /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1764 /// assert!(url.port().is_none());
1765 /// # Ok(())
1766 /// # }
1767 /// # run().unwrap();
1768 /// ```
1769 ///
1770 /// Cannot set port for cannot-be-a-base URLs:
1771 ///
1772 /// ```
1773 /// use url::Url;
1774 /// # use url::ParseError;
1775 ///
1776 /// # fn run() -> Result<(), ParseError> {
1777 /// let mut url = Url::parse("mailto:rms@example.net")?;
1778 ///
1779 /// let result = url.set_port(Some(80));
1780 /// assert!(result.is_err());
1781 ///
1782 /// let result = url.set_port(None);
1783 /// assert!(result.is_err());
1784 /// # Ok(())
1785 /// # }
1786 /// # run().unwrap();
1787 /// ```
1788 #[allow(clippy::result_unit_err)]
1789 pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1790 // has_host implies !cannot_be_a_base
1791 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1792 return Err(());
1793 }
1794 if port.is_some() && port == parser::default_port(self.scheme()) {
1795 port = None
1796 }
1797 self.set_port_internal(port);
1798 Ok(())
1799 }
1800
/// Rewrite the serialized `:port` to match `port` and store `port` in `self.port`.
///
/// Performs no validation and no default-port normalisation; `set_port` does both
/// before calling this. `path_start`, `query_start` and `fragment_start` are shifted
/// to follow the text that moved.
fn set_port_internal(&mut self, port: Option<u16>) {
    match (self.port, port) {
        // No port before, none wanted: the serialization is already right.
        (None, None) => {}
        (Some(_), None) => {
            // Remove the bytes between the end of the host and the start of the path,
            // i.e. the serialized ":port".
            self.serialization
                .drain(self.host_end as usize..self.path_start as usize);
            // Both fields still hold their pre-drain values, so this is the removed length.
            let offset = self.path_start - self.host_end;
            self.path_start = self.host_end;
            if let Some(ref mut index) = self.query_start {
                *index -= offset
            }
            if let Some(ref mut index) = self.fragment_start {
                *index -= offset
            }
        }
        // Same port as before: nothing to rewrite.
        (Some(old), Some(new)) if old == new => {}
        (_, Some(new)) => {
            // Save the path and everything after it, cut back to the end of the host,
            // write the new ":port", then re-append the saved tail.
            let path_and_after = self.slice(self.path_start..).to_owned();
            self.serialization.truncate(self.host_end as usize);
            write!(&mut self.serialization, ":{}", new).unwrap();
            let old_path_start = self.path_start;
            let new_path_start = to_u32(self.serialization.len()).unwrap();
            self.path_start = new_path_start;
            // Move an index by the distance the path start moved (subtract, then add).
            let adjust = |index: &mut u32| {
                *index -= old_path_start;
                *index += new_path_start;
            };
            if let Some(ref mut index) = self.query_start {
                adjust(index)
            }
            if let Some(ref mut index) = self.fragment_start {
                adjust(index)
            }
            self.serialization.push_str(&path_and_after);
        }
    }
    self.port = port;
}
1839
1840 /// Change this URL’s host.
1841 ///
1842 /// Removing the host (calling this with `None`)
1843 /// will also remove any username, password, and port number.
1844 ///
1845 /// # Examples
1846 ///
1847 /// Change host:
1848 ///
1849 /// ```
1850 /// use url::Url;
1851 /// # use url::ParseError;
1852 ///
1853 /// # fn run() -> Result<(), ParseError> {
1854 /// let mut url = Url::parse("https://example.net")?;
1855 /// let result = url.set_host(Some("rust-lang.org"));
1856 /// assert!(result.is_ok());
1857 /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1858 /// # Ok(())
1859 /// # }
1860 /// # run().unwrap();
1861 /// ```
1862 ///
1863 /// Remove host:
1864 ///
1865 /// ```
1866 /// use url::Url;
1867 /// # use url::ParseError;
1868 ///
1869 /// # fn run() -> Result<(), ParseError> {
1870 /// let mut url = Url::parse("foo://example.net")?;
1871 /// let result = url.set_host(None);
1872 /// assert!(result.is_ok());
1873 /// assert_eq!(url.as_str(), "foo:/");
1874 /// # Ok(())
1875 /// # }
1876 /// # run().unwrap();
1877 /// ```
1878 ///
1879 /// Cannot remove host for 'special' schemes (e.g. `http`):
1880 ///
1881 /// ```
1882 /// use url::Url;
1883 /// # use url::ParseError;
1884 ///
1885 /// # fn run() -> Result<(), ParseError> {
1886 /// let mut url = Url::parse("https://example.net")?;
1887 /// let result = url.set_host(None);
1888 /// assert!(result.is_err());
1889 /// assert_eq!(url.as_str(), "https://example.net/");
1890 /// # Ok(())
1891 /// # }
1892 /// # run().unwrap();
1893 /// ```
1894 ///
1895 /// Cannot change or remove host for cannot-be-a-base URLs:
1896 ///
1897 /// ```
1898 /// use url::Url;
1899 /// # use url::ParseError;
1900 ///
1901 /// # fn run() -> Result<(), ParseError> {
1902 /// let mut url = Url::parse("mailto:rms@example.net")?;
1903 ///
1904 /// let result = url.set_host(Some("rust-lang.org"));
1905 /// assert!(result.is_err());
1906 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1907 ///
1908 /// let result = url.set_host(None);
1909 /// assert!(result.is_err());
1910 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1911 /// # Ok(())
1912 /// # }
1913 /// # run().unwrap();
1914 /// ```
1915 ///
1916 /// # Errors
1917 ///
1918 /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1919 /// a [`ParseError`] variant will be returned.
1920 ///
1921 /// [`ParseError`]: enum.ParseError.html
1922 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1923 if self.cannot_be_a_base() {
1924 return Err(ParseError::SetHostOnCannotBeABaseUrl);
1925 }
1926
1927 let scheme_type = SchemeType::from(self.scheme());
1928
1929 if let Some(host) = host {
1930 if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
1931 return Err(ParseError::EmptyHost);
1932 }
1933 let mut host_substr = host;
1934 // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1935 if !host.starts_with('[') || !host.ends_with(']') {
1936 match host.find(':') {
1937 Some(0) => {
1938 // If buffer is the empty string, validation error, return failure.
1939 return Err(ParseError::InvalidDomainCharacter);
1940 }
1941 // Let host be the result of host parsing buffer
1942 Some(colon_index) => {
1943 host_substr = &host[..colon_index];
1944 }
1945 None => {}
1946 }
1947 }
1948 if SchemeType::from(self.scheme()).is_special() {
1949 self.set_host_internal(Host::parse(host_substr)?, None);
1950 } else {
1951 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1952 }
1953 } else if self.has_host() {
1954 if scheme_type.is_special() && !scheme_type.is_file() {
1955 return Err(ParseError::EmptyHost);
1956 } else if self.serialization.len() == self.path_start as usize {
1957 self.serialization.push('/');
1958 }
1959 debug_assert!(self.byte_at(self.scheme_end) == b':');
1960 debug_assert!(self.byte_at(self.path_start) == b'/');
1961
1962 let new_path_start = if scheme_type.is_file() {
1963 self.scheme_end + 3
1964 } else {
1965 self.scheme_end + 1
1966 };
1967
1968 self.serialization
1969 .drain(new_path_start as usize..self.path_start as usize);
1970 let offset = self.path_start - new_path_start;
1971 self.path_start = new_path_start;
1972 self.username_end = new_path_start;
1973 self.host_start = new_path_start;
1974 self.host_end = new_path_start;
1975 self.port = None;
1976 if let Some(ref mut index) = self.query_start {
1977 *index -= offset
1978 }
1979 if let Some(ref mut index) = self.fragment_start {
1980 *index -= offset
1981 }
1982 }
1983 Ok(())
1984 }
1985
1986 /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
1987 fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
1988 let old_suffix_pos = if opt_new_port.is_some() {
1989 self.path_start
1990 } else {
1991 self.host_end
1992 };
1993 let suffix = self.slice(old_suffix_pos..).to_owned();
1994 self.serialization.truncate(self.host_start as usize);
1995 if !self.has_authority() {
1996 debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
1997 debug_assert!(self.username_end == self.host_start);
1998 self.serialization.push('/');
1999 self.serialization.push('/');
2000 self.username_end += 2;
2001 self.host_start += 2;
2002 }
2003 write!(&mut self.serialization, "{}", host).unwrap();
2004 self.host_end = to_u32(self.serialization.len()).unwrap();
2005 self.host = host.into();
2006
2007 if let Some(new_port) = opt_new_port {
2008 self.port = new_port;
2009 if let Some(port) = new_port {
2010 write!(&mut self.serialization, ":{}", port).unwrap();
2011 }
2012 }
2013 let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
2014 self.serialization.push_str(&suffix);
2015
2016 let adjust = |index: &mut u32| {
2017 *index -= old_suffix_pos;
2018 *index += new_suffix_pos;
2019 };
2020 adjust(&mut self.path_start);
2021 if let Some(ref mut index) = self.query_start {
2022 adjust(index)
2023 }
2024 if let Some(ref mut index) = self.fragment_start {
2025 adjust(index)
2026 }
2027 }
2028
2029 /// Change this URL’s host to the given IP address.
2030 ///
2031 /// If this URL is cannot-be-a-base, do nothing and return `Err`.
2032 ///
2033 /// Compared to `Url::set_host`, this skips the host parser.
2034 ///
2035 /// # Examples
2036 ///
2037 /// ```rust
2038 /// use url::{Url, ParseError};
2039 ///
2040 /// # fn run() -> Result<(), ParseError> {
2041 /// let mut url = Url::parse("http://example.com")?;
2042 /// url.set_ip_host("127.0.0.1".parse().unwrap());
2043 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
2044 /// assert_eq!(url.as_str(), "http://127.0.0.1/");
2045 /// # Ok(())
2046 /// # }
2047 /// # run().unwrap();
2048 /// ```
2049 ///
/// Cannot set an IP host on a cannot-be-a-base URL such as a `mailto:` URL:
2051 ///
2052 /// ```rust
2053 /// use url::{Url, ParseError};
2054 ///
2055 /// # fn run() -> Result<(), ParseError> {
2056 /// let mut url = Url::parse("mailto:rms@example.com")?;
2057 /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
2058 ///
2059 /// assert_eq!(url.as_str(), "mailto:rms@example.com");
2060 /// assert!(result.is_err());
2061 /// # Ok(())
2062 /// # }
2063 /// # run().unwrap();
2064 /// ```
2065 ///
2066 #[allow(clippy::result_unit_err)]
2067 pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
2068 if self.cannot_be_a_base() {
2069 return Err(());
2070 }
2071
2072 let address = match address {
2073 IpAddr::V4(address) => Host::Ipv4(address),
2074 IpAddr::V6(address) => Host::Ipv6(address),
2075 };
2076 self.set_host_internal(address, None);
2077 Ok(())
2078 }
2079
2080 /// Change this URL’s password.
2081 ///
2082 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2083 ///
2084 /// # Examples
2085 ///
2086 /// ```rust
2087 /// use url::{Url, ParseError};
2088 ///
2089 /// # fn run() -> Result<(), ParseError> {
2090 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2091 /// let result = url.set_password(Some("secret_password"));
2092 /// assert!(result.is_err());
2093 ///
2094 /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
2095 /// let result = url.set_password(Some("secret_password"));
2096 /// assert_eq!(url.password(), Some("secret_password"));
2097 ///
2098 /// let mut url = Url::parse("ftp://user2:@example.com")?;
2099 /// let result = url.set_password(Some("secret2"));
2100 /// assert!(result.is_ok());
2101 /// assert_eq!(url.password(), Some("secret2"));
2102 /// # Ok(())
2103 /// # }
2104 /// # run().unwrap();
2105 /// ```
2106 #[allow(clippy::result_unit_err)]
2107 pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
2108 // has_host implies !cannot_be_a_base
2109 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2110 return Err(());
2111 }
2112 let password = password.unwrap_or_default();
2113 if !password.is_empty() {
2114 let host_and_after = self.slice(self.host_start..).to_owned();
2115 self.serialization.truncate(self.username_end as usize);
2116 self.serialization.push(':');
2117 self.serialization
2118 .extend(utf8_percent_encode(password, USERINFO));
2119 self.serialization.push('@');
2120
2121 let old_host_start = self.host_start;
2122 let new_host_start = to_u32(self.serialization.len()).unwrap();
2123 let adjust = |index: &mut u32| {
2124 *index -= old_host_start;
2125 *index += new_host_start;
2126 };
2127 self.host_start = new_host_start;
2128 adjust(&mut self.host_end);
2129 adjust(&mut self.path_start);
2130 if let Some(ref mut index) = self.query_start {
2131 adjust(index)
2132 }
2133 if let Some(ref mut index) = self.fragment_start {
2134 adjust(index)
2135 }
2136
2137 self.serialization.push_str(&host_and_after);
2138 } else if self.byte_at(self.username_end) == b':' {
2139 // If there is a password to remove
2140 let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2141 debug_assert!(has_username_or_password);
2142 let username_start = self.scheme_end + 3;
2143 let empty_username = username_start == self.username_end;
2144 let start = self.username_end; // Remove the ':'
2145 let end = if empty_username {
2146 self.host_start // Remove the '@' as well
2147 } else {
2148 self.host_start - 1 // Keep the '@' to separate the username from the host
2149 };
2150 self.serialization.drain(start as usize..end as usize);
2151 let offset = end - start;
2152 self.host_start -= offset;
2153 self.host_end -= offset;
2154 self.path_start -= offset;
2155 if let Some(ref mut index) = self.query_start {
2156 *index -= offset
2157 }
2158 if let Some(ref mut index) = self.fragment_start {
2159 *index -= offset
2160 }
2161 }
2162 Ok(())
2163 }
2164
2165 /// Change this URL’s username.
2166 ///
2167 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2168 /// # Examples
2169 ///
2170 /// Cannot setup username from mailto(cannot-be-base)
2171 ///
2172 /// ```rust
2173 /// use url::{Url, ParseError};
2174 ///
2175 /// # fn run() -> Result<(), ParseError> {
2176 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2177 /// let result = url.set_username("user1");
2178 /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2179 /// assert!(result.is_err());
2180 /// # Ok(())
2181 /// # }
2182 /// # run().unwrap();
2183 /// ```
2184 ///
2185 /// Setup username to user1
2186 ///
2187 /// ```rust
2188 /// use url::{Url, ParseError};
2189 ///
2190 /// # fn run() -> Result<(), ParseError> {
2191 /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2192 /// let result = url.set_username("user1");
2193 /// assert!(result.is_ok());
2194 /// assert_eq!(url.username(), "user1");
2195 /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2196 /// # Ok(())
2197 /// # }
2198 /// # run().unwrap();
2199 /// ```
2200 #[allow(clippy::result_unit_err)]
2201 pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
2202 // has_host implies !cannot_be_a_base
2203 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2204 return Err(());
2205 }
2206 let username_start = self.scheme_end + 3;
2207 debug_assert!(self.slice(self.scheme_end..username_start) == "://");
2208 if self.slice(username_start..self.username_end) == username {
2209 return Ok(());
2210 }
2211 let after_username = self.slice(self.username_end..).to_owned();
2212 self.serialization.truncate(username_start as usize);
2213 self.serialization
2214 .extend(utf8_percent_encode(username, USERINFO));
2215
2216 let mut removed_bytes = self.username_end;
2217 self.username_end = to_u32(self.serialization.len()).unwrap();
2218 let mut added_bytes = self.username_end;
2219
2220 let new_username_is_empty = self.username_end == username_start;
2221 match (new_username_is_empty, after_username.chars().next()) {
2222 (true, Some('@')) => {
2223 removed_bytes += 1;
2224 self.serialization.push_str(&after_username[1..]);
2225 }
2226 (false, Some('@')) | (_, Some(':')) | (true, _) => {
2227 self.serialization.push_str(&after_username);
2228 }
2229 (false, _) => {
2230 added_bytes += 1;
2231 self.serialization.push('@');
2232 self.serialization.push_str(&after_username);
2233 }
2234 }
2235
2236 let adjust = |index: &mut u32| {
2237 *index -= removed_bytes;
2238 *index += added_bytes;
2239 };
2240 adjust(&mut self.host_start);
2241 adjust(&mut self.host_end);
2242 adjust(&mut self.path_start);
2243 if let Some(ref mut index) = self.query_start {
2244 adjust(index)
2245 }
2246 if let Some(ref mut index) = self.fragment_start {
2247 adjust(index)
2248 }
2249 Ok(())
2250 }
2251
2252 /// Change this URL’s scheme.
2253 ///
2254 /// Do nothing and return `Err` under the following circumstances:
2255 ///
2256 /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2257 /// * If this URL is cannot-be-a-base and the new scheme is one of
2258 /// `http`, `https`, `ws`, `wss` or `ftp`
2259 /// * If either the old or new scheme is `http`, `https`, `ws`,
2260 /// `wss` or `ftp` and the other is not one of these
2261 /// * If the new scheme is `file` and this URL includes credentials
2262 /// or has a non-null port
2263 /// * If this URL's scheme is `file` and its host is empty or null
2264 ///
2265 /// See also [the URL specification's section on legal scheme state
2266 /// overrides](https://url.spec.whatwg.org/#scheme-state).
2267 ///
2268 /// # Examples
2269 ///
2270 /// Change the URL’s scheme from `https` to `http`:
2271 ///
2272 /// ```
2273 /// use url::Url;
2274 /// # use url::ParseError;
2275 ///
2276 /// # fn run() -> Result<(), ParseError> {
2277 /// let mut url = Url::parse("https://example.net")?;
2278 /// let result = url.set_scheme("http");
2279 /// assert_eq!(url.as_str(), "http://example.net/");
2280 /// assert!(result.is_ok());
2281 /// # Ok(())
2282 /// # }
2283 /// # run().unwrap();
2284 /// ```
2285 /// Change the URL’s scheme from `foo` to `bar`:
2286 ///
2287 /// ```
2288 /// use url::Url;
2289 /// # use url::ParseError;
2290 ///
2291 /// # fn run() -> Result<(), ParseError> {
2292 /// let mut url = Url::parse("foo://example.net")?;
2293 /// let result = url.set_scheme("bar");
2294 /// assert_eq!(url.as_str(), "bar://example.net");
2295 /// assert!(result.is_ok());
2296 /// # Ok(())
2297 /// # }
2298 /// # run().unwrap();
2299 /// ```
2300 ///
2301 /// Cannot change URL’s scheme from `https` to `foõ`:
2302 ///
2303 /// ```
2304 /// use url::Url;
2305 /// # use url::ParseError;
2306 ///
2307 /// # fn run() -> Result<(), ParseError> {
2308 /// let mut url = Url::parse("https://example.net")?;
2309 /// let result = url.set_scheme("foõ");
2310 /// assert_eq!(url.as_str(), "https://example.net/");
2311 /// assert!(result.is_err());
2312 /// # Ok(())
2313 /// # }
2314 /// # run().unwrap();
2315 /// ```
2316 ///
2317 /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2318 ///
2319 /// ```
2320 /// use url::Url;
2321 /// # use url::ParseError;
2322 ///
2323 /// # fn run() -> Result<(), ParseError> {
2324 /// let mut url = Url::parse("mailto:rms@example.net")?;
2325 /// let result = url.set_scheme("https");
2326 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2327 /// assert!(result.is_err());
2328 /// # Ok(())
2329 /// # }
2330 /// # run().unwrap();
2331 /// ```
2332 /// Cannot change the URL’s scheme from `foo` to `https`:
2333 ///
2334 /// ```
2335 /// use url::Url;
2336 /// # use url::ParseError;
2337 ///
2338 /// # fn run() -> Result<(), ParseError> {
2339 /// let mut url = Url::parse("foo://example.net")?;
2340 /// let result = url.set_scheme("https");
2341 /// assert_eq!(url.as_str(), "foo://example.net");
2342 /// assert!(result.is_err());
2343 /// # Ok(())
2344 /// # }
2345 /// # run().unwrap();
2346 /// ```
2347 /// Cannot change the URL’s scheme from `http` to `foo`:
2348 ///
2349 /// ```
2350 /// use url::Url;
2351 /// # use url::ParseError;
2352 ///
2353 /// # fn run() -> Result<(), ParseError> {
2354 /// let mut url = Url::parse("http://example.net")?;
2355 /// let result = url.set_scheme("foo");
2356 /// assert_eq!(url.as_str(), "http://example.net/");
2357 /// assert!(result.is_err());
2358 /// # Ok(())
2359 /// # }
2360 /// # run().unwrap();
2361 /// ```
2362 #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
2363 pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2364 let mut parser = Parser::for_setter(String::new());
2365 let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
2366 let new_scheme_type = SchemeType::from(&parser.serialization);
2367 let old_scheme_type = SchemeType::from(self.scheme());
2368 // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2369 if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2370 // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2371 (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2372 // If url includes credentials or has a non-null port, and buffer is "file", then return.
2373 // If url’s scheme is "file" and its host is an empty host or null, then return.
2374 (new_scheme_type.is_file() && self.has_authority())
2375 {
2376 return Err(());
2377 }
2378
2379 if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2380 return Err(());
2381 }
2382 let old_scheme_end = self.scheme_end;
2383 let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2384 let adjust = |index: &mut u32| {
2385 *index -= old_scheme_end;
2386 *index += new_scheme_end;
2387 };
2388
2389 self.scheme_end = new_scheme_end;
2390 adjust(&mut self.username_end);
2391 adjust(&mut self.host_start);
2392 adjust(&mut self.host_end);
2393 adjust(&mut self.path_start);
2394 if let Some(ref mut index) = self.query_start {
2395 adjust(index)
2396 }
2397 if let Some(ref mut index) = self.fragment_start {
2398 adjust(index)
2399 }
2400
2401 parser.serialization.push_str(self.slice(old_scheme_end..));
2402 self.serialization = parser.serialization;
2403
2404 // Update the port so it can be removed
2405 // If it is the scheme's default
2406 // we don't mind it silently failing
2407 // if there was no port in the first place
2408 let previous_port = self.port();
2409 let _ = self.set_port(previous_port);
2410
2411 Ok(())
2412 }
2413
2414 /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2415 ///
2416 /// This returns `Err` if the given path is not absolute or,
2417 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2418 ///
2419 /// # Examples
2420 ///
2421 /// On Unix-like platforms:
2422 ///
2423 /// ```
2424 /// # if cfg!(unix) {
2425 /// use url::Url;
2426 ///
2427 /// # fn run() -> Result<(), ()> {
2428 /// let url = Url::from_file_path("/tmp/foo.txt")?;
2429 /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2430 ///
2431 /// let url = Url::from_file_path("../foo.txt");
2432 /// assert!(url.is_err());
2433 ///
2434 /// let url = Url::from_file_path("https://google.com/");
2435 /// assert!(url.is_err());
2436 /// # Ok(())
2437 /// # }
2438 /// # run().unwrap();
2439 /// # }
2440 /// ```
2441 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2442 #[allow(clippy::result_unit_err)]
2443 pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2444 let mut serialization = "file://".to_owned();
2445 let host_start = serialization.len() as u32;
2446 let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2447 Ok(Url {
2448 serialization,
2449 scheme_end: "file".len() as u32,
2450 username_end: host_start,
2451 host_start,
2452 host_end,
2453 host,
2454 port: None,
2455 path_start: host_end,
2456 query_start: None,
2457 fragment_start: None,
2458 })
2459 }
2460
2461 /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2462 ///
2463 /// This returns `Err` if the given path is not absolute or,
2464 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2465 ///
/// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
/// so that the entire path is considered when using this URL as a base URL.
2468 ///
2469 /// For example:
2470 ///
2471 /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2472 /// as the base URL is `file:///var/www/index.html`
2473 /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2474 /// as the base URL is `file:///var/index.html`, which might not be what was intended.
2475 ///
2476 /// Note that `std::path` does not consider trailing slashes significant
2477 /// and usually does not include them (e.g. in `Path::parent()`).
2478 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2479 #[allow(clippy::result_unit_err)]
2480 pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2481 let mut url = Url::from_file_path(path)?;
2482 if !url.serialization.ends_with('/') {
2483 url.serialization.push('/')
2484 }
2485 Ok(url)
2486 }
2487
2488 /// Serialize with Serde using the internal representation of the `Url` struct.
2489 ///
2490 /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2491 /// for speed, compared to the `Deserialize` trait impl.
2492 ///
2493 /// This method is only available if the `serde` Cargo feature is enabled.
2494 #[cfg(feature = "serde")]
2495 #[deny(unused)]
2496 pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2497 where
2498 S: serde::Serializer,
2499 {
2500 use serde::Serialize;
2501 // Destructuring first lets us ensure that adding or removing fields forces this method
2502 // to be updated
2503 let Url {
2504 ref serialization,
2505 ref scheme_end,
2506 ref username_end,
2507 ref host_start,
2508 ref host_end,
2509 ref host,
2510 ref port,
2511 ref path_start,
2512 ref query_start,
2513 ref fragment_start,
2514 } = *self;
2515 (
2516 serialization,
2517 scheme_end,
2518 username_end,
2519 host_start,
2520 host_end,
2521 host,
2522 port,
2523 path_start,
2524 query_start,
2525 fragment_start,
2526 )
2527 .serialize(serializer)
2528 }
2529
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`. It sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl: the fields are taken as-is, and
/// `check_invariants` only runs when debug assertions are enabled.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error, Unexpected};
    // The tuple layout mirrors the one written by `serialize_internal`.
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    // Validation is debug-only; release builds trust the input.
    if cfg!(debug_assertions) {
        url.check_invariants().map_err(|reason| {
            let reason: &str = &reason;
            Error::invalid_value(Unexpected::Other("value"), &reason)
        })?
    }
    Ok(url)
}
2575
2576 /// Assuming the URL is in the `file` scheme or similar,
2577 /// convert its path to an absolute `std::path::Path`.
2578 ///
2579 /// **Note:** This does not actually check the URL’s `scheme`,
2580 /// and may give nonsensical results for other schemes.
2581 /// It is the user’s responsibility to check the URL’s scheme before calling this.
2582 ///
2583 /// ```
2584 /// # use url::Url;
2585 /// # let url = Url::parse("file:///etc/passwd").unwrap();
2586 /// let path = url.to_file_path();
2587 /// ```
2588 ///
2589 /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2590 /// `file:` URLs may have a non-local host),
2591 /// or if `Path::new_opt()` returns `None`.
2592 /// (That is, if the percent-decoded path contains a NUL byte or,
2593 /// for a Windows path, is not UTF-8.)
2594 #[inline]
2595 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2596 #[allow(clippy::result_unit_err)]
2597 pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2598 if let Some(segments) = self.path_segments() {
2599 let host = match self.host() {
2600 None | Some(Host::Domain("localhost")) => None,
2601 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2602 Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2603 }
2604 _ => return Err(()),
2605 };
2606
2607 return file_url_segments_to_pathbuf(host, segments);
2608 }
2609 Err(())
2610 }
2611
2612 // Private helper methods:
2613
2614 #[inline]
2615 fn slice<R>(&self, range: R) -> &str
2616 where
2617 R: RangeArg,
2618 {
2619 range.slice_of(&self.serialization)
2620 }
2621
2622 #[inline]
2623 fn byte_at(&self, i: u32) -> u8 {
2624 self.serialization.as_bytes()[i as usize]
2625 }
2626}
2627
2628/// Parse a string as an URL, without a base URL or encoding override.
2629impl str::FromStr for Url {
2630 type Err = ParseError;
2631
2632 #[inline]
2633 fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2634 Url::parse(input)
2635 }
2636}
2637
2638impl<'a> TryFrom<&'a str> for Url {
2639 type Error = ParseError;
2640
2641 fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2642 Url::parse(input:s)
2643 }
2644}
2645
2646/// Display the serialization of this URL.
2647impl fmt::Display for Url {
2648 #[inline]
2649 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2650 fmt::Display::fmt(&self.serialization, f:formatter)
2651 }
2652}
2653
2654/// String conversion.
2655impl From<Url> for String {
2656 fn from(value: Url) -> String {
2657 value.serialization
2658 }
2659}
2660
2661/// Debug the serialization of this URL.
2662impl fmt::Debug for Url {
2663 #[inline]
2664 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2665 formatter&mut DebugStruct<'_, '_>
2666 .debug_struct("Url")
2667 .field("scheme", &self.scheme())
2668 .field("cannot_be_a_base", &self.cannot_be_a_base())
2669 .field("username", &self.username())
2670 .field("password", &self.password())
2671 .field("host", &self.host())
2672 .field("port", &self.port())
2673 .field("path", &self.path())
2674 .field("query", &self.query())
2675 .field(name:"fragment", &self.fragment())
2676 .finish()
2677 }
2678}
2679
/// URLs compare like their serialization.
///
/// `Eq` adds no code of its own here; the comparison itself lives in the `PartialEq`
/// impl for `Url`.
impl Eq for Url {}
2682
2683/// URLs compare like their serialization.
2684impl PartialEq for Url {
2685 #[inline]
2686 fn eq(&self, other: &Self) -> bool {
2687 self.serialization == other.serialization
2688 }
2689}
2690
2691/// URLs compare like their serialization.
2692impl Ord for Url {
2693 #[inline]
2694 fn cmp(&self, other: &Self) -> cmp::Ordering {
2695 self.serialization.cmp(&other.serialization)
2696 }
2697}
2698
2699/// URLs compare like their serialization.
2700impl PartialOrd for Url {
2701 #[inline]
2702 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2703 Some(self.cmp(other))
2704 }
2705}
2706
2707/// URLs hash like their serialization.
2708impl hash::Hash for Url {
2709 #[inline]
2710 fn hash<H>(&self, state: &mut H)
2711 where
2712 H: hash::Hasher,
2713 {
2714 hash::Hash::hash(&self.serialization, state)
2715 }
2716}
2717
2718/// Return the serialization of this URL.
2719impl AsRef<str> for Url {
2720 #[inline]
2721 fn as_ref(&self) -> &str {
2722 &self.serialization
2723 }
2724}
2725
/// A range with `u32` bounds that can be used to slice a `str`.
///
/// Slicing follows normal `str` indexing rules, so an out-of-bounds range or one that
/// does not fall on a character boundary panics.
trait RangeArg {
    /// Return the part of `s` selected by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2750
/// Serializes this URL into a `serde` stream.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    /// Emit the URL as a single string, as returned by `as_str`.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2763
/// Deserializes this URL from a `serde` stream.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    /// Expect a string and run it through `Url::parse`.
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        // Visitor that only accepts strings.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E: Error>(self, s: &str) -> Result<Self::Value, E> {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => {
                        // Report the parse error text as what was "expected" of the input.
                        let message = err.to_string();
                        Err(Error::invalid_value(Unexpected::Str(s), &message.as_str()))
                    }
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2798
2799#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
2800fn path_to_file_url_segments(
2801 path: &Path,
2802 serialization: &mut String,
2803) -> Result<(u32, HostInternal), ()> {
2804 #[cfg(any(unix, target_os = "redox"))]
2805 use std::os::unix::prelude::OsStrExt;
2806 #[cfg(target_os = "wasi")]
2807 use std::os::wasi::prelude::OsStrExt;
2808 if !path.is_absolute() {
2809 return Err(());
2810 }
2811 let host_end = to_u32(serialization.len()).unwrap();
2812 let mut empty = true;
2813 // skip the root component
2814 for component in path.components().skip(1) {
2815 empty = false;
2816 serialization.push('/');
2817 serialization.extend(percent_encode(
2818 component.as_os_str().as_bytes(),
2819 PATH_SEGMENT,
2820 ));
2821 }
2822 if empty {
2823 // An URL’s path must not be empty.
2824 serialization.push('/');
2825 }
2826 Ok((host_end, HostInternal::None))
2827}
2828
/// Windows build of `path_to_file_url_segments`: forwards both arguments unchanged to
/// `path_to_file_url_segments_windows` and returns its result.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2836
2837// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2838#[cfg_attr(not(windows), allow(dead_code))]
2839fn path_to_file_url_segments_windows(
2840 path: &Path,
2841 serialization: &mut String,
2842) -> Result<(u32, HostInternal), ()> {
2843 use std::path::{Component, Prefix};
2844 if !path.is_absolute() {
2845 return Err(());
2846 }
2847 let mut components = path.components();
2848
2849 let host_start = serialization.len() + 1;
2850 let host_end;
2851 let host_internal;
2852
2853 match components.next() {
2854 Some(Component::Prefix(ref p)) => match p.kind() {
2855 Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2856 host_end = to_u32(serialization.len()).unwrap();
2857 host_internal = HostInternal::None;
2858 serialization.push('/');
2859 serialization.push(letter as char);
2860 serialization.push(':');
2861 }
2862 Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2863 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2864 write!(serialization, "{}", host).unwrap();
2865 host_end = to_u32(serialization.len()).unwrap();
2866 host_internal = host.into();
2867 serialization.push('/');
2868 let share = share.to_str().ok_or(())?;
2869 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
2870 }
2871 _ => return Err(()),
2872 },
2873 _ => return Err(()),
2874 }
2875
2876 let mut path_only_has_prefix = true;
2877 for component in components {
2878 if component == Component::RootDir {
2879 continue;
2880 }
2881
2882 path_only_has_prefix = false;
2883 // FIXME: somehow work with non-unicode?
2884 let component = component.as_os_str().to_str().ok_or(())?;
2885
2886 serialization.push('/');
2887 serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
2888 }
2889
2890 // A windows drive letter must end with a slash.
2891 if serialization.len() > host_start
2892 && parser::is_windows_drive_letter(&serialization[host_start..])
2893 && path_only_has_prefix
2894 {
2895 serialization.push('/');
2896 }
2897
2898 Ok((host_end, host_internal))
2899}
2900
2901#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
2902fn file_url_segments_to_pathbuf(
2903 host: Option<&str>,
2904 segments: str::Split<'_, char>,
2905) -> Result<PathBuf, ()> {
2906 use std::ffi::OsStr;
2907 #[cfg(any(unix, target_os = "redox"))]
2908 use std::os::unix::prelude::OsStrExt;
2909 #[cfg(target_os = "wasi")]
2910 use std::os::wasi::prelude::OsStrExt;
2911
2912 if host.is_some() {
2913 return Err(());
2914 }
2915
2916 let mut bytes = if cfg!(target_os = "redox") {
2917 b"file:".to_vec()
2918 } else {
2919 Vec::new()
2920 };
2921
2922 for segment in segments {
2923 bytes.push(b'/');
2924 bytes.extend(percent_decode(segment.as_bytes()));
2925 }
2926
2927 // A windows drive letter must end with a slash.
2928 if bytes.len() > 2
2929 && bytes[bytes.len() - 2].is_ascii_alphabetic()
2930 && matches!(bytes[bytes.len() - 1], b':' | b'|')
2931 {
2932 bytes.push(b'/');
2933 }
2934
2935 let os_str = OsStr::from_bytes(&bytes);
2936 let path = PathBuf::from(os_str);
2937
2938 debug_assert!(
2939 path.is_absolute(),
2940 "to_file_path() failed to produce an absolute Path"
2941 );
2942
2943 Ok(path)
2944}
2945
/// Windows build of `file_url_segments_to_pathbuf`: forwards both arguments unchanged to
/// `file_url_segments_to_pathbuf_windows` and returns its result.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2953
2954// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2955#[cfg_attr(not(windows), allow(dead_code))]
2956fn file_url_segments_to_pathbuf_windows(
2957 host: Option<&str>,
2958 mut segments: str::Split<'_, char>,
2959) -> Result<PathBuf, ()> {
2960 let mut string = if let Some(host) = host {
2961 r"\\".to_owned() + host
2962 } else {
2963 let first = segments.next().ok_or(())?;
2964
2965 match first.len() {
2966 2 => {
2967 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2968 return Err(());
2969 }
2970
2971 first.to_owned()
2972 }
2973
2974 4 => {
2975 if !first.starts_with(parser::ascii_alpha) {
2976 return Err(());
2977 }
2978 let bytes = first.as_bytes();
2979 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
2980 return Err(());
2981 }
2982
2983 first[0..1].to_owned() + ":"
2984 }
2985
2986 _ => return Err(()),
2987 }
2988 };
2989
2990 for segment in segments {
2991 string.push('\\');
2992
2993 // Currently non-unicode windows paths cannot be represented
2994 match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
2995 Ok(s) => string.push_str(&s),
2996 Err(..) => return Err(()),
2997 }
2998 }
2999 let path = PathBuf::from(string);
3000 debug_assert!(
3001 path.is_absolute(),
3002 "to_file_path() failed to produce an absolute Path"
3003 );
3004 Ok(path)
3005}
3006
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. It is taken out (leaving `None`) by `finish` or by `Drop`,
    // whichever runs first.
    url: Option<&'a mut Url>,
    // Passed to `Url::restore_already_parsed_fragment` when editing ends.
    // NOTE(review): presumably the fragment set aside while the query is edited —
    // confirm at the construction site.
    fragment: Option<String>,
}
3013
3014// `as_mut_string` string here exposes the internal serialization of an `Url`,
3015// which should not be exposed to users.
3016// We achieve that by not giving users direct access to `UrlQuery`:
3017// * Its fields are private
3018// (and so can not be constructed with struct literal syntax outside of this crate),
3019// * It has no constructor
3020// * It is only visible (on the type level) to users in the return type of
3021// `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
3022// * `Serializer` keeps its target in a private field
3023// * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
3024impl<'a> form_urlencoded::Target for UrlQuery<'a> {
3025 fn as_mut_string(&mut self) -> &mut String {
3026 &mut self.url.as_mut().unwrap().serialization
3027 }
3028
3029 fn finish(mut self) -> &'a mut Url {
3030 let url: &mut Url = self.url.take().unwrap();
3031 url.restore_already_parsed_fragment(self.fragment.take());
3032 url
3033 }
3034
3035 type Finished = &'a mut Url;
3036}
3037
3038impl<'a> Drop for UrlQuery<'a> {
3039 fn drop(&mut self) {
3040 if let Some(url: &mut Url) = self.url.take() {
3041 url.restore_already_parsed_fragment(self.fragment.take())
3042 }
3043 }
3044}
3045