1 | // Copyright 2013-2015 The rust-url developers. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | /*! |
10 | |
rust-url is an implementation of the [URL Standard](https://url.spec.whatwg.org/)
for the [Rust](https://www.rust-lang.org/) programming language.
13 | |
14 | |
15 | # URL parsing and data structures |
16 | |
17 | First, URL parsing may fail for various reasons and therefore returns a `Result`. |
18 | |
19 | ``` |
20 | use url::{Url, ParseError}; |
21 | |
22 | assert!(Url::parse("http://[:::1]" ) == Err(ParseError::InvalidIpv6Address)) |
23 | ``` |
24 | |
25 | Let’s parse a valid URL and look at its components. |
26 | |
27 | ``` |
28 | use url::{Url, Host, Position}; |
29 | # use url::ParseError; |
30 | # fn run() -> Result<(), ParseError> { |
31 | let issue_list_url = Url::parse( |
32 | "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open" |
33 | )?; |
34 | |
35 | |
36 | assert!(issue_list_url.scheme() == "https" ); |
37 | assert!(issue_list_url.username() == "" ); |
38 | assert!(issue_list_url.password() == None); |
39 | assert!(issue_list_url.host_str() == Some("github.com" )); |
40 | assert!(issue_list_url.host() == Some(Host::Domain("github.com" ))); |
41 | assert!(issue_list_url.port() == None); |
42 | assert!(issue_list_url.path() == "/rust-lang/rust/issues" ); |
43 | assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) == |
44 | Some(vec!["rust-lang" , "rust" , "issues" ])); |
45 | assert!(issue_list_url.query() == Some("labels=E-easy&state=open" )); |
46 | assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open" ); |
47 | assert!(issue_list_url.fragment() == None); |
48 | assert!(!issue_list_url.cannot_be_a_base()); |
49 | # Ok(()) |
50 | # } |
51 | # run().unwrap(); |
52 | ``` |
53 | |
54 | Some URLs are said to be *cannot-be-a-base*: |
55 | they don’t have a username, password, host, or port, |
56 | and their "path" is an arbitrary string rather than slash-separated segments: |
57 | |
58 | ``` |
59 | use url::Url; |
60 | # use url::ParseError; |
61 | |
62 | # fn run() -> Result<(), ParseError> { |
63 | let data_url = Url::parse("data:text/plain,Hello?World#" )?; |
64 | |
65 | assert!(data_url.cannot_be_a_base()); |
66 | assert!(data_url.scheme() == "data" ); |
67 | assert!(data_url.path() == "text/plain,Hello" ); |
68 | assert!(data_url.path_segments().is_none()); |
69 | assert!(data_url.query() == Some("World" )); |
70 | assert!(data_url.fragment() == Some("" )); |
71 | # Ok(()) |
72 | # } |
73 | # run().unwrap(); |
74 | ``` |
75 | |
76 | ## Serde |
77 | |
78 | Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`. |
79 | |
80 | # Base URL |
81 | |
82 | Many contexts allow URL *references* that can be relative to a *base URL*: |
83 | |
84 | ```html |
85 | <link rel="stylesheet" href="../main.css"> |
86 | ``` |
87 | |
88 | Since parsed URLs are absolute, giving a base is required for parsing relative URLs: |
89 | |
90 | ``` |
91 | use url::{Url, ParseError}; |
92 | |
93 | assert!(Url::parse("../main.css" ) == Err(ParseError::RelativeUrlWithoutBase)) |
94 | ``` |
95 | |
Use the `join` method on a `Url` to use it as a base URL:
97 | |
98 | ``` |
99 | use url::Url; |
100 | # use url::ParseError; |
101 | |
102 | # fn run() -> Result<(), ParseError> { |
103 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html" )?; |
104 | let css_url = this_document.join("../main.css" )?; |
105 | assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css" ); |
106 | # Ok(()) |
107 | # } |
108 | # run().unwrap(); |
109 | ``` |
110 | |
111 | # Feature: `serde` |
112 | |
113 | If you enable the `serde` feature, [`Url`](struct.Url.html) will implement |
114 | [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and |
115 | [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html). |
116 | See [serde documentation](https://serde.rs) for more information. |
117 | |
118 | ```toml |
119 | url = { version = "2", features = ["serde"] } |
120 | ``` |
121 | |
122 | */ |
123 | |
124 | #![doc (html_root_url = "https://docs.rs/url/2.4.0" )] |
125 | #![cfg_attr ( |
126 | feature = "debugger_visualizer" , |
127 | feature(debugger_visualizer), |
128 | debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis" ) |
129 | )] |
130 | |
131 | pub use form_urlencoded; |
132 | |
133 | #[cfg (feature = "serde" )] |
134 | extern crate serde; |
135 | |
136 | use crate::host::HostInternal; |
137 | use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO}; |
138 | use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode}; |
139 | use std::borrow::Borrow; |
140 | use std::cmp; |
141 | use std::fmt::{self, Write}; |
142 | use std::hash; |
143 | use std::io; |
144 | use std::mem; |
145 | use std::net::{IpAddr, SocketAddr, ToSocketAddrs}; |
146 | use std::ops::{Range, RangeFrom, RangeTo}; |
147 | use std::path::{Path, PathBuf}; |
148 | use std::str; |
149 | |
150 | use std::convert::TryFrom; |
151 | |
152 | pub use crate::host::Host; |
153 | pub use crate::origin::{OpaqueOrigin, Origin}; |
154 | pub use crate::parser::{ParseError, SyntaxViolation}; |
155 | pub use crate::path_segments::PathSegmentsMut; |
156 | pub use crate::slicing::Position; |
157 | pub use form_urlencoded::EncodingOverride; |
158 | |
159 | mod host; |
160 | mod origin; |
161 | mod parser; |
162 | mod path_segments; |
163 | mod slicing; |
164 | |
165 | #[doc (hidden)] |
166 | pub mod quirks; |
167 | |
168 | /// A parsed URL record. |
169 | #[derive (Clone)] |
170 | pub struct Url { |
171 | /// Syntax in pseudo-BNF: |
172 | /// |
173 | /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]? |
174 | /// non-hierarchical = non-hierarchical-path |
175 | /// non-hierarchical-path = /* Does not start with "/" */ |
176 | /// hierarchical = authority? hierarchical-path |
177 | /// authority = "//" userinfo? host [ ":" port ]? |
178 | /// userinfo = username [ ":" password ]? "@" |
179 | /// hierarchical-path = [ "/" path-segment ]+ |
180 | serialization: String, |
181 | |
182 | // Components |
183 | scheme_end: u32, // Before ':' |
184 | username_end: u32, // Before ':' (if a password is given) or '@' (if not) |
185 | host_start: u32, |
186 | host_end: u32, |
187 | host: HostInternal, |
188 | port: Option<u16>, |
189 | path_start: u32, // Before initial '/', if any |
190 | query_start: Option<u32>, // Before '?', unlike Position::QueryStart |
191 | fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart |
192 | } |
193 | |
194 | /// Full configuration for the URL parser. |
195 | #[derive (Copy, Clone)] |
196 | pub struct ParseOptions<'a> { |
197 | base_url: Option<&'a Url>, |
198 | encoding_override: EncodingOverride<'a>, |
199 | violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, |
200 | } |
201 | |
202 | impl<'a> ParseOptions<'a> { |
203 | /// Change the base URL |
204 | pub fn base_url(mut self, new: Option<&'a Url>) -> Self { |
205 | self.base_url = new; |
206 | self |
207 | } |
208 | |
209 | /// Override the character encoding of query strings. |
210 | /// This is a legacy concept only relevant for HTML. |
211 | pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self { |
212 | self.encoding_override = new; |
213 | self |
214 | } |
215 | |
216 | /// Call the provided function or closure for a non-fatal `SyntaxViolation` |
217 | /// when it occurs during parsing. Note that since the provided function is |
218 | /// `Fn`, the caller might need to utilize _interior mutability_, such as with |
219 | /// a `RefCell`, to collect the violations. |
220 | /// |
221 | /// ## Example |
222 | /// ``` |
223 | /// use std::cell::RefCell; |
224 | /// use url::{Url, SyntaxViolation}; |
225 | /// # use url::ParseError; |
226 | /// # fn run() -> Result<(), url::ParseError> { |
227 | /// let violations = RefCell::new(Vec::new()); |
228 | /// let url = Url::options() |
229 | /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v))) |
230 | /// .parse("https:////example.com" )?; |
231 | /// assert_eq!(url.as_str(), "https://example.com/" ); |
232 | /// assert_eq!(violations.into_inner(), |
233 | /// vec!(SyntaxViolation::ExpectedDoubleSlash)); |
234 | /// # Ok(()) |
235 | /// # } |
236 | /// # run().unwrap(); |
237 | /// ``` |
238 | pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { |
239 | self.violation_fn = new; |
240 | self |
241 | } |
242 | |
243 | /// Parse an URL string with the configuration so far. |
244 | pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> { |
245 | Parser { |
246 | serialization: String::with_capacity(input.len()), |
247 | base_url: self.base_url, |
248 | query_encoding_override: self.encoding_override, |
249 | violation_fn: self.violation_fn, |
250 | context: Context::UrlParser, |
251 | } |
252 | .parse_url(input) |
253 | } |
254 | } |
255 | |
256 | impl Url { |
257 | /// Parse an absolute URL from a string. |
258 | /// |
259 | /// # Examples |
260 | /// |
261 | /// ```rust |
262 | /// use url::Url; |
263 | /// # use url::ParseError; |
264 | /// |
265 | /// # fn run() -> Result<(), ParseError> { |
266 | /// let url = Url::parse("https://example.net" )?; |
267 | /// # Ok(()) |
268 | /// # } |
269 | /// # run().unwrap(); |
270 | /// ``` |
271 | /// |
272 | /// # Errors |
273 | /// |
274 | /// If the function can not parse an absolute URL from the given string, |
275 | /// a [`ParseError`] variant will be returned. |
276 | /// |
277 | /// [`ParseError`]: enum.ParseError.html |
278 | #[inline ] |
279 | pub fn parse(input: &str) -> Result<Url, crate::ParseError> { |
280 | Url::options().parse(input) |
281 | } |
282 | |
283 | /// Parse an absolute URL from a string and add params to its query string. |
284 | /// |
285 | /// Existing params are not removed. |
286 | /// |
287 | /// # Examples |
288 | /// |
289 | /// ```rust |
290 | /// use url::Url; |
291 | /// # use url::ParseError; |
292 | /// |
293 | /// # fn run() -> Result<(), ParseError> { |
294 | /// let url = Url::parse_with_params("https://example.net?dont=clobberme" , |
295 | /// &[("lang" , "rust" ), ("browser" , "servo" )])?; |
296 | /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo" , url.as_str()); |
297 | /// # Ok(()) |
298 | /// # } |
299 | /// # run().unwrap(); |
300 | /// ``` |
301 | /// |
302 | /// # Errors |
303 | /// |
304 | /// If the function can not parse an absolute URL from the given string, |
305 | /// a [`ParseError`] variant will be returned. |
306 | /// |
307 | /// [`ParseError`]: enum.ParseError.html |
308 | #[inline ] |
309 | pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> |
310 | where |
311 | I: IntoIterator, |
312 | I::Item: Borrow<(K, V)>, |
313 | K: AsRef<str>, |
314 | V: AsRef<str>, |
315 | { |
316 | let mut url = Url::options().parse(input); |
317 | |
318 | if let Ok(ref mut url) = url { |
319 | url.query_pairs_mut().extend_pairs(iter); |
320 | } |
321 | |
322 | url |
323 | } |
324 | |
325 | /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path |
326 | fn strip_trailing_spaces_from_opaque_path(&mut self) { |
327 | if !self.cannot_be_a_base() { |
328 | return; |
329 | } |
330 | |
331 | if self.fragment_start.is_some() { |
332 | return; |
333 | } |
334 | |
335 | if self.query_start.is_some() { |
336 | return; |
337 | } |
338 | |
339 | let trailing_space_count = self |
340 | .serialization |
341 | .chars() |
342 | .rev() |
343 | .take_while(|c| *c == ' ' ) |
344 | .count(); |
345 | |
346 | let start = self.serialization.len() - trailing_space_count; |
347 | |
348 | self.serialization.truncate(start); |
349 | } |
350 | |
351 | /// Parse a string as an URL, with this URL as the base URL. |
352 | /// |
353 | /// The inverse of this is [`make_relative`]. |
354 | /// |
355 | /// Note: a trailing slash is significant. |
356 | /// Without it, the last path component is considered to be a “file” name |
357 | /// to be removed to get at the “directory” that is used as the base: |
358 | /// |
359 | /// # Examples |
360 | /// |
361 | /// ```rust |
362 | /// use url::Url; |
363 | /// # use url::ParseError; |
364 | /// |
365 | /// # fn run() -> Result<(), ParseError> { |
366 | /// let base = Url::parse("https://example.net/a/b.html" )?; |
367 | /// let url = base.join("c.png" )?; |
368 | /// assert_eq!(url.as_str(), "https://example.net/a/c.png" ); // Not /a/b.html/c.png |
369 | /// |
370 | /// let base = Url::parse("https://example.net/a/b/" )?; |
371 | /// let url = base.join("c.png" )?; |
372 | /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png" ); |
373 | /// # Ok(()) |
374 | /// # } |
375 | /// # run().unwrap(); |
376 | /// ``` |
377 | /// |
378 | /// # Errors |
379 | /// |
380 | /// If the function can not parse an URL from the given string |
381 | /// with this URL as the base URL, a [`ParseError`] variant will be returned. |
382 | /// |
383 | /// [`ParseError`]: enum.ParseError.html |
384 | /// [`make_relative`]: #method.make_relative |
385 | #[inline ] |
386 | pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> { |
387 | Url::options().base_url(Some(self)).parse(input) |
388 | } |
389 | |
390 | /// Creates a relative URL if possible, with this URL as the base URL. |
391 | /// |
392 | /// This is the inverse of [`join`]. |
393 | /// |
394 | /// # Examples |
395 | /// |
396 | /// ```rust |
397 | /// use url::Url; |
398 | /// # use url::ParseError; |
399 | /// |
400 | /// # fn run() -> Result<(), ParseError> { |
401 | /// let base = Url::parse("https://example.net/a/b.html" )?; |
402 | /// let url = Url::parse("https://example.net/a/c.png" )?; |
403 | /// let relative = base.make_relative(&url); |
404 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png" )); |
405 | /// |
406 | /// let base = Url::parse("https://example.net/a/b/" )?; |
407 | /// let url = Url::parse("https://example.net/a/b/c.png" )?; |
408 | /// let relative = base.make_relative(&url); |
409 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png" )); |
410 | /// |
411 | /// let base = Url::parse("https://example.net/a/b/" )?; |
412 | /// let url = Url::parse("https://example.net/a/d/c.png" )?; |
413 | /// let relative = base.make_relative(&url); |
414 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png" )); |
415 | /// |
416 | /// let base = Url::parse("https://example.net/a/b.html?c=d" )?; |
417 | /// let url = Url::parse("https://example.net/a/b.html?e=f" )?; |
418 | /// let relative = base.make_relative(&url); |
419 | /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f" )); |
420 | /// # Ok(()) |
421 | /// # } |
422 | /// # run().unwrap(); |
423 | /// ``` |
424 | /// |
425 | /// # Errors |
426 | /// |
427 | /// If this URL can't be a base for the given URL, `None` is returned. |
428 | /// This is for example the case if the scheme, host or port are not the same. |
429 | /// |
430 | /// [`join`]: #method.join |
431 | pub fn make_relative(&self, url: &Url) -> Option<String> { |
432 | if self.cannot_be_a_base() { |
433 | return None; |
434 | } |
435 | |
436 | // Scheme, host and port need to be the same |
437 | if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() { |
438 | return None; |
439 | } |
440 | |
441 | // We ignore username/password at this point |
442 | |
443 | // The path has to be transformed |
444 | let mut relative = String::new(); |
445 | |
446 | // Extract the filename of both URIs, these need to be handled separately |
447 | fn extract_path_filename(s: &str) -> (&str, &str) { |
448 | let last_slash_idx = s.rfind('/' ).unwrap_or(0); |
449 | let (path, filename) = s.split_at(last_slash_idx); |
450 | if filename.is_empty() { |
451 | (path, "" ) |
452 | } else { |
453 | (path, &filename[1..]) |
454 | } |
455 | } |
456 | |
457 | let (base_path, base_filename) = extract_path_filename(self.path()); |
458 | let (url_path, url_filename) = extract_path_filename(url.path()); |
459 | |
460 | let mut base_path = base_path.split('/' ).peekable(); |
461 | let mut url_path = url_path.split('/' ).peekable(); |
462 | |
463 | // Skip over the common prefix |
464 | while base_path.peek().is_some() && base_path.peek() == url_path.peek() { |
465 | base_path.next(); |
466 | url_path.next(); |
467 | } |
468 | |
469 | // Add `..` segments for the remainder of the base path |
470 | for base_path_segment in base_path { |
471 | // Skip empty last segments |
472 | if base_path_segment.is_empty() { |
473 | break; |
474 | } |
475 | |
476 | if !relative.is_empty() { |
477 | relative.push('/' ); |
478 | } |
479 | |
480 | relative.push_str(".." ); |
481 | } |
482 | |
483 | // Append the remainder of the other URI |
484 | for url_path_segment in url_path { |
485 | if !relative.is_empty() { |
486 | relative.push('/' ); |
487 | } |
488 | |
489 | relative.push_str(url_path_segment); |
490 | } |
491 | |
492 | // Add the filename if they are not the same |
493 | if !relative.is_empty() || base_filename != url_filename { |
494 | // If the URIs filename is empty this means that it was a directory |
495 | // so we'll have to append a '/'. |
496 | // |
497 | // Otherwise append it directly as the new filename. |
498 | if url_filename.is_empty() { |
499 | relative.push('/' ); |
500 | } else { |
501 | if !relative.is_empty() { |
502 | relative.push('/' ); |
503 | } |
504 | relative.push_str(url_filename); |
505 | } |
506 | } |
507 | |
508 | // Query and fragment are only taken from the other URI |
509 | if let Some(query) = url.query() { |
510 | relative.push('?' ); |
511 | relative.push_str(query); |
512 | } |
513 | |
514 | if let Some(fragment) = url.fragment() { |
515 | relative.push('#' ); |
516 | relative.push_str(fragment); |
517 | } |
518 | |
519 | Some(relative) |
520 | } |
521 | |
522 | /// Return a default `ParseOptions` that can fully configure the URL parser. |
523 | /// |
524 | /// # Examples |
525 | /// |
526 | /// Get default `ParseOptions`, then change base url |
527 | /// |
528 | /// ```rust |
529 | /// use url::Url; |
530 | /// # use url::ParseError; |
531 | /// # fn run() -> Result<(), ParseError> { |
532 | /// let options = Url::options(); |
533 | /// let api = Url::parse("https://api.example.com" )?; |
534 | /// let base_url = options.base_url(Some(&api)); |
535 | /// let version_url = base_url.parse("version.json" )?; |
536 | /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json" ); |
537 | /// # Ok(()) |
538 | /// # } |
539 | /// # run().unwrap(); |
540 | /// ``` |
541 | pub fn options<'a>() -> ParseOptions<'a> { |
542 | ParseOptions { |
543 | base_url: None, |
544 | encoding_override: None, |
545 | violation_fn: None, |
546 | } |
547 | } |
548 | |
549 | /// Return the serialization of this URL. |
550 | /// |
551 | /// This is fast since that serialization is already stored in the `Url` struct. |
552 | /// |
553 | /// # Examples |
554 | /// |
555 | /// ```rust |
556 | /// use url::Url; |
557 | /// # use url::ParseError; |
558 | /// |
559 | /// # fn run() -> Result<(), ParseError> { |
560 | /// let url_str = "https://example.net/" ; |
561 | /// let url = Url::parse(url_str)?; |
562 | /// assert_eq!(url.as_str(), url_str); |
563 | /// # Ok(()) |
564 | /// # } |
565 | /// # run().unwrap(); |
566 | /// ``` |
567 | #[inline ] |
568 | pub fn as_str(&self) -> &str { |
569 | &self.serialization |
570 | } |
571 | |
572 | /// Return the serialization of this URL. |
573 | /// |
574 | /// This consumes the `Url` and takes ownership of the `String` stored in it. |
575 | /// |
576 | /// # Examples |
577 | /// |
578 | /// ```rust |
579 | /// use url::Url; |
580 | /// # use url::ParseError; |
581 | /// |
582 | /// # fn run() -> Result<(), ParseError> { |
583 | /// let url_str = "https://example.net/" ; |
584 | /// let url = Url::parse(url_str)?; |
585 | /// assert_eq!(String::from(url), url_str); |
586 | /// # Ok(()) |
587 | /// # } |
588 | /// # run().unwrap(); |
589 | /// ``` |
590 | #[inline ] |
591 | #[deprecated (since = "2.3.0" , note = "use Into<String>" )] |
592 | pub fn into_string(self) -> String { |
593 | self.into() |
594 | } |
595 | |
596 | /// For internal testing, not part of the public API. |
597 | /// |
598 | /// Methods of the `Url` struct assume a number of invariants. |
599 | /// This checks each of these invariants and panic if one is not met. |
600 | /// This is for testing rust-url itself. |
601 | #[doc (hidden)] |
602 | pub fn check_invariants(&self) -> Result<(), String> { |
603 | macro_rules! assert { |
604 | ($x: expr) => { |
605 | if !$x { |
606 | return Err(format!( |
607 | "!( {} ) for URL {:?}" , |
608 | stringify!($x), |
609 | self.serialization |
610 | )); |
611 | } |
612 | }; |
613 | } |
614 | |
615 | macro_rules! assert_eq { |
616 | ($a: expr, $b: expr) => { |
617 | { |
618 | let a = $a; |
619 | let b = $b; |
620 | if a != b { |
621 | return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}" , |
622 | a, b, stringify!($a), stringify!($b), |
623 | self.serialization)) |
624 | } |
625 | } |
626 | } |
627 | } |
628 | |
629 | assert!(self.scheme_end >= 1); |
630 | assert!(self.byte_at(0).is_ascii_alphabetic()); |
631 | assert!(self |
632 | .slice(1..self.scheme_end) |
633 | .chars() |
634 | .all(|c| matches!(c, 'a' ..='z' | 'A' ..='Z' | '0' ..='9' | '+' | '-' | '.' ))); |
635 | assert_eq!(self.byte_at(self.scheme_end), b':' ); |
636 | |
637 | if self.slice(self.scheme_end + 1..).starts_with("//" ) { |
638 | // URL with authority |
639 | if self.username_end != self.serialization.len() as u32 { |
640 | match self.byte_at(self.username_end) { |
641 | b':' => { |
642 | assert!(self.host_start >= self.username_end + 2); |
643 | assert_eq!(self.byte_at(self.host_start - 1), b'@' ); |
644 | } |
645 | b'@' => assert!(self.host_start == self.username_end + 1), |
646 | _ => assert_eq!(self.username_end, self.scheme_end + 3), |
647 | } |
648 | } |
649 | assert!(self.host_start >= self.username_end); |
650 | assert!(self.host_end >= self.host_start); |
651 | let host_str = self.slice(self.host_start..self.host_end); |
652 | match self.host { |
653 | HostInternal::None => assert_eq!(host_str, "" ), |
654 | HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()), |
655 | HostInternal::Ipv6(address) => { |
656 | let h: Host<String> = Host::Ipv6(address); |
657 | assert_eq!(host_str, h.to_string()) |
658 | } |
659 | HostInternal::Domain => { |
660 | if SchemeType::from(self.scheme()).is_special() { |
661 | assert!(!host_str.is_empty()) |
662 | } |
663 | } |
664 | } |
665 | if self.path_start == self.host_end { |
666 | assert_eq!(self.port, None); |
667 | } else { |
668 | assert_eq!(self.byte_at(self.host_end), b':' ); |
669 | let port_str = self.slice(self.host_end + 1..self.path_start); |
670 | assert_eq!( |
671 | self.port, |
672 | Some(port_str.parse::<u16>().expect("Couldn't parse port?" )) |
673 | ); |
674 | } |
675 | assert!( |
676 | self.path_start as usize == self.serialization.len() |
677 | || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?' ) |
678 | ); |
679 | } else { |
680 | // Anarchist URL (no authority) |
681 | assert_eq!(self.username_end, self.scheme_end + 1); |
682 | assert_eq!(self.host_start, self.scheme_end + 1); |
683 | assert_eq!(self.host_end, self.scheme_end + 1); |
684 | assert_eq!(self.host, HostInternal::None); |
685 | assert_eq!(self.port, None); |
686 | if self.path().starts_with("//" ) { |
687 | // special case when first path segment is empty |
688 | assert_eq!(self.byte_at(self.scheme_end + 1), b'/' ); |
689 | assert_eq!(self.byte_at(self.scheme_end + 2), b'.' ); |
690 | assert_eq!(self.path_start, self.scheme_end + 3); |
691 | } else { |
692 | assert_eq!(self.path_start, self.scheme_end + 1); |
693 | } |
694 | } |
695 | if let Some(start) = self.query_start { |
696 | assert!(start >= self.path_start); |
697 | assert_eq!(self.byte_at(start), b'?' ); |
698 | } |
699 | if let Some(start) = self.fragment_start { |
700 | assert!(start >= self.path_start); |
701 | assert_eq!(self.byte_at(start), b'#' ); |
702 | } |
703 | if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) { |
704 | assert!(fragment_start > query_start); |
705 | } |
706 | |
707 | let other = Url::parse(self.as_str()).expect("Failed to parse myself?" ); |
708 | assert_eq!(&self.serialization, &other.serialization); |
709 | assert_eq!(self.scheme_end, other.scheme_end); |
710 | assert_eq!(self.username_end, other.username_end); |
711 | assert_eq!(self.host_start, other.host_start); |
712 | assert_eq!(self.host_end, other.host_end); |
713 | assert!( |
714 | self.host == other.host || |
715 | // XXX No host round-trips to empty host. |
716 | // See https://github.com/whatwg/url/issues/79 |
717 | (self.host_str(), other.host_str()) == (None, Some("" )) |
718 | ); |
719 | assert_eq!(self.port, other.port); |
720 | assert_eq!(self.path_start, other.path_start); |
721 | assert_eq!(self.query_start, other.query_start); |
722 | assert_eq!(self.fragment_start, other.fragment_start); |
723 | Ok(()) |
724 | } |
725 | |
726 | /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>) |
727 | /// |
728 | /// Note: this returns an opaque origin for `file:` URLs, which causes |
729 | /// `url.origin() != url.origin()`. |
730 | /// |
731 | /// # Examples |
732 | /// |
733 | /// URL with `ftp` scheme: |
734 | /// |
735 | /// ```rust |
736 | /// use url::{Host, Origin, Url}; |
737 | /// # use url::ParseError; |
738 | /// |
739 | /// # fn run() -> Result<(), ParseError> { |
740 | /// let url = Url::parse("ftp://example.com/foo" )?; |
741 | /// assert_eq!(url.origin(), |
742 | /// Origin::Tuple("ftp" .into(), |
743 | /// Host::Domain("example.com" .into()), |
744 | /// 21)); |
745 | /// # Ok(()) |
746 | /// # } |
747 | /// # run().unwrap(); |
748 | /// ``` |
749 | /// |
750 | /// URL with `blob` scheme: |
751 | /// |
752 | /// ```rust |
753 | /// use url::{Host, Origin, Url}; |
754 | /// # use url::ParseError; |
755 | /// |
756 | /// # fn run() -> Result<(), ParseError> { |
757 | /// let url = Url::parse("blob:https://example.com/foo" )?; |
758 | /// assert_eq!(url.origin(), |
759 | /// Origin::Tuple("https" .into(), |
760 | /// Host::Domain("example.com" .into()), |
761 | /// 443)); |
762 | /// # Ok(()) |
763 | /// # } |
764 | /// # run().unwrap(); |
765 | /// ``` |
766 | /// |
767 | /// URL with `file` scheme: |
768 | /// |
769 | /// ```rust |
770 | /// use url::{Host, Origin, Url}; |
771 | /// # use url::ParseError; |
772 | /// |
773 | /// # fn run() -> Result<(), ParseError> { |
774 | /// let url = Url::parse("file:///tmp/foo" )?; |
775 | /// assert!(!url.origin().is_tuple()); |
776 | /// |
777 | /// let other_url = Url::parse("file:///tmp/foo" )?; |
778 | /// assert!(url.origin() != other_url.origin()); |
779 | /// # Ok(()) |
780 | /// # } |
781 | /// # run().unwrap(); |
782 | /// ``` |
783 | /// |
784 | /// URL with other scheme: |
785 | /// |
786 | /// ```rust |
787 | /// use url::{Host, Origin, Url}; |
788 | /// # use url::ParseError; |
789 | /// |
790 | /// # fn run() -> Result<(), ParseError> { |
791 | /// let url = Url::parse("foo:bar" )?; |
792 | /// assert!(!url.origin().is_tuple()); |
793 | /// # Ok(()) |
794 | /// # } |
795 | /// # run().unwrap(); |
796 | /// ``` |
797 | #[inline ] |
798 | pub fn origin(&self) -> Origin { |
799 | origin::url_origin(self) |
800 | } |
801 | |
802 | /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter. |
803 | /// |
804 | /// # Examples |
805 | /// |
806 | /// ``` |
807 | /// use url::Url; |
808 | /// # use url::ParseError; |
809 | /// |
810 | /// # fn run() -> Result<(), ParseError> { |
811 | /// let url = Url::parse("file:///tmp/foo" )?; |
812 | /// assert_eq!(url.scheme(), "file" ); |
813 | /// # Ok(()) |
814 | /// # } |
815 | /// # run().unwrap(); |
816 | /// ``` |
817 | #[inline ] |
818 | pub fn scheme(&self) -> &str { |
819 | self.slice(..self.scheme_end) |
820 | } |
821 | |
822 | /// Return whether the URL is special (has a special scheme) |
823 | /// |
824 | /// # Examples |
825 | /// |
826 | /// ``` |
827 | /// use url::Url; |
828 | /// # use url::ParseError; |
829 | /// |
830 | /// # fn run() -> Result<(), ParseError> { |
831 | /// assert!(Url::parse("http:///tmp/foo" )?.is_special()); |
832 | /// assert!(Url::parse("file:///tmp/foo" )?.is_special()); |
833 | /// assert!(!Url::parse("moz:///tmp/foo" )?.is_special()); |
834 | /// # Ok(()) |
835 | /// # } |
836 | /// # run().unwrap(); |
837 | /// ``` |
838 | pub fn is_special(&self) -> bool { |
839 | let scheme_type = SchemeType::from(self.scheme()); |
840 | scheme_type.is_special() |
841 | } |
842 | |
843 | /// Return whether the URL has an 'authority', |
844 | /// which can contain a username, password, host, and port number. |
845 | /// |
846 | /// URLs that do *not* are either path-only like `unix:/run/foo.socket` |
847 | /// or cannot-be-a-base like `data:text/plain,Stuff`. |
848 | /// |
849 | /// See also the `authority` method. |
850 | /// |
851 | /// # Examples |
852 | /// |
853 | /// ``` |
854 | /// use url::Url; |
855 | /// # use url::ParseError; |
856 | /// |
857 | /// # fn run() -> Result<(), ParseError> { |
858 | /// let url = Url::parse("ftp://rms@example.com" )?; |
859 | /// assert!(url.has_authority()); |
860 | /// |
861 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
862 | /// assert!(!url.has_authority()); |
863 | /// |
864 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
865 | /// assert!(!url.has_authority()); |
866 | /// # Ok(()) |
867 | /// # } |
868 | /// # run().unwrap(); |
869 | /// ``` |
870 | #[inline ] |
871 | pub fn has_authority(&self) -> bool { |
872 | debug_assert!(self.byte_at(self.scheme_end) == b':' ); |
873 | self.slice(self.scheme_end..).starts_with("://" ) |
874 | } |
875 | |
876 | /// Return the authority of this URL as an ASCII string. |
877 | /// |
878 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
879 | /// of a special URL, or percent encoded for non-special URLs. |
880 | /// IPv6 addresses are given between `[` and `]` brackets. |
881 | /// Ports are omitted if they match the well known port of a special URL. |
882 | /// |
883 | /// Username and password are percent-encoded. |
884 | /// |
885 | /// See also the `has_authority` method. |
886 | /// |
887 | /// # Examples |
888 | /// |
889 | /// ``` |
890 | /// use url::Url; |
891 | /// # use url::ParseError; |
892 | /// |
893 | /// # fn run() -> Result<(), ParseError> { |
894 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
895 | /// assert_eq!(url.authority(), "" ); |
896 | /// let url = Url::parse("file:///tmp/foo" )?; |
897 | /// assert_eq!(url.authority(), "" ); |
898 | /// let url = Url::parse("https://user:password@example.com/tmp/foo" )?; |
899 | /// assert_eq!(url.authority(), "user:password@example.com" ); |
900 | /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo" )?; |
901 | /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667" ); |
902 | /// let url = Url::parse("http://àlex.рф.example.com:80/foo" )?; |
903 | /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com" ); |
904 | /// # Ok(()) |
905 | /// # } |
906 | /// # run().unwrap(); |
907 | /// ``` |
908 | pub fn authority(&self) -> &str { |
909 | let scheme_separator_len = "://" .len() as u32; |
910 | if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len { |
911 | self.slice(self.scheme_end + scheme_separator_len..self.path_start) |
912 | } else { |
913 | "" |
914 | } |
915 | } |
916 | |
917 | /// Return whether this URL is a cannot-be-a-base URL, |
918 | /// meaning that parsing a relative URL string with this URL as the base will return an error. |
919 | /// |
920 | /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash, |
921 | /// as is typically the case of `data:` and `mailto:` URLs. |
922 | /// |
923 | /// # Examples |
924 | /// |
925 | /// ``` |
926 | /// use url::Url; |
927 | /// # use url::ParseError; |
928 | /// |
929 | /// # fn run() -> Result<(), ParseError> { |
930 | /// let url = Url::parse("ftp://rms@example.com" )?; |
931 | /// assert!(!url.cannot_be_a_base()); |
932 | /// |
933 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
934 | /// assert!(!url.cannot_be_a_base()); |
935 | /// |
936 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
937 | /// assert!(url.cannot_be_a_base()); |
938 | /// # Ok(()) |
939 | /// # } |
940 | /// # run().unwrap(); |
941 | /// ``` |
942 | #[inline ] |
943 | pub fn cannot_be_a_base(&self) -> bool { |
944 | !self.slice(self.scheme_end + 1..).starts_with('/' ) |
945 | } |
946 | |
947 | /// Return the username for this URL (typically the empty string) |
948 | /// as a percent-encoded ASCII string. |
949 | /// |
950 | /// # Examples |
951 | /// |
952 | /// ``` |
953 | /// use url::Url; |
954 | /// # use url::ParseError; |
955 | /// |
956 | /// # fn run() -> Result<(), ParseError> { |
957 | /// let url = Url::parse("ftp://rms@example.com" )?; |
958 | /// assert_eq!(url.username(), "rms" ); |
959 | /// |
960 | /// let url = Url::parse("ftp://:secret123@example.com" )?; |
961 | /// assert_eq!(url.username(), "" ); |
962 | /// |
963 | /// let url = Url::parse("https://example.com" )?; |
964 | /// assert_eq!(url.username(), "" ); |
965 | /// # Ok(()) |
966 | /// # } |
967 | /// # run().unwrap(); |
968 | /// ``` |
969 | pub fn username(&self) -> &str { |
970 | let scheme_separator_len = "://" .len() as u32; |
971 | if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len { |
972 | self.slice(self.scheme_end + scheme_separator_len..self.username_end) |
973 | } else { |
974 | "" |
975 | } |
976 | } |
977 | |
978 | /// Return the password for this URL, if any, as a percent-encoded ASCII string. |
979 | /// |
980 | /// # Examples |
981 | /// |
982 | /// ``` |
983 | /// use url::Url; |
984 | /// # use url::ParseError; |
985 | /// |
986 | /// # fn run() -> Result<(), ParseError> { |
987 | /// let url = Url::parse("ftp://rms:secret123@example.com" )?; |
988 | /// assert_eq!(url.password(), Some("secret123" )); |
989 | /// |
990 | /// let url = Url::parse("ftp://:secret123@example.com" )?; |
991 | /// assert_eq!(url.password(), Some("secret123" )); |
992 | /// |
993 | /// let url = Url::parse("ftp://rms@example.com" )?; |
994 | /// assert_eq!(url.password(), None); |
995 | /// |
996 | /// let url = Url::parse("https://example.com" )?; |
997 | /// assert_eq!(url.password(), None); |
998 | /// # Ok(()) |
999 | /// # } |
1000 | /// # run().unwrap(); |
1001 | /// ``` |
1002 | pub fn password(&self) -> Option<&str> { |
1003 | // This ':' is not the one marking a port number since a host can not be empty. |
1004 | // (Except for file: URLs, which do not have port numbers.) |
1005 | if self.has_authority() |
1006 | && self.username_end != self.serialization.len() as u32 |
1007 | && self.byte_at(self.username_end) == b':' |
1008 | { |
1009 | debug_assert!(self.byte_at(self.host_start - 1) == b'@' ); |
1010 | Some(self.slice(self.username_end + 1..self.host_start - 1)) |
1011 | } else { |
1012 | None |
1013 | } |
1014 | } |
1015 | |
1016 | /// Equivalent to `url.host().is_some()`. |
1017 | /// |
1018 | /// # Examples |
1019 | /// |
1020 | /// ``` |
1021 | /// use url::Url; |
1022 | /// # use url::ParseError; |
1023 | /// |
1024 | /// # fn run() -> Result<(), ParseError> { |
1025 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1026 | /// assert!(url.has_host()); |
1027 | /// |
1028 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1029 | /// assert!(!url.has_host()); |
1030 | /// |
1031 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1032 | /// assert!(!url.has_host()); |
1033 | /// # Ok(()) |
1034 | /// # } |
1035 | /// # run().unwrap(); |
1036 | /// ``` |
1037 | pub fn has_host(&self) -> bool { |
1038 | !matches!(self.host, HostInternal::None) |
1039 | } |
1040 | |
1041 | /// Return the string representation of the host (domain or IP address) for this URL, if any. |
1042 | /// |
1043 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
1044 | /// of a special URL, or percent encoded for non-special URLs. |
1045 | /// IPv6 addresses are given between `[` and `]` brackets. |
1046 | /// |
1047 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs |
1048 | /// don’t have a host. |
1049 | /// |
1050 | /// See also the `host` method. |
1051 | /// |
1052 | /// # Examples |
1053 | /// |
1054 | /// ``` |
1055 | /// use url::Url; |
1056 | /// # use url::ParseError; |
1057 | /// |
1058 | /// # fn run() -> Result<(), ParseError> { |
1059 | /// let url = Url::parse("https://127.0.0.1/index.html" )?; |
1060 | /// assert_eq!(url.host_str(), Some("127.0.0.1" )); |
1061 | /// |
1062 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1063 | /// assert_eq!(url.host_str(), Some("example.com" )); |
1064 | /// |
1065 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1066 | /// assert_eq!(url.host_str(), None); |
1067 | /// |
1068 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1069 | /// assert_eq!(url.host_str(), None); |
1070 | /// # Ok(()) |
1071 | /// # } |
1072 | /// # run().unwrap(); |
1073 | /// ``` |
1074 | pub fn host_str(&self) -> Option<&str> { |
1075 | if self.has_host() { |
1076 | Some(self.slice(self.host_start..self.host_end)) |
1077 | } else { |
1078 | None |
1079 | } |
1080 | } |
1081 | |
1082 | /// Return the parsed representation of the host for this URL. |
1083 | /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host |
1084 | /// of a special URL, or percent encoded for non-special URLs. |
1085 | /// |
1086 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs |
1087 | /// don’t have a host. |
1088 | /// |
1089 | /// See also the `host_str` method. |
1090 | /// |
1091 | /// # Examples |
1092 | /// |
1093 | /// ``` |
1094 | /// use url::Url; |
1095 | /// # use url::ParseError; |
1096 | /// |
1097 | /// # fn run() -> Result<(), ParseError> { |
1098 | /// let url = Url::parse("https://127.0.0.1/index.html" )?; |
1099 | /// assert!(url.host().is_some()); |
1100 | /// |
1101 | /// let url = Url::parse("ftp://rms@example.com" )?; |
1102 | /// assert!(url.host().is_some()); |
1103 | /// |
1104 | /// let url = Url::parse("unix:/run/foo.socket" )?; |
1105 | /// assert!(url.host().is_none()); |
1106 | /// |
1107 | /// let url = Url::parse("data:text/plain,Stuff" )?; |
1108 | /// assert!(url.host().is_none()); |
1109 | /// # Ok(()) |
1110 | /// # } |
1111 | /// # run().unwrap(); |
1112 | /// ``` |
1113 | pub fn host(&self) -> Option<Host<&str>> { |
1114 | match self.host { |
1115 | HostInternal::None => None, |
1116 | HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))), |
1117 | HostInternal::Ipv4(address) => Some(Host::Ipv4(address)), |
1118 | HostInternal::Ipv6(address) => Some(Host::Ipv6(address)), |
1119 | } |
1120 | } |
1121 | |
1122 | /// If this URL has a host and it is a domain name (not an IP address), return it. |
1123 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host |
1124 | /// of a special URL, or percent encoded for non-special URLs. |
1125 | /// |
1126 | /// # Examples |
1127 | /// |
1128 | /// ``` |
1129 | /// use url::Url; |
1130 | /// # use url::ParseError; |
1131 | /// |
1132 | /// # fn run() -> Result<(), ParseError> { |
1133 | /// let url = Url::parse("https://127.0.0.1/" )?; |
1134 | /// assert_eq!(url.domain(), None); |
1135 | /// |
1136 | /// let url = Url::parse("mailto:rms@example.net" )?; |
1137 | /// assert_eq!(url.domain(), None); |
1138 | /// |
1139 | /// let url = Url::parse("https://example.com/" )?; |
1140 | /// assert_eq!(url.domain(), Some("example.com" )); |
1141 | /// # Ok(()) |
1142 | /// # } |
1143 | /// # run().unwrap(); |
1144 | /// ``` |
1145 | pub fn domain(&self) -> Option<&str> { |
1146 | match self.host { |
1147 | HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)), |
1148 | _ => None, |
1149 | } |
1150 | } |
1151 | |
1152 | /// Return the port number for this URL, if any. |
1153 | /// |
1154 | /// Note that default port numbers are never reflected by the serialization, |
1155 | /// use the `port_or_known_default()` method if you want a default port number returned. |
1156 | /// |
1157 | /// # Examples |
1158 | /// |
1159 | /// ``` |
1160 | /// use url::Url; |
1161 | /// # use url::ParseError; |
1162 | /// |
1163 | /// # fn run() -> Result<(), ParseError> { |
1164 | /// let url = Url::parse("https://example.com" )?; |
1165 | /// assert_eq!(url.port(), None); |
1166 | /// |
1167 | /// let url = Url::parse("https://example.com:443/" )?; |
1168 | /// assert_eq!(url.port(), None); |
1169 | /// |
1170 | /// let url = Url::parse("ssh://example.com:22" )?; |
1171 | /// assert_eq!(url.port(), Some(22)); |
1172 | /// # Ok(()) |
1173 | /// # } |
1174 | /// # run().unwrap(); |
1175 | /// ``` |
1176 | #[inline ] |
1177 | pub fn port(&self) -> Option<u16> { |
1178 | self.port |
1179 | } |
1180 | |
1181 | /// Return the port number for this URL, or the default port number if it is known. |
1182 | /// |
1183 | /// This method only knows the default port number |
1184 | /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes. |
1185 | /// |
1186 | /// For URLs in these schemes, this method always returns `Some(_)`. |
1187 | /// For other schemes, it is the same as `Url::port()`. |
1188 | /// |
1189 | /// # Examples |
1190 | /// |
1191 | /// ``` |
1192 | /// use url::Url; |
1193 | /// # use url::ParseError; |
1194 | /// |
1195 | /// # fn run() -> Result<(), ParseError> { |
1196 | /// let url = Url::parse("foo://example.com" )?; |
1197 | /// assert_eq!(url.port_or_known_default(), None); |
1198 | /// |
1199 | /// let url = Url::parse("foo://example.com:1456" )?; |
1200 | /// assert_eq!(url.port_or_known_default(), Some(1456)); |
1201 | /// |
1202 | /// let url = Url::parse("https://example.com" )?; |
1203 | /// assert_eq!(url.port_or_known_default(), Some(443)); |
1204 | /// # Ok(()) |
1205 | /// # } |
1206 | /// # run().unwrap(); |
1207 | /// ``` |
1208 | #[inline ] |
1209 | pub fn port_or_known_default(&self) -> Option<u16> { |
1210 | self.port.or_else(|| parser::default_port(self.scheme())) |
1211 | } |
1212 | |
1213 | /// Resolve a URL’s host and port number to `SocketAddr`. |
1214 | /// |
1215 | /// If the URL has the default port number of a scheme that is unknown to this library, |
1216 | /// `default_port_number` provides an opportunity to provide the actual port number. |
1217 | /// In non-example code this should be implemented either simply as `|| None`, |
1218 | /// or by matching on the URL’s `.scheme()`. |
1219 | /// |
1220 | /// If the host is a domain, it is resolved using the standard library’s DNS support. |
1221 | /// |
1222 | /// # Examples |
1223 | /// |
1224 | /// ```no_run |
1225 | /// let url = url::Url::parse("https://example.net/" ).unwrap(); |
1226 | /// let addrs = url.socket_addrs(|| None).unwrap(); |
1227 | /// std::net::TcpStream::connect(&*addrs) |
1228 | /// # ; |
1229 | /// ``` |
1230 | /// |
1231 | /// ``` |
1232 | /// /// With application-specific known default port numbers |
1233 | /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> { |
1234 | /// url.socket_addrs(|| match url.scheme() { |
1235 | /// "socks5" | "socks5h" => Some(1080), |
1236 | /// _ => None, |
1237 | /// }) |
1238 | /// } |
1239 | /// ``` |
1240 | pub fn socket_addrs( |
1241 | &self, |
1242 | default_port_number: impl Fn() -> Option<u16>, |
1243 | ) -> io::Result<Vec<SocketAddr>> { |
1244 | // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>` |
1245 | // causes borrowck issues because the return value borrows `default_port_number`: |
1246 | // |
1247 | // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters |
1248 | // |
1249 | // > This RFC proposes that *all* type parameters are considered in scope |
1250 | // > for `impl Trait` in return position |
1251 | |
1252 | fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> { |
1253 | opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message)) |
1254 | } |
1255 | |
1256 | let host = io_result(self.host(), "No host name in the URL" )?; |
1257 | let port = io_result( |
1258 | self.port_or_known_default().or_else(default_port_number), |
1259 | "No port number in the URL" , |
1260 | )?; |
1261 | Ok(match host { |
1262 | Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(), |
1263 | Host::Ipv4(ip) => vec![(ip, port).into()], |
1264 | Host::Ipv6(ip) => vec![(ip, port).into()], |
1265 | }) |
1266 | } |
1267 | |
1268 | /// Return the path for this URL, as a percent-encoded ASCII string. |
1269 | /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. |
1270 | /// For other URLs, this starts with a '/' slash |
1271 | /// and continues with slash-separated path segments. |
1272 | /// |
1273 | /// # Examples |
1274 | /// |
1275 | /// ```rust |
1276 | /// use url::{Url, ParseError}; |
1277 | /// |
1278 | /// # fn run() -> Result<(), ParseError> { |
1279 | /// let url = Url::parse("https://example.com/api/versions?page=2" )?; |
1280 | /// assert_eq!(url.path(), "/api/versions" ); |
1281 | /// |
1282 | /// let url = Url::parse("https://example.com" )?; |
1283 | /// assert_eq!(url.path(), "/" ); |
1284 | /// |
1285 | /// let url = Url::parse("https://example.com/countries/việt nam" )?; |
1286 | /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam" ); |
1287 | /// # Ok(()) |
1288 | /// # } |
1289 | /// # run().unwrap(); |
1290 | /// ``` |
1291 | pub fn path(&self) -> &str { |
1292 | match (self.query_start, self.fragment_start) { |
1293 | (None, None) => self.slice(self.path_start..), |
1294 | (Some(next_component_start), _) | (None, Some(next_component_start)) => { |
1295 | self.slice(self.path_start..next_component_start) |
1296 | } |
1297 | } |
1298 | } |
1299 | |
1300 | /// Unless this URL is cannot-be-a-base, |
1301 | /// return an iterator of '/' slash-separated path segments, |
1302 | /// each as a percent-encoded ASCII string. |
1303 | /// |
1304 | /// Return `None` for cannot-be-a-base URLs. |
1305 | /// |
1306 | /// When `Some` is returned, the iterator always contains at least one string |
1307 | /// (which may be empty). |
1308 | /// |
1309 | /// # Examples |
1310 | /// |
1311 | /// ``` |
1312 | /// use url::Url; |
1313 | /// # use std::error::Error; |
1314 | /// |
1315 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1316 | /// let url = Url::parse("https://example.com/foo/bar" )?; |
1317 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1318 | /// assert_eq!(path_segments.next(), Some("foo" )); |
1319 | /// assert_eq!(path_segments.next(), Some("bar" )); |
1320 | /// assert_eq!(path_segments.next(), None); |
1321 | /// |
1322 | /// let url = Url::parse("https://example.com" )?; |
1323 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1324 | /// assert_eq!(path_segments.next(), Some("" )); |
1325 | /// assert_eq!(path_segments.next(), None); |
1326 | /// |
1327 | /// let url = Url::parse("data:text/plain,HelloWorld" )?; |
1328 | /// assert!(url.path_segments().is_none()); |
1329 | /// |
1330 | /// let url = Url::parse("https://example.com/countries/việt nam" )?; |
1331 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base" )?; |
1332 | /// assert_eq!(path_segments.next(), Some("countries" )); |
1333 | /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam" )); |
1334 | /// # Ok(()) |
1335 | /// # } |
1336 | /// # run().unwrap(); |
1337 | /// ``` |
1338 | pub fn path_segments(&self) -> Option<str::Split<'_, char>> { |
1339 | let path = self.path(); |
1340 | path.strip_prefix('/' ).map(|remainder| remainder.split('/' )) |
1341 | } |
1342 | |
1343 | /// Return this URL’s query string, if any, as a percent-encoded ASCII string. |
1344 | /// |
1345 | /// # Examples |
1346 | /// |
1347 | /// ```rust |
1348 | /// use url::Url; |
1349 | /// # use url::ParseError; |
1350 | /// |
1351 | /// # fn run() -> Result<(), ParseError> { |
1352 | /// let url = Url::parse("https://example.com/products?page=2" )?; |
1353 | /// let query = url.query(); |
1354 | /// assert_eq!(query, Some("page=2" )); |
1355 | /// |
1356 | /// let url = Url::parse("https://example.com/products" )?; |
1357 | /// let query = url.query(); |
1358 | /// assert!(query.is_none()); |
1359 | /// |
1360 | /// let url = Url::parse("https://example.com/?country=español" )?; |
1361 | /// let query = url.query(); |
1362 | /// assert_eq!(query, Some("country=espa%C3%B1ol" )); |
1363 | /// # Ok(()) |
1364 | /// # } |
1365 | /// # run().unwrap(); |
1366 | /// ``` |
1367 | pub fn query(&self) -> Option<&str> { |
1368 | match (self.query_start, self.fragment_start) { |
1369 | (None, _) => None, |
1370 | (Some(query_start), None) => { |
1371 | debug_assert!(self.byte_at(query_start) == b'?' ); |
1372 | Some(self.slice(query_start + 1..)) |
1373 | } |
1374 | (Some(query_start), Some(fragment_start)) => { |
1375 | debug_assert!(self.byte_at(query_start) == b'?' ); |
1376 | Some(self.slice(query_start + 1..fragment_start)) |
1377 | } |
1378 | } |
1379 | } |
1380 | |
1381 | /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded` |
1382 | /// and return an iterator of (key, value) pairs. |
1383 | /// |
1384 | /// # Examples |
1385 | /// |
1386 | /// ```rust |
1387 | /// use std::borrow::Cow; |
1388 | /// |
1389 | /// use url::Url; |
1390 | /// # use url::ParseError; |
1391 | /// |
1392 | /// # fn run() -> Result<(), ParseError> { |
1393 | /// let url = Url::parse("https://example.com/products?page=2&sort=desc" )?; |
1394 | /// let mut pairs = url.query_pairs(); |
1395 | /// |
1396 | /// assert_eq!(pairs.count(), 2); |
1397 | /// |
1398 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page" ), Cow::Borrowed("2" )))); |
1399 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort" ), Cow::Borrowed("desc" )))); |
1400 | /// # Ok(()) |
1401 | /// # } |
1402 | /// # run().unwrap(); |
1403 | /// ``` |
1404 | |
1405 | #[inline ] |
1406 | pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> { |
1407 | form_urlencoded::parse(self.query().unwrap_or("" ).as_bytes()) |
1408 | } |
1409 | |
1410 | /// Return this URL’s fragment identifier, if any. |
1411 | /// |
1412 | /// A fragment is the part of the URL after the `#` symbol. |
1413 | /// The fragment is optional and, if present, contains a fragment identifier |
1414 | /// that identifies a secondary resource, such as a section heading |
1415 | /// of a document. |
1416 | /// |
1417 | /// In HTML, the fragment identifier is usually the id attribute of an element |
1418 | /// that is scrolled to on load. Browsers typically will not send the fragment portion |
1419 | /// of a URL to the server. |
1420 | /// |
1421 | /// **Note:** the parser did *not* percent-encode this component, |
1422 | /// but the input may have been percent-encoded already. |
1423 | /// |
1424 | /// # Examples |
1425 | /// |
1426 | /// ```rust |
1427 | /// use url::Url; |
1428 | /// # use url::ParseError; |
1429 | /// |
1430 | /// # fn run() -> Result<(), ParseError> { |
1431 | /// let url = Url::parse("https://example.com/data.csv#row=4" )?; |
1432 | /// |
1433 | /// assert_eq!(url.fragment(), Some("row=4" )); |
1434 | /// |
1435 | /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2" )?; |
1436 | /// |
1437 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2" )); |
1438 | /// # Ok(()) |
1439 | /// # } |
1440 | /// # run().unwrap(); |
1441 | /// ``` |
1442 | pub fn fragment(&self) -> Option<&str> { |
1443 | self.fragment_start.map(|start| { |
1444 | debug_assert!(self.byte_at(start) == b'#' ); |
1445 | self.slice(start + 1..) |
1446 | }) |
1447 | } |
1448 | |
1449 | fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R { |
1450 | let mut parser = Parser::for_setter(mem::take(&mut self.serialization)); |
1451 | let result = f(&mut parser); |
1452 | self.serialization = parser.serialization; |
1453 | result |
1454 | } |
1455 | |
1456 | /// Change this URL’s fragment identifier. |
1457 | /// |
1458 | /// # Examples |
1459 | /// |
1460 | /// ```rust |
1461 | /// use url::Url; |
1462 | /// # use url::ParseError; |
1463 | /// |
1464 | /// # fn run() -> Result<(), ParseError> { |
1465 | /// let mut url = Url::parse("https://example.com/data.csv" )?; |
1466 | /// assert_eq!(url.as_str(), "https://example.com/data.csv" ); |
1467 | /// |
1468 | /// url.set_fragment(Some("cell=4,1-6,2" )); |
1469 | /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2" ); |
1470 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2" )); |
1471 | /// |
1472 | /// url.set_fragment(None); |
1473 | /// assert_eq!(url.as_str(), "https://example.com/data.csv" ); |
1474 | /// assert!(url.fragment().is_none()); |
1475 | /// # Ok(()) |
1476 | /// # } |
1477 | /// # run().unwrap(); |
1478 | /// ``` |
1479 | pub fn set_fragment(&mut self, fragment: Option<&str>) { |
1480 | // Remove any previous fragment |
1481 | if let Some(start) = self.fragment_start { |
1482 | debug_assert!(self.byte_at(start) == b'#' ); |
1483 | self.serialization.truncate(start as usize); |
1484 | } |
1485 | // Write the new one |
1486 | if let Some(input) = fragment { |
1487 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); |
1488 | self.serialization.push('#' ); |
1489 | self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input))) |
1490 | } else { |
1491 | self.fragment_start = None; |
1492 | self.strip_trailing_spaces_from_opaque_path(); |
1493 | } |
1494 | } |
1495 | |
1496 | fn take_fragment(&mut self) -> Option<String> { |
1497 | self.fragment_start.take().map(|start| { |
1498 | debug_assert!(self.byte_at(start) == b'#' ); |
1499 | let fragment = self.slice(start + 1..).to_owned(); |
1500 | self.serialization.truncate(start as usize); |
1501 | fragment |
1502 | }) |
1503 | } |
1504 | |
1505 | fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) { |
1506 | if let Some(ref fragment) = fragment { |
1507 | assert!(self.fragment_start.is_none()); |
1508 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); |
1509 | self.serialization.push('#' ); |
1510 | self.serialization.push_str(fragment); |
1511 | } |
1512 | } |
1513 | |
1514 | /// Change this URL’s query string. |
1515 | /// |
1516 | /// # Examples |
1517 | /// |
1518 | /// ```rust |
1519 | /// use url::Url; |
1520 | /// # use url::ParseError; |
1521 | /// |
1522 | /// # fn run() -> Result<(), ParseError> { |
1523 | /// let mut url = Url::parse("https://example.com/products" )?; |
1524 | /// assert_eq!(url.as_str(), "https://example.com/products" ); |
1525 | /// |
1526 | /// url.set_query(Some("page=2" )); |
1527 | /// assert_eq!(url.as_str(), "https://example.com/products?page=2" ); |
1528 | /// assert_eq!(url.query(), Some("page=2" )); |
1529 | /// # Ok(()) |
1530 | /// # } |
1531 | /// # run().unwrap(); |
1532 | /// ``` |
1533 | pub fn set_query(&mut self, query: Option<&str>) { |
1534 | let fragment = self.take_fragment(); |
1535 | |
1536 | // Remove any previous query |
1537 | if let Some(start) = self.query_start.take() { |
1538 | debug_assert!(self.byte_at(start) == b'?' ); |
1539 | self.serialization.truncate(start as usize); |
1540 | } |
1541 | // Write the new query, if any |
1542 | if let Some(input) = query { |
1543 | self.query_start = Some(to_u32(self.serialization.len()).unwrap()); |
1544 | self.serialization.push('?' ); |
1545 | let scheme_type = SchemeType::from(self.scheme()); |
1546 | let scheme_end = self.scheme_end; |
1547 | self.mutate(|parser| { |
1548 | let vfn = parser.violation_fn; |
1549 | parser.parse_query( |
1550 | scheme_type, |
1551 | scheme_end, |
1552 | parser::Input::trim_tab_and_newlines(input, vfn), |
1553 | ) |
1554 | }); |
1555 | } else { |
1556 | self.query_start = None; |
1557 | self.strip_trailing_spaces_from_opaque_path(); |
1558 | } |
1559 | |
1560 | self.restore_already_parsed_fragment(fragment); |
1561 | } |
1562 | |
1563 | /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs |
1564 | /// in `application/x-www-form-urlencoded` syntax. |
1565 | /// |
1566 | /// The return value has a method-chaining API: |
1567 | /// |
1568 | /// ```rust |
1569 | /// # use url::{Url, ParseError}; |
1570 | /// |
1571 | /// # fn run() -> Result<(), ParseError> { |
1572 | /// let mut url = Url::parse("https://example.net?lang=fr#nav" )?; |
1573 | /// assert_eq!(url.query(), Some("lang=fr" )); |
1574 | /// |
1575 | /// url.query_pairs_mut().append_pair("foo" , "bar" ); |
1576 | /// assert_eq!(url.query(), Some("lang=fr&foo=bar" )); |
1577 | /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav" ); |
1578 | /// |
1579 | /// url.query_pairs_mut() |
1580 | /// .clear() |
1581 | /// .append_pair("foo" , "bar & baz" ) |
1582 | /// .append_pair("saisons" , "\u{00C9}t\u{00E9}+hiver" ); |
1583 | /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver" )); |
1584 | /// assert_eq!(url.as_str(), |
1585 | /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav" ); |
1586 | /// # Ok(()) |
1587 | /// # } |
1588 | /// # run().unwrap(); |
1589 | /// ``` |
1590 | /// |
1591 | /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`, |
1592 | /// not `url.set_query(None)`. |
1593 | /// |
1594 | /// The state of `Url` is unspecified if this return value is leaked without being dropped. |
1595 | pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> { |
1596 | let fragment = self.take_fragment(); |
1597 | |
1598 | let query_start; |
1599 | if let Some(start) = self.query_start { |
1600 | debug_assert!(self.byte_at(start) == b'?' ); |
1601 | query_start = start as usize; |
1602 | } else { |
1603 | query_start = self.serialization.len(); |
1604 | self.query_start = Some(to_u32(query_start).unwrap()); |
1605 | self.serialization.push('?' ); |
1606 | } |
1607 | |
1608 | let query = UrlQuery { |
1609 | url: Some(self), |
1610 | fragment, |
1611 | }; |
1612 | form_urlencoded::Serializer::for_suffix(query, query_start + "?" .len()) |
1613 | } |
1614 | |
1615 | fn take_after_path(&mut self) -> String { |
1616 | match (self.query_start, self.fragment_start) { |
1617 | (Some(i), _) | (None, Some(i)) => { |
1618 | let after_path = self.slice(i..).to_owned(); |
1619 | self.serialization.truncate(i as usize); |
1620 | after_path |
1621 | } |
1622 | (None, None) => String::new(), |
1623 | } |
1624 | } |
1625 | |
1626 | /// Change this URL’s path. |
1627 | /// |
1628 | /// # Examples |
1629 | /// |
1630 | /// ```rust |
1631 | /// use url::Url; |
1632 | /// # use url::ParseError; |
1633 | /// |
1634 | /// # fn run() -> Result<(), ParseError> { |
1635 | /// let mut url = Url::parse("https://example.com" )?; |
1636 | /// url.set_path("api/comments" ); |
1637 | /// assert_eq!(url.as_str(), "https://example.com/api/comments" ); |
1638 | /// assert_eq!(url.path(), "/api/comments" ); |
1639 | /// |
1640 | /// let mut url = Url::parse("https://example.com/api" )?; |
1641 | /// url.set_path("data/report.csv" ); |
1642 | /// assert_eq!(url.as_str(), "https://example.com/data/report.csv" ); |
1643 | /// assert_eq!(url.path(), "/data/report.csv" ); |
1644 | /// |
1645 | /// // `set_path` percent-encodes the given string if it's not already percent-encoded. |
1646 | /// let mut url = Url::parse("https://example.com" )?; |
1647 | /// url.set_path("api/some comments" ); |
1648 | /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments" ); |
1649 | /// assert_eq!(url.path(), "/api/some%20comments" ); |
1650 | /// |
1651 | /// // `set_path` will not double percent-encode the string if it's already percent-encoded. |
1652 | /// let mut url = Url::parse("https://example.com" )?; |
1653 | /// url.set_path("api/some%20comments" ); |
1654 | /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments" ); |
1655 | /// assert_eq!(url.path(), "/api/some%20comments" ); |
1656 | /// |
1657 | /// # Ok(()) |
1658 | /// # } |
1659 | /// # run().unwrap(); |
1660 | /// ``` |
1661 | pub fn set_path(&mut self, mut path: &str) { |
1662 | let after_path = self.take_after_path(); |
1663 | let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); |
1664 | let cannot_be_a_base = self.cannot_be_a_base(); |
1665 | let scheme_type = SchemeType::from(self.scheme()); |
1666 | self.serialization.truncate(self.path_start as usize); |
1667 | self.mutate(|parser| { |
1668 | if cannot_be_a_base { |
1669 | if path.starts_with('/' ) { |
1670 | parser.serialization.push_str("%2F" ); |
1671 | path = &path[1..]; |
1672 | } |
1673 | parser.parse_cannot_be_a_base_path(parser::Input::new(path)); |
1674 | } else { |
1675 | let mut has_host = true; // FIXME |
1676 | parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path)); |
1677 | } |
1678 | }); |
1679 | self.restore_after_path(old_after_path_pos, &after_path); |
1680 | } |
1681 | |
1682 | /// Return an object with methods to manipulate this URL’s path segments. |
1683 | /// |
1684 | /// Return `Err(())` if this URL is cannot-be-a-base. |
1685 | #[allow (clippy::result_unit_err)] |
1686 | pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> { |
1687 | if self.cannot_be_a_base() { |
1688 | Err(()) |
1689 | } else { |
1690 | Ok(path_segments::new(self)) |
1691 | } |
1692 | } |
1693 | |
1694 | fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) { |
1695 | let new_after_path_position = to_u32(self.serialization.len()).unwrap(); |
1696 | let adjust = |index: &mut u32| { |
1697 | *index -= old_after_path_position; |
1698 | *index += new_after_path_position; |
1699 | }; |
1700 | if let Some(ref mut index) = self.query_start { |
1701 | adjust(index) |
1702 | } |
1703 | if let Some(ref mut index) = self.fragment_start { |
1704 | adjust(index) |
1705 | } |
1706 | self.serialization.push_str(after_path) |
1707 | } |
1708 | |
1709 | /// Change this URL’s port number. |
1710 | /// |
1711 | /// Note that default port numbers are not reflected in the serialization. |
1712 | /// |
1713 | /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme; |
1714 | /// do nothing and return `Err`. |
1715 | /// |
1716 | /// # Examples |
1717 | /// |
1718 | /// ``` |
1719 | /// use url::Url; |
1720 | /// # use std::error::Error; |
1721 | /// |
1722 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1723 | /// let mut url = Url::parse("ssh://example.net:2048/" )?; |
1724 | /// |
1725 | /// url.set_port(Some(4096)).map_err(|_| "cannot be base" )?; |
1726 | /// assert_eq!(url.as_str(), "ssh://example.net:4096/" ); |
1727 | /// |
1728 | /// url.set_port(None).map_err(|_| "cannot be base" )?; |
1729 | /// assert_eq!(url.as_str(), "ssh://example.net/" ); |
1730 | /// # Ok(()) |
1731 | /// # } |
1732 | /// # run().unwrap(); |
1733 | /// ``` |
1734 | /// |
1735 | /// Known default port numbers are not reflected: |
1736 | /// |
1737 | /// ```rust |
1738 | /// use url::Url; |
1739 | /// # use std::error::Error; |
1740 | /// |
1741 | /// # fn run() -> Result<(), Box<dyn Error>> { |
1742 | /// let mut url = Url::parse("https://example.org/" )?; |
1743 | /// |
1744 | /// url.set_port(Some(443)).map_err(|_| "cannot be base" )?; |
1745 | /// assert!(url.port().is_none()); |
1746 | /// # Ok(()) |
1747 | /// # } |
1748 | /// # run().unwrap(); |
1749 | /// ``` |
1750 | /// |
1751 | /// Cannot set port for cannot-be-a-base URLs: |
1752 | /// |
1753 | /// ``` |
1754 | /// use url::Url; |
1755 | /// # use url::ParseError; |
1756 | /// |
1757 | /// # fn run() -> Result<(), ParseError> { |
1758 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
1759 | /// |
1760 | /// let result = url.set_port(Some(80)); |
1761 | /// assert!(result.is_err()); |
1762 | /// |
1763 | /// let result = url.set_port(None); |
1764 | /// assert!(result.is_err()); |
1765 | /// # Ok(()) |
1766 | /// # } |
1767 | /// # run().unwrap(); |
1768 | /// ``` |
1769 | #[allow (clippy::result_unit_err)] |
1770 | pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> { |
1771 | // has_host implies !cannot_be_a_base |
1772 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
1773 | return Err(()); |
1774 | } |
1775 | if port.is_some() && port == parser::default_port(self.scheme()) { |
1776 | port = None |
1777 | } |
1778 | self.set_port_internal(port); |
1779 | Ok(()) |
1780 | } |
1781 | |
1782 | fn set_port_internal(&mut self, port: Option<u16>) { |
1783 | match (self.port, port) { |
1784 | (None, None) => {} |
1785 | (Some(_), None) => { |
1786 | self.serialization |
1787 | .drain(self.host_end as usize..self.path_start as usize); |
1788 | let offset = self.path_start - self.host_end; |
1789 | self.path_start = self.host_end; |
1790 | if let Some(ref mut index) = self.query_start { |
1791 | *index -= offset |
1792 | } |
1793 | if let Some(ref mut index) = self.fragment_start { |
1794 | *index -= offset |
1795 | } |
1796 | } |
1797 | (Some(old), Some(new)) if old == new => {} |
1798 | (_, Some(new)) => { |
1799 | let path_and_after = self.slice(self.path_start..).to_owned(); |
1800 | self.serialization.truncate(self.host_end as usize); |
1801 | write!(&mut self.serialization, ": {}" , new).unwrap(); |
1802 | let old_path_start = self.path_start; |
1803 | let new_path_start = to_u32(self.serialization.len()).unwrap(); |
1804 | self.path_start = new_path_start; |
1805 | let adjust = |index: &mut u32| { |
1806 | *index -= old_path_start; |
1807 | *index += new_path_start; |
1808 | }; |
1809 | if let Some(ref mut index) = self.query_start { |
1810 | adjust(index) |
1811 | } |
1812 | if let Some(ref mut index) = self.fragment_start { |
1813 | adjust(index) |
1814 | } |
1815 | self.serialization.push_str(&path_and_after); |
1816 | } |
1817 | } |
1818 | self.port = port; |
1819 | } |
1820 | |
1821 | /// Change this URL’s host. |
1822 | /// |
1823 | /// Removing the host (calling this with `None`) |
1824 | /// will also remove any username, password, and port number. |
1825 | /// |
1826 | /// # Examples |
1827 | /// |
1828 | /// Change host: |
1829 | /// |
1830 | /// ``` |
1831 | /// use url::Url; |
1832 | /// # use url::ParseError; |
1833 | /// |
1834 | /// # fn run() -> Result<(), ParseError> { |
1835 | /// let mut url = Url::parse("https://example.net" )?; |
1836 | /// let result = url.set_host(Some("rust-lang.org" )); |
1837 | /// assert!(result.is_ok()); |
1838 | /// assert_eq!(url.as_str(), "https://rust-lang.org/" ); |
1839 | /// # Ok(()) |
1840 | /// # } |
1841 | /// # run().unwrap(); |
1842 | /// ``` |
1843 | /// |
1844 | /// Remove host: |
1845 | /// |
1846 | /// ``` |
1847 | /// use url::Url; |
1848 | /// # use url::ParseError; |
1849 | /// |
1850 | /// # fn run() -> Result<(), ParseError> { |
1851 | /// let mut url = Url::parse("foo://example.net" )?; |
1852 | /// let result = url.set_host(None); |
1853 | /// assert!(result.is_ok()); |
1854 | /// assert_eq!(url.as_str(), "foo:/" ); |
1855 | /// # Ok(()) |
1856 | /// # } |
1857 | /// # run().unwrap(); |
1858 | /// ``` |
1859 | /// |
1860 | /// Cannot remove host for 'special' schemes (e.g. `http`): |
1861 | /// |
1862 | /// ``` |
1863 | /// use url::Url; |
1864 | /// # use url::ParseError; |
1865 | /// |
1866 | /// # fn run() -> Result<(), ParseError> { |
1867 | /// let mut url = Url::parse("https://example.net" )?; |
1868 | /// let result = url.set_host(None); |
1869 | /// assert!(result.is_err()); |
1870 | /// assert_eq!(url.as_str(), "https://example.net/" ); |
1871 | /// # Ok(()) |
1872 | /// # } |
1873 | /// # run().unwrap(); |
1874 | /// ``` |
1875 | /// |
1876 | /// Cannot change or remove host for cannot-be-a-base URLs: |
1877 | /// |
1878 | /// ``` |
1879 | /// use url::Url; |
1880 | /// # use url::ParseError; |
1881 | /// |
1882 | /// # fn run() -> Result<(), ParseError> { |
1883 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
1884 | /// |
1885 | /// let result = url.set_host(Some("rust-lang.org" )); |
1886 | /// assert!(result.is_err()); |
1887 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
1888 | /// |
1889 | /// let result = url.set_host(None); |
1890 | /// assert!(result.is_err()); |
1891 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
1892 | /// # Ok(()) |
1893 | /// # } |
1894 | /// # run().unwrap(); |
1895 | /// ``` |
1896 | /// |
1897 | /// # Errors |
1898 | /// |
1899 | /// If this URL is cannot-be-a-base or there is an error parsing the given `host`, |
1900 | /// a [`ParseError`] variant will be returned. |
1901 | /// |
1902 | /// [`ParseError`]: enum.ParseError.html |
1903 | pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> { |
1904 | if self.cannot_be_a_base() { |
1905 | return Err(ParseError::SetHostOnCannotBeABaseUrl); |
1906 | } |
1907 | |
1908 | let scheme_type = SchemeType::from(self.scheme()); |
1909 | |
1910 | if let Some(host) = host { |
1911 | if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() { |
1912 | return Err(ParseError::EmptyHost); |
1913 | } |
1914 | let mut host_substr = host; |
1915 | // Otherwise, if c is U+003A (:) and the [] flag is unset, then |
1916 | if !host.starts_with('[' ) || !host.ends_with(']' ) { |
1917 | match host.find(':' ) { |
1918 | Some(0) => { |
1919 | // If buffer is the empty string, validation error, return failure. |
1920 | return Err(ParseError::InvalidDomainCharacter); |
1921 | } |
1922 | // Let host be the result of host parsing buffer |
1923 | Some(colon_index) => { |
1924 | host_substr = &host[..colon_index]; |
1925 | } |
1926 | None => {} |
1927 | } |
1928 | } |
1929 | if SchemeType::from(self.scheme()).is_special() { |
1930 | self.set_host_internal(Host::parse(host_substr)?, None); |
1931 | } else { |
1932 | self.set_host_internal(Host::parse_opaque(host_substr)?, None); |
1933 | } |
1934 | } else if self.has_host() { |
1935 | if scheme_type.is_special() && !scheme_type.is_file() { |
1936 | return Err(ParseError::EmptyHost); |
1937 | } else if self.serialization.len() == self.path_start as usize { |
1938 | self.serialization.push('/' ); |
1939 | } |
1940 | debug_assert!(self.byte_at(self.scheme_end) == b':' ); |
1941 | debug_assert!(self.byte_at(self.path_start) == b'/' ); |
1942 | |
1943 | let new_path_start = if scheme_type.is_file() { |
1944 | self.scheme_end + 3 |
1945 | } else { |
1946 | self.scheme_end + 1 |
1947 | }; |
1948 | |
1949 | self.serialization |
1950 | .drain(new_path_start as usize..self.path_start as usize); |
1951 | let offset = self.path_start - new_path_start; |
1952 | self.path_start = new_path_start; |
1953 | self.username_end = new_path_start; |
1954 | self.host_start = new_path_start; |
1955 | self.host_end = new_path_start; |
1956 | self.port = None; |
1957 | if let Some(ref mut index) = self.query_start { |
1958 | *index -= offset |
1959 | } |
1960 | if let Some(ref mut index) = self.fragment_start { |
1961 | *index -= offset |
1962 | } |
1963 | } |
1964 | Ok(()) |
1965 | } |
1966 | |
1967 | /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. |
1968 | fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) { |
1969 | let old_suffix_pos = if opt_new_port.is_some() { |
1970 | self.path_start |
1971 | } else { |
1972 | self.host_end |
1973 | }; |
1974 | let suffix = self.slice(old_suffix_pos..).to_owned(); |
1975 | self.serialization.truncate(self.host_start as usize); |
1976 | if !self.has_authority() { |
1977 | debug_assert!(self.slice(self.scheme_end..self.host_start) == ":" ); |
1978 | debug_assert!(self.username_end == self.host_start); |
1979 | self.serialization.push('/' ); |
1980 | self.serialization.push('/' ); |
1981 | self.username_end += 2; |
1982 | self.host_start += 2; |
1983 | } |
1984 | write!(&mut self.serialization, " {}" , host).unwrap(); |
1985 | self.host_end = to_u32(self.serialization.len()).unwrap(); |
1986 | self.host = host.into(); |
1987 | |
1988 | if let Some(new_port) = opt_new_port { |
1989 | self.port = new_port; |
1990 | if let Some(port) = new_port { |
1991 | write!(&mut self.serialization, ": {}" , port).unwrap(); |
1992 | } |
1993 | } |
1994 | let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); |
1995 | self.serialization.push_str(&suffix); |
1996 | |
1997 | let adjust = |index: &mut u32| { |
1998 | *index -= old_suffix_pos; |
1999 | *index += new_suffix_pos; |
2000 | }; |
2001 | adjust(&mut self.path_start); |
2002 | if let Some(ref mut index) = self.query_start { |
2003 | adjust(index) |
2004 | } |
2005 | if let Some(ref mut index) = self.fragment_start { |
2006 | adjust(index) |
2007 | } |
2008 | } |
2009 | |
2010 | /// Change this URL’s host to the given IP address. |
2011 | /// |
2012 | /// If this URL is cannot-be-a-base, do nothing and return `Err`. |
2013 | /// |
2014 | /// Compared to `Url::set_host`, this skips the host parser. |
2015 | /// |
2016 | /// # Examples |
2017 | /// |
2018 | /// ```rust |
2019 | /// use url::{Url, ParseError}; |
2020 | /// |
2021 | /// # fn run() -> Result<(), ParseError> { |
2022 | /// let mut url = Url::parse("http://example.com" )?; |
2023 | /// url.set_ip_host("127.0.0.1" .parse().unwrap()); |
2024 | /// assert_eq!(url.host_str(), Some("127.0.0.1" )); |
2025 | /// assert_eq!(url.as_str(), "http://127.0.0.1/" ); |
2026 | /// # Ok(()) |
2027 | /// # } |
2028 | /// # run().unwrap(); |
2029 | /// ``` |
2030 | /// |
/// Cannot set an IP host on a cannot-be-a-base URL such as `mailto:`:
2032 | /// |
2033 | /// ```rust |
2034 | /// use url::{Url, ParseError}; |
2035 | /// |
2036 | /// # fn run() -> Result<(), ParseError> { |
2037 | /// let mut url = Url::parse("mailto:rms@example.com" )?; |
2038 | /// let result = url.set_ip_host("127.0.0.1" .parse().unwrap()); |
2039 | /// |
2040 | /// assert_eq!(url.as_str(), "mailto:rms@example.com" ); |
2041 | /// assert!(result.is_err()); |
2042 | /// # Ok(()) |
2043 | /// # } |
2044 | /// # run().unwrap(); |
2045 | /// ``` |
2046 | /// |
2047 | #[allow (clippy::result_unit_err)] |
2048 | pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { |
2049 | if self.cannot_be_a_base() { |
2050 | return Err(()); |
2051 | } |
2052 | |
2053 | let address = match address { |
2054 | IpAddr::V4(address) => Host::Ipv4(address), |
2055 | IpAddr::V6(address) => Host::Ipv6(address), |
2056 | }; |
2057 | self.set_host_internal(address, None); |
2058 | Ok(()) |
2059 | } |
2060 | |
2061 | /// Change this URL’s password. |
2062 | /// |
2063 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. |
2064 | /// |
2065 | /// # Examples |
2066 | /// |
2067 | /// ```rust |
2068 | /// use url::{Url, ParseError}; |
2069 | /// |
2070 | /// # fn run() -> Result<(), ParseError> { |
2071 | /// let mut url = Url::parse("mailto:rmz@example.com" )?; |
2072 | /// let result = url.set_password(Some("secret_password" )); |
2073 | /// assert!(result.is_err()); |
2074 | /// |
2075 | /// let mut url = Url::parse("ftp://user1:secret1@example.com" )?; |
2076 | /// let result = url.set_password(Some("secret_password" )); |
2077 | /// assert_eq!(url.password(), Some("secret_password" )); |
2078 | /// |
2079 | /// let mut url = Url::parse("ftp://user2:@example.com" )?; |
2080 | /// let result = url.set_password(Some("secret2" )); |
2081 | /// assert!(result.is_ok()); |
2082 | /// assert_eq!(url.password(), Some("secret2" )); |
2083 | /// # Ok(()) |
2084 | /// # } |
2085 | /// # run().unwrap(); |
2086 | /// ``` |
2087 | #[allow (clippy::result_unit_err)] |
2088 | pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { |
2089 | // has_host implies !cannot_be_a_base |
2090 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
2091 | return Err(()); |
2092 | } |
2093 | let password = password.unwrap_or_default(); |
2094 | if !password.is_empty() { |
2095 | let host_and_after = self.slice(self.host_start..).to_owned(); |
2096 | self.serialization.truncate(self.username_end as usize); |
2097 | self.serialization.push(':' ); |
2098 | self.serialization |
2099 | .extend(utf8_percent_encode(password, USERINFO)); |
2100 | self.serialization.push('@' ); |
2101 | |
2102 | let old_host_start = self.host_start; |
2103 | let new_host_start = to_u32(self.serialization.len()).unwrap(); |
2104 | let adjust = |index: &mut u32| { |
2105 | *index -= old_host_start; |
2106 | *index += new_host_start; |
2107 | }; |
2108 | self.host_start = new_host_start; |
2109 | adjust(&mut self.host_end); |
2110 | adjust(&mut self.path_start); |
2111 | if let Some(ref mut index) = self.query_start { |
2112 | adjust(index) |
2113 | } |
2114 | if let Some(ref mut index) = self.fragment_start { |
2115 | adjust(index) |
2116 | } |
2117 | |
2118 | self.serialization.push_str(&host_and_after); |
2119 | } else if self.byte_at(self.username_end) == b':' { |
2120 | // If there is a password to remove |
2121 | let has_username_or_password = self.byte_at(self.host_start - 1) == b'@' ; |
2122 | debug_assert!(has_username_or_password); |
2123 | let username_start = self.scheme_end + 3; |
2124 | let empty_username = username_start == self.username_end; |
2125 | let start = self.username_end; // Remove the ':' |
2126 | let end = if empty_username { |
2127 | self.host_start // Remove the '@' as well |
2128 | } else { |
2129 | self.host_start - 1 // Keep the '@' to separate the username from the host |
2130 | }; |
2131 | self.serialization.drain(start as usize..end as usize); |
2132 | let offset = end - start; |
2133 | self.host_start -= offset; |
2134 | self.host_end -= offset; |
2135 | self.path_start -= offset; |
2136 | if let Some(ref mut index) = self.query_start { |
2137 | *index -= offset |
2138 | } |
2139 | if let Some(ref mut index) = self.fragment_start { |
2140 | *index -= offset |
2141 | } |
2142 | } |
2143 | Ok(()) |
2144 | } |
2145 | |
2146 | /// Change this URL’s username. |
2147 | /// |
/// If this URL is cannot-be-a-base, does not have a host, has an empty host,
/// or has the `file` scheme, do nothing and return `Err`.
///
/// # Examples
///
/// Cannot set a username on a cannot-be-a-base URL such as `mailto:`:
2152 | /// |
2153 | /// ```rust |
2154 | /// use url::{Url, ParseError}; |
2155 | /// |
2156 | /// # fn run() -> Result<(), ParseError> { |
2157 | /// let mut url = Url::parse("mailto:rmz@example.com" )?; |
2158 | /// let result = url.set_username("user1" ); |
2159 | /// assert_eq!(url.as_str(), "mailto:rmz@example.com" ); |
2160 | /// assert!(result.is_err()); |
2161 | /// # Ok(()) |
2162 | /// # } |
2163 | /// # run().unwrap(); |
2164 | /// ``` |
2165 | /// |
/// Set the username to `user1`:
2167 | /// |
2168 | /// ```rust |
2169 | /// use url::{Url, ParseError}; |
2170 | /// |
2171 | /// # fn run() -> Result<(), ParseError> { |
2172 | /// let mut url = Url::parse("ftp://:secre1@example.com/" )?; |
2173 | /// let result = url.set_username("user1" ); |
2174 | /// assert!(result.is_ok()); |
2175 | /// assert_eq!(url.username(), "user1" ); |
2176 | /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/" ); |
2177 | /// # Ok(()) |
2178 | /// # } |
2179 | /// # run().unwrap(); |
2180 | /// ``` |
2181 | #[allow (clippy::result_unit_err)] |
2182 | pub fn set_username(&mut self, username: &str) -> Result<(), ()> { |
2183 | // has_host implies !cannot_be_a_base |
2184 | if !self.has_host() || self.host() == Some(Host::Domain("" )) || self.scheme() == "file" { |
2185 | return Err(()); |
2186 | } |
2187 | let username_start = self.scheme_end + 3; |
2188 | debug_assert!(self.slice(self.scheme_end..username_start) == "://" ); |
2189 | if self.slice(username_start..self.username_end) == username { |
2190 | return Ok(()); |
2191 | } |
2192 | let after_username = self.slice(self.username_end..).to_owned(); |
2193 | self.serialization.truncate(username_start as usize); |
2194 | self.serialization |
2195 | .extend(utf8_percent_encode(username, USERINFO)); |
2196 | |
2197 | let mut removed_bytes = self.username_end; |
2198 | self.username_end = to_u32(self.serialization.len()).unwrap(); |
2199 | let mut added_bytes = self.username_end; |
2200 | |
2201 | let new_username_is_empty = self.username_end == username_start; |
2202 | match (new_username_is_empty, after_username.chars().next()) { |
2203 | (true, Some('@' )) => { |
2204 | removed_bytes += 1; |
2205 | self.serialization.push_str(&after_username[1..]); |
2206 | } |
2207 | (false, Some('@' )) | (_, Some(':' )) | (true, _) => { |
2208 | self.serialization.push_str(&after_username); |
2209 | } |
2210 | (false, _) => { |
2211 | added_bytes += 1; |
2212 | self.serialization.push('@' ); |
2213 | self.serialization.push_str(&after_username); |
2214 | } |
2215 | } |
2216 | |
2217 | let adjust = |index: &mut u32| { |
2218 | *index -= removed_bytes; |
2219 | *index += added_bytes; |
2220 | }; |
2221 | adjust(&mut self.host_start); |
2222 | adjust(&mut self.host_end); |
2223 | adjust(&mut self.path_start); |
2224 | if let Some(ref mut index) = self.query_start { |
2225 | adjust(index) |
2226 | } |
2227 | if let Some(ref mut index) = self.fragment_start { |
2228 | adjust(index) |
2229 | } |
2230 | Ok(()) |
2231 | } |
2232 | |
2233 | /// Change this URL’s scheme. |
2234 | /// |
2235 | /// Do nothing and return `Err` under the following circumstances: |
2236 | /// |
2237 | /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+` |
2238 | /// * If this URL is cannot-be-a-base and the new scheme is one of |
2239 | /// `http`, `https`, `ws`, `wss` or `ftp` |
2240 | /// * If either the old or new scheme is `http`, `https`, `ws`, |
2241 | /// `wss` or `ftp` and the other is not one of these |
2242 | /// * If the new scheme is `file` and this URL includes credentials |
2243 | /// or has a non-null port |
2244 | /// * If this URL's scheme is `file` and its host is empty or null |
2245 | /// |
2246 | /// See also [the URL specification's section on legal scheme state |
2247 | /// overrides](https://url.spec.whatwg.org/#scheme-state). |
2248 | /// |
2249 | /// # Examples |
2250 | /// |
2251 | /// Change the URL’s scheme from `https` to `http`: |
2252 | /// |
2253 | /// ``` |
2254 | /// use url::Url; |
2255 | /// # use url::ParseError; |
2256 | /// |
2257 | /// # fn run() -> Result<(), ParseError> { |
2258 | /// let mut url = Url::parse("https://example.net" )?; |
2259 | /// let result = url.set_scheme("http" ); |
2260 | /// assert_eq!(url.as_str(), "http://example.net/" ); |
2261 | /// assert!(result.is_ok()); |
2262 | /// # Ok(()) |
2263 | /// # } |
2264 | /// # run().unwrap(); |
2265 | /// ``` |
2266 | /// Change the URL’s scheme from `foo` to `bar`: |
2267 | /// |
2268 | /// ``` |
2269 | /// use url::Url; |
2270 | /// # use url::ParseError; |
2271 | /// |
2272 | /// # fn run() -> Result<(), ParseError> { |
2273 | /// let mut url = Url::parse("foo://example.net" )?; |
2274 | /// let result = url.set_scheme("bar" ); |
2275 | /// assert_eq!(url.as_str(), "bar://example.net" ); |
2276 | /// assert!(result.is_ok()); |
2277 | /// # Ok(()) |
2278 | /// # } |
2279 | /// # run().unwrap(); |
2280 | /// ``` |
2281 | /// |
2282 | /// Cannot change URL’s scheme from `https` to `foõ`: |
2283 | /// |
2284 | /// ``` |
2285 | /// use url::Url; |
2286 | /// # use url::ParseError; |
2287 | /// |
2288 | /// # fn run() -> Result<(), ParseError> { |
2289 | /// let mut url = Url::parse("https://example.net" )?; |
2290 | /// let result = url.set_scheme("foõ" ); |
2291 | /// assert_eq!(url.as_str(), "https://example.net/" ); |
2292 | /// assert!(result.is_err()); |
2293 | /// # Ok(()) |
2294 | /// # } |
2295 | /// # run().unwrap(); |
2296 | /// ``` |
2297 | /// |
2298 | /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`: |
2299 | /// |
2300 | /// ``` |
2301 | /// use url::Url; |
2302 | /// # use url::ParseError; |
2303 | /// |
2304 | /// # fn run() -> Result<(), ParseError> { |
2305 | /// let mut url = Url::parse("mailto:rms@example.net" )?; |
2306 | /// let result = url.set_scheme("https" ); |
2307 | /// assert_eq!(url.as_str(), "mailto:rms@example.net" ); |
2308 | /// assert!(result.is_err()); |
2309 | /// # Ok(()) |
2310 | /// # } |
2311 | /// # run().unwrap(); |
2312 | /// ``` |
2313 | /// Cannot change the URL’s scheme from `foo` to `https`: |
2314 | /// |
2315 | /// ``` |
2316 | /// use url::Url; |
2317 | /// # use url::ParseError; |
2318 | /// |
2319 | /// # fn run() -> Result<(), ParseError> { |
2320 | /// let mut url = Url::parse("foo://example.net" )?; |
2321 | /// let result = url.set_scheme("https" ); |
2322 | /// assert_eq!(url.as_str(), "foo://example.net" ); |
2323 | /// assert!(result.is_err()); |
2324 | /// # Ok(()) |
2325 | /// # } |
2326 | /// # run().unwrap(); |
2327 | /// ``` |
2328 | /// Cannot change the URL’s scheme from `http` to `foo`: |
2329 | /// |
2330 | /// ``` |
2331 | /// use url::Url; |
2332 | /// # use url::ParseError; |
2333 | /// |
2334 | /// # fn run() -> Result<(), ParseError> { |
2335 | /// let mut url = Url::parse("http://example.net" )?; |
2336 | /// let result = url.set_scheme("foo" ); |
2337 | /// assert_eq!(url.as_str(), "http://example.net/" ); |
2338 | /// assert!(result.is_err()); |
2339 | /// # Ok(()) |
2340 | /// # } |
2341 | /// # run().unwrap(); |
2342 | /// ``` |
2343 | #[allow (clippy::result_unit_err, clippy::suspicious_operation_groupings)] |
2344 | pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { |
2345 | let mut parser = Parser::for_setter(String::new()); |
2346 | let remaining = parser.parse_scheme(parser::Input::new(scheme))?; |
2347 | let new_scheme_type = SchemeType::from(&parser.serialization); |
2348 | let old_scheme_type = SchemeType::from(self.scheme()); |
2349 | // If url’s scheme is a special scheme and buffer is not a special scheme, then return. |
2350 | if (new_scheme_type.is_special() && !old_scheme_type.is_special()) || |
2351 | // If url’s scheme is not a special scheme and buffer is a special scheme, then return. |
2352 | (!new_scheme_type.is_special() && old_scheme_type.is_special()) || |
2353 | // If url includes credentials or has a non-null port, and buffer is "file", then return. |
2354 | // If url’s scheme is "file" and its host is an empty host or null, then return. |
2355 | (new_scheme_type.is_file() && self.has_authority()) |
2356 | { |
2357 | return Err(()); |
2358 | } |
2359 | |
2360 | if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) { |
2361 | return Err(()); |
2362 | } |
2363 | let old_scheme_end = self.scheme_end; |
2364 | let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); |
2365 | let adjust = |index: &mut u32| { |
2366 | *index -= old_scheme_end; |
2367 | *index += new_scheme_end; |
2368 | }; |
2369 | |
2370 | self.scheme_end = new_scheme_end; |
2371 | adjust(&mut self.username_end); |
2372 | adjust(&mut self.host_start); |
2373 | adjust(&mut self.host_end); |
2374 | adjust(&mut self.path_start); |
2375 | if let Some(ref mut index) = self.query_start { |
2376 | adjust(index) |
2377 | } |
2378 | if let Some(ref mut index) = self.fragment_start { |
2379 | adjust(index) |
2380 | } |
2381 | |
2382 | parser.serialization.push_str(self.slice(old_scheme_end..)); |
2383 | self.serialization = parser.serialization; |
2384 | |
2385 | // Update the port so it can be removed |
2386 | // If it is the scheme's default |
2387 | // we don't mind it silently failing |
2388 | // if there was no port in the first place |
2389 | let previous_port = self.port(); |
2390 | let _ = self.set_port(previous_port); |
2391 | |
2392 | Ok(()) |
2393 | } |
2394 | |
2395 | /// Convert a file name as `std::path::Path` into an URL in the `file` scheme. |
2396 | /// |
2397 | /// This returns `Err` if the given path is not absolute or, |
2398 | /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). |
2399 | /// |
2400 | /// # Examples |
2401 | /// |
2402 | /// On Unix-like platforms: |
2403 | /// |
2404 | /// ``` |
2405 | /// # if cfg!(unix) { |
2406 | /// use url::Url; |
2407 | /// |
2408 | /// # fn run() -> Result<(), ()> { |
2409 | /// let url = Url::from_file_path("/tmp/foo.txt" )?; |
2410 | /// assert_eq!(url.as_str(), "file:///tmp/foo.txt" ); |
2411 | /// |
2412 | /// let url = Url::from_file_path("../foo.txt" ); |
2413 | /// assert!(url.is_err()); |
2414 | /// |
2415 | /// let url = Url::from_file_path("https://google.com/" ); |
2416 | /// assert!(url.is_err()); |
2417 | /// # Ok(()) |
2418 | /// # } |
2419 | /// # run().unwrap(); |
2420 | /// # } |
2421 | /// ``` |
2422 | #[cfg (any(unix, windows, target_os = "redox" , target_os = "wasi" ))] |
2423 | #[allow (clippy::result_unit_err)] |
2424 | pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { |
2425 | let mut serialization = "file://" .to_owned(); |
2426 | let host_start = serialization.len() as u32; |
2427 | let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?; |
2428 | Ok(Url { |
2429 | serialization, |
2430 | scheme_end: "file" .len() as u32, |
2431 | username_end: host_start, |
2432 | host_start, |
2433 | host_end, |
2434 | host, |
2435 | port: None, |
2436 | path_start: host_end, |
2437 | query_start: None, |
2438 | fragment_start: None, |
2439 | }) |
2440 | } |
2441 | |
2442 | /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme. |
2443 | /// |
2444 | /// This returns `Err` if the given path is not absolute or, |
2445 | /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). |
2446 | /// |
/// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
2448 | /// so that the entire path is considered when using this URL as a base URL. |
2449 | /// |
2450 | /// For example: |
2451 | /// |
2452 | /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))` |
2453 | /// as the base URL is `file:///var/www/index.html` |
2454 | /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))` |
2455 | /// as the base URL is `file:///var/index.html`, which might not be what was intended. |
2456 | /// |
2457 | /// Note that `std::path` does not consider trailing slashes significant |
2458 | /// and usually does not include them (e.g. in `Path::parent()`). |
2459 | #[cfg (any(unix, windows, target_os = "redox" , target_os = "wasi" ))] |
2460 | #[allow (clippy::result_unit_err)] |
2461 | pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { |
2462 | let mut url = Url::from_file_path(path)?; |
2463 | if !url.serialization.ends_with('/' ) { |
2464 | url.serialization.push('/' ) |
2465 | } |
2466 | Ok(url) |
2467 | } |
2468 | |
2469 | /// Serialize with Serde using the internal representation of the `Url` struct. |
2470 | /// |
2471 | /// The corresponding `deserialize_internal` method sacrifices some invariant-checking |
2472 | /// for speed, compared to the `Deserialize` trait impl. |
2473 | /// |
2474 | /// This method is only available if the `serde` Cargo feature is enabled. |
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;
    // Destructuring without `..` means adding or removing a field of `Url`
    // forces this method to be updated.
    let Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    } = self;
    // The fields are serialized as one tuple, in declaration order.
    let fields = (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    );
    fields.serialize(serializer)
}
2510 | |
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`. It sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl.
2515 | /// |
2516 | /// This method is only available if the `serde` Cargo feature is enabled. |
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error, Unexpected};
    // The fields arrive as a single tuple, in the order `serialize_internal` wrote them.
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    // Invariants are only verified when debug assertions are enabled.
    if cfg!(debug_assertions) {
        if let Err(reason) = url.check_invariants() {
            let reason: &str = &reason;
            return Err(Error::invalid_value(Unexpected::Other("value"), &reason));
        }
    }
    Ok(url)
}
2556 | |
2557 | /// Assuming the URL is in the `file` scheme or similar, |
2558 | /// convert its path to an absolute `std::path::Path`. |
2559 | /// |
2560 | /// **Note:** This does not actually check the URL’s `scheme`, |
2561 | /// and may give nonsensical results for other schemes. |
2562 | /// It is the user’s responsibility to check the URL’s scheme before calling this. |
2563 | /// |
2564 | /// ``` |
2565 | /// # use url::Url; |
2566 | /// # let url = Url::parse("file:///etc/passwd" ).unwrap(); |
2567 | /// let path = url.to_file_path(); |
2568 | /// ``` |
2569 | /// |
2570 | /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where |
2571 | /// `file:` URLs may have a non-local host), |
2572 | /// or if `Path::new_opt()` returns `None`. |
2573 | /// (That is, if the percent-decoded path contains a NUL byte or, |
2574 | /// for a Windows path, is not UTF-8.) |
2575 | #[inline ] |
2576 | #[cfg (any(unix, windows, target_os = "redox" , target_os = "wasi" ))] |
2577 | #[allow (clippy::result_unit_err)] |
2578 | pub fn to_file_path(&self) -> Result<PathBuf, ()> { |
2579 | if let Some(segments) = self.path_segments() { |
2580 | let host = match self.host() { |
2581 | None | Some(Host::Domain("localhost" )) => None, |
2582 | Some(_) if cfg!(windows) && self.scheme() == "file" => { |
2583 | Some(&self.serialization[self.host_start as usize..self.host_end as usize]) |
2584 | } |
2585 | _ => return Err(()), |
2586 | }; |
2587 | |
2588 | return file_url_segments_to_pathbuf(host, segments); |
2589 | } |
2590 | Err(()) |
2591 | } |
2592 | |
2593 | // Private helper methods: |
2594 | |
2595 | #[inline ] |
2596 | fn slice<R>(&self, range: R) -> &str |
2597 | where |
2598 | R: RangeArg, |
2599 | { |
2600 | range.slice_of(&self.serialization) |
2601 | } |
2602 | |
2603 | #[inline ] |
2604 | fn byte_at(&self, i: u32) -> u8 { |
2605 | self.serialization.as_bytes()[i as usize] |
2606 | } |
2607 | } |
2608 | |
2609 | /// Parse a string as an URL, without a base URL or encoding override. |
2610 | impl str::FromStr for Url { |
2611 | type Err = ParseError; |
2612 | |
2613 | #[inline ] |
2614 | fn from_str(input: &str) -> Result<Url, crate::ParseError> { |
2615 | Url::parse(input) |
2616 | } |
2617 | } |
2618 | |
2619 | impl<'a> TryFrom<&'a str> for Url { |
2620 | type Error = ParseError; |
2621 | |
2622 | fn try_from(s: &'a str) -> Result<Self, Self::Error> { |
2623 | Url::parse(input:s) |
2624 | } |
2625 | } |
2626 | |
2627 | /// Display the serialization of this URL. |
2628 | impl fmt::Display for Url { |
2629 | #[inline ] |
2630 | fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { |
2631 | fmt::Display::fmt(&self.serialization, f:formatter) |
2632 | } |
2633 | } |
2634 | |
2635 | /// String conversion. |
2636 | impl From<Url> for String { |
2637 | fn from(value: Url) -> String { |
2638 | value.serialization |
2639 | } |
2640 | } |
2641 | |
2642 | /// Debug the serialization of this URL. |
2643 | impl fmt::Debug for Url { |
2644 | #[inline ] |
2645 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
2646 | formatter&mut DebugStruct<'_, '_> |
2647 | .debug_struct("Url" ) |
2648 | .field("scheme" , &self.scheme()) |
2649 | .field("cannot_be_a_base" , &self.cannot_be_a_base()) |
2650 | .field("username" , &self.username()) |
2651 | .field("password" , &self.password()) |
2652 | .field("host" , &self.host()) |
2653 | .field("port" , &self.port()) |
2654 | .field("path" , &self.path()) |
2655 | .field("query" , &self.query()) |
2656 | .field(name:"fragment" , &self.fragment()) |
2657 | .finish() |
2658 | } |
2659 | } |
2660 | |
/// URLs compare like their serialization.
///
/// This impl has no methods of its own; the comparison is the one
/// defined by the `PartialEq` implementation for `Url`.
impl Eq for Url {}
2663 | |
2664 | /// URLs compare like their serialization. |
2665 | impl PartialEq for Url { |
2666 | #[inline ] |
2667 | fn eq(&self, other: &Self) -> bool { |
2668 | self.serialization == other.serialization |
2669 | } |
2670 | } |
2671 | |
2672 | /// URLs compare like their serialization. |
2673 | impl Ord for Url { |
2674 | #[inline ] |
2675 | fn cmp(&self, other: &Self) -> cmp::Ordering { |
2676 | self.serialization.cmp(&other.serialization) |
2677 | } |
2678 | } |
2679 | |
2680 | /// URLs compare like their serialization. |
2681 | impl PartialOrd for Url { |
2682 | #[inline ] |
2683 | fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { |
2684 | self.serialization.partial_cmp(&other.serialization) |
2685 | } |
2686 | } |
2687 | |
2688 | /// URLs hash like their serialization. |
2689 | impl hash::Hash for Url { |
2690 | #[inline ] |
2691 | fn hash<H>(&self, state: &mut H) |
2692 | where |
2693 | H: hash::Hasher, |
2694 | { |
2695 | hash::Hash::hash(&self.serialization, state) |
2696 | } |
2697 | } |
2698 | |
2699 | /// Return the serialization of this URL. |
2700 | impl AsRef<str> for Url { |
2701 | #[inline ] |
2702 | fn as_ref(&self) -> &str { |
2703 | &self.serialization |
2704 | } |
2705 | } |
2706 | |
/// A range with `u32` bounds that can be used to slice a `&str`.
///
/// The bounds are byte offsets; each implementation widens them to `usize`
/// and then indexes the string.
trait RangeArg {
    /// Return the part of `s` selected by this range.
    ///
    /// Panics, like ordinary `str` indexing, if a bound is out of range
    /// or does not fall on a `char` boundary.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2731 | |
/// Serializes this URL into a `serde` stream.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    /// Emit the URL as a single string: whatever `as_str()` returns.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2744 | |
/// Deserializes this URL from a `serde` stream.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    /// Expect a string and parse it with `Url::parse`.
    ///
    /// A parse failure is reported as an "invalid value" error that carries
    /// the offending string and the `ParseError` message.
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Url::parse(s).map_err(|err| {
                    // Use the message as-is: no padding, so the text serde
                    // builds around it does not get a doubled space.
                    let err_s = err.to_string();
                    Error::invalid_value(Unexpected::Str(s), &err_s.as_str())
                })
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2779 | |
2780 | #[cfg (any(unix, target_os = "redox" , target_os = "wasi" ))] |
2781 | fn path_to_file_url_segments( |
2782 | path: &Path, |
2783 | serialization: &mut String, |
2784 | ) -> Result<(u32, HostInternal), ()> { |
2785 | #[cfg (any(unix, target_os = "redox" ))] |
2786 | use std::os::unix::prelude::OsStrExt; |
2787 | #[cfg (target_os = "wasi" )] |
2788 | use std::os::wasi::prelude::OsStrExt; |
2789 | if !path.is_absolute() { |
2790 | return Err(()); |
2791 | } |
2792 | let host_end = to_u32(serialization.len()).unwrap(); |
2793 | let mut empty = true; |
2794 | // skip the root component |
2795 | for component in path.components().skip(1) { |
2796 | empty = false; |
2797 | serialization.push('/' ); |
2798 | serialization.extend(percent_encode( |
2799 | component.as_os_str().as_bytes(), |
2800 | PATH_SEGMENT, |
2801 | )); |
2802 | } |
2803 | if empty { |
2804 | // An URL’s path must not be empty. |
2805 | serialization.push('/' ); |
2806 | } |
2807 | Ok((host_end, HostInternal::None)) |
2808 | } |
2809 | |
/// Windows build of `path_to_file_url_segments`.
///
/// Forwards unchanged to `path_to_file_url_segments_windows`,
/// which holds the actual logic and is compiled on every platform.
#[cfg (windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2817 | |
2818 | // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 |
2819 | #[cfg_attr (not(windows), allow(dead_code))] |
2820 | fn path_to_file_url_segments_windows( |
2821 | path: &Path, |
2822 | serialization: &mut String, |
2823 | ) -> Result<(u32, HostInternal), ()> { |
2824 | use std::path::{Component, Prefix}; |
2825 | if !path.is_absolute() { |
2826 | return Err(()); |
2827 | } |
2828 | let mut components = path.components(); |
2829 | |
2830 | let host_start = serialization.len() + 1; |
2831 | let host_end; |
2832 | let host_internal; |
2833 | |
2834 | match components.next() { |
2835 | Some(Component::Prefix(ref p)) => match p.kind() { |
2836 | Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { |
2837 | host_end = to_u32(serialization.len()).unwrap(); |
2838 | host_internal = HostInternal::None; |
2839 | serialization.push('/' ); |
2840 | serialization.push(letter as char); |
2841 | serialization.push(':' ); |
2842 | } |
2843 | Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { |
2844 | let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; |
2845 | write!(serialization, " {}" , host).unwrap(); |
2846 | host_end = to_u32(serialization.len()).unwrap(); |
2847 | host_internal = host.into(); |
2848 | serialization.push('/' ); |
2849 | let share = share.to_str().ok_or(())?; |
2850 | serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT)); |
2851 | } |
2852 | _ => return Err(()), |
2853 | }, |
2854 | _ => return Err(()), |
2855 | } |
2856 | |
2857 | let mut path_only_has_prefix = true; |
2858 | for component in components { |
2859 | if component == Component::RootDir { |
2860 | continue; |
2861 | } |
2862 | |
2863 | path_only_has_prefix = false; |
2864 | // FIXME: somehow work with non-unicode? |
2865 | let component = component.as_os_str().to_str().ok_or(())?; |
2866 | |
2867 | serialization.push('/' ); |
2868 | serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT)); |
2869 | } |
2870 | |
2871 | // A windows drive letter must end with a slash. |
2872 | if serialization.len() > host_start |
2873 | && parser::is_windows_drive_letter(&serialization[host_start..]) |
2874 | && path_only_has_prefix |
2875 | { |
2876 | serialization.push('/' ); |
2877 | } |
2878 | |
2879 | Ok((host_end, host_internal)) |
2880 | } |
2881 | |
2882 | #[cfg (any(unix, target_os = "redox" , target_os = "wasi" ))] |
2883 | fn file_url_segments_to_pathbuf( |
2884 | host: Option<&str>, |
2885 | segments: str::Split<'_, char>, |
2886 | ) -> Result<PathBuf, ()> { |
2887 | use std::ffi::OsStr; |
2888 | #[cfg (any(unix, target_os = "redox" ))] |
2889 | use std::os::unix::prelude::OsStrExt; |
2890 | #[cfg (target_os = "wasi" )] |
2891 | use std::os::wasi::prelude::OsStrExt; |
2892 | |
2893 | if host.is_some() { |
2894 | return Err(()); |
2895 | } |
2896 | |
2897 | let mut bytes = if cfg!(target_os = "redox" ) { |
2898 | b"file:" .to_vec() |
2899 | } else { |
2900 | Vec::new() |
2901 | }; |
2902 | |
2903 | for segment in segments { |
2904 | bytes.push(b'/' ); |
2905 | bytes.extend(percent_decode(segment.as_bytes())); |
2906 | } |
2907 | |
2908 | // A windows drive letter must end with a slash. |
2909 | if bytes.len() > 2 |
2910 | && bytes[bytes.len() - 2].is_ascii_alphabetic() |
2911 | && matches!(bytes[bytes.len() - 1], b':' | b'|' ) |
2912 | { |
2913 | bytes.push(b'/' ); |
2914 | } |
2915 | |
2916 | let os_str = OsStr::from_bytes(&bytes); |
2917 | let path = PathBuf::from(os_str); |
2918 | |
2919 | debug_assert!( |
2920 | path.is_absolute(), |
2921 | "to_file_path() failed to produce an absolute Path" |
2922 | ); |
2923 | |
2924 | Ok(path) |
2925 | } |
2926 | |
/// Windows build of `file_url_segments_to_pathbuf`.
///
/// Forwards unchanged to `file_url_segments_to_pathbuf_windows`,
/// which holds the actual logic and is compiled on every platform.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2934 | |
2935 | // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 |
2936 | #[cfg_attr (not(windows), allow(dead_code))] |
2937 | fn file_url_segments_to_pathbuf_windows( |
2938 | host: Option<&str>, |
2939 | mut segments: str::Split<'_, char>, |
2940 | ) -> Result<PathBuf, ()> { |
2941 | let mut string = if let Some(host) = host { |
2942 | r"\\" .to_owned() + host |
2943 | } else { |
2944 | let first = segments.next().ok_or(())?; |
2945 | |
2946 | match first.len() { |
2947 | 2 => { |
2948 | if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' { |
2949 | return Err(()); |
2950 | } |
2951 | |
2952 | first.to_owned() |
2953 | } |
2954 | |
2955 | 4 => { |
2956 | if !first.starts_with(parser::ascii_alpha) { |
2957 | return Err(()); |
2958 | } |
2959 | let bytes = first.as_bytes(); |
2960 | if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A' ) { |
2961 | return Err(()); |
2962 | } |
2963 | |
2964 | first[0..1].to_owned() + ":" |
2965 | } |
2966 | |
2967 | _ => return Err(()), |
2968 | } |
2969 | }; |
2970 | |
2971 | for segment in segments { |
2972 | string.push(' \\' ); |
2973 | |
2974 | // Currently non-unicode windows paths cannot be represented |
2975 | match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { |
2976 | Ok(s) => string.push_str(&s), |
2977 | Err(..) => return Err(()), |
2978 | } |
2979 | } |
2980 | let path = PathBuf::from(string); |
2981 | debug_assert!( |
2982 | path.is_absolute(), |
2983 | "to_file_path() failed to produce an absolute Path" |
2984 | ); |
2985 | Ok(path) |
2986 | } |
2987 | |
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive (Debug)]
pub struct UrlQuery<'a> {
    /// The URL being edited. It is `take()`n by `finish` and by `drop`,
    /// so it is `None` once either of them has run.
    url: Option<&'a mut Url>,
    /// Handed to `Url::restore_already_parsed_fragment`
    /// when the query is finished or dropped.
    fragment: Option<String>,
}
2994 | |
2995 | // `as_mut_string` string here exposes the internal serialization of an `Url`, |
2996 | // which should not be exposed to users. |
2997 | // We achieve that by not giving users direct access to `UrlQuery`: |
2998 | // * Its fields are private |
2999 | // (and so can not be constructed with struct literal syntax outside of this crate), |
3000 | // * It has no constructor |
3001 | // * It is only visible (on the type level) to users in the return type of |
3002 | // `Url::query_pairs_mut` which is `Serializer<UrlQuery>` |
3003 | // * `Serializer` keeps its target in a private field |
3004 | // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`. |
3005 | impl<'a> form_urlencoded::Target for UrlQuery<'a> { |
3006 | fn as_mut_string(&mut self) -> &mut String { |
3007 | &mut self.url.as_mut().unwrap().serialization |
3008 | } |
3009 | |
3010 | fn finish(mut self) -> &'a mut Url { |
3011 | let url: &mut Url = self.url.take().unwrap(); |
3012 | url.restore_already_parsed_fragment(self.fragment.take()); |
3013 | url |
3014 | } |
3015 | |
3016 | type Finished = &'a mut Url; |
3017 | } |
3018 | |
3019 | impl<'a> Drop for UrlQuery<'a> { |
3020 | fn drop(&mut self) { |
3021 | if let Some(url: &mut Url) = self.url.take() { |
3022 | url.restore_already_parsed_fragment(self.fragment.take()) |
3023 | } |
3024 | } |
3025 | } |
3026 | |