1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use std::cmp;
10use std::fmt::{self, Formatter};
11use std::net::{Ipv4Addr, Ipv6Addr};
12
13use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
14#[cfg(feature = "serde")]
15use serde::{Deserialize, Serialize};
16
17use crate::parser::{ParseError, ParseResult};
18
/// Compact, `Copy` summary of a URL's host that carries no domain text.
///
/// Values are produced from a `Host<String>` through the `From` impl in this
/// file.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// No host: the conversion yields this for an empty domain string.
    None,
    /// A non-empty domain; the domain text itself is not stored here.
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
27
28impl From<Host<String>> for HostInternal {
29 fn from(host: Host<String>) -> HostInternal {
30 match host {
31 Host::Domain(ref s: &String) if s.is_empty() => HostInternal::None,
32 Host::Domain(_) => HostInternal::Domain,
33 Host::Ipv4(address: Ipv4Addr) => HostInternal::Ipv4(address),
34 Host::Ipv6(address: Ipv6Addr) => HostInternal::Ipv6(address),
35 }
36 }
37}
38
/// The host name of a URL.
///
/// The type parameter `S` is the string type that stores a domain name; it
/// defaults to `String`. Equality is not derived: it is provided by a
/// separate `PartialEq` impl so that hosts with different string types can
/// be compared with each other.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
pub enum Host<S = String> {
    /// A DNS domain name, as '.' dot-separated labels.
    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
    /// a special URL, or percent encoded for non-special URLs. Hosts for
    /// non-special URLs are also called opaque hosts.
    Domain(S),

    /// An IPv4 address.
    /// `Url::host_str` returns the serialization of this address,
    /// as four decimal integers separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
    /// in the format per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),
}
61
62impl<'a> Host<&'a str> {
63 /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
64 pub fn to_owned(&self) -> Host<String> {
65 match *self {
66 Host::Domain(domain: &str) => Host::Domain(domain.to_owned()),
67 Host::Ipv4(address: Ipv4Addr) => Host::Ipv4(address),
68 Host::Ipv6(address: Ipv6Addr) => Host::Ipv6(address),
69 }
70 }
71}
72
73impl Host<String> {
74 /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
75 ///
76 /// <https://url.spec.whatwg.org/#host-parsing>
77 pub fn parse(input: &str) -> Result<Self, ParseError> {
78 if input.starts_with('[') {
79 if !input.ends_with(']') {
80 return Err(ParseError::InvalidIpv6Address);
81 }
82 return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
83 }
84 let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
85
86 let domain = Self::domain_to_ascii(&domain)?;
87
88 if domain.is_empty() {
89 return Err(ParseError::EmptyHost);
90 }
91
92 let is_invalid_domain_char = |c| {
93 matches!(
94 c,
95 '\0'..='\u{001F}'
96 | ' '
97 | '#'
98 | '%'
99 | '/'
100 | ':'
101 | '<'
102 | '>'
103 | '?'
104 | '@'
105 | '['
106 | '\\'
107 | ']'
108 | '^'
109 | '\u{007F}'
110 | '|'
111 )
112 };
113
114 if domain.find(is_invalid_domain_char).is_some() {
115 Err(ParseError::InvalidDomainCharacter)
116 } else if ends_in_a_number(&domain) {
117 let address = parse_ipv4addr(&domain)?;
118 Ok(Host::Ipv4(address))
119 } else {
120 Ok(Host::Domain(domain))
121 }
122 }
123
124 // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
125 pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
126 if input.starts_with('[') {
127 if !input.ends_with(']') {
128 return Err(ParseError::InvalidIpv6Address);
129 }
130 return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
131 }
132
133 let is_invalid_host_char = |c| {
134 matches!(
135 c,
136 '\0' | '\t'
137 | '\n'
138 | '\r'
139 | ' '
140 | '#'
141 | '/'
142 | ':'
143 | '<'
144 | '>'
145 | '?'
146 | '@'
147 | '['
148 | '\\'
149 | ']'
150 | '^'
151 | '|'
152 )
153 };
154
155 if input.find(is_invalid_host_char).is_some() {
156 Err(ParseError::InvalidDomainCharacter)
157 } else {
158 Ok(Host::Domain(
159 utf8_percent_encode(input, CONTROLS).to_string(),
160 ))
161 }
162 }
163
164 /// convert domain with idna
165 fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
166 idna::domain_to_ascii(domain).map_err(Into::into)
167 }
168}
169
170impl<S: AsRef<str>> fmt::Display for Host<S> {
171 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
172 match *self {
173 Host::Domain(ref domain: &S) => domain.as_ref().fmt(f),
174 Host::Ipv4(ref addr: &Ipv4Addr) => addr.fmt(f),
175 Host::Ipv6(ref addr: &Ipv6Addr) => {
176 f.write_str(data:"[")?;
177 write_ipv6(addr, f)?;
178 f.write_str(data:"]")
179 }
180 }
181 }
182}
183
184impl<S, T> PartialEq<Host<T>> for Host<S>
185where
186 S: PartialEq<T>,
187{
188 fn eq(&self, other: &Host<T>) -> bool {
189 match (self, other) {
190 (Host::Domain(a: &S), Host::Domain(b: &T)) => a == b,
191 (Host::Ipv4(a: &Ipv4Addr), Host::Ipv4(b: &Ipv4Addr)) => a == b,
192 (Host::Ipv6(a: &Ipv6Addr), Host::Ipv6(b: &Ipv6Addr)) => a == b,
193 (_, _) => false,
194 }
195 }
196}
197
198fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
199 let segments: [u16; 8] = addr.segments();
200 let (compress_start: isize, compress_end: isize) = longest_zero_sequence(&segments);
201 let mut i: isize = 0;
202 while i < 8 {
203 if i == compress_start {
204 f.write_str(data:":")?;
205 if i == 0 {
206 f.write_str(data:":")?;
207 }
208 if compress_end < 8 {
209 i = compress_end;
210 } else {
211 break;
212 }
213 }
214 write!(f, "{:x}", segments[i as usize])?;
215 if i < 7 {
216 f.write_str(data:":")?;
217 }
218 i += 1;
219 }
220 Ok(())
221}
222
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
//
// Finds the first longest run of zero pieces and returns it as
// `(start, end)` with `end` exclusive. Runs shorter than two pieces are
// ignored; when no run qualifies the result is `(-1, -2)`, whose start never
// matches a piece index.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = 0;
    let mut run_len: isize = 0;

    for (idx, &piece) in pieces.iter().enumerate() {
        if piece != 0 {
            run_len = 0;
            continue;
        }
        run_len += 1;
        // Strictly greater, so an earlier run wins over a later one of the
        // same length.
        if run_len > best_len {
            best_len = run_len;
            best_start = idx as isize + 1 - run_len;
        }
    }

    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    if best_len < 2 {
        (-1, -2)
    } else {
        (best_start, best_start + best_len)
    }
}
258
259/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
260fn ends_in_a_number(input: &str) -> bool {
261 let mut parts: RSplit<'_, char> = input.rsplit('.');
262 let last: &str = parts.next().unwrap();
263 let last: &str = if last.is_empty() {
264 if let Some(last: &str) = parts.next() {
265 last
266 } else {
267 return false;
268 }
269 } else {
270 last
271 };
272 if !last.is_empty() && last.as_bytes().iter().all(|c: &u8| c.is_ascii_digit()) {
273 return true;
274 }
275
276 parse_ipv4number(input:last).is_ok()
277}
278
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
///
/// Parses one IPv4 number. A `0x`/`0X` prefix selects hexadecimal, a leading
/// `0` followed by more characters selects octal, anything else is decimal.
///
/// * `Err(())`: the input is empty or has a character that is not a digit
///   of the selected radix.
/// * `Ok(Some(n))`: the parsed value; a bare prefix (`"0x"`) counts as 0.
/// * `Ok(None)`: the input is a valid number, but it overflows a `u32`.
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    let radix = if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        16
    } else if input.len() >= 2 && input.starts_with('0') {
        input = &input[1..];
        8
    } else {
        10
    };

    // Nothing left after the prefix: the number is zero.
    if input.is_empty() {
        return Ok(Some(0));
    }

    if !input.chars().all(|c| c.is_digit(radix)) {
        return Err(());
    }

    // Every character was validated above, so the only way the conversion
    // can fail here is an integer overflow.
    Ok(u32::from_str_radix(input, radix).ok())
}
315
316/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
317fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
318 let mut parts: Vec<&str> = input.split('.').collect();
319 if parts.last() == Some(&"") {
320 parts.pop();
321 }
322 if parts.len() > 4 {
323 return Err(ParseError::InvalidIpv4Address);
324 }
325 let mut numbers: Vec<u32> = Vec::new();
326 for part in parts {
327 match parse_ipv4number(part) {
328 Ok(Some(n)) => numbers.push(n),
329 Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
330 Err(()) => return Err(ParseError::InvalidIpv4Address),
331 };
332 }
333 let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
334 // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
335 if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
336 return Err(ParseError::InvalidIpv4Address);
337 }
338 if numbers.iter().any(|x| *x > 255) {
339 return Err(ParseError::InvalidIpv4Address);
340 }
341 for (counter, n) in numbers.iter().enumerate() {
342 ipv4 += n << (8 * (3 - counter as u32))
343 }
344 Ok(Ipv4Addr::from(ipv4))
345}
346
347/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
348fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
349 let input = input.as_bytes();
350 let len = input.len();
351 let mut is_ip_v4 = false;
352 let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
353 let mut piece_pointer = 0;
354 let mut compress_pointer = None;
355 let mut i = 0;
356
357 if len < 2 {
358 return Err(ParseError::InvalidIpv6Address);
359 }
360
361 if input[0] == b':' {
362 if input[1] != b':' {
363 return Err(ParseError::InvalidIpv6Address);
364 }
365 i = 2;
366 piece_pointer = 1;
367 compress_pointer = Some(1);
368 }
369
370 while i < len {
371 if piece_pointer == 8 {
372 return Err(ParseError::InvalidIpv6Address);
373 }
374 if input[i] == b':' {
375 if compress_pointer.is_some() {
376 return Err(ParseError::InvalidIpv6Address);
377 }
378 i += 1;
379 piece_pointer += 1;
380 compress_pointer = Some(piece_pointer);
381 continue;
382 }
383 let start = i;
384 let end = cmp::min(len, start + 4);
385 let mut value = 0u16;
386 while i < end {
387 match (input[i] as char).to_digit(16) {
388 Some(digit) => {
389 value = value * 0x10 + digit as u16;
390 i += 1;
391 }
392 None => break,
393 }
394 }
395 if i < len {
396 match input[i] {
397 b'.' => {
398 if i == start {
399 return Err(ParseError::InvalidIpv6Address);
400 }
401 i = start;
402 if piece_pointer > 6 {
403 return Err(ParseError::InvalidIpv6Address);
404 }
405 is_ip_v4 = true;
406 }
407 b':' => {
408 i += 1;
409 if i == len {
410 return Err(ParseError::InvalidIpv6Address);
411 }
412 }
413 _ => return Err(ParseError::InvalidIpv6Address),
414 }
415 }
416 if is_ip_v4 {
417 break;
418 }
419 pieces[piece_pointer] = value;
420 piece_pointer += 1;
421 }
422
423 if is_ip_v4 {
424 if piece_pointer > 6 {
425 return Err(ParseError::InvalidIpv6Address);
426 }
427 let mut numbers_seen = 0;
428 while i < len {
429 if numbers_seen > 0 {
430 if numbers_seen < 4 && (i < len && input[i] == b'.') {
431 i += 1
432 } else {
433 return Err(ParseError::InvalidIpv6Address);
434 }
435 }
436
437 let mut ipv4_piece = None;
438 while i < len {
439 let digit = match input[i] {
440 c @ b'0'..=b'9' => c - b'0',
441 _ => break,
442 };
443 match ipv4_piece {
444 None => ipv4_piece = Some(digit as u16),
445 Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
446 Some(ref mut v) => {
447 *v = *v * 10 + digit as u16;
448 if *v > 255 {
449 return Err(ParseError::InvalidIpv6Address);
450 }
451 }
452 }
453 i += 1;
454 }
455
456 pieces[piece_pointer] = if let Some(v) = ipv4_piece {
457 pieces[piece_pointer] * 0x100 + v
458 } else {
459 return Err(ParseError::InvalidIpv6Address);
460 };
461 numbers_seen += 1;
462
463 if numbers_seen == 2 || numbers_seen == 4 {
464 piece_pointer += 1;
465 }
466 }
467
468 if numbers_seen != 4 {
469 return Err(ParseError::InvalidIpv6Address);
470 }
471 }
472
473 if i < len {
474 return Err(ParseError::InvalidIpv6Address);
475 }
476
477 match compress_pointer {
478 Some(compress_pointer) => {
479 let mut swaps = piece_pointer - compress_pointer;
480 piece_pointer = 7;
481 while swaps > 0 {
482 pieces.swap(piece_pointer, compress_pointer + swaps - 1);
483 swaps -= 1;
484 piece_pointer -= 1;
485 }
486 }
487 _ => {
488 if piece_pointer != 8 {
489 return Err(ParseError::InvalidIpv6Address);
490 }
491 }
492 }
493 Ok(Ipv6Addr::new(
494 pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
495 ))
496}
497