parser.rs source code [crates/url-2.5.0/src/parser.rs]

1	// Copyright 2013-2016 The rust-url developers.
2	//
3	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6	// option. This file may not be copied, modified, or distributed
7	// except according to those terms.
8
9	use std::error::Error;
10	use std::fmt::{self, Formatter, Write};
11	use std::str;
12
13	use crate::host::{Host, HostInternal};
14	use crate::Url;
15	use form_urlencoded::EncodingOverride;
16	use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};
17
18	/// https://url.spec.whatwg.org/#fragment-percent-encode-set
19	const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(byte:b'`');
20
21	/// https://url.spec.whatwg.org/#path-percent-encode-set
22	const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(byte:b'}');
23
24	/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
25	pub(crate) const USERINFO: &AsciiSet = &PATH
26	.add(b'/')
27	.add(b':')
28	.add(b';')
29	.add(b'=')
30	.add(b'@')
31	.add(b'[')
32	.add(b'`\\`')
33	.add(b']')
34	.add(b'^')
35	.add(byte:b'\|');
36
37	pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(byte:b'%');
38
39	// The backslash (\) character is treated as a path separator in special URLs
40	// so it needs to be additionally escaped in that case.
41	pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(byte:b'`\\`');
42
43	// https://url.spec.whatwg.org/#query-state
44	const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(byte:b'>');
45	const SPECIAL_QUERY: &AsciiSet = &QUERY.add(byte:b'`\'`');
46
/// Result type returned by the parser; the error side is always a [`ParseError`].
pub type ParseResult<T> = Result<T, ParseError>;

// Generates the `ParseError` enum from a list of `Variant => "message",`
// pairs, together with a `Display` impl that prints the message.
macro_rules! simple_enum_error {
    ($($name: ident => $description: expr,)+) => {
        /// Errors that can occur during parsing.
        ///
        /// This may be extended in the future, so the enum is marked
        /// `#[non_exhaustive]`: code in other crates that matches on it
        /// needs a wildcard arm.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        #[non_exhaustive]
        pub enum ParseError {
            $(
                $name,
            )+
        }

        impl fmt::Display for ParseError {
            fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
                match *self {
                    $(
                        ParseError::$name => fmt.write_str($description),
                    )+
                }
            }
        }
    }
}

impl Error for ParseError {}

simple_enum_error! {
    EmptyHost => "empty host",
    IdnaError => "invalid international domain name",
    InvalidPort => "invalid port number",
    InvalidIpv4Address => "invalid IPv4 address",
    InvalidIpv6Address => "invalid IPv6 address",
    InvalidDomainCharacter => "invalid domain character",
    RelativeUrlWithoutBase => "relative URL without a base",
    RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base",
    SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set",
    Overflow => "URLs more than 4 GB are not supported",
}
89
90	impl From<::idna::Errors> for ParseError {
91	fn from(_: ::idna::Errors) -> ParseError {
92	ParseError::IdnaError
93	}
94	}
95
// Generates the `SyntaxViolation` enum from a list of `Variant => "message",`
// pairs, together with a `description()` accessor returning the message.
macro_rules! syntax_violation_enum {
    ($($name: ident => $description: expr,)+) => {
        /// Non-fatal syntax violations that can occur during parsing.
        ///
        /// This may be extended in the future, so the enum is marked
        /// `#[non_exhaustive]`: code in other crates that matches on it
        /// needs a wildcard arm.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        #[non_exhaustive]
        pub enum SyntaxViolation {
            $(
                $name,
            )+
        }

        impl SyntaxViolation {
            /// Returns the fixed human-readable message for this violation.
            pub fn description(&self) -> &'static str {
                match *self {
                    $(
                        SyntaxViolation::$name => $description,
                    )+
                }
            }
        }
    }
}

syntax_violation_enum! {
    Backslash => "backslash",
    C0SpaceIgnored =>
        "leading or trailing control or space character are ignored in URLs",
    EmbeddedCredentials =>
        "embedding authentication information (username or password) \
         in an URL is not recommended",
    ExpectedDoubleSlash => "expected //",
    ExpectedFileDoubleSlash => "expected // after file:",
    FileWithHostAndWindowsDrive => "file: with host and Windows drive letter",
    NonUrlCodePoint => "non-URL code point",
    NullInFragment => "NULL characters are ignored in URL fragment identifiers",
    PercentDecode => "expected 2 hex digits after %",
    TabOrNewlineIgnored => "tabs or newlines are ignored in URLs",
    UnencodedAtSign => "unencoded @ sign in username or password",
}

/// Displays the same text as [`SyntaxViolation::description`].
impl fmt::Display for SyntaxViolation {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.description(), f)
    }
}
144
/// Classification of a URL scheme as used by the parser.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum SchemeType {
    /// The `file` scheme.
    File,
    /// `http`, `https`, `ws`, `wss` or `ftp`.
    SpecialNotFile,
    /// Any other scheme.
    NotSpecial,
}

impl SchemeType {
    /// `true` for every variant except `NotSpecial`.
    pub fn is_special(&self) -> bool {
        !matches!(*self, SchemeType::NotSpecial)
    }

    /// `true` only for `File`.
    pub fn is_file(&self) -> bool {
        matches!(*self, SchemeType::File)
    }
}

/// Classifies a scheme string. The comparison is exact (case-sensitive), so
/// callers are expected to pass an already lower-cased scheme.
impl<T: AsRef<str>> From<T> for SchemeType {
    fn from(s: T) -> Self {
        match s.as_ref() {
            "http" | "https" | "ws" | "wss" | "ftp" => SchemeType::SpecialNotFile,
            "file" => SchemeType::File,
            _ => SchemeType::NotSpecial,
        }
    }
}
171
/// Returns the default port for `scheme`, or `None` when the scheme has no
/// default port known to this function.
///
/// The match is exact, so `scheme` must already be lower-case.
pub fn default_port(scheme: &str) -> Option<u16> {
    match scheme {
        "http" | "ws" => Some(80),
        "https" | "wss" => Some(443),
        "ftp" => Some(21),
        _ => None,
    }
}
180
/// Cursor over the part of the input that has not been consumed yet.
///
/// Cloning is how the parser peeks ahead: helpers clone the cursor, advance
/// the clone, and only keep it when the look-ahead succeeds.
#[derive(Clone, Debug)]
pub struct Input<'i> {
    /// Remaining characters of the original string slice.
    chars: str::Chars<'i>,
}
185
186	impl<'i> Input<'i> {
187	pub fn new_no_trim(input: &'i str) -> Self {
188	Input {
189	chars: input.chars(),
190	}
191	}
192
193	pub fn new_trim_tab_and_newlines(
194	original_input: &'i str,
195	vfn: Option<&dyn Fn(SyntaxViolation)>,
196	) -> Self {
197	let input = original_input.trim_matches(ascii_tab_or_new_line);
198	if let Some(vfn) = vfn {
199	if input.len() < original_input.len() {
200	vfn(SyntaxViolation::C0SpaceIgnored)
201	}
202	if input.chars().any(\|c\| matches!(c, '`\t`' \| '`\n`' \| '`\r`')) {
203	vfn(SyntaxViolation::TabOrNewlineIgnored)
204	}
205	}
206	Input {
207	chars: input.chars(),
208	}
209	}
210
211	pub fn new_trim_c0_control_and_space(
212	original_input: &'i str,
213	vfn: Option<&dyn Fn(SyntaxViolation)>,
214	) -> Self {
215	let input = original_input.trim_matches(c0_control_or_space);
216	if let Some(vfn) = vfn {
217	if input.len() < original_input.len() {
218	vfn(SyntaxViolation::C0SpaceIgnored)
219	}
220	if input.chars().any(\|c\| matches!(c, '`\t`' \| '`\n`' \| '`\r`')) {
221	vfn(SyntaxViolation::TabOrNewlineIgnored)
222	}
223	}
224	Input {
225	chars: input.chars(),
226	}
227	}
228
229	#[inline]
230	pub fn is_empty(&self) -> bool {
231	self.clone().next().is_none()
232	}
233
234	#[inline]
235	fn starts_with<P: Pattern>(&self, p: P) -> bool {
236	p.split_prefix(&mut self.clone())
237	}
238
239	#[inline]
240	pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
241	let mut remaining = self.clone();
242	if p.split_prefix(&mut remaining) {
243	Some(remaining)
244	} else {
245	None
246	}
247	}
248
249	#[inline]
250	fn split_first(&self) -> (Option<char>, Self) {
251	let mut remaining = self.clone();
252	(remaining.next(), remaining)
253	}
254
255	#[inline]
256	fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
257	let mut count = `0`;
258	let mut remaining = self.clone();
259	loop {
260	let mut input = remaining.clone();
261	if matches!(input.next(), Some(c) if f(c)) {
262	remaining = input;
263	count += `1`;
264	} else {
265	return (count, remaining);
266	}
267	}
268	}
269
270	#[inline]
271	fn next_utf8(&mut self) -> Option<(char, &'i str)> {
272	loop {
273	let utf8 = self.chars.as_str();
274	match self.chars.next() {
275	Some(c) => {
276	if !matches!(c, '`\t`' \| '`\n`' \| '`\r`') {
277	return Some((c, &utf8[..c.len_utf8()]));
278	}
279	}
280	None => return None,
281	}
282	}
283	}
284	}
285
286	pub trait Pattern {
287	fn split_prefix(self, input: &mut Input) -> bool;
288	}
289
290	impl Pattern for char {
291	fn split_prefix(self, input: &mut Input) -> bool {
292	input.next() == Some(self)
293	}
294	}
295
296	impl<'a> Pattern for &'a str {
297	fn split_prefix(self, input: &mut Input) -> bool {
298	for c: char in self.chars() {
299	if input.next() != Some(c) {
300	return `false`;
301	}
302	}
303	`true`
304	}
305	}
306
307	impl<F: FnMut(char) -> bool> Pattern for F {
308	fn split_prefix(self, input: &mut Input) -> bool {
309	input.next().map_or(default:`false`, self)
310	}
311	}
312
313	impl<'i> Iterator for Input<'i> {
314	type Item = char;
315	fn next(&mut self) -> Option<char> {
316	self.chars
317	.by_ref()
318	.find(\|&c: char\| !matches!(c, '`\t`' \| '`\n`' \| '`\r`'))
319	}
320	}
321
322	pub struct Parser<'a> {
323	pub serialization: String,
324	pub base_url: Option<&'a Url>,
325	pub query_encoding_override: EncodingOverride<'a>,
326	pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
327	pub context: Context,
328	}
329
/// Which kind of caller is driving the parser.
///
/// In the code visible here, `Setter` lets `parse_scheme` accept input that
/// ends before a `:`, and `UrlParser` makes `parse_port` reject unexpected
/// characters after the port digits.
#[derive(PartialEq, Eq, Copy, Clone)]
pub enum Context {
    UrlParser,
    Setter,
    PathSegmentSetter,
}
336
337	impl<'a> Parser<'a> {
338	fn log_violation(&self, v: SyntaxViolation) {
339	if let Some(f) = self.violation_fn {
340	f(v)
341	}
342	}
343
344	fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
345	if let Some(f) = self.violation_fn {
346	if test() {
347	f(v)
348	}
349	}
350	}
351
352	pub fn for_setter(serialization: String) -> Parser<'a> {
353	Parser {
354	serialization,
355	base_url: None,
356	query_encoding_override: None,
357	violation_fn: None,
358	context: Context::Setter,
359	}
360	}
361
362	/// https://url.spec.whatwg.org/#concept-basic-url-parser
363	pub fn parse_url(mut self, input: &str) -> ParseResult<Url> {
364	let input = Input::new_trim_c0_control_and_space(input, self.violation_fn);
365	if let Ok(remaining) = self.parse_scheme(input.clone()) {
366	return self.parse_with_scheme(remaining);
367	}
368
369	// No-scheme state
370	if let Some(base_url) = self.base_url {
371	if input.starts_with('#') {
372	self.fragment_only(base_url, input)
373	} else if base_url.cannot_be_a_base() {
374	Err(ParseError::RelativeUrlWithCannotBeABaseBase)
375	} else {
376	let scheme_type = SchemeType::from(base_url.scheme());
377	if scheme_type.is_file() {
378	self.parse_file(input, scheme_type, Some(base_url))
379	} else {
380	self.parse_relative(input, scheme_type, base_url)
381	}
382	}
383	} else {
384	Err(ParseError::RelativeUrlWithoutBase)
385	}
386	}
387
388	pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
389	if input.is_empty() \|\| !input.starts_with(ascii_alpha) {
390	return Err(());
391	}
392	debug_assert!(self.serialization.is_empty());
393	while let Some(c) = input.next() {
394	match c {
395	'a'..='z' \| 'A'..='Z' \| '0'..='9' \| '+' \| '-' \| '.' => {
396	self.serialization.push(c.to_ascii_lowercase())
397	}
398	':' => return Ok(input),
399	_ => {
400	self.serialization.clear();
401	return Err(());
402	}
403	}
404	}
405	// EOF before ':'
406	if self.context == Context::Setter {
407	Ok(input)
408	} else {
409	self.serialization.clear();
410	Err(())
411	}
412	}
413
414	fn parse_with_scheme(mut self, input: Input<'_>) -> ParseResult<Url> {
415	use crate::SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash};
416	let scheme_end = to_u32(self.serialization.len())?;
417	let scheme_type = SchemeType::from(&self.serialization);
418	self.serialization.push(':');
419	match scheme_type {
420	SchemeType::File => {
421	self.log_violation_if(ExpectedFileDoubleSlash, \|\| !input.starts_with("//"));
422	let base_file_url = self.base_url.and_then(\|base\| {
423	if base.scheme() == "file" {
424	Some(base)
425	} else {
426	None
427	}
428	});
429	self.serialization.clear();
430	self.parse_file(input, scheme_type, base_file_url)
431	}
432	SchemeType::SpecialNotFile => {
433	// special relative or authority state
434	let (slashes_count, remaining) = input.count_matching(\|c\| matches!(c, '/' \| '`\\`'));
435	if let Some(base_url) = self.base_url {
436	if slashes_count < `2`
437	&& base_url.scheme() == &self.serialization[..scheme_end as usize]
438	{
439	// "Cannot-be-a-base" URLs only happen with "not special" schemes.
440	debug_assert!(!base_url.cannot_be_a_base());
441	self.serialization.clear();
442	return self.parse_relative(input, scheme_type, base_url);
443	}
444	}
445	// special authority slashes state
446	self.log_violation_if(ExpectedDoubleSlash, \|\| {
447	input
448	.clone()
449	.take_while(\|&c\| matches!(c, '/' \| '`\\`'))
450	.collect::<String>()
451	!= "//"
452	});
453	self.after_double_slash(remaining, scheme_type, scheme_end)
454	}
455	SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end),
456	}
457	}
458
459	/// Scheme other than file, http, https, ws, ws, ftp.
460	fn parse_non_special(
461	mut self,
462	input: Input<'_>,
463	scheme_type: SchemeType,
464	scheme_end: u32,
465	) -> ParseResult<Url> {
466	// path or authority state (
467	if let Some(input) = input.split_prefix("//") {
468	return self.after_double_slash(input, scheme_type, scheme_end);
469	}
470	// Anarchist URL (no authority)
471	let path_start = to_u32(self.serialization.len())?;
472	let username_end = path_start;
473	let host_start = path_start;
474	let host_end = path_start;
475	let host = HostInternal::None;
476	let port = None;
477	let remaining = if let Some(input) = input.split_prefix('/') {
478	self.serialization.push('/');
479	self.parse_path(scheme_type, &mut `false`, path_start as usize, input)
480	} else {
481	self.parse_cannot_be_a_base_path(input)
482	};
483	self.with_query_and_fragment(
484	scheme_type,
485	scheme_end,
486	username_end,
487	host_start,
488	host_end,
489	host,
490	port,
491	path_start,
492	remaining,
493	)
494	}
495
496	fn parse_file(
497	mut self,
498	input: Input<'_>,
499	scheme_type: SchemeType,
500	base_file_url: Option<&Url>,
501	) -> ParseResult<Url> {
502	use crate::SyntaxViolation::Backslash;
503	// file state
504	debug_assert!(self.serialization.is_empty());
505	let (first_char, input_after_first_char) = input.split_first();
506	if matches!(first_char, Some('/') \| Some('`\\`')) {
507	self.log_violation_if(SyntaxViolation::Backslash, \|\| first_char == Some('`\\`'));
508	// file slash state
509	let (next_char, input_after_next_char) = input_after_first_char.split_first();
510	if matches!(next_char, Some('/') \| Some('`\\`')) {
511	self.log_violation_if(Backslash, \|\| next_char == Some('`\\`'));
512	// file host state
513	self.serialization.push_str("file://");
514	let scheme_end = "file".len() as u32;
515	let host_start = "file://".len() as u32;
516	let (path_start, mut host, remaining) =
517	self.parse_file_host(input_after_next_char)?;
518	let mut host_end = to_u32(self.serialization.len())?;
519	let mut has_host = !matches!(host, HostInternal::None);
520	let remaining = if path_start {
521	self.parse_path_start(SchemeType::File, &mut has_host, remaining)
522	} else {
523	let path_start = self.serialization.len();
524	self.serialization.push('/');
525	self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
526	};
527
528	// For file URLs that have a host and whose path starts
529	// with the windows drive letter we just remove the host.
530	if !has_host {
531	self.serialization
532	.drain(host_start as usize..host_end as usize);
533	host_end = host_start;
534	host = HostInternal::None;
535	}
536	let (query_start, fragment_start) =
537	self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
538	return Ok(Url {
539	serialization: self.serialization,
540	scheme_end,
541	username_end: host_start,
542	host_start,
543	host_end,
544	host,
545	port: None,
546	path_start: host_end,
547	query_start,
548	fragment_start,
549	});
550	} else {
551	self.serialization.push_str("file://");
552	let scheme_end = "file".len() as u32;
553	let host_start = "file://".len();
554	let mut host_end = host_start;
555	let mut host = HostInternal::None;
556	if !starts_with_windows_drive_letter_segment(&input_after_first_char) {
557	if let Some(base_url) = base_file_url {
558	let first_segment = base_url.path_segments().unwrap().next().unwrap();
559	if is_normalized_windows_drive_letter(first_segment) {
560	self.serialization.push('/');
561	self.serialization.push_str(first_segment);
562	} else if let Some(host_str) = base_url.host_str() {
563	self.serialization.push_str(host_str);
564	host_end = self.serialization.len();
565	host = base_url.host;
566	}
567	}
568	}
569	// If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
570	let parse_path_input = if let Some(c) = first_char {
571	if c == '/' \|\| c == '`\\`' \|\| c == '?' \|\| c == '#' {
572	input
573	} else {
574	input_after_first_char
575	}
576	} else {
577	input_after_first_char
578	};
579
580	let remaining =
581	self.parse_path(SchemeType::File, &mut `false`, host_end, parse_path_input);
582
583	let host_start = host_start as u32;
584
585	let (query_start, fragment_start) =
586	self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
587
588	let host_end = host_end as u32;
589	return Ok(Url {
590	serialization: self.serialization,
591	scheme_end,
592	username_end: host_start,
593	host_start,
594	host_end,
595	host,
596	port: None,
597	path_start: host_end,
598	query_start,
599	fragment_start,
600	});
601	}
602	}
603	if let Some(base_url) = base_file_url {
604	match first_char {
605	None => {
606	// Copy everything except the fragment
607	let before_fragment = match base_url.fragment_start {
608	Some(i) => &base_url.serialization[..i as usize],
609	None => &*base_url.serialization,
610	};
611	self.serialization.push_str(before_fragment);
612	Ok(Url {
613	serialization: self.serialization,
614	fragment_start: None,
615	..*base_url
616	})
617	}
618	Some('?') => {
619	// Copy everything up to the query string
620	let before_query = match (base_url.query_start, base_url.fragment_start) {
621	(None, None) => &*base_url.serialization,
622	(Some(i), _) \| (None, Some(i)) => base_url.slice(..i),
623	};
624	self.serialization.push_str(before_query);
625	let (query_start, fragment_start) =
626	self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
627	Ok(Url {
628	serialization: self.serialization,
629	query_start,
630	fragment_start,
631	..*base_url
632	})
633	}
634	Some('#') => self.fragment_only(base_url, input),
635	_ => {
636	if !starts_with_windows_drive_letter_segment(&input) {
637	let before_query = match (base_url.query_start, base_url.fragment_start) {
638	(None, None) => &*base_url.serialization,
639	(Some(i), _) \| (None, Some(i)) => base_url.slice(..i),
640	};
641	self.serialization.push_str(before_query);
642	self.shorten_path(SchemeType::File, base_url.path_start as usize);
643	let remaining = self.parse_path(
644	SchemeType::File,
645	&mut `true`,
646	base_url.path_start as usize,
647	input,
648	);
649	self.with_query_and_fragment(
650	SchemeType::File,
651	base_url.scheme_end,
652	base_url.username_end,
653	base_url.host_start,
654	base_url.host_end,
655	base_url.host,
656	base_url.port,
657	base_url.path_start,
658	remaining,
659	)
660	} else {
661	self.serialization.push_str("file:///");
662	let scheme_end = "file".len() as u32;
663	let path_start = "file://".len();
664	let remaining =
665	self.parse_path(SchemeType::File, &mut `false`, path_start, input);
666	let (query_start, fragment_start) =
667	self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
668	let path_start = path_start as u32;
669	Ok(Url {
670	serialization: self.serialization,
671	scheme_end,
672	username_end: path_start,
673	host_start: path_start,
674	host_end: path_start,
675	host: HostInternal::None,
676	port: None,
677	path_start,
678	query_start,
679	fragment_start,
680	})
681	}
682	}
683	}
684	} else {
685	self.serialization.push_str("file:///");
686	let scheme_end = "file".len() as u32;
687	let path_start = "file://".len();
688	let remaining = self.parse_path(SchemeType::File, &mut `false`, path_start, input);
689	let (query_start, fragment_start) =
690	self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
691	let path_start = path_start as u32;
692	Ok(Url {
693	serialization: self.serialization,
694	scheme_end,
695	username_end: path_start,
696	host_start: path_start,
697	host_end: path_start,
698	host: HostInternal::None,
699	port: None,
700	path_start,
701	query_start,
702	fragment_start,
703	})
704	}
705	}
706
707	fn parse_relative(
708	mut self,
709	input: Input<'_>,
710	scheme_type: SchemeType,
711	base_url: &Url,
712	) -> ParseResult<Url> {
713	// relative state
714	debug_assert!(self.serialization.is_empty());
715	let (first_char, input_after_first_char) = input.split_first();
716	match first_char {
717	None => {
718	// Copy everything except the fragment
719	let before_fragment = match base_url.fragment_start {
720	Some(i) => &base_url.serialization[..i as usize],
721	None => &*base_url.serialization,
722	};
723	self.serialization.push_str(before_fragment);
724	Ok(Url {
725	serialization: self.serialization,
726	fragment_start: None,
727	..*base_url
728	})
729	}
730	Some('?') => {
731	// Copy everything up to the query string
732	let before_query = match (base_url.query_start, base_url.fragment_start) {
733	(None, None) => &*base_url.serialization,
734	(Some(i), _) \| (None, Some(i)) => base_url.slice(..i),
735	};
736	self.serialization.push_str(before_query);
737	let (query_start, fragment_start) =
738	self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
739	Ok(Url {
740	serialization: self.serialization,
741	query_start,
742	fragment_start,
743	..*base_url
744	})
745	}
746	Some('#') => self.fragment_only(base_url, input),
747	Some('/') \| Some('`\\`') => {
748	let (slashes_count, remaining) = input.count_matching(\|c\| matches!(c, '/' \| '`\\`'));
749	if slashes_count >= `2` {
750	self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, \|\| {
751	input
752	.clone()
753	.take_while(\|&c\| matches!(c, '/' \| '`\\`'))
754	.collect::<String>()
755	!= "//"
756	});
757	let scheme_end = base_url.scheme_end;
758	debug_assert!(base_url.byte_at(scheme_end) == b':');
759	self.serialization
760	.push_str(base_url.slice(..scheme_end + `1`));
761	if let Some(after_prefix) = input.split_prefix("//") {
762	return self.after_double_slash(after_prefix, scheme_type, scheme_end);
763	}
764	return self.after_double_slash(remaining, scheme_type, scheme_end);
765	}
766	let path_start = base_url.path_start;
767	self.serialization.push_str(base_url.slice(..path_start));
768	self.serialization.push('/');
769	let remaining = self.parse_path(
770	scheme_type,
771	&mut `true`,
772	path_start as usize,
773	input_after_first_char,
774	);
775	self.with_query_and_fragment(
776	scheme_type,
777	base_url.scheme_end,
778	base_url.username_end,
779	base_url.host_start,
780	base_url.host_end,
781	base_url.host,
782	base_url.port,
783	base_url.path_start,
784	remaining,
785	)
786	}
787	_ => {
788	let before_query = match (base_url.query_start, base_url.fragment_start) {
789	(None, None) => &*base_url.serialization,
790	(Some(i), _) \| (None, Some(i)) => base_url.slice(..i),
791	};
792	self.serialization.push_str(before_query);
793	// FIXME spec says just "remove last entry", not the "pop" algorithm
794	self.pop_path(scheme_type, base_url.path_start as usize);
795	// A special url always has a path.
796	// A path always starts with '/'
797	if self.serialization.len() == base_url.path_start as usize
798	&& (SchemeType::from(base_url.scheme()).is_special() \|\| !input.is_empty())
799	{
800	self.serialization.push('/');
801	}
802	let remaining = match input.split_first() {
803	(Some('/'), remaining) => self.parse_path(
804	scheme_type,
805	&mut `true`,
806	base_url.path_start as usize,
807	remaining,
808	),
809	_ => {
810	self.parse_path(scheme_type, &mut `true`, base_url.path_start as usize, input)
811	}
812	};
813	self.with_query_and_fragment(
814	scheme_type,
815	base_url.scheme_end,
816	base_url.username_end,
817	base_url.host_start,
818	base_url.host_end,
819	base_url.host,
820	base_url.port,
821	base_url.path_start,
822	remaining,
823	)
824	}
825	}
826	}
827
828	fn after_double_slash(
829	mut self,
830	input: Input<'_>,
831	scheme_type: SchemeType,
832	scheme_end: u32,
833	) -> ParseResult<Url> {
834	self.serialization.push('/');
835	self.serialization.push('/');
836	// authority state
837	let before_authority = self.serialization.len();
838	let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
839	let has_authority = before_authority != self.serialization.len();
840	// host state
841	let host_start = to_u32(self.serialization.len())?;
842	let (host_end, host, port, remaining) =
843	self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
844	if host == HostInternal::None && has_authority {
845	return Err(ParseError::EmptyHost);
846	}
847	// path state
848	let path_start = to_u32(self.serialization.len())?;
849	let remaining = self.parse_path_start(scheme_type, &mut `true`, remaining);
850	self.with_query_and_fragment(
851	scheme_type,
852	scheme_end,
853	username_end,
854	host_start,
855	host_end,
856	host,
857	port,
858	path_start,
859	remaining,
860	)
861	}
862
863	/// Return (username_end, remaining)
864	fn parse_userinfo<'i>(
865	&mut self,
866	mut input: Input<'i>,
867	scheme_type: SchemeType,
868	) -> ParseResult<(u32, Input<'i>)> {
869	let mut last_at = None;
870	let mut remaining = input.clone();
871	let mut char_count = `0`;
872	while let Some(c) = remaining.next() {
873	match c {
874	'@' => {
875	if last_at.is_some() {
876	self.log_violation(SyntaxViolation::UnencodedAtSign)
877	} else {
878	self.log_violation(SyntaxViolation::EmbeddedCredentials)
879	}
880	last_at = Some((char_count, remaining.clone()))
881	}
882	'/' \| '?' \| '#' => break,
883	'`\\`' if scheme_type.is_special() => break,
884	_ => (),
885	}
886	char_count += `1`;
887	}
888	let (mut userinfo_char_count, remaining) = match last_at {
889	None => return Ok((to_u32(self.serialization.len())?, input)),
890	Some((`0`, remaining)) => {
891	// Otherwise, if one of the following is true
892	// c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
893	// url is special and c is U+005C (\)
894	// If @ flag is set and buffer is the empty string, validation error, return failure.
895	if let (Some(c), _) = remaining.split_first() {
896	if c == '/' \|\| c == '?' \|\| c == '#' \|\| (scheme_type.is_special() && c == '`\\`') {
897	return Err(ParseError::EmptyHost);
898	}
899	}
900	return Ok((to_u32(self.serialization.len())?, remaining));
901	}
902	Some(x) => x,
903	};
904
905	let mut username_end = None;
906	let mut has_password = `false`;
907	let mut has_username = `false`;
908	while userinfo_char_count > `0` {
909	let (c, utf8_c) = input.next_utf8().unwrap();
910	userinfo_char_count -= `1`;
911	if c == ':' && username_end.is_none() {
912	// Start parsing password
913	username_end = Some(to_u32(self.serialization.len())?);
914	// We don't add a colon if the password is empty
915	if userinfo_char_count > `0` {
916	self.serialization.push(':');
917	has_password = `true`;
918	}
919	} else {
920	if !has_password {
921	has_username = `true`;
922	}
923	self.check_url_code_point(c, &input);
924	self.serialization
925	.extend(utf8_percent_encode(utf8_c, USERINFO));
926	}
927	}
928	let username_end = match username_end {
929	Some(i) => i,
930	None => to_u32(self.serialization.len())?,
931	};
932	if has_username \|\| has_password {
933	self.serialization.push('@');
934	}
935	Ok((username_end, remaining))
936	}
937
938	fn parse_host_and_port<'i>(
939	&mut self,
940	input: Input<'i>,
941	scheme_end: u32,
942	scheme_type: SchemeType,
943	) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
944	let (host, remaining) = Parser::parse_host(input, scheme_type)?;
945	write!(&mut self.serialization, "{}", host).unwrap();
946	let host_end = to_u32(self.serialization.len())?;
947	if let Host::Domain(h) = &host {
948	if h.is_empty() {
949	// Port with an empty host
950	if remaining.starts_with(":") {
951	return Err(ParseError::EmptyHost);
952	}
953	if scheme_type.is_special() {
954	return Err(ParseError::EmptyHost);
955	}
956	}
957	};
958
959	let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
960	let scheme = \|\| default_port(&self.serialization[..scheme_end as usize]);
961	Parser::parse_port(remaining, scheme, self.context)?
962	} else {
963	(None, remaining)
964	};
965	if let Some(port) = port {
966	write!(&mut self.serialization, ":{}", port).unwrap()
967	}
968	Ok((host_end, host.into(), port, remaining))
969	}
970
971	pub fn parse_host(
972	mut input: Input<'_>,
973	scheme_type: SchemeType,
974	) -> ParseResult<(Host<String>, Input<'_>)> {
975	if scheme_type.is_file() {
976	return Parser::get_file_host(input);
977	}
978	// Undo the Input abstraction here to avoid allocating in the common case
979	// where the host part of the input does not contain any tab or newline
980	let input_str = input.chars.as_str();
981	let mut inside_square_brackets = `false`;
982	let mut has_ignored_chars = `false`;
983	let mut non_ignored_chars = `0`;
984	let mut bytes = `0`;
985	for c in input_str.chars() {
986	match c {
987	':' if !inside_square_brackets => break,
988	'`\\`' if scheme_type.is_special() => break,
989	'/' \| '?' \| '#' => break,
990	'`\t`' \| '`\n`' \| '`\r`' => {
991	has_ignored_chars = `true`;
992	}
993	'[' => {
994	inside_square_brackets = `true`;
995	non_ignored_chars += `1`
996	}
997	']' => {
998	inside_square_brackets = `false`;
999	non_ignored_chars += `1`
1000	}
1001	_ => non_ignored_chars += `1`,
1002	}
1003	bytes += c.len_utf8();
1004	}
1005	let replaced: String;
1006	let host_str;
1007	{
1008	let host_input = input.by_ref().take(non_ignored_chars);
1009	if has_ignored_chars {
1010	replaced = host_input.collect();
1011	host_str = &*replaced
1012	} else {
1013	for _ in host_input {}
1014	host_str = &input_str[..bytes]
1015	}
1016	}
1017	if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
1018	return Err(ParseError::EmptyHost);
1019	}
1020	if !scheme_type.is_special() {
1021	let host = Host::parse_opaque(host_str)?;
1022	return Ok((host, input));
1023	}
1024	let host = Host::parse(host_str)?;
1025	Ok((host, input))
1026	}
1027
1028	fn get_file_host(input: Input<'_>) -> ParseResult<(Host<String>, Input<'_>)> {
1029	let (_, host_str, remaining) = Parser::file_host(input)?;
1030	let host = match Host::parse(&host_str)? {
1031	Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
1032	host => host,
1033	};
1034	Ok((host, remaining))
1035	}
1036
1037	fn parse_file_host<'i>(
1038	&mut self,
1039	input: Input<'i>,
1040	) -> ParseResult<(bool, HostInternal, Input<'i>)> {
1041	let has_host;
1042	let (_, host_str, remaining) = Parser::file_host(input)?;
1043	let host = if host_str.is_empty() {
1044	has_host = `false`;
1045	HostInternal::None
1046	} else {
1047	match Host::parse(&host_str)? {
1048	Host::Domain(ref d) if d == "localhost" => {
1049	has_host = `false`;
1050	HostInternal::None
1051	}
1052	host => {
1053	write!(&mut self.serialization, "{}", host).unwrap();
1054	has_host = `true`;
1055	host.into()
1056	}
1057	}
1058	};
1059	Ok((has_host, host, remaining))
1060	}
1061
1062	pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> {
1063	// Undo the Input abstraction here to avoid allocating in the common case
1064	// where the host part of the input does not contain any tab or newline
1065	let input_str = input.chars.as_str();
1066	let mut has_ignored_chars = `false`;
1067	let mut non_ignored_chars = `0`;
1068	let mut bytes = `0`;
1069	for c in input_str.chars() {
1070	match c {
1071	'/' \| '`\\`' \| '?' \| '#' => break,
1072	'`\t`' \| '`\n`' \| '`\r`' => has_ignored_chars = `true`,
1073	_ => non_ignored_chars += `1`,
1074	}
1075	bytes += c.len_utf8();
1076	}
1077	let replaced: String;
1078	let host_str;
1079	let mut remaining = input.clone();
1080	{
1081	let host_input = remaining.by_ref().take(non_ignored_chars);
1082	if has_ignored_chars {
1083	replaced = host_input.collect();
1084	host_str = &*replaced
1085	} else {
1086	for _ in host_input {}
1087	host_str = &input_str[..bytes]
1088	}
1089	}
1090	if is_windows_drive_letter(host_str) {
1091	return Ok((`false`, "".to_string(), input));
1092	}
1093	Ok((`true`, host_str.to_string(), remaining))
1094	}
1095
1096	pub fn parse_port<P>(
1097	mut input: Input<'_>,
1098	default_port: P,
1099	context: Context,
1100	) -> ParseResult<(Option<u16>, Input<'_>)>
1101	where
1102	P: Fn() -> Option<u16>,
1103	{
1104	let mut port: u32 = `0`;
1105	let mut has_any_digit = `false`;
1106	while let (Some(c), remaining) = input.split_first() {
1107	if let Some(digit) = c.to_digit(`10`) {
1108	port = port * `10` + digit;
1109	if port > ::std::u16::MAX as u32 {
1110	return Err(ParseError::InvalidPort);
1111	}
1112	has_any_digit = `true`;
1113	} else if context == Context::UrlParser && !matches!(c, '/' \| '`\\`' \| '?' \| '#') {
1114	return Err(ParseError::InvalidPort);
1115	} else {
1116	break;
1117	}
1118	input = remaining;
1119	}
1120	let mut opt_port = Some(port as u16);
1121	if !has_any_digit \|\| opt_port == default_port() {
1122	opt_port = None;
1123	}
1124	Ok((opt_port, input))
1125	}
1126
1127	pub fn parse_path_start<'i>(
1128	&mut self,
1129	scheme_type: SchemeType,
1130	has_host: &mut bool,
1131	input: Input<'i>,
1132	) -> Input<'i> {
1133	let path_start = self.serialization.len();
1134	let (maybe_c, remaining) = input.split_first();
1135	// If url is special, then:
1136	if scheme_type.is_special() {
1137	if maybe_c == Some('`\\`') {
1138	// If c is U+005C (\), validation error.
1139	self.log_violation(SyntaxViolation::Backslash);
1140	}
1141	// A special URL always has a non-empty path.
1142	if !self.serialization.ends_with('/') {
1143	self.serialization.push('/');
1144	// We have already made sure the forward slash is present.
1145	if maybe_c == Some('/') \|\| maybe_c == Some('`\\`') {
1146	return self.parse_path(scheme_type, has_host, path_start, remaining);
1147	}
1148	}
1149	return self.parse_path(scheme_type, has_host, path_start, input);
1150	} else if maybe_c == Some('?') \|\| maybe_c == Some('#') {
1151	// Otherwise, if state override is not given and c is U+003F (?),
1152	// set url’s query to the empty string and state to query state.
1153	// Otherwise, if state override is not given and c is U+0023 (#),
1154	// set url’s fragment to the empty string and state to fragment state.
1155	// The query and path states will be handled by the caller.
1156	return input;
1157	}
1158
1159	if maybe_c.is_some() && maybe_c != Some('/') {
1160	self.serialization.push('/');
1161	}
1162	// Otherwise, if c is not the EOF code point:
1163	self.parse_path(scheme_type, has_host, path_start, input)
1164	}
1165
1166	pub fn parse_path<'i>(
1167	&mut self,
1168	scheme_type: SchemeType,
1169	has_host: &mut bool,
1170	path_start: usize,
1171	mut input: Input<'i>,
1172	) -> Input<'i> {
1173	// Relative path state
1174	loop {
1175	let mut segment_start = self.serialization.len();
1176	let mut ends_with_slash = `false`;
1177	loop {
1178	let input_before_c = input.clone();
1179	let (c, utf8_c) = if let Some(x) = input.next_utf8() {
1180	x
1181	} else {
1182	break;
1183	};
1184	match c {
1185	'/' if self.context != Context::PathSegmentSetter => {
1186	self.serialization.push(c);
1187	ends_with_slash = `true`;
1188	break;
1189	}
1190	'`\\`' if self.context != Context::PathSegmentSetter
1191	&& scheme_type.is_special() =>
1192	{
1193	self.log_violation(SyntaxViolation::Backslash);
1194	self.serialization.push('/');
1195	ends_with_slash = `true`;
1196	break;
1197	}
1198	'?' \| '#' if self.context == Context::UrlParser => {
1199	input = input_before_c;
1200	break;
1201	}
1202	_ => {
1203	self.check_url_code_point(c, &input);
1204	if scheme_type.is_file()
1205	&& self.serialization.len() > path_start
1206	&& is_normalized_windows_drive_letter(
1207	&self.serialization[path_start + `1`..],
1208	)
1209	{
1210	self.serialization.push('/');
1211	segment_start += `1`;
1212	}
1213	if self.context == Context::PathSegmentSetter {
1214	if scheme_type.is_special() {
1215	self.serialization
1216	.extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT));
1217	} else {
1218	self.serialization
1219	.extend(utf8_percent_encode(utf8_c, PATH_SEGMENT));
1220	}
1221	} else {
1222	self.serialization.extend(utf8_percent_encode(utf8_c, PATH));
1223	}
1224	}
1225	}
1226	}
1227	let segment_before_slash = if ends_with_slash {
1228	&self.serialization[segment_start..self.serialization.len() - `1`]
1229	} else {
1230	&self.serialization[segment_start..self.serialization.len()]
1231	};
1232	match segment_before_slash {
1233	// If buffer is a double-dot path segment, shorten url’s path,
1234	".." \| "%2e%2e" \| "%2e%2E" \| "%2E%2e" \| "%2E%2E" \| "%2e." \| "%2E." \| ".%2e"
1235	\| ".%2E" => {
1236	debug_assert!(self.serialization.as_bytes()[segment_start - `1`] == b'/');
1237	self.serialization.truncate(segment_start);
1238	if self.serialization.ends_with('/')
1239	&& Parser::last_slash_can_be_removed(&self.serialization, path_start)
1240	{
1241	self.serialization.pop();
1242	}
1243	self.shorten_path(scheme_type, path_start);
1244
1245	// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1246	if ends_with_slash && !self.serialization.ends_with('/') {
1247	self.serialization.push('/');
1248	}
1249	}
1250	// Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/),
1251	// nor url is special and c is U+005C (\), append the empty string to url’s path.
1252	"." \| "%2e" \| "%2E" => {
1253	self.serialization.truncate(segment_start);
1254	if !self.serialization.ends_with('/') {
1255	self.serialization.push('/');
1256	}
1257	}
1258	_ => {
1259	// If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then
1260	if scheme_type.is_file()
1261	&& segment_start == path_start + `1`
1262	&& is_windows_drive_letter(segment_before_slash)
1263	{
1264	// Replace the second code point in buffer with U+003A (:).
1265	if let Some(c) = segment_before_slash.chars().next() {
1266	self.serialization.truncate(segment_start);
1267	self.serialization.push(c);
1268	self.serialization.push(':');
1269	if ends_with_slash {
1270	self.serialization.push('/');
1271	}
1272	}
1273	// If url’s host is neither the empty string nor null,
1274	// validation error, set url’s host to the empty string.
1275	if *has_host {
1276	self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive);
1277	has_host = `false`; // FIXME account for this in callers*
1278	}
1279	}
1280	}
1281	}
1282	if !ends_with_slash {
1283	break;
1284	}
1285	}
1286	if scheme_type.is_file() {
1287	// while url’s path’s size is greater than 1
1288	// and url’s path[0] is the empty string,
1289	// validation error, remove the first item from url’s path.
1290	//FIXME: log violation
1291	let path = self.serialization.split_off(path_start);
1292	self.serialization.push('/');
1293	self.serialization.push_str(path.trim_start_matches('/'));
1294	}
1295
1296	input
1297	}
1298
1299	fn last_slash_can_be_removed(serialization: &str, path_start: usize) -> bool {
1300	let url_before_segment = &serialization[..serialization.len() - `1`];
1301	if let Some(segment_before_start) = url_before_segment.rfind('/') {
1302	// Do not remove the root slash
1303	segment_before_start >= path_start
1304	// Or a windows drive letter slash
1305	&& !path_starts_with_windows_drive_letter(&serialization[segment_before_start..])
1306	} else {
1307	`false`
1308	}
1309	}
1310
1311	/// https://url.spec.whatwg.org/#shorten-a-urls-path
1312	fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1313	// If path is empty, then return.
1314	if self.serialization.len() == path_start {
1315	return;
1316	}
1317	// If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
1318	if scheme_type.is_file()
1319	&& is_normalized_windows_drive_letter(&self.serialization[path_start..])
1320	{
1321	return;
1322	}
1323	// Remove path’s last item.
1324	self.pop_path(scheme_type, path_start);
1325	}
1326
1327	/// https://url.spec.whatwg.org/#pop-a-urls-path
1328	fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1329	if self.serialization.len() > path_start {
1330	let slash_position = self.serialization[path_start..].rfind('/').unwrap();
1331	// + 1 since rfind returns the position before the slash.
1332	let segment_start = path_start + slash_position + `1`;
1333	// Don’t pop a Windows drive letter
1334	if !(scheme_type.is_file()
1335	&& is_normalized_windows_drive_letter(&self.serialization[segment_start..]))
1336	{
1337	self.serialization.truncate(segment_start);
1338	}
1339	}
1340	}
1341
1342	pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1343	loop {
1344	let input_before_c = input.clone();
1345	match input.next_utf8() {
1346	Some(('?', _)) \| Some(('#', _)) if self.context == Context::UrlParser => {
1347	return input_before_c
1348	}
1349	Some((c, utf8_c)) => {
1350	self.check_url_code_point(c, &input);
1351	self.serialization
1352	.extend(utf8_percent_encode(utf8_c, CONTROLS));
1353	}
1354	None => return input,
1355	}
1356	}
1357	}
1358
1359	#[allow(clippy::too_many_arguments)]
1360	fn with_query_and_fragment(
1361	mut self,
1362	scheme_type: SchemeType,
1363	scheme_end: u32,
1364	username_end: u32,
1365	host_start: u32,
1366	host_end: u32,
1367	host: HostInternal,
1368	port: Option<u16>,
1369	mut path_start: u32,
1370	remaining: Input<'_>,
1371	) -> ParseResult<Url> {
1372	// Special case for anarchist URL's with a leading empty path segment
1373	// This prevents web+demo:/.//not-a-host/ or web+demo:/path/..//not-a-host/,
1374	// when parsed and then serialized, from ending up as web+demo://not-a-host/
1375	// (they end up as web+demo:/.//not-a-host/).
1376	//
1377	// If url’s host is null, url does not have an opaque path,
1378	// url’s path’s size is greater than 1, and url’s path[0] is the empty string,
1379	// then append U+002F (/) followed by U+002E (.) to output.
1380	let scheme_end_as_usize = scheme_end as usize;
1381	let path_start_as_usize = path_start as usize;
1382	if path_start_as_usize == scheme_end_as_usize + `1` {
1383	// Anarchist URL
1384	if self.serialization[path_start_as_usize..].starts_with("//") {
1385	// Case 1: The base URL did not have an empty path segment, but the resulting one does
1386	// Insert the "/." prefix
1387	self.serialization.insert_str(path_start_as_usize, "/.");
1388	path_start += `2`;
1389	}
1390	assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
1391	} else if path_start_as_usize == scheme_end_as_usize + `3`
1392	&& &self.serialization[scheme_end_as_usize..path_start_as_usize] == ":/."
1393	{
1394	// Anarchist URL with leading empty path segment
1395	// The base URL has a "/." between the host and the path
1396	assert_eq!(self.serialization.as_bytes()[path_start_as_usize], b'/');
1397	if self
1398	.serialization
1399	.as_bytes()
1400	.get(path_start_as_usize + `1`)
1401	.copied()
1402	!= Some(b'/')
1403	{
1404	// Case 2: The base URL had an empty path segment, but the resulting one does not
1405	// Remove the "/." prefix
1406	self.serialization
1407	.replace_range(scheme_end_as_usize..path_start_as_usize, ":");
1408	path_start -= `2`;
1409	}
1410	assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
1411	}
1412
1413	let (query_start, fragment_start) =
1414	self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
1415	Ok(Url {
1416	serialization: self.serialization,
1417	scheme_end,
1418	username_end,
1419	host_start,
1420	host_end,
1421	host,
1422	port,
1423	path_start,
1424	query_start,
1425	fragment_start,
1426	})
1427	}
1428
1429	/// Return (query_start, fragment_start)
1430	fn parse_query_and_fragment(
1431	&mut self,
1432	scheme_type: SchemeType,
1433	scheme_end: u32,
1434	mut input: Input<'_>,
1435	) -> ParseResult<(Option<u32>, Option<u32>)> {
1436	let mut query_start = None;
1437	match input.next() {
1438	Some('#') => {}
1439	Some('?') => {
1440	query_start = Some(to_u32(self.serialization.len())?);
1441	self.serialization.push('?');
1442	let remaining = self.parse_query(scheme_type, scheme_end, input);
1443	if let Some(remaining) = remaining {
1444	input = remaining
1445	} else {
1446	return Ok((query_start, None));
1447	}
1448	}
1449	None => return Ok((None, None)),
1450	_ => panic!("Programming error. parse_query_and_fragment() called without ? or #"),
1451	}
1452
1453	let fragment_start = to_u32(self.serialization.len())?;
1454	self.serialization.push('#');
1455	self.parse_fragment(input);
1456	Ok((query_start, Some(fragment_start)))
1457	}
1458
1459	pub fn parse_query<'i>(
1460	&mut self,
1461	scheme_type: SchemeType,
1462	scheme_end: u32,
1463	mut input: Input<'i>,
1464	) -> Option<Input<'i>> {
1465	let len = input.chars.as_str().len();
1466	let mut query = String::with_capacity(len); // FIXME: use a streaming decoder instead
1467	let mut remaining = None;
1468	while let Some(c) = input.next() {
1469	if c == '#' && self.context == Context::UrlParser {
1470	remaining = Some(input);
1471	break;
1472	} else {
1473	self.check_url_code_point(c, &input);
1474	query.push(c);
1475	}
1476	}
1477
1478	let encoding = match &self.serialization[..scheme_end as usize] {
1479	"http" \| "https" \| "file" \| "ftp" => self.query_encoding_override,
1480	_ => None,
1481	};
1482	let query_bytes = if let Some(o) = encoding {
1483	o(&query)
1484	} else {
1485	query.as_bytes().into()
1486	};
1487	let set = if scheme_type.is_special() {
1488	SPECIAL_QUERY
1489	} else {
1490	QUERY
1491	};
1492	self.serialization.extend(percent_encode(&query_bytes, set));
1493	remaining
1494	}
1495
1496	fn fragment_only(mut self, base_url: &Url, mut input: Input<'_>) -> ParseResult<Url> {
1497	let before_fragment = match base_url.fragment_start {
1498	Some(i) => base_url.slice(..i),
1499	None => &*base_url.serialization,
1500	};
1501	debug_assert!(self.serialization.is_empty());
1502	self.serialization
1503	.reserve(before_fragment.len() + input.chars.as_str().len());
1504	self.serialization.push_str(before_fragment);
1505	self.serialization.push('#');
1506	let next = input.next();
1507	debug_assert!(next == Some('#'));
1508	self.parse_fragment(input);
1509	Ok(Url {
1510	serialization: self.serialization,
1511	fragment_start: Some(to_u32(before_fragment.len())?),
1512	..*base_url
1513	})
1514	}
1515
1516	pub fn parse_fragment(&mut self, mut input: Input<'_>) {
1517	while let Some((c, utf8_c)) = input.next_utf8() {
1518	if c == '`\0`' {
1519	self.log_violation(SyntaxViolation::NullInFragment)
1520	} else {
1521	self.check_url_code_point(c, &input);
1522	}
1523	self.serialization
1524	.extend(utf8_percent_encode(utf8_c, FRAGMENT));
1525	}
1526	}
1527
1528	fn check_url_code_point(&self, c: char, input: &Input<'_>) {
1529	if let Some(vfn) = self.violation_fn {
1530	if c == '%' {
1531	let mut input = input.clone();
1532	if !matches!((input.next(), input.next()), (Some(a), Some(b))
1533	if a.is_ascii_hexdigit() && b.is_ascii_hexdigit())
1534	{
1535	vfn(SyntaxViolation::PercentDecode)
1536	}
1537	} else if !is_url_code_point(c) {
1538	vfn(SyntaxViolation::NonUrlCodePoint)
1539	}
1540	}
1541	}
1542	}
1543
// Non URL code points:
// U+0000 to U+0020 (space)
// " # % < > [ \ ] ^ ` { | }
// U+007F to U+009F
// surrogates
// U+FDD0 to U+FDEF
// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
//
// Everything else is a URL code point. In particular the whole of plane 14
// (U+E0000 to U+EFFFD) is accepted; an earlier version of this table started
// that plane at U+E1000 and so rejected U+E0000 to U+E0FFF.
/// Returns whether `c` is a URL code point.
///
/// That is: an ASCII alphanumeric, one of ``! $ & ' ( ) * + , - . / : ; = ? @ _ ~``,
/// or a code point in U+00A0..=U+10FFFD that is not a noncharacter.
#[inline]
fn is_url_code_point(c: char) -> bool {
    match c {
        'a'..='z' | 'A'..='Z' | '0'..='9' => true,
        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' => true,
        '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' => true,
        '\u{A0}'..='\u{10FFFD}' => {
            // `char` cannot hold a surrogate, so only noncharacters remain to
            // be excluded: U+FDD0..=U+FDEF and the last two of each plane.
            let code = c as u32;
            let is_noncharacter =
                (0xFDD0..=0xFDEF).contains(&code) || (code & 0xFFFE) == 0xFFFE;
            !is_noncharacter
        }
        _ => false,
    }
}
1569
/// Returns whether `ch` is a C0 control or a space, i.e. U+0000 to U+0020.
///
/// https://url.spec.whatwg.org/#c0-controls-and-space
#[inline]
fn c0_control_or_space(ch: char) -> bool {
    matches!(ch, '\u{0}'..='\u{20}')
}
1575
/// Returns whether `ch` is a tab, a carriage return or a line feed.
///
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
#[inline]
fn ascii_tab_or_new_line(ch: char) -> bool {
    ch == '\t' || ch == '\r' || ch == '\n'
}
1581
/// Returns whether `ch` is an ASCII letter (`a`–`z` or `A`–`Z`).
///
/// https://url.spec.whatwg.org/#ascii-alpha
#[inline]
pub fn ascii_alpha(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z')
}
1587
1588	#[inline]
1589	pub fn to_u32(i: usize) -> ParseResult<u32> {
1590	if i <= ::std::u32::MAX as usize {
1591	Ok(i as u32)
1592	} else {
1593	Err(ParseError::Overflow)
1594	}
1595	}
1596
1597	fn is_normalized_windows_drive_letter(segment: &str) -> bool {
1598	is_windows_drive_letter(segment) && segment.as_bytes()[`1`] == b':'
1599	}
1600
1601	/// Whether the scheme is file:, the path has a single segment, and that segment
1602	/// is a Windows drive letter
1603	#[inline]
1604	pub fn is_windows_drive_letter(segment: &str) -> bool {
1605	segment.len() == `2` && starts_with_windows_drive_letter(segment)
1606	}
1607
1608	/// Whether path starts with a root slash
1609	/// and a windows drive letter eg: "/c:" or "/a:/"
1610	fn path_starts_with_windows_drive_letter(s: &str) -> bool {
1611	if let Some(c: &u8) = s.as_bytes().first() {
1612	matches!(c, b'/' \| b'`\\`' \| b'?' \| b'#') && starts_with_windows_drive_letter(&s[`1`..])
1613	} else {
1614	`false`
1615	}
1616	}
1617
1618	fn starts_with_windows_drive_letter(s: &str) -> bool {
1619	s.len() >= `2`
1620	&& ascii_alpha(ch:s.as_bytes()[`0`] as char)
1621	&& matches!(s.as_bytes()[`1`], b':' \| b'\|')
1622	&& (s.len() == `2` \|\| matches!(s.as_bytes()[`2`], b'/' \| b'`\\`' \| b'?' \| b'#'))
1623	}
1624
1625	/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
1626	fn starts_with_windows_drive_letter_segment(input: &Input<'_>) -> bool {
1627	let mut input: Input<'_> = input.clone();
1628	match (input.next(), input.next(), input.next()) {
1629	// its first two code points are a Windows drive letter
1630	// its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
1631	(Some(a: char), Some(b: char), Some(c: char))
1632	if ascii_alpha(ch:a) && matches!(b, ':' \| '\|') && matches!(c, '/' \| '`\\`' \| '?' \| '#') =>
1633	{
1634	`true`
1635	}
1636	// its first two code points are a Windows drive letter
1637	// its length is 2
1638	(Some(a: char), Some(b: char), None) if ascii_alpha(ch:a) && matches!(b, ':' \| '\|') => `true`,
1639	_ => `false`,
1640	}
1641	}
1642