parse.rs source code [crates/mime/src/parse.rs]

1	#[allow(unused, deprecated)]
2	use std::ascii::AsciiExt;
3	use std::error::Error;
4	use std::fmt;
5	use std::iter::Enumerate;
6	use std::str::Bytes;
7
8	use super::{Mime, MimeIter, Source, ParamSource, Indexed, CHARSET, UTF_8};
9
/// The ways in which parsing a media type string can fail.
#[derive(Debug)]
pub enum ParseError {
    /// The input ended before a `/` separating type and subtype was seen.
    MissingSlash,
    /// The input ended inside a parameter name, before its `=`.
    MissingEqual,
    /// The input ended inside a quoted parameter value, before the closing `"`.
    MissingQuote,
    /// A byte that is not allowed at its position was encountered.
    InvalidToken {
        /// Byte offset of the offending byte within the parsed string.
        pos: usize,
        /// The offending byte itself.
        byte: u8,
    },
}

impl ParseError {
    /// Returns the fixed, human-readable description of this error variant.
    fn s(&self) -> &str {
        use self::ParseError::*;

        match *self {
            MissingSlash => "a slash (/) was missing between the type and subtype",
            MissingEqual => "an equals sign (=) was missing between a parameter and its value",
            MissingQuote => "a quote (\") was missing from a parameter value",
            InvalidToken { .. } => "an invalid token was encountered",
        }
    }
}

impl fmt::Display for ParseError {
    /// Writes the description; for `InvalidToken` the offending byte (in
    /// upper-case hex) and its position are appended.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let ParseError::InvalidToken { pos, byte } = *self {
            write!(f, "{}, {:X} at position {}", self.s(), byte, pos)
        } else {
            f.write_str(self.s())
        }
    }
}

impl Error for ParseError {
    // Minimum Rust is 1.15, Error::description was still required then
    #[allow(deprecated)]
    fn description(&self) -> &str {
        self.s()
    }
}
51
52	impl<'a> MimeIter<'a> {
53	/// A new iterator over mimes or media types
54	pub fn new(s: &'a str) -> Self {
55	Self {
56	pos: `0`,
57	source: s,
58	}
59	}
60	}
61
62	impl<'a> Iterator for MimeIter<'a> {
63	type Item = Result<Mime, &'a str>;
64
65	fn next(&mut self) -> Option<Self::Item> {
66	let start = self.pos;
67	let len = self.source.bytes().len();
68
69	if start >= len {
70	return None
71	}
72
73	// Try parsing the whole remaining slice, until the end
74	match parse(&self.source[start ..len]) {
75	Ok(value) => {
76	self.pos = len;
77	Some(Ok(value))
78	}
79	Err(ParseError::InvalidToken { pos, .. }) => {
80	// The first token is immediately found to be wrong by `parse`. Skip it
81	if pos == `0` {
82	self.pos += `1`;
83	return self.next()
84	}
85	let slice = &self.source[start .. start + pos];
86	// Try parsing the longest slice (until the first invalid token)
87	return match parse(slice) {
88	Ok(mime) => {
89	self.pos = start + pos + `1`;
90	Some(Ok(mime))
91	}
92	Err(_) => {
93	if start + pos < len {
94	// Skip this invalid slice,
95	// try parsing the remaining slice in the next iteration
96	self.pos = start + pos;
97	Some(Err(slice))
98	} else {
99	None
100	}
101	}
102	}
103	}
104	// Do not process any other error condition: the slice is malformed and
105	// no character is found to be invalid: a character is missing
106	Err(_) => None,
107	}
108	}
109	}
110
111	pub fn parse(s: &str) -> Result<Mime, ParseError> {
112	if s == "/" {
113	return Ok(::STAR_STAR);
114	}
115
116	let mut iter = s.bytes().enumerate();
117	// toplevel
118	let mut start;
119	let slash;
120	loop {
121	match iter.next() {
122	Some((_, c)) if is_token(c) => (),
123	Some((i, b'/')) if i > `0` => {
124	slash = i;
125	start = i + `1`;
126	break;
127	},
128	None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime
129	Some((pos, byte)) => return Err(ParseError::InvalidToken {
130	pos: pos,
131	byte: byte,
132	})
133	};
134
135	}
136
137	// sublevel
138	let mut plus = None;
139	loop {
140	match iter.next() {
141	Some((i, b'+')) if i > start => {
142	plus = Some(i);
143	},
144	Some((i, b';')) if i > start => {
145	start = i;
146	break;
147	},
148	Some((_, c)) if is_token(c) => (),
149	None => {
150	return Ok(Mime {
151	source: Source::Dynamic(s.to_ascii_lowercase()),
152	slash: slash,
153	plus: plus,
154	params: ParamSource::None,
155	});
156	},
157	Some((pos, byte)) => return Err(ParseError::InvalidToken {
158	pos: pos,
159	byte: byte,
160	})
161	};
162	}
163
164	// params
165	let params = params_from_str(s, &mut iter, start)?;
166
167	let src = match params {
168	ParamSource::Utf8(_) => s.to_ascii_lowercase(),
169	ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices),
170	ParamSource::None => {
171	// Chop off the empty list
172	s[..start].to_ascii_lowercase()
173	}
174	};
175
176	Ok(Mime {
177	source: Source::Dynamic(src),
178	slash: slash,
179	plus: plus,
180	params: params,
181	})
182	}
183
184
185	fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> {
186	let semicolon = start;
187	start += `1`;
188	let mut params = ParamSource::None;
189	'params: while start < s.len() {
190	let name;
191	// name
192	'name: loop {
193	match iter.next() {
194	Some((i, b' ')) if i == start => {
195	start = i + `1`;
196	continue 'params;
197	},
198	Some((_, c)) if is_token(c) => (),
199	Some((i, b'=')) if i > start => {
200	name = Indexed(start, i);
201	start = i + `1`;
202	break 'name;
203	},
204	None => return Err(ParseError::MissingEqual),
205	Some((pos, byte)) => return Err(ParseError::InvalidToken {
206	pos: pos,
207	byte: byte,
208	}),
209	}
210	}
211
212	let value;
213	// values must be restrict-name-char or "anything goes"
214	let mut is_quoted = `false`;
215
216	'value: loop {
217	if is_quoted {
218	match iter.next() {
219	Some((i, b'"')) if i > start => {
220	value = Indexed(start, i);
221	break 'value;
222	},
223	Some((_, c)) if is_restricted_quoted_char(c) => (),
224	None => return Err(ParseError::MissingQuote),
225	Some((pos, byte)) => return Err(ParseError::InvalidToken {
226	pos: pos,
227	byte: byte,
228	}),
229	}
230	} else {
231	match iter.next() {
232	Some((i, b'"')) if i == start => {
233	is_quoted = `true`;
234	start = i + `1`;
235	},
236	Some((_, c)) if is_token(c) => (),
237	Some((i, b';')) if i > start => {
238	value = Indexed(start, i);
239	start = i + `1`;
240	break 'value;
241	}
242	None => {
243	value = Indexed(start, s.len());
244	start = s.len();
245	break 'value;
246	},
247
248	Some((pos, byte)) => return Err(ParseError::InvalidToken {
249	pos: pos,
250	byte: byte,
251	}),
252	}
253	}
254	}
255
256	if is_quoted {
257	'ws: loop {
258	match iter.next() {
259	Some((i, b';')) => {
260	// next param
261	start = i + `1`;
262	break 'ws;
263	},
264	Some((_, b' ')) => {
265	// skip whitespace
266	},
267	None => {
268	// eof
269	start = s.len();
270	break 'ws;
271	},
272	Some((pos, byte)) => return Err(ParseError::InvalidToken {
273	pos: pos,
274	byte: byte,
275	}),
276	}
277	}
278	}
279
280	match params {
281	ParamSource::Utf8(i) => {
282	let i = i + `2`;
283	let charset = Indexed(i, "charset".len() + i);
284	let utf8 = Indexed(charset.1 + `1`, charset.1 + "utf-8".len() + `1`);
285	params = ParamSource::Custom(semicolon, vec![
286	(charset, utf8),
287	(name, value),
288	]);
289	},
290	ParamSource::Custom(_, ref mut vec) => {
291	vec.push((name, value));
292	},
293	ParamSource::None => {
294	if semicolon + `2` == name.0 && CHARSET == &s[name.0..name.1] {
295	if UTF_8 == &s[value.0..value.1] {
296	params = ParamSource::Utf8(semicolon);
297	continue 'params;
298	}
299	}
300	params = ParamSource::Custom(semicolon, vec![(name, value)]);
301	},
302	}
303	}
304	Ok(params)
305	}
306
307	fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String {
308	let mut owned: String = s.to_owned();
309	owned[..semi].make_ascii_lowercase();
310
311	for &(ref name: &Indexed, ref value: &Indexed) in params {
312	owned[name.0..name.1].make_ascii_lowercase();
313	// Since we just converted this part of the string to lowercase,
314	// we can skip the `Name == &str` unicase check and do a faster
315	// memcmp instead.
316	if &owned[name.0..name.1] == CHARSET.source {
317	owned[value.0..value.1].make_ascii_lowercase();
318	}
319	}
320
321	owned
322	}
323
324	// From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
325	//
326	// > All registered media types MUST be assigned top-level type and
327	// > subtype names. The combination of these names serves to uniquely
328	// > identify the media type, and the subtype name facet (or the absence
329	// > of one) identifies the registration tree. Both top-level type and
330	// > subtype names are case-insensitive.
331	// >
332	// > Type and subtype names MUST conform to the following ABNF:
333	// >
334	// > type-name = restricted-name
335	// > subtype-name = restricted-name
336	// >
// > restricted-name = restricted-name-first *126restricted-name-chars
338	// > restricted-name-first = ALPHA / DIGIT
339	// > restricted-name-chars = ALPHA / DIGIT / "!" / "#" /
340	// > "$" / "&" / "-" / "^" / "_"
341	// > restricted-name-chars =/ "." ; Characters before first dot always
342	// > ; specify a facet name
343	// > restricted-name-chars =/ "+" ; Characters after last plus always
344	// > ; specify a structured syntax suffix
345
346	// However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1):
347	//
// > media-type = type "/" subtype *( OWS ";" OWS parameter )
349	// > type = token
350	// > subtype = token
351	// > parameter = token "=" ( token / quoted-string )
352	//
353	// Where token is defined as:
354	//
// > token = 1*tchar
// > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
// > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
358	//
// So, clearly, ¯\_(ツ)_/¯
360
/// Turns a comma-terminated list of `0`/`1` flags into an array of `bool`s
/// (`true` for every non-zero flag).
macro_rules! byte_map {
    ($($flag:expr,)*) => ([
        $($flag != 0,)*
    ])
}

/// Lookup table indexed by byte value: `true` when the byte is an HTTP
/// `tchar` (ASCII letters, digits, and ``!#$%&'*+-.^_`|~``).
static TOKEN_MAP: [bool; 256] = byte_map![
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];

/// Returns `true` when `c` is marked as a token byte in `TOKEN_MAP`.
fn is_token(c: u8) -> bool {
    TOKEN_MAP[c as usize]
}
389
/// Returns `true` for bytes allowed inside a quoted parameter value: anything
/// above the ASCII control range (`> 31`) except DEL (`127`).
fn is_restricted_quoted_char(c: u8) -> bool {
    c > 31 && c != 127
}
393
/// Checks every entry of `TOKEN_MAP` against the `tchar` set spelled out
/// explicitly: ASCII letters, digits, and the listed punctuation.
#[test]
fn test_lookup_tables() {
    for (i, &valid) in TOKEN_MAP.iter().enumerate() {
        let i = i as u8;
        // Plain comparisons instead of range patterns, so no deprecated
        // `...` syntax (and no blanket warning suppression) is needed.
        let is_alphanumeric = (b'a' <= i && i <= b'z')
            || (b'A' <= i && i <= b'Z')
            || (b'0' <= i && i <= b'9');
        let is_symbol = match i {
            b'!' |
            b'#' |
            b'$' |
            b'%' |
            b'&' |
            b'\'' |
            b'*' |
            b'+' |
            b'-' |
            b'.' |
            b'^' |
            b'_' |
            b'`' |
            b'|' |
            b'~' => true,
            _ => false
        };
        let should = is_alphanumeric || is_symbol;
        assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
    }
}
423
/// Well-formed inputs: a comma-separated pair, a single entry, and a single
/// entry followed by an empty parameter list.
#[test]
fn test_parse_iterator() {
    // Two entries separated by ", ".
    let mut pair = MimeIter::new("application/json, application/json");
    let first = pair.next().unwrap().unwrap();
    assert_eq!(first, parse("application/json").unwrap());
    let second = pair.next().unwrap().unwrap();
    assert_eq!(second, parse("application/json").unwrap());
    assert_eq!(pair.next(), None);

    // A lone entry.
    let mut single = MimeIter::new("application/json");
    let only = single.next().unwrap().unwrap();
    assert_eq!(only, parse("application/json").unwrap());
    assert_eq!(single.next(), None);

    // A lone entry with a trailing "; ".
    let mut trailing = MimeIter::new("application/json; ");
    let only = trailing.next().unwrap().unwrap();
    assert_eq!(only, parse("application/json").unwrap());
    assert_eq!(trailing.next(), None);
}
439
/// An unparsable entry between two valid ones is reported as `Err` with the
/// offending slice, and iteration carries on afterwards.
#[test]
fn test_parse_iterator_invalid() {
    let mut entries = MimeIter::new("application/json, invalid, application/json");

    let first = entries.next().unwrap();
    assert_eq!(first.unwrap(), parse("application/json").unwrap());

    let second = entries.next().unwrap();
    assert_eq!(second.unwrap_err(), "invalid");

    let third = entries.next().unwrap();
    assert_eq!(third.unwrap(), parse("application/json").unwrap());

    assert_eq!(entries.next(), None);
}
448
/// The string as a whole is not a single media type (the `,` is an invalid
/// token), but the iterator splits at the comma and both entries parse.
#[test]
fn test_parse_iterator_all_invalid() {
    let mut iter = MimeIter::new("application/json, text/html");
    assert_eq!(iter.next().unwrap().unwrap(), parse("application/json").unwrap());
    assert_eq!(iter.next().unwrap().unwrap(), parse("text/html").unwrap());
    assert_eq!(iter.next(), None);
}
455

Provided by KDAB

Definitions