1#[allow(unused, deprecated)]
2use std::ascii::AsciiExt;
3use std::error::Error;
4use std::fmt;
5use std::iter::Enumerate;
6use std::str::Bytes;
7
8use super::{Mime, MimeIter, Source, ParamSource, Indexed, CHARSET, UTF_8};
9
/// The reasons a string can fail to parse as a media type.
#[derive(Debug)]
pub enum ParseError {
    /// A slash (`/`) was missing between the type and subtype.
    MissingSlash,
    /// An equals sign (`=`) was missing between a parameter and its value.
    MissingEqual,
    /// A quote (`"`) was missing from a parameter value.
    MissingQuote,
    /// A byte that is not accepted at its position was encountered.
    InvalidToken {
        /// Index of the offending byte in the string that was parsed.
        pos: usize,
        /// The offending byte.
        byte: u8,
    },
}

impl ParseError {
    /// Returns the fixed description for this variant, without any
    /// position information.
    fn s(&self) -> &str {
        match *self {
            ParseError::MissingSlash => "a slash (/) was missing between the type and subtype",
            ParseError::MissingEqual => "an equals sign (=) was missing between a parameter and its value",
            ParseError::MissingQuote => "a quote (\") was missing from a parameter value",
            ParseError::InvalidToken { .. } => "an invalid token was encountered",
        }
    }
}

impl fmt::Display for ParseError {
    /// Writes the variant description; for `InvalidToken` the offending byte
    /// (upper-case hexadecimal) and its position are appended.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Bind the fields by their own names. Writing `pos: usize` in a
        // pattern would bind the field to a variable *called* `usize` and
        // leave `pos` undefined.
        if let ParseError::InvalidToken { pos, byte } = *self {
            write!(f, "{}, {:X} at position {}", self.s(), byte, pos)
        } else {
            f.write_str(self.s())
        }
    }
}

impl Error for ParseError {
    // Minimum Rust is 1.15, Error::description was still required then
    #[allow(deprecated)]
    fn description(&self) -> &str {
        self.s()
    }
}
51
52impl<'a> MimeIter<'a> {
53 /// A new iterator over mimes or media types
54 pub fn new(s: &'a str) -> Self {
55 Self {
56 pos: 0,
57 source: s,
58 }
59 }
60}
61
62impl<'a> Iterator for MimeIter<'a> {
63 type Item = Result<Mime, &'a str>;
64
65 fn next(&mut self) -> Option<Self::Item> {
66 let start = self.pos;
67 let len = self.source.bytes().len();
68
69 if start >= len {
70 return None
71 }
72
73 // Try parsing the whole remaining slice, until the end
74 match parse(&self.source[start ..len]) {
75 Ok(value) => {
76 self.pos = len;
77 Some(Ok(value))
78 }
79 Err(ParseError::InvalidToken { pos, .. }) => {
80 // The first token is immediately found to be wrong by `parse`. Skip it
81 if pos == 0 {
82 self.pos += 1;
83 return self.next()
84 }
85 let slice = &self.source[start .. start + pos];
86 // Try parsing the longest slice (until the first invalid token)
87 return match parse(slice) {
88 Ok(mime) => {
89 self.pos = start + pos + 1;
90 Some(Ok(mime))
91 }
92 Err(_) => {
93 if start + pos < len {
94 // Skip this invalid slice,
95 // try parsing the remaining slice in the next iteration
96 self.pos = start + pos;
97 Some(Err(slice))
98 } else {
99 None
100 }
101 }
102 }
103 }
104 // Do not process any other error condition: the slice is malformed and
105 // no character is found to be invalid: a character is missing
106 Err(_) => None,
107 }
108 }
109}
110
111pub fn parse(s: &str) -> Result<Mime, ParseError> {
112 if s == "*/*" {
113 return Ok(::STAR_STAR);
114 }
115
116 let mut iter = s.bytes().enumerate();
117 // toplevel
118 let mut start;
119 let slash;
120 loop {
121 match iter.next() {
122 Some((_, c)) if is_token(c) => (),
123 Some((i, b'/')) if i > 0 => {
124 slash = i;
125 start = i + 1;
126 break;
127 },
128 None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime
129 Some((pos, byte)) => return Err(ParseError::InvalidToken {
130 pos: pos,
131 byte: byte,
132 })
133 };
134
135 }
136
137 // sublevel
138 let mut plus = None;
139 loop {
140 match iter.next() {
141 Some((i, b'+')) if i > start => {
142 plus = Some(i);
143 },
144 Some((i, b';')) if i > start => {
145 start = i;
146 break;
147 },
148 Some((_, c)) if is_token(c) => (),
149 None => {
150 return Ok(Mime {
151 source: Source::Dynamic(s.to_ascii_lowercase()),
152 slash: slash,
153 plus: plus,
154 params: ParamSource::None,
155 });
156 },
157 Some((pos, byte)) => return Err(ParseError::InvalidToken {
158 pos: pos,
159 byte: byte,
160 })
161 };
162 }
163
164 // params
165 let params = params_from_str(s, &mut iter, start)?;
166
167 let src = match params {
168 ParamSource::Utf8(_) => s.to_ascii_lowercase(),
169 ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices),
170 ParamSource::None => {
171 // Chop off the empty list
172 s[..start].to_ascii_lowercase()
173 }
174 };
175
176 Ok(Mime {
177 source: Source::Dynamic(src),
178 slash: slash,
179 plus: plus,
180 params: params,
181 })
182}
183
184
185fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> {
186 let semicolon = start;
187 start += 1;
188 let mut params = ParamSource::None;
189 'params: while start < s.len() {
190 let name;
191 // name
192 'name: loop {
193 match iter.next() {
194 Some((i, b' ')) if i == start => {
195 start = i + 1;
196 continue 'params;
197 },
198 Some((_, c)) if is_token(c) => (),
199 Some((i, b'=')) if i > start => {
200 name = Indexed(start, i);
201 start = i + 1;
202 break 'name;
203 },
204 None => return Err(ParseError::MissingEqual),
205 Some((pos, byte)) => return Err(ParseError::InvalidToken {
206 pos: pos,
207 byte: byte,
208 }),
209 }
210 }
211
212 let value;
213 // values must be restrict-name-char or "anything goes"
214 let mut is_quoted = false;
215
216 'value: loop {
217 if is_quoted {
218 match iter.next() {
219 Some((i, b'"')) if i > start => {
220 value = Indexed(start, i);
221 break 'value;
222 },
223 Some((_, c)) if is_restricted_quoted_char(c) => (),
224 None => return Err(ParseError::MissingQuote),
225 Some((pos, byte)) => return Err(ParseError::InvalidToken {
226 pos: pos,
227 byte: byte,
228 }),
229 }
230 } else {
231 match iter.next() {
232 Some((i, b'"')) if i == start => {
233 is_quoted = true;
234 start = i + 1;
235 },
236 Some((_, c)) if is_token(c) => (),
237 Some((i, b';')) if i > start => {
238 value = Indexed(start, i);
239 start = i + 1;
240 break 'value;
241 }
242 None => {
243 value = Indexed(start, s.len());
244 start = s.len();
245 break 'value;
246 },
247
248 Some((pos, byte)) => return Err(ParseError::InvalidToken {
249 pos: pos,
250 byte: byte,
251 }),
252 }
253 }
254 }
255
256 if is_quoted {
257 'ws: loop {
258 match iter.next() {
259 Some((i, b';')) => {
260 // next param
261 start = i + 1;
262 break 'ws;
263 },
264 Some((_, b' ')) => {
265 // skip whitespace
266 },
267 None => {
268 // eof
269 start = s.len();
270 break 'ws;
271 },
272 Some((pos, byte)) => return Err(ParseError::InvalidToken {
273 pos: pos,
274 byte: byte,
275 }),
276 }
277 }
278 }
279
280 match params {
281 ParamSource::Utf8(i) => {
282 let i = i + 2;
283 let charset = Indexed(i, "charset".len() + i);
284 let utf8 = Indexed(charset.1 + 1, charset.1 + "utf-8".len() + 1);
285 params = ParamSource::Custom(semicolon, vec![
286 (charset, utf8),
287 (name, value),
288 ]);
289 },
290 ParamSource::Custom(_, ref mut vec) => {
291 vec.push((name, value));
292 },
293 ParamSource::None => {
294 if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] {
295 if UTF_8 == &s[value.0..value.1] {
296 params = ParamSource::Utf8(semicolon);
297 continue 'params;
298 }
299 }
300 params = ParamSource::Custom(semicolon, vec![(name, value)]);
301 },
302 }
303 }
304 Ok(params)
305}
306
307fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String {
308 let mut owned: String = s.to_owned();
309 owned[..semi].make_ascii_lowercase();
310
311 for &(ref name: &Indexed, ref value: &Indexed) in params {
312 owned[name.0..name.1].make_ascii_lowercase();
313 // Since we just converted this part of the string to lowercase,
314 // we can skip the `Name == &str` unicase check and do a faster
315 // memcmp instead.
316 if &owned[name.0..name.1] == CHARSET.source {
317 owned[value.0..value.1].make_ascii_lowercase();
318 }
319 }
320
321 owned
322}
323
324// From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
325//
326// > All registered media types MUST be assigned top-level type and
327// > subtype names. The combination of these names serves to uniquely
328// > identify the media type, and the subtype name facet (or the absence
329// > of one) identifies the registration tree. Both top-level type and
330// > subtype names are case-insensitive.
331// >
332// > Type and subtype names MUST conform to the following ABNF:
333// >
334// > type-name = restricted-name
335// > subtype-name = restricted-name
336// >
337// > restricted-name = restricted-name-first *126restricted-name-chars
338// > restricted-name-first = ALPHA / DIGIT
339// > restricted-name-chars = ALPHA / DIGIT / "!" / "#" /
340// > "$" / "&" / "-" / "^" / "_"
341// > restricted-name-chars =/ "." ; Characters before first dot always
342// > ; specify a facet name
343// > restricted-name-chars =/ "+" ; Characters after last plus always
344// > ; specify a structured syntax suffix
345
346// However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1):
347//
348// > media-type = type "/" subtype *( OWS ";" OWS parameter )
349// > type = token
350// > subtype = token
351// > parameter = token "=" ( token / quoted-string )
352//
353// Where token is defined as:
354//
355// > token = 1*tchar
356// > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
357// > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
358//
// So, clearly, ¯\_(Ä)_/¯ -- the two grammars disagree; the lookup table
// below accepts exactly the HTTP `tchar` set.
360
// Turns a comma-terminated list of integer flags into an array of `bool`:
// every non-zero flag becomes `true`, every zero becomes `false`.
macro_rules! byte_map {
    ($($bit:expr,)*) => {
        [ $( 0 != $bit, )* ]
    };
}
366
367static TOKEN_MAP: [bool; 256] = byte_map![
368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
369 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
370 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
371 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
372 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
373 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
374 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
375 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
376 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
378 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384];
385
386fn is_token(c: u8) -> bool {
387 TOKEN_MAP[c as usize]
388}
389
/// Reports whether `c` may appear inside a quoted parameter value: any byte
/// except the values 0 through 31 and 127.
fn is_restricted_quoted_char(c: u8) -> bool {
    let is_excluded = c < 32 || c == 127;
    !is_excluded
}
393
/// Checks every entry of `TOKEN_MAP` against the expected token set:
/// ASCII letters, ASCII digits and a fixed list of punctuation bytes.
#[test]
fn test_lookup_tables() {
    const PUNCTUATION: &'static [u8] = b"!#$%&'*+-.^_`|~";

    for (i, &valid) in TOKEN_MAP.iter().enumerate() {
        let i = i as u8;
        let is_alpha = (b'a' <= i && i <= b'z') || (b'A' <= i && i <= b'Z');
        let is_digit = b'0' <= i && i <= b'9';
        let should = is_alpha || is_digit || PUNCTUATION.contains(&i);
        assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
    }
}
423
/// Every entry of each input must come out as `application/json`, followed
/// by the end of iteration.
#[test]
fn test_parse_iterator() {
    let expected = parse("application/json").unwrap();

    // (input, number of media types it must yield)
    let cases = [
        ("application/json, application/json", 2),
        ("application/json", 1),
        ("application/json; ", 1),
    ];

    for &(input, count) in cases.iter() {
        let mut mimes = MimeIter::new(input);
        for _ in 0..count {
            assert_eq!(mimes.next().unwrap().unwrap(), expected);
        }
        assert_eq!(mimes.next(), None);
    }
}
439
/// An unparsable entry in the middle is handed back as `Err` and does not
/// stop the entries around it from being parsed.
#[test]
fn test_parse_iterator_invalid() {
    let json = parse("application/json").unwrap();
    let mut mimes = MimeIter::new("application/json, invalid, application/json");

    assert_eq!(mimes.next().unwrap().unwrap(), json);
    assert_eq!(mimes.next().unwrap().unwrap_err(), "invalid");
    assert_eq!(mimes.next().unwrap().unwrap(), json);
    assert_eq!(mimes.next(), None);
}
448
/// No entry of the input is a media type.
///
/// The previous input, `"application/json, text/html"`, consists of two
/// valid media types, so its first item is `Ok` and `unwrap_err` panicked.
#[test]
fn test_parse_iterator_all_invalid() {
    // Neither entry contains a slash.
    let mut iter = MimeIter::new("application, text");
    // The first entry is cut at the comma, fails to parse and is handed back.
    assert_eq!(iter.next().unwrap().unwrap_err(), "application");
    // The last entry fails because the input ends (missing slash), not
    // because of an invalid byte, which ends the iteration.
    assert_eq!(iter.next(), None);
}
455