| 1 | #[allow (unused, deprecated)] |
| 2 | use std::ascii::AsciiExt; |
| 3 | use std::error::Error; |
| 4 | use std::fmt; |
| 5 | use std::iter::Enumerate; |
| 6 | use std::str::Bytes; |
| 7 | |
| 8 | use super::{Mime, MimeIter, Source, ParamSource, Indexed, CHARSET, UTF_8}; |
| 9 | |
/// The ways in which parsing a media type string can fail.
#[derive(Debug)]
pub enum ParseError {
    /// No `/` was found between the type and the subtype.
    MissingSlash,
    /// A parameter name was not followed by `=`.
    MissingEqual,
    /// A quoted parameter value was never closed with `"`.
    MissingQuote,
    /// A byte that is not allowed at its position was encountered.
    InvalidToken {
        /// Index of the offending byte within the string that was parsed.
        pos: usize,
        /// The offending byte itself.
        byte: u8,
    },
}
| 20 | |
| 21 | impl ParseError { |
| 22 | fn s(&self) -> &str { |
| 23 | use self::ParseError::*; |
| 24 | |
| 25 | match *self { |
| 26 | MissingSlash => "a slash (/) was missing between the type and subtype" , |
| 27 | MissingEqual => "an equals sign (=) was missing between a parameter and its value" , |
| 28 | MissingQuote => "a quote ( \") was missing from a parameter value" , |
| 29 | InvalidToken { .. } => "an invalid token was encountered" , |
| 30 | } |
| 31 | } |
| 32 | } |
| 33 | |
| 34 | impl fmt::Display for ParseError { |
| 35 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 36 | if let ParseError::InvalidToken { pos: usize, byte: u8 } = *self { |
| 37 | write!(f, " {}, {:X} at position {}" , self.s(), byte, pos) |
| 38 | } else { |
| 39 | f.write_str(self.s()) |
| 40 | } |
| 41 | } |
| 42 | } |
| 43 | |
| 44 | impl Error for ParseError { |
| 45 | // Minimum Rust is 1.15, Error::description was still required then |
| 46 | #[allow (deprecated)] |
| 47 | fn description(&self) -> &str { |
| 48 | self.s() |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | impl<'a> MimeIter<'a> { |
| 53 | /// A new iterator over mimes or media types |
| 54 | pub fn new(s: &'a str) -> Self { |
| 55 | Self { |
| 56 | pos: 0, |
| 57 | source: s, |
| 58 | } |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | impl<'a> Iterator for MimeIter<'a> { |
| 63 | type Item = Result<Mime, &'a str>; |
| 64 | |
| 65 | fn next(&mut self) -> Option<Self::Item> { |
| 66 | let start = self.pos; |
| 67 | let len = self.source.bytes().len(); |
| 68 | |
| 69 | if start >= len { |
| 70 | return None |
| 71 | } |
| 72 | |
| 73 | // Try parsing the whole remaining slice, until the end |
| 74 | match parse(&self.source[start ..len]) { |
| 75 | Ok(value) => { |
| 76 | self.pos = len; |
| 77 | Some(Ok(value)) |
| 78 | } |
| 79 | Err(ParseError::InvalidToken { pos, .. }) => { |
| 80 | // The first token is immediately found to be wrong by `parse`. Skip it |
| 81 | if pos == 0 { |
| 82 | self.pos += 1; |
| 83 | return self.next() |
| 84 | } |
| 85 | let slice = &self.source[start .. start + pos]; |
| 86 | // Try parsing the longest slice (until the first invalid token) |
| 87 | return match parse(slice) { |
| 88 | Ok(mime) => { |
| 89 | self.pos = start + pos + 1; |
| 90 | Some(Ok(mime)) |
| 91 | } |
| 92 | Err(_) => { |
| 93 | if start + pos < len { |
| 94 | // Skip this invalid slice, |
| 95 | // try parsing the remaining slice in the next iteration |
| 96 | self.pos = start + pos; |
| 97 | Some(Err(slice)) |
| 98 | } else { |
| 99 | None |
| 100 | } |
| 101 | } |
| 102 | } |
| 103 | } |
| 104 | // Do not process any other error condition: the slice is malformed and |
| 105 | // no character is found to be invalid: a character is missing |
| 106 | Err(_) => None, |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | pub fn parse(s: &str) -> Result<Mime, ParseError> { |
| 112 | if s == "*/*" { |
| 113 | return Ok(::STAR_STAR); |
| 114 | } |
| 115 | |
| 116 | let mut iter = s.bytes().enumerate(); |
| 117 | // toplevel |
| 118 | let mut start; |
| 119 | let slash; |
| 120 | loop { |
| 121 | match iter.next() { |
| 122 | Some((_, c)) if is_token(c) => (), |
| 123 | Some((i, b'/' )) if i > 0 => { |
| 124 | slash = i; |
| 125 | start = i + 1; |
| 126 | break; |
| 127 | }, |
| 128 | None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime |
| 129 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| 130 | pos: pos, |
| 131 | byte: byte, |
| 132 | }) |
| 133 | }; |
| 134 | |
| 135 | } |
| 136 | |
| 137 | // sublevel |
| 138 | let mut plus = None; |
| 139 | loop { |
| 140 | match iter.next() { |
| 141 | Some((i, b'+' )) if i > start => { |
| 142 | plus = Some(i); |
| 143 | }, |
| 144 | Some((i, b';' )) if i > start => { |
| 145 | start = i; |
| 146 | break; |
| 147 | }, |
| 148 | Some((_, c)) if is_token(c) => (), |
| 149 | None => { |
| 150 | return Ok(Mime { |
| 151 | source: Source::Dynamic(s.to_ascii_lowercase()), |
| 152 | slash: slash, |
| 153 | plus: plus, |
| 154 | params: ParamSource::None, |
| 155 | }); |
| 156 | }, |
| 157 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| 158 | pos: pos, |
| 159 | byte: byte, |
| 160 | }) |
| 161 | }; |
| 162 | } |
| 163 | |
| 164 | // params |
| 165 | let params = params_from_str(s, &mut iter, start)?; |
| 166 | |
| 167 | let src = match params { |
| 168 | ParamSource::Utf8(_) => s.to_ascii_lowercase(), |
| 169 | ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices), |
| 170 | ParamSource::None => { |
| 171 | // Chop off the empty list |
| 172 | s[..start].to_ascii_lowercase() |
| 173 | } |
| 174 | }; |
| 175 | |
| 176 | Ok(Mime { |
| 177 | source: Source::Dynamic(src), |
| 178 | slash: slash, |
| 179 | plus: plus, |
| 180 | params: params, |
| 181 | }) |
| 182 | } |
| 183 | |
| 184 | |
| 185 | fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> { |
| 186 | let semicolon = start; |
| 187 | start += 1; |
| 188 | let mut params = ParamSource::None; |
| 189 | 'params: while start < s.len() { |
| 190 | let name; |
| 191 | // name |
| 192 | 'name: loop { |
| 193 | match iter.next() { |
| 194 | Some((i, b' ' )) if i == start => { |
| 195 | start = i + 1; |
| 196 | continue 'params; |
| 197 | }, |
| 198 | Some((_, c)) if is_token(c) => (), |
| 199 | Some((i, b'=' )) if i > start => { |
| 200 | name = Indexed(start, i); |
| 201 | start = i + 1; |
| 202 | break 'name; |
| 203 | }, |
| 204 | None => return Err(ParseError::MissingEqual), |
| 205 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| 206 | pos: pos, |
| 207 | byte: byte, |
| 208 | }), |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | let value; |
| 213 | // values must be restrict-name-char or "anything goes" |
| 214 | let mut is_quoted = false; |
| 215 | |
| 216 | 'value: loop { |
| 217 | if is_quoted { |
| 218 | match iter.next() { |
| 219 | Some((i, b'"' )) if i > start => { |
| 220 | value = Indexed(start, i); |
| 221 | break 'value; |
| 222 | }, |
| 223 | Some((_, c)) if is_restricted_quoted_char(c) => (), |
| 224 | None => return Err(ParseError::MissingQuote), |
| 225 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| 226 | pos: pos, |
| 227 | byte: byte, |
| 228 | }), |
| 229 | } |
| 230 | } else { |
| 231 | match iter.next() { |
| 232 | Some((i, b'"' )) if i == start => { |
| 233 | is_quoted = true; |
| 234 | start = i + 1; |
| 235 | }, |
| 236 | Some((_, c)) if is_token(c) => (), |
| 237 | Some((i, b';' )) if i > start => { |
| 238 | value = Indexed(start, i); |
| 239 | start = i + 1; |
| 240 | break 'value; |
| 241 | } |
| 242 | None => { |
| 243 | value = Indexed(start, s.len()); |
| 244 | start = s.len(); |
| 245 | break 'value; |
| 246 | }, |
| 247 | |
| 248 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| 249 | pos: pos, |
| 250 | byte: byte, |
| 251 | }), |
| 252 | } |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | if is_quoted { |
| 257 | 'ws: loop { |
| 258 | match iter.next() { |
| 259 | Some((i, b';' )) => { |
| 260 | // next param |
| 261 | start = i + 1; |
| 262 | break 'ws; |
| 263 | }, |
| 264 | Some((_, b' ' )) => { |
| 265 | // skip whitespace |
| 266 | }, |
| 267 | None => { |
| 268 | // eof |
| 269 | start = s.len(); |
| 270 | break 'ws; |
| 271 | }, |
| 272 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| 273 | pos: pos, |
| 274 | byte: byte, |
| 275 | }), |
| 276 | } |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | match params { |
| 281 | ParamSource::Utf8(i) => { |
| 282 | let i = i + 2; |
| 283 | let charset = Indexed(i, "charset" .len() + i); |
| 284 | let utf8 = Indexed(charset.1 + 1, charset.1 + "utf-8" .len() + 1); |
| 285 | params = ParamSource::Custom(semicolon, vec![ |
| 286 | (charset, utf8), |
| 287 | (name, value), |
| 288 | ]); |
| 289 | }, |
| 290 | ParamSource::Custom(_, ref mut vec) => { |
| 291 | vec.push((name, value)); |
| 292 | }, |
| 293 | ParamSource::None => { |
| 294 | if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] { |
| 295 | if UTF_8 == &s[value.0..value.1] { |
| 296 | params = ParamSource::Utf8(semicolon); |
| 297 | continue 'params; |
| 298 | } |
| 299 | } |
| 300 | params = ParamSource::Custom(semicolon, vec![(name, value)]); |
| 301 | }, |
| 302 | } |
| 303 | } |
| 304 | Ok(params) |
| 305 | } |
| 306 | |
| 307 | fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String { |
| 308 | let mut owned: String = s.to_owned(); |
| 309 | owned[..semi].make_ascii_lowercase(); |
| 310 | |
| 311 | for &(ref name: &Indexed, ref value: &Indexed) in params { |
| 312 | owned[name.0..name.1].make_ascii_lowercase(); |
| 313 | // Since we just converted this part of the string to lowercase, |
| 314 | // we can skip the `Name == &str` unicase check and do a faster |
| 315 | // memcmp instead. |
| 316 | if &owned[name.0..name.1] == CHARSET.source { |
| 317 | owned[value.0..value.1].make_ascii_lowercase(); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | owned |
| 322 | } |
| 323 | |
| 324 | // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2): |
| 325 | // |
| 326 | // > All registered media types MUST be assigned top-level type and |
| 327 | // > subtype names. The combination of these names serves to uniquely |
| 328 | // > identify the media type, and the subtype name facet (or the absence |
| 329 | // > of one) identifies the registration tree. Both top-level type and |
| 330 | // > subtype names are case-insensitive. |
| 331 | // > |
| 332 | // > Type and subtype names MUST conform to the following ABNF: |
| 333 | // > |
| 334 | // > type-name = restricted-name |
| 335 | // > subtype-name = restricted-name |
| 336 | // > |
| 337 | // > restricted-name = restricted-name-first *126restricted-name-chars |
| 338 | // > restricted-name-first = ALPHA / DIGIT |
| 339 | // > restricted-name-chars = ALPHA / DIGIT / "!" / "#" / |
| 340 | // > "$" / "&" / "-" / "^" / "_" |
| 341 | // > restricted-name-chars =/ "." ; Characters before first dot always |
| 342 | // > ; specify a facet name |
| 343 | // > restricted-name-chars =/ "+" ; Characters after last plus always |
| 344 | // > ; specify a structured syntax suffix |
| 345 | |
| 346 | // However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1): |
| 347 | // |
| 348 | // > media-type = type "/" subtype *( OWS ";" OWS parameter ) |
| 349 | // > type = token |
| 350 | // > subtype = token |
| 351 | // > parameter = token "=" ( token / quoted-string ) |
| 352 | // |
| 353 | // Where token is defined as: |
| 354 | // |
| 355 | // > token = 1*tchar |
| 356 | // > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / |
| 357 | // > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA |
| 358 | // |
| 359 | // So, clearly, ¯\_(ツ)_/¯ |
| 360 | |
// Turns a comma-terminated list of integer flags into an array of `bool`s:
// every non-zero flag becomes `true`.
macro_rules! byte_map {
    ($($bit:expr,)*) => {
        [$($bit != 0,)*]
    };
}

// Lookup table indexed by byte value: `true` for the bytes accepted as part
// of a token (the HTTP `tchar` set quoted above). Sixteen bytes per row.
static TOKEN_MAP: [bool; 256] = byte_map![
    // 0x00 - 0x0F: control characters
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x10 - 0x1F: control characters
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F: SP ! " # $ % & ' ( ) * + , - . /
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
    // 0x30 - 0x3F: 0-9 : ; < = > ?
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    // 0x40 - 0x4F: @ A-O
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x50 - 0x5F: P-Z [ \ ] ^ _
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    // 0x60 - 0x6F: ` a-o
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70 - 0x7F: p-z { | } ~ DEL
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
    // 0x80 - 0xFF: non-ASCII bytes
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];
| 385 | |
| 386 | fn is_token(c: u8) -> bool { |
| 387 | TOKEN_MAP[c as usize] |
| 388 | } |
| 389 | |
/// Reports whether `c` is accepted inside a quoted parameter value: any byte
/// except the ASCII control characters 0-31 and DEL (127).
fn is_restricted_quoted_char(c: u8) -> bool {
    let is_control = c < 32 || c == 127;
    !is_control
}
| 393 | |
/// Checks every entry of `TOKEN_MAP` against an independently written
/// definition of the token character set.
#[test]
fn test_lookup_tables() {
    for (i, &valid) in TOKEN_MAP.iter().enumerate() {
        let i = i as u8;
        // Plain comparisons instead of range patterns: they compile without
        // warnings on old and new compilers alike.
        let is_alphanumeric = (b'a' <= i && i <= b'z')
            || (b'A' <= i && i <= b'Z')
            || (b'0' <= i && i <= b'9');
        let is_symbol = match i {
            b'!' |
            b'#' |
            b'$' |
            b'%' |
            b'&' |
            b'\'' |
            b'*' |
            b'+' |
            b'-' |
            b'.' |
            b'^' |
            b'_' |
            b'`' |
            b'|' |
            b'~' => true,
            _ => false
        };
        let should = is_alphanumeric || is_symbol;
        assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
    }
}
| 423 | |
/// Well-formed lists: every entry is yielded as `application/json`, then
/// the iterator is exhausted.
#[test]
fn test_parse_iterator() {
    // (input, number of `application/json` items expected before the end)
    let cases = [
        ("application/json, application/json", 2),
        ("application/json", 1),
        ("application/json; ", 1),
    ];

    for &(input, expected) in cases.iter() {
        let mut iter = MimeIter::new(input);
        for _ in 0..expected {
            assert_eq!(iter.next().unwrap().unwrap(), parse("application/json").unwrap());
        }
        assert_eq!(iter.next(), None);
    }
}
| 439 | |
/// An entry without a slash between two valid ones is handed back as an
/// `Err` carrying the rejected slice; the valid neighbours still parse.
#[test]
fn test_parse_iterator_invalid() {
    let json = parse("application/json").unwrap();
    let mut iter = MimeIter::new("application/json, invalid, application/json");

    assert_eq!(iter.next().unwrap().unwrap(), json);
    assert_eq!(iter.next().unwrap().unwrap_err(), "invalid");
    assert_eq!(iter.next().unwrap().unwrap(), json);
    assert_eq!(iter.next(), None);
}
| 448 | |
/// A list whose entries are all invalid (none contains a slash).
///
/// The first entry ends at an invalid byte (`,`), so it is handed back as
/// an `Err`. The last entry merely lacks a slash, which does not point at
/// any invalid byte, so iteration stops with `None`.
#[test]
fn test_parse_iterator_all_invalid() {
    // The previous input, "application/json, text/html", consisted of two
    // valid media types, so the first item was `Ok` and `unwrap_err` panicked.
    let mut iter = MimeIter::new("not-a-mime, another-one");
    assert_eq!(iter.next().unwrap().unwrap_err(), "not-a-mime");
    assert_eq!(iter.next(), None);
}
| 455 | |