| 1 | use std::convert::TryFrom; |
| 2 | use std::fmt; |
| 3 | use std::hash::{Hash, Hasher}; |
| 4 | use std::str::FromStr; |
| 5 | |
| 6 | use bytes::Bytes; |
| 7 | |
| 8 | use super::{ErrorKind, InvalidUri}; |
| 9 | use crate::byte_str::ByteStr; |
| 10 | |
| 11 | /// Represents the scheme component of a URI |
| 12 | #[derive (Clone)] |
| 13 | pub struct Scheme { |
| 14 | pub(super) inner: Scheme2, |
| 15 | } |
| 16 | |
| 17 | #[derive (Clone, Debug)] |
| 18 | pub(super) enum Scheme2<T = Box<ByteStr>> { |
| 19 | None, |
| 20 | Standard(Protocol), |
| 21 | Other(T), |
| 22 | } |
| 23 | |
| 24 | #[derive (Copy, Clone, Debug)] |
| 25 | pub(super) enum Protocol { |
| 26 | Http, |
| 27 | Https, |
| 28 | } |
| 29 | |
| 30 | impl Scheme { |
| 31 | /// HTTP protocol scheme |
| 32 | pub const HTTP: Scheme = Scheme { |
| 33 | inner: Scheme2::Standard(Protocol::Http), |
| 34 | }; |
| 35 | |
| 36 | /// HTTP protocol over TLS. |
| 37 | pub const HTTPS: Scheme = Scheme { |
| 38 | inner: Scheme2::Standard(Protocol::Https), |
| 39 | }; |
| 40 | |
| 41 | pub(super) fn empty() -> Self { |
| 42 | Scheme { |
| 43 | inner: Scheme2::None, |
| 44 | } |
| 45 | } |
| 46 | |
| 47 | /// Return a str representation of the scheme |
| 48 | /// |
| 49 | /// # Examples |
| 50 | /// |
| 51 | /// ``` |
| 52 | /// # use http::uri::*; |
| 53 | /// let scheme: Scheme = "http" .parse().unwrap(); |
| 54 | /// assert_eq!(scheme.as_str(), "http" ); |
| 55 | /// ``` |
| 56 | #[inline ] |
| 57 | pub fn as_str(&self) -> &str { |
| 58 | use self::Protocol::*; |
| 59 | use self::Scheme2::*; |
| 60 | |
| 61 | match self.inner { |
| 62 | Standard(Http) => "http" , |
| 63 | Standard(Https) => "https" , |
| 64 | Other(ref v) => &v[..], |
| 65 | None => unreachable!(), |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | impl<'a> TryFrom<&'a [u8]> for Scheme { |
| 71 | type Error = InvalidUri; |
| 72 | #[inline ] |
| 73 | fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> { |
| 74 | use self::Scheme2::*; |
| 75 | |
| 76 | match Scheme2::parse_exact(s)? { |
| 77 | None => Err(ErrorKind::InvalidScheme.into()), |
| 78 | Standard(p: Protocol) => Ok(Standard(p).into()), |
| 79 | Other(_) => { |
| 80 | let bytes: Bytes = Bytes::copy_from_slice(data:s); |
| 81 | |
| 82 | // Safety: postcondition on parse_exact() means that s and |
| 83 | // hence bytes are valid UTF-8. |
| 84 | let string: ByteStr = unsafe { ByteStr::from_utf8_unchecked(bytes) }; |
| 85 | |
| 86 | Ok(Other(Box::new(string)).into()) |
| 87 | } |
| 88 | } |
| 89 | } |
| 90 | } |
| 91 | |
| 92 | impl<'a> TryFrom<&'a str> for Scheme { |
| 93 | type Error = InvalidUri; |
| 94 | #[inline ] |
| 95 | fn try_from(s: &'a str) -> Result<Self, Self::Error> { |
| 96 | TryFrom::try_from(s.as_bytes()) |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | impl FromStr for Scheme { |
| 101 | type Err = InvalidUri; |
| 102 | |
| 103 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
| 104 | TryFrom::try_from(s) |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | impl fmt::Debug for Scheme { |
| 109 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 110 | fmt::Debug::fmt(self.as_str(), f) |
| 111 | } |
| 112 | } |
| 113 | |
| 114 | impl fmt::Display for Scheme { |
| 115 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 116 | f.write_str(self.as_str()) |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | impl AsRef<str> for Scheme { |
| 121 | #[inline ] |
| 122 | fn as_ref(&self) -> &str { |
| 123 | self.as_str() |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | impl PartialEq for Scheme { |
| 128 | fn eq(&self, other: &Scheme) -> bool { |
| 129 | use self::Protocol::*; |
| 130 | use self::Scheme2::*; |
| 131 | |
| 132 | match (&self.inner, &other.inner) { |
| 133 | (&Standard(Http), &Standard(Http)) => true, |
| 134 | (&Standard(Https), &Standard(Https)) => true, |
| 135 | (Other(a: &Box), Other(b: &Box)) => a.eq_ignore_ascii_case(b), |
| 136 | (&None, _) | (_, &None) => unreachable!(), |
| 137 | _ => false, |
| 138 | } |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | impl Eq for Scheme {} |
| 143 | |
| 144 | /// Case-insensitive equality |
| 145 | /// |
| 146 | /// # Examples |
| 147 | /// |
| 148 | /// ``` |
| 149 | /// # use http::uri::Scheme; |
| 150 | /// let scheme: Scheme = "HTTP" .parse().unwrap(); |
| 151 | /// assert_eq!(scheme, *"http" ); |
| 152 | /// ``` |
| 153 | impl PartialEq<str> for Scheme { |
| 154 | fn eq(&self, other: &str) -> bool { |
| 155 | self.as_str().eq_ignore_ascii_case(other) |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | /// Case-insensitive equality |
| 160 | impl PartialEq<Scheme> for str { |
| 161 | fn eq(&self, other: &Scheme) -> bool { |
| 162 | other == self |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | /// Case-insensitive hashing |
| 167 | impl Hash for Scheme { |
| 168 | fn hash<H>(&self, state: &mut H) |
| 169 | where |
| 170 | H: Hasher, |
| 171 | { |
| 172 | match self.inner { |
| 173 | Scheme2::None => (), |
| 174 | Scheme2::Standard(Protocol::Http) => state.write_u8(1), |
| 175 | Scheme2::Standard(Protocol::Https) => state.write_u8(2), |
| 176 | Scheme2::Other(ref other: &Box) => { |
| 177 | other.len().hash(state); |
| 178 | for &b: u8 in other.as_bytes() { |
| 179 | state.write_u8(b.to_ascii_lowercase()); |
| 180 | } |
| 181 | } |
| 182 | } |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | impl<T> Scheme2<T> { |
| 187 | pub(super) fn is_none(&self) -> bool { |
| 188 | matches!(*self, Scheme2::None) |
| 189 | } |
| 190 | } |
| 191 | |
// Upper bound, in bytes, on the length of a scheme. Capping the length keeps
// the door open for further optimizations later.
const MAX_SCHEME_LEN: usize = 64;
| 195 | |
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
//
// SCHEME_CHARS is a table of valid characters in the scheme part of a URI. An
// entry in the table is 0 for invalid characters. For valid characters the
// entry is itself (i.e. the entry for 43 is b'+' because b'+' == 43u8).
//
// b':' is not a scheme character, but its entry is non-zero so that the
// parsers can recognise the delimiter that ends a scheme with the same lookup.
//
// An important characteristic of this table is that all entries above 127 are
// invalid. This makes all of the valid entries a valid single-byte UTF-8 code
// point. This means that a slice of such valid entries is valid UTF-8.
#[rustfmt::skip]
const SCHEME_CHARS: [u8; 256] = [
    //  0      1      2      3      4      5      6      7      8      9
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //   x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  1x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  2x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  3x
        0,     0,     0,  b'+',     0,  b'-',  b'.',     0,  b'0',  b'1', //  4x
     b'2',  b'3',  b'4',  b'5',  b'6',  b'7',  b'8',  b'9',  b':',     0, //  5x
        0,     0,     0,     0,     0,  b'A',  b'B',  b'C',  b'D',  b'E', //  6x
     b'F',  b'G',  b'H',  b'I',  b'J',  b'K',  b'L',  b'M',  b'N',  b'O', //  7x
     b'P',  b'Q',  b'R',  b'S',  b'T',  b'U',  b'V',  b'W',  b'X',  b'Y', //  8x
     b'Z',     0,     0,     0,     0,     0,     0,  b'a',  b'b',  b'c', //  9x
     b'd',  b'e',  b'f',  b'g',  b'h',  b'i',  b'j',  b'k',  b'l',  b'm', // 10x
     b'n',  b'o',  b'p',  b'q',  b'r',  b's',  b't',  b'u',  b'v',  b'w', // 11x
     b'x',  b'y',  b'z',     0,     0,     0,     0,     0,     0,     0, // 12x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 13x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 14x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 15x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 16x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 17x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 18x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 19x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 20x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 21x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 22x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 23x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 24x
        0,     0,     0,     0,     0,     0                              // 25x
];
| 234 | |
| 235 | impl Scheme2<usize> { |
| 236 | // Postcondition: On all Ok() returns, s is valid UTF-8 |
| 237 | fn parse_exact(s: &[u8]) -> Result<Scheme2<()>, InvalidUri> { |
| 238 | match s { |
| 239 | b"http" => Ok(Protocol::Http.into()), |
| 240 | b"https" => Ok(Protocol::Https.into()), |
| 241 | _ => { |
| 242 | if s.len() > MAX_SCHEME_LEN { |
| 243 | return Err(ErrorKind::SchemeTooLong.into()); |
| 244 | } |
| 245 | |
| 246 | // check that each byte in s is a SCHEME_CHARS which implies |
| 247 | // that it is a valid single byte UTF-8 code point. |
| 248 | for &b in s { |
| 249 | match SCHEME_CHARS[b as usize] { |
| 250 | b':' => { |
| 251 | // Don't want :// here |
| 252 | return Err(ErrorKind::InvalidScheme.into()); |
| 253 | } |
| 254 | 0 => { |
| 255 | return Err(ErrorKind::InvalidScheme.into()); |
| 256 | } |
| 257 | _ => {} |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | Ok(Scheme2::Other(())) |
| 262 | } |
| 263 | } |
| 264 | } |
| 265 | |
| 266 | pub(super) fn parse(s: &[u8]) -> Result<Scheme2<usize>, InvalidUri> { |
| 267 | if s.len() >= 7 { |
| 268 | // Check for HTTP |
| 269 | if s[..7].eq_ignore_ascii_case(b"http://" ) { |
| 270 | // Prefix will be striped |
| 271 | return Ok(Protocol::Http.into()); |
| 272 | } |
| 273 | } |
| 274 | |
| 275 | if s.len() >= 8 { |
| 276 | // Check for HTTPs |
| 277 | if s[..8].eq_ignore_ascii_case(b"https://" ) { |
| 278 | return Ok(Protocol::Https.into()); |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | if s.len() > 3 { |
| 283 | for i in 0..s.len() { |
| 284 | let b = s[i]; |
| 285 | |
| 286 | match SCHEME_CHARS[b as usize] { |
| 287 | b':' => { |
| 288 | // Not enough data remaining |
| 289 | if s.len() < i + 3 { |
| 290 | break; |
| 291 | } |
| 292 | |
| 293 | // Not a scheme |
| 294 | if &s[i + 1..i + 3] != b"//" { |
| 295 | break; |
| 296 | } |
| 297 | |
| 298 | if i > MAX_SCHEME_LEN { |
| 299 | return Err(ErrorKind::SchemeTooLong.into()); |
| 300 | } |
| 301 | |
| 302 | // Return scheme |
| 303 | return Ok(Scheme2::Other(i)); |
| 304 | } |
| 305 | // Invalid scheme character, abort |
| 306 | 0 => break, |
| 307 | _ => {} |
| 308 | } |
| 309 | } |
| 310 | } |
| 311 | |
| 312 | Ok(Scheme2::None) |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | impl Protocol { |
| 317 | pub(super) fn len(&self) -> usize { |
| 318 | match *self { |
| 319 | Protocol::Http => 4, |
| 320 | Protocol::Https => 5, |
| 321 | } |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | impl<T> From<Protocol> for Scheme2<T> { |
| 326 | fn from(src: Protocol) -> Self { |
| 327 | Scheme2::Standard(src) |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | #[doc (hidden)] |
| 332 | impl From<Scheme2> for Scheme { |
| 333 | fn from(src: Scheme2) -> Self { |
| 334 | Scheme { inner: src } |
| 335 | } |
| 336 | } |
| 337 | |
#[cfg(test)]
mod test {
    use super::*;

    // Parsed schemes compare equal to their textual form.
    #[test]
    fn scheme_eq_to_str() {
        assert_eq!(&scheme("http"), "http");
        assert_eq!(&scheme("https"), "https");
        assert_eq!(&scheme("ftp"), "ftp");
        assert_eq!(&scheme("my+funky+scheme"), "my+funky+scheme");
    }

    // Bytes outside the scheme grammar must be rejected.
    #[test]
    fn invalid_scheme_is_error() {
        Scheme::try_from("my_funky_scheme").expect_err("Unexpectedly valid Scheme");

        // Invalid UTF-8
        Scheme::try_from([0xC0].as_ref()).expect_err("Unexpectedly valid Scheme");
    }

    // Parses `s`, panicking with the offending input if it is not a valid
    // scheme. The message is only formatted on failure.
    fn scheme(s: &str) -> Scheme {
        s.parse::<Scheme>()
            .unwrap_or_else(|e| panic!("Invalid scheme: {}: {:?}", s, e))
    }
}
| 362 | |