1 | use std::convert::TryFrom; |
2 | use std::fmt; |
3 | use std::hash::{Hash, Hasher}; |
4 | use std::str::FromStr; |
5 | |
6 | use bytes::Bytes; |
7 | |
8 | use super::{ErrorKind, InvalidUri}; |
9 | use crate::byte_str::ByteStr; |
10 | |
/// Represents the scheme component of a URI
///
/// The value is a thin wrapper around the module-internal `Scheme2`
/// representation. Comparison and hashing of schemes ignore ASCII case.
#[derive(Clone)]
pub struct Scheme {
    // Internal representation. May be `Scheme2::None` (see `Scheme::empty`),
    // in which case `as_str` and `==` hit `unreachable!()`.
    pub(super) inner: Scheme2,
}
16 | |
// Internal scheme representation, generic over the payload carried by a
// non-standard scheme:
//
// * `Scheme2<Box<ByteStr>>` (the default) is what `Scheme` stores.
// * `Scheme2<usize>` is returned by `Scheme2::parse`; the payload is the
//   index of the `:` that terminates the scheme.
// * `Scheme2<()>` is returned by `Scheme2::parse_exact`, which only validates.
#[derive(Clone, Debug)]
pub(super) enum Scheme2<T = Box<ByteStr>> {
    // No scheme present.
    None,
    // One of the built-in protocols (`http` / `https`).
    Standard(Protocol),
    // Any other scheme; the meaning of `T` depends on the instantiation.
    Other(T),
}
23 | |
// The schemes that are represented without storing their text.
#[derive(Copy, Clone, Debug)]
pub(super) enum Protocol {
    // Rendered as "http" by `Scheme::as_str`.
    Http,
    // Rendered as "https" by `Scheme::as_str`.
    Https,
}
29 | |
30 | impl Scheme { |
31 | /// HTTP protocol scheme |
32 | pub const HTTP: Scheme = Scheme { |
33 | inner: Scheme2::Standard(Protocol::Http), |
34 | }; |
35 | |
36 | /// HTTP protocol over TLS. |
37 | pub const HTTPS: Scheme = Scheme { |
38 | inner: Scheme2::Standard(Protocol::Https), |
39 | }; |
40 | |
41 | pub(super) fn empty() -> Self { |
42 | Scheme { |
43 | inner: Scheme2::None, |
44 | } |
45 | } |
46 | |
47 | /// Return a str representation of the scheme |
48 | /// |
49 | /// # Examples |
50 | /// |
51 | /// ``` |
52 | /// # use http::uri::*; |
53 | /// let scheme: Scheme = "http" .parse().unwrap(); |
54 | /// assert_eq!(scheme.as_str(), "http" ); |
55 | /// ``` |
56 | #[inline ] |
57 | pub fn as_str(&self) -> &str { |
58 | use self::Protocol::*; |
59 | use self::Scheme2::*; |
60 | |
61 | match self.inner { |
62 | Standard(Http) => "http" , |
63 | Standard(Https) => "https" , |
64 | Other(ref v) => &v[..], |
65 | None => unreachable!(), |
66 | } |
67 | } |
68 | } |
69 | |
70 | impl<'a> TryFrom<&'a [u8]> for Scheme { |
71 | type Error = InvalidUri; |
72 | #[inline ] |
73 | fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> { |
74 | use self::Scheme2::*; |
75 | |
76 | match Scheme2::parse_exact(s)? { |
77 | None => Err(ErrorKind::InvalidScheme.into()), |
78 | Standard(p) => Ok(Standard(p).into()), |
79 | Other(_) => { |
80 | let bytes = Bytes::copy_from_slice(s); |
81 | |
82 | // Safety: postcondition on parse_exact() means that s and |
83 | // hence bytes are valid UTF-8. |
84 | let string = unsafe { ByteStr::from_utf8_unchecked(bytes) }; |
85 | |
86 | Ok(Other(Box::new(string)).into()) |
87 | } |
88 | } |
89 | } |
90 | } |
91 | |
92 | impl<'a> TryFrom<&'a str> for Scheme { |
93 | type Error = InvalidUri; |
94 | #[inline ] |
95 | fn try_from(s: &'a str) -> Result<Self, Self::Error> { |
96 | TryFrom::try_from(s.as_bytes()) |
97 | } |
98 | } |
99 | |
100 | impl FromStr for Scheme { |
101 | type Err = InvalidUri; |
102 | |
103 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
104 | TryFrom::try_from(s) |
105 | } |
106 | } |
107 | |
108 | impl fmt::Debug for Scheme { |
109 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
110 | fmt::Debug::fmt(self.as_str(), f) |
111 | } |
112 | } |
113 | |
114 | impl fmt::Display for Scheme { |
115 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
116 | f.write_str(self.as_str()) |
117 | } |
118 | } |
119 | |
120 | impl AsRef<str> for Scheme { |
121 | #[inline ] |
122 | fn as_ref(&self) -> &str { |
123 | self.as_str() |
124 | } |
125 | } |
126 | |
127 | impl PartialEq for Scheme { |
128 | fn eq(&self, other: &Scheme) -> bool { |
129 | use self::Protocol::*; |
130 | use self::Scheme2::*; |
131 | |
132 | match (&self.inner, &other.inner) { |
133 | (&Standard(Http), &Standard(Http)) => true, |
134 | (&Standard(Https), &Standard(Https)) => true, |
135 | (&Other(ref a), &Other(ref b)) => a.eq_ignore_ascii_case(b), |
136 | (&None, _) | (_, &None) => unreachable!(), |
137 | _ => false, |
138 | } |
139 | } |
140 | } |
141 | |
// Marker impl: declares the `PartialEq` relation on `Scheme` a full
// equivalence relation.
impl Eq for Scheme {}
143 | |
144 | /// Case-insensitive equality |
145 | /// |
146 | /// # Examples |
147 | /// |
148 | /// ``` |
149 | /// # use http::uri::Scheme; |
150 | /// let scheme: Scheme = "HTTP" .parse().unwrap(); |
151 | /// assert_eq!(scheme, *"http" ); |
152 | /// ``` |
153 | impl PartialEq<str> for Scheme { |
154 | fn eq(&self, other: &str) -> bool { |
155 | self.as_str().eq_ignore_ascii_case(other) |
156 | } |
157 | } |
158 | |
159 | /// Case-insensitive equality |
160 | impl PartialEq<Scheme> for str { |
161 | fn eq(&self, other: &Scheme) -> bool { |
162 | other == self |
163 | } |
164 | } |
165 | |
166 | /// Case-insensitive hashing |
167 | impl Hash for Scheme { |
168 | fn hash<H>(&self, state: &mut H) |
169 | where |
170 | H: Hasher, |
171 | { |
172 | match self.inner { |
173 | Scheme2::None => (), |
174 | Scheme2::Standard(Protocol::Http) => state.write_u8(1), |
175 | Scheme2::Standard(Protocol::Https) => state.write_u8(2), |
176 | Scheme2::Other(ref other) => { |
177 | other.len().hash(state); |
178 | for &b in other.as_bytes() { |
179 | state.write_u8(b.to_ascii_lowercase()); |
180 | } |
181 | } |
182 | } |
183 | } |
184 | } |
185 | |
186 | impl<T> Scheme2<T> { |
187 | pub(super) fn is_none(&self) -> bool { |
188 | match *self { |
189 | Scheme2::None => true, |
190 | _ => false, |
191 | } |
192 | } |
193 | } |
194 | |
// Require the scheme to not be too long in order to enable further
// optimizations later.
//
// This is the largest accepted scheme length in bytes: both parsers in this
// module report `ErrorKind::SchemeTooLong` only when the length is strictly
// greater than this value.
const MAX_SCHEME_LEN: usize = 64;
198 | |
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
//
// SCHEME_CHARS is a table of valid characters in the scheme part of a URI. An
// entry in the table is 0 for invalid characters. For valid characters the
// entry is itself (i.e. the entry for 43 is b'+' because b'+' == 43u8). An
// important characteristic of this table is that all entries above 127 are
// invalid. This makes all of the valid entries a valid single-byte UTF-8 code
// point. This means that a slice of such valid entries is valid UTF-8.
//
// The one non-zero entry that is NOT a scheme character is b':'. It is kept in
// the table on purpose as the marker for the end of the scheme; every user of
// the table must handle it explicitly.
//
// Note that b'~' is not part of the grammar above and is therefore 0.
#[rustfmt::skip]
const SCHEME_CHARS: [u8; 256] = [
    //  0      1      2      3      4      5      6      7      8      9
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //   x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  1x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  2x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  3x
        0,     0,     0,  b'+',     0,  b'-',  b'.',     0,  b'0',  b'1', //  4x
     b'2',  b'3',  b'4',  b'5',  b'6',  b'7',  b'8',  b'9',  b':',     0, //  5x
        0,     0,     0,     0,     0,  b'A',  b'B',  b'C',  b'D',  b'E', //  6x
     b'F',  b'G',  b'H',  b'I',  b'J',  b'K',  b'L',  b'M',  b'N',  b'O', //  7x
     b'P',  b'Q',  b'R',  b'S',  b'T',  b'U',  b'V',  b'W',  b'X',  b'Y', //  8x
     b'Z',     0,     0,     0,     0,     0,     0,  b'a',  b'b',  b'c', //  9x
     b'd',  b'e',  b'f',  b'g',  b'h',  b'i',  b'j',  b'k',  b'l',  b'm', // 10x
     b'n',  b'o',  b'p',  b'q',  b'r',  b's',  b't',  b'u',  b'v',  b'w', // 11x
     b'x',  b'y',  b'z',     0,     0,     0,     0,     0,     0,     0, // 12x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 13x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 14x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 15x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 16x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 17x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 18x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 19x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 20x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 21x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 22x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 23x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 24x
        0,     0,     0,     0,     0,     0                              // 25x
];
236 | |
237 | impl Scheme2<usize> { |
238 | // Postcondition: On all Ok() returns, s is valid UTF-8 |
239 | fn parse_exact(s: &[u8]) -> Result<Scheme2<()>, InvalidUri> { |
240 | match s { |
241 | b"http" => Ok(Protocol::Http.into()), |
242 | b"https" => Ok(Protocol::Https.into()), |
243 | _ => { |
244 | if s.len() > MAX_SCHEME_LEN { |
245 | return Err(ErrorKind::SchemeTooLong.into()); |
246 | } |
247 | |
248 | // check that each byte in s is a SCHEME_CHARS which implies |
249 | // that it is a valid single byte UTF-8 code point. |
250 | for &b in s { |
251 | match SCHEME_CHARS[b as usize] { |
252 | b':' => { |
253 | // Don't want :// here |
254 | return Err(ErrorKind::InvalidScheme.into()); |
255 | } |
256 | 0 => { |
257 | return Err(ErrorKind::InvalidScheme.into()); |
258 | } |
259 | _ => {} |
260 | } |
261 | } |
262 | |
263 | Ok(Scheme2::Other(())) |
264 | } |
265 | } |
266 | } |
267 | |
268 | pub(super) fn parse(s: &[u8]) -> Result<Scheme2<usize>, InvalidUri> { |
269 | if s.len() >= 7 { |
270 | // Check for HTTP |
271 | if s[..7].eq_ignore_ascii_case(b"http://" ) { |
272 | // Prefix will be striped |
273 | return Ok(Protocol::Http.into()); |
274 | } |
275 | } |
276 | |
277 | if s.len() >= 8 { |
278 | // Check for HTTPs |
279 | if s[..8].eq_ignore_ascii_case(b"https://" ) { |
280 | return Ok(Protocol::Https.into()); |
281 | } |
282 | } |
283 | |
284 | if s.len() > 3 { |
285 | for i in 0..s.len() { |
286 | let b = s[i]; |
287 | |
288 | match SCHEME_CHARS[b as usize] { |
289 | b':' => { |
290 | // Not enough data remaining |
291 | if s.len() < i + 3 { |
292 | break; |
293 | } |
294 | |
295 | // Not a scheme |
296 | if &s[i + 1..i + 3] != b"//" { |
297 | break; |
298 | } |
299 | |
300 | if i > MAX_SCHEME_LEN { |
301 | return Err(ErrorKind::SchemeTooLong.into()); |
302 | } |
303 | |
304 | // Return scheme |
305 | return Ok(Scheme2::Other(i)); |
306 | } |
307 | // Invald scheme character, abort |
308 | 0 => break, |
309 | _ => {} |
310 | } |
311 | } |
312 | } |
313 | |
314 | Ok(Scheme2::None) |
315 | } |
316 | } |
317 | |
318 | impl Protocol { |
319 | pub(super) fn len(&self) -> usize { |
320 | match *self { |
321 | Protocol::Http => 4, |
322 | Protocol::Https => 5, |
323 | } |
324 | } |
325 | } |
326 | |
327 | impl<T> From<Protocol> for Scheme2<T> { |
328 | fn from(src: Protocol) -> Self { |
329 | Scheme2::Standard(src) |
330 | } |
331 | } |
332 | |
333 | #[doc (hidden)] |
334 | impl From<Scheme2> for Scheme { |
335 | fn from(src: Scheme2) -> Self { |
336 | Scheme { inner: src } |
337 | } |
338 | } |
339 | |
#[cfg(test)]
mod test {
    use super::*;

    /// A parsed scheme compares equal to the string it was parsed from.
    #[test]
    fn scheme_eq_to_str() {
        assert_eq!(&scheme("http"), "http");
        assert_eq!(&scheme("https"), "https");
        assert_eq!(&scheme("ftp"), "ftp");
        assert_eq!(&scheme("my+funky+scheme"), "my+funky+scheme");
    }

    /// Input containing bytes outside the scheme alphabet is rejected.
    #[test]
    fn invalid_scheme_is_error() {
        Scheme::try_from("my_funky_scheme").expect_err("Unexpectly valid Scheme");

        // Invalid UTF-8
        Scheme::try_from([0xC0].as_ref()).expect_err("Unexpectly valid Scheme");
    }

    /// Parses `s`, panicking with the offending input if it is rejected.
    fn scheme(s: &str) -> Scheme {
        // Build the panic message lazily, only on failure. The text matches
        // what `expect` would have produced ("<message>: <error:?>").
        s.parse()
            .unwrap_or_else(|err: InvalidUri| panic!("Invalid scheme: {}: {:?}", s, err))
    }
}
364 | |