1 | use crate::error::{Error, ErrorKind}; |
2 | use std::fmt; |
3 | use std::str::{from_utf8, FromStr}; |
4 | |
/// One raw line of an HTTP response head (status line or header field).
///
/// Such a line may contain bytes that are not valid UTF-8, which is why the
/// content is kept as a plain `Vec<u8>` rather than a `String`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct HeaderLine(Vec<u8>);
9 | |
10 | impl From<String> for HeaderLine { |
11 | fn from(s: String) -> Self { |
12 | HeaderLine(s.into_bytes()) |
13 | } |
14 | } |
15 | |
16 | impl From<Vec<u8>> for HeaderLine { |
17 | fn from(b: Vec<u8>) -> Self { |
18 | HeaderLine(b) |
19 | } |
20 | } |
21 | |
22 | impl HeaderLine { |
23 | pub fn into_string_lossy(self) -> String { |
24 | // Try to avoid an extra allcation. |
25 | String::from_utf8(self.0) |
26 | .unwrap_or_else(|e| String::from_utf8_lossy(&e.into_bytes()).to_string()) |
27 | } |
28 | |
29 | pub fn is_empty(&self) -> bool { |
30 | self.0.is_empty() |
31 | } |
32 | |
33 | fn as_bytes(&self) -> &[u8] { |
34 | &self.0 |
35 | } |
36 | |
37 | pub fn into_header(self) -> Result<Header, Error> { |
38 | // The header name should always be ascii, we can read anything up to the |
39 | // ':' delimiter byte-by-byte. |
40 | let mut index = 0; |
41 | |
42 | for c in self.as_bytes() { |
43 | if *c == b':' { |
44 | break; |
45 | } |
46 | if !is_tchar(c) { |
47 | return Err(Error::new( |
48 | ErrorKind::BadHeader, |
49 | Some(format!("Invalid char ( {:0x?}) while looking for ':'" , *c)), |
50 | )); |
51 | } |
52 | index += 1; |
53 | } |
54 | |
55 | Ok(Header { line: self, index }) |
56 | } |
57 | } |
58 | |
59 | impl fmt::Display for HeaderLine { |
60 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
61 | write!(f, " {}" , String::from_utf8_lossy(&self.0)) |
62 | } |
63 | } |
64 | |
65 | #[derive (Clone, PartialEq, Eq)] |
66 | /// Wrapper type for a header field. |
67 | /// <https://tools.ietf.org/html/rfc7230#section-3.2> |
68 | pub(crate) struct Header { |
69 | // Line contains the unmodified bytes of single header field. |
70 | // It does not contain the final CRLF. |
71 | line: HeaderLine, |
72 | // Index is the position of the colon within the header field. |
73 | // Invariant: index > 0 |
74 | // Invariant: index + 1 < line.len() |
75 | index: usize, |
76 | } |
77 | |
78 | impl fmt::Debug for Header { |
79 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
80 | write!(f, " {}" , self.line) |
81 | } |
82 | } |
83 | |
84 | impl Header { |
85 | pub fn new(name: &str, value: &str) -> Self { |
86 | let line = format!(" {}: {}" , name, value).into(); |
87 | let index = name.len(); |
88 | Header { line, index } |
89 | } |
90 | |
91 | /// The header name. |
92 | pub fn name(&self) -> &str { |
93 | let bytes = &self.line.as_bytes()[0..self.index]; |
94 | // Since we validate the header name in HeaderLine::into_header, we |
95 | // are guaranteed it is valid utf-8 at this point. |
96 | from_utf8(bytes).expect("Legal chars in header name" ) |
97 | } |
98 | |
99 | /// The header value. |
100 | /// |
101 | /// For non-utf8 headers this returns [`None`] (use [`Header::value_raw()`]). |
102 | pub fn value(&self) -> Option<&str> { |
103 | let bytes = &self.line.as_bytes()[self.index + 1..]; |
104 | from_utf8(bytes) |
105 | .map(|s| s.trim()) |
106 | .ok() |
107 | // ensure all bytes are valid field name. |
108 | .filter(|s| s.as_bytes().iter().all(is_field_vchar_or_obs_fold)) |
109 | } |
110 | |
111 | /// The header value as a byte slice. |
112 | /// |
113 | /// For legacy reasons, the HTTP spec allows headers to be non-ascii characters. |
114 | /// Typically such headers are encoded in a non-utf8 encoding (such as iso-8859-1). |
115 | /// |
116 | /// ureq can't know what encoding the header is in, but this function provides |
117 | /// an escape hatch for users that need to handle such headers. |
118 | #[allow (unused)] |
119 | pub fn value_raw(&self) -> &[u8] { |
120 | let mut bytes = &self.line.as_bytes()[self.index + 1..]; |
121 | |
122 | if !bytes.is_empty() { |
123 | // trim front |
124 | while !bytes.is_empty() && bytes[0].is_ascii_whitespace() { |
125 | bytes = &bytes[1..]; |
126 | } |
127 | // trim back |
128 | while !bytes.is_empty() && bytes[bytes.len() - 1].is_ascii_whitespace() { |
129 | bytes = &bytes[..(bytes.len() - 1)]; |
130 | } |
131 | } |
132 | |
133 | bytes |
134 | } |
135 | |
136 | /// Compares the given str to the header name ignoring case. |
137 | pub fn is_name(&self, other: &str) -> bool { |
138 | self.name().eq_ignore_ascii_case(other) |
139 | } |
140 | |
141 | pub(crate) fn validate(&self) -> Result<(), Error> { |
142 | let bytes = self.line.as_bytes(); |
143 | let name_raw = &bytes[0..self.index]; |
144 | let value_raw = &bytes[self.index + 1..]; |
145 | |
146 | if !valid_name(name_raw) || !valid_value(value_raw) { |
147 | Err(ErrorKind::BadHeader.msg(format!("invalid header ' {}'" , self.line))) |
148 | } else { |
149 | Ok(()) |
150 | } |
151 | } |
152 | } |
153 | |
154 | /// For non-utf8 headers this returns [`None`] (use [`get_header_raw()`]). |
155 | pub(crate) fn get_header<'h>(headers: &'h [Header], name: &str) -> Option<&'h str> { |
156 | headersOption<&Header> |
157 | .iter() |
158 | .find(|h: &&Header| h.is_name(name)) |
159 | .and_then(|h: &Header| h.value()) |
160 | } |
161 | |
162 | #[allow (unused)] |
163 | pub(crate) fn get_header_raw<'h>(headers: &'h [Header], name: &str) -> Option<&'h [u8]> { |
164 | headersOption<&Header> |
165 | .iter() |
166 | .find(|h: &&Header| h.is_name(name)) |
167 | .map(|h: &Header| h.value_raw()) |
168 | } |
169 | |
170 | pub(crate) fn get_all_headers<'h>(headers: &'h [Header], name: &str) -> Vec<&'h str> { |
171 | headersimpl Iterator |
172 | .iter() |
173 | .filter(|h: &&Header| h.is_name(name)) |
174 | .filter_map(|h: &Header| h.value()) |
175 | .collect() |
176 | } |
177 | |
178 | pub(crate) fn has_header(headers: &[Header], name: &str) -> bool { |
179 | get_header(headers, name).is_some() |
180 | } |
181 | |
182 | pub(crate) fn add_header(headers: &mut Vec<Header>, header: Header) { |
183 | let name: &str = header.name(); |
184 | if !name.starts_with("x-" ) && !name.starts_with("X-" ) { |
185 | headers.retain(|h: &Header| h.name() != name); |
186 | } |
187 | headers.push(header); |
188 | } |
189 | |
190 | // https://tools.ietf.org/html/rfc7230#section-3.2 |
191 | // Each header field consists of a case-insensitive field name followed |
192 | // by a colon (":"), optional leading whitespace, the field value, and |
193 | // optional trailing whitespace. |
194 | // field-name = token |
195 | // token = 1*tchar |
196 | // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / |
197 | // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA |
198 | fn valid_name(name: &[u8]) -> bool { |
199 | !name.is_empty() && name.iter().all(is_tchar) |
200 | } |
201 | |
/// Returns `true` when `b` is a `tchar`: an ASCII letter, an ASCII digit, or
/// one of the punctuation bytes ``! # $ % & ' * + - . ^ _ ` | ~``.
#[inline]
pub(crate) fn is_tchar(b: &u8) -> bool {
    matches!(
        *b,
        b'!' | b'#' | b'$' | b'%' | b'&'
            | b'\'' | b'*' | b'+' | b'-' | b'.'
            | b'^' | b'_' | b'`' | b'|' | b'~'
            | b'0'..=b'9'
            | b'A'..=b'Z'
            | b'a'..=b'z'
    )
}
212 | |
213 | // https://tools.ietf.org/html/rfc7230#section-3.2 |
// Note that field-content has an erratum:
215 | // https://www.rfc-editor.org/errata/eid4189 |
216 | // field-value = *( field-content / obs-fold ) |
217 | // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] |
218 | // field-vchar = VCHAR / obs-text |
219 | // |
220 | // obs-fold = CRLF 1*( SP / HTAB ) |
221 | // ; obsolete line folding |
222 | // ; see Section 3.2.4 |
223 | // https://tools.ietf.org/html/rfc5234#appendix-B.1 |
224 | // VCHAR = %x21-7E |
225 | // ; visible (printing) characters |
226 | fn valid_value(value: &[u8]) -> bool { |
227 | value.iter().all(is_field_vchar_or_obs_fold) |
228 | } |
229 | |
/// Returns `true` for the bytes accepted inside a field value: SP, HTAB and
/// the visible ASCII range `0x21..=0x7E`.
///
/// Everything else is rejected, including CR, LF, other control bytes, DEL
/// and all bytes `>= 0x80`.
#[inline]
fn is_field_vchar_or_obs_fold(b: &u8) -> bool {
    matches!(*b, b' ' | b'\t' | 0x21..=0x7E)
}
238 | |
239 | impl FromStr for Header { |
240 | type Err = Error; |
241 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
242 | // |
243 | let line: HeaderLine = s.to_string().into(); |
244 | |
245 | let header: Header = line.into_header()?; |
246 | |
247 | header.validate()?; |
248 | Ok(header) |
249 | } |
250 | } |
251 | |
#[cfg(test)]
mod tests {
    use super::*;

    // Names must be non-empty tokens: no delimiter, whitespace, quotes or
    // non-ASCII bytes.
    #[test]
    fn test_valid_name() {
        assert!(valid_name(b"example"));
        assert!(valid_name(b"Content-Type"));
        assert!(valid_name(b"h-123456789"));
        assert!(!valid_name(b"Content-Type:"));
        assert!(!valid_name(b"Content-Type "));
        assert!(!valid_name(b" some-header"));
        assert!(!valid_name(b" \"invalid \""));
        assert!(!valid_name(b"G \xf6del"));
    }

    // Values may contain SP, HTAB and visible ASCII, but no line breaks or DEL.
    #[test]
    fn test_valid_value() {
        assert!(valid_value(b"example"));
        assert!(valid_value(b"foo bar"));
        assert!(valid_value(b" foobar "));
        assert!(valid_value(b" foo \tbar "));
        assert!(valid_value(b" foo~"));
        assert!(valid_value(b" !bar"));
        assert!(valid_value(b" "));
        assert!(!valid_value(b" \nfoo"));
        assert!(!valid_value(b"foo \x7F"));
    }

    // Every case has either an illegal name or an illegal value and must be
    // rejected with `BadHeader` when parsed.
    #[test]
    fn test_parse_invalid_name() {
        let cases = vec![
            "Content-Type :",
            " Content-Type: foo",
            "Content-Type foo",
            " \"some-header \": foo",
            "Gödel: Escher, Bach",
            "Foo: \n",
            "Foo: \nbar",
            "Foo: \x7F bar",
        ];
        for c in cases {
            let result = c.parse::<Header>();
            assert!(
                matches!(result, Err(ref e) if e.kind() == ErrorKind::BadHeader),
                "' {}'.parse(): expected BadHeader, got {:?}",
                c,
                result
            );
        }
    }

    // A value that is not valid UTF-8 has no string form but is still
    // reachable through `value_raw()`.
    #[test]
    #[cfg(feature = "charset")]
    fn test_parse_non_utf8_value() {
        let (cow, _, _) = encoding_rs::WINDOWS_1252.encode("x-geo-stuff: älvsjö ");
        let bytes = cow.to_vec();
        let line: HeaderLine = bytes.into();
        let header = line.into_header().unwrap();
        assert_eq!(header.name(), "x-geo-stuff");
        assert_eq!(header.value(), None);
        assert_eq!(header.value_raw(), [228, 108, 118, 115, 106, 246]);
    }

    #[test]
    fn empty_value() {
        let h = "foo:".parse::<Header>().unwrap();
        assert_eq!(h.value(), Some(""));
    }

    #[test]
    fn value_with_whitespace() {
        let h = "foo: bar ".parse::<Header>().unwrap();
        assert_eq!(h.value(), Some("bar"));
    }

    #[test]
    fn name_and_value() {
        let header: Header = "X-Forwarded-For: 127.0.0.1".parse().unwrap();
        assert_eq!("X-Forwarded-For", header.name());
        assert_eq!(header.value(), Some("127.0.0.1"));
        assert!(header.is_name("X-Forwarded-For"));
        assert!(header.is_name("x-forwarded-for"));
        assert!(header.is_name("X-FORWARDED-FOR"));
    }

    #[test]
    fn test_iso8859_utf8_mixup() {
        // C2 A5 is ¥ in UTF-8 and Â¥ in ISO-8859-1
        //
        // The bytes are written as a byte string with `\x` escapes. A `\0`
        // escape would be a NUL followed by literal text, and the test would
        // then pass because of the NUL rather than because of C2 A5.
        let b = b"header: \xc2\xa5".to_vec();
        let l: HeaderLine = b.into();
        let h = l.into_header().unwrap();
        assert_eq!(h.value(), None);
    }
}
347 | |