| 1 | use crate::error::{Error, ErrorKind}; |
| 2 | use std::fmt; |
| 3 | use std::str::{from_utf8, FromStr}; |
| 4 | |
/// One raw line read from or written to an HTTP message head.
///
/// Since a status line or header can contain non-utf8 characters the
/// backing store is a `Vec<u8>`. Equality (`PartialEq`/`Eq`) is therefore a
/// plain byte-for-byte comparison.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct HeaderLine(Vec<u8>);
| 9 | |
| 10 | impl From<String> for HeaderLine { |
| 11 | fn from(s: String) -> Self { |
| 12 | HeaderLine(s.into_bytes()) |
| 13 | } |
| 14 | } |
| 15 | |
| 16 | impl From<Vec<u8>> for HeaderLine { |
| 17 | fn from(b: Vec<u8>) -> Self { |
| 18 | HeaderLine(b) |
| 19 | } |
| 20 | } |
| 21 | |
| 22 | impl HeaderLine { |
| 23 | pub fn into_string_lossy(self) -> String { |
| 24 | // Try to avoid an extra allcation. |
| 25 | String::from_utf8(self.0) |
| 26 | .unwrap_or_else(|e| String::from_utf8_lossy(&e.into_bytes()).to_string()) |
| 27 | } |
| 28 | |
| 29 | pub fn is_empty(&self) -> bool { |
| 30 | self.0.is_empty() |
| 31 | } |
| 32 | |
| 33 | fn as_bytes(&self) -> &[u8] { |
| 34 | &self.0 |
| 35 | } |
| 36 | |
| 37 | pub fn into_header(self) -> Result<Header, Error> { |
| 38 | // The header name should always be ascii, we can read anything up to the |
| 39 | // ':' delimiter byte-by-byte. |
| 40 | let mut index = 0; |
| 41 | |
| 42 | for c in self.as_bytes() { |
| 43 | if *c == b':' { |
| 44 | break; |
| 45 | } |
| 46 | if !is_tchar(c) { |
| 47 | return Err(Error::new( |
| 48 | ErrorKind::BadHeader, |
| 49 | Some(format!("Invalid char ( {:0x?}) while looking for ':'" , *c)), |
| 50 | )); |
| 51 | } |
| 52 | index += 1; |
| 53 | } |
| 54 | |
| 55 | Ok(Header { line: self, index }) |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | impl fmt::Display for HeaderLine { |
| 60 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 61 | write!(f, " {}" , String::from_utf8_lossy(&self.0)) |
| 62 | } |
| 63 | } |
| 64 | |
#[derive(Clone, PartialEq, Eq)]
/// Wrapper type for a header field.
/// <https://tools.ietf.org/html/rfc7230#section-3.2>
///
/// The derived equality compares the raw line bytes and the colon position,
/// so two headers whose names differ only in case are *not* equal.
pub(crate) struct Header {
    // Line contains the unmodified bytes of a single header field.
    // It does not contain the final CRLF.
    line: HeaderLine,
    // Index is the byte position of the colon within the header field.
    // The accessors slice the line as `[..index]` (name) and `[index + 1..]`
    // (value), so they require: index + 1 <= line.len()
    // (equality means an empty value, e.g. "foo:").
    // A non-empty name (index > 0) is only enforced by `validate()`.
    index: usize,
}
| 77 | |
| 78 | impl fmt::Debug for Header { |
| 79 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 80 | write!(f, " {}" , self.line) |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | impl Header { |
| 85 | pub fn new(name: &str, value: &str) -> Self { |
| 86 | let line = format!(" {}: {}" , name, value).into(); |
| 87 | let index = name.len(); |
| 88 | Header { line, index } |
| 89 | } |
| 90 | |
| 91 | /// The header name. |
| 92 | pub fn name(&self) -> &str { |
| 93 | let bytes = &self.line.as_bytes()[0..self.index]; |
| 94 | // Since we validate the header name in HeaderLine::into_header, we |
| 95 | // are guaranteed it is valid utf-8 at this point. |
| 96 | from_utf8(bytes).expect("Legal chars in header name" ) |
| 97 | } |
| 98 | |
| 99 | /// The header value. |
| 100 | /// |
| 101 | /// For non-utf8 headers this returns [`None`] (use [`Header::value_raw()`]). |
| 102 | pub fn value(&self) -> Option<&str> { |
| 103 | let bytes = &self.line.as_bytes()[self.index + 1..]; |
| 104 | from_utf8(bytes) |
| 105 | .map(|s| s.trim()) |
| 106 | .ok() |
| 107 | // ensure all bytes are valid field name. |
| 108 | .filter(|s| s.as_bytes().iter().all(is_field_vchar_or_obs_fold)) |
| 109 | } |
| 110 | |
| 111 | /// The header value as a byte slice. |
| 112 | /// |
| 113 | /// For legacy reasons, the HTTP spec allows headers to be non-ascii characters. |
| 114 | /// Typically such headers are encoded in a non-utf8 encoding (such as iso-8859-1). |
| 115 | /// |
| 116 | /// ureq can't know what encoding the header is in, but this function provides |
| 117 | /// an escape hatch for users that need to handle such headers. |
| 118 | #[allow (unused)] |
| 119 | pub fn value_raw(&self) -> &[u8] { |
| 120 | let mut bytes = &self.line.as_bytes()[self.index + 1..]; |
| 121 | |
| 122 | if !bytes.is_empty() { |
| 123 | // trim front |
| 124 | while !bytes.is_empty() && bytes[0].is_ascii_whitespace() { |
| 125 | bytes = &bytes[1..]; |
| 126 | } |
| 127 | // trim back |
| 128 | while !bytes.is_empty() && bytes[bytes.len() - 1].is_ascii_whitespace() { |
| 129 | bytes = &bytes[..(bytes.len() - 1)]; |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | bytes |
| 134 | } |
| 135 | |
| 136 | /// Compares the given str to the header name ignoring case. |
| 137 | pub fn is_name(&self, other: &str) -> bool { |
| 138 | self.name().eq_ignore_ascii_case(other) |
| 139 | } |
| 140 | |
| 141 | pub(crate) fn validate(&self) -> Result<(), Error> { |
| 142 | let bytes = self.line.as_bytes(); |
| 143 | let name_raw = &bytes[0..self.index]; |
| 144 | let value_raw = &bytes[self.index + 1..]; |
| 145 | |
| 146 | if !valid_name(name_raw) || !valid_value(value_raw) { |
| 147 | Err(ErrorKind::BadHeader.msg(format!("invalid header ' {}'" , self.line))) |
| 148 | } else { |
| 149 | Ok(()) |
| 150 | } |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | /// For non-utf8 headers this returns [`None`] (use [`get_header_raw()`]). |
| 155 | pub(crate) fn get_header<'h>(headers: &'h [Header], name: &str) -> Option<&'h str> { |
| 156 | headersOption<&Header> |
| 157 | .iter() |
| 158 | .find(|h: &&Header| h.is_name(name)) |
| 159 | .and_then(|h: &Header| h.value()) |
| 160 | } |
| 161 | |
| 162 | #[allow (unused)] |
| 163 | pub(crate) fn get_header_raw<'h>(headers: &'h [Header], name: &str) -> Option<&'h [u8]> { |
| 164 | headersOption<&Header> |
| 165 | .iter() |
| 166 | .find(|h: &&Header| h.is_name(name)) |
| 167 | .map(|h: &Header| h.value_raw()) |
| 168 | } |
| 169 | |
| 170 | pub(crate) fn get_all_headers<'h>(headers: &'h [Header], name: &str) -> Vec<&'h str> { |
| 171 | headersimpl Iterator |
| 172 | .iter() |
| 173 | .filter(|h: &&Header| h.is_name(name)) |
| 174 | .filter_map(|h: &Header| h.value()) |
| 175 | .collect() |
| 176 | } |
| 177 | |
| 178 | pub(crate) fn has_header(headers: &[Header], name: &str) -> bool { |
| 179 | get_header(headers, name).is_some() |
| 180 | } |
| 181 | |
| 182 | pub(crate) fn add_header(headers: &mut Vec<Header>, header: Header) { |
| 183 | let name: &str = header.name(); |
| 184 | if !name.starts_with("x-" ) && !name.starts_with("X-" ) { |
| 185 | headers.retain(|h: &Header| h.name() != name); |
| 186 | } |
| 187 | headers.push(header); |
| 188 | } |
| 189 | |
| 190 | // https://tools.ietf.org/html/rfc7230#section-3.2 |
| 191 | // Each header field consists of a case-insensitive field name followed |
| 192 | // by a colon (":"), optional leading whitespace, the field value, and |
| 193 | // optional trailing whitespace. |
| 194 | // field-name = token |
| 195 | // token = 1*tchar |
| 196 | // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / |
| 197 | // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA |
| 198 | fn valid_name(name: &[u8]) -> bool { |
| 199 | !name.is_empty() && name.iter().all(is_tchar) |
| 200 | } |
| 201 | |
/// Returns `true` if `b` is a `tchar` as defined by RFC 7230 section 3.2.6:
/// an ASCII letter, an ASCII digit, or one of
/// ``! # $ % & ' * + - . ^ _ ` | ~``.
#[inline]
pub(crate) fn is_tchar(b: &u8) -> bool {
    matches!(
        *b,
        b'!' | b'#'
            | b'$'
            | b'%'
            | b'&'
            | b'\''
            | b'*'
            | b'+'
            | b'-'
            | b'.'
            | b'^'
            | b'_'
            | b'`'
            | b'|'
            | b'~'
    ) || b.is_ascii_alphanumeric()
}
| 212 | |
| 213 | // https://tools.ietf.org/html/rfc7230#section-3.2 |
| 214 | // Note that field-content has an errata: |
| 215 | // https://www.rfc-editor.org/errata/eid4189 |
| 216 | // field-value = *( field-content / obs-fold ) |
| 217 | // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] |
| 218 | // field-vchar = VCHAR / obs-text |
| 219 | // |
| 220 | // obs-fold = CRLF 1*( SP / HTAB ) |
| 221 | // ; obsolete line folding |
| 222 | // ; see Section 3.2.4 |
| 223 | // https://tools.ietf.org/html/rfc5234#appendix-B.1 |
| 224 | // VCHAR = %x21-7E |
| 225 | // ; visible (printing) characters |
| 226 | fn valid_value(value: &[u8]) -> bool { |
| 227 | value.iter().all(is_field_vchar_or_obs_fold) |
| 228 | } |
| 229 | |
/// Returns `true` if `b` may appear in a header value: SP, HTAB, or a
/// visible ASCII character (`0x21..=0x7E`).
///
/// Despite the name, CR, LF and bytes `>= 0x80` are rejected.
#[inline]
fn is_field_vchar_or_obs_fold(b: &u8) -> bool {
    matches!(*b, b' ' | b'\t' | 0x21..=0x7E)
}
| 238 | |
| 239 | impl FromStr for Header { |
| 240 | type Err = Error; |
| 241 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
| 242 | // |
| 243 | let line: HeaderLine = s.to_string().into(); |
| 244 | |
| 245 | let header: Header = line.into_header()?; |
| 246 | |
| 247 | header.validate()?; |
| 248 | Ok(header) |
| 249 | } |
| 250 | } |
| 251 | |
#[cfg(test)]
mod tests {
    use super::*;

    // Names must be non-empty runs of tchar bytes.
    #[test]
    fn test_valid_name() {
        assert!(valid_name(b"example"));
        assert!(valid_name(b"Content-Type"));
        assert!(valid_name(b"h-123456789"));
        assert!(!valid_name(b"Content-Type:"));
        assert!(!valid_name(b"Content-Type "));
        assert!(!valid_name(b" some-header"));
        assert!(!valid_name(b"\"invalid\""));
        assert!(!valid_name(b"G\xf6del"));
    }

    // Values may contain SP, HTAB and visible ASCII only.
    #[test]
    fn test_valid_value() {
        assert!(valid_value(b"example"));
        assert!(valid_value(b"foo bar"));
        assert!(valid_value(b" foobar "));
        assert!(valid_value(b" foo\tbar "));
        assert!(valid_value(b" foo~"));
        assert!(valid_value(b" !bar"));
        assert!(valid_value(b" "));
        assert!(!valid_value(b"\nfoo"));
        assert!(!valid_value(b"foo\x7F"));
    }

    // Every case must be rejected with BadHeader, by either the name scan
    // or the value validation.
    #[test]
    fn test_parse_invalid_name() {
        let cases = vec![
            "Content-Type :",
            " Content-Type: foo",
            "Content-Type foo",
            "\"some-header\": foo",
            "Gödel: Escher, Bach",
            "Foo: \n",
            "Foo: \nbar",
            "Foo: \x7F bar",
        ];
        for c in cases {
            let result = c.parse::<Header>();
            assert!(
                matches!(result, Err(ref e) if e.kind() == ErrorKind::BadHeader),
                "'{}'.parse(): expected BadHeader, got {:?}",
                c,
                result
            );
        }
    }

    #[test]
    #[cfg(feature = "charset")]
    fn test_parse_non_utf8_value() {
        let (cow, _, _) = encoding_rs::WINDOWS_1252.encode("x-geo-stuff: älvsjö ");
        let bytes = cow.to_vec();
        let line: HeaderLine = bytes.into();
        let header = line.into_header().unwrap();
        assert_eq!(header.name(), "x-geo-stuff");
        assert_eq!(header.value(), None);
        assert_eq!(header.value_raw(), [228, 108, 118, 115, 106, 246]);
    }

    #[test]
    fn empty_value() {
        let h = "foo:".parse::<Header>().unwrap();
        assert_eq!(h.value(), Some(""));
    }

    #[test]
    fn value_with_whitespace() {
        let h = "foo: bar ".parse::<Header>().unwrap();
        assert_eq!(h.value(), Some("bar"));
    }

    #[test]
    fn name_and_value() {
        let header: Header = "X-Forwarded-For: 127.0.0.1".parse().unwrap();
        assert_eq!("X-Forwarded-For", header.name());
        assert_eq!(header.value(), Some("127.0.0.1"));
        assert!(header.is_name("X-Forwarded-For"));
        assert!(header.is_name("x-forwarded-for"));
        assert!(header.is_name("X-FORWARDED-FOR"));
    }

    #[test]
    fn test_iso8859_utf8_mixup() {
        // C2 A5 is ¥ in UTF-8 and Â¥ in ISO-8859-1
        //
        // The bytes must be written as a byte string: in a `str` literal,
        // "\0xc2" is a NUL followed by the text "xc2", which would make the
        // test pass for the wrong reason.
        let b = b"header: \xc2\xa5".to_vec();
        let l: HeaderLine = b.into();
        let h = l.into_header().unwrap();
        // Valid UTF-8, but not visible ASCII, so no `&str` value is exposed.
        assert_eq!(h.value(), None);
        assert_eq!(h.value_raw(), [0xc2, 0xa5]);
    }
}
| 347 | |