1use crate::error::{Error, ErrorKind};
2use std::fmt;
3use std::str::{from_utf8, FromStr};
4
/// Raw bytes of one status line or header line.
///
/// Since a status line or header can contain non-utf8 characters the
/// backing store is a `Vec<u8>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct HeaderLine(Vec<u8>);
9
10impl From<String> for HeaderLine {
11 fn from(s: String) -> Self {
12 HeaderLine(s.into_bytes())
13 }
14}
15
16impl From<Vec<u8>> for HeaderLine {
17 fn from(b: Vec<u8>) -> Self {
18 HeaderLine(b)
19 }
20}
21
22impl HeaderLine {
23 pub fn into_string_lossy(self) -> String {
24 // Try to avoid an extra allcation.
25 String::from_utf8(self.0)
26 .unwrap_or_else(|e| String::from_utf8_lossy(&e.into_bytes()).to_string())
27 }
28
29 pub fn is_empty(&self) -> bool {
30 self.0.is_empty()
31 }
32
33 fn as_bytes(&self) -> &[u8] {
34 &self.0
35 }
36
37 pub fn into_header(self) -> Result<Header, Error> {
38 // The header name should always be ascii, we can read anything up to the
39 // ':' delimiter byte-by-byte.
40 let mut index = 0;
41
42 for c in self.as_bytes() {
43 if *c == b':' {
44 break;
45 }
46 if !is_tchar(c) {
47 return Err(Error::new(
48 ErrorKind::BadHeader,
49 Some(format!("Invalid char ({:0x?}) while looking for ':'", *c)),
50 ));
51 }
52 index += 1;
53 }
54
55 Ok(Header { line: self, index })
56 }
57}
58
59impl fmt::Display for HeaderLine {
60 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61 write!(f, "{}", String::from_utf8_lossy(&self.0))
62 }
63}
64
#[derive(Clone, PartialEq, Eq)]
/// Wrapper type for a header field.
/// <https://tools.ietf.org/html/rfc7230#section-3.2>
///
/// The whole field is stored as one line; the bytes before `index` are the
/// name and the bytes after `index` are the (untrimmed) value.
pub(crate) struct Header {
    // Line contains the unmodified bytes of single header field.
    // It does not contain the final CRLF.
    line: HeaderLine,
    // Index is the position of the colon within the header field.
    // Invariant: index > 0
    // Invariant: index + 1 < line.len()
    // NOTE(review): a field with an empty value such as `foo:` has
    // index + 1 == line.len(), so the second invariant looks stricter than
    // what the accessors actually need - confirm.
    index: usize,
}
77
78impl fmt::Debug for Header {
79 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
80 write!(f, "{}", self.line)
81 }
82}
83
84impl Header {
85 pub fn new(name: &str, value: &str) -> Self {
86 let line = format!("{}: {}", name, value).into();
87 let index = name.len();
88 Header { line, index }
89 }
90
91 /// The header name.
92 pub fn name(&self) -> &str {
93 let bytes = &self.line.as_bytes()[0..self.index];
94 // Since we validate the header name in HeaderLine::into_header, we
95 // are guaranteed it is valid utf-8 at this point.
96 from_utf8(bytes).expect("Legal chars in header name")
97 }
98
99 /// The header value.
100 ///
101 /// For non-utf8 headers this returns [`None`] (use [`Header::value_raw()`]).
102 pub fn value(&self) -> Option<&str> {
103 let bytes = &self.line.as_bytes()[self.index + 1..];
104 from_utf8(bytes)
105 .map(|s| s.trim())
106 .ok()
107 // ensure all bytes are valid field name.
108 .filter(|s| s.as_bytes().iter().all(is_field_vchar_or_obs_fold))
109 }
110
111 /// The header value as a byte slice.
112 ///
113 /// For legacy reasons, the HTTP spec allows headers to be non-ascii characters.
114 /// Typically such headers are encoded in a non-utf8 encoding (such as iso-8859-1).
115 ///
116 /// ureq can't know what encoding the header is in, but this function provides
117 /// an escape hatch for users that need to handle such headers.
118 #[allow(unused)]
119 pub fn value_raw(&self) -> &[u8] {
120 let mut bytes = &self.line.as_bytes()[self.index + 1..];
121
122 if !bytes.is_empty() {
123 // trim front
124 while !bytes.is_empty() && bytes[0].is_ascii_whitespace() {
125 bytes = &bytes[1..];
126 }
127 // trim back
128 while !bytes.is_empty() && bytes[bytes.len() - 1].is_ascii_whitespace() {
129 bytes = &bytes[..(bytes.len() - 1)];
130 }
131 }
132
133 bytes
134 }
135
136 /// Compares the given str to the header name ignoring case.
137 pub fn is_name(&self, other: &str) -> bool {
138 self.name().eq_ignore_ascii_case(other)
139 }
140
141 pub(crate) fn validate(&self) -> Result<(), Error> {
142 let bytes = self.line.as_bytes();
143 let name_raw = &bytes[0..self.index];
144 let value_raw = &bytes[self.index + 1..];
145
146 if !valid_name(name_raw) || !valid_value(value_raw) {
147 Err(ErrorKind::BadHeader.msg(format!("invalid header '{}'", self.line)))
148 } else {
149 Ok(())
150 }
151 }
152}
153
154/// For non-utf8 headers this returns [`None`] (use [`get_header_raw()`]).
155pub(crate) fn get_header<'h>(headers: &'h [Header], name: &str) -> Option<&'h str> {
156 headersOption<&Header>
157 .iter()
158 .find(|h: &&Header| h.is_name(name))
159 .and_then(|h: &Header| h.value())
160}
161
162#[allow(unused)]
163pub(crate) fn get_header_raw<'h>(headers: &'h [Header], name: &str) -> Option<&'h [u8]> {
164 headersOption<&Header>
165 .iter()
166 .find(|h: &&Header| h.is_name(name))
167 .map(|h: &Header| h.value_raw())
168}
169
170pub(crate) fn get_all_headers<'h>(headers: &'h [Header], name: &str) -> Vec<&'h str> {
171 headersimpl Iterator
172 .iter()
173 .filter(|h: &&Header| h.is_name(name))
174 .filter_map(|h: &Header| h.value())
175 .collect()
176}
177
178pub(crate) fn has_header(headers: &[Header], name: &str) -> bool {
179 get_header(headers, name).is_some()
180}
181
182pub(crate) fn add_header(headers: &mut Vec<Header>, header: Header) {
183 let name: &str = header.name();
184 if !name.starts_with("x-") && !name.starts_with("X-") {
185 headers.retain(|h: &Header| h.name() != name);
186 }
187 headers.push(header);
188}
189
190// https://tools.ietf.org/html/rfc7230#section-3.2
191// Each header field consists of a case-insensitive field name followed
192// by a colon (":"), optional leading whitespace, the field value, and
193// optional trailing whitespace.
194// field-name = token
195// token = 1*tchar
196// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
197// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
198fn valid_name(name: &[u8]) -> bool {
199 !name.is_empty() && name.iter().all(is_tchar)
200}
201
/// Returns `true` when `b` is a `tchar` as defined by RFC 7230: an ASCII
/// letter or digit, or one of the punctuation characters listed below.
#[inline]
pub(crate) fn is_tchar(b: &u8) -> bool {
    match b {
        b'!' | b'#' | b'$' | b'%' | b'&' => true,
        b'\'' | b'*' | b'+' | b'-' | b'.' => true,
        b'^' | b'_' | b'`' | b'|' | b'~' => true,
        b if b.is_ascii_alphanumeric() => true,
        _ => false,
    }
}
212
213// https://tools.ietf.org/html/rfc7230#section-3.2
214// Note that field-content has an errata:
215// https://www.rfc-editor.org/errata/eid4189
216// field-value = *( field-content / obs-fold )
217// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
218// field-vchar = VCHAR / obs-text
219//
220// obs-fold = CRLF 1*( SP / HTAB )
221// ; obsolete line folding
222// ; see Section 3.2.4
223// https://tools.ietf.org/html/rfc5234#appendix-B.1
224// VCHAR = %x21-7E
225// ; visible (printing) characters
226fn valid_value(value: &[u8]) -> bool {
227 value.iter().all(is_field_vchar_or_obs_fold)
228}
229
/// Returns `true` for the bytes accepted inside a header value: space,
/// horizontal tab, and the visible ASCII range `0x21..=0x7E`.
///
/// CR, LF, other control bytes and bytes `>= 0x7F` are rejected.
#[inline]
fn is_field_vchar_or_obs_fold(b: &u8) -> bool {
    matches!(*b, b' ' | b'\t' | 0x21..=0x7E)
}
238
239impl FromStr for Header {
240 type Err = Error;
241 fn from_str(s: &str) -> Result<Self, Self::Err> {
242 //
243 let line: HeaderLine = s.to_string().into();
244
245 let header: Header = line.into_header()?;
246
247 header.validate()?;
248 Ok(header)
249 }
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    // Name validation: only tchar bytes, and at least one of them.
    #[test]
    fn test_valid_name() {
        assert!(valid_name(b"example"));
        assert!(valid_name(b"Content-Type"));
        assert!(valid_name(b"h-123456789"));
        assert!(!valid_name(b"Content-Type:"));
        assert!(!valid_name(b"Content-Type "));
        assert!(!valid_name(b" some-header"));
        assert!(!valid_name(b"\"invalid\""));
        assert!(!valid_name(b"G\xf6del"));
    }

    // Value validation: space, tab and visible ASCII only.
    #[test]
    fn test_valid_value() {
        assert!(valid_value(b"example"));
        assert!(valid_value(b"foo bar"));
        assert!(valid_value(b" foobar "));
        assert!(valid_value(b" foo\tbar "));
        assert!(valid_value(b" foo~"));
        assert!(valid_value(b" !bar"));
        assert!(valid_value(b" "));
        assert!(!valid_value(b" \nfoo"));
        assert!(!valid_value(b"foo\x7F"));
    }

    // Every case must be rejected with BadHeader, whether the problem is in
    // the name or in the value.
    #[test]
    fn test_parse_invalid_name() {
        let cases = vec![
            "Content-Type :",
            " Content-Type: foo",
            "Content-Type foo",
            "\"some-header\": foo",
            "Gödel: Escher, Bach",
            "Foo: \n",
            "Foo: \nbar",
            "Foo: \x7F bar",
        ];
        for c in cases {
            let result = c.parse::<Header>();
            assert!(
                matches!(result, Err(ref e) if e.kind() == ErrorKind::BadHeader),
                "'{}'.parse(): expected BadHeader, got {:?}",
                c,
                result
            );
        }
    }

    #[test]
    #[cfg(feature = "charset")]
    fn test_parse_non_utf8_value() {
        let (cow, _, _) = encoding_rs::WINDOWS_1252.encode("x-geo-stuff: älvsjö ");
        let bytes = cow.to_vec();
        let line: HeaderLine = bytes.into();
        let header = line.into_header().unwrap();
        assert_eq!(header.name(), "x-geo-stuff");
        assert_eq!(header.value(), None);
        assert_eq!(header.value_raw(), [228, 108, 118, 115, 106, 246]);
    }

    #[test]
    fn empty_value() {
        let h = "foo:".parse::<Header>().unwrap();
        assert_eq!(h.value(), Some(""));
    }

    #[test]
    fn value_with_whitespace() {
        let h = "foo: bar ".parse::<Header>().unwrap();
        assert_eq!(h.value(), Some("bar"));
    }

    #[test]
    fn name_and_value() {
        let header: Header = "X-Forwarded-For: 127.0.0.1".parse().unwrap();
        assert_eq!("X-Forwarded-For", header.name());
        assert_eq!(header.value(), Some("127.0.0.1"));
        assert!(header.is_name("X-Forwarded-For"));
        assert!(header.is_name("x-forwarded-for"));
        assert!(header.is_name("X-FORWARDED-FOR"));
    }

    #[test]
    fn test_iso8859_utf8_mixup() {
        // C2 A5 is ¥ in UTF-8 and Â¥ in ISO-8859-1
        // A byte string is used so that `\xc2\xa5` are the two raw bytes; in a
        // `str` literal, `\0xc2` would be a NUL followed by the text "xc2".
        let b = b"header: \xc2\xa5".to_vec();
        let l: HeaderLine = b.into();
        let h = l.into_header().unwrap();
        assert_eq!(h.value(), None);
    }
}
347