1 | // Copyright 2015 The tiny-http Contributors |
2 | // Copyright 2015 The rust-chunked-transfer Contributors |
3 | // Forked into ureq, 2024, from https://github.com/frewsxcv/rust-chunked-transfer |
4 | // Forked under dual MIT and Apache 2.0 license (see adjacent LICENSE-MIT and LICENSE-APACHE file) |
5 | |
6 | use std::error::Error; |
7 | use std::fmt; |
8 | use std::io::Error as IoError; |
9 | use std::io::ErrorKind; |
10 | use std::io::Read; |
11 | use std::io::Result as IoResult; |
12 | |
/// Streaming decoder for the HTTP `chunked` transfer encoding.
///
/// Wraps a reader that yields chunk-framed bytes and, through its own `Read`
/// implementation, hands back only the payload carried inside the chunks.
///
/// # Example
///
/// ```no_compile
/// use chunked_transfer::Decoder;
/// use std::io::Read;
///
/// let encoded = b"3\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n\r\n";
/// let mut decoded = String::new();
///
/// let mut decoder = Decoder::new(encoded as &[u8]);
/// decoder.read_to_string(&mut decoded);
///
/// assert_eq!(decoded, "hello world!!!");
/// ```
pub struct Decoder<R> {
    /// Underlying reader that supplies the chunk-encoded byte stream.
    source: R,

    /// Payload bytes still to be read from the chunk currently in progress;
    /// `None` while the decoder is positioned between chunks.
    remaining_chunks_size: Option<usize>,
}
37 | |
38 | impl<R> Decoder<R> |
39 | where |
40 | R: Read, |
41 | { |
42 | pub fn new(source: R) -> Decoder<R> { |
43 | Decoder { |
44 | source, |
45 | remaining_chunks_size: None, |
46 | } |
47 | } |
48 | |
49 | /// Unwraps the Decoder into its inner `Read` source. |
50 | pub fn into_inner(self) -> R { |
51 | self.source |
52 | } |
53 | |
54 | fn read_chunk_size(&mut self) -> IoResult<usize> { |
55 | let mut chunk_size_bytes = Vec::new(); |
56 | let mut has_ext = false; |
57 | |
58 | loop { |
59 | let byte = match self.source.by_ref().bytes().next() { |
60 | Some(b) => b?, |
61 | None => return Err(IoError::new(ErrorKind::InvalidInput, DecoderError)), |
62 | }; |
63 | |
64 | if byte == b' \r' { |
65 | break; |
66 | } |
67 | |
68 | if byte == b';' { |
69 | has_ext = true; |
70 | break; |
71 | } |
72 | |
73 | chunk_size_bytes.push(byte); |
74 | } |
75 | |
76 | // Ignore extensions for now |
77 | if has_ext { |
78 | loop { |
79 | let byte = match self.source.by_ref().bytes().next() { |
80 | Some(b) => b?, |
81 | None => return Err(IoError::new(ErrorKind::InvalidInput, DecoderError)), |
82 | }; |
83 | if byte == b' \r' { |
84 | break; |
85 | } |
86 | } |
87 | } |
88 | |
89 | self.read_line_feed()?; |
90 | |
91 | let chunk_size = String::from_utf8(chunk_size_bytes) |
92 | .ok() |
93 | .and_then(|c| usize::from_str_radix(c.trim(), 16).ok()) |
94 | .ok_or_else(|| IoError::new(ErrorKind::InvalidInput, DecoderError))?; |
95 | |
96 | Ok(chunk_size) |
97 | } |
98 | |
99 | fn read_carriage_return(&mut self) -> IoResult<()> { |
100 | match self.source.by_ref().bytes().next() { |
101 | Some(Ok(b' \r' )) => Ok(()), |
102 | _ => Err(IoError::new(ErrorKind::InvalidInput, DecoderError)), |
103 | } |
104 | } |
105 | |
106 | fn read_line_feed(&mut self) -> IoResult<()> { |
107 | match self.source.by_ref().bytes().next() { |
108 | Some(Ok(b' \n' )) => Ok(()), |
109 | _ => Err(IoError::new(ErrorKind::InvalidInput, DecoderError)), |
110 | } |
111 | } |
112 | |
113 | // Sometimes the last \r\n is missing. |
114 | fn read_end(&mut self) -> IoResult<()> { |
115 | fn expect_or_end( |
116 | bytes: &mut impl Iterator<Item = IoResult<u8>>, |
117 | expected: u8, |
118 | ) -> IoResult<()> { |
119 | match bytes.next() { |
120 | Some(Ok(c)) => { |
121 | if c == expected { |
122 | Ok(()) |
123 | } else { |
124 | Err(IoError::new(ErrorKind::InvalidInput, DecoderError)) |
125 | } |
126 | } |
127 | Some(Err(e)) => { |
128 | match e.kind() { |
129 | // Closed connections are ok. |
130 | ErrorKind::ConnectionReset | ErrorKind::ConnectionAborted => Ok(()), |
131 | _ => Err(IoError::new(ErrorKind::InvalidInput, DecoderError)), |
132 | } |
133 | } |
134 | None => Ok(()), // End of iterator is ok |
135 | } |
136 | } |
137 | |
138 | let mut bytes = self.source.by_ref().bytes(); |
139 | |
140 | expect_or_end(&mut bytes, b' \r' )?; |
141 | expect_or_end(&mut bytes, b' \n' )?; |
142 | |
143 | Ok(()) |
144 | } |
145 | } |
146 | |
147 | impl<R> Read for Decoder<R> |
148 | where |
149 | R: Read, |
150 | { |
151 | fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> { |
152 | let remaining_chunks_size = match self.remaining_chunks_size { |
153 | Some(c) => c, |
154 | None => { |
155 | // first possibility: we are not in a chunk, so we'll attempt to determine |
156 | // the chunks size |
157 | let chunk_size = self.read_chunk_size()?; |
158 | |
159 | // if the chunk size is 0, we are at EOF |
160 | if chunk_size == 0 { |
161 | self.read_end()?; |
162 | return Ok(0); |
163 | } |
164 | |
165 | chunk_size |
166 | } |
167 | }; |
168 | |
169 | // second possibility: we continue reading from a chunk |
170 | if buf.len() < remaining_chunks_size { |
171 | let read = self.source.read(buf)?; |
172 | self.remaining_chunks_size = Some(remaining_chunks_size - read); |
173 | return Ok(read); |
174 | } |
175 | |
176 | // third possibility: the read request goes further than the current chunk |
177 | // we simply read until the end of the chunk and return |
178 | assert!(buf.len() >= remaining_chunks_size); |
179 | |
180 | let buf = &mut buf[..remaining_chunks_size]; |
181 | let read = self.source.read(buf)?; |
182 | |
183 | self.remaining_chunks_size = if read == remaining_chunks_size { |
184 | self.read_carriage_return()?; |
185 | self.read_line_feed()?; |
186 | None |
187 | } else { |
188 | Some(remaining_chunks_size - read) |
189 | }; |
190 | |
191 | Ok(read) |
192 | } |
193 | } |
194 | |
/// Error payload carried by the `io::Error`s that the chunk decoder constructs.
#[derive(Debug, Copy, Clone)]
struct DecoderError;

impl DecoderError {
    /// Text shared by the `Display` output and `Error::description`.
    const MESSAGE: &'static str = "Error while decoding chunks";
}

impl fmt::Display for DecoderError {
    /// Writes the fixed decoder error message.
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt.write_str(Self::MESSAGE)
    }
}

impl Error for DecoderError {
    /// Returns the same fixed message that `Display` produces.
    fn description(&self) -> &str {
        Self::MESSAGE
    }
}
209 | |
#[cfg(test)]
mod test {
    use super::Decoder;
    use std::io;
    use std::io::Read;

    /// This unit test is taken from Hyper
    /// https://github.com/hyperium/hyper
    /// Copyright (c) 2014 Sean McArthur
    #[test]
    fn test_read_chunk_size() {
        // Parses `s` as a chunk-size line and expects the given size.
        fn read(s: &str, expected: usize) {
            let mut decoded = Decoder::new(s.as_bytes());
            let actual = decoded.read_chunk_size().unwrap();
            assert_eq!(expected, actual);
        }

        // Parses `s` as a chunk-size line and expects it to be rejected.
        fn read_err(s: &str) {
            let mut decoded = Decoder::new(s.as_bytes());
            let err_kind = decoded.read_chunk_size().unwrap_err().kind();
            assert_eq!(err_kind, io::ErrorKind::InvalidInput);
        }

        read("1\r\n", 1);
        read("01\r\n", 1);
        read("0\r\n", 0);
        read("00\r\n", 0);
        read("A\r\n", 10);
        read("a\r\n", 10);
        read("Ff\r\n", 255);
        // Trailing whitespace after the size is tolerated
        read("Ff   \r\n", 255);
        // Missing LF or CRLF
        read_err("F\rF");
        read_err("F");
        // Invalid hex digit
        read_err("X\r\n");
        read_err("1X\r\n");
        read_err("-\r\n");
        read_err("-1\r\n");
        // Acceptable (if not fully valid) extensions do not influence the size
        read("1;extension\r\n", 1);
        read("a;ext name=value\r\n", 10);
        read("1;extension;extension2\r\n", 1);
        read("1;;;  ;\r\n", 1);
        read("2; extension...\r\n", 2);
        read("3   ; extension=123\r\n", 3);
        read("3   ;\r\n", 3);
        read("3   ;   \r\n", 3);
        // Invalid extensions cause an error
        read_err("1 invalid extension\r\n");
        read_err("1 A\r\n");
        read_err("1;no CRLF");
    }

    #[test]
    fn test_valid_chunk_decode() {
        let source = io::Cursor::new(
            "3\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n\r\n"
                .to_string()
                .into_bytes(),
        );
        let mut decoded = Decoder::new(source);

        let mut string = String::new();
        decoded.read_to_string(&mut string).unwrap();

        assert_eq!(string, "hello world!!!");
    }

    #[test]
    fn test_decode_zero_length() {
        let mut decoder = Decoder::new(b"0\r\n\r\n" as &[u8]);

        let mut decoded = String::new();
        decoder.read_to_string(&mut decoded).unwrap();

        assert_eq!(decoded, "");
    }

    #[test]
    fn test_decode_invalid_chunk_length() {
        let mut decoder = Decoder::new(b"m\r\n\r\n" as &[u8]);

        let mut decoded = String::new();
        assert!(decoder.read_to_string(&mut decoded).is_err());
    }

    #[test]
    fn invalid_input1() {
        // The first chunk announces 2 bytes but carries 3.
        let source = io::Cursor::new(
            "2\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n"
                .to_string()
                .into_bytes(),
        );
        let mut decoded = Decoder::new(source);

        let mut string = String::new();
        assert!(decoded.read_to_string(&mut string).is_err());
    }

    #[test]
    fn invalid_input2() {
        // The first chunk-size line ends in a bare CR without LF.
        let source = io::Cursor::new(
            "3\rhel\r\nb\r\nlo world!!!\r\n0\r\n"
                .to_string()
                .into_bytes(),
        );
        let mut decoded = Decoder::new(source);

        let mut string = String::new();
        assert!(decoded.read_to_string(&mut string).is_err());
    }

    #[test]
    fn test_decode_end_missing_last_crlf() {
        // This has been observed in the wild.
        // See https://github.com/algesten/ureq/issues/325

        // Missing last \r\n
        let source = io::Cursor::new(
            "3\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n"
                .to_string()
                .into_bytes(),
        );
        let mut decoded = Decoder::new(source);

        let mut string = String::new();
        decoded.read_to_string(&mut string).unwrap();

        assert_eq!(string, "hello world!!!");
    }
}
342 | |