decoder.rs source code [crates/ureq/src/chunked/decoder.rs]

1	// Copyright 2015 The tiny-http Contributors
2	// Copyright 2015 The rust-chunked-transfer Contributors
3	// Forked into ureq, 2024, from https://github.com/frewsxcv/rust-chunked-transfer
4	// Forked under dual MIT and Apache 2.0 license (see adjacent LICENSE-MIT and LICENSE-APACHE file)
5
6	use std::error::Error;
7	use std::fmt;
8	use std::io::Error as IoError;
9	use std::io::ErrorKind;
10	use std::io::Read;
11	use std::io::Result as IoResult;
12
/// Streaming decoder for bodies sent with HTTP chunked transfer encoding.
///
/// Wraps a source of chunk-encoded bytes and hands back only the payload
/// carried inside the chunks.
///
/// # Example
///
/// ```no_compile
/// use chunked_transfer::Decoder;
/// use std::io::Read;
///
/// let encoded = b"3\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n\r\n";
/// let mut decoded = String::new();
///
/// let mut decoder = Decoder::new(encoded as &[u8]);
/// decoder.read_to_string(&mut decoded);
///
/// assert_eq!(decoded, "hello world!!!");
/// ```
pub struct Decoder<R> {
    /// Underlying source that supplies the chunk-encoded bytes.
    source: R,

    /// Number of payload bytes still unread in the current chunk.
    ///
    /// `None` while the decoder sits between chunks, i.e. when the next bytes
    /// of `source` are expected to be a chunk-size line.
    remaining_chunks_size: Option<usize>,
}
37
38	impl<R> Decoder<R>
39	where
40	R: Read,
41	{
42	pub fn new(source: R) -> Decoder<R> {
43	Decoder {
44	source,
45	remaining_chunks_size: None,
46	}
47	}
48
49	/// Unwraps the Decoder into its inner `Read` source.
50	pub fn into_inner(self) -> R {
51	self.source
52	}
53
54	fn read_chunk_size(&mut self) -> IoResult<usize> {
55	let mut chunk_size_bytes = Vec::new();
56	let mut has_ext = `false`;
57
58	loop {
59	let byte = match self.source.by_ref().bytes().next() {
60	Some(b) => b?,
61	None => return Err(IoError::new(ErrorKind::InvalidInput, DecoderError)),
62	};
63
64	if byte == b'`\r`' {
65	break;
66	}
67
68	if byte == b';' {
69	has_ext = `true`;
70	break;
71	}
72
73	chunk_size_bytes.push(byte);
74	}
75
76	// Ignore extensions for now
77	if has_ext {
78	loop {
79	let byte = match self.source.by_ref().bytes().next() {
80	Some(b) => b?,
81	None => return Err(IoError::new(ErrorKind::InvalidInput, DecoderError)),
82	};
83	if byte == b'`\r`' {
84	break;
85	}
86	}
87	}
88
89	self.read_line_feed()?;
90
91	let chunk_size = String::from_utf8(chunk_size_bytes)
92	.ok()
93	.and_then(\|c\| usize::from_str_radix(c.trim(), `16`).ok())
94	.ok_or_else(\|\| IoError::new(ErrorKind::InvalidInput, DecoderError))?;
95
96	Ok(chunk_size)
97	}
98
99	fn read_carriage_return(&mut self) -> IoResult<()> {
100	match self.source.by_ref().bytes().next() {
101	Some(Ok(b'`\r`')) => Ok(()),
102	_ => Err(IoError::new(ErrorKind::InvalidInput, DecoderError)),
103	}
104	}
105
106	fn read_line_feed(&mut self) -> IoResult<()> {
107	match self.source.by_ref().bytes().next() {
108	Some(Ok(b'`\n`')) => Ok(()),
109	_ => Err(IoError::new(ErrorKind::InvalidInput, DecoderError)),
110	}
111	}
112
113	// Sometimes the last \r\n is missing.
114	fn read_end(&mut self) -> IoResult<()> {
115	fn expect_or_end(
116	bytes: &mut impl Iterator<Item = IoResult<u8>>,
117	expected: u8,
118	) -> IoResult<()> {
119	match bytes.next() {
120	Some(Ok(c)) => {
121	if c == expected {
122	Ok(())
123	} else {
124	Err(IoError::new(ErrorKind::InvalidInput, DecoderError))
125	}
126	}
127	Some(Err(e)) => {
128	match e.kind() {
129	// Closed connections are ok.
130	ErrorKind::ConnectionReset \| ErrorKind::ConnectionAborted => Ok(()),
131	_ => Err(IoError::new(ErrorKind::InvalidInput, DecoderError)),
132	}
133	}
134	None => Ok(()), // End of iterator is ok
135	}
136	}
137
138	let mut bytes = self.source.by_ref().bytes();
139
140	expect_or_end(&mut bytes, b'`\r`')?;
141	expect_or_end(&mut bytes, b'`\n`')?;
142
143	Ok(())
144	}
145	}
146
147	impl<R> Read for Decoder<R>
148	where
149	R: Read,
150	{
151	fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> {
152	let remaining_chunks_size = match self.remaining_chunks_size {
153	Some(c) => c,
154	None => {
155	// first possibility: we are not in a chunk, so we'll attempt to determine
156	// the chunks size
157	let chunk_size = self.read_chunk_size()?;
158
159	// if the chunk size is 0, we are at EOF
160	if chunk_size == `0` {
161	self.read_end()?;
162	return Ok(`0`);
163	}
164
165	chunk_size
166	}
167	};
168
169	// second possibility: we continue reading from a chunk
170	if buf.len() < remaining_chunks_size {
171	let read = self.source.read(buf)?;
172	self.remaining_chunks_size = Some(remaining_chunks_size - read);
173	return Ok(read);
174	}
175
176	// third possibility: the read request goes further than the current chunk
177	// we simply read until the end of the chunk and return
178	assert!(buf.len() >= remaining_chunks_size);
179
180	let buf = &mut buf[..remaining_chunks_size];
181	let read = self.source.read(buf)?;
182
183	self.remaining_chunks_size = if read == remaining_chunks_size {
184	self.read_carriage_return()?;
185	self.read_line_feed()?;
186	None
187	} else {
188	Some(remaining_chunks_size - read)
189	};
190
191	Ok(read)
192	}
193	}
194
/// Error payload carried by the `io::Error`s that report chunk-decoding
/// failures.
#[derive(Debug, Copy, Clone)]
struct DecoderError;

impl DecoderError {
    /// Text shared by the `Display` output and the legacy `description`.
    const MESSAGE: &'static str = "Error while decoding chunks";
}

impl fmt::Display for DecoderError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(Self::MESSAGE)
    }
}

impl Error for DecoderError {
    fn description(&self) -> &str {
        Self::MESSAGE
    }
}
209
#[cfg(test)]
mod test {
    use super::Decoder;
    use std::io;
    use std::io::Read;

    /// Drains a decoder wrapped around `source` into a `String`.
    fn decode_to_string<R: Read>(source: R) -> io::Result<String> {
        let mut text = String::new();
        Decoder::new(source).read_to_string(&mut text)?;
        Ok(text)
    }

    /// Wraps `encoded` in an in-memory cursor, as a stand-in for a stream.
    fn cursor(encoded: &str) -> io::Cursor<Vec<u8>> {
        io::Cursor::new(encoded.to_string().into_bytes())
    }

    /// This unit test is taken from Hyper
    /// https://github.com/hyperium/hyper
    /// Copyright (c) 2014 Sean McArthur
    #[test]
    fn test_read_chunk_size() {
        fn parse(input: &str) -> io::Result<usize> {
            Decoder::new(input.as_bytes()).read_chunk_size()
        }

        let accepted: &[(&str, usize)] = &[
            ("1\r\n", 1),
            ("01\r\n", 1),
            ("0\r\n", 0),
            ("00\r\n", 0),
            ("A\r\n", 10),
            ("a\r\n", 10),
            ("Ff\r\n", 255),
            ("Ff \r\n", 255),
            // Acceptable (if not fully valid) extensions do not influence the size
            ("1;extension\r\n", 1),
            ("a;ext name=value\r\n", 10),
            ("1;extension;extension2\r\n", 1),
            ("1;;; ;\r\n", 1),
            ("2; extension...\r\n", 2),
            ("3 ; extension=123\r\n", 3),
            ("3 ;\r\n", 3),
            ("3 ; \r\n", 3),
        ];

        for &(input, expected) in accepted {
            let actual = parse(input).unwrap();
            assert_eq!(expected, actual, "input: {:?}", input);
        }

        let rejected: &[&str] = &[
            // Missing LF or CRLF
            "F\rF",
            "F",
            // Invalid hex digit
            "X\r\n",
            "1X\r\n",
            "-\r\n",
            "-1\r\n",
            // Invalid extensions cause an error
            "1 invalid extension\r\n",
            "1 A\r\n",
            "1;no CRLF",
        ];

        for &input in rejected {
            let err_kind = parse(input).unwrap_err().kind();
            assert_eq!(err_kind, io::ErrorKind::InvalidInput, "input: {:?}", input);
        }
    }

    #[test]
    fn test_valid_chunk_decode() {
        let decoded =
            decode_to_string(cursor("3\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n\r\n")).unwrap();

        assert_eq!(decoded, "hello world!!!");
    }

    #[test]
    fn test_decode_zero_length() {
        let decoded = decode_to_string(b"0\r\n\r\n" as &[u8]).unwrap();

        assert_eq!(decoded, "");
    }

    #[test]
    fn test_decode_invalid_chunk_length() {
        assert!(decode_to_string(b"m\r\n\r\n" as &[u8]).is_err());
    }

    #[test]
    fn invalid_input1() {
        // The first chunk announces 2 bytes but carries 3.
        let outcome = decode_to_string(cursor("2\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n"));

        assert!(outcome.is_err());
    }

    #[test]
    fn invalid_input2() {
        // The first chunk-size line lacks its LF.
        let outcome = decode_to_string(cursor("3\rhel\r\nb\r\nlo world!!!\r\n0\r\n"));

        assert!(outcome.is_err());
    }

    #[test]
    fn test_decode_end_missing_last_crlf() {
        // This has been observed in the wild.
        // See https://github.com/algesten/ureq/issues/325

        // Missing last \r\n
        let decoded = decode_to_string(cursor("3\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n")).unwrap();

        assert_eq!(decoded, "hello world!!!");
    }
}
342