read.rs source code [crates/utf-8/src/read.rs]

1	use std::io::{self, BufRead};
2	use std::error::Error;
3	use std::fmt;
4	use std::str;
5	use super::*;
6
7	/// Wraps a `std::io::BufRead` buffered byte stream and decode it as UTF-8.
8	pub struct BufReadDecoder<B: BufRead> {
9	buf_read: B,
10	bytes_consumed: usize,
11	incomplete: Incomplete,
12	}
13
/// Error yielded by `BufReadDecoder::next_strict` for one chunk of input.
///
/// The lifetime `'a` is that of the borrowed invalid bytes carried by
/// `InvalidByteSequence`.
#[derive(Debug)]
pub enum BufReadDecoderError<'a> {
    /// Represents one UTF-8 error in the byte stream.
    ///
    /// The payload is the offending bytes.
    ///
    /// In lossy decoding, each such error should be replaced with U+FFFD.
    /// (See `BufReadDecoder::next_lossy` and `BufReadDecoderError::lossy`.)
    InvalidByteSequence(&'a [u8]),

    /// An I/O error from the underlying byte stream
    Io(io::Error),
}
25
26	impl<'a> BufReadDecoderError<'a> {
27	/// Replace UTF-8 errors with U+FFFD
28	pub fn lossy(self) -> Result<&'static str, io::Error> {
29	match self {
30	BufReadDecoderError::Io(error: Error) => Err(error),
31	BufReadDecoderError::InvalidByteSequence(_) => Ok(REPLACEMENT_CHARACTER),
32	}
33	}
34	}
35
36	impl<'a> fmt::Display for BufReadDecoderError<'a> {
37	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
38	match *self {
39	BufReadDecoderError::InvalidByteSequence(bytes: &[u8]) => {
40	write!(f, "invalid byte sequence: {:`02`x?}", bytes)
41	}
42	BufReadDecoderError::Io(ref err: &Error) => write!(f, "underlying bytestream error: {}", err),
43	}
44	}
45	}
46
47	impl<'a> Error for BufReadDecoderError<'a> {
48	fn source(&self) -> Option<&(dyn Error + 'static)> {
49	match *self {
50	BufReadDecoderError::InvalidByteSequence(_) => None,
51	BufReadDecoderError::Io(ref err: &Error) => Some(err),
52	}
53	}
54	}
55
56	impl<B: BufRead> BufReadDecoder<B> {
57	/// This is to `Read::read_to_string` what `String::from_utf8_lossy` is to `String::from_utf8`.
58	pub fn read_to_string_lossy(buf_read: B) -> io::Result<String> {
59	let mut decoder = Self::new(buf_read);
60	let mut string = String::new();
61	while let Some(result) = decoder.next_lossy() {
62	string.push_str(result?)
63	}
64	Ok(string)
65	}
66
67	pub fn new(buf_read: B) -> Self {
68	Self {
69	buf_read,
70	bytes_consumed: `0`,
71	incomplete: Incomplete::empty(),
72	}
73	}
74
75	/// Same as `BufReadDecoder::next_strict`, but replace UTF-8 errors with U+FFFD.
76	pub fn next_lossy(&mut self) -> Option<io::Result<&str>> {
77	self.next_strict().map(\|result\| result.or_else(\|e\| e.lossy()))
78	}
79
80	/// Decode and consume the next chunk of UTF-8 input.
81	///
82	/// This method is intended to be called repeatedly until it returns `None`,
83	/// which represents EOF from the underlying byte stream.
84	/// This is similar to `Iterator::next`,
85	/// except that decoded chunks borrow the decoder (~iterator)
86	/// so they need to be handled or copied before the next chunk can start decoding.
87	pub fn next_strict(&mut self) -> Option<Result<&str, BufReadDecoderError>> {
88	enum BytesSource {
89	BufRead(usize),
90	Incomplete,
91	}
92	macro_rules! try_io {
93	($io_result: expr) => {
94	match $io_result {
95	Ok(value) => value,
96	Err(error) => return Some(Err(BufReadDecoderError::Io(error)))
97	}
98	}
99	}
100	let (source, result) = loop {
101	if self.bytes_consumed > `0` {
102	self.buf_read.consume(self.bytes_consumed);
103	self.bytes_consumed = `0`;
104	}
105	let buf = try_io!(self.buf_read.fill_buf());
106
107	// Force loop iteration to go through an explicit `continue`
108	enum Unreachable {}
109	let _: Unreachable = if self.incomplete.is_empty() {
110	if buf.is_empty() {
111	return None // EOF
112	}
113	match str::from_utf8(buf) {
114	Ok(_) => {
115	break (BytesSource::BufRead(buf.len()), Ok(()))
116	}
117	Err(error) => {
118	let valid_up_to = error.valid_up_to();
119	if valid_up_to > `0` {
120	break (BytesSource::BufRead(valid_up_to), Ok(()))
121	}
122	match error.error_len() {
123	Some(invalid_sequence_length) => {
124	break (BytesSource::BufRead(invalid_sequence_length), Err(()))
125	}
126	None => {
127	self.bytes_consumed = buf.len();
128	self.incomplete = Incomplete::new(buf);
129	// need more input bytes
130	continue
131	}
132	}
133	}
134	}
135	} else {
136	if buf.is_empty() {
137	break (BytesSource::Incomplete, Err(())) // EOF with incomplete code point
138	}
139	let (consumed, opt_result) = self.incomplete.try_complete_offsets(buf);
140	self.bytes_consumed = consumed;
141	match opt_result {
142	None => {
143	// need more input bytes
144	continue
145	}
146	Some(result) => {
147	break (BytesSource::Incomplete, result)
148	}
149	}
150	};
151	};
152	let bytes = match source {
153	BytesSource::BufRead(byte_count) => {
154	self.bytes_consumed = byte_count;
155	let buf = try_io!(self.buf_read.fill_buf());
156	&buf[..byte_count]
157	}
158	BytesSource::Incomplete => {
159	self.incomplete.take_buffer()
160	}
161	};
162	match result {
163	Ok(()) => Some(Ok(unsafe { str::from_utf8_unchecked(bytes) })),
164	Err(()) => Some(Err(BufReadDecoderError::InvalidByteSequence(bytes))),
165	}
166	}
167	}
168