1use std::io::{self, BufRead};
2use std::error::Error;
3use std::fmt;
4use std::str;
5use super::*;
6
7/// Wraps a `std::io::BufRead` buffered byte stream and decode it as UTF-8.
8pub struct BufReadDecoder<B: BufRead> {
9 buf_read: B,
10 bytes_consumed: usize,
11 incomplete: Incomplete,
12}
13
/// Error produced while decoding a byte stream as UTF-8.
///
/// The `'a` lifetime ties an `InvalidByteSequence` payload to the buffer the
/// offending bytes were read from.
#[derive(Debug)]
pub enum BufReadDecoderError<'a> {
    /// Represents one UTF-8 error in the byte stream.
    ///
    /// In lossy decoding, each such error should be replaced with U+FFFD.
    /// (See `BufReadDecoder::next_lossy` and `BufReadDecoderError::lossy`.)
    InvalidByteSequence(&'a [u8]),

    /// An I/O error from the underlying byte stream
    Io(io::Error),
}
25
26impl<'a> BufReadDecoderError<'a> {
27 /// Replace UTF-8 errors with U+FFFD
28 pub fn lossy(self) -> Result<&'static str, io::Error> {
29 match self {
30 BufReadDecoderError::Io(error: Error) => Err(error),
31 BufReadDecoderError::InvalidByteSequence(_) => Ok(REPLACEMENT_CHARACTER),
32 }
33 }
34}
35
36impl<'a> fmt::Display for BufReadDecoderError<'a> {
37 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
38 match *self {
39 BufReadDecoderError::InvalidByteSequence(bytes: &[u8]) => {
40 write!(f, "invalid byte sequence: {:02x?}", bytes)
41 }
42 BufReadDecoderError::Io(ref err: &Error) => write!(f, "underlying bytestream error: {}", err),
43 }
44 }
45}
46
47impl<'a> Error for BufReadDecoderError<'a> {
48 fn source(&self) -> Option<&(dyn Error + 'static)> {
49 match *self {
50 BufReadDecoderError::InvalidByteSequence(_) => None,
51 BufReadDecoderError::Io(ref err: &Error) => Some(err),
52 }
53 }
54}
55
56impl<B: BufRead> BufReadDecoder<B> {
57 /// This is to `Read::read_to_string` what `String::from_utf8_lossy` is to `String::from_utf8`.
58 pub fn read_to_string_lossy(buf_read: B) -> io::Result<String> {
59 let mut decoder = Self::new(buf_read);
60 let mut string = String::new();
61 while let Some(result) = decoder.next_lossy() {
62 string.push_str(result?)
63 }
64 Ok(string)
65 }
66
67 pub fn new(buf_read: B) -> Self {
68 Self {
69 buf_read,
70 bytes_consumed: 0,
71 incomplete: Incomplete::empty(),
72 }
73 }
74
75 /// Same as `BufReadDecoder::next_strict`, but replace UTF-8 errors with U+FFFD.
76 pub fn next_lossy(&mut self) -> Option<io::Result<&str>> {
77 self.next_strict().map(|result| result.or_else(|e| e.lossy()))
78 }
79
80 /// Decode and consume the next chunk of UTF-8 input.
81 ///
82 /// This method is intended to be called repeatedly until it returns `None`,
83 /// which represents EOF from the underlying byte stream.
84 /// This is similar to `Iterator::next`,
85 /// except that decoded chunks borrow the decoder (~iterator)
86 /// so they need to be handled or copied before the next chunk can start decoding.
87 pub fn next_strict(&mut self) -> Option<Result<&str, BufReadDecoderError>> {
88 enum BytesSource {
89 BufRead(usize),
90 Incomplete,
91 }
92 macro_rules! try_io {
93 ($io_result: expr) => {
94 match $io_result {
95 Ok(value) => value,
96 Err(error) => return Some(Err(BufReadDecoderError::Io(error)))
97 }
98 }
99 }
100 let (source, result) = loop {
101 if self.bytes_consumed > 0 {
102 self.buf_read.consume(self.bytes_consumed);
103 self.bytes_consumed = 0;
104 }
105 let buf = try_io!(self.buf_read.fill_buf());
106
107 // Force loop iteration to go through an explicit `continue`
108 enum Unreachable {}
109 let _: Unreachable = if self.incomplete.is_empty() {
110 if buf.is_empty() {
111 return None // EOF
112 }
113 match str::from_utf8(buf) {
114 Ok(_) => {
115 break (BytesSource::BufRead(buf.len()), Ok(()))
116 }
117 Err(error) => {
118 let valid_up_to = error.valid_up_to();
119 if valid_up_to > 0 {
120 break (BytesSource::BufRead(valid_up_to), Ok(()))
121 }
122 match error.error_len() {
123 Some(invalid_sequence_length) => {
124 break (BytesSource::BufRead(invalid_sequence_length), Err(()))
125 }
126 None => {
127 self.bytes_consumed = buf.len();
128 self.incomplete = Incomplete::new(buf);
129 // need more input bytes
130 continue
131 }
132 }
133 }
134 }
135 } else {
136 if buf.is_empty() {
137 break (BytesSource::Incomplete, Err(())) // EOF with incomplete code point
138 }
139 let (consumed, opt_result) = self.incomplete.try_complete_offsets(buf);
140 self.bytes_consumed = consumed;
141 match opt_result {
142 None => {
143 // need more input bytes
144 continue
145 }
146 Some(result) => {
147 break (BytesSource::Incomplete, result)
148 }
149 }
150 };
151 };
152 let bytes = match source {
153 BytesSource::BufRead(byte_count) => {
154 self.bytes_consumed = byte_count;
155 let buf = try_io!(self.buf_read.fill_buf());
156 &buf[..byte_count]
157 }
158 BytesSource::Incomplete => {
159 self.incomplete.take_buffer()
160 }
161 };
162 match result {
163 Ok(()) => Some(Ok(unsafe { str::from_utf8_unchecked(bytes) })),
164 Err(()) => Some(Err(BufReadDecoderError::InvalidByteSequence(bytes))),
165 }
166 }
167}
168