1 | // Take a look at the license at the top of the repository in the LICENSE file. |
2 | |
3 | use crate::json::read::internal_reader::InternalReader; |
4 | use std::{ |
5 | error, fmt, |
6 | io::{Error, Read}, |
7 | str::from_utf8, |
8 | }; |
9 | |
/// Adapter that turns the bytes produced by an [`InternalReader`] into `char`s.
///
/// The `Iterator` implementation for this type yields
/// `Result<char, CharsError>` items, decoding the byte stream as UTF-8.
pub struct ByteToChar<R> {
    // Byte source wrapping the underlying reader `R`.
    iter: InternalReader<R>,
}
13 | |
14 | impl<R: Read> ByteToChar<R> { |
15 | #[inline ] |
16 | pub fn new(read: R, buffer_size: usize) -> Result<Self, Error> { |
17 | Ok(ByteToChar { |
18 | iter: InternalReader::new(read, buffer_size)?, |
19 | }) |
20 | } |
21 | |
22 | fn get_next(&mut self) -> Result<Option<u8>, CharsError> { |
23 | match self.iter.next() { |
24 | None => Ok(None), |
25 | Some(item: Result) => match item { |
26 | Ok(item: u8) => Ok(Some(item)), |
27 | Err(err: Error) => Err(CharsError::Other(err)), |
28 | }, |
29 | } |
30 | } |
31 | } |
32 | |
33 | impl<R: Read + fmt::Debug> fmt::Debug for ByteToChar<R> { |
34 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
35 | f.debug_struct("Filter" ).field(name:"iter" , &self.iter).finish() |
36 | } |
37 | } |
38 | |
39 | impl<R: Read> Iterator for ByteToChar<R> { |
40 | type Item = Result<char, CharsError>; |
41 | |
42 | fn next(&mut self) -> Option<Result<char, CharsError>> { |
43 | let first_byte = match self.get_next() { |
44 | Err(err) => return Some(Err(err)), |
45 | Ok(item) => item?, |
46 | }; |
47 | |
48 | let width = utf8_char_width(first_byte); |
49 | if width == 1 { |
50 | return Some(Ok(first_byte as char)); |
51 | } |
52 | if width == 0 { |
53 | return Some(Err(CharsError::NotUtf8)); |
54 | } |
55 | let mut buf = [first_byte, 0, 0, 0]; |
56 | { |
57 | let mut start = 1; |
58 | while start < width { |
59 | let byte = match self.get_next() { |
60 | Err(err) => return Some(Err(err)), |
61 | Ok(item) => match item { |
62 | Some(item) => item, |
63 | None => return Some(Err(CharsError::NotUtf8)), |
64 | }, |
65 | }; |
66 | buf[start] = byte; |
67 | start += 1; |
68 | } |
69 | } |
70 | Some(match from_utf8(&buf[..width]).ok() { |
71 | Some(s) => Ok(s.chars().next().unwrap()), |
72 | None => Err(CharsError::NotUtf8), |
73 | }) |
74 | } |
75 | } |
76 | |
77 | fn utf8_char_width(b: u8) -> usize { |
78 | UTF8_CHAR_WIDTH[b as usize] as usize |
79 | } |
80 | |
// https://tools.ietf.org/html/rfc3629
/// Maps every possible leading byte to the length in bytes of the UTF-8
/// sequence it introduces; 0 marks bytes that cannot begin a sequence
/// (continuation bytes, the overlong leaders 0xC0/0xC1, and 0xF5..=0xFF).
static UTF8_CHAR_WIDTH: [u8; 256] = {
    let mut table = [0u8; 256];
    let mut byte = 0usize;
    // Evaluated at compile time; `while` because `for` is unavailable in
    // constant initializers.
    while byte < 256 {
        table[byte] = match byte {
            0x00..=0x7F => 1,
            0xC2..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF4 => 4,
            _ => 0,
        };
        byte += 1;
    }
    table
};
100 | |
/// An enumeration of possible errors that can be generated from the
/// `ByteToChar` adapter.
#[derive(Debug)]
pub enum CharsError {
    /// Variant representing that the underlying stream was read successfully
    /// but it did not contain valid utf8 data.
    NotUtf8,

    /// Variant representing that an I/O error occurred.
    Other(Error),
}

impl error::Error for CharsError {
    /// Returns the lower-level cause of this error, if any.
    ///
    /// `Other` is a transparent wrapper: its `Display` output is the inner
    /// I/O error's message, so the inner error's own source is forwarded
    /// rather than the inner error itself (which would repeat the message
    /// when a caller prints the whole chain).
    ///
    /// The deprecated `Error::cause` keeps working for existing callers
    /// because its default implementation delegates to `source`.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match self {
            CharsError::NotUtf8 => None,
            CharsError::Other(e) => e.source(),
        }
    }
}

impl fmt::Display for CharsError {
    /// Writes a human-readable description of the error to `f`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            CharsError::NotUtf8 => {
                fmt::Display::fmt("byte stream did not contain valid utf8", f)
            }
            CharsError::Other(e) => fmt::Display::fmt(e, f),
        }
    }
}
130 | |