1 | // Take a look at the license at the top of the repository in the LICENSE file. |
2 | |
3 | use crate::json::read::internal_reader::InternalReader; |
4 | use std::{ |
5 | error, fmt, |
6 | io::{Error, Read}, |
7 | str::from_utf8, |
8 | }; |
9 | |
10 | pub struct ByteToChar<R> { |
11 | iter: InternalReader<R>, |
12 | } |
13 | |
14 | impl<R: Read> ByteToChar<R> { |
15 | #[inline ] |
16 | pub fn new(read: R, buffer_size: usize) -> Result<Self, Error> { |
17 | Ok(ByteToChar { |
18 | iter: InternalReader::new(read, buffer_size)?, |
19 | }) |
20 | } |
21 | |
22 | fn get_next(&mut self) -> Result<Option<u8>, CharsError> { |
23 | match self.iter.next() { |
24 | None => Ok(None), |
25 | Some(item: Result) => match item { |
26 | Ok(item: u8) => Ok(Some(item)), |
27 | Err(err: Error) => Err(CharsError::Other(err)), |
28 | }, |
29 | } |
30 | } |
31 | } |
32 | |
33 | impl<R: Read + fmt::Debug> fmt::Debug for ByteToChar<R> { |
34 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
35 | f.debug_struct("Filter" ).field(name:"iter" , &self.iter).finish() |
36 | } |
37 | } |
38 | |
39 | impl<R: Read> Iterator for ByteToChar<R> { |
40 | type Item = Result<char, CharsError>; |
41 | |
42 | fn next(&mut self) -> Option<Result<char, CharsError>> { |
43 | let first_byte = match self.get_next() { |
44 | Err(err) => return Some(Err(err)), |
45 | Ok(item) => match item { |
46 | Some(item) => item, |
47 | None => return None, |
48 | }, |
49 | }; |
50 | |
51 | let width = utf8_char_width(first_byte); |
52 | if width == 1 { |
53 | return Some(Ok(first_byte as char)); |
54 | } |
55 | if width == 0 { |
56 | return Some(Err(CharsError::NotUtf8)); |
57 | } |
58 | let mut buf = [first_byte, 0, 0, 0]; |
59 | { |
60 | let mut start = 1; |
61 | while start < width { |
62 | let byte = match self.get_next() { |
63 | Err(err) => return Some(Err(err)), |
64 | Ok(item) => match item { |
65 | Some(item) => item, |
66 | None => return Some(Err(CharsError::NotUtf8)), |
67 | }, |
68 | }; |
69 | buf[start] = byte; |
70 | start += 1; |
71 | } |
72 | } |
73 | Some(match from_utf8(&buf[..width]).ok() { |
74 | Some(s) => Ok(s.chars().next().unwrap()), |
75 | None => Err(CharsError::NotUtf8), |
76 | }) |
77 | } |
78 | } |
79 | |
/// Returns the total length in bytes of the UTF-8 sequence introduced by the
/// lead byte `b`, or `0` when `b` cannot start a sequence.
fn utf8_char_width(b: u8) -> usize {
    let width = UTF8_CHAR_WIDTH[usize::from(b)];
    usize::from(width)
}

// https://tools.ietf.org/html/rfc3629
//
// Sequence length indexed by lead byte, 16 entries per row. A `0` marks a byte
// that may not start a sequence: continuation bytes (0x80..=0xBF), the
// lead bytes 0xC0/0xC1 and everything from 0xF5 upwards.
static UTF8_CHAR_WIDTH: [u8; 256] = [
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xBF
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xDF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
];
103 | |
/// An enumeration of possible errors that can be generated by the
/// `ByteToChar` adapter.
#[derive(Debug)]
pub enum CharsError {
    /// Variant representing that the underlying stream was read successfully
    /// but it did not contain valid utf8 data.
    NotUtf8,

    /// Variant representing that an I/O error occurred.
    Other(Error),
}

impl error::Error for CharsError {
    /// Exposes the error chain behind this error.
    ///
    /// `Other` is a transparent wrapper: its `Display` output is the inner
    /// error's, so its source is the inner error's source as well. `NotUtf8`
    /// has no underlying cause.
    ///
    /// Implementing `source` (rather than only the deprecated `cause`) makes
    /// the chain visible to callers that walk it with `source()`; the default
    /// `cause` delegates here, so older callers keep working.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match self {
            CharsError::NotUtf8 => None,
            CharsError::Other(err) => error::Error::source(err),
        }
    }
}

impl fmt::Display for CharsError {
    /// Writes a fixed message for `NotUtf8` and defers to the wrapped I/O
    /// error's own `Display` output for `Other`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fully qualified calls: they resolve without the trait being
        // imported and cannot be confused with `Debug::fmt`.
        match self {
            CharsError::NotUtf8 => {
                fmt::Display::fmt("byte stream did not contain valid utf8", f)
            }
            CharsError::Other(err) => fmt::Display::fmt(err, f),
        }
    }
}
133 | |