| 1 | // Take a look at the license at the top of the repository in the LICENSE file. |
| 2 | |
| 3 | use crate::json::read::internal_reader::InternalReader; |
| 4 | use std::{ |
| 5 | error, fmt, |
| 6 | io::{Error, Read}, |
| 7 | str::from_utf8, |
| 8 | }; |
| 9 | |
| 10 | pub struct ByteToChar<R> { |
| 11 | iter: InternalReader<R>, |
| 12 | } |
| 13 | |
| 14 | impl<R: Read> ByteToChar<R> { |
| 15 | #[inline ] |
| 16 | pub fn new(read: R, buffer_size: usize) -> Result<Self, Error> { |
| 17 | Ok(ByteToChar { |
| 18 | iter: InternalReader::new(read, buffer_size)?, |
| 19 | }) |
| 20 | } |
| 21 | |
| 22 | fn get_next(&mut self) -> Result<Option<u8>, CharsError> { |
| 23 | match self.iter.next() { |
| 24 | None => Ok(None), |
| 25 | Some(item: Result) => match item { |
| 26 | Ok(item: u8) => Ok(Some(item)), |
| 27 | Err(err: Error) => Err(CharsError::Other(err)), |
| 28 | }, |
| 29 | } |
| 30 | } |
| 31 | } |
| 32 | |
| 33 | impl<R: Read + fmt::Debug> fmt::Debug for ByteToChar<R> { |
| 34 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 35 | f.debug_struct("Filter" ).field(name:"iter" , &self.iter).finish() |
| 36 | } |
| 37 | } |
| 38 | |
| 39 | impl<R: Read> Iterator for ByteToChar<R> { |
| 40 | type Item = Result<char, CharsError>; |
| 41 | |
| 42 | fn next(&mut self) -> Option<Result<char, CharsError>> { |
| 43 | let first_byte = match self.get_next() { |
| 44 | Err(err) => return Some(Err(err)), |
| 45 | Ok(item) => item?, |
| 46 | }; |
| 47 | |
| 48 | let width = utf8_char_width(first_byte); |
| 49 | if width == 1 { |
| 50 | return Some(Ok(first_byte as char)); |
| 51 | } |
| 52 | if width == 0 { |
| 53 | return Some(Err(CharsError::NotUtf8)); |
| 54 | } |
| 55 | let mut buf = [first_byte, 0, 0, 0]; |
| 56 | { |
| 57 | let mut start = 1; |
| 58 | while start < width { |
| 59 | let byte = match self.get_next() { |
| 60 | Err(err) => return Some(Err(err)), |
| 61 | Ok(item) => match item { |
| 62 | Some(item) => item, |
| 63 | None => return Some(Err(CharsError::NotUtf8)), |
| 64 | }, |
| 65 | }; |
| 66 | buf[start] = byte; |
| 67 | start += 1; |
| 68 | } |
| 69 | } |
| 70 | Some(match from_utf8(&buf[..width]).ok() { |
| 71 | Some(s) => Ok(s.chars().next().unwrap()), |
| 72 | None => Err(CharsError::NotUtf8), |
| 73 | }) |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | fn utf8_char_width(b: u8) -> usize { |
| 78 | UTF8_CHAR_WIDTH[b as usize] as usize |
| 79 | } |
| 80 | |
// Length in bytes of the UTF-8 sequence introduced by each possible leading
// byte; 0 marks a byte that cannot start a sequence.
// https://tools.ietf.org/html/rfc3629
static UTF8_CHAR_WIDTH: [u8; 256] = [
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00..=0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10..=0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20..=0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30..=0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40..=0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50..=0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60..=0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70..=0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80..=0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90..=0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0..=0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0..=0xBF
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0..=0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0..=0xDF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0..=0xEF
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0..=0xFF
];
| 100 | |
/// Errors that the [`ByteToChar`] iterator can yield while decoding.
#[derive(Debug)]
pub enum CharsError {
    /// The bytes were read without an I/O failure, but they do not form
    /// valid UTF-8.
    NotUtf8,

    /// Reading from the underlying source failed; carries the I/O error.
    Other(Error),
}
| 112 | |
| 113 | impl error::Error for CharsError { |
| 114 | fn cause(&self) -> Option<&dyn error::Error> { |
| 115 | match *self { |
| 116 | CharsError::NotUtf8 => None, |
| 117 | CharsError::Other(ref e: &Error) => e.source(), |
| 118 | } |
| 119 | } |
| 120 | } |
| 121 | |
| 122 | impl fmt::Display for CharsError { |
| 123 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 124 | match *self { |
| 125 | CharsError::NotUtf8 => "byte stream did not contain valid utf8" .fmt(f), |
| 126 | CharsError::Other(ref e: &Error) => e.fmt(f), |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | |