/// A type that wraps a single byte with a convenient `fmt::Debug` impl that
/// escapes the byte.
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    /// Writes the wrapped byte in escaped form.
    ///
    /// An ASCII space is written as `' '`. Every other byte is written using
    /// `core::ascii::escape_default`, except that the hex digits of a `\xNN`
    /// escape are upper-cased (e.g. `\xAB` rather than `\xab`).
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // Special case ASCII space. It's too hard to read otherwise, so
        // put quotes around it. I sometimes wonder whether just '\x20' would
        // be better...
        if self.0 == b' ' {
            return write!(f, "' '");
        }
        // 10 bytes is enough to cover any output from ascii::escape_default.
        let mut bytes = [0u8; 10];
        let mut len = 0;
        for (i, b) in core::ascii::escape_default(self.0).enumerate() {
            // Capitalize \xab to \xAB. Only `\xNN` escapes are longer than
            // two bytes, so positions 2 and up are always hex digits, and
            // upper-casing leaves the decimal digits untouched.
            bytes[i] = if i >= 2 { b.to_ascii_uppercase() } else { b };
            len = i + 1;
        }
        let escaped = core::str::from_utf8(&bytes[..len])
            .expect("ascii::escape_default only yields ASCII bytes");
        write!(f, "{}", escaped)
    }
}
| 28 | /// A type that provides a human readable debug impl for arbitrary bytes. | 
|---|
| 29 | /// | 
|---|
| 30 | /// This generally works best when the bytes are presumed to be mostly UTF-8, | 
|---|
| 31 | /// but will work for anything. | 
|---|
| 32 | /// | 
|---|
| 33 | /// N.B. This is copied nearly verbatim from regex-automata. Sigh. | 
|---|
| 34 | pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); | 
|---|
| 35 |  | 
|---|
| 36 | impl<'a> core::fmt::Debug for Bytes<'a> { | 
|---|
| 37 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { | 
|---|
| 38 | write!(f, "\" ")?; | 
|---|
| 39 | // This is a sad re-implementation of a similar impl found in bstr. | 
|---|
| 40 | let mut bytes = self.0; | 
|---|
| 41 | while let Some(result) = utf8_decode(bytes) { | 
|---|
| 42 | let ch = match result { | 
|---|
| 43 | Ok(ch) => ch, | 
|---|
| 44 | Err(byte) => { | 
|---|
| 45 | write!(f, r"\x{:02x}", byte)?; | 
|---|
| 46 | bytes = &bytes[1..]; | 
|---|
| 47 | continue; | 
|---|
| 48 | } | 
|---|
| 49 | }; | 
|---|
| 50 | bytes = &bytes[ch.len_utf8()..]; | 
|---|
| 51 | match ch { | 
|---|
| 52 | '\0 '=> write!(f, "\\ 0")?, | 
|---|
| 53 | // ASCII control characters except \0, \n, \r, \t | 
|---|
| 54 | '\x01 '..= '\x08 ' | 
|---|
| 55 | | '\x0b ' | 
|---|
| 56 | | '\x0c ' | 
|---|
| 57 | | '\x0e '..= '\x19 ' | 
|---|
| 58 | | '\x7f '=> { | 
|---|
| 59 | write!(f, "\\ x{:02x} ", u32::from(ch))?; | 
|---|
| 60 | } | 
|---|
| 61 | '\n '| '\r '| '\t '| _ => { | 
|---|
| 62 | write!(f, "{} ", ch.escape_debug())?; | 
|---|
| 63 | } | 
|---|
| 64 | } | 
|---|
| 65 | } | 
|---|
| 66 | write!(f, "\" ")?; | 
|---|
| 67 | Ok(()) | 
|---|
| 68 | } | 
|---|
| 69 | } | 
|---|
| 70 |  | 
|---|
/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
///
/// If no valid encoding of a codepoint exists at the beginning of the given
/// byte slice, then the first byte is returned instead.
///
/// This returns `None` if and only if `bytes` is empty.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    /// Returns the sequence length implied by a leading byte, or `None` if
    /// the byte cannot start a sequence. This only inspects the bit pattern;
    /// full validation is left to `core::str::from_utf8`.
    fn len(byte: u8) -> Option<usize> {
        match byte {
            0b0000_0000..=0b0111_1111 => Some(1),
            // Continuation bytes (10xx_xxxx) never start a sequence.
            0b1000_0000..=0b1011_1111 => None,
            0b1100_0000..=0b1101_1111 => Some(2),
            0b1110_0000..=0b1110_1111 => Some(3),
            0b1111_0000..=0b1111_0111 => Some(4),
            0b1111_1000..=0b1111_1111 => None,
        }
    }

    let first = *bytes.first()?;
    let len = match len(first) {
        // ASCII needs no further validation.
        Some(1) => return Some(Ok(char::from(first))),
        Some(len) if len <= bytes.len() => len,
        // Either not a leading byte, or the sequence is truncated.
        _ => return Some(Err(first)),
    };
    let decoded = core::str::from_utf8(&bytes[..len])
        .ok()
        .and_then(|s| s.chars().next())
        .ok_or(first);
    Some(decoded)
}