/// Wrapper around a single byte whose `fmt::Debug` output is the escaped,
/// human readable form of that byte.
///
/// Printable ASCII is shown as-is, a space is shown as `' '`, and hex escapes
/// use upper case digits (e.g. `\xAB`).
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A bare space is too easy to miss in debug output, so it is the one
        // byte that gets wrapped in single quotes.
        if self.0 == b' ' {
            return f.write_str("' '");
        }
        // ascii::escape_default never yields more than 10 bytes, so this
        // fixed buffer always has room for the whole escape sequence.
        let mut buf = [0u8; 10];
        let mut used = 0;
        for (pos, raw) in core::ascii::escape_default(self.0).enumerate() {
            // Only positions 2 and up can hold hex digits (`\xab` -> `\xAB`);
            // earlier positions are the byte itself or the `\n`-style escape
            // letter, which must keep their case.
            buf[used] = match raw {
                b'a'..=b'f' if pos >= 2 => raw.to_ascii_uppercase(),
                _ => raw,
            };
            used += 1;
        }
        // The escape sequence is pure ASCII, so it is always valid UTF-8.
        f.write_str(core::str::from_utf8(&buf[..used]).unwrap())
    }
}
27 | |
28 | /// A type that provides a human readable debug impl for arbitrary bytes. |
29 | /// |
30 | /// This generally works best when the bytes are presumed to be mostly UTF-8, |
31 | /// but will work for anything. |
32 | /// |
33 | /// N.B. This is copied nearly verbatim from regex-automata. Sigh. |
34 | pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); |
35 | |
36 | impl<'a> core::fmt::Debug for Bytes<'a> { |
37 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
38 | write!(f, " \"" )?; |
39 | // This is a sad re-implementation of a similar impl found in bstr. |
40 | let mut bytes = self.0; |
41 | while let Some(result) = utf8_decode(bytes) { |
42 | let ch = match result { |
43 | Ok(ch) => ch, |
44 | Err(byte) => { |
45 | write!(f, r"\x{:02x}" , byte)?; |
46 | bytes = &bytes[1..]; |
47 | continue; |
48 | } |
49 | }; |
50 | bytes = &bytes[ch.len_utf8()..]; |
51 | match ch { |
52 | ' \0' => write!(f, " \\0" )?, |
53 | // ASCII control characters except \0, \n, \r, \t |
54 | ' \x01' ..=' \x08' |
55 | | ' \x0b' |
56 | | ' \x0c' |
57 | | ' \x0e' ..=' \x19' |
58 | | ' \x7f' => { |
59 | write!(f, " \\x{:02x}" , u32::from(ch))?; |
60 | } |
61 | ' \n' | ' \r' | ' \t' | _ => { |
62 | write!(f, "{}" , ch.escape_debug())?; |
63 | } |
64 | } |
65 | } |
66 | write!(f, " \"" )?; |
67 | Ok(()) |
68 | } |
69 | } |
70 | |
/// Decodes the UTF-8 encoded codepoint at the start of `bytes`.
///
/// When the slice does not begin with a valid encoding of a codepoint
/// (invalid leading byte, truncated sequence, or a sequence rejected by
/// `core::str::from_utf8`), the first byte is handed back as the error value
/// so the caller can skip over it.
///
/// `None` is returned if and only if `bytes` is empty.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    // Sequence length implied by a leading byte, or `None` when the byte can
    // never start a sequence.
    fn expected_len(lead: u8) -> Option<usize> {
        match lead {
            // 0xxxxxxx: ASCII.
            0x00..=0x7F => Some(1),
            // 10xxxxxx: continuation byte, never valid in leading position.
            0x80..=0xBF => None,
            // 110xxxxx
            0xC0..=0xDF => Some(2),
            // 1110xxxx
            0xE0..=0xEF => Some(3),
            // 11110xxx
            0xF0..=0xF7 => Some(4),
            // 11111xxx: not a valid leading byte.
            0xF8..=0xFF => None,
        }
    }

    let first = *bytes.first()?;
    let width = match expected_len(first) {
        // ASCII needs no further validation.
        Some(1) => return Some(Ok(char::from(first))),
        Some(n) if n <= bytes.len() => n,
        // Either an invalid leading byte or a truncated sequence.
        _ => return Some(Err(first)),
    };
    // Let the standard library validate the continuation bytes (and reject
    // overlong encodings and surrogates).
    let decoded = core::str::from_utf8(&bytes[..width])
        .map(|s| s.chars().next().unwrap())
        .map_err(|_| first);
    Some(decoded)
}
108 | |