/// A type that wraps a single byte with a convenient `fmt::Debug` impl that
/// escapes the byte.
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    /// Writes the wrapped byte in escaped form.
    ///
    /// An ASCII space is written as `' '`. Every other byte is written using
    /// the escape sequence produced by `core::ascii::escape_default`, except
    /// that the hex digits of a `\xNN` escape are upper-cased (`\xab` becomes
    /// `\xAB`).
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // Special case ASCII space. It's too hard to read otherwise, so
        // put quotes around it. I sometimes wonder whether just '\x20' would
        // be better...
        if self.0 == b' ' {
            return write!(f, "' '");
        }
        // 10 bytes is enough to cover any output from ascii::escape_default.
        let mut bytes = [0u8; 10];
        let mut len = 0;
        for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
            // Capitalize \xab to \xAB. Positions 0 and 1 hold the `\x`
            // prefix (or a one/two byte escape), so only positions from 2
            // onwards can be hex digits.
            if i >= 2 && (b'a'..=b'f').contains(&b) {
                b = b.to_ascii_uppercase();
            }
            bytes[len] = b;
            len += 1;
        }
        // The buffer only holds bytes yielded by ascii::escape_default
        // (optionally upper-cased), so it is always valid UTF-8.
        let escaped = core::str::from_utf8(&bytes[..len])
            .expect("ascii::escape_default only yields ASCII bytes");
        write!(f, "{}", escaped)
    }
}
27
28/// A type that provides a human readable debug impl for arbitrary bytes.
29///
30/// This generally works best when the bytes are presumed to be mostly UTF-8,
31/// but will work for anything.
32///
33/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
34pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
35
36impl<'a> core::fmt::Debug for Bytes<'a> {
37 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
38 write!(f, "\"")?;
39 // This is a sad re-implementation of a similar impl found in bstr.
40 let mut bytes = self.0;
41 while let Some(result) = utf8_decode(bytes) {
42 let ch = match result {
43 Ok(ch) => ch,
44 Err(byte) => {
45 write!(f, r"\x{:02x}", byte)?;
46 bytes = &bytes[1..];
47 continue;
48 }
49 };
50 bytes = &bytes[ch.len_utf8()..];
51 match ch {
52 '\0' => write!(f, "\\0")?,
53 // ASCII control characters except \0, \n, \r, \t
54 '\x01'..='\x08'
55 | '\x0b'
56 | '\x0c'
57 | '\x0e'..='\x19'
58 | '\x7f' => {
59 write!(f, "\\x{:02x}", u32::from(ch))?;
60 }
61 '\n' | '\r' | '\t' | _ => {
62 write!(f, "{}", ch.escape_debug())?;
63 }
64 }
65 }
66 write!(f, "\"")?;
67 Ok(())
68 }
69}
70
/// Decodes the first UTF-8 encoded codepoint of `bytes`.
///
/// The outer `Option` is `None` if and only if `bytes` is empty. Otherwise
/// the inner `Result` is `Ok(ch)` when the slice starts with a valid encoding
/// of `ch`, and `Err(b)` with `b == bytes[0]` when it does not (invalid lead
/// byte, truncated sequence, or a sequence rejected by
/// `core::str::from_utf8`).
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    let lead = *bytes.first()?;

    // Work out how many bytes the sequence claims to occupy, judging only
    // by its leading byte.
    let width = match lead {
        // Plain ASCII decodes to itself.
        0x00..=0x7F => return Some(Ok(char::from(lead))),
        // A continuation byte (10xx_xxxx) or a byte above 1111_0111 can
        // never start a sequence.
        0x80..=0xBF | 0xF8..=0xFF => return Some(Err(lead)),
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
    };

    // A truncated or otherwise invalid sequence reports the lead byte;
    // full validation is delegated to `core::str::from_utf8`.
    let decoded = bytes
        .get(..width)
        .and_then(|prefix| core::str::from_utf8(prefix).ok())
        .and_then(|text| text.chars().next());
    Some(decoded.ok_or(lead))
}
108