debug.rs source code [crates/regex_syntax/src/debug.rs]

/// Wraps a single byte so that its `fmt::Debug` output is the escaped form of
/// that byte rather than a bare integer.
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    /// Writes the byte using `core::ascii::escape_default`, with two tweaks:
    /// an ASCII space is shown as `' '`, and the hex digits of a `\xNN`
    /// escape are upper-cased.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A bare space is nearly invisible in debug output, so quote it.
        if self.0 == b' ' {
            return f.write_str("' '");
        }
        // ascii::escape_default never yields more than 10 bytes, so a fixed
        // stack buffer is enough to hold the whole escape sequence.
        let mut escaped = [0u8; 10];
        let mut used = 0;
        for (pos, raw) in core::ascii::escape_default(self.0).enumerate() {
            // Positions 0 and 1 hold the `\x` prefix (or a short escape such
            // as `\n`); only the hex digits after it are upper-cased, turning
            // \xab into \xAB.
            let is_hex_letter = pos >= 2 && matches!(raw, b'a'..=b'f');
            escaped[pos] = if is_hex_letter {
                raw.to_ascii_uppercase()
            } else {
                raw
            };
            used = pos + 1;
        }
        // The escape sequence is pure ASCII, so it is always valid UTF-8.
        f.write_str(core::str::from_utf8(&escaped[..used]).unwrap())
    }
}
27
28	/// A type that provides a human readable debug impl for arbitrary bytes.
29	///
30	/// This generally works best when the bytes are presumed to be mostly UTF-8,
31	/// but will work for anything.
32	///
33	/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
34	pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
35
36	impl<'a> core::fmt::Debug for Bytes<'a> {
37	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
38	write!(f, "`\"`")?;
39	// This is a sad re-implementation of a similar impl found in bstr.
40	let mut bytes = self.0;
41	while let Some(result) = utf8_decode(bytes) {
42	let ch = match result {
43	Ok(ch) => ch,
44	Err(byte) => {
45	write!(f, r"\x{:02x}", byte)?;
46	bytes = &bytes[`1`..];
47	continue;
48	}
49	};
50	bytes = &bytes[ch.len_utf8()..];
51	match ch {
52	'`\0`' => write!(f, "`\\`0")?,
53	// ASCII control characters except \0, \n, \r, \t
54	'`\x01`'..='`\x08`'
55	\| '`\x0b`'
56	\| '`\x0c`'
57	\| '`\x0e`'..='`\x19`'
58	\| '`\x7f`' => {
59	write!(f, "`\\`x{:`02`x}", u32::from(ch))?;
60	}
61	'`\n`' \| '`\r`' \| '`\t`' \| _ => {
62	write!(f, "{}", ch.escape_debug())?;
63	}
64	}
65	}
66	write!(f, "`\"`")?;
67	Ok(())
68	}
69	}
70
/// Decodes the UTF-8 encoded codepoint at the front of `bytes`.
///
/// Returns `None` exactly when `bytes` is empty. Otherwise returns
/// `Some(Ok(ch))` when the slice begins with a valid encoding of `ch`, or
/// `Some(Err(b))` where `b` is the first byte of the slice when it does not.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    // Maps a leading byte to the total sequence length it announces, or
    // `None` when the byte cannot begin a sequence at all.
    fn announced_len(lead: u8) -> Option<usize> {
        match lead {
            0x00..=0x7F => Some(1),
            // 0b10xx_xxxx is a continuation byte, never a leading byte.
            0x80..=0xBF => None,
            0xC0..=0xDF => Some(2),
            0xE0..=0xEF => Some(3),
            0xF0..=0xF7 => Some(4),
            0xF8..=0xFF => None,
        }
    }

    let first = *bytes.first()?;
    let width = match announced_len(first) {
        // ASCII needs no further validation.
        Some(1) => return Some(Ok(char::from(first))),
        Some(n) if n <= bytes.len() => n,
        // Either an impossible leading byte or a truncated sequence.
        _ => return Some(Err(first)),
    };
    // The leading byte only announces a length; full validation (overlong
    // forms, surrogates, bad continuation bytes) is left to from_utf8.
    let decoded = core::str::from_utf8(&bytes[..width])
        .map(|s| s.chars().next().unwrap())
        .map_err(|_| first);
    Some(decoded)
}
108