escape.rs source code [crates/core/src/escape.rs]

1	//! Helper code for character escaping.
2
3	use crate::ascii;
4	use crate::num::NonZero;
5	use crate::ops::Range;
6
7	const HEX_DIGITS: [ascii::Char; `16`] = *b"0123456789abcdef".as_ascii().unwrap();
8
9	#[inline]
10	const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
11	const { assert!(N >= `2`) };
12
13	let mut output: [AsciiChar; N] = [ascii::Char::Null; N];
14
15	output[`0`] = ascii::Char::ReverseSolidus;
16	output[`1`] = a;
17
18	(output, `0`..`2`)
19	}
20
21	#[inline]
22	const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
23	const { assert!(N >= `4`) };
24
25	let mut output: [AsciiChar; N] = [ascii::Char::Null; N];
26
27	let hi: AsciiChar = HEX_DIGITS[(byte >> `4`) as usize];
28	let lo: AsciiChar = HEX_DIGITS[(byte & `0xf`) as usize];
29
30	output[`0`] = ascii::Char::ReverseSolidus;
31	output[`1`] = ascii::Char::SmallX;
32	output[`2`] = hi;
33	output[`3`] = lo;
34
35	(output, `0`..`4`)
36	}
37
38	#[inline]
39	const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
40	const { assert!(N >= `1`) };
41
42	let mut output: [AsciiChar; N] = [ascii::Char::Null; N];
43
44	output[`0`] = a;
45
46	(output, `0`..`1`)
47	}
48
49	/// Escapes an ASCII character.
50	///
51	/// Returns a buffer and the length of the escaped representation.
52	const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
53	const { assert!(N >= `4`) };
54
55	#[cfg(feature = "optimize_for_size")]
56	{
57	match byte {
58	b'`\t`' => backslash(ascii::Char::SmallT),
59	b'`\r`' => backslash(ascii::Char::SmallR),
60	b'`\n`' => backslash(ascii::Char::SmallN),
61	b'`\\`' => backslash(ascii::Char::ReverseSolidus),
62	b'`\'`' => backslash(ascii::Char::Apostrophe),
63	b'"' => backslash(ascii::Char::QuotationMark),
64	`0x00`..=`0x1F` \| `0x7F` => hex_escape(byte),
65	_ => match ascii::Char::from_u8(byte) {
66	Some(a) => verbatim(a),
67	None => hex_escape(byte),
68	},
69	}
70	}
71
72	#[cfg(not(feature = "optimize_for_size"))]
73	{
74	/// Lookup table helps us determine how to display character.
75	///
76	/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
77	/// indicate whether the result is escaped or unescaped.
78	///
79	/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
80	/// escaped NUL will not occur.
81	const LOOKUP: [u8; `256`] = {
82	let mut arr = [`0`; `256`];
83	let mut idx = `0`;
84	while idx <= `255` {
85	arr[idx] = match idx as u8 {
86	// use 8th bit to indicate escaped
87	b'`\t`' => `0x80` \| b't',
88	b'`\r`' => `0x80` \| b'r',
89	b'`\n`' => `0x80` \| b'n',
90	b'`\\`' => `0x80` \| b'`\\`',
91	b'`\'`' => `0x80` \| b'`\'`',
92	b'"' => `0x80` \| b'"',
93
94	// use NUL to indicate hex-escaped
95	`0x00`..=`0x1F` \| `0x7F`..=`0xFF` => `0x80` \| b'`\0`',
96
97	idx => idx,
98	};
99	idx += `1`;
100	}
101	arr
102	};
103
104	let lookup = LOOKUP[byte as usize];
105
106	// 8th bit indicates escape
107	let lookup_escaped = lookup & `0x80` != `0`;
108
109	// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
110	let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & `0x7F`) };
111
112	if lookup_escaped {
113	// NUL indicates hex-escaped
114	if matches!(lookup_ascii, ascii::Char::Null) {
115	hex_escape(byte)
116	} else {
117	backslash(lookup_ascii)
118	}
119	} else {
120	verbatim(lookup_ascii)
121	}
122	}
123	}
124
125	/// Escapes a character `\u{NNNN}` representation.
126	///
127	/// Returns a buffer and the length of the escaped representation.
128	const fn escape_unicode<const N: usize>(c: char) -> ([ascii::Char; N], Range<u8>) {
129	const { assert!(N >= `10` && N < u8::MAX as usize) };
130
131	let c: u32 = c as u32;
132
133	// OR-ing `1` ensures that for `c == 0` the code computes that
134	// one digit should be printed.
135	let start: usize = (c \| `1`).leading_zeros() as usize / `4` - `2`;
136
137	let mut output: [AsciiChar; N] = [ascii::Char::Null; N];
138	output[`3`] = HEX_DIGITS[((c >> `20`) & `15`) as usize];
139	output[`4`] = HEX_DIGITS[((c >> `16`) & `15`) as usize];
140	output[`5`] = HEX_DIGITS[((c >> `12`) & `15`) as usize];
141	output[`6`] = HEX_DIGITS[((c >> `8`) & `15`) as usize];
142	output[`7`] = HEX_DIGITS[((c >> `4`) & `15`) as usize];
143	output[`8`] = HEX_DIGITS[((c >> `0`) & `15`) as usize];
144	output[`9`] = ascii::Char::RightCurlyBracket;
145	output[start + `0`] = ascii::Char::ReverseSolidus;
146	output[start + `1`] = ascii::Char::SmallU;
147	output[start + `2`] = ascii::Char::LeftCurlyBracket;
148
149	(output, (start as u8)..(N as u8))
150	}
151
152	/// An iterator over an fixed-size array.
153	///
154	/// This is essentially equivalent to array’s IntoIter except that indexes are
155	/// limited to u8 to reduce size of the structure.
156	#[derive(Clone, Debug)]
157	pub(crate) struct EscapeIterInner<const N: usize> {
158	// The element type ensures this is always ASCII, and thus also valid UTF-8.
159	data: [ascii::Char; N],
160
161	// Invariant: `alive.start <= alive.end <= N`
162	alive: Range<u8>,
163	}
164
165	impl<const N: usize> EscapeIterInner<N> {
166	pub(crate) const fn backslash(c: ascii::Char) -> Self {
167	let (data, range) = backslash(c);
168	Self { data, alive: range }
169	}
170
171	pub(crate) const fn ascii(c: u8) -> Self {
172	let (data, range) = escape_ascii(c);
173	Self { data, alive: range }
174	}
175
176	pub(crate) const fn unicode(c: char) -> Self {
177	let (data, range) = escape_unicode(c);
178	Self { data, alive: range }
179	}
180
181	#[inline]
182	pub(crate) const fn empty() -> Self {
183	Self { data: [ascii::Char::Null; N], alive: `0`..`0` }
184	}
185
186	#[inline]
187	pub(crate) fn as_ascii(&self) -> &[ascii::Char] {
188	// SAFETY: `self.alive` is guaranteed to be a valid range for indexing `self.data`.
189	unsafe {
190	self.data.get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end))
191	}
192	}
193
194	#[inline]
195	pub(crate) fn as_str(&self) -> &str {
196	self.as_ascii().as_str()
197	}
198
199	#[inline]
200	pub(crate) fn len(&self) -> usize {
201	usize::from(self.alive.end - self.alive.start)
202	}
203
204	pub(crate) fn next(&mut self) -> Option<u8> {
205	let i = self.alive.next()?;
206
207	// SAFETY: `i` is guaranteed to be a valid index for `self.data`.
208	unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }
209	}
210
211	pub(crate) fn next_back(&mut self) -> Option<u8> {
212	let i = self.alive.next_back()?;
213
214	// SAFETY: `i` is guaranteed to be a valid index for `self.data`.
215	unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }
216	}
217
218	pub(crate) fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
219	self.alive.advance_by(n)
220	}
221
222	pub(crate) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
223	self.alive.advance_back_by(n)
224	}
225	}
226