html.rs source code [crates/rinja_derive/src/html.rs]

1	use std::{fmt, str};
2
3	#[allow(unused)]
4	pub(crate) fn write_escaped_str(mut dest: impl fmt::Write, src: &str) -> fmt::Result {
5	// This implementation reads one byte after another.
6	// It's not very fast, but should work well enough until portable SIMD gets stabilized.
7
8	let mut escaped_buf: [u8; 8] = ESCAPED_BUF_INIT;
9	let mut last: usize = `0`;
10
11	for (index: usize, byte: u8) in src.bytes().enumerate() {
12	if let Some(escaped: [u8; 2]) = get_escaped(byte) {
13	[escaped_buf[`2`], escaped_buf[`3`]] = escaped;
14	write_str_if_nonempty(&mut dest, &src[last..index])?;
15	// SAFETY: the content of `escaped_buf` is pure ASCII
16	dest.write_str(unsafe { str::from_utf8_unchecked(&escaped_buf[..ESCAPED_BUF_LEN]) })?;
17	last = index + `1`;
18	}
19	}
20	write_str_if_nonempty(&mut dest, &src[last..])
21	}
22
23	#[allow(unused)]
24	pub(crate) fn write_escaped_char(mut dest: impl fmt::Write, c: char) -> fmt::Result {
25	if !c.is_ascii() {
26	dest.write_char(c)
27	} else if let Some(escaped: [u8; 2]) = get_escaped(byte:c as u8) {
28	let mut escaped_buf: [u8; 8] = ESCAPED_BUF_INIT;
29	[escaped_buf[`2`], escaped_buf[`3`]] = escaped;
30	// SAFETY: the content of `escaped_buf` is pure ASCII
31	dest.write_str(unsafe { str::from_utf8_unchecked(&escaped_buf[..ESCAPED_BUF_LEN]) })
32	} else {
33	// RATIONALE: `write_char(c)` gets optimized if it is known that `c.is_ascii()`
34	dest.write_char(c)
35	}
36	}
37
38	/// Returns the decimal representation of the codepoint if the character needs HTML escaping.
39	#[inline(always)]
40	fn get_escaped(byte: u8) -> Option<[u8; `2`]> {
41	match byte {
42	MIN_CHAR..=MAX_CHAR => match TABLE.lookup[(byte - MIN_CHAR) as usize] {
43	`0` => None,
44	escaped: u16 => Some(escaped.to_ne_bytes()),
45	},
46	_ => None,
47	}
48	}
49
/// Forwards `input` to `output`, but skips the `write_str` call entirely when `input`
/// is the empty string.
#[inline(always)]
fn write_str_if_nonempty(output: &mut impl fmt::Write, input: &str) -> fmt::Result {
    if input.is_empty() {
        return Ok(());
    }
    output.write_str(input)
}
58
/// Every character that has to be HTML-escaped. The order of the entries carries no
/// meaning; in particular they need not be sorted by codepoint.
const CHARS: &[u8] = &[b'"', b'&', b'\'', b'<', b'>'];
61
62	/// The character with the lowest codepoint that needs HTML escaping.
63	const MIN_CHAR: u8 = {
64	let mut v: u8 = u8::MAX;
65	let mut i: usize = `0`;
66	while i < CHARS.len() {
67	if v > CHARS[i] {
68	v = CHARS[i];
69	}
70	i += `1`;
71	}
72	v
73	};
74
75	/// The character with the highest codepoint that needs HTML escaping.
76	const MAX_CHAR: u8 = {
77	let mut v: u8 = u8::MIN;
78	let mut i: usize = `0`;
79	while i < CHARS.len() {
80	if v < CHARS[i] {
81	v = CHARS[i];
82	}
83	i += `1`;
84	}
85	v
86	};
87
88	/// Number of codepoints between the lowest and highest character that needs escaping, incl.
89	const CHAR_RANGE: usize = (MAX_CHAR - MIN_CHAR + `1`) as usize;
90
91	struct Table {
92	_align: [usize; `0`],
93	lookup: [u16; CHAR_RANGE],
94	}
95
96	/// For characters that need HTML escaping, the codepoint is formatted as decimal digits,
97	/// otherwise `b"\0\0"`. Starting at [`MIN_CHAR`].
98	const TABLE: Table = {
99	let mut table: Table = Table {
100	_align: [],
101	lookup: [`0`; CHAR_RANGE],
102	};
103	let mut i: usize = `0`;
104	while i < CHARS.len() {
105	let c: u8 = CHARS[i];
106	let h: u8 = c / `10` + b'0';
107	let l: u8 = c % `10` + b'0';
108	table.lookup[(c - MIN_CHAR) as usize] = u16::from_ne_bytes([h, l]);
109	i += `1`;
110	}
111	table
112	};
113
/// Template for an escape sequence: `&#__;`, where the two `_` placeholders (indices 2
/// and 3) get overwritten with decimal digits. Padded with NUL bytes to 8 bytes.
// RATIONALE: llvm generates better code if the buffer is register sized
const ESCAPED_BUF_INIT: [u8; 8] = *b"&#__;\0\0\0";
/// Number of leading bytes of [`ESCAPED_BUF_INIT`] that form the actual escape sequence,
/// i.e. its length without the NUL padding.
const ESCAPED_BUF_LEN: usize = b"&#__;".len();
117
#[test]
fn test_simple_html_string_escaping() {
    let mut buf = String::new();

    // Escaped characters at the very start and end of the input.
    write_escaped_str(&mut buf, "<script>").unwrap();
    assert_eq!(buf, "&#60;script&#62;");

    // Escaped characters surrounded by plain text.
    buf.clear();
    write_escaped_str(&mut buf, "s<crip>t").unwrap();
    assert_eq!(buf, "s&#60;crip&#62;t");

    // A long unescaped run between two escaped characters.
    buf.clear();
    write_escaped_str(&mut buf, "s<cripcripcripcripcripcripcripcripcripcrip>t").unwrap();
    assert_eq!(buf, "s&#60;cripcripcripcripcripcripcripcripcripcrip&#62;t");

    // The remaining special characters: double quote, ampersand, single quote.
    buf.clear();
    write_escaped_str(&mut buf, "\"&'").unwrap();
    assert_eq!(buf, "&#34;&#38;&#39;");

    // Empty input produces no output.
    buf.clear();
    write_escaped_str(&mut buf, "").unwrap();
    assert_eq!(buf, "");

    // Non-ASCII text passes through untouched.
    buf.clear();
    write_escaped_str(&mut buf, "ä<ö").unwrap();
    assert_eq!(buf, "ä&#60;ö");
}
132