//! Contains functions for escaping XML special characters.
2
3use std::{borrow::Cow, marker::PhantomData, fmt::{Display, Result, Formatter}};
4
/// A byte-level escape table: maps each byte that must be escaped to its replacement text.
pub(crate) trait Escapes {
    /// Returns the replacement for `c`, or `None` when the byte is emitted unchanged.
    fn escape(c: u8) -> Option<&'static str>;

    /// Returns `true` when [`Escapes::escape`] yields a replacement for `c`.
    fn byte_needs_escaping(c: u8) -> bool {
        Self::escape(c).is_some()
    }

    /// Returns `true` when at least one byte of `s` has a replacement.
    fn str_needs_escaping(s: &str) -> bool {
        s.bytes().any(|c| Self::escape(c).is_some())
    }
}
16
/// A borrowed string paired, at the type level, with the escape table `E`.
///
/// `E` is never stored; `PhantomData` only records which table was chosen.
/// The struct itself places no bound on `E` — the `impl` blocks that use the
/// table state the `Escapes` requirement themselves.
pub(crate) struct Escaped<'a, E> {
    _escape_phantom: PhantomData<E>,
    to_escape: &'a str,
}
21
22impl<'a, E: Escapes> Escaped<'a, E> {
23 pub fn new(s: &'a str) -> Self {
24 Escaped {
25 _escape_phantom: PhantomData,
26 to_escape: s,
27 }
28 }
29}
30
31impl<'a, E: Escapes> Display for Escaped<'a, E> {
32 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
33 let mut total_remaining: &str = self.to_escape;
34
35 // find the next occurence
36 while let Some(n: usize) = total_remainingBytes<'_>
37 .bytes()
38 .position(E::byte_needs_escaping)
39 {
40 let (start: &str, remaining: &str) = total_remaining.split_at(mid:n);
41
42 f.write_str(data:start)?;
43
44 // unwrap is safe because we checked is_some for position n earlier
45 let next_byte: u8 = remaining.bytes().next().unwrap();
46 let replacement: &str = E::escape(next_byte).unwrap();
47 f.write_str(data:replacement)?;
48
49 total_remaining = &remaining[1..];
50 }
51
52 f.write_str(data:total_remaining)
53 }
54}
55
56fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> {
57 if E::str_needs_escaping(s) {
58 Cow::Owned(format!("{}", Escaped::<E>::new(s)))
59 } else {
60 Cow::Borrowed(s)
61 }
62}
63
/// Declares a unit struct `$name` and implements `Escapes` for it from a list
/// of `byte => replacement` pairs. Bytes that are not listed map to `None`.
///
/// A trailing comma after the last pair is accepted.
macro_rules! escapes {
    {
        $name:ident,
        $($k:expr => $v:expr),* $(,)?
    } => {
        pub(crate) struct $name;

        impl Escapes for $name {
            fn escape(c: u8) -> Option<&'static str> {
                match c {
                    $( $k => Some($v), )*
                    _ => None,
                }
            }
        }
    };
}
81
82escapes!(
83 AttributeEscapes,
84 b'<' => "&lt;",
85 b'>' => "&gt;",
86 b'"' => "&quot;",
87 b'\'' => "&apos;",
88 b'&' => "&amp;",
89 b'\n' => "&#xA;",
90 b'\r' => "&#xD;",
91);
92
93escapes!(
94 PcDataEscapes,
95 b'<' => "&lt;",
96 b'&' => "&amp;",
97);
98
99/// Performs escaping of common XML characters inside an attribute value.
100///
101/// This function replaces several important markup characters with their
102/// entity equivalents:
103///
104/// * `<` → `&lt;`
105/// * `>` → `&gt;`
106/// * `"` → `&quot;`
107/// * `'` → `&apos;`
108/// * `&` → `&amp;`
109///
110/// The following characters are escaped so that attributes are printed on
111/// a single line:
112/// * `\n` → `&#xA;`
113/// * `\r` → `&#xD;`
114///
115/// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
116///
117/// Does not perform allocations if the given string does not contain escapable characters.
118#[inline]
119#[must_use]
120pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
121 escape_str::<AttributeEscapes>(s)
122}
123
124/// Performs escaping of common XML characters inside PCDATA.
125///
126/// This function replaces several important markup characters with their
127/// entity equivalents:
128///
129/// * `<` → `&lt;`
130/// * `&` → `&amp;`
131///
132/// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
133///
134/// Does not perform allocations if the given string does not contain escapable characters.
135#[inline]
136#[must_use]
137pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
138 escape_str::<PcDataEscapes>(s)
139}
140
#[cfg(test)]
mod tests {
    use super::{escape_str_attribute, escape_str_pcdata};
    use std::borrow::Cow;

    /// Reports whether the escaping function handed back the input without allocating.
    fn is_borrowed(value: &Cow<'_, str>) -> bool {
        match value {
            Cow::Borrowed(_) => true,
            Cow::Owned(_) => false,
        }
    }

    #[test]
    fn test_escape_str_attribute() {
        assert_eq!(
            escape_str_attribute("<>'\"&\n\r"),
            "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;"
        );
        assert_eq!(escape_str_attribute("no_escapes"), "no_escapes");
    }

    #[test]
    fn test_escape_str_pcdata() {
        assert_eq!(escape_str_pcdata("<&"), "&lt;&amp;");
        assert_eq!(escape_str_pcdata("no_escapes"), "no_escapes");
    }

    #[test]
    fn test_escape_multibyte_code_points() {
        assert_eq!(escape_str_attribute("☃<"), "☃&lt;");
        assert_eq!(escape_str_pcdata("☃<"), "☃&lt;");
    }

    #[test]
    fn test_escape_keeps_surrounding_text() {
        // Plain text before, between and after escaped bytes must survive intact.
        assert_eq!(escape_str_attribute("a<b & c"), "a&lt;b &amp; c");
        assert_eq!(escape_str_pcdata("a<b & c"), "a&lt;b &amp; c");
        // `>` and quotes are only escaped by the attribute table.
        assert_eq!(escape_str_pcdata("a > \"b\""), "a > \"b\"");
    }

    #[test]
    fn test_unescaped_input_is_borrowed() {
        assert!(is_borrowed(&escape_str_attribute("no_escapes")));
        assert!(is_borrowed(&escape_str_pcdata("no_escapes")));
        assert!(is_borrowed(&escape_str_attribute("")));
        assert!(!is_borrowed(&escape_str_pcdata("<")));
    }
}
163