//! Contains functions for escaping XML special characters.
2
3use std::borrow::Cow;
4use std::fmt::{Display, Formatter, Result};
5use std::marker::PhantomData;
6
/// A table of byte-to-text substitutions, queried one byte at a time.
///
/// Implementors only have to provide [`Escapes::escape`]; both predicate
/// helpers have default bodies derived from it.
pub(crate) trait Escapes {
    /// Returns the replacement text for `c`, or `None` when `c` has no replacement.
    fn escape(c: u8) -> Option<&'static str>;

    /// Reports whether [`Escapes::escape`] yields a replacement for `c`.
    fn byte_needs_escaping(c: u8) -> bool {
        Self::escape(c).is_some()
    }

    /// Reports whether at least one byte of `s` has a replacement.
    fn str_needs_escaping(s: &str) -> bool {
        s.as_bytes()
            .iter()
            .any(|&byte| Self::escape(byte).is_some())
    }
}
18
19pub(crate) struct Escaped<'a, E: Escapes> {
20 _escape_phantom: PhantomData<E>,
21 to_escape: &'a str,
22}
23
24impl<'a, E: Escapes> Escaped<'a, E> {
25 pub const fn new(s: &'a str) -> Self {
26 Escaped {
27 _escape_phantom: PhantomData,
28 to_escape: s,
29 }
30 }
31}
32
33impl<E: Escapes> Display for Escaped<'_, E> {
34 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
35 let mut total_remaining: &str = self.to_escape;
36
37 // find the next occurence
38 while let Some(n: usize) = total_remaining.bytes().position(E::byte_needs_escaping) {
39 let (start: &str, remaining: &str) = total_remaining.split_at(mid:n);
40
41 f.write_str(data:start)?;
42
43 // unwrap is safe because we checked is_some for position n earlier
44 let next_byte: u8 = remaining.bytes().next().unwrap();
45 let replacement: &'static str = E::escape(next_byte).unwrap_or(default:"unexpected token");
46 f.write_str(data:replacement)?;
47
48 total_remaining = &remaining[1..];
49 }
50
51 f.write_str(data:total_remaining)
52 }
53}
54
55fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> {
56 if E::str_needs_escaping(s) {
57 Cow::Owned(Escaped::<E>::new(s).to_string())
58 } else {
59 Cow::Borrowed(s)
60 }
61}
62
/// Declares a unit struct named by the first argument and implements
/// `Escapes` for it from a comma-separated list of `byte => replacement`
/// pairs (a trailing comma is accepted). Bytes that are not listed map to
/// `None`.
macro_rules! escapes {
    {
        $table: ident,
        $($from: expr => $to: expr),* $(,)?
    } => {
        pub(crate) struct $table;

        impl Escapes for $table {
            fn escape(byte: u8) -> Option<&'static str> {
                match byte {
                    $( $from => Some($to), )*
                    _ => None,
                }
            }
        }
    };
}
80
81escapes!(
82 AttributeEscapes,
83 b'<' => "&lt;",
84 b'>' => "&gt;",
85 b'"' => "&quot;",
86 b'\'' => "&apos;",
87 b'&' => "&amp;",
88 b'\n' => "&#xA;",
89 b'\r' => "&#xD;",
90);
91
92escapes!(
93 PcDataEscapes,
94 b'<' => "&lt;",
95 b'>' => "&gt;",
96 b'&' => "&amp;",
97);
98
99/// Performs escaping of common XML characters inside an attribute value.
100///
101/// This function replaces several important markup characters with their
102/// entity equivalents:
103///
104/// * `<` → `&lt;`
105/// * `>` → `&gt;`
106/// * `"` → `&quot;`
107/// * `'` → `&apos;`
108/// * `&` → `&amp;`
109///
110/// The following characters are escaped so that attributes are printed on
111/// a single line:
112/// * `\n` → `&#xA;`
113/// * `\r` → `&#xD;`
114///
115/// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
116///
117/// Does not perform allocations if the given string does not contain escapable characters.
118#[inline]
119#[must_use]
120pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
121 escape_str::<AttributeEscapes>(s)
122}
123
124/// Performs escaping of common XML characters inside PCDATA.
125///
126/// This function replaces several important markup characters with their
127/// entity equivalents:
128///
129/// * `<` → `&lt;`
130/// * `&` → `&amp;`
131///
132/// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
133///
134/// Does not perform allocations if the given string does not contain escapable characters.
135#[inline]
136#[must_use]
137pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
138 escape_str::<PcDataEscapes>(s)
139}
140
#[cfg(test)]
mod tests {
    use super::{escape_str_attribute, escape_str_pcdata};

    /// Every character in the attribute table is replaced; clean input is unchanged.
    #[test]
    fn test_escape_str_attribute() {
        let all_escapable = "<>'\"&\n\r";
        let expected = "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;";
        assert_eq!(escape_str_attribute(all_escapable), expected);

        let clean = "no_escapes";
        assert_eq!(escape_str_attribute(clean), clean);
    }

    /// Every character in the PCDATA table is replaced; clean input is unchanged.
    #[test]
    fn test_escape_str_pcdata() {
        let all_escapable = "<>&";
        let expected = "&lt;&gt;&amp;";
        assert_eq!(escape_str_pcdata(all_escapable), expected);

        let clean = "no_escapes";
        assert_eq!(escape_str_pcdata(clean), clean);
    }

    /// A multi-byte code point in front of an escapable byte is passed through intact.
    #[test]
    fn test_escape_multibyte_code_points() {
        let snowman_then_angle = "☃<";
        let expected = "☃&lt;";
        assert_eq!(escape_str_attribute(snowman_then_angle), expected);
        assert_eq!(escape_str_pcdata(snowman_then_angle), expected);
    }
}
163