1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5#![allow(clippy::comparison_chain)]
6
7use roxmltree::Error;
8
9use super::{AId, Document, EId, NodeId, NodeKind, SvgNode};
10
11const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
12
13pub(crate) fn parse_svg_text_element<'input>(
14 parent: roxmltree::Node<'_, 'input>,
15 parent_id: NodeId,
16 style_sheet: &simplecss::StyleSheet,
17 doc: &mut Document<'input>,
18) -> Result<(), Error> {
19 debug_assert_eq!(parent.tag_name().name(), "text");
20
21 let space: XmlSpace = if doc.get(parent_id).has_attribute(AId::Space) {
22 get_xmlspace(doc, node_id:parent_id, XmlSpace::Default)
23 } else {
24 if let Some(node: SvgNode<'_, '_>) = docAncestors<'_, '_>
25 .get(parent_id)
26 .ancestors()
27 .find(|n: &SvgNode<'_, '_>| n.has_attribute(AId::Space))
28 {
29 get_xmlspace(doc, node_id:node.id, XmlSpace::Default)
30 } else {
31 XmlSpace::Default
32 }
33 };
34
35 parse_svg_text_element_impl(parent, parent_id, style_sheet, space, doc)?;
36
37 trim_text_nodes(text_elem_id:parent_id, xmlspace:space, doc);
38 Ok(())
39}
40
41fn parse_svg_text_element_impl<'input>(
42 parent: roxmltree::Node<'_, 'input>,
43 parent_id: NodeId,
44 style_sheet: &simplecss::StyleSheet,
45 space: XmlSpace,
46 doc: &mut Document<'input>,
47) -> Result<(), Error> {
48 for node in parent.children() {
49 if node.is_text() {
50 let text = trim_text(node.text().unwrap(), space);
51 doc.append(parent_id, NodeKind::Text(text));
52 continue;
53 }
54
55 let mut tag_name = match super::parse::parse_tag_name(node) {
56 Some(v) => v,
57 None => continue,
58 };
59
60 if tag_name == EId::A {
61 // Treat links as simple text.
62 tag_name = EId::Tspan;
63 }
64
65 if !matches!(tag_name, EId::Tspan | EId::Tref | EId::TextPath) {
66 continue;
67 }
68
69 // `textPath` must be a direct `text` child.
70 if tag_name == EId::TextPath && parent.tag_name().name() != "text" {
71 continue;
72 }
73
74 // We are converting `tref` into `tspan` to simplify later use.
75 let mut is_tref = false;
76 if tag_name == EId::Tref {
77 tag_name = EId::Tspan;
78 is_tref = true;
79 }
80
81 let node_id =
82 super::parse::parse_svg_element(node, parent_id, tag_name, style_sheet, false, doc)?;
83 let space = get_xmlspace(doc, node_id, space);
84
85 if is_tref {
86 let link_value = node
87 .attribute((XLINK_NS, "href"))
88 .or_else(|| node.attribute("href"));
89
90 if let Some(href) = link_value {
91 if let Some(text) = resolve_tref_text(node.document(), href) {
92 let text = trim_text(&text, space);
93 doc.append(node_id, NodeKind::Text(text));
94 }
95 }
96 } else {
97 parse_svg_text_element_impl(node, node_id, style_sheet, space, doc)?;
98 }
99 }
100
101 Ok(())
102}
103
104fn resolve_tref_text(xml: &roxmltree::Document, href: &str) -> Option<String> {
105 let id = svgtypes::IRI::from_str(href).ok()?.0;
106
107 // Find linked element in the original tree.
108 let node = xml.descendants().find(|n| n.attribute("id") == Some(id))?;
109
110 // `tref` should be linked to an SVG element.
111 super::parse::parse_tag_name(node)?;
112
113 // 'All character data within the referenced element, including character data enclosed
114 // within additional markup, will be rendered.'
115 //
116 // So we don't care about attributes and everything. Just collecting text nodes data.
117 //
118 // Note: we have to filter nodes by `is_text()` first since `text()` will look up
119 // for text nodes in element children therefore we will get duplicates.
120 let text: String = node
121 .descendants()
122 .filter(|n| n.is_text())
123 .filter_map(|n| n.text())
124 .collect();
125 if text.is_empty() {
126 None
127 } else {
128 Some(text)
129 }
130}
131
/// Whitespace handling mode of a text node, as selected by the `xml:space` attribute.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum XmlSpace {
    /// Runs of spaces are collapsed and leading/trailing spaces are trimmed.
    Default,
    /// Spaces are kept as they are; only `\r`, `\n` and `\t` are turned into spaces.
    Preserve,
}
137
138fn get_xmlspace(doc: &Document, node_id: NodeId, default: XmlSpace) -> XmlSpace {
139 match doc.get(node_id).attribute(AId::Space) {
140 Some("preserve") => XmlSpace::Preserve,
141 Some(_) => XmlSpace::Default,
142 _ => default,
143 }
144}
145
/// Helpers for removing exactly one space from either end of a string.
trait StrTrim {
    /// Removes the leading space. The string is expected to start with `' '`.
    fn remove_first_space(&mut self);
    /// Removes the trailing space. The string is expected to end with `' '`.
    fn remove_last_space(&mut self);
}

impl StrTrim for String {
    fn remove_first_space(&mut self) {
        debug_assert!(self.starts_with(' '), "string must start with a space");
        // Guarded so that a violated precondition can neither panic in release builds
        // nor strip a character that is not a space.
        if self.starts_with(' ') {
            self.remove(0);
        }
    }

    fn remove_last_space(&mut self) {
        debug_assert!(self.ends_with(' '), "string must end with a space");
        if self.ends_with(' ') {
            self.pop();
        }
    }
}
162
163/// Prepares text nodes according to the spec: https://www.w3.org/TR/SVG11/text.html#WhiteSpace
164///
165/// This function handles:
166/// - 'xml:space' processing
167/// - tabs and newlines removing/replacing
168/// - spaces trimming
169fn trim_text_nodes(text_elem_id: NodeId, xmlspace: XmlSpace, doc: &mut Document) {
170 let mut nodes = Vec::new(); // TODO: allocate only once
171 collect_text_nodes(doc.get(text_elem_id), 0, &mut nodes);
172
173 // `trim` method has already collapsed all spaces into a single one,
174 // so we have to check only for one leading or trailing space.
175
176 if nodes.len() == 1 {
177 // Process element with a single text node child.
178
179 let node_id = nodes[0].0;
180
181 if xmlspace == XmlSpace::Default {
182 if let NodeKind::Text(ref mut text) = doc.nodes[node_id.get_usize()].kind {
183 match text.len() {
184 0 => {} // An empty string. Do nothing.
185 1 => {
186 // If string has only one character and it's a space - clear this string.
187 if text.as_bytes()[0] == b' ' {
188 text.clear();
189 }
190 }
191 _ => {
192 // 'text' has at least 2 bytes, so indexing is safe.
193 let c1 = text.as_bytes()[0];
194 let c2 = text.as_bytes()[text.len() - 1];
195
196 if c1 == b' ' {
197 text.remove_first_space();
198 }
199
200 if c2 == b' ' {
201 text.remove_last_space();
202 }
203 }
204 }
205 }
206 } else {
207 // Do nothing when xml:space=preserve.
208 }
209 } else if nodes.len() > 1 {
210 // Process element with many text node children.
211
212 // We manage all text nodes as a single text node
213 // and trying to remove duplicated spaces across nodes.
214 //
215 // For example '<text>Text <tspan> text </tspan> text</text>'
216 // is the same is '<text>Text <tspan>text</tspan> text</text>'
217
218 let mut i = 0;
219 let len = nodes.len() - 1;
220 let mut last_non_empty: Option<NodeId> = None;
221 while i < len {
222 // Process pairs.
223 let (mut node1_id, depth1) = nodes[i];
224 let (node2_id, depth2) = nodes[i + 1];
225
226 if doc.get(node1_id).text().is_empty() {
227 if let Some(n) = last_non_empty {
228 node1_id = n;
229 }
230 }
231
232 // Parent of the text node is always an element node and always exist,
233 // so unwrap is safe.
234 let xmlspace1 = get_xmlspace(doc, doc.get(node1_id).parent().unwrap().id, xmlspace);
235 let xmlspace2 = get_xmlspace(doc, doc.get(node2_id).parent().unwrap().id, xmlspace);
236
237 // >text<..>text<
238 // 1 2 3 4
239 let (c1, c2, c3, c4) = {
240 let text1 = doc.get(node1_id).text();
241 let text2 = doc.get(node2_id).text();
242
243 let bytes1 = text1.as_bytes();
244 let bytes2 = text2.as_bytes();
245
246 let c1 = bytes1.first().cloned();
247 let c2 = bytes1.last().cloned();
248 let c3 = bytes2.first().cloned();
249 let c4 = bytes2.last().cloned();
250
251 (c1, c2, c3, c4)
252 };
253
254 // NOTE: xml:space processing is mostly an undefined behavior,
255 // because everyone do it differently.
256 // We're mimicking the Chrome behavior.
257
258 // Remove space from the second text node if both nodes has bound spaces.
259 // From: '<text>Text <tspan> text</tspan></text>'
260 // To: '<text>Text <tspan>text</tspan></text>'
261 //
262 // See text-tspan-02-b.svg for details.
263 if depth1 < depth2 {
264 if c3 == Some(b' ') {
265 if xmlspace2 == XmlSpace::Default {
266 if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
267 text.remove_first_space();
268 }
269 }
270 }
271 } else {
272 if c2 == Some(b' ') && c2 == c3 {
273 if xmlspace1 == XmlSpace::Default && xmlspace2 == XmlSpace::Default {
274 if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
275 text.remove_last_space();
276 }
277 } else {
278 if xmlspace1 == XmlSpace::Preserve && xmlspace2 == XmlSpace::Default {
279 if let NodeKind::Text(ref mut text) =
280 doc.nodes[node2_id.get_usize()].kind
281 {
282 text.remove_first_space();
283 }
284 }
285 }
286 }
287 }
288
289 let is_first = i == 0;
290 let is_last = i == len - 1;
291
292 if is_first
293 && c1 == Some(b' ')
294 && xmlspace1 == XmlSpace::Default
295 && !doc.get(node1_id).text().is_empty()
296 {
297 // Remove a leading space from a first text node.
298 if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
299 text.remove_first_space();
300 }
301 } else if is_last
302 && c4 == Some(b' ')
303 && !doc.get(node2_id).text().is_empty()
304 && xmlspace2 == XmlSpace::Default
305 {
306 // Remove a trailing space from a last text node.
307 // Also check that 'text2' is not empty already.
308 if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
309 text.remove_last_space();
310 }
311 }
312
313 if is_last
314 && c2 == Some(b' ')
315 && !doc.get(node1_id).text().is_empty()
316 && doc.get(node2_id).text().is_empty()
317 && doc.get(node1_id).text().ends_with(' ')
318 {
319 if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
320 text.remove_last_space();
321 }
322 }
323
324 if !doc.get(node1_id).text().trim().is_empty() {
325 last_non_empty = Some(node1_id);
326 }
327
328 i += 1;
329 }
330 }
331
332 // TODO: find a way to remove all empty text nodes
333}
334
335fn collect_text_nodes(parent: SvgNode, depth: usize, nodes: &mut Vec<(NodeId, usize)>) {
336 for child: SvgNode<'_, '_> in parent.children() {
337 if child.is_text() {
338 nodes.push((child.id, depth));
339 } else if child.is_element() {
340 collect_text_nodes(parent:child, depth:depth + 1, nodes);
341 }
342 }
343}
344
345fn trim_text(text: &str, space: XmlSpace) -> String {
346 let mut s: String = String::with_capacity(text.len());
347
348 let mut prev: char = '0';
349 for c: char in text.chars() {
350 // \r, \n and \t should be converted into spaces.
351 let c: char = match c {
352 '\r' | '\n' | '\t' => ' ',
353 _ => c,
354 };
355
356 // Skip continuous spaces.
357 if space == XmlSpace::Default && c == ' ' && c == prev {
358 continue;
359 }
360
361 prev = c;
362
363 s.push(ch:c);
364 }
365
366 s
367}
368