1// Copyright 2021 the Resvg Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4#![allow(clippy::comparison_chain)]
5
6use roxmltree::Error;
7
8use super::{AId, Document, EId, NodeId, NodeKind, SvgNode};
9
10const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
11
12pub(crate) fn parse_svg_text_element<'input>(
13 parent: roxmltree::Node<'_, 'input>,
14 parent_id: NodeId,
15 style_sheet: &simplecss::StyleSheet,
16 doc: &mut Document<'input>,
17) -> Result<(), Error> {
18 debug_assert_eq!(parent.tag_name().name(), "text");
19
20 let space: XmlSpace = if doc.get(parent_id).has_attribute(AId::Space) {
21 get_xmlspace(doc, node_id:parent_id, XmlSpace::Default)
22 } else {
23 if let Some(node: SvgNode<'_, 'input>) = docAncestors<'_, '_>
24 .get(parent_id)
25 .ancestors()
26 .find(|n: &SvgNode<'_, 'input>| n.has_attribute(AId::Space))
27 {
28 get_xmlspace(doc, node.id, XmlSpace::Default)
29 } else {
30 XmlSpace::Default
31 }
32 };
33
34 parse_svg_text_element_impl(parent, parent_id, style_sheet, space, doc)?;
35
36 trim_text_nodes(text_elem_id:parent_id, xmlspace:space, doc);
37 Ok(())
38}
39
40fn parse_svg_text_element_impl<'input>(
41 parent: roxmltree::Node<'_, 'input>,
42 parent_id: NodeId,
43 style_sheet: &simplecss::StyleSheet,
44 space: XmlSpace,
45 doc: &mut Document<'input>,
46) -> Result<(), Error> {
47 for node in parent.children() {
48 if node.is_text() {
49 let text = trim_text(node.text().unwrap(), space);
50 doc.append(parent_id, NodeKind::Text(text));
51 continue;
52 }
53
54 let mut tag_name = match super::parse::parse_tag_name(node) {
55 Some(v) => v,
56 None => continue,
57 };
58
59 if tag_name == EId::A {
60 // Treat links as simple text.
61 tag_name = EId::Tspan;
62 }
63
64 if !matches!(tag_name, EId::Tspan | EId::Tref | EId::TextPath) {
65 continue;
66 }
67
68 // `textPath` must be a direct `text` child.
69 if tag_name == EId::TextPath && parent.tag_name().name() != "text" {
70 continue;
71 }
72
73 // We are converting `tref` into `tspan` to simplify later use.
74 let mut is_tref = false;
75 if tag_name == EId::Tref {
76 tag_name = EId::Tspan;
77 is_tref = true;
78 }
79
80 let node_id =
81 super::parse::parse_svg_element(node, parent_id, tag_name, style_sheet, false, doc)?;
82 let space = get_xmlspace(doc, node_id, space);
83
84 if is_tref {
85 let link_value = node
86 .attribute((XLINK_NS, "href"))
87 .or_else(|| node.attribute("href"));
88
89 if let Some(href) = link_value {
90 if let Some(text) = resolve_tref_text(node.document(), href) {
91 let text = trim_text(&text, space);
92 doc.append(node_id, NodeKind::Text(text));
93 }
94 }
95 } else {
96 parse_svg_text_element_impl(node, node_id, style_sheet, space, doc)?;
97 }
98 }
99
100 Ok(())
101}
102
103fn resolve_tref_text(xml: &roxmltree::Document, href: &str) -> Option<String> {
104 let id = svgtypes::IRI::from_str(href).ok()?.0;
105
106 // Find linked element in the original tree.
107 let node = xml.descendants().find(|n| n.attribute("id") == Some(id))?;
108
109 // `tref` should be linked to an SVG element.
110 super::parse::parse_tag_name(node)?;
111
112 // 'All character data within the referenced element, including character data enclosed
113 // within additional markup, will be rendered.'
114 //
115 // So we don't care about attributes and everything. Just collecting text nodes data.
116 //
117 // Note: we have to filter nodes by `is_text()` first since `text()` will look up
118 // for text nodes in element children therefore we will get duplicates.
119 let text: String = node
120 .descendants()
121 .filter(|n| n.is_text())
122 .filter_map(|n| n.text())
123 .collect();
124 if text.is_empty() {
125 None
126 } else {
127 Some(text)
128 }
129}
130
/// Whitespace handling mode selected by the `xml:space` attribute.
#[derive(Debug, Clone, Copy, PartialEq)]
enum XmlSpace {
    /// Consecutive spaces are collapsed and bounding spaces may be removed.
    Default,
    /// Spaces are kept as they are (`xml:space="preserve"`).
    Preserve,
}
136
137fn get_xmlspace(doc: &Document, node_id: NodeId, default: XmlSpace) -> XmlSpace {
138 match doc.get(node_id).attribute(AId::Space) {
139 Some("preserve") => XmlSpace::Preserve,
140 Some(_) => XmlSpace::Default,
141 _ => default,
142 }
143}
144
/// In-place removal of a single bounding ASCII space.
///
/// Callers are expected to have verified that the space is present; this is only
/// checked through `debug_assert!`.
trait StrTrim {
    /// Removes the first byte of the string, which must be a space.
    fn remove_first_space(&mut self);
    /// Removes the last character of the string, which must be a space.
    fn remove_last_space(&mut self);
}

impl StrTrim for String {
    fn remove_first_space(&mut self) {
        debug_assert!(self.starts_with(' '));
        // A space is a single byte in UTF-8, so draining one byte keeps the string valid.
        self.drain(0..1);
    }

    fn remove_last_space(&mut self) {
        debug_assert!(self.ends_with(' '));
        self.pop();
    }
}
161
/// Prepares text nodes according to the spec: https://www.w3.org/TR/SVG11/text.html#WhiteSpace
///
/// This function handles:
/// - 'xml:space' processing
/// - tabs and newlines removing/replacing
/// - spaces trimming
///
/// All text nodes below `text_elem_id` are collected in child-iteration order and edited
/// in place inside `doc`. `xmlspace` is the mode of the text element; with several text
/// nodes it is used as a fallback for nodes whose parent has no `xml:space` attribute,
/// and with a single text node it is the only mode consulted.
fn trim_text_nodes(text_elem_id: NodeId, xmlspace: XmlSpace, doc: &mut Document) {
    // Flat list of `(text node id, nesting depth)` pairs.
    let mut nodes = Vec::new(); // TODO: allocate only once
    collect_text_nodes(doc.get(text_elem_id), 0, &mut nodes);

    // `trim_text` has already collapsed runs of spaces into a single one (in the default
    // mode), so we have to check only for one leading or trailing space.

    if nodes.len() == 1 {
        // Process element with a single text node child.

        let node_id = nodes[0].0;

        // NOTE(review): only the `xmlspace` argument is consulted here, not the
        // `xml:space` of the text node's own parent element - confirm this is intended.
        if xmlspace == XmlSpace::Default {
            if let NodeKind::Text(ref mut text) = doc.nodes[node_id.get_usize()].kind {
                match text.len() {
                    0 => {} // An empty string. Do nothing.
                    1 => {
                        // If string has only one character and it's a space - clear this string.
                        if text.as_bytes()[0] == b' ' {
                            text.clear();
                        }
                    }
                    _ => {
                        // 'text' has at least 2 bytes, so indexing is safe.
                        // Both bytes are read before any removal, so the trailing check
                        // is not affected by removing the leading space.
                        let c1 = text.as_bytes()[0];
                        let c2 = text.as_bytes()[text.len() - 1];

                        if c1 == b' ' {
                            text.remove_first_space();
                        }

                        if c2 == b' ' {
                            text.remove_last_space();
                        }
                    }
                }
            }
        } else {
            // Do nothing when xml:space=preserve.
        }
    } else if nodes.len() > 1 {
        // Process element with many text node children.

        // We manage all text nodes as a single text node
        // and try to remove duplicated spaces across nodes.
        //
        // For example '<text>Text <tspan> text </tspan> text</text>'
        // is the same as '<text>Text <tspan>text</tspan> text</text>'

        let mut i = 0;
        // Number of adjacent pairs, i.e. one less than the number of text nodes.
        let len = nodes.len() - 1;
        // Most recent first-of-pair node whose text is not blank; it stands in for an
        // empty first node so that a pair is still compared against real content.
        let mut last_non_empty: Option<NodeId> = None;
        while i < len {
            // Process pairs.
            let (mut node1_id, depth1) = nodes[i];
            let (node2_id, depth2) = nodes[i + 1];

            // An empty first node carries no bounding characters; fall back to the last
            // node that had content. `depth1` still belongs to the original node.
            if doc.get(node1_id).text().is_empty() {
                if let Some(n) = last_non_empty {
                    node1_id = n;
                }
            }

            // Parent of the text node is always an element node and always exist,
            // so unwrap is safe.
            let xmlspace1 = get_xmlspace(doc, doc.get(node1_id).parent().unwrap().id, xmlspace);
            let xmlspace2 = get_xmlspace(doc, doc.get(node2_id).parent().unwrap().id, xmlspace);

            // First and last bytes of both nodes (`None` for an empty node):
            //
            // >text<..>text<
            //  1  2    3  4
            //
            // These are snapshots taken before any edits made in this iteration.
            let (c1, c2, c3, c4) = {
                let text1 = doc.get(node1_id).text();
                let text2 = doc.get(node2_id).text();

                let bytes1 = text1.as_bytes();
                let bytes2 = text2.as_bytes();

                let c1 = bytes1.first().cloned();
                let c2 = bytes1.last().cloned();
                let c3 = bytes2.first().cloned();
                let c4 = bytes2.last().cloned();

                (c1, c2, c3, c4)
            };

            // NOTE: xml:space processing is mostly an undefined behavior,
            // because everyone does it differently.
            // We're mimicking the Chrome behavior.

            // Remove space from the second text node if both nodes have bounding spaces.
            // From: '<text>Text <tspan> text</tspan></text>'
            // To:   '<text>Text <tspan>text</tspan></text>'
            //
            // See text-tspan-02-b.svg for details.
            if depth1 < depth2 {
                // The second node is nested deeper than the first one: its leading space
                // is dropped regardless of how the first node ends.
                if c3 == Some(b' ') {
                    if xmlspace2 == XmlSpace::Default {
                        if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
                            text.remove_first_space();
                        }
                    }
                }
            } else {
                // Same or shallower depth: act only when a space ends the first node
                // and another one starts the second node.
                if c2 == Some(b' ') && c2 == c3 {
                    if xmlspace1 == XmlSpace::Default && xmlspace2 == XmlSpace::Default {
                        if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
                            text.remove_last_space();
                        }
                    } else {
                        // The first node preserves its spaces, so the duplicate is taken
                        // from the second node instead.
                        if xmlspace1 == XmlSpace::Preserve && xmlspace2 == XmlSpace::Default {
                            if let NodeKind::Text(ref mut text) =
                                doc.nodes[node2_id.get_usize()].kind
                            {
                                text.remove_first_space();
                            }
                        }
                    }
                }
            }

            let is_first = i == 0;
            let is_last = i == len - 1;

            // The emptiness checks below re-read the current text because the
            // `c1`..`c4` snapshots may be stale after the removals above.
            if is_first
                && c1 == Some(b' ')
                && xmlspace1 == XmlSpace::Default
                && !doc.get(node1_id).text().is_empty()
            {
                // Remove a leading space from a first text node.
                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
                    text.remove_first_space();
                }
            } else if is_last
                && c4 == Some(b' ')
                && !doc.get(node2_id).text().is_empty()
                && xmlspace2 == XmlSpace::Default
            {
                // Remove a trailing space from a last text node.
                // Also check that 'text2' is not empty already.
                if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
                    text.remove_last_space();
                }
            }

            // When the very last text node is empty, the trailing space of the whole
            // text lives at the end of the first node of the pair, so trim it there.
            // NOTE(review): unlike the branches above, this one does not check
            // `xmlspace1` - confirm whether `preserve` should be honored here.
            if is_last
                && c2 == Some(b' ')
                && !doc.get(node1_id).text().is_empty()
                && doc.get(node2_id).text().is_empty()
                && doc.get(node1_id).text().ends_with(' ')
            {
                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
                    text.remove_last_space();
                }
            }

            // Remember this node for later pairs unless it is blank (empty or
            // whitespace-only).
            if !doc.get(node1_id).text().trim().is_empty() {
                last_non_empty = Some(node1_id);
            }

            i += 1;
        }
    }

    // TODO: find a way to remove all empty text nodes
}
333
334fn collect_text_nodes(parent: SvgNode, depth: usize, nodes: &mut Vec<(NodeId, usize)>) {
335 for child: SvgNode<'_, '_> in parent.children() {
336 if child.is_text() {
337 nodes.push((child.id, depth));
338 } else if child.is_element() {
339 collect_text_nodes(parent:child, depth:depth + 1, nodes);
340 }
341 }
342}
343
344fn trim_text(text: &str, space: XmlSpace) -> String {
345 let mut s: String = String::with_capacity(text.len());
346
347 let mut prev: char = '0';
348 for c: char in text.chars() {
349 // \r, \n and \t should be converted into spaces.
350 let c: char = match c {
351 '\r' | '\n' | '\t' => ' ',
352 _ => c,
353 };
354
355 // Skip continuous spaces.
356 if space == XmlSpace::Default && c == ' ' && c == prev {
357 continue;
358 }
359
360 prev = c;
361
362 s.push(ch:c);
363 }
364
365 s
366}
367