1 | // This Source Code Form is subject to the terms of the Mozilla Public |
2 | // License, v. 2.0. If a copy of the MPL was not distributed with this |
3 | // file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4 | |
5 | #![allow (clippy::comparison_chain)] |
6 | |
7 | use roxmltree::Error; |
8 | |
9 | use super::{AId, Document, EId, NodeId, NodeKind, SvgNode}; |
10 | |
/// XML namespace URI of XLink; used to look up the namespaced `href` attribute.
const XLINK_NS: &str = "http://www.w3.org/1999/xlink" ;
12 | |
13 | pub(crate) fn parse_svg_text_element<'input>( |
14 | parent: roxmltree::Node<'_, 'input>, |
15 | parent_id: NodeId, |
16 | style_sheet: &simplecss::StyleSheet, |
17 | doc: &mut Document<'input>, |
18 | ) -> Result<(), Error> { |
19 | debug_assert_eq!(parent.tag_name().name(), "text" ); |
20 | |
21 | let space: XmlSpace = if doc.get(parent_id).has_attribute(AId::Space) { |
22 | get_xmlspace(doc, node_id:parent_id, XmlSpace::Default) |
23 | } else { |
24 | if let Some(node: SvgNode<'_, '_>) = docAncestors<'_, '_> |
25 | .get(parent_id) |
26 | .ancestors() |
27 | .find(|n: &SvgNode<'_, '_>| n.has_attribute(AId::Space)) |
28 | { |
29 | get_xmlspace(doc, node_id:node.id, XmlSpace::Default) |
30 | } else { |
31 | XmlSpace::Default |
32 | } |
33 | }; |
34 | |
35 | parse_svg_text_element_impl(parent, parent_id, style_sheet, space, doc)?; |
36 | |
37 | trim_text_nodes(text_elem_id:parent_id, xmlspace:space, doc); |
38 | Ok(()) |
39 | } |
40 | |
41 | fn parse_svg_text_element_impl<'input>( |
42 | parent: roxmltree::Node<'_, 'input>, |
43 | parent_id: NodeId, |
44 | style_sheet: &simplecss::StyleSheet, |
45 | space: XmlSpace, |
46 | doc: &mut Document<'input>, |
47 | ) -> Result<(), Error> { |
48 | for node in parent.children() { |
49 | if node.is_text() { |
50 | let text = trim_text(node.text().unwrap(), space); |
51 | doc.append(parent_id, NodeKind::Text(text)); |
52 | continue; |
53 | } |
54 | |
55 | let mut tag_name = match super::parse::parse_tag_name(node) { |
56 | Some(v) => v, |
57 | None => continue, |
58 | }; |
59 | |
60 | if tag_name == EId::A { |
61 | // Treat links as simple text. |
62 | tag_name = EId::Tspan; |
63 | } |
64 | |
65 | if !matches!(tag_name, EId::Tspan | EId::Tref | EId::TextPath) { |
66 | continue; |
67 | } |
68 | |
69 | // `textPath` must be a direct `text` child. |
70 | if tag_name == EId::TextPath && parent.tag_name().name() != "text" { |
71 | continue; |
72 | } |
73 | |
74 | // We are converting `tref` into `tspan` to simplify later use. |
75 | let mut is_tref = false; |
76 | if tag_name == EId::Tref { |
77 | tag_name = EId::Tspan; |
78 | is_tref = true; |
79 | } |
80 | |
81 | let node_id = |
82 | super::parse::parse_svg_element(node, parent_id, tag_name, style_sheet, false, doc)?; |
83 | let space = get_xmlspace(doc, node_id, space); |
84 | |
85 | if is_tref { |
86 | let link_value = node |
87 | .attribute((XLINK_NS, "href" )) |
88 | .or_else(|| node.attribute("href" )); |
89 | |
90 | if let Some(href) = link_value { |
91 | if let Some(text) = resolve_tref_text(node.document(), href) { |
92 | let text = trim_text(&text, space); |
93 | doc.append(node_id, NodeKind::Text(text)); |
94 | } |
95 | } |
96 | } else { |
97 | parse_svg_text_element_impl(node, node_id, style_sheet, space, doc)?; |
98 | } |
99 | } |
100 | |
101 | Ok(()) |
102 | } |
103 | |
104 | fn resolve_tref_text(xml: &roxmltree::Document, href: &str) -> Option<String> { |
105 | let id = svgtypes::IRI::from_str(href).ok()?.0; |
106 | |
107 | // Find linked element in the original tree. |
108 | let node = xml.descendants().find(|n| n.attribute("id" ) == Some(id))?; |
109 | |
110 | // `tref` should be linked to an SVG element. |
111 | super::parse::parse_tag_name(node)?; |
112 | |
113 | // 'All character data within the referenced element, including character data enclosed |
114 | // within additional markup, will be rendered.' |
115 | // |
116 | // So we don't care about attributes and everything. Just collecting text nodes data. |
117 | // |
118 | // Note: we have to filter nodes by `is_text()` first since `text()` will look up |
119 | // for text nodes in element children therefore we will get duplicates. |
120 | let text: String = node |
121 | .descendants() |
122 | .filter(|n| n.is_text()) |
123 | .filter_map(|n| n.text()) |
124 | .collect(); |
125 | if text.is_empty() { |
126 | None |
127 | } else { |
128 | Some(text) |
129 | } |
130 | } |
131 | |
/// White-space handling mode selected by the `xml:space` attribute.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum XmlSpace {
    /// Consecutive spaces are collapsed and boundary spaces may be trimmed.
    Default,
    /// Spaces are kept as written (`xml:space="preserve"`).
    Preserve,
}
137 | |
138 | fn get_xmlspace(doc: &Document, node_id: NodeId, default: XmlSpace) -> XmlSpace { |
139 | match doc.get(node_id).attribute(AId::Space) { |
140 | Some("preserve" ) => XmlSpace::Preserve, |
141 | Some(_) => XmlSpace::Default, |
142 | _ => default, |
143 | } |
144 | } |
145 | |
/// In-place removal of a single boundary space from a string.
///
/// Callers are expected to have checked that the corresponding boundary character
/// really is a space; the implementations only verify this in debug builds.
trait StrTrim {
    /// Removes the first character, which must be a space.
    fn remove_first_space(&mut self);
    /// Removes the last character, which must be a space.
    fn remove_last_space(&mut self);
}

impl StrTrim for String {
    fn remove_first_space(&mut self) {
        debug_assert_eq!(self.chars().next().unwrap(), ' ');
        // A space is exactly one byte in UTF-8, so dropping the first byte is enough.
        self.drain(0..1);
    }

    fn remove_last_space(&mut self) {
        debug_assert_eq!(self.chars().next_back().unwrap(), ' ');
        self.pop();
    }
}
162 | |
/// Trims boundary spaces of the text nodes below a `text` element, following
/// https://www.w3.org/TR/SVG11/text.html#WhiteSpace
///
/// `text_elem_id` is the element whose descendant text nodes are processed, `xmlspace`
/// is the `xml:space` mode resolved for that element and `doc` is modified in place.
///
/// This function handles:
/// - 'xml:space' processing: spaces are removed only from nodes in the default mode
/// - removing a space duplicated across two adjacent text nodes
/// - trimming the leading space of the first and the trailing space of the last text node
///
/// The code only ever looks at one space per boundary, so it relies on tabs/newlines
/// having been replaced and consecutive spaces collapsed before this function runs.
fn trim_text_nodes(text_elem_id: NodeId, xmlspace: XmlSpace, doc: &mut Document) {
    // All descendant text nodes in document order, paired with their nesting depth.
    let mut nodes = Vec::new(); // TODO: allocate only once
    collect_text_nodes(doc.get(text_elem_id), 0, &mut nodes);

    // Consecutive spaces are expected to be collapsed into a single one already,
    // so we have to check only for one leading or trailing space.

    if nodes.len() == 1 {
        // Process element with a single text node child.

        let node_id = nodes[0].0;

        if xmlspace == XmlSpace::Default {
            if let NodeKind::Text(ref mut text) = doc.nodes[node_id.get_usize()].kind {
                match text.len() {
                    0 => {} // An empty string. Do nothing.
                    1 => {
                        // If the string has only one character and it's a space, clear the string.
                        if text.as_bytes()[0] == b' ' {
                            text.clear();
                        }
                    }
                    _ => {
                        // 'text' has at least 2 bytes, so indexing is safe.
                        // Both bytes are read before any removal, which is fine because
                        // they refer to two different positions.
                        let c1 = text.as_bytes()[0];
                        let c2 = text.as_bytes()[text.len() - 1];

                        if c1 == b' ' {
                            text.remove_first_space();
                        }

                        if c2 == b' ' {
                            text.remove_last_space();
                        }
                    }
                }
            }
        } else {
            // Do nothing when xml:space=preserve.
        }
    } else if nodes.len() > 1 {
        // Process element with many text node children.

        // We treat all text nodes as a single text node
        // and try to remove duplicated spaces across nodes.
        //
        // For example '<text>Text <tspan> text </tspan> text</text>'
        // is the same as '<text>Text <tspan>text</tspan> text</text>'

        let mut i = 0;
        // Number of adjacent pairs, not number of nodes.
        let len = nodes.len() - 1;
        // Most recent first-of-pair node that still had non-whitespace content.
        let mut last_non_empty: Option<NodeId> = None;
        while i < len {
            // Process pairs.
            let (mut node1_id, depth1) = nodes[i];
            let (node2_id, depth2) = nodes[i + 1];

            // An empty first node cannot contribute a trailing space, so compare the
            // second node against the last node that had content instead (if any).
            if doc.get(node1_id).text().is_empty() {
                if let Some(n) = last_non_empty {
                    node1_id = n;
                }
            }

            // The parent of a text node is always an element node and always exists,
            // so unwrap is safe.
            let xmlspace1 = get_xmlspace(doc, doc.get(node1_id).parent().unwrap().id, xmlspace);
            let xmlspace2 = get_xmlspace(doc, doc.get(node2_id).parent().unwrap().id, xmlspace);

            // >text<..>text<
            // 1 2 3 4
            //
            // c1/c2 are the first/last bytes of the first node, c3/c4 those of the second.
            // They are snapshots taken before this iteration mutates anything, which is why
            // the checks below re-test emptiness on the live text.
            let (c1, c2, c3, c4) = {
                let text1 = doc.get(node1_id).text();
                let text2 = doc.get(node2_id).text();

                let bytes1 = text1.as_bytes();
                let bytes2 = text2.as_bytes();

                let c1 = bytes1.first().cloned();
                let c2 = bytes1.last().cloned();
                let c3 = bytes2.first().cloned();
                let c4 = bytes2.last().cloned();

                (c1, c2, c3, c4)
            };

            // NOTE: xml:space processing is mostly undefined behavior,
            // because everyone does it differently.
            // We're mimicking the Chrome behavior.

            // When the second node is nested deeper than the first one, its leading
            // space is removed (in the default mode).
            // From: '<text>Text <tspan> text</tspan></text>'
            // To: '<text>Text <tspan>text</tspan></text>'
            //
            // See text-tspan-02-b.svg for details.
            if depth1 < depth2 {
                if c3 == Some(b' ' ) {
                    if xmlspace2 == XmlSpace::Default {
                        if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
                            text.remove_first_space();
                        }
                    }
                }
            } else {
                // Same or shallower depth: act only when the first node ends with a space
                // and the second one starts with a space.
                if c2 == Some(b' ' ) && c2 == c3 {
                    if xmlspace1 == XmlSpace::Default && xmlspace2 == XmlSpace::Default {
                        // Both nodes are in the default mode: drop the space of the first one.
                        if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
                            text.remove_last_space();
                        }
                    } else {
                        // The first node preserves spaces, so the space of the second one
                        // is dropped instead.
                        if xmlspace1 == XmlSpace::Preserve && xmlspace2 == XmlSpace::Default {
                            if let NodeKind::Text(ref mut text) =
                                doc.nodes[node2_id.get_usize()].kind
                            {
                                text.remove_first_space();
                            }
                        }
                    }
                }
            }

            let is_first = i == 0;
            let is_last = i == len - 1;

            if is_first
                && c1 == Some(b' ' )
                && xmlspace1 == XmlSpace::Default
                && !doc.get(node1_id).text().is_empty()
            {
                // Remove a leading space from a first text node.
                // The emptiness check guards against the text having been emptied above.
                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
                    text.remove_first_space();
                }
            } else if is_last
                && c4 == Some(b' ' )
                && !doc.get(node2_id).text().is_empty()
                && xmlspace2 == XmlSpace::Default
            {
                // Remove a trailing space from a last text node.
                // Also check that 'text2' is not empty already.
                if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
                    text.remove_last_space();
                }
            }

            // When the last text node is empty, the trailing space of the preceding
            // non-empty node becomes the trailing space of the whole text, so remove it.
            // `ends_with` re-checks the live text because `c2` may be stale by now.
            if is_last
                && c2 == Some(b' ' )
                && !doc.get(node1_id).text().is_empty()
                && doc.get(node2_id).text().is_empty()
                && doc.get(node1_id).text().ends_with(' ' )
            {
                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
                    text.remove_last_space();
                }
            }

            // Remember this node as a comparison target for later pairs only if it
            // still contains something other than whitespace.
            if !doc.get(node1_id).text().trim().is_empty() {
                last_non_empty = Some(node1_id);
            }

            i += 1;
        }
    }

    // TODO: find a way to remove all empty text nodes
}
334 | |
335 | fn collect_text_nodes(parent: SvgNode, depth: usize, nodes: &mut Vec<(NodeId, usize)>) { |
336 | for child: SvgNode<'_, '_> in parent.children() { |
337 | if child.is_text() { |
338 | nodes.push((child.id, depth)); |
339 | } else if child.is_element() { |
340 | collect_text_nodes(parent:child, depth:depth + 1, nodes); |
341 | } |
342 | } |
343 | } |
344 | |
345 | fn trim_text(text: &str, space: XmlSpace) -> String { |
346 | let mut s: String = String::with_capacity(text.len()); |
347 | |
348 | let mut prev: char = '0' ; |
349 | for c: char in text.chars() { |
350 | // \r, \n and \t should be converted into spaces. |
351 | let c: char = match c { |
352 | ' \r' | ' \n' | ' \t' => ' ' , |
353 | _ => c, |
354 | }; |
355 | |
356 | // Skip continuous spaces. |
357 | if space == XmlSpace::Default && c == ' ' && c == prev { |
358 | continue; |
359 | } |
360 | |
361 | prev = c; |
362 | |
363 | s.push(ch:c); |
364 | } |
365 | |
366 | s |
367 | } |
368 | |