text.rs source code [crates/usvg/src/parser/svgtree/text.rs]

1	// Copyright 2021 the Resvg Authors
2	// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4	#![allow(clippy::comparison_chain)]
5
6	use roxmltree::Error;
7
8	use super::{AId, Document, EId, NodeId, NodeKind, SvgNode};
9
/// Namespace URI used to look up the namespaced `href` attribute (`xlink:href`)
/// on `tref` elements.
const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
11
12	pub(crate) fn parse_svg_text_element<'input>(
13	parent: roxmltree::Node<'_, 'input>,
14	parent_id: NodeId,
15	style_sheet: &simplecss::StyleSheet,
16	doc: &mut Document<'input>,
17	) -> Result<(), Error> {
18	debug_assert_eq!(parent.tag_name().name(), "text");
19
20	let space: XmlSpace = if doc.get(parent_id).has_attribute(AId::Space) {
21	get_xmlspace(doc, node_id:parent_id, XmlSpace::Default)
22	} else {
23	if let Some(node: SvgNode<'_, 'input>) = docAncestors<'_, '_>
24	.get(parent_id)
25	.ancestors()
26	.find(\|n: &SvgNode<'_, 'input>\| n.has_attribute(AId::Space))
27	{
28	get_xmlspace(doc, node.id, XmlSpace::Default)
29	} else {
30	XmlSpace::Default
31	}
32	};
33
34	parse_svg_text_element_impl(parent, parent_id, style_sheet, space, doc)?;
35
36	trim_text_nodes(text_elem_id:parent_id, xmlspace:space, doc);
37	Ok(())
38	}
39
40	fn parse_svg_text_element_impl<'input>(
41	parent: roxmltree::Node<'_, 'input>,
42	parent_id: NodeId,
43	style_sheet: &simplecss::StyleSheet,
44	space: XmlSpace,
45	doc: &mut Document<'input>,
46	) -> Result<(), Error> {
47	for node in parent.children() {
48	if node.is_text() {
49	let text = trim_text(node.text().unwrap(), space);
50	doc.append(parent_id, NodeKind::Text(text));
51	continue;
52	}
53
54	let mut tag_name = match super::parse::parse_tag_name(node) {
55	Some(v) => v,
56	None => continue,
57	};
58
59	if tag_name == EId::A {
60	// Treat links as simple text.
61	tag_name = EId::Tspan;
62	}
63
64	if !matches!(tag_name, EId::Tspan \| EId::Tref \| EId::TextPath) {
65	continue;
66	}
67
68	// `textPath` must be a direct `text` child.
69	if tag_name == EId::TextPath && parent.tag_name().name() != "text" {
70	continue;
71	}
72
73	// We are converting `tref` into `tspan` to simplify later use.
74	let mut is_tref = `false`;
75	if tag_name == EId::Tref {
76	tag_name = EId::Tspan;
77	is_tref = `true`;
78	}
79
80	let node_id =
81	super::parse::parse_svg_element(node, parent_id, tag_name, style_sheet, `false`, doc)?;
82	let space = get_xmlspace(doc, node_id, space);
83
84	if is_tref {
85	let link_value = node
86	.attribute((XLINK_NS, "href"))
87	.or_else(\|\| node.attribute("href"));
88
89	if let Some(href) = link_value {
90	if let Some(text) = resolve_tref_text(node.document(), href) {
91	let text = trim_text(&text, space);
92	doc.append(node_id, NodeKind::Text(text));
93	}
94	}
95	} else {
96	parse_svg_text_element_impl(node, node_id, style_sheet, space, doc)?;
97	}
98	}
99
100	Ok(())
101	}
102
103	fn resolve_tref_text(xml: &roxmltree::Document, href: &str) -> Option<String> {
104	let id = svgtypes::IRI::from_str(href).ok()?.0;
105
106	// Find linked element in the original tree.
107	let node = xml.descendants().find(\|n\| n.attribute("id") == Some(id))?;
108
109	// `tref` should be linked to an SVG element.
110	super::parse::parse_tag_name(node)?;
111
112	// 'All character data within the referenced element, including character data enclosed
113	// within additional markup, will be rendered.'
114	//
115	// So we don't care about attributes and everything. Just collecting text nodes data.
116	//
117	// Note: we have to filter nodes by `is_text()` first since `text()` will look up
118	// for text nodes in element children therefore we will get duplicates.
119	let text: String = node
120	.descendants()
121	.filter(\|n\| n.is_text())
122	.filter_map(\|n\| n.text())
123	.collect();
124	if text.is_empty() {
125	None
126	} else {
127	Some(text)
128	}
129	}
130
/// Whitespace handling mode selected by the `xml:space` attribute.
#[derive(Debug, Clone, Copy, PartialEq)]
enum XmlSpace {
    /// Consecutive spaces are collapsed and boundary spaces may be trimmed.
    Default,
    /// Selected by `xml:space="preserve"`; spaces are neither collapsed nor trimmed.
    Preserve,
}
136
137	fn get_xmlspace(doc: &Document, node_id: NodeId, default: XmlSpace) -> XmlSpace {
138	match doc.get(node_id).attribute(AId::Space) {
139	Some("preserve") => XmlSpace::Preserve,
140	Some(_) => XmlSpace::Default,
141	_ => default,
142	}
143	}
144
/// Helpers for removing exactly one space from either end of a string.
trait StrTrim {
    /// Removes a single leading space.
    ///
    /// The caller is expected to have checked that the string starts with a space;
    /// this is verified with a debug assertion.
    fn remove_first_space(&mut self);

    /// Removes a single trailing space.
    ///
    /// The caller is expected to have checked that the string ends with a space;
    /// this is verified with a debug assertion.
    fn remove_last_space(&mut self);
}

impl StrTrim for String {
    fn remove_first_space(&mut self) {
        debug_assert!(self.starts_with(' '), "expected a leading space");
        // Guarded so that builds without debug assertions neither panic on an empty
        // string nor slice through a multi-byte character.
        if self.starts_with(' ') {
            self.drain(0..1);
        }
    }

    fn remove_last_space(&mut self) {
        debug_assert!(self.ends_with(' '), "expected a trailing space");
        // Guarded so that builds without debug assertions never drop a non-space character.
        if self.ends_with(' ') {
            self.pop();
        }
    }
}
161
162	/// Prepares text nodes according to the spec: https://www.w3.org/TR/SVG11/text.html#WhiteSpace
163	///
164	/// This function handles:
165	/// - 'xml:space' processing
166	/// - tabs and newlines removing/replacing
167	/// - spaces trimming
168	fn trim_text_nodes(text_elem_id: NodeId, xmlspace: XmlSpace, doc: &mut Document) {
169	let mut nodes = Vec::new(); // TODO: allocate only once
170	collect_text_nodes(doc.get(text_elem_id), `0`, &mut nodes);
171
172	// `trim` method has already collapsed all spaces into a single one,
173	// so we have to check only for one leading or trailing space.
174
175	if nodes.len() == `1` {
176	// Process element with a single text node child.
177
178	let node_id = nodes[`0`].0;
179
180	if xmlspace == XmlSpace::Default {
181	if let NodeKind::Text(ref mut text) = doc.nodes[node_id.get_usize()].kind {
182	match text.len() {
183	`0` => {} // An empty string. Do nothing.
184	`1` => {
185	// If string has only one character and it's a space - clear this string.
186	if text.as_bytes()[`0`] == b' ' {
187	text.clear();
188	}
189	}
190	_ => {
191	// 'text' has at least 2 bytes, so indexing is safe.
192	let c1 = text.as_bytes()[`0`];
193	let c2 = text.as_bytes()[text.len() - `1`];
194
195	if c1 == b' ' {
196	text.remove_first_space();
197	}
198
199	if c2 == b' ' {
200	text.remove_last_space();
201	}
202	}
203	}
204	}
205	} else {
206	// Do nothing when xml:space=preserve.
207	}
208	} else if nodes.len() > `1` {
209	// Process element with many text node children.
210
211	// We manage all text nodes as a single text node
212	// and trying to remove duplicated spaces across nodes.
213	//
214	// For example '<text>Text <tspan> text </tspan> text</text>'
215	// is the same is '<text>Text <tspan>text</tspan> text</text>'
216
217	let mut i = `0`;
218	let len = nodes.len() - `1`;
219	let mut last_non_empty: Option<NodeId> = None;
220	while i < len {
221	// Process pairs.
222	let (mut node1_id, depth1) = nodes[i];
223	let (node2_id, depth2) = nodes[i + `1`];
224
225	if doc.get(node1_id).text().is_empty() {
226	if let Some(n) = last_non_empty {
227	node1_id = n;
228	}
229	}
230
231	// Parent of the text node is always an element node and always exist,
232	// so unwrap is safe.
233	let xmlspace1 = get_xmlspace(doc, doc.get(node1_id).parent().unwrap().id, xmlspace);
234	let xmlspace2 = get_xmlspace(doc, doc.get(node2_id).parent().unwrap().id, xmlspace);
235
236	// >text<..>text<
237	// 1 2 3 4
238	let (c1, c2, c3, c4) = {
239	let text1 = doc.get(node1_id).text();
240	let text2 = doc.get(node2_id).text();
241
242	let bytes1 = text1.as_bytes();
243	let bytes2 = text2.as_bytes();
244
245	let c1 = bytes1.first().cloned();
246	let c2 = bytes1.last().cloned();
247	let c3 = bytes2.first().cloned();
248	let c4 = bytes2.last().cloned();
249
250	(c1, c2, c3, c4)
251	};
252
253	// NOTE: xml:space processing is mostly an undefined behavior,
254	// because everyone do it differently.
255	// We're mimicking the Chrome behavior.
256
257	// Remove space from the second text node if both nodes has bound spaces.
258	// From: '<text>Text <tspan> text</tspan></text>'
259	// To: '<text>Text <tspan>text</tspan></text>'
260	//
261	// See text-tspan-02-b.svg for details.
262	if depth1 < depth2 {
263	if c3 == Some(b' ') {
264	if xmlspace2 == XmlSpace::Default {
265	if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
266	text.remove_first_space();
267	}
268	}
269	}
270	} else {
271	if c2 == Some(b' ') && c2 == c3 {
272	if xmlspace1 == XmlSpace::Default && xmlspace2 == XmlSpace::Default {
273	if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
274	text.remove_last_space();
275	}
276	} else {
277	if xmlspace1 == XmlSpace::Preserve && xmlspace2 == XmlSpace::Default {
278	if let NodeKind::Text(ref mut text) =
279	doc.nodes[node2_id.get_usize()].kind
280	{
281	text.remove_first_space();
282	}
283	}
284	}
285	}
286	}
287
288	let is_first = i == `0`;
289	let is_last = i == len - `1`;
290
291	if is_first
292	&& c1 == Some(b' ')
293	&& xmlspace1 == XmlSpace::Default
294	&& !doc.get(node1_id).text().is_empty()
295	{
296	// Remove a leading space from a first text node.
297	if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
298	text.remove_first_space();
299	}
300	} else if is_last
301	&& c4 == Some(b' ')
302	&& !doc.get(node2_id).text().is_empty()
303	&& xmlspace2 == XmlSpace::Default
304	{
305	// Remove a trailing space from a last text node.
306	// Also check that 'text2' is not empty already.
307	if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
308	text.remove_last_space();
309	}
310	}
311
312	if is_last
313	&& c2 == Some(b' ')
314	&& !doc.get(node1_id).text().is_empty()
315	&& doc.get(node2_id).text().is_empty()
316	&& doc.get(node1_id).text().ends_with(' ')
317	{
318	if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
319	text.remove_last_space();
320	}
321	}
322
323	if !doc.get(node1_id).text().trim().is_empty() {
324	last_non_empty = Some(node1_id);
325	}
326
327	i += `1`;
328	}
329	}
330
331	// TODO: find a way to remove all empty text nodes
332	}
333
334	fn collect_text_nodes(parent: SvgNode, depth: usize, nodes: &mut Vec<(NodeId, usize)>) {
335	for child: SvgNode<'_, '_> in parent.children() {
336	if child.is_text() {
337	nodes.push((child.id, depth));
338	} else if child.is_element() {
339	collect_text_nodes(parent:child, depth:depth + `1`, nodes);
340	}
341	}
342	}
343
344	fn trim_text(text: &str, space: XmlSpace) -> String {
345	let mut s: String = String::with_capacity(text.len());
346
347	let mut prev: char = '0';
348	for c: char in text.chars() {
349	// \r, \n and \t should be converted into spaces.
350	let c: char = match c {
351	'`\r`' \| '`\n`' \| '`\t`' => ' ',
352	_ => c,
353	};
354
355	// Skip continuous spaces.
356	if space == XmlSpace::Default && c == ' ' && c == prev {
357	continue;
358	}
359
360	prev = c;
361
362	s.push(ch:c);
363	}
364
365	s
366	}
367