text.rs source code [crates/usvg-0.40.0/src/parser/svgtree/text.rs]

1	// This Source Code Form is subject to the terms of the Mozilla Public
2	// License, v. 2.0. If a copy of the MPL was not distributed with this
3	// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5	#![allow(clippy::comparison_chain)]
6
7	use roxmltree::Error;
8
9	use super::{AId, Document, EId, NodeId, NodeKind, SvgNode};
10
11	const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
12
13	pub(crate) fn parse_svg_text_element<'input>(
14	parent: roxmltree::Node<'_, 'input>,
15	parent_id: NodeId,
16	style_sheet: &simplecss::StyleSheet,
17	doc: &mut Document<'input>,
18	) -> Result<(), Error> {
19	debug_assert_eq!(parent.tag_name().name(), "text");
20
21	let space: XmlSpace = if doc.get(parent_id).has_attribute(AId::Space) {
22	get_xmlspace(doc, node_id:parent_id, XmlSpace::Default)
23	} else {
24	if let Some(node: SvgNode<'_, '_>) = docAncestors<'_, '_>
25	.get(parent_id)
26	.ancestors()
27	.find(\|n: &SvgNode<'_, '_>\| n.has_attribute(AId::Space))
28	{
29	get_xmlspace(doc, node_id:node.id, XmlSpace::Default)
30	} else {
31	XmlSpace::Default
32	}
33	};
34
35	parse_svg_text_element_impl(parent, parent_id, style_sheet, space, doc)?;
36
37	trim_text_nodes(text_elem_id:parent_id, xmlspace:space, doc);
38	Ok(())
39	}
40
41	fn parse_svg_text_element_impl<'input>(
42	parent: roxmltree::Node<'_, 'input>,
43	parent_id: NodeId,
44	style_sheet: &simplecss::StyleSheet,
45	space: XmlSpace,
46	doc: &mut Document<'input>,
47	) -> Result<(), Error> {
48	for node in parent.children() {
49	if node.is_text() {
50	let text = trim_text(node.text().unwrap(), space);
51	doc.append(parent_id, NodeKind::Text(text));
52	continue;
53	}
54
55	let mut tag_name = match super::parse::parse_tag_name(node) {
56	Some(v) => v,
57	None => continue,
58	};
59
60	if tag_name == EId::A {
61	// Treat links as simple text.
62	tag_name = EId::Tspan;
63	}
64
65	if !matches!(tag_name, EId::Tspan \| EId::Tref \| EId::TextPath) {
66	continue;
67	}
68
69	// `textPath` must be a direct `text` child.
70	if tag_name == EId::TextPath && parent.tag_name().name() != "text" {
71	continue;
72	}
73
74	// We are converting `tref` into `tspan` to simplify later use.
75	let mut is_tref = `false`;
76	if tag_name == EId::Tref {
77	tag_name = EId::Tspan;
78	is_tref = `true`;
79	}
80
81	let node_id =
82	super::parse::parse_svg_element(node, parent_id, tag_name, style_sheet, `false`, doc)?;
83	let space = get_xmlspace(doc, node_id, space);
84
85	if is_tref {
86	let link_value = node
87	.attribute((XLINK_NS, "href"))
88	.or_else(\|\| node.attribute("href"));
89
90	if let Some(href) = link_value {
91	if let Some(text) = resolve_tref_text(node.document(), href) {
92	let text = trim_text(&text, space);
93	doc.append(node_id, NodeKind::Text(text));
94	}
95	}
96	} else {
97	parse_svg_text_element_impl(node, node_id, style_sheet, space, doc)?;
98	}
99	}
100
101	Ok(())
102	}
103
104	fn resolve_tref_text(xml: &roxmltree::Document, href: &str) -> Option<String> {
105	let id = svgtypes::IRI::from_str(href).ok()?.0;
106
107	// Find linked element in the original tree.
108	let node = xml.descendants().find(\|n\| n.attribute("id") == Some(id))?;
109
110	// `tref` should be linked to an SVG element.
111	super::parse::parse_tag_name(node)?;
112
113	// 'All character data within the referenced element, including character data enclosed
114	// within additional markup, will be rendered.'
115	//
116	// So we don't care about attributes and everything. Just collecting text nodes data.
117	//
118	// Note: we have to filter nodes by `is_text()` first since `text()` will look up
119	// for text nodes in element children therefore we will get duplicates.
120	let text: String = node
121	.descendants()
122	.filter(\|n\| n.is_text())
123	.filter_map(\|n\| n.text())
124	.collect();
125	if text.is_empty() {
126	None
127	} else {
128	Some(text)
129	}
130	}
131
/// White-space handling mode selected by the `xml:space` attribute.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum XmlSpace {
    /// Runs of spaces are collapsed and boundary spaces are trimmed.
    Default,
    /// Selected by the attribute value `preserve`: spaces are kept as written.
    Preserve,
}
137
138	fn get_xmlspace(doc: &Document, node_id: NodeId, default: XmlSpace) -> XmlSpace {
139	match doc.get(node_id).attribute(AId::Space) {
140	Some("preserve") => XmlSpace::Preserve,
141	Some(_) => XmlSpace::Default,
142	_ => default,
143	}
144	}
145
/// In-place removal of a single boundary space from a string.
///
/// Callers are expected to invoke these methods only when the corresponding
/// boundary character is known to be a space. That contract is checked with
/// debug assertions. When it is violated in a build without debug assertions,
/// the string is left untouched rather than panicking or losing a character.
trait StrTrim {
    /// Removes the leading space of the string.
    fn remove_first_space(&mut self);
    /// Removes the trailing space of the string.
    fn remove_last_space(&mut self);
}

impl StrTrim for String {
    fn remove_first_space(&mut self) {
        debug_assert!(
            self.starts_with(' '),
            "expected a leading space in {:?}",
            self
        );
        // Guarded so that an empty string or a non-space (possibly multi-byte)
        // first character can neither panic nor be dropped by mistake.
        if self.starts_with(' ') {
            self.remove(0);
        }
    }

    fn remove_last_space(&mut self) {
        debug_assert!(
            self.ends_with(' '),
            "expected a trailing space in {:?}",
            self
        );
        // Guarded so that a non-space last character is never removed.
        if self.ends_with(' ') {
            self.pop();
        }
    }
}
162
163	/// Prepares text nodes according to the spec: https://www.w3.org/TR/SVG11/text.html#WhiteSpace
164	///
165	/// This function handles:
166	/// - 'xml:space' processing
167	/// - tabs and newlines removing/replacing
168	/// - spaces trimming
169	fn trim_text_nodes(text_elem_id: NodeId, xmlspace: XmlSpace, doc: &mut Document) {
170	let mut nodes = Vec::new(); // TODO: allocate only once
171	collect_text_nodes(doc.get(text_elem_id), `0`, &mut nodes);
172
173	// `trim` method has already collapsed all spaces into a single one,
174	// so we have to check only for one leading or trailing space.
175
176	if nodes.len() == `1` {
177	// Process element with a single text node child.
178
179	let node_id = nodes[`0`].0;
180
181	if xmlspace == XmlSpace::Default {
182	if let NodeKind::Text(ref mut text) = doc.nodes[node_id.get_usize()].kind {
183	match text.len() {
184	`0` => {} // An empty string. Do nothing.
185	`1` => {
186	// If string has only one character and it's a space - clear this string.
187	if text.as_bytes()[`0`] == b' ' {
188	text.clear();
189	}
190	}
191	_ => {
192	// 'text' has at least 2 bytes, so indexing is safe.
193	let c1 = text.as_bytes()[`0`];
194	let c2 = text.as_bytes()[text.len() - `1`];
195
196	if c1 == b' ' {
197	text.remove_first_space();
198	}
199
200	if c2 == b' ' {
201	text.remove_last_space();
202	}
203	}
204	}
205	}
206	} else {
207	// Do nothing when xml:space=preserve.
208	}
209	} else if nodes.len() > `1` {
210	// Process element with many text node children.
211
212	// We manage all text nodes as a single text node
213	// and trying to remove duplicated spaces across nodes.
214	//
215	// For example '<text>Text <tspan> text </tspan> text</text>'
216	// is the same is '<text>Text <tspan>text</tspan> text</text>'
217
218	let mut i = `0`;
219	let len = nodes.len() - `1`;
220	let mut last_non_empty: Option<NodeId> = None;
221	while i < len {
222	// Process pairs.
223	let (mut node1_id, depth1) = nodes[i];
224	let (node2_id, depth2) = nodes[i + `1`];
225
226	if doc.get(node1_id).text().is_empty() {
227	if let Some(n) = last_non_empty {
228	node1_id = n;
229	}
230	}
231
232	// Parent of the text node is always an element node and always exist,
233	// so unwrap is safe.
234	let xmlspace1 = get_xmlspace(doc, doc.get(node1_id).parent().unwrap().id, xmlspace);
235	let xmlspace2 = get_xmlspace(doc, doc.get(node2_id).parent().unwrap().id, xmlspace);
236
237	// >text<..>text<
238	// 1 2 3 4
239	let (c1, c2, c3, c4) = {
240	let text1 = doc.get(node1_id).text();
241	let text2 = doc.get(node2_id).text();
242
243	let bytes1 = text1.as_bytes();
244	let bytes2 = text2.as_bytes();
245
246	let c1 = bytes1.first().cloned();
247	let c2 = bytes1.last().cloned();
248	let c3 = bytes2.first().cloned();
249	let c4 = bytes2.last().cloned();
250
251	(c1, c2, c3, c4)
252	};
253
254	// NOTE: xml:space processing is mostly an undefined behavior,
255	// because everyone do it differently.
256	// We're mimicking the Chrome behavior.
257
258	// Remove space from the second text node if both nodes has bound spaces.
259	// From: '<text>Text <tspan> text</tspan></text>'
260	// To: '<text>Text <tspan>text</tspan></text>'
261	//
262	// See text-tspan-02-b.svg for details.
263	if depth1 < depth2 {
264	if c3 == Some(b' ') {
265	if xmlspace2 == XmlSpace::Default {
266	if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
267	text.remove_first_space();
268	}
269	}
270	}
271	} else {
272	if c2 == Some(b' ') && c2 == c3 {
273	if xmlspace1 == XmlSpace::Default && xmlspace2 == XmlSpace::Default {
274	if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
275	text.remove_last_space();
276	}
277	} else {
278	if xmlspace1 == XmlSpace::Preserve && xmlspace2 == XmlSpace::Default {
279	if let NodeKind::Text(ref mut text) =
280	doc.nodes[node2_id.get_usize()].kind
281	{
282	text.remove_first_space();
283	}
284	}
285	}
286	}
287	}
288
289	let is_first = i == `0`;
290	let is_last = i == len - `1`;
291
292	if is_first
293	&& c1 == Some(b' ')
294	&& xmlspace1 == XmlSpace::Default
295	&& !doc.get(node1_id).text().is_empty()
296	{
297	// Remove a leading space from a first text node.
298	if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
299	text.remove_first_space();
300	}
301	} else if is_last
302	&& c4 == Some(b' ')
303	&& !doc.get(node2_id).text().is_empty()
304	&& xmlspace2 == XmlSpace::Default
305	{
306	// Remove a trailing space from a last text node.
307	// Also check that 'text2' is not empty already.
308	if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
309	text.remove_last_space();
310	}
311	}
312
313	if is_last
314	&& c2 == Some(b' ')
315	&& !doc.get(node1_id).text().is_empty()
316	&& doc.get(node2_id).text().is_empty()
317	&& doc.get(node1_id).text().ends_with(' ')
318	{
319	if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
320	text.remove_last_space();
321	}
322	}
323
324	if !doc.get(node1_id).text().trim().is_empty() {
325	last_non_empty = Some(node1_id);
326	}
327
328	i += `1`;
329	}
330	}
331
332	// TODO: find a way to remove all empty text nodes
333	}
334
335	fn collect_text_nodes(parent: SvgNode, depth: usize, nodes: &mut Vec<(NodeId, usize)>) {
336	for child: SvgNode<'_, '_> in parent.children() {
337	if child.is_text() {
338	nodes.push((child.id, depth));
339	} else if child.is_element() {
340	collect_text_nodes(parent:child, depth:depth + `1`, nodes);
341	}
342	}
343	}
344
345	fn trim_text(text: &str, space: XmlSpace) -> String {
346	let mut s: String = String::with_capacity(text.len());
347
348	let mut prev: char = '0';
349	for c: char in text.chars() {
350	// \r, \n and \t should be converted into spaces.
351	let c: char = match c {
352	'`\r`' \| '`\n`' \| '`\t`' => ' ',
353	_ => c,
354	};
355
356	// Skip continuous spaces.
357	if space == XmlSpace::Default && c == ' ' && c == prev {
358	continue;
359	}
360
361	prev = c;
362
363	s.push(ch:c);
364	}
365
366	s
367	}
368