// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5use std::collections::HashMap;
6
7use roxmltree::Error;
8use simplecss::Declaration;
9use svgtypes::FontShorthand;
10
11use super::{AId, Attribute, Document, EId, NodeData, NodeId, NodeKind, ShortRange};
12
/// Namespace URI used to recognise SVG elements.
const SVG_NS: &str = "http://www.w3.org/2000/svg";
/// Namespace URI used for `xlink:*` attributes such as `xlink:href`.
const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
/// Namespace URI used for `xml:*` attributes.
const XML_NAMESPACE_NS: &str = "http://www.w3.org/XML/1998/namespace";
16
17impl<'input> Document<'input> {
18 /// Parses a [`Document`] from a [`roxmltree::Document`].
19 pub fn parse_tree(xml: &roxmltree::Document<'input>) -> Result<Document<'input>, Error> {
20 parse(xml)
21 }
22
23 pub(crate) fn append(&mut self, parent_id: NodeId, kind: NodeKind) -> NodeId {
24 let new_child_id = NodeId::from(self.nodes.len());
25 self.nodes.push(NodeData {
26 parent: Some(parent_id),
27 next_sibling: None,
28 children: None,
29 kind,
30 });
31
32 let last_child_id = self.nodes[parent_id.get_usize()].children.map(|(_, id)| id);
33
34 if let Some(id) = last_child_id {
35 self.nodes[id.get_usize()].next_sibling = Some(new_child_id);
36 }
37
38 self.nodes[parent_id.get_usize()].children = Some(
39 if let Some((first_child_id, _)) = self.nodes[parent_id.get_usize()].children {
40 (first_child_id, new_child_id)
41 } else {
42 (new_child_id, new_child_id)
43 },
44 );
45
46 new_child_id
47 }
48
49 fn append_attribute(&mut self, name: AId, value: roxmltree::StringStorage<'input>) {
50 self.attrs.push(Attribute { name, value });
51 }
52}
53
54fn parse<'input>(xml: &roxmltree::Document<'input>) -> Result<Document<'input>, Error> {
55 let mut doc = Document {
56 nodes: Vec::new(),
57 attrs: Vec::new(),
58 links: HashMap::new(),
59 };
60
61 // build a map of id -> node for resolve_href
62 let mut id_map = HashMap::new();
63 for node in xml.descendants() {
64 if let Some(id) = node.attribute("id") {
65 if !id_map.contains_key(id) {
66 id_map.insert(id, node);
67 }
68 }
69 }
70
71 // Add a root node.
72 doc.nodes.push(NodeData {
73 parent: None,
74 next_sibling: None,
75 children: None,
76 kind: NodeKind::Root,
77 });
78
79 let style_sheet = resolve_css(xml);
80
81 parse_xml_node_children(
82 xml.root(),
83 xml.root(),
84 doc.root().id,
85 &style_sheet,
86 false,
87 0,
88 &mut doc,
89 &id_map,
90 )?;
91
92 // Check that the root element is `svg`.
93 match doc.root().first_element_child() {
94 Some(child) => {
95 if child.tag_name() != Some(EId::Svg) {
96 return Err(roxmltree::Error::NoRootNode);
97 }
98 }
99 None => return Err(roxmltree::Error::NoRootNode),
100 }
101
102 // Collect all elements with `id` attribute.
103 let mut links = HashMap::new();
104 for node in doc.descendants() {
105 if let Some(id) = node.attribute::<&str>(AId::Id) {
106 links.insert(id.to_string(), node.id);
107 }
108 }
109 doc.links = links;
110
111 fix_recursive_patterns(&mut doc);
112 fix_recursive_links(EId::ClipPath, AId::ClipPath, &mut doc);
113 fix_recursive_links(EId::Mask, AId::Mask, &mut doc);
114 fix_recursive_links(EId::Filter, AId::Filter, &mut doc);
115 fix_recursive_fe_image(&mut doc);
116
117 Ok(doc)
118}
119
120pub(crate) fn parse_tag_name(node: roxmltree::Node) -> Option<EId> {
121 if !node.is_element() {
122 return None;
123 }
124
125 if node.tag_name().namespace() != Some(SVG_NS) {
126 return None;
127 }
128
129 EId::from_str(text:node.tag_name().name())
130}
131
132fn parse_xml_node_children<'input>(
133 parent: roxmltree::Node<'_, 'input>,
134 origin: roxmltree::Node,
135 parent_id: NodeId,
136 style_sheet: &simplecss::StyleSheet,
137 ignore_ids: bool,
138 depth: u32,
139 doc: &mut Document<'input>,
140 id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
141) -> Result<(), Error> {
142 for node: Node<'_, '_> in parent.children() {
143 parse_xml_node(
144 node,
145 origin,
146 parent_id,
147 style_sheet,
148 ignore_ids,
149 depth,
150 doc,
151 id_map,
152 )?;
153 }
154
155 Ok(())
156}
157
158fn parse_xml_node<'input>(
159 node: roxmltree::Node<'_, 'input>,
160 origin: roxmltree::Node,
161 parent_id: NodeId,
162 style_sheet: &simplecss::StyleSheet,
163 ignore_ids: bool,
164 depth: u32,
165 doc: &mut Document<'input>,
166 id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
167) -> Result<(), Error> {
168 if depth > 1024 {
169 return Err(Error::NodesLimitReached);
170 }
171
172 let mut tag_name = match parse_tag_name(node) {
173 Some(id) => id,
174 None => return Ok(()),
175 };
176
177 if tag_name == EId::Style {
178 return Ok(());
179 }
180
181 // TODO: remove?
182 // Treat links as groups.
183 if tag_name == EId::A {
184 tag_name = EId::G;
185 }
186
187 let node_id = parse_svg_element(node, parent_id, tag_name, style_sheet, ignore_ids, doc)?;
188 if tag_name == EId::Text {
189 super::text::parse_svg_text_element(node, node_id, style_sheet, doc)?;
190 } else if tag_name == EId::Use {
191 parse_svg_use_element(node, origin, node_id, style_sheet, depth + 1, doc, id_map)?;
192 } else {
193 parse_xml_node_children(
194 node,
195 origin,
196 node_id,
197 style_sheet,
198 ignore_ids,
199 depth + 1,
200 doc,
201 id_map,
202 )?;
203 }
204
205 Ok(())
206}
207
208pub(crate) fn parse_svg_element<'input>(
209 xml_node: roxmltree::Node<'_, 'input>,
210 parent_id: NodeId,
211 tag_name: EId,
212 style_sheet: &simplecss::StyleSheet,
213 ignore_ids: bool,
214 doc: &mut Document<'input>,
215) -> Result<NodeId, Error> {
216 let attrs_start_idx = doc.attrs.len();
217
218 // Copy presentational attributes first.
219 for attr in xml_node.attributes() {
220 match attr.namespace() {
221 None | Some(SVG_NS) | Some(XLINK_NS) | Some(XML_NAMESPACE_NS) => {}
222 _ => continue,
223 }
224
225 let aid = match AId::from_str(attr.name()) {
226 Some(v) => v,
227 None => continue,
228 };
229
230 // During a `use` resolving, all `id` attributes must be ignored.
231 // Otherwise we will get elements with duplicated id's.
232 if ignore_ids && aid == AId::Id {
233 continue;
234 }
235
236 // For some reason those properties are allowed only inside a `style` attribute and CSS.
237 if matches!(aid, AId::MixBlendMode | AId::Isolation | AId::FontKerning) {
238 continue;
239 }
240
241 append_attribute(parent_id, tag_name, aid, attr.value_storage().clone(), doc);
242 }
243
244 let mut insert_attribute = |aid, value: &str| {
245 // Check that attribute already exists.
246 let idx = doc.attrs[attrs_start_idx..]
247 .iter_mut()
248 .position(|a| a.name == aid);
249
250 // Append an attribute as usual.
251 let added = append_attribute(
252 parent_id,
253 tag_name,
254 aid,
255 roxmltree::StringStorage::new_owned(value),
256 doc,
257 );
258
259 // Check that attribute was actually added, because it could be skipped.
260 if added {
261 if let Some(idx) = idx {
262 // Swap the last attribute with an existing one.
263 let last_idx = doc.attrs.len() - 1;
264 doc.attrs.swap(attrs_start_idx + idx, last_idx);
265 // Remove last.
266 doc.attrs.pop();
267 }
268 }
269 };
270
271 let mut write_declaration = |declaration: &Declaration| {
272 // TODO: perform XML attribute normalization
273 if declaration.name == "marker" {
274 insert_attribute(AId::MarkerStart, declaration.value);
275 insert_attribute(AId::MarkerMid, declaration.value);
276 insert_attribute(AId::MarkerEnd, declaration.value);
277 } else if declaration.name == "font" {
278 if let Ok(shorthand) = FontShorthand::from_str(declaration.value) {
279 // First we need to reset all values to their default.
280 insert_attribute(AId::FontStyle, "normal");
281 insert_attribute(AId::FontVariant, "normal");
282 insert_attribute(AId::FontWeight, "normal");
283 insert_attribute(AId::FontStretch, "normal");
284 insert_attribute(AId::LineHeight, "normal");
285 insert_attribute(AId::FontSizeAdjust, "none");
286 insert_attribute(AId::FontKerning, "auto");
287 insert_attribute(AId::FontVariantCaps, "normal");
288 insert_attribute(AId::FontVariantLigatures, "normal");
289 insert_attribute(AId::FontVariantNumeric, "normal");
290 insert_attribute(AId::FontVariantEastAsian, "normal");
291 insert_attribute(AId::FontVariantPosition, "normal");
292
293 // Then, we set the properties that have been declared.
294 shorthand
295 .font_stretch
296 .map(|s| insert_attribute(AId::FontStretch, s));
297 shorthand
298 .font_weight
299 .map(|s| insert_attribute(AId::FontWeight, s));
300 shorthand
301 .font_variant
302 .map(|s| insert_attribute(AId::FontVariant, s));
303 shorthand
304 .font_style
305 .map(|s| insert_attribute(AId::FontStyle, s));
306 insert_attribute(AId::FontSize, shorthand.font_size);
307 insert_attribute(AId::FontFamily, shorthand.font_family);
308 } else {
309 log::warn!(
310 "Failed to parse {} value: '{}'",
311 AId::Font,
312 declaration.value
313 );
314 }
315 } else if let Some(aid) = AId::from_str(declaration.name) {
316 // Parse only the presentation attributes.
317 if aid.is_presentation() {
318 insert_attribute(aid, declaration.value);
319 }
320 }
321 };
322
323 // Apply CSS.
324 for rule in &style_sheet.rules {
325 if rule.selector.matches(&XmlNode(xml_node)) {
326 for declaration in &rule.declarations {
327 write_declaration(declaration);
328 }
329 }
330 }
331
332 // Split a `style` attribute.
333 if let Some(value) = xml_node.attribute("style") {
334 for declaration in simplecss::DeclarationTokenizer::from(value) {
335 write_declaration(&declaration);
336 }
337 }
338
339 if doc.nodes.len() > 1_000_000 {
340 return Err(Error::NodesLimitReached);
341 }
342
343 let node_id = doc.append(
344 parent_id,
345 NodeKind::Element {
346 tag_name,
347 attributes: ShortRange::new(attrs_start_idx as u32, doc.attrs.len() as u32),
348 },
349 );
350
351 Ok(node_id)
352}
353
354fn append_attribute<'input>(
355 parent_id: NodeId,
356 tag_name: EId,
357 aid: AId,
358 value: roxmltree::StringStorage<'input>,
359 doc: &mut Document<'input>,
360) -> bool {
361 match aid {
362 // The `style` attribute will be split into attributes, so we don't need it.
363 AId::Style |
364 // No need to copy a `class` attribute since CSS were already resolved.
365 AId::Class => return false,
366 _ => {}
367 }
368
369 // Ignore `xlink:href` on `tspan` (which was originally `tref` or `a`),
370 // because we will convert `tref` into `tspan` anyway.
371 if tag_name == EId::Tspan && aid == AId::Href {
372 return false;
373 }
374
375 if aid.allows_inherit_value() && &*value == "inherit" {
376 return resolve_inherit(parent_id, aid, doc);
377 }
378
379 doc.append_attribute(name:aid, value);
380 true
381}
382
383fn resolve_inherit(parent_id: NodeId, aid: AId, doc: &mut Document) -> bool {
384 if aid.is_inheritable() {
385 // Inheritable attributes can inherit a value from an any ancestor.
386 let node_id = doc
387 .get(parent_id)
388 .ancestors()
389 .find(|n| n.has_attribute(aid))
390 .map(|n| n.id);
391 if let Some(node_id) = node_id {
392 if let Some(attr) = doc
393 .get(node_id)
394 .attributes()
395 .iter()
396 .find(|a| a.name == aid)
397 .cloned()
398 {
399 doc.attrs.push(Attribute {
400 name: aid,
401 value: attr.value,
402 });
403
404 return true;
405 }
406 }
407 } else {
408 // Non-inheritable attributes can inherit a value only from a direct parent.
409 if let Some(attr) = doc
410 .get(parent_id)
411 .attributes()
412 .iter()
413 .find(|a| a.name == aid)
414 .cloned()
415 {
416 doc.attrs.push(Attribute {
417 name: aid,
418 value: attr.value,
419 });
420
421 return true;
422 }
423 }
424
425 // Fallback to a default value if possible.
426 let value = match aid {
427 AId::ImageRendering | AId::ShapeRendering | AId::TextRendering => "auto",
428
429 AId::ClipPath
430 | AId::Filter
431 | AId::MarkerEnd
432 | AId::MarkerMid
433 | AId::MarkerStart
434 | AId::Mask
435 | AId::Stroke
436 | AId::StrokeDasharray
437 | AId::TextDecoration => "none",
438
439 AId::FontStretch
440 | AId::FontStyle
441 | AId::FontVariant
442 | AId::FontWeight
443 | AId::LetterSpacing
444 | AId::WordSpacing => "normal",
445
446 AId::Fill | AId::FloodColor | AId::StopColor => "black",
447
448 AId::FillOpacity
449 | AId::FloodOpacity
450 | AId::Opacity
451 | AId::StopOpacity
452 | AId::StrokeOpacity => "1",
453
454 AId::ClipRule | AId::FillRule => "nonzero",
455
456 AId::BaselineShift => "baseline",
457 AId::ColorInterpolationFilters => "linearRGB",
458 AId::Direction => "ltr",
459 AId::Display => "inline",
460 AId::FontSize => "medium",
461 AId::Overflow => "visible",
462 AId::StrokeDashoffset => "0",
463 AId::StrokeLinecap => "butt",
464 AId::StrokeLinejoin => "miter",
465 AId::StrokeMiterlimit => "4",
466 AId::StrokeWidth => "1",
467 AId::TextAnchor => "start",
468 AId::Visibility => "visible",
469 AId::WritingMode => "lr-tb",
470 _ => return false,
471 };
472
473 doc.append_attribute(aid, roxmltree::StringStorage::Borrowed(value));
474 true
475}
476
477fn resolve_href<'a, 'input: 'a>(
478 node: roxmltree::Node<'a, 'input>,
479 id_map: &HashMap<&str, roxmltree::Node<'a, 'input>>,
480) -> Option<roxmltree::Node<'a, 'input>> {
481 let link_value: &str = nodeOption<&str>
482 .attribute((XLINK_NS, "href"))
483 .or_else(|| node.attribute(name:"href"))?;
484
485 let link_id: &str = svgtypes::IRI::from_str(text:link_value).ok()?.0;
486
487 id_map.get(link_id).copied()
488}
489
490fn parse_svg_use_element<'input>(
491 node: roxmltree::Node<'_, 'input>,
492 origin: roxmltree::Node,
493 parent_id: NodeId,
494 style_sheet: &simplecss::StyleSheet,
495 depth: u32,
496 doc: &mut Document<'input>,
497 id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
498) -> Result<(), Error> {
499 let link = match resolve_href(node, id_map) {
500 Some(v) => v,
501 None => return Ok(()),
502 };
503
504 if link == node || link == origin {
505 log::warn!(
506 "Recursive 'use' detected. '{}' will be skipped.",
507 node.attribute((SVG_NS, "id")).unwrap_or_default()
508 );
509 return Ok(());
510 }
511
512 // Make sure we're linked to an SVG element.
513 if parse_tag_name(link).is_none() {
514 return Ok(());
515 }
516
517 // Check that none of the linked node's children reference current `use` node
518 // via other `use` node.
519 //
520 // Example:
521 // <g id="g1">
522 // <use xlink:href="#use1" id="use2"/>
523 // </g>
524 // <use xlink:href="#g1" id="use1"/>
525 //
526 // `use2` should be removed.
527 //
528 // Also, child should not reference its parent:
529 // <g id="g1">
530 // <use xlink:href="#g1" id="use1"/>
531 // </g>
532 //
533 // `use1` should be removed.
534 let mut is_recursive = false;
535 for link_child in link
536 .descendants()
537 .skip(1)
538 .filter(|n| n.has_tag_name((SVG_NS, "use")))
539 {
540 if let Some(link2) = resolve_href(link_child, id_map) {
541 if link2 == node || link2 == link {
542 is_recursive = true;
543 break;
544 }
545 }
546 }
547
548 if is_recursive {
549 log::warn!(
550 "Recursive 'use' detected. '{}' will be skipped.",
551 node.attribute((SVG_NS, "id")).unwrap_or_default()
552 );
553 return Ok(());
554 }
555
556 parse_xml_node(
557 link,
558 node,
559 parent_id,
560 style_sheet,
561 true,
562 depth + 1,
563 doc,
564 id_map,
565 )
566}
567
568fn resolve_css<'a>(xml: &'a roxmltree::Document<'a>) -> simplecss::StyleSheet<'a> {
569 let mut sheet: StyleSheet<'_> = simplecss::StyleSheet::new();
570
571 for node: Node<'_, '_> in xml.descendants().filter(|n: &Node<'_, '_>| n.has_tag_name("style")) {
572 match node.attribute(name:"type") {
573 Some("text/css") => {}
574 Some(_) => continue,
575 None => {}
576 }
577
578 let text: &str = match node.text() {
579 Some(v: &str) => v,
580 None => continue,
581 };
582
583 sheet.parse_more(text);
584 }
585
586 sheet
587}
588
589struct XmlNode<'a, 'input: 'a>(roxmltree::Node<'a, 'input>);
590
591impl simplecss::Element for XmlNode<'_, '_> {
592 fn parent_element(&self) -> Option<Self> {
593 self.0.parent_element().map(XmlNode)
594 }
595
596 fn prev_sibling_element(&self) -> Option<Self> {
597 self.0.prev_sibling_element().map(XmlNode)
598 }
599
600 fn has_local_name(&self, local_name: &str) -> bool {
601 self.0.tag_name().name() == local_name
602 }
603
604 fn attribute_matches(&self, local_name: &str, operator: simplecss::AttributeOperator) -> bool {
605 match self.0.attribute(local_name) {
606 Some(value) => operator.matches(value),
607 None => false,
608 }
609 }
610
611 fn pseudo_class_matches(&self, class: simplecss::PseudoClass) -> bool {
612 match class {
613 simplecss::PseudoClass::FirstChild => self.prev_sibling_element().is_none(),
614 // TODO: lang
615 _ => false, // Since we are querying a static SVG we can ignore other pseudo-classes.
616 }
617 }
618}
619
620fn fix_recursive_patterns(doc: &mut Document) {
621 while let Some(node_id: NodeId) = find_recursive_pattern(AId::Fill, doc) {
622 let idx: usize = doc.get(node_id).attribute_id(AId::Fill).unwrap();
623 doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
624 }
625
626 while let Some(node_id: NodeId) = find_recursive_pattern(AId::Stroke, doc) {
627 let idx: usize = doc.get(node_id).attribute_id(AId::Stroke).unwrap();
628 doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
629 }
630}
631
632fn find_recursive_pattern(aid: AId, doc: &mut Document) -> Option<NodeId> {
633 for pattern_node in doc
634 .root()
635 .descendants()
636 .filter(|n| n.tag_name() == Some(EId::Pattern))
637 {
638 for node in pattern_node.descendants() {
639 let value = match node.attribute(aid) {
640 Some(v) => v,
641 None => continue,
642 };
643
644 if let Ok(svgtypes::Paint::FuncIRI(link_id, _)) = svgtypes::Paint::from_str(value) {
645 if link_id == pattern_node.element_id() {
646 // If a pattern child has a link to the pattern itself
647 // then we have to replace it with `none`.
648 // Otherwise we will get endless loop/recursion and stack overflow.
649 return Some(node.id);
650 } else {
651 // Check that linked node children doesn't link this pattern.
652 if let Some(linked_node) = doc.element_by_id(link_id) {
653 for node2 in linked_node.descendants() {
654 let value2 = match node2.attribute(aid) {
655 Some(v) => v,
656 None => continue,
657 };
658
659 if let Ok(svgtypes::Paint::FuncIRI(link_id2, _)) =
660 svgtypes::Paint::from_str(value2)
661 {
662 if link_id2 == pattern_node.element_id() {
663 return Some(node2.id);
664 }
665 }
666 }
667 }
668 }
669 }
670 }
671 }
672
673 None
674}
675
676fn fix_recursive_links(eid: EId, aid: AId, doc: &mut Document) {
677 while let Some(node_id: NodeId) = find_recursive_link(eid, aid, doc) {
678 let idx: usize = doc.get(node_id).attribute_id(aid).unwrap();
679 doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
680 }
681}
682
683fn find_recursive_link(eid: EId, aid: AId, doc: &Document) -> Option<NodeId> {
684 for node in doc
685 .root()
686 .descendants()
687 .filter(|n| n.tag_name() == Some(eid))
688 {
689 for child in node.descendants() {
690 if let Some(link) = child.node_attribute(aid) {
691 if link == node {
692 // If an element child has a link to the element itself
693 // then we have to replace it with `none`.
694 // Otherwise we will get endless loop/recursion and stack overflow.
695 return Some(child.id);
696 } else {
697 // Check that linked node children doesn't link this element.
698 for node2 in link.descendants() {
699 if let Some(link2) = node2.node_attribute(aid) {
700 if link2 == node {
701 return Some(node2.id);
702 }
703 }
704 }
705 }
706 }
707 }
708 }
709
710 None
711}
712
713/// Detects cases like:
714///
715/// ```xml
716/// <filter id="filter1">
717/// <feImage xlink:href="#rect1"/>
718/// </filter>
719/// <rect id="rect1" x="36" y="36" width="120" height="120" fill="green" filter="url(#filter1)"/>
720/// ```
721fn fix_recursive_fe_image(doc: &mut Document) {
722 let mut ids = Vec::new();
723 for fe_node in doc
724 .root()
725 .descendants()
726 .filter(|n| n.tag_name() == Some(EId::FeImage))
727 {
728 if let Some(link) = fe_node.node_attribute(AId::Href) {
729 if let Some(filter_uri) = link.attribute::<&str>(AId::Filter) {
730 let filter_id = fe_node.parent().unwrap().element_id();
731 for func in svgtypes::FilterValueListParser::from(filter_uri).flatten() {
732 if let svgtypes::FilterValue::Url(url) = func {
733 if url == filter_id {
734 ids.push(link.id);
735 }
736 }
737 }
738 }
739 }
740 }
741
742 for id in ids {
743 let idx = doc.get(id).attribute_id(AId::Filter).unwrap();
744 doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
745 }
746}
747