1// Copyright 2014-2017 The html5ever Project Developers.
2// Copyright Michael Howell and others.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10#![allow(missing_docs)]
11
12//! A simple reference-counted DOM.
13//!
14//! This is sufficient as a static parse tree, but don't build a
15//! web browser using it. :)
16//!
17//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like
18//! format. For example, the following graph
19//!
20//! ```text
21//! div
22//! +- "text node"
23//! +- span
24//! ```
25//! in HTML would be serialized as
26//!
27//! ```html
28//! <div>text node<span></span></div>
29//! ```
30//!
31//! See the [document object model article on wikipedia][dom wiki] for more information.
32//!
33//! This implementation stores the information associated with each node once, and then hands out
34//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create
35//! a new ref and then a node will outlive the document. Nodes own their children, but only have
36//! weak references to their parents.
37//!
38//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
39//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model
40
41use std::borrow::Cow;
42use std::cell::{Cell, RefCell};
43use std::collections::{HashSet, VecDeque};
44use std::default::Default;
45use std::fmt;
46use std::io;
47use std::mem;
48use std::rc::{Rc, Weak};
49
50use tendril::StrTendril;
51
52use html5ever::interface::tree_builder;
53use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
54use html5ever::serialize::TraversalScope;
55use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
56use html5ever::serialize::{Serialize, Serializer};
57use html5ever::Attribute;
58use html5ever::ExpandedName;
59use html5ever::QualName;
60
61/// The different kinds of nodes in the DOM.
62#[derive(Debug)]
63pub enum NodeData {
64 /// The `Document` itself - the root node of a HTML document.
65 Document,
66
67 /// A `DOCTYPE` with name, public id, and system id. See
68 /// [document type declaration on wikipedia][dtd wiki].
69 ///
70 /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
71 Doctype {
72 name: StrTendril,
73 public_id: StrTendril,
74 system_id: StrTendril,
75 },
76
77 /// A text node.
78 Text { contents: RefCell<StrTendril> },
79
80 /// A comment.
81 Comment { contents: StrTendril },
82
83 /// An element with attributes.
84 Element {
85 name: QualName,
86 attrs: RefCell<Vec<Attribute>>,
87
88 /// For HTML \<template\> elements, the [template contents].
89 ///
90 /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
91 template_contents: RefCell<Option<Handle>>,
92
93 /// Whether the node is a [HTML integration point].
94 ///
95 /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
96 mathml_annotation_xml_integration_point: bool,
97 },
98
99 /// A Processing instruction.
100 ProcessingInstruction {
101 target: StrTendril,
102 contents: StrTendril,
103 },
104}
105
106/// A DOM node.
107pub struct Node {
108 /// Parent node.
109 pub parent: Cell<Option<WeakHandle>>,
110 /// Child nodes of this node.
111 pub children: RefCell<Vec<Handle>>,
112 /// Represents this node's data.
113 pub data: NodeData,
114}
115
116impl Node {
117 /// Create a new node from its contents
118 pub fn new(data: NodeData) -> Rc<Self> {
119 Rc::new(Node {
120 data,
121 parent: Cell::new(None),
122 children: RefCell::new(Vec::new()),
123 })
124 }
125}
126
127impl Drop for Node {
128 fn drop(&mut self) {
129 let mut nodes: Vec> = mem::take(&mut *self.children.borrow_mut());
130 while let Some(node: Rc) = nodes.pop() {
131 let children: Vec> = mem::take(&mut *node.children.borrow_mut());
132 nodes.extend(children.into_iter());
133 if let NodeData::Element { ref template_contents: &RefCell>>, .. } = node.data {
134 if let Some(template_contents: Rc) = template_contents.borrow_mut().take() {
135 nodes.push(template_contents);
136 }
137 }
138 }
139 }
140}
141
142impl fmt::Debug for Node {
143 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
144 fmt&mut DebugStruct<'_, '_>.debug_struct("Node")
145 .field("data", &self.data)
146 .field(name:"children", &self.children)
147 .finish()
148 }
149}
150
151/// Reference to a DOM node.
152pub type Handle = Rc<Node>;
153
154/// Weak reference to a DOM node, used for parent pointers.
155pub type WeakHandle = Weak<Node>;
156
157/// Append a parentless node to another nodes' children
158fn append(new_parent: &Handle, child: Handle) {
159 let previous_parent: Option> = child.parent.replace(val:Some(Rc::downgrade(this:new_parent)));
160 // Invariant: child cannot have existing parent
161 assert!(previous_parent.is_none());
162 new_parent.children.borrow_mut().push(child);
163}
164
165/// If the node has a parent, get it and this node's position in its children
166fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> {
167 if let Some(weak: Weak) = target.parent.take() {
168 let parent: Rc = weak.upgrade().expect(msg:"dangling weak pointer");
169 target.parent.set(val:Some(weak));
170 let i: usize = match parentimpl Iterator
171 .children
172 .borrow()
173 .iter()
174 .enumerate()
175 .find(|&(_, child: &Rc)| Rc::ptr_eq(this:child, other:target))
176 {
177 Some((i: usize, _)) => i,
178 None => panic!("have parent but couldn't find in parent's children!"),
179 };
180 Some((parent, i))
181 } else {
182 None
183 }
184}
185
186fn append_to_existing_text(prev: &Handle, text: &str) -> bool {
187 match prev.data {
188 NodeData::Text { ref contents: &RefCell> } => {
189 contents.borrow_mut().push_slice(text);
190 true
191 },
192 _ => false,
193 }
194}
195
196fn remove_from_parent(target: &Handle) {
197 if let Some((parent: Rc, i: usize)) = get_parent_and_index(target) {
198 parent.children.borrow_mut().remove(index:i);
199 target.parent.set(val:None);
200 }
201}
202
203/// The DOM itself; the result of parsing.
204pub struct RcDom {
205 /// The `Document` itself.
206 pub document: Handle,
207
208 /// Errors that occurred during parsing.
209 pub errors: Vec<Cow<'static, str>>,
210
211 /// The document's quirks mode.
212 pub quirks_mode: QuirksMode,
213}
214
215impl TreeSink for RcDom {
216 type Output = Self;
217 fn finish(self) -> Self {
218 self
219 }
220
221 type Handle = Handle;
222
223 fn parse_error(&mut self, msg: Cow<'static, str>) {
224 self.errors.push(msg);
225 }
226
227 fn get_document(&mut self) -> Handle {
228 self.document.clone()
229 }
230
231 fn get_template_contents(&mut self, target: &Handle) -> Handle {
232 if let NodeData::Element {
233 ref template_contents,
234 ..
235 } = target.data
236 {
237 template_contents.borrow().as_ref().expect("not a template element!").clone()
238 } else {
239 panic!("not a template element!")
240 }
241 }
242
243 fn set_quirks_mode(&mut self, mode: QuirksMode) {
244 self.quirks_mode = mode;
245 }
246
247 fn same_node(&self, x: &Handle, y: &Handle) -> bool {
248 Rc::ptr_eq(x, y)
249 }
250
251 fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> {
252 return match target.data {
253 NodeData::Element { ref name, .. } => name.expanded(),
254 _ => panic!("not an element!"),
255 };
256 }
257
258 fn create_element(
259 &mut self,
260 name: QualName,
261 attrs: Vec<Attribute>,
262 flags: ElementFlags,
263 ) -> Handle {
264 Node::new(NodeData::Element {
265 name,
266 attrs: RefCell::new(attrs),
267 template_contents: RefCell::new(if flags.template {
268 Some(Node::new(NodeData::Document))
269 } else {
270 None
271 }),
272 mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
273 })
274 }
275
276 fn create_comment(&mut self, text: StrTendril) -> Handle {
277 Node::new(NodeData::Comment { contents: text })
278 }
279
280 fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Handle {
281 Node::new(NodeData::ProcessingInstruction {
282 target,
283 contents: data,
284 })
285 }
286
287 fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
288 // Append to an existing Text node if we have one.
289 if let NodeOrText::AppendText(ref text) = child {
290 if let Some(h) = parent.children.borrow().last() {
291 if append_to_existing_text(h, text) {
292 return;
293 }
294 }
295 }
296
297 append(
298 parent,
299 match child {
300 NodeOrText::AppendText(text) => Node::new(NodeData::Text {
301 contents: RefCell::new(text),
302 }),
303 NodeOrText::AppendNode(node) => node,
304 },
305 );
306 }
307
308 fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) {
309 let (parent, i) = get_parent_and_index(sibling)
310 .expect("append_before_sibling called on node without parent");
311
312 let child = match (child, i) {
313 // No previous node.
314 (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text {
315 contents: RefCell::new(text),
316 }),
317
318 // Look for a text node before the insertion point.
319 (NodeOrText::AppendText(text), i) => {
320 let children = parent.children.borrow();
321 let prev = &children[i - 1];
322 if append_to_existing_text(prev, &text) {
323 return;
324 }
325 Node::new(NodeData::Text {
326 contents: RefCell::new(text),
327 })
328 },
329
330 // The tree builder promises we won't have a text node after
331 // the insertion point.
332
333 // Any other kind of node.
334 (NodeOrText::AppendNode(node), _) => node,
335 };
336
337 remove_from_parent(&child);
338
339 child.parent.set(Some(Rc::downgrade(&parent)));
340 parent.children.borrow_mut().insert(i, child);
341 }
342
343 fn append_based_on_parent_node(
344 &mut self,
345 element: &Self::Handle,
346 prev_element: &Self::Handle,
347 child: NodeOrText<Self::Handle>,
348 ) {
349 let parent = element.parent.take();
350 let has_parent = parent.is_some();
351 element.parent.set(parent);
352
353 if has_parent {
354 self.append_before_sibling(element, child);
355 } else {
356 self.append(prev_element, child);
357 }
358 }
359
360 fn append_doctype_to_document(
361 &mut self,
362 name: StrTendril,
363 public_id: StrTendril,
364 system_id: StrTendril,
365 ) {
366 append(
367 &self.document,
368 Node::new(NodeData::Doctype {
369 name,
370 public_id,
371 system_id,
372 }),
373 );
374 }
375
376 fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) {
377 let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
378 attrs.borrow_mut()
379 } else {
380 panic!("not an element")
381 };
382
383 let existing_names = existing
384 .iter()
385 .map(|e| e.name.clone())
386 .collect::<HashSet<_>>();
387 existing.extend(
388 attrs
389 .into_iter()
390 .filter(|attr| !existing_names.contains(&attr.name)),
391 );
392 }
393
394 fn remove_from_parent(&mut self, target: &Handle) {
395 remove_from_parent(target);
396 }
397
398 fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) {
399 let mut children = node.children.borrow_mut();
400 let mut new_children = new_parent.children.borrow_mut();
401 for child in children.iter() {
402 let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
403 assert!(Rc::ptr_eq(
404 node,
405 &previous_parent.unwrap().upgrade().expect("dangling weak")
406 ))
407 }
408 new_children.extend(mem::take(&mut *children));
409 }
410
411 fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool {
412 if let NodeData::Element {
413 mathml_annotation_xml_integration_point,
414 ..
415 } = target.data
416 {
417 mathml_annotation_xml_integration_point
418 } else {
419 panic!("not an element!")
420 }
421 }
422}
423
424impl Default for RcDom {
425 fn default() -> RcDom {
426 RcDom {
427 document: Node::new(data:NodeData::Document),
428 errors: vec![],
429 quirks_mode: tree_builder::NoQuirks,
430 }
431 }
432}
433
434enum SerializeOp {
435 Open(Handle),
436 Close(QualName),
437}
438
439pub struct SerializableHandle(Handle);
440
441impl From<Handle> for SerializableHandle {
442 fn from(h: Handle) -> SerializableHandle {
443 SerializableHandle(h)
444 }
445}
446
447impl Serialize for SerializableHandle {
448 fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
449 where
450 S: Serializer,
451 {
452 let mut ops = VecDeque::new();
453 match traversal_scope {
454 IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())),
455 ChildrenOnly(_) => ops.extend(self
456 .0
457 .children
458 .borrow()
459 .iter()
460 .map(|h| SerializeOp::Open(h.clone())))
461 }
462
463 while let Some(op) = ops.pop_front() {
464 match op {
465 SerializeOp::Open(handle) => match handle.data {
466 NodeData::Element {
467 ref name,
468 ref attrs,
469 ..
470 } => {
471 serializer.start_elem(
472 name.clone(),
473 attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
474 )?;
475
476 ops.reserve(1 + handle.children.borrow().len());
477 ops.push_front(SerializeOp::Close(name.clone()));
478
479 for child in handle.children.borrow().iter().rev() {
480 ops.push_front(SerializeOp::Open(child.clone()));
481 }
482 },
483
484 NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?,
485
486 NodeData::Text { ref contents } => {
487 serializer.write_text(&contents.borrow())?
488 },
489
490 NodeData::Comment { ref contents } => serializer.write_comment(contents)?,
491
492 NodeData::ProcessingInstruction {
493 ref target,
494 ref contents,
495 } => serializer.write_processing_instruction(target, contents)?,
496
497 NodeData::Document => panic!("Can't serialize Document node itself"),
498 },
499
500 SerializeOp::Close(name) => {
501 serializer.end_elem(name)?;
502 },
503 }
504 }
505
506 Ok(())
507 }
508}
509