1 | // Copyright 2014-2017 The html5ever Project Developers. |
2 | // Copyright Michael Howell and others. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | #![allow (missing_docs)] |
11 | |
12 | //! A simple reference-counted DOM. |
13 | //! |
14 | //! This is sufficient as a static parse tree, but don't build a |
15 | //! web browser using it. :) |
16 | //! |
17 | //! A DOM is a [tree structure] with ordered children that can be represented in an XML-like |
18 | //! format. For example, the following graph |
19 | //! |
20 | //! ```text |
21 | //! div |
22 | //! +- "text node" |
23 | //! +- span |
24 | //! ``` |
25 | //! in HTML would be serialized as |
26 | //! |
27 | //! ```html |
28 | //! <div>text node<span></span></div> |
29 | //! ``` |
30 | //! |
31 | //! See the [document object model article on wikipedia][dom wiki] for more information. |
32 | //! |
33 | //! This implementation stores the information associated with each node once, and then hands out |
34 | //! refs to children. The nodes themselves are reference-counted to avoid copying - you can create |
35 | //! a new ref and then a node will outlive the document. Nodes own their children, but only have |
36 | //! weak references to their parents. |
37 | //! |
38 | //! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure) |
39 | //! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model |
40 | |
41 | use std::borrow::Cow; |
42 | use std::cell::{Cell, RefCell}; |
43 | use std::collections::{HashSet, VecDeque}; |
44 | use std::default::Default; |
45 | use std::fmt; |
46 | use std::io; |
47 | use std::mem; |
48 | use std::rc::{Rc, Weak}; |
49 | |
50 | use tendril::StrTendril; |
51 | |
52 | use html5ever::interface::tree_builder; |
53 | use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; |
54 | use html5ever::serialize::TraversalScope; |
55 | use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode}; |
56 | use html5ever::serialize::{Serialize, Serializer}; |
57 | use html5ever::Attribute; |
58 | use html5ever::ExpandedName; |
59 | use html5ever::QualName; |
60 | |
61 | /// The different kinds of nodes in the DOM. |
62 | #[derive (Debug)] |
63 | pub enum NodeData { |
64 | /// The `Document` itself - the root node of a HTML document. |
65 | Document, |
66 | |
67 | /// A `DOCTYPE` with name, public id, and system id. See |
68 | /// [document type declaration on wikipedia][dtd wiki]. |
69 | /// |
70 | /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration |
71 | Doctype { |
72 | name: StrTendril, |
73 | public_id: StrTendril, |
74 | system_id: StrTendril, |
75 | }, |
76 | |
77 | /// A text node. |
78 | Text { contents: RefCell<StrTendril> }, |
79 | |
80 | /// A comment. |
81 | Comment { contents: StrTendril }, |
82 | |
83 | /// An element with attributes. |
84 | Element { |
85 | name: QualName, |
86 | attrs: RefCell<Vec<Attribute>>, |
87 | |
88 | /// For HTML \<template\> elements, the [template contents]. |
89 | /// |
90 | /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents |
91 | template_contents: RefCell<Option<Handle>>, |
92 | |
93 | /// Whether the node is a [HTML integration point]. |
94 | /// |
95 | /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point |
96 | mathml_annotation_xml_integration_point: bool, |
97 | }, |
98 | |
99 | /// A Processing instruction. |
100 | ProcessingInstruction { |
101 | target: StrTendril, |
102 | contents: StrTendril, |
103 | }, |
104 | } |
105 | |
106 | /// A DOM node. |
107 | pub struct Node { |
108 | /// Parent node. |
109 | pub parent: Cell<Option<WeakHandle>>, |
110 | /// Child nodes of this node. |
111 | pub children: RefCell<Vec<Handle>>, |
112 | /// Represents this node's data. |
113 | pub data: NodeData, |
114 | } |
115 | |
116 | impl Node { |
117 | /// Create a new node from its contents |
118 | pub fn new(data: NodeData) -> Rc<Self> { |
119 | Rc::new(Node { |
120 | data, |
121 | parent: Cell::new(None), |
122 | children: RefCell::new(Vec::new()), |
123 | }) |
124 | } |
125 | } |
126 | |
127 | impl Drop for Node { |
128 | fn drop(&mut self) { |
129 | let mut nodes: Vec> = mem::take(&mut *self.children.borrow_mut()); |
130 | while let Some(node: Rc) = nodes.pop() { |
131 | let children: Vec> = mem::take(&mut *node.children.borrow_mut()); |
132 | nodes.extend(children.into_iter()); |
133 | if let NodeData::Element { ref template_contents: &RefCell, .. } = node.data { |
134 | if let Some(template_contents: Rc) = template_contents.borrow_mut().take() { |
135 | nodes.push(template_contents); |
136 | } |
137 | } |
138 | } |
139 | } |
140 | } |
141 | |
142 | impl fmt::Debug for Node { |
143 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { |
144 | fmt&mut DebugStruct<'_, '_>.debug_struct("Node" ) |
145 | .field("data" , &self.data) |
146 | .field(name:"children" , &self.children) |
147 | .finish() |
148 | } |
149 | } |
150 | |
151 | /// Reference to a DOM node. |
152 | pub type Handle = Rc<Node>; |
153 | |
154 | /// Weak reference to a DOM node, used for parent pointers. |
155 | pub type WeakHandle = Weak<Node>; |
156 | |
157 | /// Append a parentless node to another nodes' children |
158 | fn append(new_parent: &Handle, child: Handle) { |
159 | let previous_parent: Option> = child.parent.replace(val:Some(Rc::downgrade(this:new_parent))); |
160 | // Invariant: child cannot have existing parent |
161 | assert!(previous_parent.is_none()); |
162 | new_parent.children.borrow_mut().push(child); |
163 | } |
164 | |
165 | /// If the node has a parent, get it and this node's position in its children |
166 | fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> { |
167 | if let Some(weak: Weak) = target.parent.take() { |
168 | let parent: Rc = weak.upgrade().expect(msg:"dangling weak pointer" ); |
169 | target.parent.set(val:Some(weak)); |
170 | let i: usize = match parentimpl Iterator |
171 | .children |
172 | .borrow() |
173 | .iter() |
174 | .enumerate() |
175 | .find(|&(_, child: &Rc)| Rc::ptr_eq(this:child, other:target)) |
176 | { |
177 | Some((i: usize, _)) => i, |
178 | None => panic!("have parent but couldn't find in parent's children!" ), |
179 | }; |
180 | Some((parent, i)) |
181 | } else { |
182 | None |
183 | } |
184 | } |
185 | |
186 | fn append_to_existing_text(prev: &Handle, text: &str) -> bool { |
187 | match prev.data { |
188 | NodeData::Text { ref contents: &RefCell> } => { |
189 | contents.borrow_mut().push_slice(text); |
190 | true |
191 | }, |
192 | _ => false, |
193 | } |
194 | } |
195 | |
196 | fn remove_from_parent(target: &Handle) { |
197 | if let Some((parent: Rc, i: usize)) = get_parent_and_index(target) { |
198 | parent.children.borrow_mut().remove(index:i); |
199 | target.parent.set(val:None); |
200 | } |
201 | } |
202 | |
203 | /// The DOM itself; the result of parsing. |
204 | pub struct RcDom { |
205 | /// The `Document` itself. |
206 | pub document: Handle, |
207 | |
208 | /// Errors that occurred during parsing. |
209 | pub errors: Vec<Cow<'static, str>>, |
210 | |
211 | /// The document's quirks mode. |
212 | pub quirks_mode: QuirksMode, |
213 | } |
214 | |
215 | impl TreeSink for RcDom { |
216 | type Output = Self; |
217 | fn finish(self) -> Self { |
218 | self |
219 | } |
220 | |
221 | type Handle = Handle; |
222 | |
223 | fn parse_error(&mut self, msg: Cow<'static, str>) { |
224 | self.errors.push(msg); |
225 | } |
226 | |
227 | fn get_document(&mut self) -> Handle { |
228 | self.document.clone() |
229 | } |
230 | |
231 | fn get_template_contents(&mut self, target: &Handle) -> Handle { |
232 | if let NodeData::Element { |
233 | ref template_contents, |
234 | .. |
235 | } = target.data |
236 | { |
237 | template_contents.borrow().as_ref().expect("not a template element!" ).clone() |
238 | } else { |
239 | panic!("not a template element!" ) |
240 | } |
241 | } |
242 | |
243 | fn set_quirks_mode(&mut self, mode: QuirksMode) { |
244 | self.quirks_mode = mode; |
245 | } |
246 | |
247 | fn same_node(&self, x: &Handle, y: &Handle) -> bool { |
248 | Rc::ptr_eq(x, y) |
249 | } |
250 | |
251 | fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> { |
252 | return match target.data { |
253 | NodeData::Element { ref name, .. } => name.expanded(), |
254 | _ => panic!("not an element!" ), |
255 | }; |
256 | } |
257 | |
258 | fn create_element( |
259 | &mut self, |
260 | name: QualName, |
261 | attrs: Vec<Attribute>, |
262 | flags: ElementFlags, |
263 | ) -> Handle { |
264 | Node::new(NodeData::Element { |
265 | name, |
266 | attrs: RefCell::new(attrs), |
267 | template_contents: RefCell::new(if flags.template { |
268 | Some(Node::new(NodeData::Document)) |
269 | } else { |
270 | None |
271 | }), |
272 | mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, |
273 | }) |
274 | } |
275 | |
276 | fn create_comment(&mut self, text: StrTendril) -> Handle { |
277 | Node::new(NodeData::Comment { contents: text }) |
278 | } |
279 | |
280 | fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Handle { |
281 | Node::new(NodeData::ProcessingInstruction { |
282 | target, |
283 | contents: data, |
284 | }) |
285 | } |
286 | |
287 | fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) { |
288 | // Append to an existing Text node if we have one. |
289 | if let NodeOrText::AppendText(ref text) = child { |
290 | if let Some(h) = parent.children.borrow().last() { |
291 | if append_to_existing_text(h, text) { |
292 | return; |
293 | } |
294 | } |
295 | } |
296 | |
297 | append( |
298 | parent, |
299 | match child { |
300 | NodeOrText::AppendText(text) => Node::new(NodeData::Text { |
301 | contents: RefCell::new(text), |
302 | }), |
303 | NodeOrText::AppendNode(node) => node, |
304 | }, |
305 | ); |
306 | } |
307 | |
308 | fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) { |
309 | let (parent, i) = get_parent_and_index(sibling) |
310 | .expect("append_before_sibling called on node without parent" ); |
311 | |
312 | let child = match (child, i) { |
313 | // No previous node. |
314 | (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text { |
315 | contents: RefCell::new(text), |
316 | }), |
317 | |
318 | // Look for a text node before the insertion point. |
319 | (NodeOrText::AppendText(text), i) => { |
320 | let children = parent.children.borrow(); |
321 | let prev = &children[i - 1]; |
322 | if append_to_existing_text(prev, &text) { |
323 | return; |
324 | } |
325 | Node::new(NodeData::Text { |
326 | contents: RefCell::new(text), |
327 | }) |
328 | }, |
329 | |
330 | // The tree builder promises we won't have a text node after |
331 | // the insertion point. |
332 | |
333 | // Any other kind of node. |
334 | (NodeOrText::AppendNode(node), _) => node, |
335 | }; |
336 | |
337 | remove_from_parent(&child); |
338 | |
339 | child.parent.set(Some(Rc::downgrade(&parent))); |
340 | parent.children.borrow_mut().insert(i, child); |
341 | } |
342 | |
343 | fn append_based_on_parent_node( |
344 | &mut self, |
345 | element: &Self::Handle, |
346 | prev_element: &Self::Handle, |
347 | child: NodeOrText<Self::Handle>, |
348 | ) { |
349 | let parent = element.parent.take(); |
350 | let has_parent = parent.is_some(); |
351 | element.parent.set(parent); |
352 | |
353 | if has_parent { |
354 | self.append_before_sibling(element, child); |
355 | } else { |
356 | self.append(prev_element, child); |
357 | } |
358 | } |
359 | |
360 | fn append_doctype_to_document( |
361 | &mut self, |
362 | name: StrTendril, |
363 | public_id: StrTendril, |
364 | system_id: StrTendril, |
365 | ) { |
366 | append( |
367 | &self.document, |
368 | Node::new(NodeData::Doctype { |
369 | name, |
370 | public_id, |
371 | system_id, |
372 | }), |
373 | ); |
374 | } |
375 | |
376 | fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) { |
377 | let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { |
378 | attrs.borrow_mut() |
379 | } else { |
380 | panic!("not an element" ) |
381 | }; |
382 | |
383 | let existing_names = existing |
384 | .iter() |
385 | .map(|e| e.name.clone()) |
386 | .collect::<HashSet<_>>(); |
387 | existing.extend( |
388 | attrs |
389 | .into_iter() |
390 | .filter(|attr| !existing_names.contains(&attr.name)), |
391 | ); |
392 | } |
393 | |
394 | fn remove_from_parent(&mut self, target: &Handle) { |
395 | remove_from_parent(target); |
396 | } |
397 | |
398 | fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) { |
399 | let mut children = node.children.borrow_mut(); |
400 | let mut new_children = new_parent.children.borrow_mut(); |
401 | for child in children.iter() { |
402 | let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent))); |
403 | assert!(Rc::ptr_eq( |
404 | node, |
405 | &previous_parent.unwrap().upgrade().expect("dangling weak" ) |
406 | )) |
407 | } |
408 | new_children.extend(mem::take(&mut *children)); |
409 | } |
410 | |
411 | fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool { |
412 | if let NodeData::Element { |
413 | mathml_annotation_xml_integration_point, |
414 | .. |
415 | } = target.data |
416 | { |
417 | mathml_annotation_xml_integration_point |
418 | } else { |
419 | panic!("not an element!" ) |
420 | } |
421 | } |
422 | } |
423 | |
424 | impl Default for RcDom { |
425 | fn default() -> RcDom { |
426 | RcDom { |
427 | document: Node::new(data:NodeData::Document), |
428 | errors: vec![], |
429 | quirks_mode: tree_builder::NoQuirks, |
430 | } |
431 | } |
432 | } |
433 | |
434 | enum SerializeOp { |
435 | Open(Handle), |
436 | Close(QualName), |
437 | } |
438 | |
439 | pub struct SerializableHandle(Handle); |
440 | |
441 | impl From<Handle> for SerializableHandle { |
442 | fn from(h: Handle) -> SerializableHandle { |
443 | SerializableHandle(h) |
444 | } |
445 | } |
446 | |
447 | impl Serialize for SerializableHandle { |
448 | fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()> |
449 | where |
450 | S: Serializer, |
451 | { |
452 | let mut ops = VecDeque::new(); |
453 | match traversal_scope { |
454 | IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())), |
455 | ChildrenOnly(_) => ops.extend(self |
456 | .0 |
457 | .children |
458 | .borrow() |
459 | .iter() |
460 | .map(|h| SerializeOp::Open(h.clone()))) |
461 | } |
462 | |
463 | while let Some(op) = ops.pop_front() { |
464 | match op { |
465 | SerializeOp::Open(handle) => match handle.data { |
466 | NodeData::Element { |
467 | ref name, |
468 | ref attrs, |
469 | .. |
470 | } => { |
471 | serializer.start_elem( |
472 | name.clone(), |
473 | attrs.borrow().iter().map(|at| (&at.name, &at.value[..])), |
474 | )?; |
475 | |
476 | ops.reserve(1 + handle.children.borrow().len()); |
477 | ops.push_front(SerializeOp::Close(name.clone())); |
478 | |
479 | for child in handle.children.borrow().iter().rev() { |
480 | ops.push_front(SerializeOp::Open(child.clone())); |
481 | } |
482 | }, |
483 | |
484 | NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?, |
485 | |
486 | NodeData::Text { ref contents } => { |
487 | serializer.write_text(&contents.borrow())? |
488 | }, |
489 | |
490 | NodeData::Comment { ref contents } => serializer.write_comment(contents)?, |
491 | |
492 | NodeData::ProcessingInstruction { |
493 | ref target, |
494 | ref contents, |
495 | } => serializer.write_processing_instruction(target, contents)?, |
496 | |
497 | NodeData::Document => panic!("Can't serialize Document node itself" ), |
498 | }, |
499 | |
500 | SerializeOp::Close(name) => { |
501 | serializer.end_elem(name)?; |
502 | }, |
503 | } |
504 | } |
505 | |
506 | Ok(()) |
507 | } |
508 | } |
509 | |