1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use log::warn;
11pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
12use markup5ever::{local_name, namespace_url, ns};
13use std::default::Default;
14use std::io::{self, Write};
15
16use crate::{LocalName, QualName};
17
18pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()>
19where
20 Wr: Write,
21 T: Serialize,
22{
23 let mut ser: HtmlSerializer = HtmlSerializer::new(writer, opts.clone());
24 node.serialize(&mut ser, opts.traversal_scope)
25}
26
/// Options that control how [`HtmlSerializer`] writes a tree.
#[derive(Clone)]
pub struct SerializeOpts {
    /// Is scripting enabled? When true, text directly inside an HTML
    /// `<noscript>` element is written verbatim instead of being escaped.
    /// Default: true
    pub scripting_enabled: bool,

    /// Serialize the root node itself, or only its children? With
    /// `ChildrenOnly(Some(name))` the serializer treats `name` as the element
    /// enclosing the output when deciding whether text must be escaped.
    /// Default: `ChildrenOnly(None)`
    pub traversal_scope: TraversalScope,

    /// If the serializer is asked to serialize an invalid tree, the default
    /// behavior is to panic in the event that an `end_elem` is created without a
    /// matching `start_elem`. Setting this to true will prevent those panics by
    /// creating a default parent on the element stack. No extra start elem will
    /// actually be written. Default: false
    pub create_missing_parent: bool,
}
42
43impl Default for SerializeOpts {
44 fn default() -> SerializeOpts {
45 SerializeOpts {
46 scripting_enabled: true,
47 traversal_scope: TraversalScope::ChildrenOnly(None),
48 create_missing_parent: false,
49 }
50 }
51}
52
53#[derive(Default)]
54struct ElemInfo {
55 html_name: Option<LocalName>,
56 ignore_children: bool
57}
58
/// HTML serializer that writes markup to a `Write` sink while tracking the
/// elements that are currently open.
pub struct HtmlSerializer<Wr: Write> {
    /// Destination for all serialized output.
    pub writer: Wr,
    /// Options supplied at construction time.
    opts: SerializeOpts,
    /// One `ElemInfo` per open element; `new` seeds it with a single entry
    /// standing in for the parent of whatever is serialized first.
    stack: Vec<ElemInfo>,
}
64
65fn tagname(name: &QualName) -> LocalName {
66 match name.ns {
67 ns!(html) | ns!(mathml) | ns!(svg) => (),
68 ref ns: &{unknown} => {
69 // FIXME(#122)
70 warn!("node with weird namespace {:?}", ns);
71 },
72 }
73
74 name.local.clone()
75}
76
77impl<Wr: Write> HtmlSerializer<Wr> {
78 pub fn new(writer: Wr, opts: SerializeOpts) -> Self {
79 let html_name = match opts.traversal_scope {
80 TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None,
81 TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)),
82 };
83 HtmlSerializer {
84 writer,
85 opts,
86 stack: vec![ElemInfo {
87 html_name,
88 ignore_children: false,
89 }],
90 }
91 }
92
93 fn parent(&mut self) -> &mut ElemInfo {
94 if self.stack.is_empty() {
95 if self.opts.create_missing_parent {
96 warn!("ElemInfo stack empty, creating new parent");
97 self.stack.push(Default::default());
98 } else {
99 panic!("no parent ElemInfo")
100 }
101 }
102 self.stack.last_mut().unwrap()
103 }
104
105 fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> {
106 for c in text.chars() {
107 match c {
108 '&' => self.writer.write_all(b"&amp;"),
109 '\u{00A0}' => self.writer.write_all(b"&nbsp;"),
110 '"' if attr_mode => self.writer.write_all(b"&quot;"),
111 '<' if !attr_mode => self.writer.write_all(b"&lt;"),
112 '>' if !attr_mode => self.writer.write_all(b"&gt;"),
113 c => self.writer.write_fmt(format_args!("{}", c)),
114 }?;
115 }
116 Ok(())
117 }
118}
119
120impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
121 fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()>
122 where
123 AttrIter: Iterator<Item = AttrRef<'a>>,
124 {
125 let html_name = match name.ns {
126 ns!(html) => Some(name.local.clone()),
127 _ => None,
128 };
129
130 if self.parent().ignore_children {
131 self.stack.push(ElemInfo {
132 html_name,
133 ignore_children: true,
134 });
135 return Ok(());
136 }
137
138 self.writer.write_all(b"<")?;
139 self.writer.write_all(tagname(&name).as_bytes())?;
140 for (name, value) in attrs {
141 self.writer.write_all(b" ")?;
142
143 match name.ns {
144 ns!() => (),
145 ns!(xml) => self.writer.write_all(b"xml:")?,
146 ns!(xmlns) => {
147 if name.local != local_name!("xmlns") {
148 self.writer.write_all(b"xmlns:")?;
149 }
150 },
151 ns!(xlink) => self.writer.write_all(b"xlink:")?,
152 ref ns => {
153 // FIXME(#122)
154 warn!("attr with weird namespace {:?}", ns);
155 self.writer.write_all(b"unknown_namespace:")?;
156 },
157 }
158
159 self.writer.write_all(name.local.as_bytes())?;
160 self.writer.write_all(b"=\"")?;
161 self.write_escaped(value, true)?;
162 self.writer.write_all(b"\"")?;
163 }
164 self.writer.write_all(b">")?;
165
166 let ignore_children = name.ns == ns!(html) &&
167 match name.local {
168 local_name!("area") |
169 local_name!("base") |
170 local_name!("basefont") |
171 local_name!("bgsound") |
172 local_name!("br") |
173 local_name!("col") |
174 local_name!("embed") |
175 local_name!("frame") |
176 local_name!("hr") |
177 local_name!("img") |
178 local_name!("input") |
179 local_name!("keygen") |
180 local_name!("link") |
181 local_name!("meta") |
182 local_name!("param") |
183 local_name!("source") |
184 local_name!("track") |
185 local_name!("wbr") => true,
186 _ => false,
187 };
188
189 self.stack.push(ElemInfo {
190 html_name,
191 ignore_children,
192 });
193
194 Ok(())
195 }
196
197 fn end_elem(&mut self, name: QualName) -> io::Result<()> {
198 let info = match self.stack.pop() {
199 Some(info) => info,
200 None if self.opts.create_missing_parent => {
201 warn!("missing ElemInfo, creating default.");
202 Default::default()
203 },
204 _ => panic!("no ElemInfo"),
205 };
206 if info.ignore_children {
207 return Ok(());
208 }
209
210 self.writer.write_all(b"</")?;
211 self.writer.write_all(tagname(&name).as_bytes())?;
212 self.writer.write_all(b">")
213 }
214
215 fn write_text(&mut self, text: &str) -> io::Result<()> {
216 let escape = match self.parent().html_name {
217 Some(local_name!("style")) |
218 Some(local_name!("script")) |
219 Some(local_name!("xmp")) |
220 Some(local_name!("iframe")) |
221 Some(local_name!("noembed")) |
222 Some(local_name!("noframes")) |
223 Some(local_name!("plaintext")) => false,
224
225 Some(local_name!("noscript")) => !self.opts.scripting_enabled,
226
227 _ => true,
228 };
229
230 if escape {
231 self.write_escaped(text, false)
232 } else {
233 self.writer.write_all(text.as_bytes())
234 }
235 }
236
237 fn write_comment(&mut self, text: &str) -> io::Result<()> {
238 self.writer.write_all(b"<!--")?;
239 self.writer.write_all(text.as_bytes())?;
240 self.writer.write_all(b"-->")
241 }
242
243 fn write_doctype(&mut self, name: &str) -> io::Result<()> {
244 self.writer.write_all(b"<!DOCTYPE ")?;
245 self.writer.write_all(name.as_bytes())?;
246 self.writer.write_all(b">")
247 }
248
249 fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> {
250 self.writer.write_all(b"<?")?;
251 self.writer.write_all(target.as_bytes())?;
252 self.writer.write_all(b" ")?;
253 self.writer.write_all(data.as_bytes())?;
254 self.writer.write_all(b">")
255 }
256}
257